1 /* Statement Analysis and Transformation for Vectorization
2 Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012
3 Free Software Foundation, Inc.
4 Contributed by Dorit Naishlos <dorit@il.ibm.com>
5 and Ira Rosen <irar@il.ibm.com>
6
7 This file is part of GCC.
8
9 GCC is free software; you can redistribute it and/or modify it under
10 the terms of the GNU General Public License as published by the Free
11 Software Foundation; either version 3, or (at your option) any later
12 version.
13
14 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
15 WARRANTY; without even the implied warranty of MERCHANTABILITY or
16 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 for more details.
18
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
22
23 #include "config.h"
24 #include "system.h"
25 #include "coretypes.h"
26 #include "tm.h"
27 #include "ggc.h"
28 #include "tree.h"
29 #include "target.h"
30 #include "basic-block.h"
31 #include "tree-pretty-print.h"
32 #include "gimple-pretty-print.h"
33 #include "tree-flow.h"
34 #include "tree-dump.h"
35 #include "cfgloop.h"
36 #include "cfglayout.h"
37 #include "expr.h"
38 #include "recog.h"
39 #include "optabs.h"
40 #include "diagnostic-core.h"
41 #include "tree-vectorizer.h"
42 #include "langhooks.h"
43
44
45 /* Return a variable of type ELEM_TYPE[NELEMS]. */
46
47 static tree
48 create_vector_array (tree elem_type, unsigned HOST_WIDE_INT nelems)
49 {
50 return create_tmp_var (build_array_type_nelts (elem_type, nelems),
51 "vect_array");
52 }
53
54 /* ARRAY is an array of vectors created by create_vector_array.
55 Return an SSA_NAME for the vector in index N. The reference
56 is part of the vectorization of STMT and the vector is associated
57 with scalar destination SCALAR_DEST. */
58
59 static tree
60 read_vector_array (gimple stmt, gimple_stmt_iterator *gsi, tree scalar_dest,
61 tree array, unsigned HOST_WIDE_INT n)
62 {
63 tree vect_type, vect, vect_name, array_ref;
64 gimple new_stmt;
65
66 gcc_assert (TREE_CODE (TREE_TYPE (array)) == ARRAY_TYPE);
67 vect_type = TREE_TYPE (TREE_TYPE (array));
68 vect = vect_create_destination_var (scalar_dest, vect_type);
69 array_ref = build4 (ARRAY_REF, vect_type, array,
70 build_int_cst (size_type_node, n),
71 NULL_TREE, NULL_TREE);
72
73 new_stmt = gimple_build_assign (vect, array_ref);
74 vect_name = make_ssa_name (vect, new_stmt);
75 gimple_assign_set_lhs (new_stmt, vect_name);
76 vect_finish_stmt_generation (stmt, new_stmt, gsi);
77 mark_symbols_for_renaming (new_stmt);
78
79 return vect_name;
80 }
81
82 /* ARRAY is an array of vectors created by create_vector_array.
83 Emit code to store SSA_NAME VECT in index N of the array.
84 The store is part of the vectorization of STMT. */
85
86 static void
87 write_vector_array (gimple stmt, gimple_stmt_iterator *gsi, tree vect,
88 tree array, unsigned HOST_WIDE_INT n)
89 {
90 tree array_ref;
91 gimple new_stmt;
92
93 array_ref = build4 (ARRAY_REF, TREE_TYPE (vect), array,
94 build_int_cst (size_type_node, n),
95 NULL_TREE, NULL_TREE);
96
97 new_stmt = gimple_build_assign (array_ref, vect);
98 vect_finish_stmt_generation (stmt, new_stmt, gsi);
99 mark_symbols_for_renaming (new_stmt);
100 }
101
102 /* PTR is a pointer to an array of type TYPE. Return a representation
103 of *PTR. The memory reference replaces those in FIRST_DR
104 (and its group). */
105
106 static tree
107 create_array_ref (tree type, tree ptr, struct data_reference *first_dr)
108 {
109 struct ptr_info_def *pi;
110 tree mem_ref, alias_ptr_type;
111
112 alias_ptr_type = reference_alias_ptr_type (DR_REF (first_dr));
113 mem_ref = build2 (MEM_REF, type, ptr, build_int_cst (alias_ptr_type, 0));
114 /* Arrays have the same alignment as their type. */
115 pi = get_ptr_info (ptr);
116 pi->align = TYPE_ALIGN_UNIT (type);
117 pi->misalign = 0;
118 return mem_ref;
119 }
120
121 /* Utility functions used by vect_mark_stmts_to_be_vectorized. */
122
123 /* Function vect_mark_relevant.
124
125 Mark STMT as "relevant for vectorization" and add it to WORKLIST. */
126
127 static void
128 vect_mark_relevant (VEC(gimple,heap) **worklist, gimple stmt,
129 enum vect_relevant relevant, bool live_p,
130 bool used_in_pattern)
131 {
132 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
133 enum vect_relevant save_relevant = STMT_VINFO_RELEVANT (stmt_info);
134 bool save_live_p = STMT_VINFO_LIVE_P (stmt_info);
135 gimple pattern_stmt;
136
137 if (vect_print_dump_info (REPORT_DETAILS))
138 fprintf (vect_dump, "mark relevant %d, live %d.", relevant, live_p);
139
140 /* If this stmt is an original stmt in a pattern, we might need to mark its
141 related pattern stmt instead of the original stmt. However, such stmts
142 	     may have their own uses that are not in any pattern; in such cases the
143 stmt itself should be marked. */
144 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
145 {
146 bool found = false;
147 if (!used_in_pattern)
148 {
149 imm_use_iterator imm_iter;
150 use_operand_p use_p;
151 gimple use_stmt;
152 tree lhs;
153 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
154 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
155
156 if (is_gimple_assign (stmt))
157 lhs = gimple_assign_lhs (stmt);
158 else
159 lhs = gimple_call_lhs (stmt);
160
161 	          /* This use is outside of any pattern; if LHS has other uses that
162 	             are pattern uses, we should mark the stmt itself, and not the
163 	             pattern stmt.  */
164 if (TREE_CODE (lhs) == SSA_NAME)
165 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
166 {
167 if (is_gimple_debug (USE_STMT (use_p)))
168 continue;
169 use_stmt = USE_STMT (use_p);
170
171 if (!flow_bb_inside_loop_p (loop, gimple_bb (use_stmt)))
172 continue;
173
174 if (vinfo_for_stmt (use_stmt)
175 && STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (use_stmt)))
176 {
177 found = true;
178 break;
179 }
180 }
181 }
182
183 if (!found)
184 {
185 /* This is the last stmt in a sequence that was detected as a
186 pattern that can potentially be vectorized. Don't mark the stmt
187 as relevant/live because it's not going to be vectorized.
188 Instead mark the pattern-stmt that replaces it. */
189
190 pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
191
192 if (vect_print_dump_info (REPORT_DETAILS))
193 fprintf (vect_dump, "last stmt in pattern. don't mark"
194 " relevant/live.");
195 stmt_info = vinfo_for_stmt (pattern_stmt);
196 gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info) == stmt);
197 save_relevant = STMT_VINFO_RELEVANT (stmt_info);
198 save_live_p = STMT_VINFO_LIVE_P (stmt_info);
199 stmt = pattern_stmt;
200 }
201 }
202
203 STMT_VINFO_LIVE_P (stmt_info) |= live_p;
204 if (relevant > STMT_VINFO_RELEVANT (stmt_info))
205 STMT_VINFO_RELEVANT (stmt_info) = relevant;
206
207 if (STMT_VINFO_RELEVANT (stmt_info) == save_relevant
208 && STMT_VINFO_LIVE_P (stmt_info) == save_live_p)
209 {
210 if (vect_print_dump_info (REPORT_DETAILS))
211 fprintf (vect_dump, "already marked relevant/live.");
212 return;
213 }
214
215 VEC_safe_push (gimple, heap, *worklist, stmt);
216 }
217
218
219 /* Function vect_stmt_relevant_p.
220
221 Return true if STMT in loop that is represented by LOOP_VINFO is
222 "relevant for vectorization".
223
224 A stmt is considered "relevant for vectorization" if:
225 - it has uses outside the loop.
226 - it has vdefs (it alters memory).
227 	   - it is a control stmt in the loop (except for the exit condition).
228
229 CHECKME: what other side effects would the vectorizer allow? */
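/* For example, a store "a[i] = x" is relevant because it creates a vdef,
   and a computation whose result is read after the loop (through a
   loop-closed exit phi) is marked "live" even if it has no other uses
   inside the loop.  */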
230
231 static bool
232 vect_stmt_relevant_p (gimple stmt, loop_vec_info loop_vinfo,
233 enum vect_relevant *relevant, bool *live_p)
234 {
235 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
236 ssa_op_iter op_iter;
237 imm_use_iterator imm_iter;
238 use_operand_p use_p;
239 def_operand_p def_p;
240
241 *relevant = vect_unused_in_scope;
242 *live_p = false;
243
244 /* cond stmt other than loop exit cond. */
245 if (is_ctrl_stmt (stmt)
246 && STMT_VINFO_TYPE (vinfo_for_stmt (stmt))
247 != loop_exit_ctrl_vec_info_type)
248 *relevant = vect_used_in_scope;
249
250 /* changing memory. */
251 if (gimple_code (stmt) != GIMPLE_PHI)
252 if (gimple_vdef (stmt))
253 {
254 if (vect_print_dump_info (REPORT_DETAILS))
255 fprintf (vect_dump, "vec_stmt_relevant_p: stmt has vdefs.");
256 *relevant = vect_used_in_scope;
257 }
258
259 /* uses outside the loop. */
260 FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt, op_iter, SSA_OP_DEF)
261 {
262 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, DEF_FROM_PTR (def_p))
263 {
264 basic_block bb = gimple_bb (USE_STMT (use_p));
265 if (!flow_bb_inside_loop_p (loop, bb))
266 {
267 if (vect_print_dump_info (REPORT_DETAILS))
268 fprintf (vect_dump, "vec_stmt_relevant_p: used out of loop.");
269
270 if (is_gimple_debug (USE_STMT (use_p)))
271 continue;
272
273 /* We expect all such uses to be in the loop exit phis
274 (because of loop closed form) */
275 gcc_assert (gimple_code (USE_STMT (use_p)) == GIMPLE_PHI);
276 gcc_assert (bb == single_exit (loop)->dest);
277
278 *live_p = true;
279 }
280 }
281 }
282
283 return (*live_p || *relevant);
284 }
285
286
287 /* Function exist_non_indexing_operands_for_use_p
288
289 USE is one of the uses attached to STMT. Check if USE is
290 used in STMT for anything other than indexing an array. */
291
292 static bool
293 exist_non_indexing_operands_for_use_p (tree use, gimple stmt)
294 {
295 tree operand;
296 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
297
298 /* USE corresponds to some operand in STMT. If there is no data
299 reference in STMT, then any operand that corresponds to USE
300 is not indexing an array. */
301 if (!STMT_VINFO_DATA_REF (stmt_info))
302 return true;
303
304 	  /* STMT has a data_ref.  FORNOW this means that it is one of
305 the following forms:
306 -1- ARRAY_REF = var
307 -2- var = ARRAY_REF
308 (This should have been verified in analyze_data_refs).
309
310 'var' in the second case corresponds to a def, not a use,
311 so USE cannot correspond to any operands that are not used
312 for array indexing.
313
314 Therefore, all we need to check is if STMT falls into the
315 first case, and whether var corresponds to USE. */
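  /* As an illustration (i_1 and x_2 are placeholder SSA names): for the
     store "a[i_1] = x_2" (form -1- above), a call with USE == x_2 returns
     true, since x_2 is the stored value rather than an index, whereas a
     call with USE == i_1 falls through the checks below and returns
     false.  */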
316
317 if (!gimple_assign_copy_p (stmt))
318 return false;
319 if (TREE_CODE (gimple_assign_lhs (stmt)) == SSA_NAME)
320 return false;
321 operand = gimple_assign_rhs1 (stmt);
322 if (TREE_CODE (operand) != SSA_NAME)
323 return false;
324
325 if (operand == use)
326 return true;
327
328 return false;
329 }
330
331
332 /*
333 Function process_use.
334
335 Inputs:
336 - a USE in STMT in a loop represented by LOOP_VINFO
337 - LIVE_P, RELEVANT - enum values to be set in the STMT_VINFO of the stmt
338 that defined USE. This is done by calling mark_relevant and passing it
339 the WORKLIST (to add DEF_STMT to the WORKLIST in case it is relevant).
340 - FORCE is true if exist_non_indexing_operands_for_use_p check shouldn't
341 be performed.
342
343 Outputs:
344 Generally, LIVE_P and RELEVANT are used to define the liveness and
345 relevance info of the DEF_STMT of this USE:
346 STMT_VINFO_LIVE_P (DEF_STMT_info) <-- live_p
347 STMT_VINFO_RELEVANT (DEF_STMT_info) <-- relevant
348 Exceptions:
349 - case 1: If USE is used only for address computations (e.g. array indexing),
350 which does not need to be directly vectorized, then the liveness/relevance
351 of the respective DEF_STMT is left unchanged.
352 - case 2: If STMT is a reduction phi and DEF_STMT is a reduction stmt, we
353 	     skip DEF_STMT because it has already been processed.
354 - case 3: If DEF_STMT and STMT are in different nests, then "relevant" will
355 be modified accordingly.
356
357 Return true if everything is as expected. Return false otherwise. */
358
359 static bool
360 process_use (gimple stmt, tree use, loop_vec_info loop_vinfo, bool live_p,
361 enum vect_relevant relevant, VEC(gimple,heap) **worklist,
362 bool force)
363 {
364 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
365 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
366 stmt_vec_info dstmt_vinfo;
367 basic_block bb, def_bb;
368 tree def;
369 gimple def_stmt;
370 enum vect_def_type dt;
371
372 /* case 1: we are only interested in uses that need to be vectorized. Uses
373 that are used for address computation are not considered relevant. */
374 if (!force && !exist_non_indexing_operands_for_use_p (use, stmt))
375 return true;
376
377 if (!vect_is_simple_use (use, stmt, loop_vinfo, NULL, &def_stmt, &def, &dt))
378 {
379 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
380 fprintf (vect_dump, "not vectorized: unsupported use in stmt.");
381 return false;
382 }
383
384 if (!def_stmt || gimple_nop_p (def_stmt))
385 return true;
386
387 def_bb = gimple_bb (def_stmt);
388 if (!flow_bb_inside_loop_p (loop, def_bb))
389 {
390 if (vect_print_dump_info (REPORT_DETAILS))
391 fprintf (vect_dump, "def_stmt is out of loop.");
392 return true;
393 }
394
395 /* case 2: A reduction phi (STMT) defined by a reduction stmt (DEF_STMT).
396 DEF_STMT must have already been processed, because this should be the
397 only way that STMT, which is a reduction-phi, was put in the worklist,
398 as there should be no other uses for DEF_STMT in the loop. So we just
399 check that everything is as expected, and we are done. */
400 dstmt_vinfo = vinfo_for_stmt (def_stmt);
401 bb = gimple_bb (stmt);
402 if (gimple_code (stmt) == GIMPLE_PHI
403 && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
404 && gimple_code (def_stmt) != GIMPLE_PHI
405 && STMT_VINFO_DEF_TYPE (dstmt_vinfo) == vect_reduction_def
406 && bb->loop_father == def_bb->loop_father)
407 {
408 if (vect_print_dump_info (REPORT_DETAILS))
409 fprintf (vect_dump, "reduc-stmt defining reduc-phi in the same nest.");
410 if (STMT_VINFO_IN_PATTERN_P (dstmt_vinfo))
411 dstmt_vinfo = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (dstmt_vinfo));
412 gcc_assert (STMT_VINFO_RELEVANT (dstmt_vinfo) < vect_used_by_reduction);
413 gcc_assert (STMT_VINFO_LIVE_P (dstmt_vinfo)
414 || STMT_VINFO_RELEVANT (dstmt_vinfo) > vect_unused_in_scope);
415 return true;
416 }
417
418 /* case 3a: outer-loop stmt defining an inner-loop stmt:
419 outer-loop-header-bb:
420 d = def_stmt
421 inner-loop:
422 stmt # use (d)
423 outer-loop-tail-bb:
424 ... */
425 if (flow_loop_nested_p (def_bb->loop_father, bb->loop_father))
426 {
427 if (vect_print_dump_info (REPORT_DETAILS))
428 fprintf (vect_dump, "outer-loop def-stmt defining inner-loop stmt.");
429
430 switch (relevant)
431 {
432 case vect_unused_in_scope:
433 relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_nested_cycle) ?
434 vect_used_in_scope : vect_unused_in_scope;
435 break;
436
437 case vect_used_in_outer_by_reduction:
438 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
439 relevant = vect_used_by_reduction;
440 break;
441
442 case vect_used_in_outer:
443 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
444 relevant = vect_used_in_scope;
445 break;
446
447 case vect_used_in_scope:
448 break;
449
450 default:
451 gcc_unreachable ();
452 }
453 }
454
455 /* case 3b: inner-loop stmt defining an outer-loop stmt:
456 outer-loop-header-bb:
457 ...
458 inner-loop:
459 d = def_stmt
460 outer-loop-tail-bb (or outer-loop-exit-bb in double reduction):
461 stmt # use (d) */
462 else if (flow_loop_nested_p (bb->loop_father, def_bb->loop_father))
463 {
464 if (vect_print_dump_info (REPORT_DETAILS))
465 fprintf (vect_dump, "inner-loop def-stmt defining outer-loop stmt.");
466
467 switch (relevant)
468 {
469 case vect_unused_in_scope:
470 relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
471 || STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_double_reduction_def) ?
472 vect_used_in_outer_by_reduction : vect_unused_in_scope;
473 break;
474
475 case vect_used_by_reduction:
476 relevant = vect_used_in_outer_by_reduction;
477 break;
478
479 case vect_used_in_scope:
480 relevant = vect_used_in_outer;
481 break;
482
483 default:
484 gcc_unreachable ();
485 }
486 }
487
488 vect_mark_relevant (worklist, def_stmt, relevant, live_p,
489 is_pattern_stmt_p (stmt_vinfo));
490 return true;
491 }
492
493
494 /* Function vect_mark_stmts_to_be_vectorized.
495
496 Not all stmts in the loop need to be vectorized. For example:
497
498 for i...
499 for j...
500 1. T0 = i + j
501 2. T1 = a[T0]
502
503 3. j = j + 1
504
505 	   Stmts 1 and 3 do not need to be vectorized, because loop control and
506 addressing of vectorized data-refs are handled differently.
507
508 This pass detects such stmts. */
509
510 bool
511 vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo)
512 {
513 VEC(gimple,heap) *worklist;
514 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
515 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
516 unsigned int nbbs = loop->num_nodes;
517 gimple_stmt_iterator si;
518 gimple stmt;
519 unsigned int i;
520 stmt_vec_info stmt_vinfo;
521 basic_block bb;
522 gimple phi;
523 bool live_p;
524 enum vect_relevant relevant, tmp_relevant;
525 enum vect_def_type def_type;
526
527 if (vect_print_dump_info (REPORT_DETAILS))
528 fprintf (vect_dump, "=== vect_mark_stmts_to_be_vectorized ===");
529
530 worklist = VEC_alloc (gimple, heap, 64);
531
532 /* 1. Init worklist. */
533 for (i = 0; i < nbbs; i++)
534 {
535 bb = bbs[i];
536 for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
537 {
538 phi = gsi_stmt (si);
539 if (vect_print_dump_info (REPORT_DETAILS))
540 {
541 fprintf (vect_dump, "init: phi relevant? ");
542 print_gimple_stmt (vect_dump, phi, 0, TDF_SLIM);
543 }
544
545 if (vect_stmt_relevant_p (phi, loop_vinfo, &relevant, &live_p))
546 vect_mark_relevant (&worklist, phi, relevant, live_p, false);
547 }
548 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
549 {
550 stmt = gsi_stmt (si);
551 if (vect_print_dump_info (REPORT_DETAILS))
552 {
553 fprintf (vect_dump, "init: stmt relevant? ");
554 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
555 }
556
557 if (vect_stmt_relevant_p (stmt, loop_vinfo, &relevant, &live_p))
558 vect_mark_relevant (&worklist, stmt, relevant, live_p, false);
559 }
560 }
561
562 /* 2. Process_worklist */
563 while (VEC_length (gimple, worklist) > 0)
564 {
565 use_operand_p use_p;
566 ssa_op_iter iter;
567
568 stmt = VEC_pop (gimple, worklist);
569 if (vect_print_dump_info (REPORT_DETAILS))
570 {
571 fprintf (vect_dump, "worklist: examine stmt: ");
572 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
573 }
574
575 /* Examine the USEs of STMT. For each USE, mark the stmt that defines it
576 (DEF_STMT) as relevant/irrelevant and live/dead according to the
577 liveness and relevance properties of STMT. */
578 stmt_vinfo = vinfo_for_stmt (stmt);
579 relevant = STMT_VINFO_RELEVANT (stmt_vinfo);
580 live_p = STMT_VINFO_LIVE_P (stmt_vinfo);
581
582 /* Generally, the liveness and relevance properties of STMT are
583 propagated as is to the DEF_STMTs of its USEs:
584 live_p <-- STMT_VINFO_LIVE_P (STMT_VINFO)
585 relevant <-- STMT_VINFO_RELEVANT (STMT_VINFO)
586
587 One exception is when STMT has been identified as defining a reduction
588 variable; in this case we set the liveness/relevance as follows:
589 live_p = false
590 relevant = vect_used_by_reduction
591 This is because we distinguish between two kinds of relevant stmts -
592 those that are used by a reduction computation, and those that are
593 (also) used by a regular computation. This allows us later on to
594 identify stmts that are used solely by a reduction, and therefore the
595 order of the results that they produce does not have to be kept. */
596
597 def_type = STMT_VINFO_DEF_TYPE (stmt_vinfo);
598 tmp_relevant = relevant;
599 switch (def_type)
600 {
601 case vect_reduction_def:
602 switch (tmp_relevant)
603 {
604 case vect_unused_in_scope:
605 relevant = vect_used_by_reduction;
606 break;
607
608 case vect_used_by_reduction:
609 if (gimple_code (stmt) == GIMPLE_PHI)
610 break;
611 /* fall through */
612
613 default:
614 if (vect_print_dump_info (REPORT_DETAILS))
615 fprintf (vect_dump, "unsupported use of reduction.");
616
617 VEC_free (gimple, heap, worklist);
618 return false;
619 }
620
621 live_p = false;
622 break;
623
624 case vect_nested_cycle:
625 if (tmp_relevant != vect_unused_in_scope
626 && tmp_relevant != vect_used_in_outer_by_reduction
627 && tmp_relevant != vect_used_in_outer)
628 {
629 if (vect_print_dump_info (REPORT_DETAILS))
630 fprintf (vect_dump, "unsupported use of nested cycle.");
631
632 VEC_free (gimple, heap, worklist);
633 return false;
634 }
635
636 live_p = false;
637 break;
638
639 case vect_double_reduction_def:
640 if (tmp_relevant != vect_unused_in_scope
641 && tmp_relevant != vect_used_by_reduction)
642 {
643 if (vect_print_dump_info (REPORT_DETAILS))
644 fprintf (vect_dump, "unsupported use of double reduction.");
645
646 VEC_free (gimple, heap, worklist);
647 return false;
648 }
649
650 live_p = false;
651 break;
652
653 default:
654 break;
655 }
656
657 if (is_pattern_stmt_p (stmt_vinfo))
658 {
659 /* Pattern statements are not inserted into the code, so
660 FOR_EACH_PHI_OR_STMT_USE optimizes their operands out, and we
661 have to scan the RHS or function arguments instead. */
662 if (is_gimple_assign (stmt))
663 {
664 enum tree_code rhs_code = gimple_assign_rhs_code (stmt);
665 tree op = gimple_assign_rhs1 (stmt);
666
667 i = 1;
668 if (rhs_code == COND_EXPR && COMPARISON_CLASS_P (op))
669 {
670 if (!process_use (stmt, TREE_OPERAND (op, 0), loop_vinfo,
671 live_p, relevant, &worklist, false)
672 || !process_use (stmt, TREE_OPERAND (op, 1), loop_vinfo,
673 live_p, relevant, &worklist, false))
674 {
675 VEC_free (gimple, heap, worklist);
676 return false;
677 }
678 i = 2;
679 }
680 for (; i < gimple_num_ops (stmt); i++)
681 {
682 op = gimple_op (stmt, i);
683 if (!process_use (stmt, op, loop_vinfo, live_p, relevant,
684 &worklist, false))
685 {
686 VEC_free (gimple, heap, worklist);
687 return false;
688 }
689 }
690 }
691 else if (is_gimple_call (stmt))
692 {
693 for (i = 0; i < gimple_call_num_args (stmt); i++)
694 {
695 tree arg = gimple_call_arg (stmt, i);
696 if (!process_use (stmt, arg, loop_vinfo, live_p, relevant,
697 &worklist, false))
698 {
699 VEC_free (gimple, heap, worklist);
700 return false;
701 }
702 }
703 }
704 }
705 else
706 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
707 {
708 tree op = USE_FROM_PTR (use_p);
709 if (!process_use (stmt, op, loop_vinfo, live_p, relevant,
710 &worklist, false))
711 {
712 VEC_free (gimple, heap, worklist);
713 return false;
714 }
715 }
716
717 if (STMT_VINFO_GATHER_P (stmt_vinfo))
718 {
719 tree off;
720 tree decl = vect_check_gather (stmt, loop_vinfo, NULL, &off, NULL);
721 gcc_assert (decl);
722 if (!process_use (stmt, off, loop_vinfo, live_p, relevant,
723 &worklist, true))
724 {
725 VEC_free (gimple, heap, worklist);
726 return false;
727 }
728 }
729 } /* while worklist */
730
731 VEC_free (gimple, heap, worklist);
732 return true;
733 }
734
735
736 /* Get cost by calling the target's cost builtin.  */
737
738 static inline
739 int vect_get_stmt_cost (enum vect_cost_for_stmt type_of_cost)
740 {
741 tree dummy_type = NULL;
742 int dummy = 0;
743
744 return targetm.vectorize.builtin_vectorization_cost (type_of_cost,
745 dummy_type, dummy);
746 }
747
748
749 /* Get cost for STMT. */
750
751 int
752 cost_for_stmt (gimple stmt)
753 {
754 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
755
756 switch (STMT_VINFO_TYPE (stmt_info))
757 {
758 case load_vec_info_type:
759 return vect_get_stmt_cost (scalar_load);
760 case store_vec_info_type:
761 return vect_get_stmt_cost (scalar_store);
762 case op_vec_info_type:
763 case condition_vec_info_type:
764 case assignment_vec_info_type:
765 case reduc_vec_info_type:
766 case induc_vec_info_type:
767 case type_promotion_vec_info_type:
768 case type_demotion_vec_info_type:
769 case type_conversion_vec_info_type:
770 case call_vec_info_type:
771 return vect_get_stmt_cost (scalar_stmt);
772 case undef_vec_info_type:
773 default:
774 gcc_unreachable ();
775 }
776 }
777
778 /* Function vect_model_simple_cost.
779
780 Models cost for simple operations, i.e. those that only emit ncopies of a
781 single op. Right now, this does not account for multiple insns that could
782 be generated for the single vector op. We will handle that shortly. */
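/* In effect the model below charges
     inside_cost  = ncopies * cost (vector_stmt)
   plus one extra cost (vector_stmt) on the outside for each operand (at
   most the first two are examined) that is a constant or external, i.e.
   loop-invariant, definition.  */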
783
784 void
785 vect_model_simple_cost (stmt_vec_info stmt_info, int ncopies,
786 enum vect_def_type *dt, slp_tree slp_node)
787 {
788 int i;
789 int inside_cost = 0, outside_cost = 0;
790
791 /* The SLP costs were already calculated during SLP tree build. */
792 if (PURE_SLP_STMT (stmt_info))
793 return;
794
795 inside_cost = ncopies * vect_get_stmt_cost (vector_stmt);
796
797 	  /* FORNOW: Assuming a maximum of 2 args per stmt.  */
798 for (i = 0; i < 2; i++)
799 {
800 if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
801 outside_cost += vect_get_stmt_cost (vector_stmt);
802 }
803
804 if (vect_print_dump_info (REPORT_COST))
805 fprintf (vect_dump, "vect_model_simple_cost: inside_cost = %d, "
806 "outside_cost = %d .", inside_cost, outside_cost);
807
808 /* Set the costs either in STMT_INFO or SLP_NODE (if exists). */
809 stmt_vinfo_set_inside_of_loop_cost (stmt_info, slp_node, inside_cost);
810 stmt_vinfo_set_outside_of_loop_cost (stmt_info, slp_node, outside_cost);
811 }
812
813
814 /* Model cost for type demotion and promotion operations. PWR is normally
815 zero for single-step promotions and demotions. It will be one if
816 two-step promotion/demotion is required, and so on. Each additional
817 step doubles the number of instructions required. */
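/* For example, with PWR == 1 (a two-step conversion) the loop below
   accumulates vect_pow2 (1) + vect_pow2 (2) == 6 single-statement costs
   for a promotion, and vect_pow2 (0) + vect_pow2 (1) == 3 for a
   demotion.  */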
818
819 static void
820 vect_model_promotion_demotion_cost (stmt_vec_info stmt_info,
821 enum vect_def_type *dt, int pwr)
822 {
823 int i, tmp;
824 int inside_cost = 0, outside_cost = 0, single_stmt_cost;
825
826 /* The SLP costs were already calculated during SLP tree build. */
827 if (PURE_SLP_STMT (stmt_info))
828 return;
829
830 single_stmt_cost = vect_get_stmt_cost (vec_promote_demote);
831 for (i = 0; i < pwr + 1; i++)
832 {
833 tmp = (STMT_VINFO_TYPE (stmt_info) == type_promotion_vec_info_type) ?
834 (i + 1) : i;
835 inside_cost += vect_pow2 (tmp) * single_stmt_cost;
836 }
837
838 	  /* FORNOW: Assuming a maximum of 2 args per stmt.  */
839 for (i = 0; i < 2; i++)
840 {
841 if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
842 outside_cost += vect_get_stmt_cost (vector_stmt);
843 }
844
845 if (vect_print_dump_info (REPORT_COST))
846 fprintf (vect_dump, "vect_model_promotion_demotion_cost: inside_cost = %d, "
847 "outside_cost = %d .", inside_cost, outside_cost);
848
849 /* Set the costs in STMT_INFO. */
850 stmt_vinfo_set_inside_of_loop_cost (stmt_info, NULL, inside_cost);
851 stmt_vinfo_set_outside_of_loop_cost (stmt_info, NULL, outside_cost);
852 }
853
854 /* Function vect_cost_strided_group_size
855
856 For strided load or store, return the group_size only if it is the first
857 load or store of a group, else return 1. This ensures that group size is
858 only returned once per group. */
859
860 static int
861 vect_cost_strided_group_size (stmt_vec_info stmt_info)
862 {
863 gimple first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
864
865 if (first_stmt == STMT_VINFO_STMT (stmt_info))
866 return GROUP_SIZE (stmt_info);
867
868 return 1;
869 }
870
871
872 /* Function vect_model_store_cost
873
874 Models cost for stores. In the case of strided accesses, one access
875 has the overhead of the strided access attributed to it. */
876
877 void
878 vect_model_store_cost (stmt_vec_info stmt_info, int ncopies,
879 bool store_lanes_p, enum vect_def_type dt,
880 slp_tree slp_node)
881 {
882 int group_size;
883 unsigned int inside_cost = 0, outside_cost = 0;
884 struct data_reference *first_dr;
885 gimple first_stmt;
886
887 /* The SLP costs were already calculated during SLP tree build. */
888 if (PURE_SLP_STMT (stmt_info))
889 return;
890
891 if (dt == vect_constant_def || dt == vect_external_def)
892 outside_cost = vect_get_stmt_cost (scalar_to_vec);
893
894 /* Strided access? */
895 if (STMT_VINFO_STRIDED_ACCESS (stmt_info))
896 {
897 if (slp_node)
898 {
899 first_stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (slp_node), 0);
900 group_size = 1;
901 }
902 else
903 {
904 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
905 group_size = vect_cost_strided_group_size (stmt_info);
906 }
907
908 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
909 }
910 /* Not a strided access. */
911 else
912 {
913 group_size = 1;
914 first_dr = STMT_VINFO_DATA_REF (stmt_info);
915 }
916
917 /* We assume that the cost of a single store-lanes instruction is
918 equivalent to the cost of GROUP_SIZE separate stores. If a strided
919 access is instead being provided by a permute-and-store operation,
920 include the cost of the permutes. */
921 if (!store_lanes_p && group_size > 1)
922 {
923 	      /* Uses high and low interleave operations for each needed permute.  */
924 	      inside_cost = ncopies * exact_log2 (group_size) * group_size
925 * vect_get_stmt_cost (vec_perm);
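	      /* For instance, with GROUP_SIZE == 4 this charges
	         exact_log2 (4) * 4 == 8 vec_perm operations per copy.  */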
926
927 if (vect_print_dump_info (REPORT_COST))
928 fprintf (vect_dump, "vect_model_store_cost: strided group_size = %d .",
929 group_size);
930 }
931
932 /* Costs of the stores. */
933 vect_get_store_cost (first_dr, ncopies, &inside_cost);
934
935 if (vect_print_dump_info (REPORT_COST))
936 fprintf (vect_dump, "vect_model_store_cost: inside_cost = %d, "
937 "outside_cost = %d .", inside_cost, outside_cost);
938
939 /* Set the costs either in STMT_INFO or SLP_NODE (if exists). */
940 stmt_vinfo_set_inside_of_loop_cost (stmt_info, slp_node, inside_cost);
941 stmt_vinfo_set_outside_of_loop_cost (stmt_info, slp_node, outside_cost);
942 }
943
944
945 /* Calculate cost of DR's memory access. */
946 void
947 vect_get_store_cost (struct data_reference *dr, int ncopies,
948 unsigned int *inside_cost)
949 {
950 int alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
951
952 switch (alignment_support_scheme)
953 {
954 case dr_aligned:
955 {
956 *inside_cost += ncopies * vect_get_stmt_cost (vector_store);
957
958 if (vect_print_dump_info (REPORT_COST))
959 fprintf (vect_dump, "vect_model_store_cost: aligned.");
960
961 break;
962 }
963
964 case dr_unaligned_supported:
965 {
966 gimple stmt = DR_STMT (dr);
967 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
968 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
969
970 /* Here, we assign an additional cost for the unaligned store. */
971 *inside_cost += ncopies
972 * targetm.vectorize.builtin_vectorization_cost (unaligned_store,
973 vectype, DR_MISALIGNMENT (dr));
974
975 if (vect_print_dump_info (REPORT_COST))
976 fprintf (vect_dump, "vect_model_store_cost: unaligned supported by "
977 "hardware.");
978
979 break;
980 }
981
982 default:
983 gcc_unreachable ();
984 }
985 }
986
987
988 /* Function vect_model_load_cost
989
990 Models cost for loads. In the case of strided accesses, the last access
991 has the overhead of the strided access attributed to it. Since unaligned
992 accesses are supported for loads, we also account for the costs of the
993 access scheme chosen. */
994
995 void
996 vect_model_load_cost (stmt_vec_info stmt_info, int ncopies, bool load_lanes_p,
997 slp_tree slp_node)
998 {
999 int group_size;
1000 gimple first_stmt;
1001 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr;
1002 unsigned int inside_cost = 0, outside_cost = 0;
1003
1004 /* The SLP costs were already calculated during SLP tree build. */
1005 if (PURE_SLP_STMT (stmt_info))
1006 return;
1007
1008 /* Strided accesses? */
1009 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
1010 if (STMT_VINFO_STRIDED_ACCESS (stmt_info) && first_stmt && !slp_node)
1011 {
1012 group_size = vect_cost_strided_group_size (stmt_info);
1013 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
1014 }
1015 /* Not a strided access. */
1016 else
1017 {
1018 group_size = 1;
1019 first_dr = dr;
1020 }
1021
1022 /* We assume that the cost of a single load-lanes instruction is
1023 equivalent to the cost of GROUP_SIZE separate loads. If a strided
1024 access is instead being provided by a load-and-permute operation,
1025 include the cost of the permutes. */
1026 if (!load_lanes_p && group_size > 1)
1027 {
1028 	      /* Uses even and odd extract operations for each needed permute.  */
1029 	      inside_cost = ncopies * exact_log2 (group_size) * group_size
1030 * vect_get_stmt_cost (vec_perm);
1031
1032 if (vect_print_dump_info (REPORT_COST))
1033 fprintf (vect_dump, "vect_model_load_cost: strided group_size = %d .",
1034 group_size);
1035 }
1036
1037 /* The loads themselves. */
1038 vect_get_load_cost (first_dr, ncopies,
1039 ((!STMT_VINFO_STRIDED_ACCESS (stmt_info)) || group_size > 1
1040 || slp_node),
1041 &inside_cost, &outside_cost);
1042
1043 if (vect_print_dump_info (REPORT_COST))
1044 fprintf (vect_dump, "vect_model_load_cost: inside_cost = %d, "
1045 "outside_cost = %d .", inside_cost, outside_cost);
1046
1047 /* Set the costs either in STMT_INFO or SLP_NODE (if exists). */
1048 stmt_vinfo_set_inside_of_loop_cost (stmt_info, slp_node, inside_cost);
1049 stmt_vinfo_set_outside_of_loop_cost (stmt_info, slp_node, outside_cost);
1050 }
1051
1052
1053 /* Calculate cost of DR's memory access. */
1054 void
1055 vect_get_load_cost (struct data_reference *dr, int ncopies,
1056 bool add_realign_cost, unsigned int *inside_cost,
1057 unsigned int *outside_cost)
1058 {
1059 int alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
1060
1061 switch (alignment_support_scheme)
1062 {
1063 case dr_aligned:
1064 {
1065 *inside_cost += ncopies * vect_get_stmt_cost (vector_load);
1066
1067 if (vect_print_dump_info (REPORT_COST))
1068 fprintf (vect_dump, "vect_model_load_cost: aligned.");
1069
1070 break;
1071 }
1072 case dr_unaligned_supported:
1073 {
1074 gimple stmt = DR_STMT (dr);
1075 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1076 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1077
1078 /* Here, we assign an additional cost for the unaligned load. */
1079 *inside_cost += ncopies
1080 * targetm.vectorize.builtin_vectorization_cost (unaligned_load,
1081 vectype, DR_MISALIGNMENT (dr));
1082 if (vect_print_dump_info (REPORT_COST))
1083 fprintf (vect_dump, "vect_model_load_cost: unaligned supported by "
1084 "hardware.");
1085
1086 break;
1087 }
1088 case dr_explicit_realign:
1089 {
1090 *inside_cost += ncopies * (2 * vect_get_stmt_cost (vector_load)
1091 + vect_get_stmt_cost (vec_perm));
1092
1093 /* FIXME: If the misalignment remains fixed across the iterations of
1094 the containing loop, the following cost should be added to the
1095 outside costs. */
1096 if (targetm.vectorize.builtin_mask_for_load)
1097 *inside_cost += vect_get_stmt_cost (vector_stmt);
1098
1099 if (vect_print_dump_info (REPORT_COST))
1100 fprintf (vect_dump, "vect_model_load_cost: explicit realign");
1101
1102 break;
1103 }
1104 case dr_explicit_realign_optimized:
1105 {
1106 if (vect_print_dump_info (REPORT_COST))
1107 fprintf (vect_dump, "vect_model_load_cost: unaligned software "
1108 "pipelined.");
1109
1110 /* Unaligned software pipeline has a load of an address, an initial
1111 load, and possibly a mask operation to "prime" the loop. However,
1112 if this is an access in a group of loads, which provide strided
1113 access, then the above cost should only be considered for one
1114 access in the group. Inside the loop, there is a load op
1115 and a realignment op. */
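	        /* Concretely: the per-copy cost added below is one vector_load
	           plus one vec_perm, and when ADD_REALIGN_COST is set the
	           prologue (outside) cost is two vector stmts, or three if a
	           builtin_mask_for_load is required.  */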
1116
1117 if (add_realign_cost)
1118 {
1119 *outside_cost = 2 * vect_get_stmt_cost (vector_stmt);
1120 if (targetm.vectorize.builtin_mask_for_load)
1121 *outside_cost += vect_get_stmt_cost (vector_stmt);
1122 }
1123
1124 *inside_cost += ncopies * (vect_get_stmt_cost (vector_load)
1125 + vect_get_stmt_cost (vec_perm));
1126
1127 if (vect_print_dump_info (REPORT_COST))
1128 fprintf (vect_dump,
1129 "vect_model_load_cost: explicit realign optimized");
1130
1131 break;
1132 }
1133
1134 default:
1135 gcc_unreachable ();
1136 }
1137 }
1138
1139
1140 /* Function vect_init_vector.
1141
1142 Insert a new stmt (INIT_STMT) that initializes a new vector variable with
1143 	   the vector elements of VECTOR_VAR.  Place the initialization at GSI if it
1144 is not NULL. Otherwise, place the initialization at the loop preheader.
1145 Return the DEF of INIT_STMT.
1146 It will be used in the vectorization of STMT. */
1147
1148 tree
1149 vect_init_vector (gimple stmt, tree vector_var, tree vector_type,
1150 gimple_stmt_iterator *gsi)
1151 {
1152 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
1153 tree new_var;
1154 gimple init_stmt;
1155 tree vec_oprnd;
1156 edge pe;
1157 tree new_temp;
1158 basic_block new_bb;
1159
1160 new_var = vect_get_new_vect_var (vector_type, vect_simple_var, "cst_");
1161 add_referenced_var (new_var);
1162 init_stmt = gimple_build_assign (new_var, vector_var);
1163 new_temp = make_ssa_name (new_var, init_stmt);
1164 gimple_assign_set_lhs (init_stmt, new_temp);
1165
1166 if (gsi)
1167 vect_finish_stmt_generation (stmt, init_stmt, gsi);
1168 else
1169 {
1170 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1171
1172 if (loop_vinfo)
1173 {
1174 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1175
1176 if (nested_in_vect_loop_p (loop, stmt))
1177 loop = loop->inner;
1178
1179 pe = loop_preheader_edge (loop);
1180 new_bb = gsi_insert_on_edge_immediate (pe, init_stmt);
1181 gcc_assert (!new_bb);
1182 }
1183 else
1184 {
1185 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
1186 basic_block bb;
1187 gimple_stmt_iterator gsi_bb_start;
1188
1189 gcc_assert (bb_vinfo);
1190 bb = BB_VINFO_BB (bb_vinfo);
1191 gsi_bb_start = gsi_after_labels (bb);
1192 gsi_insert_before (&gsi_bb_start, init_stmt, GSI_SAME_STMT);
1193 }
1194 }
1195
1196 if (vect_print_dump_info (REPORT_DETAILS))
1197 {
1198 fprintf (vect_dump, "created new init_stmt: ");
1199 print_gimple_stmt (vect_dump, init_stmt, 0, TDF_SLIM);
1200 }
1201
1202 vec_oprnd = gimple_assign_lhs (init_stmt);
1203 return vec_oprnd;
1204 }
1205
1206
1207 /* Function vect_get_vec_def_for_operand.
1208
1209 OP is an operand in STMT. This function returns a (vector) def that will be
1210 used in the vectorized stmt for STMT.
1211
1212 In the case that OP is an SSA_NAME which is defined in the loop, then
1213 STMT_VINFO_VEC_STMT of the defining stmt holds the relevant def.
1214
1215 In case OP is an invariant or constant, a new stmt that creates a vector def
1216 needs to be introduced. */
1217
1218 tree
1219 vect_get_vec_def_for_operand (tree op, gimple stmt, tree *scalar_def)
1220 {
1221 tree vec_oprnd;
1222 gimple vec_stmt;
1223 gimple def_stmt;
1224 stmt_vec_info def_stmt_info = NULL;
1225 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
1226 unsigned int nunits;
1227 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1228 tree vec_inv;
1229 tree vec_cst;
1230 tree t = NULL_TREE;
1231 tree def;
1232 int i;
1233 enum vect_def_type dt;
1234 bool is_simple_use;
1235 tree vector_type;
1236
1237 if (vect_print_dump_info (REPORT_DETAILS))
1238 {
1239 fprintf (vect_dump, "vect_get_vec_def_for_operand: ");
1240 print_generic_expr (vect_dump, op, TDF_SLIM);
1241 }
1242
1243 is_simple_use = vect_is_simple_use (op, stmt, loop_vinfo, NULL,
1244 &def_stmt, &def, &dt);
1245 gcc_assert (is_simple_use);
1246 if (vect_print_dump_info (REPORT_DETAILS))
1247 {
1248 if (def)
1249 {
1250 fprintf (vect_dump, "def = ");
1251 print_generic_expr (vect_dump, def, TDF_SLIM);
1252 }
1253 if (def_stmt)
1254 {
1255 fprintf (vect_dump, " def_stmt = ");
1256 print_gimple_stmt (vect_dump, def_stmt, 0, TDF_SLIM);
1257 }
1258 }
1259
1260 switch (dt)
1261 {
1262 /* Case 1: operand is a constant. */
1263 case vect_constant_def:
1264 {
1265 vector_type = get_vectype_for_scalar_type (TREE_TYPE (op));
1266 gcc_assert (vector_type);
1267 nunits = TYPE_VECTOR_SUBPARTS (vector_type);
1268
1269 if (scalar_def)
1270 *scalar_def = op;
1271
1272 /* Create 'vect_cst_ = {cst,cst,...,cst}' */
1273 if (vect_print_dump_info (REPORT_DETAILS))
1274 fprintf (vect_dump, "Create vector_cst. nunits = %d", nunits);
1275
1276 vec_cst = build_vector_from_val (vector_type,
1277 fold_convert (TREE_TYPE (vector_type),
1278 op));
1279 return vect_init_vector (stmt, vec_cst, vector_type, NULL);
1280 }
1281
1282 /* Case 2: operand is defined outside the loop - loop invariant. */
1283 case vect_external_def:
1284 {
1285 vector_type = get_vectype_for_scalar_type (TREE_TYPE (def));
1286 gcc_assert (vector_type);
1287 nunits = TYPE_VECTOR_SUBPARTS (vector_type);
1288
1289 if (scalar_def)
1290 *scalar_def = def;
1291
1292 /* Create 'vec_inv = {inv,inv,..,inv}' */
1293 if (vect_print_dump_info (REPORT_DETAILS))
1294 fprintf (vect_dump, "Create vector_inv.");
1295
1296 for (i = nunits - 1; i >= 0; --i)
1297 {
1298 t = tree_cons (NULL_TREE, def, t);
1299 }
1300
1301 /* FIXME: use build_constructor directly. */
1302 vec_inv = build_constructor_from_list (vector_type, t);
1303 return vect_init_vector (stmt, vec_inv, vector_type, NULL);
1304 }
1305
1306 /* Case 3: operand is defined inside the loop. */
1307 case vect_internal_def:
1308 {
1309 if (scalar_def)
1310 *scalar_def = NULL/* FIXME tuples: def_stmt*/;
1311
1312 /* Get the def from the vectorized stmt. */
1313 def_stmt_info = vinfo_for_stmt (def_stmt);
1314
1315 vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
1316 /* Get vectorized pattern statement. */
1317 if (!vec_stmt
1318 && STMT_VINFO_IN_PATTERN_P (def_stmt_info)
1319 && !STMT_VINFO_RELEVANT (def_stmt_info))
1320 vec_stmt = STMT_VINFO_VEC_STMT (vinfo_for_stmt (
1321 STMT_VINFO_RELATED_STMT (def_stmt_info)));
1322 gcc_assert (vec_stmt);
1323 if (gimple_code (vec_stmt) == GIMPLE_PHI)
1324 vec_oprnd = PHI_RESULT (vec_stmt);
1325 else if (is_gimple_call (vec_stmt))
1326 vec_oprnd = gimple_call_lhs (vec_stmt);
1327 else
1328 vec_oprnd = gimple_assign_lhs (vec_stmt);
1329 return vec_oprnd;
1330 }
1331
1332 /* Case 4: operand is defined by a loop header phi - reduction */
1333 case vect_reduction_def:
1334 case vect_double_reduction_def:
1335 case vect_nested_cycle:
1336 {
1337 struct loop *loop;
1338
1339 gcc_assert (gimple_code (def_stmt) == GIMPLE_PHI);
1340 loop = (gimple_bb (def_stmt))->loop_father;
1341
1342 /* Get the def before the loop */
1343 op = PHI_ARG_DEF_FROM_EDGE (def_stmt, loop_preheader_edge (loop));
1344 return get_initial_def_for_reduction (stmt, op, scalar_def);
1345 }
1346
1347 /* Case 5: operand is defined by loop-header phi - induction. */
1348 case vect_induction_def:
1349 {
1350 gcc_assert (gimple_code (def_stmt) == GIMPLE_PHI);
1351
1352 /* Get the def from the vectorized stmt. */
1353 def_stmt_info = vinfo_for_stmt (def_stmt);
1354 vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
1355 if (gimple_code (vec_stmt) == GIMPLE_PHI)
1356 vec_oprnd = PHI_RESULT (vec_stmt);
1357 else
1358 vec_oprnd = gimple_get_lhs (vec_stmt);
1359 return vec_oprnd;
1360 }
1361
1362 default:
1363 gcc_unreachable ();
1364 }
1365 }
1366
1367
1368 /* Function vect_get_vec_def_for_stmt_copy
1369
1370 Return a vector-def for an operand. This function is used when the
1371 vectorized stmt to be created (by the caller to this function) is a "copy"
1372 created in case the vectorized result cannot fit in one vector, and several
1373 copies of the vector-stmt are required. In this case the vector-def is
1374 retrieved from the vector stmt recorded in the STMT_VINFO_RELATED_STMT field
1375 of the stmt that defines VEC_OPRND.
1376 DT is the type of the vector def VEC_OPRND.
1377
1378 Context:
1379 In case the vectorization factor (VF) is bigger than the number
1380 of elements that can fit in a vectype (nunits), we have to generate
1381 more than one vector stmt to vectorize the scalar stmt. This situation
1382 arises when there are multiple data-types operated upon in the loop; the
1383 smallest data-type determines the VF, and as a result, when vectorizing
1384 stmts operating on wider types we need to create 'VF/nunits' "copies" of the
1385 vector stmt (each computing a vector of 'nunits' results, and together
1386 computing 'VF' results in each iteration). This function is called when
1387 vectorizing such a stmt (e.g. vectorizing S2 in the illustration below, in
1388 which VF=16 and nunits=4, so the number of copies required is 4):
1389
1390 scalar stmt: vectorized into: STMT_VINFO_RELATED_STMT
1391
1392 S1: x = load VS1.0: vx.0 = memref0 VS1.1
1393 VS1.1: vx.1 = memref1 VS1.2
1394 VS1.2: vx.2 = memref2 VS1.3
1395 VS1.3: vx.3 = memref3
1396
1397 S2: z = x + ... VSnew.0: vz0 = vx.0 + ... VSnew.1
1398 VSnew.1: vz1 = vx.1 + ... VSnew.2
1399 VSnew.2: vz2 = vx.2 + ... VSnew.3
1400 VSnew.3: vz3 = vx.3 + ...
1401
1402 The vectorization of S1 is explained in vectorizable_load.
1403 The vectorization of S2:
1404 To create the first vector-stmt out of the 4 copies - VSnew.0 -
1405 the function 'vect_get_vec_def_for_operand' is called to
1406 get the relevant vector-def for each operand of S2. For operand x it
1407 returns the vector-def 'vx.0'.
1408
1409 To create the remaining copies of the vector-stmt (VSnew.j), this
1410 function is called to get the relevant vector-def for each operand. It is
1411 obtained from the respective VS1.j stmt, which is recorded in the
1412 STMT_VINFO_RELATED_STMT field of the stmt that defines VEC_OPRND.
1413
1414 For example, to obtain the vector-def 'vx.1' in order to create the
1415 vector stmt 'VSnew.1', this function is called with VEC_OPRND='vx.0'.
1416 Given 'vx0' we obtain the stmt that defines it ('VS1.0'); from the
1417 STMT_VINFO_RELATED_STMT field of 'VS1.0' we obtain the next copy - 'VS1.1',
1418 and return its def ('vx.1').
1419 Overall, to create the above sequence this function will be called 3 times:
1420 vx.1 = vect_get_vec_def_for_stmt_copy (dt, vx.0);
1421 vx.2 = vect_get_vec_def_for_stmt_copy (dt, vx.1);
1422 vx.3 = vect_get_vec_def_for_stmt_copy (dt, vx.2); */
1423
1424 tree
1425 vect_get_vec_def_for_stmt_copy (enum vect_def_type dt, tree vec_oprnd)
1426 {
1427 gimple vec_stmt_for_operand;
1428 stmt_vec_info def_stmt_info;
1429
1430 /* Do nothing; can reuse same def. */
1431 if (dt == vect_external_def || dt == vect_constant_def )
1432 return vec_oprnd;
1433
1434 vec_stmt_for_operand = SSA_NAME_DEF_STMT (vec_oprnd);
1435 def_stmt_info = vinfo_for_stmt (vec_stmt_for_operand);
1436 gcc_assert (def_stmt_info);
1437 vec_stmt_for_operand = STMT_VINFO_RELATED_STMT (def_stmt_info);
1438 gcc_assert (vec_stmt_for_operand);
1439 vec_oprnd = gimple_get_lhs (vec_stmt_for_operand);
1440 if (gimple_code (vec_stmt_for_operand) == GIMPLE_PHI)
1441 vec_oprnd = PHI_RESULT (vec_stmt_for_operand);
1442 else
1443 vec_oprnd = gimple_get_lhs (vec_stmt_for_operand);
1444 return vec_oprnd;
1445 }
1446
1447
1448 /* Get vectorized definitions for the operands to create a copy of an original
1449 stmt. See vect_get_vec_def_for_stmt_copy () for details. */
1450
1451 static void
1452 vect_get_vec_defs_for_stmt_copy (enum vect_def_type *dt,
1453 VEC(tree,heap) **vec_oprnds0,
1454 VEC(tree,heap) **vec_oprnds1)
1455 {
1456 tree vec_oprnd = VEC_pop (tree, *vec_oprnds0);
1457
1458 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd);
1459 VEC_quick_push (tree, *vec_oprnds0, vec_oprnd);
1460
1461 if (vec_oprnds1 && *vec_oprnds1)
1462 {
1463 vec_oprnd = VEC_pop (tree, *vec_oprnds1);
1464 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[1], vec_oprnd);
1465 VEC_quick_push (tree, *vec_oprnds1, vec_oprnd);
1466 }
1467 }
1468
1469
1470 /* Get vectorized definitions for OP0 and OP1.
1471 	   REDUC_INDEX is the index of the reduction operand in case of reduction,
1472 and -1 otherwise. */
1473
1474 void
1475 vect_get_vec_defs (tree op0, tree op1, gimple stmt,
1476 VEC (tree, heap) **vec_oprnds0,
1477 VEC (tree, heap) **vec_oprnds1,
1478 slp_tree slp_node, int reduc_index)
1479 {
1480 if (slp_node)
1481 {
1482 int nops = (op1 == NULL_TREE) ? 1 : 2;
1483 VEC (tree, heap) *ops = VEC_alloc (tree, heap, nops);
1484 VEC (slp_void_p, heap) *vec_defs = VEC_alloc (slp_void_p, heap, nops);
1485
1486 VEC_quick_push (tree, ops, op0);
1487 if (op1)
1488 VEC_quick_push (tree, ops, op1);
1489
1490 vect_get_slp_defs (ops, slp_node, &vec_defs, reduc_index);
1491
1492 *vec_oprnds0 = (VEC (tree, heap) *) VEC_index (slp_void_p, vec_defs, 0);
1493 if (op1)
1494 *vec_oprnds1 = (VEC (tree, heap) *) VEC_index (slp_void_p, vec_defs, 1);
1495
1496 VEC_free (tree, heap, ops);
1497 VEC_free (slp_void_p, heap, vec_defs);
1498 }
1499 else
1500 {
1501 tree vec_oprnd;
1502
1503 *vec_oprnds0 = VEC_alloc (tree, heap, 1);
1504 vec_oprnd = vect_get_vec_def_for_operand (op0, stmt, NULL);
1505 VEC_quick_push (tree, *vec_oprnds0, vec_oprnd);
1506
1507 if (op1)
1508 {
1509 *vec_oprnds1 = VEC_alloc (tree, heap, 1);
1510 vec_oprnd = vect_get_vec_def_for_operand (op1, stmt, NULL);
1511 VEC_quick_push (tree, *vec_oprnds1, vec_oprnd);
1512 }
1513 }
1514 }
1515
1516
1517 /* Function vect_finish_stmt_generation.
1518
1519 Insert a new stmt. */
1520
1521 void
1522 vect_finish_stmt_generation (gimple stmt, gimple vec_stmt,
1523 gimple_stmt_iterator *gsi)
1524 {
1525 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1526 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1527 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
1528
1529 gcc_assert (gimple_code (stmt) != GIMPLE_LABEL);
1530
1531 gsi_insert_before (gsi, vec_stmt, GSI_SAME_STMT);
1532
1533 set_vinfo_for_stmt (vec_stmt, new_stmt_vec_info (vec_stmt, loop_vinfo,
1534 bb_vinfo));
1535
1536 if (vect_print_dump_info (REPORT_DETAILS))
1537 {
1538 fprintf (vect_dump, "add new stmt: ");
1539 print_gimple_stmt (vect_dump, vec_stmt, 0, TDF_SLIM);
1540 }
1541
1542 gimple_set_location (vec_stmt, gimple_location (stmt));
1543 }
1544
1545 /* Checks if CALL can be vectorized in type VECTYPE_OUT.  Returns
1546 a function declaration if the target has a vectorized version
1547 of the function, or NULL_TREE if the function cannot be vectorized. */
1548
1549 tree
1550 vectorizable_function (gimple call, tree vectype_out, tree vectype_in)
1551 {
1552 tree fndecl = gimple_call_fndecl (call);
1553
1554 /* We only handle functions that do not read or clobber memory -- i.e.
1555 const or novops ones. */
1556 if (!(gimple_call_flags (call) & (ECF_CONST | ECF_NOVOPS)))
1557 return NULL_TREE;
1558
1559 if (!fndecl
1560 || TREE_CODE (fndecl) != FUNCTION_DECL
1561 || !DECL_BUILT_IN (fndecl))
1562 return NULL_TREE;
1563
1564 return targetm.vectorize.builtin_vectorized_function (fndecl, vectype_out,
1565 vectype_in);
1566 }
1567
1568 /* Function vectorizable_call.
1569
1570 Check if STMT performs a function call that can be vectorized.
1571 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
1572 	   stmt to replace it, put it in VEC_STMT, and insert it at GSI.
1573 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
1574
1575 static bool
1576 vectorizable_call (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
1577 slp_tree slp_node)
1578 {
1579 tree vec_dest;
1580 tree scalar_dest;
1581 tree op, type;
1582 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
1583 stmt_vec_info stmt_info = vinfo_for_stmt (stmt), prev_stmt_info;
1584 tree vectype_out, vectype_in;
1585 int nunits_in;
1586 int nunits_out;
1587 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1588 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
1589 tree fndecl, new_temp, def, rhs_type;
1590 gimple def_stmt;
1591 enum vect_def_type dt[3]
1592 = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
1593 gimple new_stmt = NULL;
1594 int ncopies, j;
1595 VEC(tree, heap) *vargs = NULL;
1596 enum { NARROW, NONE, WIDEN } modifier;
1597 size_t i, nargs;
1598 tree lhs;
1599
1600 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
1601 return false;
1602
1603 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
1604 return false;
1605
1606 /* Is STMT a vectorizable call? */
1607 if (!is_gimple_call (stmt))
1608 return false;
1609
1610 if (TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
1611 return false;
1612
1613 if (stmt_can_throw_internal (stmt))
1614 return false;
1615
1616 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
1617
1618 /* Process function arguments. */
1619 rhs_type = NULL_TREE;
1620 vectype_in = NULL_TREE;
1621 nargs = gimple_call_num_args (stmt);
1622
1623 	  /* Bail out if the function has more than three arguments; we do not have
1624 	     interesting builtin functions to vectorize with more than two arguments
1625 	     except for fma.  Calls with no arguments are not handled either.  */
1626 if (nargs == 0 || nargs > 3)
1627 return false;
1628
1629 for (i = 0; i < nargs; i++)
1630 {
1631 tree opvectype;
1632
1633 op = gimple_call_arg (stmt, i);
1634
1635 /* We can only handle calls with arguments of the same type. */
1636 if (rhs_type
1637 && !types_compatible_p (rhs_type, TREE_TYPE (op)))
1638 {
1639 if (vect_print_dump_info (REPORT_DETAILS))
1640 fprintf (vect_dump, "argument types differ.");
1641 return false;
1642 }
1643 if (!rhs_type)
1644 rhs_type = TREE_TYPE (op);
1645
1646 if (!vect_is_simple_use_1 (op, stmt, loop_vinfo, bb_vinfo,
1647 &def_stmt, &def, &dt[i], &opvectype))
1648 {
1649 if (vect_print_dump_info (REPORT_DETAILS))
1650 fprintf (vect_dump, "use not simple.");
1651 return false;
1652 }
1653
1654 if (!vectype_in)
1655 vectype_in = opvectype;
1656 else if (opvectype
1657 && opvectype != vectype_in)
1658 {
1659 if (vect_print_dump_info (REPORT_DETAILS))
1660 fprintf (vect_dump, "argument vector types differ.");
1661 return false;
1662 }
1663 }
1664 /* If all arguments are external or constant defs use a vector type with
1665 the same size as the output vector type. */
1666 if (!vectype_in)
1667 vectype_in = get_same_sized_vectype (rhs_type, vectype_out);
1668 if (vec_stmt)
1669 gcc_assert (vectype_in);
1670 if (!vectype_in)
1671 {
1672 if (vect_print_dump_info (REPORT_DETAILS))
1673 {
1674 fprintf (vect_dump, "no vectype for scalar type ");
1675 print_generic_expr (vect_dump, rhs_type, TDF_SLIM);
1676 }
1677
1678 return false;
1679 }
1680
1681 /* FORNOW */
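	  /* For example, 8 input elements producing 4 (twice as wide) output
	     elements per vector is classified as WIDEN below, while 4 input
	     elements producing 8 (narrower) output elements is NARROW; equal
	     counts mean NONE.  */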
1682 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
1683 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
1684 if (nunits_in == nunits_out / 2)
1685 modifier = NARROW;
1686 else if (nunits_out == nunits_in)
1687 modifier = NONE;
1688 else if (nunits_out == nunits_in / 2)
1689 modifier = WIDEN;
1690 else
1691 return false;
1692
1693 /* For now, we only vectorize functions if a target specific builtin
1694 is available. TODO -- in some cases, it might be profitable to
1695 insert the calls for pieces of the vector, in order to be able
1696 to vectorize other operations in the loop. */
1697 fndecl = vectorizable_function (stmt, vectype_out, vectype_in);
1698 if (fndecl == NULL_TREE)
1699 {
1700 if (vect_print_dump_info (REPORT_DETAILS))
1701 fprintf (vect_dump, "function is not vectorizable.");
1702
1703 return false;
1704 }
1705
1706 gcc_assert (!gimple_vuse (stmt));
1707
1708 if (slp_node || PURE_SLP_STMT (stmt_info))
1709 ncopies = 1;
1710 else if (modifier == NARROW)
1711 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out;
1712 else
1713 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
1714
1715 /* Sanity check: make sure that at least one copy of the vectorized stmt
1716 needs to be generated. */
1717 gcc_assert (ncopies >= 1);
1718
1719 if (!vec_stmt) /* transformation not required. */
1720 {
1721 STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
1722 if (vect_print_dump_info (REPORT_DETAILS))
1723 fprintf (vect_dump, "=== vectorizable_call ===");
1724 vect_model_simple_cost (stmt_info, ncopies, dt, NULL);
1725 return true;
1726 }
1727
1728 /** Transform. **/
1729
1730 if (vect_print_dump_info (REPORT_DETAILS))
1731 fprintf (vect_dump, "transform call.");
1732
1733 /* Handle def. */
1734 scalar_dest = gimple_call_lhs (stmt);
1735 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
1736
1737 prev_stmt_info = NULL;
1738 switch (modifier)
1739 {
1740 case NONE:
1741 for (j = 0; j < ncopies; ++j)
1742 {
1743 /* Build argument list for the vectorized call. */
1744 if (j == 0)
1745 vargs = VEC_alloc (tree, heap, nargs);
1746 else
1747 VEC_truncate (tree, vargs, 0);
1748
1749 if (slp_node)
1750 {
1751 VEC (slp_void_p, heap) *vec_defs
1752 = VEC_alloc (slp_void_p, heap, nargs);
1753 VEC (tree, heap) *vec_oprnds0;
1754
1755 for (i = 0; i < nargs; i++)
1756 VEC_quick_push (tree, vargs, gimple_call_arg (stmt, i));
1757 vect_get_slp_defs (vargs, slp_node, &vec_defs, -1);
1758 vec_oprnds0
1759 = (VEC (tree, heap) *) VEC_index (slp_void_p, vec_defs, 0);
1760
1761 /* Arguments are ready. Create the new vector stmt. */
1762 FOR_EACH_VEC_ELT (tree, vec_oprnds0, i, vec_oprnd0)
1763 {
1764 size_t k;
1765 for (k = 0; k < nargs; k++)
1766 {
1767 VEC (tree, heap) *vec_oprndsk
1768 = (VEC (tree, heap) *)
1769 VEC_index (slp_void_p, vec_defs, k);
1770 VEC_replace (tree, vargs, k,
1771 VEC_index (tree, vec_oprndsk, i));
1772 }
1773 new_stmt = gimple_build_call_vec (fndecl, vargs);
1774 new_temp = make_ssa_name (vec_dest, new_stmt);
1775 gimple_call_set_lhs (new_stmt, new_temp);
1776 vect_finish_stmt_generation (stmt, new_stmt, gsi);
1777 mark_symbols_for_renaming (new_stmt);
1778 VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node),
1779 new_stmt);
1780 }
1781
1782 for (i = 0; i < nargs; i++)
1783 {
1784 VEC (tree, heap) *vec_oprndsi
1785 = (VEC (tree, heap) *)
1786 VEC_index (slp_void_p, vec_defs, i);
1787 VEC_free (tree, heap, vec_oprndsi);
1788 }
1789 VEC_free (slp_void_p, heap, vec_defs);
1790 continue;
1791 }
1792
1793 for (i = 0; i < nargs; i++)
1794 {
1795 op = gimple_call_arg (stmt, i);
1796 if (j == 0)
1797 vec_oprnd0
1798 = vect_get_vec_def_for_operand (op, stmt, NULL);
1799 else
1800 {
1801 vec_oprnd0 = gimple_call_arg (new_stmt, i);
1802 vec_oprnd0
1803 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
1804 }
1805
1806 VEC_quick_push (tree, vargs, vec_oprnd0);
1807 }
1808
1809 new_stmt = gimple_build_call_vec (fndecl, vargs);
1810 new_temp = make_ssa_name (vec_dest, new_stmt);
1811 gimple_call_set_lhs (new_stmt, new_temp);
1812
1813 vect_finish_stmt_generation (stmt, new_stmt, gsi);
1814 mark_symbols_for_renaming (new_stmt);
1815
1816 if (j == 0)
1817 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
1818 else
1819 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
1820
1821 prev_stmt_info = vinfo_for_stmt (new_stmt);
1822 }
1823
1824 break;
1825
1826 case NARROW:
1827 for (j = 0; j < ncopies; ++j)
1828 {
1829 /* Build argument list for the vectorized call. */
1830 if (j == 0)
1831 vargs = VEC_alloc (tree, heap, nargs * 2);
1832 else
1833 VEC_truncate (tree, vargs, 0);
1834
1835 if (slp_node)
1836 {
1837 VEC (slp_void_p, heap) *vec_defs
1838 = VEC_alloc (slp_void_p, heap, nargs);
1839 VEC (tree, heap) *vec_oprnds0;
1840
1841 for (i = 0; i < nargs; i++)
1842 VEC_quick_push (tree, vargs, gimple_call_arg (stmt, i));
1843 vect_get_slp_defs (vargs, slp_node, &vec_defs, -1);
1844 vec_oprnds0
1845 = (VEC (tree, heap) *) VEC_index (slp_void_p, vec_defs, 0);
1846
1847 /* Arguments are ready. Create the new vector stmt. */
1848 for (i = 0; VEC_iterate (tree, vec_oprnds0, i, vec_oprnd0);
1849 i += 2)
1850 {
1851 size_t k;
1852 VEC_truncate (tree, vargs, 0);
1853 for (k = 0; k < nargs; k++)
1854 {
1855 VEC (tree, heap) *vec_oprndsk
1856 = (VEC (tree, heap) *)
1857 VEC_index (slp_void_p, vec_defs, k);
1858 VEC_quick_push (tree, vargs,
1859 VEC_index (tree, vec_oprndsk, i));
1860 VEC_quick_push (tree, vargs,
1861 VEC_index (tree, vec_oprndsk, i + 1));
1862 }
1863 new_stmt = gimple_build_call_vec (fndecl, vargs);
1864 new_temp = make_ssa_name (vec_dest, new_stmt);
1865 gimple_call_set_lhs (new_stmt, new_temp);
1866 vect_finish_stmt_generation (stmt, new_stmt, gsi);
1867 mark_symbols_for_renaming (new_stmt);
1868 VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node),
1869 new_stmt);
1870 }
1871
1872 for (i = 0; i < nargs; i++)
1873 {
1874 VEC (tree, heap) *vec_oprndsi
1875 = (VEC (tree, heap) *)
1876 VEC_index (slp_void_p, vec_defs, i);
1877 VEC_free (tree, heap, vec_oprndsi);
1878 }
1879 VEC_free (slp_void_p, heap, vec_defs);
1880 continue;
1881 }
1882
1883 for (i = 0; i < nargs; i++)
1884 {
1885 op = gimple_call_arg (stmt, i);
1886 if (j == 0)
1887 {
1888 vec_oprnd0
1889 = vect_get_vec_def_for_operand (op, stmt, NULL);
1890 vec_oprnd1
1891 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
1892 }
1893 else
1894 {
1895 vec_oprnd1 = gimple_call_arg (new_stmt, 2*i + 1);
1896 vec_oprnd0
1897 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd1);
1898 vec_oprnd1
1899 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
1900 }
1901
1902 VEC_quick_push (tree, vargs, vec_oprnd0);
1903 VEC_quick_push (tree, vargs, vec_oprnd1);
1904 }
1905
1906 new_stmt = gimple_build_call_vec (fndecl, vargs);
1907 new_temp = make_ssa_name (vec_dest, new_stmt);
1908 gimple_call_set_lhs (new_stmt, new_temp);
1909
1910 vect_finish_stmt_generation (stmt, new_stmt, gsi);
1911 mark_symbols_for_renaming (new_stmt);
1912
1913 if (j == 0)
1914 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
1915 else
1916 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
1917
1918 prev_stmt_info = vinfo_for_stmt (new_stmt);
1919 }
1920
1921 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
1922
1923 break;
1924
1925 case WIDEN:
1926 /* No current target implements this case. */
1927 return false;
1928 }
1929
1930 VEC_free (tree, heap, vargs);
1931
1932 /* Update the exception handling table with the vector stmt if necessary. */
1933 if (maybe_clean_or_replace_eh_stmt (stmt, *vec_stmt))
1934 gimple_purge_dead_eh_edges (gimple_bb (stmt));
1935
1936 /* The call in STMT might prevent it from being removed in DCE.
1937 We, however, cannot remove it here, due to the way the SSA name
1938 it defines is mapped to the new definition. So just replace the
1939 rhs of the statement with something harmless. */
1940
1941 if (slp_node)
1942 return true;
1943
1944 type = TREE_TYPE (scalar_dest);
1945 if (is_pattern_stmt_p (stmt_info))
1946 lhs = gimple_call_lhs (STMT_VINFO_RELATED_STMT (stmt_info));
1947 else
1948 lhs = gimple_call_lhs (stmt);
1949 new_stmt = gimple_build_assign (lhs, build_zero_cst (type));
1950 set_vinfo_for_stmt (new_stmt, stmt_info);
1951 set_vinfo_for_stmt (stmt, NULL);
1952 STMT_VINFO_STMT (stmt_info) = new_stmt;
1953 gsi_replace (gsi, new_stmt, false);
1954 SSA_NAME_DEF_STMT (gimple_assign_lhs (new_stmt)) = new_stmt;
1955
1956 return true;
1957 }
1958
1959
1960 /* Function vect_gen_widened_results_half
1961
1962 Create a vector stmt whose code, number of arguments, and result
1963 variable are CODE, OP_TYPE, and VEC_DEST, and whose arguments are
1964 VEC_OPRND0 and VEC_OPRND1. The new vector stmt is to be inserted at GSI.
1965 In the case that CODE is a CALL_EXPR, this means that a call to DECL
1966 needs to be created (DECL is a function-decl of a target-builtin).
1967 STMT is the original scalar stmt that we are vectorizing. */
1968
1969 static gimple
1970 vect_gen_widened_results_half (enum tree_code code,
1971 tree decl,
1972 tree vec_oprnd0, tree vec_oprnd1, int op_type,
1973 tree vec_dest, gimple_stmt_iterator *gsi,
1974 gimple stmt)
1975 {
1976 gimple new_stmt;
1977 tree new_temp;
1978
1979 /* Generate half of the widened result: */
1980 if (code == CALL_EXPR)
1981 {
1982 /* Target specific support */
1983 if (op_type == binary_op)
1984 new_stmt = gimple_build_call (decl, 2, vec_oprnd0, vec_oprnd1);
1985 else
1986 new_stmt = gimple_build_call (decl, 1, vec_oprnd0);
1987 new_temp = make_ssa_name (vec_dest, new_stmt);
1988 gimple_call_set_lhs (new_stmt, new_temp);
1989 }
1990 else
1991 {
1992 /* Generic support */
1993 gcc_assert (op_type == TREE_CODE_LENGTH (code));
1994 if (op_type != binary_op)
1995 vec_oprnd1 = NULL;
1996 new_stmt = gimple_build_assign_with_ops (code, vec_dest, vec_oprnd0,
1997 vec_oprnd1);
1998 new_temp = make_ssa_name (vec_dest, new_stmt);
1999 gimple_assign_set_lhs (new_stmt, new_temp);
2000 }
2001 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2002
2003 return new_stmt;
2004 }
2005
2006
2007 /* Get vectorized definitions for loop-based vectorization. For the first
2008 operand we call vect_get_vec_def_for_operand() (with OPRND containing
2009 the scalar operand), and for the rest we get a copy with
2010 vect_get_vec_def_for_stmt_copy() using the previous vector definition
2011 (stored in OPRND). See vect_get_vec_def_for_stmt_copy() for details.
2012 The vectors are collected into VEC_OPRNDS. */
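  /* Illustrative note: each invocation pushes two vector defs, and the
     function recurses MULTI_STEP_CVT more times, so a call with
     MULTI_STEP_CVT == 1 collects four vector defs in VEC_OPRNDS.  */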
2013
2014 static void
2015 vect_get_loop_based_defs (tree *oprnd, gimple stmt, enum vect_def_type dt,
2016 VEC (tree, heap) **vec_oprnds, int multi_step_cvt)
2017 {
2018 tree vec_oprnd;
2019
2020 /* Get first vector operand. */
2021 /* All the vector operands except the very first one (which is the scalar
2022 operand) are stmt copies. */
2023 if (TREE_CODE (TREE_TYPE (*oprnd)) != VECTOR_TYPE)
2024 vec_oprnd = vect_get_vec_def_for_operand (*oprnd, stmt, NULL);
2025 else
2026 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, *oprnd);
2027
2028 VEC_quick_push (tree, *vec_oprnds, vec_oprnd);
2029
2030 /* Get second vector operand. */
2031 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, vec_oprnd);
2032 VEC_quick_push (tree, *vec_oprnds, vec_oprnd);
2033
2034 *oprnd = vec_oprnd;
2035
2036 /* For conversion in multiple steps, continue to get operands
2037 recursively. */
2038 if (multi_step_cvt)
2039 vect_get_loop_based_defs (oprnd, stmt, dt, vec_oprnds, multi_step_cvt - 1);
2040 }
2041
2042
2043 /* Create vectorized demotion statements for vector operands from VEC_OPRNDS.
2044 For multi-step conversions, store the resulting vectors and call the function
2045 recursively. */
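  /* Illustrative example (vector modes are target dependent): demoting
     int to char with 128-bit vectors needs two steps, V4SI -> V8HI -> V16QI.
     Pairs of V4SI operands are first packed into V8HI vectors, which are
     stored back into VEC_OPRNDS; the recursive call then packs pairs of
     those V8HI vectors into the final V16QI results.  */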
2046
2047 static void
2048 vect_create_vectorized_demotion_stmts (VEC (tree, heap) **vec_oprnds,
2049 int multi_step_cvt, gimple stmt,
2050 VEC (tree, heap) *vec_dsts,
2051 gimple_stmt_iterator *gsi,
2052 slp_tree slp_node, enum tree_code code,
2053 stmt_vec_info *prev_stmt_info)
2054 {
2055 unsigned int i;
2056 tree vop0, vop1, new_tmp, vec_dest;
2057 gimple new_stmt;
2058 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2059
2060 vec_dest = VEC_pop (tree, vec_dsts);
2061
2062 for (i = 0; i < VEC_length (tree, *vec_oprnds); i += 2)
2063 {
2064 /* Create demotion operation. */
2065 vop0 = VEC_index (tree, *vec_oprnds, i);
2066 vop1 = VEC_index (tree, *vec_oprnds, i + 1);
2067 new_stmt = gimple_build_assign_with_ops (code, vec_dest, vop0, vop1);
2068 new_tmp = make_ssa_name (vec_dest, new_stmt);
2069 gimple_assign_set_lhs (new_stmt, new_tmp);
2070 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2071
2072 if (multi_step_cvt)
2073 /* Store the resulting vector for the next recursive call. */
2074 VEC_replace (tree, *vec_oprnds, i/2, new_tmp);
2075 else
2076 {
2077 /* This is the last step of the conversion sequence. Store the
2078 vectors in SLP_NODE or in the vector info of the scalar statement
2079 (or in the STMT_VINFO_RELATED_STMT chain). */
2080 if (slp_node)
2081 VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node), new_stmt);
2082 else
2083 {
2084 if (!*prev_stmt_info)
2085 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
2086 else
2087 STMT_VINFO_RELATED_STMT (*prev_stmt_info) = new_stmt;
2088
2089 *prev_stmt_info = vinfo_for_stmt (new_stmt);
2090 }
2091 }
2092 }
2093
2094 /* For multi-step demotion operations we first generate demotion operations
2095 from the source type to the intermediate types, and then combine the
2096 results (stored in VEC_OPRNDS) with a further demotion operation to the
2097 destination type. */
2098 if (multi_step_cvt)
2099 {
2100 /* At each level of recursion we have half of the operands we had at the
2101 previous level. */
2102 VEC_truncate (tree, *vec_oprnds, (i+1)/2);
2103 vect_create_vectorized_demotion_stmts (vec_oprnds, multi_step_cvt - 1,
2104 stmt, vec_dsts, gsi, slp_node,
2105 VEC_PACK_TRUNC_EXPR,
2106 prev_stmt_info);
2107 }
2108
2109 VEC_quick_push (tree, vec_dsts, vec_dest);
2110 }
2111
2112
2113 /* Create vectorized promotion statements for vector operands from VEC_OPRNDS0
2114 and VEC_OPRNDS1 (for binary operations). For multi-step conversions, store
2115 the resulting vectors and call the function recursively. */
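  /* Illustrative note: each input vector yields two result vectors (the
     low and high halves of the widened elements), so on return VEC_OPRNDS0
     holds twice as many vectors as on entry; a multi-step promotion simply
     runs this doubling once per step.  */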
2116
2117 static void
2118 vect_create_vectorized_promotion_stmts (VEC (tree, heap) **vec_oprnds0,
2119 VEC (tree, heap) **vec_oprnds1,
2120 gimple stmt, tree vec_dest,
2121 gimple_stmt_iterator *gsi,
2122 enum tree_code code1,
2123 enum tree_code code2, tree decl1,
2124 tree decl2, int op_type)
2125 {
2126 int i;
2127 tree vop0, vop1, new_tmp1, new_tmp2;
2128 gimple new_stmt1, new_stmt2;
2129 VEC (tree, heap) *vec_tmp = NULL;
2130
2131 vec_tmp = VEC_alloc (tree, heap, VEC_length (tree, *vec_oprnds0) * 2);
2132 FOR_EACH_VEC_ELT (tree, *vec_oprnds0, i, vop0)
2133 {
2134 if (op_type == binary_op)
2135 vop1 = VEC_index (tree, *vec_oprnds1, i);
2136 else
2137 vop1 = NULL_TREE;
2138
2139 /* Generate the two halves of the promotion operation. */
2140 new_stmt1 = vect_gen_widened_results_half (code1, decl1, vop0, vop1,
2141 op_type, vec_dest, gsi, stmt);
2142 new_stmt2 = vect_gen_widened_results_half (code2, decl2, vop0, vop1,
2143 op_type, vec_dest, gsi, stmt);
2144 if (is_gimple_call (new_stmt1))
2145 {
2146 new_tmp1 = gimple_call_lhs (new_stmt1);
2147 new_tmp2 = gimple_call_lhs (new_stmt2);
2148 }
2149 else
2150 {
2151 new_tmp1 = gimple_assign_lhs (new_stmt1);
2152 new_tmp2 = gimple_assign_lhs (new_stmt2);
2153 }
2154
2155 /* Store the results for the next step. */
2156 VEC_quick_push (tree, vec_tmp, new_tmp1);
2157 VEC_quick_push (tree, vec_tmp, new_tmp2);
2158 }
2159
2160 VEC_free (tree, heap, *vec_oprnds0);
2161 *vec_oprnds0 = vec_tmp;
2162 }
2163
2164
2165 /* Check if STMT performs a conversion operation that can be vectorized.
2166 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2167 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
2168 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
2169
2170 static bool
2171 vectorizable_conversion (gimple stmt, gimple_stmt_iterator *gsi,
2172 gimple *vec_stmt, slp_tree slp_node)
2173 {
2174 tree vec_dest;
2175 tree scalar_dest;
2176 tree op0, op1 = NULL_TREE;
2177 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
2178 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2179 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2180 enum tree_code code, code1 = ERROR_MARK, code2 = ERROR_MARK;
2181 enum tree_code codecvt1 = ERROR_MARK, codecvt2 = ERROR_MARK;
2182 tree decl1 = NULL_TREE, decl2 = NULL_TREE;
2183 tree new_temp;
2184 tree def;
2185 gimple def_stmt;
2186 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
2187 gimple new_stmt = NULL;
2188 stmt_vec_info prev_stmt_info;
2189 int nunits_in;
2190 int nunits_out;
2191 tree vectype_out, vectype_in;
2192 int ncopies, i, j;
2193 tree lhs_type, rhs_type;
2194 enum { NARROW, NONE, WIDEN } modifier;
2195 VEC (tree,heap) *vec_oprnds0 = NULL, *vec_oprnds1 = NULL;
2196 tree vop0;
2197 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
2198 int multi_step_cvt = 0;
2199 VEC (tree, heap) *vec_dsts = NULL, *interm_types = NULL;
2200 tree last_oprnd, intermediate_type, cvt_type = NULL_TREE;
2201 int op_type;
2202 enum machine_mode rhs_mode;
2203 unsigned short fltsz;
2204
2205 /* Is STMT a vectorizable conversion? */
2206
2207 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
2208 return false;
2209
2210 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
2211 return false;
2212
2213 if (!is_gimple_assign (stmt))
2214 return false;
2215
2216 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
2217 return false;
2218
2219 code = gimple_assign_rhs_code (stmt);
2220 if (!CONVERT_EXPR_CODE_P (code)
2221 && code != FIX_TRUNC_EXPR
2222 && code != FLOAT_EXPR
2223 && code != WIDEN_MULT_EXPR
2224 && code != WIDEN_LSHIFT_EXPR)
2225 return false;
2226
2227 op_type = TREE_CODE_LENGTH (code);
2228
2229 /* Check types of lhs and rhs. */
2230 scalar_dest = gimple_assign_lhs (stmt);
2231 lhs_type = TREE_TYPE (scalar_dest);
2232 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
2233
2234 op0 = gimple_assign_rhs1 (stmt);
2235 rhs_type = TREE_TYPE (op0);
2236
2237 if ((code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
2238 && !((INTEGRAL_TYPE_P (lhs_type)
2239 && INTEGRAL_TYPE_P (rhs_type))
2240 || (SCALAR_FLOAT_TYPE_P (lhs_type)
2241 && SCALAR_FLOAT_TYPE_P (rhs_type))))
2242 return false;
2243
2244 if ((INTEGRAL_TYPE_P (lhs_type)
2245 && (TYPE_PRECISION (lhs_type)
2246 != GET_MODE_PRECISION (TYPE_MODE (lhs_type))))
2247 || (INTEGRAL_TYPE_P (rhs_type)
2248 && (TYPE_PRECISION (rhs_type)
2249 != GET_MODE_PRECISION (TYPE_MODE (rhs_type)))))
2250 {
2251 if (vect_print_dump_info (REPORT_DETAILS))
2252 fprintf (vect_dump,
2253 "type conversion to/from bit-precision unsupported.");
2254 return false;
2255 }
2256
2257 /* Check the operands of the operation. */
2258 if (!vect_is_simple_use_1 (op0, stmt, loop_vinfo, bb_vinfo,
2259 &def_stmt, &def, &dt[0], &vectype_in))
2260 {
2261 if (vect_print_dump_info (REPORT_DETAILS))
2262 fprintf (vect_dump, "use not simple.");
2263 return false;
2264 }
2265 if (op_type == binary_op)
2266 {
2267 bool ok;
2268
2269 op1 = gimple_assign_rhs2 (stmt);
2270 gcc_assert (code == WIDEN_MULT_EXPR || code == WIDEN_LSHIFT_EXPR);
2271 /* For WIDEN_MULT_EXPR, if OP0 is a constant, use the type of
2272 OP1. */
2273 if (CONSTANT_CLASS_P (op0))
2274 ok = vect_is_simple_use_1 (op1, stmt, loop_vinfo, NULL,
2275 &def_stmt, &def, &dt[1], &vectype_in);
2276 else
2277 ok = vect_is_simple_use (op1, stmt, loop_vinfo, NULL, &def_stmt,
2278 &def, &dt[1]);
2279
2280 if (!ok)
2281 {
2282 if (vect_print_dump_info (REPORT_DETAILS))
2283 fprintf (vect_dump, "use not simple.");
2284 return false;
2285 }
2286 }
2287
2288 /* If op0 is an external or constant def, use a vector type of
2289 the same size as the output vector type. */
2290 if (!vectype_in)
2291 vectype_in = get_same_sized_vectype (rhs_type, vectype_out);
2292 if (vec_stmt)
2293 gcc_assert (vectype_in);
2294 if (!vectype_in)
2295 {
2296 if (vect_print_dump_info (REPORT_DETAILS))
2297 {
2298 fprintf (vect_dump, "no vectype for scalar type ");
2299 print_generic_expr (vect_dump, rhs_type, TDF_SLIM);
2300 }
2301
2302 return false;
2303 }
2304
2305 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
2306 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
2307 if (nunits_in < nunits_out)
2308 modifier = NARROW;
2309 else if (nunits_out == nunits_in)
2310 modifier = NONE;
2311 else
2312 modifier = WIDEN;
2313
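  /* For example (illustrative, 128-bit vectors assumed): float -> double
     has nunits_in == 4 (V4SF) and nunits_out == 2 (V2DF), so WIDEN;
     double -> float is the opposite and gives NARROW; int -> float keeps
     the number of elements and gives NONE.  */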
2314 /* Multiple types in SLP are handled by creating the appropriate number of
2315 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
2316 case of SLP. */
2317 if (slp_node || PURE_SLP_STMT (stmt_info))
2318 ncopies = 1;
2319 else if (modifier == NARROW)
2320 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out;
2321 else
2322 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
2323
2324 /* Sanity check: make sure that at least one copy of the vectorized stmt
2325 needs to be generated. */
2326 gcc_assert (ncopies >= 1);
2327
2328 /* Supportable by target? */
2329 switch (modifier)
2330 {
2331 case NONE:
2332 if (code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
2333 return false;
2334 if (supportable_convert_operation (code, vectype_out, vectype_in,
2335 &decl1, &code1))
2336 break;
2337 /* FALLTHRU */
2338 unsupported:
2339 if (vect_print_dump_info (REPORT_DETAILS))
2340 fprintf (vect_dump, "conversion not supported by target.");
2341 return false;
2342
2343 case WIDEN:
2344 if (supportable_widening_operation (code, stmt, vectype_out, vectype_in,
2345 &decl1, &decl2, &code1, &code2,
2346 &multi_step_cvt, &interm_types))
2347 {
2348 /* A binary widening operation can only be supported directly by the
2349 architecture. */
2350 gcc_assert (!(multi_step_cvt && op_type == binary_op));
2351 break;
2352 }
2353
2354 if (code != FLOAT_EXPR
2355 || (GET_MODE_SIZE (TYPE_MODE (lhs_type))
2356 <= GET_MODE_SIZE (TYPE_MODE (rhs_type))))
2357 goto unsupported;
2358
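      /* The target cannot widen this conversion directly.  For FLOAT_EXPR
	 from a narrower integer type we can still try a two-stage scheme
	 (illustrative summary of the loop below): first widen the integer
	 input to an intermediate integer type (e.g. short -> int), then
	 convert that to the floating-point type; successively wider integer
	 modes are tried, up to the size of LHS_TYPE.  */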
2359 rhs_mode = TYPE_MODE (rhs_type);
2360 fltsz = GET_MODE_SIZE (TYPE_MODE (lhs_type));
2361 for (rhs_mode = GET_MODE_2XWIDER_MODE (TYPE_MODE (rhs_type));
2362 rhs_mode != VOIDmode && GET_MODE_SIZE (rhs_mode) <= fltsz;
2363 rhs_mode = GET_MODE_2XWIDER_MODE (rhs_mode))
2364 {
2365 cvt_type
2366 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
2367 cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
2368 if (cvt_type == NULL_TREE)
2369 goto unsupported;
2370
2371 if (GET_MODE_SIZE (rhs_mode) == fltsz)
2372 {
2373 if (!supportable_convert_operation (code, vectype_out,
2374 cvt_type, &decl1, &codecvt1))
2375 goto unsupported;
2376 }
2377 else if (!supportable_widening_operation (code, stmt, vectype_out,
2378 cvt_type, &decl1, &decl2,
2379 &codecvt1, &codecvt2,
2380 &multi_step_cvt,
2381 &interm_types))
2382 continue;
2383 else
2384 gcc_assert (multi_step_cvt == 0);
2385
2386 if (supportable_widening_operation (NOP_EXPR, stmt, cvt_type,
2387 vectype_in, NULL, NULL, &code1,
2388 &code2, &multi_step_cvt,
2389 &interm_types))
2390 break;
2391 }
2392
2393 if (rhs_mode == VOIDmode || GET_MODE_SIZE (rhs_mode) > fltsz)
2394 goto unsupported;
2395
2396 if (GET_MODE_SIZE (rhs_mode) == fltsz)
2397 codecvt2 = ERROR_MARK;
2398 else
2399 {
2400 multi_step_cvt++;
2401 VEC_safe_push (tree, heap, interm_types, cvt_type);
2402 cvt_type = NULL_TREE;
2403 }
2404 break;
2405
2406 case NARROW:
2407 gcc_assert (op_type == unary_op);
2408 if (supportable_narrowing_operation (code, vectype_out, vectype_in,
2409 &code1, &multi_step_cvt,
2410 &interm_types))
2411 break;
2412
2413 if (code != FIX_TRUNC_EXPR
2414 || (GET_MODE_SIZE (TYPE_MODE (lhs_type))
2415 >= GET_MODE_SIZE (TYPE_MODE (rhs_type))))
2416 goto unsupported;
2417
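      /* Direct narrowing is not supported.  For FIX_TRUNC_EXPR to a
	 narrower integer type, try a two-stage scheme instead (illustrative
	 summary of the code below): first truncate the floating-point input
	 to an integer type of the same width as RHS_TYPE (e.g.
	 double -> long), then narrow that integer result down to LHS_TYPE.  */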
2418 rhs_mode = TYPE_MODE (rhs_type);
2419 cvt_type
2420 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
2421 cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
2422 if (cvt_type == NULL_TREE)
2423 goto unsupported;
2424 if (!supportable_convert_operation (code, cvt_type, vectype_in,
2425 &decl1, &codecvt1))
2426 goto unsupported;
2427 if (supportable_narrowing_operation (NOP_EXPR, vectype_out, cvt_type,
2428 &code1, &multi_step_cvt,
2429 &interm_types))
2430 break;
2431 goto unsupported;
2432
2433 default:
2434 gcc_unreachable ();
2435 }
2436
2437 if (!vec_stmt) /* transformation not required. */
2438 {
2439 if (vect_print_dump_info (REPORT_DETAILS))
2440 fprintf (vect_dump, "=== vectorizable_conversion ===");
2441 if (code == FIX_TRUNC_EXPR || code == FLOAT_EXPR)
2442 {
2443 STMT_VINFO_TYPE (stmt_info) = type_conversion_vec_info_type;
2444 vect_model_simple_cost (stmt_info, ncopies, dt, NULL);
2445 }
2446 else if (modifier == NARROW)
2447 {
2448 STMT_VINFO_TYPE (stmt_info) = type_demotion_vec_info_type;
2449 vect_model_promotion_demotion_cost (stmt_info, dt, multi_step_cvt);
2450 }
2451 else
2452 {
2453 STMT_VINFO_TYPE (stmt_info) = type_promotion_vec_info_type;
2454 vect_model_promotion_demotion_cost (stmt_info, dt, multi_step_cvt);
2455 }
2456 VEC_free (tree, heap, interm_types);
2457 return true;
2458 }
2459
2460 /** Transform. **/
2461 if (vect_print_dump_info (REPORT_DETAILS))
2462 fprintf (vect_dump, "transform conversion. ncopies = %d.", ncopies);
2463
2464 if (op_type == binary_op)
2465 {
2466 if (CONSTANT_CLASS_P (op0))
2467 op0 = fold_convert (TREE_TYPE (op1), op0);
2468 else if (CONSTANT_CLASS_P (op1))
2469 op1 = fold_convert (TREE_TYPE (op0), op1);
2470 }
2471
2472 /* In case of multi-step conversion, we first generate conversion operations
2473 to the intermediate types, and then from those types to the final one.
2474 We create vector destinations for the intermediate type (TYPES) received
2475 from supportable_*_operation, and store them in the correct order
2476 for future use in vect_create_vectorized_*_stmts (). */
2477 vec_dsts = VEC_alloc (tree, heap, multi_step_cvt + 1);
2478 vec_dest = vect_create_destination_var (scalar_dest,
2479 (cvt_type && modifier == WIDEN)
2480 ? cvt_type : vectype_out);
2481 VEC_quick_push (tree, vec_dsts, vec_dest);
2482
2483 if (multi_step_cvt)
2484 {
2485 for (i = VEC_length (tree, interm_types) - 1;
2486 VEC_iterate (tree, interm_types, i, intermediate_type); i--)
2487 {
2488 vec_dest = vect_create_destination_var (scalar_dest,
2489 intermediate_type);
2490 VEC_quick_push (tree, vec_dsts, vec_dest);
2491 }
2492 }
2493
2494 if (cvt_type)
2495 vec_dest = vect_create_destination_var (scalar_dest,
2496 modifier == WIDEN
2497 ? vectype_out : cvt_type);
2498
2499 if (!slp_node)
2500 {
2501 if (modifier == NONE)
2502 vec_oprnds0 = VEC_alloc (tree, heap, 1);
2503 else if (modifier == WIDEN)
2504 {
2505 vec_oprnds0 = VEC_alloc (tree, heap,
2506 (multi_step_cvt
2507 ? vect_pow2 (multi_step_cvt) : 1));
2508 if (op_type == binary_op)
2509 vec_oprnds1 = VEC_alloc (tree, heap, 1);
2510 }
2511 else
2512 vec_oprnds0 = VEC_alloc (tree, heap,
2513 2 * (multi_step_cvt
2514 ? vect_pow2 (multi_step_cvt) : 1));
2515 }
2516 else if (code == WIDEN_LSHIFT_EXPR)
2517 vec_oprnds1 = VEC_alloc (tree, heap, slp_node->vec_stmts_size);
2518
2519 last_oprnd = op0;
2520 prev_stmt_info = NULL;
2521 switch (modifier)
2522 {
2523 case NONE:
2524 for (j = 0; j < ncopies; j++)
2525 {
2526 if (j == 0)
2527 vect_get_vec_defs (op0, NULL, stmt, &vec_oprnds0, NULL, slp_node,
2528 -1);
2529 else
2530 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, NULL);
2531
2532 FOR_EACH_VEC_ELT (tree, vec_oprnds0, i, vop0)
2533 {
2534 /* Arguments are ready. Create the new vector stmt. */
2535 if (code1 == CALL_EXPR)
2536 {
2537 new_stmt = gimple_build_call (decl1, 1, vop0);
2538 new_temp = make_ssa_name (vec_dest, new_stmt);
2539 gimple_call_set_lhs (new_stmt, new_temp);
2540 }
2541 else
2542 {
2543 gcc_assert (TREE_CODE_LENGTH (code1) == unary_op);
2544 new_stmt = gimple_build_assign_with_ops (code1, vec_dest,
2545 vop0, NULL);
2546 new_temp = make_ssa_name (vec_dest, new_stmt);
2547 gimple_assign_set_lhs (new_stmt, new_temp);
2548 }
2549
2550 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2551 if (slp_node)
2552 VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node),
2553 new_stmt);
2554 }
2555
2556 if (j == 0)
2557 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2558 else
2559 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2560 prev_stmt_info = vinfo_for_stmt (new_stmt);
2561 }
2562 break;
2563
2564 case WIDEN:
2565 /* In case the vectorization factor (VF) is bigger than the number
2566 of elements that we can fit in a vectype (nunits), we have to
2567 generate more than one vector stmt, i.e., we need to "unroll"
2568 the vector stmt by a factor VF/nunits. */
2569 for (j = 0; j < ncopies; j++)
2570 {
2571 /* Handle uses. */
2572 if (j == 0)
2573 {
2574 if (slp_node)
2575 {
2576 if (code == WIDEN_LSHIFT_EXPR)
2577 {
2578 unsigned int k;
2579
2580 vec_oprnd1 = op1;
2581 /* Store vec_oprnd1 for every vector stmt to be created
2582 for SLP_NODE. We check during the analysis that all
2583 the shift arguments are the same. */
2584 for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
2585 VEC_quick_push (tree, vec_oprnds1, vec_oprnd1);
2586
2587 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
2588 slp_node, -1);
2589 }
2590 else
2591 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0,
2592 &vec_oprnds1, slp_node, -1);
2593 }
2594 else
2595 {
2596 vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt, NULL);
2597 VEC_quick_push (tree, vec_oprnds0, vec_oprnd0);
2598 if (op_type == binary_op)
2599 {
2600 if (code == WIDEN_LSHIFT_EXPR)
2601 vec_oprnd1 = op1;
2602 else
2603 vec_oprnd1 = vect_get_vec_def_for_operand (op1, stmt,
2604 NULL);
2605 VEC_quick_push (tree, vec_oprnds1, vec_oprnd1);
2606 }
2607 }
2608 }
2609 else
2610 {
2611 vec_oprnd0 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd0);
2612 VEC_truncate (tree, vec_oprnds0, 0);
2613 VEC_quick_push (tree, vec_oprnds0, vec_oprnd0);
2614 if (op_type == binary_op)
2615 {
2616 if (code == WIDEN_LSHIFT_EXPR)
2617 vec_oprnd1 = op1;
2618 else
2619 vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt[1],
2620 vec_oprnd1);
2621 VEC_truncate (tree, vec_oprnds1, 0);
2622 VEC_quick_push (tree, vec_oprnds1, vec_oprnd1);
2623 }
2624 }
2625
2626 /* Arguments are ready. Create the new vector stmts. */
2627 for (i = multi_step_cvt; i >= 0; i--)
2628 {
2629 tree this_dest = VEC_index (tree, vec_dsts, i);
2630 enum tree_code c1 = code1, c2 = code2;
2631 if (i == 0 && codecvt2 != ERROR_MARK)
2632 {
2633 c1 = codecvt1;
2634 c2 = codecvt2;
2635 }
2636 vect_create_vectorized_promotion_stmts (&vec_oprnds0,
2637 &vec_oprnds1,
2638 stmt, this_dest, gsi,
2639 c1, c2, decl1, decl2,
2640 op_type);
2641 }
2642
2643 FOR_EACH_VEC_ELT (tree, vec_oprnds0, i, vop0)
2644 {
2645 if (cvt_type)
2646 {
2647 if (codecvt1 == CALL_EXPR)
2648 {
2649 new_stmt = gimple_build_call (decl1, 1, vop0);
2650 new_temp = make_ssa_name (vec_dest, new_stmt);
2651 gimple_call_set_lhs (new_stmt, new_temp);
2652 }
2653 else
2654 {
2655 gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
2656 new_temp = make_ssa_name (vec_dest, NULL);
2657 new_stmt = gimple_build_assign_with_ops (codecvt1,
2658 new_temp,
2659 vop0, NULL);
2660 }
2661
2662 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2663 }
2664 else
2665 new_stmt = SSA_NAME_DEF_STMT (vop0);
2666
2667 if (slp_node)
2668 VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node),
2669 new_stmt);
2670 else
2671 {
2672 if (!prev_stmt_info)
2673 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
2674 else
2675 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2676 prev_stmt_info = vinfo_for_stmt (new_stmt);
2677 }
2678 }
2679 }
2680
2681 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
2682 break;
2683
2684 case NARROW:
2685 /* In case the vectorization factor (VF) is bigger than the number
2686 of elements that we can fit in a vectype (nunits), we have to
2687 generate more than one vector stmt, i.e., we need to "unroll"
2688 the vector stmt by a factor VF/nunits. */
2689 for (j = 0; j < ncopies; j++)
2690 {
2691 /* Handle uses. */
2692 if (slp_node)
2693 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
2694 slp_node, -1);
2695 else
2696 {
2697 VEC_truncate (tree, vec_oprnds0, 0);
2698 vect_get_loop_based_defs (&last_oprnd, stmt, dt[0], &vec_oprnds0,
2699 vect_pow2 (multi_step_cvt) - 1);
2700 }
2701
2702 /* Arguments are ready. Create the new vector stmts. */
2703 if (cvt_type)
2704 FOR_EACH_VEC_ELT (tree, vec_oprnds0, i, vop0)
2705 {
2706 if (codecvt1 == CALL_EXPR)
2707 {
2708 new_stmt = gimple_build_call (decl1, 1, vop0);
2709 new_temp = make_ssa_name (vec_dest, new_stmt);
2710 gimple_call_set_lhs (new_stmt, new_temp);
2711 }
2712 else
2713 {
2714 gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
2715 new_temp = make_ssa_name (vec_dest, NULL);
2716 new_stmt = gimple_build_assign_with_ops (codecvt1, new_temp,
2717 vop0, NULL);
2718 }
2719
2720 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2721 VEC_replace (tree, vec_oprnds0, i, new_temp);
2722 }
2723
2724 vect_create_vectorized_demotion_stmts (&vec_oprnds0, multi_step_cvt,
2725 stmt, vec_dsts, gsi,
2726 slp_node, code1,
2727 &prev_stmt_info);
2728 }
2729
2730 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
2731 break;
2732 }
2733
2734 VEC_free (tree, heap, vec_oprnds0);
2735 VEC_free (tree, heap, vec_oprnds1);
2736 VEC_free (tree, heap, vec_dsts);
2737 VEC_free (tree, heap, interm_types);
2738
2739 return true;
2740 }
2741
2742
2743 /* Function vectorizable_assignment.
2744
2745 Check if STMT performs an assignment (copy) that can be vectorized.
2746 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2747 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
2748 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
2749
2750 static bool
2751 vectorizable_assignment (gimple stmt, gimple_stmt_iterator *gsi,
2752 gimple *vec_stmt, slp_tree slp_node)
2753 {
2754 tree vec_dest;
2755 tree scalar_dest;
2756 tree op;
2757 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2758 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2759 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2760 tree new_temp;
2761 tree def;
2762 gimple def_stmt;
2763 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
2764 unsigned int nunits = TYPE_VECTOR_SUBPARTS (vectype);
2765 int ncopies;
2766 int i, j;
2767 VEC(tree,heap) *vec_oprnds = NULL;
2768 tree vop;
2769 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
2770 gimple new_stmt = NULL;
2771 stmt_vec_info prev_stmt_info = NULL;
2772 enum tree_code code;
2773 tree vectype_in;
2774
2775 /* Multiple types in SLP are handled by creating the appropriate number of
2776 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
2777 case of SLP. */
2778 if (slp_node || PURE_SLP_STMT (stmt_info))
2779 ncopies = 1;
2780 else
2781 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
2782
2783 gcc_assert (ncopies >= 1);
2784
2785 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
2786 return false;
2787
2788 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
2789 return false;
2790
2791 /* Is vectorizable assignment? */
2792 if (!is_gimple_assign (stmt))
2793 return false;
2794
2795 scalar_dest = gimple_assign_lhs (stmt);
2796 if (TREE_CODE (scalar_dest) != SSA_NAME)
2797 return false;
2798
2799 code = gimple_assign_rhs_code (stmt);
2800 if (gimple_assign_single_p (stmt)
2801 || code == PAREN_EXPR
2802 || CONVERT_EXPR_CODE_P (code))
2803 op = gimple_assign_rhs1 (stmt);
2804 else
2805 return false;
2806
2807 if (code == VIEW_CONVERT_EXPR)
2808 op = TREE_OPERAND (op, 0);
2809
2810 if (!vect_is_simple_use_1 (op, stmt, loop_vinfo, bb_vinfo,
2811 &def_stmt, &def, &dt[0], &vectype_in))
2812 {
2813 if (vect_print_dump_info (REPORT_DETAILS))
2814 fprintf (vect_dump, "use not simple.");
2815 return false;
2816 }
2817
2818 /* We can handle NOP_EXPR conversions that do not change the number
2819 of elements or the vector size. */
2820 if ((CONVERT_EXPR_CODE_P (code)
2821 || code == VIEW_CONVERT_EXPR)
2822 && (!vectype_in
2823 || TYPE_VECTOR_SUBPARTS (vectype_in) != nunits
2824 || (GET_MODE_SIZE (TYPE_MODE (vectype))
2825 != GET_MODE_SIZE (TYPE_MODE (vectype_in)))))
2826 return false;
2827
2828 /* We do not handle bit-precision changes. */
2829 if ((CONVERT_EXPR_CODE_P (code)
2830 || code == VIEW_CONVERT_EXPR)
2831 && INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest))
2832 && ((TYPE_PRECISION (TREE_TYPE (scalar_dest))
2833 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest))))
2834 || ((TYPE_PRECISION (TREE_TYPE (op))
2835 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (op))))))
2836 /* But a conversion that does not change the bit-pattern is ok. */
2837 && !((TYPE_PRECISION (TREE_TYPE (scalar_dest))
2838 > TYPE_PRECISION (TREE_TYPE (op)))
2839 && TYPE_UNSIGNED (TREE_TYPE (op))))
2840 {
2841 if (vect_print_dump_info (REPORT_DETAILS))
2842 fprintf (vect_dump, "type conversion to/from bit-precision "
2843 "unsupported.");
2844 return false;
2845 }
2846
2847 if (!vec_stmt) /* transformation not required. */
2848 {
2849 STMT_VINFO_TYPE (stmt_info) = assignment_vec_info_type;
2850 if (vect_print_dump_info (REPORT_DETAILS))
2851 fprintf (vect_dump, "=== vectorizable_assignment ===");
2852 vect_model_simple_cost (stmt_info, ncopies, dt, NULL);
2853 return true;
2854 }
2855
2856 /** Transform. **/
2857 if (vect_print_dump_info (REPORT_DETAILS))
2858 fprintf (vect_dump, "transform assignment.");
2859
2860 /* Handle def. */
2861 vec_dest = vect_create_destination_var (scalar_dest, vectype);
2862
2863 /* Handle use. */
2864 for (j = 0; j < ncopies; j++)
2865 {
2866 /* Handle uses. */
2867 if (j == 0)
2868 vect_get_vec_defs (op, NULL, stmt, &vec_oprnds, NULL, slp_node, -1);
2869 else
2870 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds, NULL);
2871
2872 /* Arguments are ready. Create the new vector stmt. */
2873 FOR_EACH_VEC_ELT (tree, vec_oprnds, i, vop)
2874 {
2875 if (CONVERT_EXPR_CODE_P (code)
2876 || code == VIEW_CONVERT_EXPR)
2877 vop = build1 (VIEW_CONVERT_EXPR, vectype, vop);
2878 new_stmt = gimple_build_assign (vec_dest, vop);
2879 new_temp = make_ssa_name (vec_dest, new_stmt);
2880 gimple_assign_set_lhs (new_stmt, new_temp);
2881 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2882 if (slp_node)
2883 VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node), new_stmt);
2884 }
2885
2886 if (slp_node)
2887 continue;
2888
2889 if (j == 0)
2890 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2891 else
2892 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2893
2894 prev_stmt_info = vinfo_for_stmt (new_stmt);
2895 }
2896
2897 VEC_free (tree, heap, vec_oprnds);
2898 return true;
2899 }
2900
2901
2902 /* Return TRUE if CODE (a shift operation) is supported for SCALAR_TYPE
2903 either as shift by a scalar or by a vector. */
2904
2905 bool
2906 vect_supportable_shift (enum tree_code code, tree scalar_type)
2907 {
2908
2909 enum machine_mode vec_mode;
2910 optab optab;
2911 int icode;
2912 tree vectype;
2913
2914 vectype = get_vectype_for_scalar_type (scalar_type);
2915 if (!vectype)
2916 return false;
2917
2918 optab = optab_for_tree_code (code, vectype, optab_scalar);
2919 if (!optab
2920 || optab_handler (optab, TYPE_MODE (vectype)) == CODE_FOR_nothing)
2921 {
2922 optab = optab_for_tree_code (code, vectype, optab_vector);
2923 if (!optab
2924 || (optab_handler (optab, TYPE_MODE (vectype))
2925 == CODE_FOR_nothing))
2926 return false;
2927 }
2928
2929 vec_mode = TYPE_MODE (vectype);
2930 icode = (int) optab_handler (optab, vec_mode);
2931 if (icode == CODE_FOR_nothing)
2932 return false;
2933
2934 return true;
2935 }
2936
2937
2938 /* Function vectorizable_shift.
2939
2940 Check if STMT performs a shift operation that can be vectorized.
2941 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2942 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
2943 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
2944
2945 static bool
2946 vectorizable_shift (gimple stmt, gimple_stmt_iterator *gsi,
2947 gimple *vec_stmt, slp_tree slp_node)
2948 {
2949 tree vec_dest;
2950 tree scalar_dest;
2951 tree op0, op1 = NULL;
2952 tree vec_oprnd1 = NULL_TREE;
2953 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2954 tree vectype;
2955 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2956 enum tree_code code;
2957 enum machine_mode vec_mode;
2958 tree new_temp;
2959 optab optab;
2960 int icode;
2961 enum machine_mode optab_op2_mode;
2962 tree def;
2963 gimple def_stmt;
2964 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
2965 gimple new_stmt = NULL;
2966 stmt_vec_info prev_stmt_info;
2967 int nunits_in;
2968 int nunits_out;
2969 tree vectype_out;
2970 tree op1_vectype;
2971 int ncopies;
2972 int j, i;
2973 VEC (tree, heap) *vec_oprnds0 = NULL, *vec_oprnds1 = NULL;
2974 tree vop0, vop1;
2975 unsigned int k;
2976 bool scalar_shift_arg = true;
2977 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
2978 int vf;
2979
2980 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
2981 return false;
2982
2983 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
2984 return false;
2985
2986 /* Is STMT a vectorizable binary/unary operation? */
2987 if (!is_gimple_assign (stmt))
2988 return false;
2989
2990 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
2991 return false;
2992
2993 code = gimple_assign_rhs_code (stmt);
2994
2995 if (!(code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
2996 || code == RROTATE_EXPR))
2997 return false;
2998
2999 scalar_dest = gimple_assign_lhs (stmt);
3000 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
3001 if (TYPE_PRECISION (TREE_TYPE (scalar_dest))
3002 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest))))
3003 {
3004 if (vect_print_dump_info (REPORT_DETAILS))
3005 fprintf (vect_dump, "bit-precision shifts not supported.");
3006 return false;
3007 }
3008
3009 op0 = gimple_assign_rhs1 (stmt);
3010 if (!vect_is_simple_use_1 (op0, stmt, loop_vinfo, bb_vinfo,
3011 &def_stmt, &def, &dt[0], &vectype))
3012 {
3013 if (vect_print_dump_info (REPORT_DETAILS))
3014 fprintf (vect_dump, "use not simple.");
3015 return false;
3016 }
3017 /* If op0 is an external or constant def, use a vector type with
3018 the same size as the output vector type. */
3019 if (!vectype)
3020 vectype = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
3021 if (vec_stmt)
3022 gcc_assert (vectype);
3023 if (!vectype)
3024 {
3025 if (vect_print_dump_info (REPORT_DETAILS))
3026 {
3027 fprintf (vect_dump, "no vectype for scalar type ");
3028 print_generic_expr (vect_dump, TREE_TYPE (op0), TDF_SLIM);
3029 }
3030
3031 return false;
3032 }
3033
3034 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
3035 nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
3036 if (nunits_out != nunits_in)
3037 return false;
3038
3039 op1 = gimple_assign_rhs2 (stmt);
3040 if (!vect_is_simple_use_1 (op1, stmt, loop_vinfo, bb_vinfo, &def_stmt,
3041 &def, &dt[1], &op1_vectype))
3042 {
3043 if (vect_print_dump_info (REPORT_DETAILS))
3044 fprintf (vect_dump, "use not simple.");
3045 return false;
3046 }
3047
3048 if (loop_vinfo)
3049 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
3050 else
3051 vf = 1;
3052
3053 /* Multiple types in SLP are handled by creating the appropriate number of
3054 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
3055 case of SLP. */
3056 if (slp_node || PURE_SLP_STMT (stmt_info))
3057 ncopies = 1;
3058 else
3059 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
3060
3061 gcc_assert (ncopies >= 1);
3062
3063 /* Determine whether the shift amount is a vector or a scalar. If the
3064 shift/rotate amount is a vector, use the vector/vector shift optabs. */
3065
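  /* For example (illustrative): in 'a[i] << b[i]' the shift amount is an
     internal def that varies per element, so the vector/vector optab is
     required; in 'a[i] << n' with a constant or loop-invariant N the
     cheaper vector/scalar optab can be used.  */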
3066 if (dt[1] == vect_internal_def && !slp_node)
3067 scalar_shift_arg = false;
3068 else if (dt[1] == vect_constant_def
3069 || dt[1] == vect_external_def
3070 || dt[1] == vect_internal_def)
3071 {
3072 /* In SLP, we need to check whether the shift count is the same in all
3073 the statements; in loops, if it is a constant or loop invariant, it is
3074 always a scalar shift. */
3075 if (slp_node)
3076 {
3077 VEC (gimple, heap) *stmts = SLP_TREE_SCALAR_STMTS (slp_node);
3078 gimple slpstmt;
3079
3080 FOR_EACH_VEC_ELT (gimple, stmts, k, slpstmt)
3081 if (!operand_equal_p (gimple_assign_rhs2 (slpstmt), op1, 0))
3082 scalar_shift_arg = false;
3083 }
3084 }
3085 else
3086 {
3087 if (vect_print_dump_info (REPORT_DETAILS))
3088 fprintf (vect_dump, "operand mode requires invariant argument.");
3089 return false;
3090 }
3091
3092 /* Vector shifted by vector. */
3093 if (!scalar_shift_arg)
3094 {
3095 optab = optab_for_tree_code (code, vectype, optab_vector);
3096 if (vect_print_dump_info (REPORT_DETAILS))
3097 fprintf (vect_dump, "vector/vector shift/rotate found.");
3098 if (!op1_vectype)
3099 op1_vectype = get_same_sized_vectype (TREE_TYPE (op1), vectype_out);
3100 if (op1_vectype == NULL_TREE
3101 || TYPE_MODE (op1_vectype) != TYPE_MODE (vectype))
3102 {
3103 if (vect_print_dump_info (REPORT_DETAILS))
3104 fprintf (vect_dump, "unusable type for last operand in"
3105 " vector/vector shift/rotate.");
3106 return false;
3107 }
3108 }
3109 /* See if the machine has a vector-shifted-by-scalar insn, and if not,
3110 then see if it has a vector-shifted-by-vector insn. */
3111 else
3112 {
3113 optab = optab_for_tree_code (code, vectype, optab_scalar);
3114 if (optab
3115 && optab_handler (optab, TYPE_MODE (vectype)) != CODE_FOR_nothing)
3116 {
3117 if (vect_print_dump_info (REPORT_DETAILS))
3118 fprintf (vect_dump, "vector/scalar shift/rotate found.");
3119 }
3120 else
3121 {
3122 optab = optab_for_tree_code (code, vectype, optab_vector);
3123 if (optab
3124 && (optab_handler (optab, TYPE_MODE (vectype))
3125 != CODE_FOR_nothing))
3126 {
3127 scalar_shift_arg = false;
3128
3129 if (vect_print_dump_info (REPORT_DETAILS))
3130 fprintf (vect_dump, "vector/vector shift/rotate found.");
3131
3132 /* Unlike the other binary operators, shifts/rotates have an rhs
3133 of integer type rather than the same type as the lhs, so make
3134 sure the scalar is of the right type if we are dealing with
3135 vectors of long long/long/short/char. */
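	      /* For instance (illustrative): shifting a vector of long long
		 by the int constant 3 -- op1 is folded to long long below so
		 that the invariant vector built from it has the same element
		 type as VECTYPE.  */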
3136 if (dt[1] == vect_constant_def)
3137 op1 = fold_convert (TREE_TYPE (vectype), op1);
3138 else if (!useless_type_conversion_p (TREE_TYPE (vectype),
3139 TREE_TYPE (op1)))
3140 {
3141 if (slp_node
3142 && TYPE_MODE (TREE_TYPE (vectype))
3143 != TYPE_MODE (TREE_TYPE (op1)))
3144 {
3145 if (vect_print_dump_info (REPORT_DETAILS))
3146 fprintf (vect_dump, "unusable type for last operand in"
3147 " vector/vector shift/rotate.");
3148 return false;
3149 }
3150 if (vec_stmt && !slp_node)
3151 {
3152 op1 = fold_convert (TREE_TYPE (vectype), op1);
3153 op1 = vect_init_vector (stmt, op1,
3154 TREE_TYPE (vectype), NULL);
3155 }
3156 }
3157 }
3158 }
3159 }
3160
3161 /* Supportable by target? */
3162 if (!optab)
3163 {
3164 if (vect_print_dump_info (REPORT_DETAILS))
3165 fprintf (vect_dump, "no optab.");
3166 return false;
3167 }
3168 vec_mode = TYPE_MODE (vectype);
3169 icode = (int) optab_handler (optab, vec_mode);
3170 if (icode == CODE_FOR_nothing)
3171 {
3172 if (vect_print_dump_info (REPORT_DETAILS))
3173 fprintf (vect_dump, "op not supported by target.");
3174 /* Check only during analysis. */
3175 if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD
3176 || (vf < vect_min_worthwhile_factor (code)
3177 && !vec_stmt))
3178 return false;
3179 if (vect_print_dump_info (REPORT_DETAILS))
3180 fprintf (vect_dump, "proceeding using word mode.");
3181 }
3182
3183 /* Worthwhile without SIMD support? Check only during analysis. */
3184 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
3185 && vf < vect_min_worthwhile_factor (code)
3186 && !vec_stmt)
3187 {
3188 if (vect_print_dump_info (REPORT_DETAILS))
3189 fprintf (vect_dump, "not worthwhile without SIMD support.");
3190 return false;
3191 }
3192
3193 if (!vec_stmt) /* transformation not required. */
3194 {
3195 STMT_VINFO_TYPE (stmt_info) = shift_vec_info_type;
3196 if (vect_print_dump_info (REPORT_DETAILS))
3197 fprintf (vect_dump, "=== vectorizable_shift ===");
3198 vect_model_simple_cost (stmt_info, ncopies, dt, NULL);
3199 return true;
3200 }
3201
3202 /** Transform. **/
3203
3204 if (vect_print_dump_info (REPORT_DETAILS))
3205 fprintf (vect_dump, "transform binary/unary operation.");
3206
3207 /* Handle def. */
3208 vec_dest = vect_create_destination_var (scalar_dest, vectype);
3209
3210 /* Allocate VECs for vector operands. In case of SLP, vector operands are
3211 created in the previous stages of the recursion, so no allocation is
3212 needed, except for the case of shift with scalar shift argument. In that
3213 case we store the scalar operand in VEC_OPRNDS1 for every vector stmt to
3214 be created to vectorize the SLP group, i.e., SLP_NODE->VEC_STMTS_SIZE.
3215 In case of loop-based vectorization we allocate VECs of size 1. We
3216 allocate VEC_OPRNDS1 only in case of binary operation. */
3217 if (!slp_node)
3218 {
3219 vec_oprnds0 = VEC_alloc (tree, heap, 1);
3220 vec_oprnds1 = VEC_alloc (tree, heap, 1);
3221 }
3222 else if (scalar_shift_arg)
3223 vec_oprnds1 = VEC_alloc (tree, heap, slp_node->vec_stmts_size);
3224
3225 prev_stmt_info = NULL;
3226 for (j = 0; j < ncopies; j++)
3227 {
3228 /* Handle uses. */
3229 if (j == 0)
3230 {
3231 if (scalar_shift_arg)
3232 {
3233 /* Vector shl and shr insn patterns can be defined with scalar
3234 operand 2 (shift operand). In this case, use constant or loop
3235 invariant op1 directly, without extending it to vector mode
3236 first. */
3237 optab_op2_mode = insn_data[icode].operand[2].mode;
3238 if (!VECTOR_MODE_P (optab_op2_mode))
3239 {
3240 if (vect_print_dump_info (REPORT_DETAILS))
3241 fprintf (vect_dump, "operand 1 using scalar mode.");
3242 vec_oprnd1 = op1;
3243 VEC_quick_push (tree, vec_oprnds1, vec_oprnd1);
3244 if (slp_node)
3245 {
3246 /* Store vec_oprnd1 for every vector stmt to be created
3247 for SLP_NODE. We check during the analysis that all
3248 the shift arguments are the same.
3249 TODO: Allow different constants for different vector
3250 stmts generated for an SLP instance. */
3251 for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
3252 VEC_quick_push (tree, vec_oprnds1, vec_oprnd1);
3253 }
3254 }
3255 }
3256
3257 /* vec_oprnd1 is available if operand 1 should be of a scalar type
3258 (a special case for certain kinds of vector shifts); otherwise,
3259 operand 1 should be of a vector type (the usual case). */
3260 if (vec_oprnd1)
3261 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
3262 slp_node, -1);
3263 else
3264 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
3265 slp_node, -1);
3266 }
3267 else
3268 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, &vec_oprnds1);
3269
3270 /* Arguments are ready. Create the new vector stmt. */
3271 FOR_EACH_VEC_ELT (tree, vec_oprnds0, i, vop0)
3272 {
3273 vop1 = VEC_index (tree, vec_oprnds1, i);
3274 new_stmt = gimple_build_assign_with_ops (code, vec_dest, vop0, vop1);
3275 new_temp = make_ssa_name (vec_dest, new_stmt);
3276 gimple_assign_set_lhs (new_stmt, new_temp);
3277 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3278 if (slp_node)
3279 VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node), new_stmt);
3280 }
3281
3282 if (slp_node)
3283 continue;
3284
3285 if (j == 0)
3286 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3287 else
3288 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3289 prev_stmt_info = vinfo_for_stmt (new_stmt);
3290 }
3291
3292 VEC_free (tree, heap, vec_oprnds0);
3293 VEC_free (tree, heap, vec_oprnds1);
3294
3295 return true;
3296 }
3297
3298
3299 /* Function vectorizable_operation.
3300
3301 Check if STMT performs a binary, unary or ternary operation that can
3302 be vectorized.
3303 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
3304 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
3305 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
3306
3307 static bool
3308 vectorizable_operation (gimple stmt, gimple_stmt_iterator *gsi,
3309 gimple *vec_stmt, slp_tree slp_node)
3310 {
3311 tree vec_dest;
3312 tree scalar_dest;
3313 tree op0, op1 = NULL_TREE, op2 = NULL_TREE;
3314 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3315 tree vectype;
3316 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3317 enum tree_code code;
3318 enum machine_mode vec_mode;
3319 tree new_temp;
3320 int op_type;
3321 optab optab;
3322 int icode;
3323 tree def;
3324 gimple def_stmt;
3325 enum vect_def_type dt[3]
3326 = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
3327 gimple new_stmt = NULL;
3328 stmt_vec_info prev_stmt_info;
3329 int nunits_in;
3330 int nunits_out;
3331 tree vectype_out;
3332 int ncopies;
3333 int j, i;
3334 VEC(tree,heap) *vec_oprnds0 = NULL, *vec_oprnds1 = NULL, *vec_oprnds2 = NULL;
3335 tree vop0, vop1, vop2;
3336 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
3337 int vf;
3338
3339 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
3340 return false;
3341
3342 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
3343 return false;
3344
3345 /* Is STMT a vectorizable binary/unary operation? */
3346 if (!is_gimple_assign (stmt))
3347 return false;
3348
3349 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
3350 return false;
3351
3352 code = gimple_assign_rhs_code (stmt);
3353
3354 /* For pointer addition, we should use the normal plus for
3355 the vector addition. */
3356 if (code == POINTER_PLUS_EXPR)
3357 code = PLUS_EXPR;
3358
3359 /* Support only unary, binary and ternary operations. */
3360 op_type = TREE_CODE_LENGTH (code);
3361 if (op_type != unary_op && op_type != binary_op && op_type != ternary_op)
3362 {
3363 if (vect_print_dump_info (REPORT_DETAILS))
3364 fprintf (vect_dump, "num. args = %d (not unary/binary/ternary op).",
3365 op_type);
3366 return false;
3367 }
3368
3369 scalar_dest = gimple_assign_lhs (stmt);
3370 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
3371
3372 /* Most operations cannot handle bit-precision types without extra
3373 truncations. */
3374 if ((TYPE_PRECISION (TREE_TYPE (scalar_dest))
3375 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest))))
3376 /* Exceptions are bitwise binary operations. */
3377 && code != BIT_IOR_EXPR
3378 && code != BIT_XOR_EXPR
3379 && code != BIT_AND_EXPR)
3380 {
3381 if (vect_print_dump_info (REPORT_DETAILS))
3382 fprintf (vect_dump, "bit-precision arithmetic not supported.");
3383 return false;
3384 }
3385
3386 op0 = gimple_assign_rhs1 (stmt);
3387 if (!vect_is_simple_use_1 (op0, stmt, loop_vinfo, bb_vinfo,
3388 &def_stmt, &def, &dt[0], &vectype))
3389 {
3390 if (vect_print_dump_info (REPORT_DETAILS))
3391 fprintf (vect_dump, "use not simple.");
3392 return false;
3393 }
3394 /* If op0 is an external or constant def, use a vector type with
3395 the same size as the output vector type. */
3396 if (!vectype)
3397 vectype = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
3398 if (vec_stmt)
3399 gcc_assert (vectype);
3400 if (!vectype)
3401 {
3402 if (vect_print_dump_info (REPORT_DETAILS))
3403 {
3404 fprintf (vect_dump, "no vectype for scalar type ");
3405 print_generic_expr (vect_dump, TREE_TYPE (op0), TDF_SLIM);
3406 }
3407
3408 return false;
3409 }
3410
3411 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
3412 nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
3413 if (nunits_out != nunits_in)
3414 return false;
3415
3416 if (op_type == binary_op || op_type == ternary_op)
3417 {
3418 op1 = gimple_assign_rhs2 (stmt);
3419 if (!vect_is_simple_use (op1, stmt, loop_vinfo, bb_vinfo, &def_stmt,
3420 &def, &dt[1]))
3421 {
3422 if (vect_print_dump_info (REPORT_DETAILS))
3423 fprintf (vect_dump, "use not simple.");
3424 return false;
3425 }
3426 }
3427 if (op_type == ternary_op)
3428 {
3429 op2 = gimple_assign_rhs3 (stmt);
3430 if (!vect_is_simple_use (op2, stmt, loop_vinfo, bb_vinfo, &def_stmt,
3431 &def, &dt[2]))
3432 {
3433 if (vect_print_dump_info (REPORT_DETAILS))
3434 fprintf (vect_dump, "use not simple.");
3435 return false;
3436 }
3437 }
3438
3439 if (loop_vinfo)
3440 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
3441 else
3442 vf = 1;
3443
3444 /* Multiple types in SLP are handled by creating the appropriate number of
3445 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
3446 case of SLP. */
3447 if (slp_node || PURE_SLP_STMT (stmt_info))
3448 ncopies = 1;
3449 else
3450 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
3451
3452 gcc_assert (ncopies >= 1);
3453
3454 /* Shifts are handled in vectorizable_shift (). */
3455 if (code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
3456 || code == RROTATE_EXPR)
3457 return false;
3458
3459 optab = optab_for_tree_code (code, vectype, optab_default);
3460
3461 /* Supportable by target? */
3462 if (!optab)
3463 {
3464 if (vect_print_dump_info (REPORT_DETAILS))
3465 fprintf (vect_dump, "no optab.");
3466 return false;
3467 }
3468 vec_mode = TYPE_MODE (vectype);
3469 icode = (int) optab_handler (optab, vec_mode);
3470 if (icode == CODE_FOR_nothing)
3471 {
3472 if (vect_print_dump_info (REPORT_DETAILS))
3473 fprintf (vect_dump, "op not supported by target.");
3474 /* Check only during analysis. */
3475 if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD
3476 || (vf < vect_min_worthwhile_factor (code)
3477 && !vec_stmt))
3478 return false;
3479 if (vect_print_dump_info (REPORT_DETAILS))
3480 fprintf (vect_dump, "proceeding using word mode.");
3481 }
3482
3483 /* Worthwhile without SIMD support? Check only during analysis. */
3484 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
3485 && vf < vect_min_worthwhile_factor (code)
3486 && !vec_stmt)
3487 {
3488 if (vect_print_dump_info (REPORT_DETAILS))
3489 fprintf (vect_dump, "not worthwhile without SIMD support.");
3490 return false;
3491 }
3492
3493 if (!vec_stmt) /* transformation not required. */
3494 {
3495 STMT_VINFO_TYPE (stmt_info) = op_vec_info_type;
3496 if (vect_print_dump_info (REPORT_DETAILS))
3497 fprintf (vect_dump, "=== vectorizable_operation ===");
3498 vect_model_simple_cost (stmt_info, ncopies, dt, NULL);
3499 return true;
3500 }
3501
3502 /** Transform. **/
3503
3504 if (vect_print_dump_info (REPORT_DETAILS))
3505 fprintf (vect_dump, "transform binary/unary operation.");
3506
3507 /* Handle def. */
3508 vec_dest = vect_create_destination_var (scalar_dest, vectype);
3509
3510 /* Allocate VECs for vector operands. In case of SLP, vector operands are
3511 created in the previous stages of the recursion, so no allocation is
3512 needed, except for the case of shift with scalar shift argument. In that
3513 case we store the scalar operand in VEC_OPRNDS1 for every vector stmt to
3514 be created to vectorize the SLP group, i.e., SLP_NODE->VEC_STMTS_SIZE.
3515 In case of loop-based vectorization we allocate VECs of size 1. We
3516 allocate VEC_OPRNDS1 only in case of binary operation. */
3517 if (!slp_node)
3518 {
3519 vec_oprnds0 = VEC_alloc (tree, heap, 1);
3520 if (op_type == binary_op || op_type == ternary_op)
3521 vec_oprnds1 = VEC_alloc (tree, heap, 1);
3522 if (op_type == ternary_op)
3523 vec_oprnds2 = VEC_alloc (tree, heap, 1);
3524 }
3525
3526 /* In case the vectorization factor (VF) is bigger than the number
3527 of elements that we can fit in a vectype (nunits), we have to generate
3528 more than one vector stmt, i.e., we need to "unroll" the
3529 vector stmt by a factor VF/nunits. In doing so, we record a pointer
3530 from one copy of the vector stmt to the next, in the field
3531 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
3532 stages to find the correct vector defs to be used when vectorizing
3533 stmts that use the defs of the current stmt. The example below
3534 illustrates the vectorization process when VF=16 and nunits=4 (i.e.,
3535 we need to create 4 vectorized stmts):
3536
3537 before vectorization:
3538 RELATED_STMT VEC_STMT
3539 S1: x = memref - -
3540 S2: z = x + 1 - -
3541
3542 step 1: vectorize stmt S1 (done in vectorizable_load. See more details
3543 there):
3544 RELATED_STMT VEC_STMT
3545 VS1_0: vx0 = memref0 VS1_1 -
3546 VS1_1: vx1 = memref1 VS1_2 -
3547 VS1_2: vx2 = memref2 VS1_3 -
3548 VS1_3: vx3 = memref3 - -
3549 S1: x = load - VS1_0
3550 S2: z = x + 1 - -
3551
3552 step 2: vectorize stmt S2 (done here):
3553 To vectorize stmt S2 we first need to find the relevant vector
3554 def for the first operand 'x'. This is, as usual, obtained from
3555 the vector stmt recorded in the STMT_VINFO_VEC_STMT of the stmt
3556 that defines 'x' (S1). This way we find the stmt VS1_0, and the
3557 relevant vector def 'vx0'. Having found 'vx0' we can generate
3558 the vector stmt VS2_0, and as usual, record it in the
3559 STMT_VINFO_VEC_STMT of stmt S2.
3560 When creating the second copy (VS2_1), we obtain the relevant vector
3561 def from the vector stmt recorded in the STMT_VINFO_RELATED_STMT of
3562 stmt VS1_0. This way we find the stmt VS1_1 and the relevant
3563 vector def 'vx1'. Using 'vx1' we create stmt VS2_1 and record a
3564 pointer to it in the STMT_VINFO_RELATED_STMT of the vector stmt VS2_0.
3565 Similarly when creating stmts VS2_2 and VS2_3. This is the resulting
3566 chain of stmts and pointers:
3567 RELATED_STMT VEC_STMT
3568 VS1_0: vx0 = memref0 VS1_1 -
3569 VS1_1: vx1 = memref1 VS1_2 -
3570 VS1_2: vx2 = memref2 VS1_3 -
3571 VS1_3: vx3 = memref3 - -
3572 S1: x = load - VS1_0
3573 VS2_0: vz0 = vx0 + v1 VS2_1 -
3574 VS2_1: vz1 = vx1 + v1 VS2_2 -
3575 VS2_2: vz2 = vx2 + v1 VS2_3 -
3576 VS2_3: vz3 = vx3 + v1 - -
3577 S2: z = x + 1 - VS2_0 */
3578
3579 prev_stmt_info = NULL;
3580 for (j = 0; j < ncopies; j++)
3581 {
3582 /* Handle uses. */
3583 if (j == 0)
3584 {
3585 if (op_type == binary_op || op_type == ternary_op)
3586 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
3587 slp_node, -1);
3588 else
3589 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
3590 slp_node, -1);
3591 if (op_type == ternary_op)
3592 {
3593 vec_oprnds2 = VEC_alloc (tree, heap, 1);
3594 VEC_quick_push (tree, vec_oprnds2,
3595 vect_get_vec_def_for_operand (op2, stmt, NULL));
3596 }
3597 }
3598 else
3599 {
3600 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, &vec_oprnds1);
3601 if (op_type == ternary_op)
3602 {
3603 tree vec_oprnd = VEC_pop (tree, vec_oprnds2);
3604 VEC_quick_push (tree, vec_oprnds2,
3605 vect_get_vec_def_for_stmt_copy (dt[2],
3606 vec_oprnd));
3607 }
3608 }
3609
3610 /* Arguments are ready. Create the new vector stmt. */
3611 FOR_EACH_VEC_ELT (tree, vec_oprnds0, i, vop0)
3612 {
3613 vop1 = ((op_type == binary_op || op_type == ternary_op)
3614 ? VEC_index (tree, vec_oprnds1, i) : NULL_TREE);
3615 vop2 = ((op_type == ternary_op)
3616 ? VEC_index (tree, vec_oprnds2, i) : NULL_TREE);
3617 new_stmt = gimple_build_assign_with_ops3 (code, vec_dest,
3618 vop0, vop1, vop2);
3619 new_temp = make_ssa_name (vec_dest, new_stmt);
3620 gimple_assign_set_lhs (new_stmt, new_temp);
3621 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3622 if (slp_node)
3623 VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node), new_stmt);
3624 }
3625
3626 if (slp_node)
3627 continue;
3628
3629 if (j == 0)
3630 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3631 else
3632 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3633 prev_stmt_info = vinfo_for_stmt (new_stmt);
3634 }
3635
3636 VEC_free (tree, heap, vec_oprnds0);
3637 if (vec_oprnds1)
3638 VEC_free (tree, heap, vec_oprnds1);
3639 if (vec_oprnds2)
3640 VEC_free (tree, heap, vec_oprnds2);
3641
3642 return true;
3643 }
3644
3645
3646 /* Function vectorizable_store.
3647
3648 Check if STMT defines a non-scalar data-ref (array/pointer/structure) that
3649 can be vectorized.
3650 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
3651 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
3652 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
3653
3654 static bool
3655 vectorizable_store (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
3656 slp_tree slp_node)
3657 {
3658 tree scalar_dest;
3659 tree data_ref;
3660 tree op;
3661 tree vec_oprnd = NULL_TREE;
3662 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3663 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr = NULL;
3664 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
3665 tree elem_type;
3666 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3667 struct loop *loop = NULL;
3668 enum machine_mode vec_mode;
3669 tree dummy;
3670 enum dr_alignment_support alignment_support_scheme;
3671 tree def;
3672 gimple def_stmt;
3673 enum vect_def_type dt;
3674 stmt_vec_info prev_stmt_info = NULL;
3675 tree dataref_ptr = NULL_TREE;
3676 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
3677 int ncopies;
3678 int j;
3679 gimple next_stmt, first_stmt = NULL;
3680 bool strided_store = false;
3681 bool store_lanes_p = false;
3682 unsigned int group_size, i;
3683 VEC(tree,heap) *dr_chain = NULL, *oprnds = NULL, *result_chain = NULL;
3684 bool inv_p;
3685 VEC(tree,heap) *vec_oprnds = NULL;
3686 bool slp = (slp_node != NULL);
3687 unsigned int vec_num;
3688 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
3689 tree aggr_type;
3690
3691 if (loop_vinfo)
3692 loop = LOOP_VINFO_LOOP (loop_vinfo);
3693
3694 /* Multiple types in SLP are handled by creating the appropriate number of
3695 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
3696 case of SLP. */
3697 if (slp || PURE_SLP_STMT (stmt_info))
3698 ncopies = 1;
3699 else
3700 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
3701
3702 gcc_assert (ncopies >= 1);
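/* For example (illustrative): with a vectorization factor of 8 and a
   vector type that holds 4 elements, NCOPIES is 2, i.e. two vector
   stores are generated for each scalar store in the loop.  */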
3703
3704 /* FORNOW. This restriction should be relaxed. */
3705 if (loop && nested_in_vect_loop_p (loop, stmt) && ncopies > 1)
3706 {
3707 if (vect_print_dump_info (REPORT_DETAILS))
3708 fprintf (vect_dump, "multiple types in nested loop.");
3709 return false;
3710 }
3711
3712 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
3713 return false;
3714
3715 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
3716 return false;
3717
3718 /* Is vectorizable store? */
3719
3720 if (!is_gimple_assign (stmt))
3721 return false;
3722
3723 scalar_dest = gimple_assign_lhs (stmt);
3724 if (TREE_CODE (scalar_dest) == VIEW_CONVERT_EXPR
3725 && is_pattern_stmt_p (stmt_info))
3726 scalar_dest = TREE_OPERAND (scalar_dest, 0);
3727 if (TREE_CODE (scalar_dest) != ARRAY_REF
3728 && TREE_CODE (scalar_dest) != INDIRECT_REF
3729 && TREE_CODE (scalar_dest) != COMPONENT_REF
3730 && TREE_CODE (scalar_dest) != IMAGPART_EXPR
3731 && TREE_CODE (scalar_dest) != REALPART_EXPR
3732 && TREE_CODE (scalar_dest) != MEM_REF)
3733 return false;
3734
3735 gcc_assert (gimple_assign_single_p (stmt));
3736 op = gimple_assign_rhs1 (stmt);
3737 if (!vect_is_simple_use (op, stmt, loop_vinfo, bb_vinfo, &def_stmt,
3738 &def, &dt))
3739 {
3740 if (vect_print_dump_info (REPORT_DETAILS))
3741 fprintf (vect_dump, "use not simple.");
3742 return false;
3743 }
3744
3745 elem_type = TREE_TYPE (vectype);
3746 vec_mode = TYPE_MODE (vectype);
3747
3748 /* FORNOW. In some cases can vectorize even if data-type not supported
3749 (e.g. - array initialization with 0). */
3750 if (optab_handler (mov_optab, vec_mode) == CODE_FOR_nothing)
3751 return false;
3752
3753 if (!STMT_VINFO_DATA_REF (stmt_info))
3754 return false;
3755
3756 if (tree_int_cst_compare (loop && nested_in_vect_loop_p (loop, stmt)
3757 ? STMT_VINFO_DR_STEP (stmt_info) : DR_STEP (dr),
3758 size_zero_node) < 0)
3759 {
3760 if (vect_print_dump_info (REPORT_DETAILS))
3761 fprintf (vect_dump, "negative step for store.");
3762 return false;
3763 }
3764
3765 if (STMT_VINFO_STRIDED_ACCESS (stmt_info))
3766 {
3767 strided_store = true;
3768 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
3769 if (!slp && !PURE_SLP_STMT (stmt_info))
3770 {
3771 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
3772 if (vect_store_lanes_supported (vectype, group_size))
3773 store_lanes_p = true;
3774 else if (!vect_strided_store_supported (vectype, group_size))
3775 return false;
3776 }
3777
3778 if (first_stmt == stmt)
3779 {
3780 /* STMT is the leader of the group. Check the operands of all the
3781 stmts of the group. */
3782 next_stmt = GROUP_NEXT_ELEMENT (stmt_info);
3783 while (next_stmt)
3784 {
3785 gcc_assert (gimple_assign_single_p (next_stmt));
3786 op = gimple_assign_rhs1 (next_stmt);
3787 if (!vect_is_simple_use (op, next_stmt, loop_vinfo, bb_vinfo,
3788 &def_stmt, &def, &dt))
3789 {
3790 if (vect_print_dump_info (REPORT_DETAILS))
3791 fprintf (vect_dump, "use not simple.");
3792 return false;
3793 }
3794 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
3795 }
3796 }
3797 }
3798
3799 if (!vec_stmt) /* transformation not required. */
3800 {
3801 STMT_VINFO_TYPE (stmt_info) = store_vec_info_type;
3802 vect_model_store_cost (stmt_info, ncopies, store_lanes_p, dt, NULL);
3803 return true;
3804 }
3805
3806 /** Transform. **/
3807
3808 if (strided_store)
3809 {
3810 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
3811 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
3812
3813 GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt))++;
3814
3815 /* FORNOW */
3816 gcc_assert (!loop || !nested_in_vect_loop_p (loop, stmt));
3817
3818 /* We vectorize all the stmts of the interleaving group when we
3819 reach the last stmt in the group. */
3820 if (GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt))
3821 < GROUP_SIZE (vinfo_for_stmt (first_stmt))
3822 && !slp)
3823 {
3824 *vec_stmt = NULL;
3825 return true;
3826 }
3827
3828 if (slp)
3829 {
3830 strided_store = false;
3831 /* VEC_NUM is the number of vect stmts to be created for this
3832 group. */
3833 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
3834 first_stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (slp_node), 0);
3835 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
3836 op = gimple_assign_rhs1 (first_stmt);
3837 }
3838 else
3839 /* VEC_NUM is the number of vect stmts to be created for this
3840 group. */
3841 vec_num = group_size;
3842 }
3843 else
3844 {
3845 first_stmt = stmt;
3846 first_dr = dr;
3847 group_size = vec_num = 1;
3848 }
3849
3850 if (vect_print_dump_info (REPORT_DETAILS))
3851 fprintf (vect_dump, "transform store. ncopies = %d", ncopies);
3852
3853 dr_chain = VEC_alloc (tree, heap, group_size);
3854 oprnds = VEC_alloc (tree, heap, group_size);
3855
3856 alignment_support_scheme = vect_supportable_dr_alignment (first_dr, false);
3857 gcc_assert (alignment_support_scheme);
3858 /* Targets with store-lane instructions must not require explicit
3859 realignment. */
3860 gcc_assert (!store_lanes_p
3861 || alignment_support_scheme == dr_aligned
3862 || alignment_support_scheme == dr_unaligned_supported);
3863
3864 if (store_lanes_p)
3865 aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
3866 else
3867 aggr_type = vectype;
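/* For example (illustrative): with store-lanes and a group of 4 V4SI
   vectors, AGGR_TYPE is a 16-element integer array type and the whole
   group is written below by a single STORE_LANES call; otherwise
   AGGR_TYPE is simply the vector type and each vector is stored
   separately.  */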
3868
3869 /* In case the vectorization factor (VF) is bigger than the number
3870 of elements that we can fit in a vectype (nunits), we have to generate
3871 more than one vector stmt - i.e - we need to "unroll" the
3872 vector stmt by a factor VF/nunits. For more details see documentation in
3873 vect_get_vec_def_for_copy_stmt. */
3874
3875 /* In case of interleaving (non-unit strided access):
3876
3877 S1: &base + 2 = x2
3878 S2: &base = x0
3879 S3: &base + 1 = x1
3880 S4: &base + 3 = x3
3881
3882 We create vectorized stores starting from the base address (the access of
3883 the first stmt in the chain, S2 in the above example) when the last store
3884 stmt of the chain (S4) is reached:
3885
3886 VS1: &base = vx2
3887 VS2: &base + vec_size*1 = vx0
3888 VS3: &base + vec_size*2 = vx1
3889 VS4: &base + vec_size*3 = vx3
3890
3891 Then permutation statements are generated:
3892
3893 VS5: vx5 = VEC_PERM_EXPR < vx0, vx3, {0, 8, 1, 9, 2, 10, 3, 11} >
3894 VS6: vx6 = VEC_PERM_EXPR < vx0, vx3, {4, 12, 5, 13, 6, 14, 7, 15} >
3895 ...
3896
3897 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
3898 (the order of the data-refs in the output of vect_permute_store_chain
3899 corresponds to the order of scalar stmts in the interleaving chain - see
3900 the documentation of vect_permute_store_chain()).
3901
3902 In case of both multiple types and interleaving, above vector stores and
3903 permutation stmts are created for every copy. The result vector stmts are
3904 put in STMT_VINFO_VEC_STMT for the first copy and in the corresponding
3905 STMT_VINFO_RELATED_STMT for the next copies.
3906 */
3907
3908 prev_stmt_info = NULL;
3909 for (j = 0; j < ncopies; j++)
3910 {
3911 gimple new_stmt;
3912 gimple ptr_incr;
3913
3914 if (j == 0)
3915 {
3916 if (slp)
3917 {
3918 /* Get vectorized arguments for SLP_NODE. */
3919 vect_get_vec_defs (op, NULL_TREE, stmt, &vec_oprnds,
3920 NULL, slp_node, -1);
3921
3922 vec_oprnd = VEC_index (tree, vec_oprnds, 0);
3923 }
3924 else
3925 {
3926 /* For interleaved stores we collect vectorized defs for all the
3927 stores in the group in DR_CHAIN and OPRNDS. DR_CHAIN is then
3928 used as an input to vect_permute_store_chain(), and OPRNDS as
3929 an input to vect_get_vec_def_for_stmt_copy() for the next copy.
3930
3931 If the store is not strided, GROUP_SIZE is 1, and DR_CHAIN and
3932 OPRNDS are of size 1. */
3933 next_stmt = first_stmt;
3934 for (i = 0; i < group_size; i++)
3935 {
3936 /* Since gaps are not supported for interleaved stores,
3937 GROUP_SIZE is the exact number of stmts in the chain.
3938 Therefore, NEXT_STMT can't be NULL_TREE. If there is no
3939 interleaving, GROUP_SIZE is 1, and only one
3940 iteration of the loop will be executed. */
3941 gcc_assert (next_stmt
3942 && gimple_assign_single_p (next_stmt));
3943 op = gimple_assign_rhs1 (next_stmt);
3944
3945 vec_oprnd = vect_get_vec_def_for_operand (op, next_stmt,
3946 NULL);
3947 VEC_quick_push (tree, dr_chain, vec_oprnd);
3948 VEC_quick_push (tree, oprnds, vec_oprnd);
3949 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
3950 }
3951 }
3952
3953 /* We should have caught mismatched types earlier. */
3954 gcc_assert (useless_type_conversion_p (vectype,
3955 TREE_TYPE (vec_oprnd)));
3956 dataref_ptr = vect_create_data_ref_ptr (first_stmt, aggr_type, NULL,
3957 NULL_TREE, &dummy, gsi,
3958 &ptr_incr, false, &inv_p);
3959 gcc_assert (bb_vinfo || !inv_p);
3960 }
3961 else
3962 {
3963 /* For interleaved stores we created vectorized defs for all the
3964 defs stored in OPRNDS in the previous iteration (previous copy).
3965 DR_CHAIN is then used as an input to vect_permute_store_chain(),
3966 and OPRNDS as an input to vect_get_vec_def_for_stmt_copy() for the
3967 next copy.
3968 If the store is not strided, GROUP_SIZE is 1, and DR_CHAIN and
3969 OPRNDS are of size 1. */
3970 for (i = 0; i < group_size; i++)
3971 {
3972 op = VEC_index (tree, oprnds, i);
3973 vect_is_simple_use (op, NULL, loop_vinfo, bb_vinfo, &def_stmt,
3974 &def, &dt);
3975 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, op);
3976 VEC_replace (tree, dr_chain, i, vec_oprnd);
3977 VEC_replace (tree, oprnds, i, vec_oprnd);
3978 }
3979 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
3980 TYPE_SIZE_UNIT (aggr_type));
3981 }
3982
3983 if (store_lanes_p)
3984 {
3985 tree vec_array;
3986
3987 /* Combine all the vectors into an array. */
3988 vec_array = create_vector_array (vectype, vec_num);
3989 for (i = 0; i < vec_num; i++)
3990 {
3991 vec_oprnd = VEC_index (tree, dr_chain, i);
3992 write_vector_array (stmt, gsi, vec_oprnd, vec_array, i);
3993 }
3994
3995 /* Emit:
3996 MEM_REF[...all elements...] = STORE_LANES (VEC_ARRAY). */
3997 data_ref = create_array_ref (aggr_type, dataref_ptr, first_dr);
3998 new_stmt = gimple_build_call_internal (IFN_STORE_LANES, 1, vec_array);
3999 gimple_call_set_lhs (new_stmt, data_ref);
4000 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4001 mark_symbols_for_renaming (new_stmt);
4002 }
4003 else
4004 {
4005 new_stmt = NULL;
4006 if (strided_store)
4007 {
4008 result_chain = VEC_alloc (tree, heap, group_size);
4009 /* Permute. */
4010 vect_permute_store_chain (dr_chain, group_size, stmt, gsi,
4011 &result_chain);
4012 }
4013
4014 next_stmt = first_stmt;
4015 for (i = 0; i < vec_num; i++)
4016 {
4017 struct ptr_info_def *pi;
4018
4019 if (i > 0)
4020 /* Bump the vector pointer. */
4021 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
4022 stmt, NULL_TREE);
4023
4024 if (slp)
4025 vec_oprnd = VEC_index (tree, vec_oprnds, i);
4026 else if (strided_store)
4027 /* For strided stores vectorized defs are interleaved in
4028 vect_permute_store_chain(). */
4029 vec_oprnd = VEC_index (tree, result_chain, i);
4030
4031 data_ref = build2 (MEM_REF, TREE_TYPE (vec_oprnd), dataref_ptr,
4032 build_int_cst (reference_alias_ptr_type
4033 (DR_REF (first_dr)), 0));
4034 pi = get_ptr_info (dataref_ptr);
4035 pi->align = TYPE_ALIGN_UNIT (vectype);
4036 if (aligned_access_p (first_dr))
4037 pi->misalign = 0;
4038 else if (DR_MISALIGNMENT (first_dr) == -1)
4039 {
4040 TREE_TYPE (data_ref)
4041 = build_aligned_type (TREE_TYPE (data_ref),
4042 TYPE_ALIGN (elem_type));
4043 pi->align = TYPE_ALIGN_UNIT (elem_type);
4044 pi->misalign = 0;
4045 }
4046 else
4047 {
4048 TREE_TYPE (data_ref)
4049 = build_aligned_type (TREE_TYPE (data_ref),
4050 TYPE_ALIGN (elem_type));
4051 pi->misalign = DR_MISALIGNMENT (first_dr);
4052 }
4053
4054 /* Arguments are ready. Create the new vector stmt. */
4055 new_stmt = gimple_build_assign (data_ref, vec_oprnd);
4056 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4057 mark_symbols_for_renaming (new_stmt);
4058
4059 if (slp)
4060 continue;
4061
4062 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
4063 if (!next_stmt)
4064 break;
4065 }
4066 }
4067 if (!slp)
4068 {
4069 if (j == 0)
4070 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4071 else
4072 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4073 prev_stmt_info = vinfo_for_stmt (new_stmt);
4074 }
4075 }
4076
4077 VEC_free (tree, heap, dr_chain);
4078 VEC_free (tree, heap, oprnds);
4079 if (result_chain)
4080 VEC_free (tree, heap, result_chain);
4081 if (vec_oprnds)
4082 VEC_free (tree, heap, vec_oprnds);
4083
4084 return true;
4085 }
4086
4087 /* Given a vector type VECTYPE and permutation SEL returns
4088 the VECTOR_CST mask that implements the permutation of the
4089 vector elements. If that is impossible to do, returns NULL. */
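/* For example (illustrative): for a 4-element vector type and
   SEL = {3, 2, 1, 0}, the returned VECTOR_CST is {3, 2, 1, 0}, which,
   used as the third operand of a VEC_PERM_EXPR, reverses the vector
   (assuming the target's can_vec_perm_p accepts that selector).  */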
4090
4091 tree
4092 vect_gen_perm_mask (tree vectype, unsigned char *sel)
4093 {
4094 tree mask_elt_type, mask_type, mask_vec;
4095 int i, nunits;
4096
4097 nunits = TYPE_VECTOR_SUBPARTS (vectype);
4098
4099 if (!can_vec_perm_p (TYPE_MODE (vectype), false, sel))
4100 return NULL;
4101
4102 mask_elt_type
4103 = lang_hooks.types.type_for_size
4104 (TREE_INT_CST_LOW (TYPE_SIZE (TREE_TYPE (vectype))), 1);
4105 mask_type = get_vectype_for_scalar_type (mask_elt_type);
4106
4107 mask_vec = NULL;
4108 for (i = nunits - 1; i >= 0; i--)
4109 mask_vec = tree_cons (NULL, build_int_cst (mask_elt_type, sel[i]),
4110 mask_vec);
4111 mask_vec = build_vector (mask_type, mask_vec);
4112
4113 return mask_vec;
4114 }
4115
4116 /* Given a vector type VECTYPE returns the VECTOR_CST mask that implements
4117 reversal of the vector elements. If that is impossible to do,
4118 returns NULL. */
4119
4120 static tree
4121 perm_mask_for_reverse (tree vectype)
4122 {
4123 int i, nunits;
4124 unsigned char *sel;
4125
4126 nunits = TYPE_VECTOR_SUBPARTS (vectype);
4127 sel = XALLOCAVEC (unsigned char, nunits);
4128
4129 for (i = 0; i < nunits; ++i)
4130 sel[i] = nunits - 1 - i;
4131
4132 return vect_gen_perm_mask (vectype, sel);
4133 }
4134
4135 /* Given vector variables X and Y that were generated for the scalar
4136 STMT, generate statements to permute the vector elements of X and Y
4137 using permutation mask MASK_VEC, insert them at *GSI, and return the
4138 permuted vector variable. */
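/* E.g. (illustrative) the emitted statement has the form
   DATA_REF = VEC_PERM_EXPR <X, Y, MASK_VEC>.  */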
4139
4140 static tree
4141 permute_vec_elements (tree x, tree y, tree mask_vec, gimple stmt,
4142 gimple_stmt_iterator *gsi)
4143 {
4144 tree vectype = TREE_TYPE (x);
4145 tree perm_dest, data_ref;
4146 gimple perm_stmt;
4147
4148 perm_dest = vect_create_destination_var (gimple_assign_lhs (stmt), vectype);
4149 data_ref = make_ssa_name (perm_dest, NULL);
4150
4151 /* Generate the permute statement. */
4152 perm_stmt = gimple_build_assign_with_ops3 (VEC_PERM_EXPR, data_ref,
4153 x, y, mask_vec);
4154 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
4155
4156 return data_ref;
4157 }
4158
4159 /* vectorizable_load.
4160
4161 Check if STMT reads a non-scalar data-ref (array/pointer/structure) that
4162 can be vectorized.
4163 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4164 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
4165 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
4166
4167 static bool
4168 vectorizable_load (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
4169 slp_tree slp_node, slp_instance slp_node_instance)
4170 {
4171 tree scalar_dest;
4172 tree vec_dest = NULL;
4173 tree data_ref = NULL;
4174 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4175 stmt_vec_info prev_stmt_info;
4176 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4177 struct loop *loop = NULL;
4178 struct loop *containing_loop = (gimple_bb (stmt))->loop_father;
4179 bool nested_in_vect_loop = false;
4180 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr;
4181 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
4182 tree elem_type;
4183 tree new_temp;
4184 enum machine_mode mode;
4185 gimple new_stmt = NULL;
4186 tree dummy;
4187 enum dr_alignment_support alignment_support_scheme;
4188 tree dataref_ptr = NULL_TREE;
4189 gimple ptr_incr;
4190 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
4191 int ncopies;
4192 int i, j, group_size;
4193 tree msq = NULL_TREE, lsq;
4194 tree offset = NULL_TREE;
4195 tree realignment_token = NULL_TREE;
4196 gimple phi = NULL;
4197 VEC(tree,heap) *dr_chain = NULL;
4198 bool strided_load = false;
4199 bool load_lanes_p = false;
4200 gimple first_stmt;
4201 bool inv_p;
4202 bool negative;
4203 bool compute_in_loop = false;
4204 struct loop *at_loop;
4205 int vec_num;
4206 bool slp = (slp_node != NULL);
4207 bool slp_perm = false;
4208 enum tree_code code;
4209 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
4210 int vf;
4211 tree aggr_type;
4212 tree gather_base = NULL_TREE, gather_off = NULL_TREE;
4213 tree gather_off_vectype = NULL_TREE, gather_decl = NULL_TREE;
4214 int gather_scale = 1;
4215 enum vect_def_type gather_dt = vect_unknown_def_type;
4216
4217 if (loop_vinfo)
4218 {
4219 loop = LOOP_VINFO_LOOP (loop_vinfo);
4220 nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
4221 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
4222 }
4223 else
4224 vf = 1;
4225
4226 /* Multiple types in SLP are handled by creating the appropriate number of
4227 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
4228 case of SLP. */
4229 if (slp || PURE_SLP_STMT (stmt_info))
4230 ncopies = 1;
4231 else
4232 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
4233
4234 gcc_assert (ncopies >= 1);
4235
4236 /* FORNOW. This restriction should be relaxed. */
4237 if (nested_in_vect_loop && ncopies > 1)
4238 {
4239 if (vect_print_dump_info (REPORT_DETAILS))
4240 fprintf (vect_dump, "multiple types in nested loop.");
4241 return false;
4242 }
4243
4244 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
4245 return false;
4246
4247 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
4248 return false;
4249
4250 /* Is vectorizable load? */
4251 if (!is_gimple_assign (stmt))
4252 return false;
4253
4254 scalar_dest = gimple_assign_lhs (stmt);
4255 if (TREE_CODE (scalar_dest) != SSA_NAME)
4256 return false;
4257
4258 code = gimple_assign_rhs_code (stmt);
4259 if (code != ARRAY_REF
4260 && code != INDIRECT_REF
4261 && code != COMPONENT_REF
4262 && code != IMAGPART_EXPR
4263 && code != REALPART_EXPR
4264 && code != MEM_REF
4265 && TREE_CODE_CLASS (code) != tcc_declaration)
4266 return false;
4267
4268 if (!STMT_VINFO_DATA_REF (stmt_info))
4269 return false;
4270
4271 negative = tree_int_cst_compare (nested_in_vect_loop
4272 ? STMT_VINFO_DR_STEP (stmt_info)
4273 : DR_STEP (dr),
4274 size_zero_node) < 0;
4275 if (negative && ncopies > 1)
4276 {
4277 if (vect_print_dump_info (REPORT_DETAILS))
4278 fprintf (vect_dump, "multiple types with negative step.");
4279 return false;
4280 }
4281
4282 elem_type = TREE_TYPE (vectype);
4283 mode = TYPE_MODE (vectype);
4284
4285 /* FORNOW. In some cases can vectorize even if data-type not supported
4286 (e.g. - data copies). */
4287 if (optab_handler (mov_optab, mode) == CODE_FOR_nothing)
4288 {
4289 if (vect_print_dump_info (REPORT_DETAILS))
4290 fprintf (vect_dump, "Aligned load, but unsupported type.");
4291 return false;
4292 }
4293
4294 /* Check if the load is a part of an interleaving chain. */
4295 if (STMT_VINFO_STRIDED_ACCESS (stmt_info))
4296 {
4297 strided_load = true;
4298 /* FORNOW */
4299 gcc_assert (!nested_in_vect_loop && !STMT_VINFO_GATHER_P (stmt_info));
4300
4301 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
4302 if (!slp && !PURE_SLP_STMT (stmt_info))
4303 {
4304 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
4305 if (vect_load_lanes_supported (vectype, group_size))
4306 load_lanes_p = true;
4307 else if (!vect_strided_load_supported (vectype, group_size))
4308 return false;
4309 }
4310 }
4311
4312 if (negative)
4313 {
4314 gcc_assert (!strided_load && !STMT_VINFO_GATHER_P (stmt_info));
4315 alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
4316 if (alignment_support_scheme != dr_aligned
4317 && alignment_support_scheme != dr_unaligned_supported)
4318 {
4319 if (vect_print_dump_info (REPORT_DETAILS))
4320 fprintf (vect_dump, "negative step but alignment required.");
4321 return false;
4322 }
4323 if (!perm_mask_for_reverse (vectype))
4324 {
4325 if (vect_print_dump_info (REPORT_DETAILS))
4326 fprintf (vect_dump, "negative step and reversing not supported.");
4327 return false;
4328 }
4329 }
4330
4331 if (STMT_VINFO_GATHER_P (stmt_info))
4332 {
4333 gimple def_stmt;
4334 tree def;
4335 gather_decl = vect_check_gather (stmt, loop_vinfo, &gather_base,
4336 &gather_off, &gather_scale);
4337 gcc_assert (gather_decl);
4338 if (!vect_is_simple_use_1 (gather_off, NULL, loop_vinfo, bb_vinfo,
4339 &def_stmt, &def, &gather_dt,
4340 &gather_off_vectype))
4341 {
4342 if (vect_print_dump_info (REPORT_DETAILS))
4343 fprintf (vect_dump, "gather index use not simple.");
4344 return false;
4345 }
4346 }
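/* For example (a sketch): an indexed load such as x = a[b[i]] may be
   handled as a gather on targets that provide a gather builtin (see
   vect_check_gather); the vectorized def of the index feeds the
   builtin's offset operand in the transformation below.  */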
4347
4348 if (!vec_stmt) /* transformation not required. */
4349 {
4350 STMT_VINFO_TYPE (stmt_info) = load_vec_info_type;
4351 vect_model_load_cost (stmt_info, ncopies, load_lanes_p, NULL);
4352 return true;
4353 }
4354
4355 if (vect_print_dump_info (REPORT_DETAILS))
4356 fprintf (vect_dump, "transform load. ncopies = %d", ncopies);
4357
4358 /** Transform. **/
4359
4360 if (STMT_VINFO_GATHER_P (stmt_info))
4361 {
4362 tree vec_oprnd0 = NULL_TREE, op;
4363 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gather_decl));
4364 tree rettype, srctype, ptrtype, idxtype, masktype, scaletype;
4365 tree ptr, mask, var, scale, perm_mask = NULL_TREE, prev_res = NULL_TREE;
4366 edge pe = loop_preheader_edge (loop);
4367 gimple_seq seq;
4368 basic_block new_bb;
4369 enum { NARROW, NONE, WIDEN } modifier;
4370 int gather_off_nunits = TYPE_VECTOR_SUBPARTS (gather_off_vectype);
4371
4372 if (nunits == gather_off_nunits)
4373 modifier = NONE;
4374 else if (nunits == gather_off_nunits / 2)
4375 {
4376 unsigned char *sel = XALLOCAVEC (unsigned char, gather_off_nunits);
4377 modifier = WIDEN;
4378
4379 for (i = 0; i < gather_off_nunits; ++i)
4380 sel[i] = i | nunits;
4381
4382 perm_mask = vect_gen_perm_mask (gather_off_vectype, sel);
4383 gcc_assert (perm_mask != NULL_TREE);
4384 }
4385 else if (nunits == gather_off_nunits * 2)
4386 {
4387 unsigned char *sel = XALLOCAVEC (unsigned char, nunits);
4388 modifier = NARROW;
4389
4390 for (i = 0; i < nunits; ++i)
4391 sel[i] = i < gather_off_nunits
4392 ? i : i + nunits - gather_off_nunits;
4393
4394 perm_mask = vect_gen_perm_mask (vectype, sel);
4395 gcc_assert (perm_mask != NULL_TREE);
4396 ncopies *= 2;
4397 }
4398 else
4399 gcc_unreachable ();
4400
4401 rettype = TREE_TYPE (TREE_TYPE (gather_decl));
4402 srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
4403 ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
4404 idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
4405 masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
4406 scaletype = TREE_VALUE (arglist);
4407 gcc_checking_assert (types_compatible_p (srctype, rettype)
4408 && types_compatible_p (srctype, masktype));
4409
4410 vec_dest = vect_create_destination_var (scalar_dest, vectype);
4411
4412 ptr = fold_convert (ptrtype, gather_base);
4413 if (!is_gimple_min_invariant (ptr))
4414 {
4415 ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
4416 new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
4417 gcc_assert (!new_bb);
4418 }
4419
4420 /* Currently we support only unconditional gather loads,
4421 so mask should be all ones. */
4422 if (TREE_CODE (TREE_TYPE (masktype)) == INTEGER_TYPE)
4423 mask = build_int_cst (TREE_TYPE (masktype), -1);
4424 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (masktype)))
4425 {
4426 REAL_VALUE_TYPE r;
4427 long tmp[6];
4428 for (j = 0; j < 6; ++j)
4429 tmp[j] = -1;
4430 real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (masktype)));
4431 mask = build_real (TREE_TYPE (masktype), r);
4432 }
4433 else
4434 gcc_unreachable ();
4435 mask = build_vector_from_val (masktype, mask);
4436 mask = vect_init_vector (stmt, mask, masktype, NULL);
4437
4438 scale = build_int_cst (scaletype, gather_scale);
4439
4440 prev_stmt_info = NULL;
4441 for (j = 0; j < ncopies; ++j)
4442 {
4443 if (modifier == WIDEN && (j & 1))
4444 op = permute_vec_elements (vec_oprnd0, vec_oprnd0,
4445 perm_mask, stmt, gsi);
4446 else if (j == 0)
4447 op = vec_oprnd0
4448 = vect_get_vec_def_for_operand (gather_off, stmt, NULL);
4449 else
4450 op = vec_oprnd0
4451 = vect_get_vec_def_for_stmt_copy (gather_dt, vec_oprnd0);
4452
4453 if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
4454 {
4455 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op))
4456 == TYPE_VECTOR_SUBPARTS (idxtype));
4457 var = vect_get_new_vect_var (idxtype, vect_simple_var, NULL);
4458 add_referenced_var (var);
4459 var = make_ssa_name (var, NULL);
4460 op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
4461 new_stmt
4462 = gimple_build_assign_with_ops (VIEW_CONVERT_EXPR, var,
4463 op, NULL_TREE);
4464 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4465 op = var;
4466 }
4467
4468 new_stmt
4469 = gimple_build_call (gather_decl, 5, mask, ptr, op, mask, scale);
4470
4471 if (!useless_type_conversion_p (vectype, rettype))
4472 {
4473 gcc_assert (TYPE_VECTOR_SUBPARTS (vectype)
4474 == TYPE_VECTOR_SUBPARTS (rettype));
4475 var = vect_get_new_vect_var (rettype, vect_simple_var, NULL);
4476 add_referenced_var (var);
4477 op = make_ssa_name (var, new_stmt);
4478 gimple_call_set_lhs (new_stmt, op);
4479 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4480 var = make_ssa_name (vec_dest, NULL);
4481 op = build1 (VIEW_CONVERT_EXPR, vectype, op);
4482 new_stmt
4483 = gimple_build_assign_with_ops (VIEW_CONVERT_EXPR, var, op,
4484 NULL_TREE);
4485 }
4486 else
4487 {
4488 var = make_ssa_name (vec_dest, new_stmt);
4489 gimple_call_set_lhs (new_stmt, var);
4490 }
4491
4492 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4493
4494 if (modifier == NARROW)
4495 {
4496 if ((j & 1) == 0)
4497 {
4498 prev_res = var;
4499 continue;
4500 }
4501 var = permute_vec_elements (prev_res, var,
4502 perm_mask, stmt, gsi);
4503 new_stmt = SSA_NAME_DEF_STMT (var);
4504 }
4505
4506 if (prev_stmt_info == NULL)
4507 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4508 else
4509 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4510 prev_stmt_info = vinfo_for_stmt (new_stmt);
4511 }
4512 return true;
4513 }
4514
4515 if (strided_load)
4516 {
4517 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
4518 if (slp
4519 && !SLP_INSTANCE_LOAD_PERMUTATION (slp_node_instance)
4520 && first_stmt != VEC_index (gimple, SLP_TREE_SCALAR_STMTS (slp_node), 0))
4521 first_stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (slp_node), 0);
4522
4523 /* Check if the chain of loads is already vectorized. */
4524 if (STMT_VINFO_VEC_STMT (vinfo_for_stmt (first_stmt)))
4525 {
4526 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
4527 return true;
4528 }
4529 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
4530 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
4531
4532 /* VEC_NUM is the number of vect stmts to be created for this group. */
4533 if (slp)
4534 {
4535 strided_load = false;
4536 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
4537 if (SLP_INSTANCE_LOAD_PERMUTATION (slp_node_instance))
4538 slp_perm = true;
4539 }
4540 else
4541 vec_num = group_size;
4542 }
4543 else
4544 {
4545 first_stmt = stmt;
4546 first_dr = dr;
4547 group_size = vec_num = 1;
4548 }
4549
4550 alignment_support_scheme = vect_supportable_dr_alignment (first_dr, false);
4551 gcc_assert (alignment_support_scheme);
4552 /* Targets with load-lane instructions must not require explicit
4553 realignment. */
4554 gcc_assert (!load_lanes_p
4555 || alignment_support_scheme == dr_aligned
4556 || alignment_support_scheme == dr_unaligned_supported);
4557
4558 /* In case the vectorization factor (VF) is bigger than the number
4559 of elements that we can fit in a vectype (nunits), we have to generate
4560 more than one vector stmt - i.e - we need to "unroll" the
4561 vector stmt by a factor VF/nunits. In doing so, we record a pointer
4562 from one copy of the vector stmt to the next, in the field
4563 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
4564 stages to find the correct vector defs to be used when vectorizing
4565 stmts that use the defs of the current stmt. The example below
4566 illustrates the vectorization process when VF=16 and nunits=4 (i.e., we
4567 need to create 4 vectorized stmts):
4568
4569 before vectorization:
4570 RELATED_STMT VEC_STMT
4571 S1: x = memref - -
4572 S2: z = x + 1 - -
4573
4574 step 1: vectorize stmt S1:
4575 We first create the vector stmt VS1_0, and, as usual, record a
4576 pointer to it in the STMT_VINFO_VEC_STMT of the scalar stmt S1.
4577 Next, we create the vector stmt VS1_1, and record a pointer to
4578 it in the STMT_VINFO_RELATED_STMT of the vector stmt VS1_0.
4579 Similarly, for VS1_2 and VS1_3. This is the resulting chain of
4580 stmts and pointers:
4581 RELATED_STMT VEC_STMT
4582 VS1_0: vx0 = memref0 VS1_1 -
4583 VS1_1: vx1 = memref1 VS1_2 -
4584 VS1_2: vx2 = memref2 VS1_3 -
4585 VS1_3: vx3 = memref3 - -
4586 S1: x = load - VS1_0
4587 S2: z = x + 1 - -
4588
4589 See in documentation in vect_get_vec_def_for_stmt_copy for how the
4590 information we recorded in RELATED_STMT field is used to vectorize
4591 stmt S2. */
4592
4593 /* In case of interleaving (non-unit strided access):
4594
4595 S1: x2 = &base + 2
4596 S2: x0 = &base
4597 S3: x1 = &base + 1
4598 S4: x3 = &base + 3
4599
4600 Vectorized loads are created in the order of memory accesses
4601 starting from the access of the first stmt of the chain:
4602
4603 VS1: vx0 = &base
4604 VS2: vx1 = &base + vec_size*1
4605 VS3: vx3 = &base + vec_size*2
4606 VS4: vx4 = &base + vec_size*3
4607
4608 Then permutation statements are generated:
4609
4610 VS5: vx5 = VEC_PERM_EXPR < vx0, vx1, { 0, 2, ..., i*2 } >
4611 VS6: vx6 = VEC_PERM_EXPR < vx0, vx1, { 1, 3, ..., i*2+1 } >
4612 ...
4613
4614 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
4615 (the order of the data-refs in the output of vect_permute_load_chain
4616 corresponds to the order of scalar stmts in the interleaving chain - see
4617 the documentation of vect_permute_load_chain()).
4618 The generation of permutation stmts and recording them in
4619 STMT_VINFO_VEC_STMT is done in vect_transform_strided_load().
4620
4621 In case of both multiple types and interleaving, the vector loads and
4622 permutation stmts above are created for every copy. The result vector
4623 stmts are put in STMT_VINFO_VEC_STMT for the first copy and in the
4624 corresponding STMT_VINFO_RELATED_STMT for the next copies. */
4625
4626 /* If the data reference is aligned (dr_aligned) or potentially unaligned
4627 on a target that supports unaligned accesses (dr_unaligned_supported)
4628 we generate the following code:
4629 p = initial_addr;
4630 indx = 0;
4631 loop {
4632 p = p + indx * vectype_size;
4633 vec_dest = *(p);
4634 indx = indx + 1;
4635 }
4636
4637 Otherwise, the data reference is potentially unaligned on a target that
4638 does not support unaligned accesses (dr_explicit_realign_optimized) -
4639 then generate the following code, in which the data in each iteration is
4640 obtained by two vector loads, one from the previous iteration, and one
4641 from the current iteration:
4642 p1 = initial_addr;
4643 msq_init = *(floor(p1))
4644 p2 = initial_addr + VS - 1;
4645 realignment_token = call target_builtin;
4646 indx = 0;
4647 loop {
4648 p2 = p2 + indx * vectype_size
4649 lsq = *(floor(p2))
4650 vec_dest = realign_load (msq, lsq, realignment_token)
4651 indx = indx + 1;
4652 msq = lsq;
4653 } */
4654
4655 /* If the misalignment remains the same throughout the execution of the
4656 loop, we can create the init_addr and permutation mask at the loop
4657 preheader. Otherwise, it needs to be created inside the loop.
4658 This can only occur when vectorizing memory accesses in the inner-loop
4659 nested within an outer-loop that is being vectorized. */
4660
4661 if (nested_in_vect_loop
4662 && (TREE_INT_CST_LOW (DR_STEP (dr))
4663 % GET_MODE_SIZE (TYPE_MODE (vectype)) != 0))
4664 {
4665 gcc_assert (alignment_support_scheme != dr_explicit_realign_optimized);
4666 compute_in_loop = true;
4667 }
4668
4669 if ((alignment_support_scheme == dr_explicit_realign_optimized
4670 || alignment_support_scheme == dr_explicit_realign)
4671 && !compute_in_loop)
4672 {
4673 msq = vect_setup_realignment (first_stmt, gsi, &realignment_token,
4674 alignment_support_scheme, NULL_TREE,
4675 &at_loop);
4676 if (alignment_support_scheme == dr_explicit_realign_optimized)
4677 {
4678 phi = SSA_NAME_DEF_STMT (msq);
4679 offset = size_int (TYPE_VECTOR_SUBPARTS (vectype) - 1);
4680 }
4681 }
4682 else
4683 at_loop = loop;
4684
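/* For a negative step (illustrative summary): each vector load below is
   taken at an offset of -(nunits - 1) elements from the current element
   address, and the loaded vector is then reversed using the mask from
   perm_mask_for_reverse.  */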
4685 if (negative)
4686 offset = size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1);
4687
4688 if (load_lanes_p)
4689 aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
4690 else
4691 aggr_type = vectype;
4692
4693 prev_stmt_info = NULL;
4694 for (j = 0; j < ncopies; j++)
4695 {
4696 /* 1. Create the vector or array pointer update chain. */
4697 if (j == 0)
4698 dataref_ptr = vect_create_data_ref_ptr (first_stmt, aggr_type, at_loop,
4699 offset, &dummy, gsi,
4700 &ptr_incr, false, &inv_p);
4701 else
4702 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
4703 TYPE_SIZE_UNIT (aggr_type));
4704
4705 if (strided_load || slp_perm)
4706 dr_chain = VEC_alloc (tree, heap, vec_num);
4707
4708 if (load_lanes_p)
4709 {
4710 tree vec_array;
4711
4712 vec_array = create_vector_array (vectype, vec_num);
4713
4714 /* Emit:
4715 VEC_ARRAY = LOAD_LANES (MEM_REF[...all elements...]). */
4716 data_ref = create_array_ref (aggr_type, dataref_ptr, first_dr);
4717 new_stmt = gimple_build_call_internal (IFN_LOAD_LANES, 1, data_ref);
4718 gimple_call_set_lhs (new_stmt, vec_array);
4719 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4720 mark_symbols_for_renaming (new_stmt);
4721
4722 /* Extract each vector into an SSA_NAME. */
4723 for (i = 0; i < vec_num; i++)
4724 {
4725 new_temp = read_vector_array (stmt, gsi, scalar_dest,
4726 vec_array, i);
4727 VEC_quick_push (tree, dr_chain, new_temp);
4728 }
4729
4730 /* Record the mapping between SSA_NAMEs and statements. */
4731 vect_record_strided_load_vectors (stmt, dr_chain);
4732 }
4733 else
4734 {
4735 for (i = 0; i < vec_num; i++)
4736 {
4737 if (i > 0)
4738 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
4739 stmt, NULL_TREE);
4740
4741 /* 2. Create the vector-load in the loop. */
4742 switch (alignment_support_scheme)
4743 {
4744 case dr_aligned:
4745 case dr_unaligned_supported:
4746 {
4747 struct ptr_info_def *pi;
4748 data_ref
4749 = build2 (MEM_REF, vectype, dataref_ptr,
4750 build_int_cst (reference_alias_ptr_type
4751 (DR_REF (first_dr)), 0));
4752 pi = get_ptr_info (dataref_ptr);
4753 pi->align = TYPE_ALIGN_UNIT (vectype);
4754 if (alignment_support_scheme == dr_aligned)
4755 {
4756 gcc_assert (aligned_access_p (first_dr));
4757 pi->misalign = 0;
4758 }
4759 else if (DR_MISALIGNMENT (first_dr) == -1)
4760 {
4761 TREE_TYPE (data_ref)
4762 = build_aligned_type (TREE_TYPE (data_ref),
4763 TYPE_ALIGN (elem_type));
4764 pi->align = TYPE_ALIGN_UNIT (elem_type);
4765 pi->misalign = 0;
4766 }
4767 else
4768 {
4769 TREE_TYPE (data_ref)
4770 = build_aligned_type (TREE_TYPE (data_ref),
4771 TYPE_ALIGN (elem_type));
4772 pi->misalign = DR_MISALIGNMENT (first_dr);
4773 }
4774 break;
4775 }
4776 case dr_explicit_realign:
4777 {
4778 tree ptr, bump;
4779 tree vs_minus_1;
4780
4781 vs_minus_1 = size_int (TYPE_VECTOR_SUBPARTS (vectype) - 1);
4782
4783 if (compute_in_loop)
4784 msq = vect_setup_realignment (first_stmt, gsi,
4785 &realignment_token,
4786 dr_explicit_realign,
4787 dataref_ptr, NULL);
4788
4789 new_stmt = gimple_build_assign_with_ops
4790 (BIT_AND_EXPR, NULL_TREE, dataref_ptr,
4791 build_int_cst
4792 (TREE_TYPE (dataref_ptr),
4793 -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
4794 ptr = make_ssa_name (SSA_NAME_VAR (dataref_ptr), new_stmt);
4795 gimple_assign_set_lhs (new_stmt, ptr);
4796 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4797 data_ref
4798 = build2 (MEM_REF, vectype, ptr,
4799 build_int_cst (reference_alias_ptr_type
4800 (DR_REF (first_dr)), 0));
4801 vec_dest = vect_create_destination_var (scalar_dest,
4802 vectype);
4803 new_stmt = gimple_build_assign (vec_dest, data_ref);
4804 new_temp = make_ssa_name (vec_dest, new_stmt);
4805 gimple_assign_set_lhs (new_stmt, new_temp);
4806 gimple_set_vdef (new_stmt, gimple_vdef (stmt));
4807 gimple_set_vuse (new_stmt, gimple_vuse (stmt));
4808 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4809 msq = new_temp;
4810
4811 bump = size_binop (MULT_EXPR, vs_minus_1,
4812 TYPE_SIZE_UNIT (elem_type));
4813 ptr = bump_vector_ptr (dataref_ptr, NULL, gsi, stmt, bump);
4814 new_stmt = gimple_build_assign_with_ops
4815 (BIT_AND_EXPR, NULL_TREE, ptr,
4816 build_int_cst
4817 (TREE_TYPE (ptr),
4818 -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
4819 ptr = make_ssa_name (SSA_NAME_VAR (dataref_ptr), new_stmt);
4820 gimple_assign_set_lhs (new_stmt, ptr);
4821 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4822 data_ref
4823 = build2 (MEM_REF, vectype, ptr,
4824 build_int_cst (reference_alias_ptr_type
4825 (DR_REF (first_dr)), 0));
4826 break;
4827 }
4828 case dr_explicit_realign_optimized:
4829 new_stmt = gimple_build_assign_with_ops
4830 (BIT_AND_EXPR, NULL_TREE, dataref_ptr,
4831 build_int_cst
4832 (TREE_TYPE (dataref_ptr),
4833 -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
4834 new_temp = make_ssa_name (SSA_NAME_VAR (dataref_ptr),
4835 new_stmt);
4836 gimple_assign_set_lhs (new_stmt, new_temp);
4837 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4838 data_ref
4839 = build2 (MEM_REF, vectype, new_temp,
4840 build_int_cst (reference_alias_ptr_type
4841 (DR_REF (first_dr)), 0));
4842 break;
4843 default:
4844 gcc_unreachable ();
4845 }
4846 vec_dest = vect_create_destination_var (scalar_dest, vectype);
4847 new_stmt = gimple_build_assign (vec_dest, data_ref);
4848 new_temp = make_ssa_name (vec_dest, new_stmt);
4849 gimple_assign_set_lhs (new_stmt, new_temp);
4850 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4851 mark_symbols_for_renaming (new_stmt);
4852
4853 /* 3. Handle explicit realignment if necessary/supported.
4854 Create in loop:
4855 vec_dest = realign_load (msq, lsq, realignment_token) */
4856 if (alignment_support_scheme == dr_explicit_realign_optimized
4857 || alignment_support_scheme == dr_explicit_realign)
4858 {
4859 lsq = gimple_assign_lhs (new_stmt);
4860 if (!realignment_token)
4861 realignment_token = dataref_ptr;
4862 vec_dest = vect_create_destination_var (scalar_dest, vectype);
4863 new_stmt
4864 = gimple_build_assign_with_ops3 (REALIGN_LOAD_EXPR,
4865 vec_dest, msq, lsq,
4866 realignment_token);
4867 new_temp = make_ssa_name (vec_dest, new_stmt);
4868 gimple_assign_set_lhs (new_stmt, new_temp);
4869 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4870
4871 if (alignment_support_scheme == dr_explicit_realign_optimized)
4872 {
4873 gcc_assert (phi);
4874 if (i == vec_num - 1 && j == ncopies - 1)
4875 add_phi_arg (phi, lsq,
4876 loop_latch_edge (containing_loop),
4877 UNKNOWN_LOCATION);
4878 msq = lsq;
4879 }
4880 }
4881
4882 /* 4. Handle invariant-load. */
4883 if (inv_p && !bb_vinfo)
4884 {
4885 tree tem, vec_inv;
4886 gimple_stmt_iterator gsi2 = *gsi;
4887 gcc_assert (!strided_load);
4888 gsi_next (&gsi2);
4889 tem = scalar_dest;
4890 if (!useless_type_conversion_p (TREE_TYPE (vectype),
4891 TREE_TYPE (tem)))
4892 {
4893 tem = fold_convert (TREE_TYPE (vectype), tem);
4894 tem = force_gimple_operand_gsi (&gsi2, tem, true,
4895 NULL_TREE, true,
4896 GSI_SAME_STMT);
4897 }
4898 vec_inv = build_vector_from_val (vectype, tem);
4899 new_temp = vect_init_vector (stmt, vec_inv,
4900 vectype, &gsi2);
4901 new_stmt = SSA_NAME_DEF_STMT (new_temp);
4902 }
4903
4904 if (negative)
4905 {
4906 tree perm_mask = perm_mask_for_reverse (vectype);
4907 new_temp = permute_vec_elements (new_temp, new_temp,
4908 perm_mask, stmt, gsi);
4909 new_stmt = SSA_NAME_DEF_STMT (new_temp);
4910 }
4911
4912 /* Collect vector loads and later create their permutation in
4913 vect_transform_strided_load (). */
4914 if (strided_load || slp_perm)
4915 VEC_quick_push (tree, dr_chain, new_temp);
4916
4917 /* Store vector loads in the corresponding SLP_NODE. */
4918 if (slp && !slp_perm)
4919 VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node),
4920 new_stmt);
4921 }
4922 }
4923
4924 if (slp && !slp_perm)
4925 continue;
4926
4927 if (slp_perm)
4928 {
4929 if (!vect_transform_slp_perm_load (stmt, dr_chain, gsi, vf,
4930 slp_node_instance, false))
4931 {
4932 VEC_free (tree, heap, dr_chain);
4933 return false;
4934 }
4935 }
4936 else
4937 {
4938 if (strided_load)
4939 {
4940 if (!load_lanes_p)
4941 vect_transform_strided_load (stmt, dr_chain, group_size, gsi);
4942 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
4943 }
4944 else
4945 {
4946 if (j == 0)
4947 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4948 else
4949 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4950 prev_stmt_info = vinfo_for_stmt (new_stmt);
4951 }
4952 }
4953 if (dr_chain)
4954 VEC_free (tree, heap, dr_chain);
4955 }
4956
4957 return true;
4958 }
4959
4960 /* Function vect_is_simple_cond.
4961
4962 Input:
4963 LOOP - the loop that is being vectorized.
4964 COND - Condition that is checked for simple use.
4965
4966 Output:
4967 *COMP_VECTYPE - the vector type for the comparison.
4968
4969 Returns whether a COND can be vectorized. Checks whether
4970 condition operands are supportable using vect_is_simple_use. */
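/* For example (illustrative): "a_1 < b_2", where the defs of a_1 and b_2
   are accepted by vect_is_simple_use_1, or "a_1 <= 42" with a constant
   operand, are simple conditions; *COMP_VECTYPE is taken from the vector
   type of the LHS operand if available, otherwise from the RHS.  */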
4971
4972 static bool
4973 vect_is_simple_cond (tree cond, gimple stmt, loop_vec_info loop_vinfo,
4974 bb_vec_info bb_vinfo, tree *comp_vectype)
4975 {
4976 tree lhs, rhs;
4977 tree def;
4978 enum vect_def_type dt;
4979 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
4980
4981 if (!COMPARISON_CLASS_P (cond))
4982 return false;
4983
4984 lhs = TREE_OPERAND (cond, 0);
4985 rhs = TREE_OPERAND (cond, 1);
4986
4987 if (TREE_CODE (lhs) == SSA_NAME)
4988 {
4989 gimple lhs_def_stmt = SSA_NAME_DEF_STMT (lhs);
4990 if (!vect_is_simple_use_1 (lhs, stmt, loop_vinfo, bb_vinfo,
4991 &lhs_def_stmt, &def, &dt, &vectype1))
4992 return false;
4993 }
4994 else if (TREE_CODE (lhs) != INTEGER_CST && TREE_CODE (lhs) != REAL_CST
4995 && TREE_CODE (lhs) != FIXED_CST)
4996 return false;
4997
4998 if (TREE_CODE (rhs) == SSA_NAME)
4999 {
5000 gimple rhs_def_stmt = SSA_NAME_DEF_STMT (rhs);
5001 if (!vect_is_simple_use_1 (rhs, stmt, loop_vinfo, bb_vinfo,
5002 &rhs_def_stmt, &def, &dt, &vectype2))
5003 return false;
5004 }
5005 else if (TREE_CODE (rhs) != INTEGER_CST && TREE_CODE (rhs) != REAL_CST
5006 && TREE_CODE (rhs) != FIXED_CST)
5007 return false;
5008
5009 *comp_vectype = vectype1 ? vectype1 : vectype2;
5010 return true;
5011 }
5012
5013 /* vectorizable_condition.
5014
5015 Check if STMT is a conditional modify expression that can be vectorized.
5016 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
5017 stmt using VEC_COND_EXPR to replace it, put it in VEC_STMT, and insert it
5018 at GSI.
5019
5020 When STMT is vectorized as a nested cycle, REDUC_DEF is the vector variable
5021 to be used at REDUC_INDEX (in the then clause if REDUC_INDEX is 1, and in
5022 the else clause if it is 2).
5023
5024 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
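/* For example (a sketch): the scalar statement
     S: x = a < b ? c : d
   is vectorized into
     VS: vx = VEC_COND_EXPR <va < vb, vc, vd>
   where va, vb, vc and vd are the vector defs of a, b, c and d.  */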
5025
5026 bool
5027 vectorizable_condition (gimple stmt, gimple_stmt_iterator *gsi,
5028 gimple *vec_stmt, tree reduc_def, int reduc_index,
5029 slp_tree slp_node)
5030 {
5031 tree scalar_dest = NULL_TREE;
5032 tree vec_dest = NULL_TREE;
5033 tree cond_expr, then_clause, else_clause;
5034 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
5035 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
5036 tree comp_vectype = NULL_TREE;
5037 tree vec_cond_lhs = NULL_TREE, vec_cond_rhs = NULL_TREE;
5038 tree vec_then_clause = NULL_TREE, vec_else_clause = NULL_TREE;
5039 tree vec_compare, vec_cond_expr;
5040 tree new_temp;
5041 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
5042 tree def;
5043 enum vect_def_type dt, dts[4];
5044 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
5045 int ncopies;
5046 enum tree_code code;
5047 stmt_vec_info prev_stmt_info = NULL;
5048 int i, j;
5049 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
5050 VEC (tree, heap) *vec_oprnds0 = NULL, *vec_oprnds1 = NULL;
5051 VEC (tree, heap) *vec_oprnds2 = NULL, *vec_oprnds3 = NULL;
5052
5053 if (slp_node || PURE_SLP_STMT (stmt_info))
5054 ncopies = 1;
5055 else
5056 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
5057
5058 gcc_assert (ncopies >= 1);
5059 if (reduc_index && ncopies > 1)
5060 return false; /* FORNOW */
5061
5062 if (reduc_index && STMT_SLP_TYPE (stmt_info))
5063 return false;
5064
5065 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
5066 return false;
5067
5068 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
5069 && !(STMT_VINFO_DEF_TYPE (stmt_info) == vect_nested_cycle
5070 && reduc_def))
5071 return false;
5072
5073 /* FORNOW: not yet supported. */
5074 if (STMT_VINFO_LIVE_P (stmt_info))
5075 {
5076 if (vect_print_dump_info (REPORT_DETAILS))
5077 fprintf (vect_dump, "value used after loop.");
5078 return false;
5079 }
5080
5081 /* Is vectorizable conditional operation? */
5082 if (!is_gimple_assign (stmt))
5083 return false;
5084
5085 code = gimple_assign_rhs_code (stmt);
5086
5087 if (code != COND_EXPR)
5088 return false;
5089
5090 cond_expr = gimple_assign_rhs1 (stmt);
5091 then_clause = gimple_assign_rhs2 (stmt);
5092 else_clause = gimple_assign_rhs3 (stmt);
5093
5094 if (!vect_is_simple_cond (cond_expr, stmt, loop_vinfo, bb_vinfo,
5095 &comp_vectype)
5096 || !comp_vectype)
5097 return false;
5098
5099 if (TREE_CODE (then_clause) == SSA_NAME)
5100 {
5101 gimple then_def_stmt = SSA_NAME_DEF_STMT (then_clause);
5102 if (!vect_is_simple_use (then_clause, stmt, loop_vinfo, bb_vinfo,
5103 &then_def_stmt, &def, &dt))
5104 return false;
5105 }
5106 else if (TREE_CODE (then_clause) != INTEGER_CST
5107 && TREE_CODE (then_clause) != REAL_CST
5108 && TREE_CODE (then_clause) != FIXED_CST)
5109 return false;
5110
5111 if (TREE_CODE (else_clause) == SSA_NAME)
5112 {
5113 gimple else_def_stmt = SSA_NAME_DEF_STMT (else_clause);
5114 if (!vect_is_simple_use (else_clause, stmt, loop_vinfo, bb_vinfo,
5115 &else_def_stmt, &def, &dt))
5116 return false;
5117 }
5118 else if (TREE_CODE (else_clause) != INTEGER_CST
5119 && TREE_CODE (else_clause) != REAL_CST
5120 && TREE_CODE (else_clause) != FIXED_CST)
5121 return false;
5122
5123 if (!vec_stmt)
5124 {
5125 STMT_VINFO_TYPE (stmt_info) = condition_vec_info_type;
5126 return expand_vec_cond_expr_p (vectype, comp_vectype);
5127 }
5128
5129 /* Transform. */
5130
5131 if (!slp_node)
5132 {
5133 vec_oprnds0 = VEC_alloc (tree, heap, 1);
5134 vec_oprnds1 = VEC_alloc (tree, heap, 1);
5135 vec_oprnds2 = VEC_alloc (tree, heap, 1);
5136 vec_oprnds3 = VEC_alloc (tree, heap, 1);
5137 }
5138
5139 /* Handle def. */
5140 scalar_dest = gimple_assign_lhs (stmt);
5141 vec_dest = vect_create_destination_var (scalar_dest, vectype);
5142
5143 /* Handle cond expr. */
5144 for (j = 0; j < ncopies; j++)
5145 {
5146 gimple new_stmt = NULL;
5147 if (j == 0)
5148 {
5149 if (slp_node)
5150 {
5151 VEC (tree, heap) *ops = VEC_alloc (tree, heap, 4);
5152 VEC (slp_void_p, heap) *vec_defs;
5153
5154 vec_defs = VEC_alloc (slp_void_p, heap, 4);
5155 VEC_safe_push (tree, heap, ops, TREE_OPERAND (cond_expr, 0));
5156 VEC_safe_push (tree, heap, ops, TREE_OPERAND (cond_expr, 1));
5157 VEC_safe_push (tree, heap, ops, then_clause);
5158 VEC_safe_push (tree, heap, ops, else_clause);
5159 vect_get_slp_defs (ops, slp_node, &vec_defs, -1);
5160 vec_oprnds3 = (VEC (tree, heap) *) VEC_pop (slp_void_p, vec_defs);
5161 vec_oprnds2 = (VEC (tree, heap) *) VEC_pop (slp_void_p, vec_defs);
5162 vec_oprnds1 = (VEC (tree, heap) *) VEC_pop (slp_void_p, vec_defs);
5163 vec_oprnds0 = (VEC (tree, heap) *) VEC_pop (slp_void_p, vec_defs);
5164
5165 VEC_free (tree, heap, ops);
5166 VEC_free (slp_void_p, heap, vec_defs);
5167 }
5168 else
5169 {
5170 gimple gtemp;
5171 vec_cond_lhs =
5172 vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 0),
5173 stmt, NULL);
5174 vect_is_simple_use (TREE_OPERAND (cond_expr, 0), stmt,
5175 loop_vinfo, NULL, &gtemp, &def, &dts[0]);
5176
5177 vec_cond_rhs =
5178 vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 1),
5179 stmt, NULL);
5180 vect_is_simple_use (TREE_OPERAND (cond_expr, 1), stmt,
5181 loop_vinfo, NULL, &gtemp, &def, &dts[1]);
5182 if (reduc_index == 1)
5183 vec_then_clause = reduc_def;
5184 else
5185 {
5186 vec_then_clause = vect_get_vec_def_for_operand (then_clause,
5187 stmt, NULL);
5188 vect_is_simple_use (then_clause, stmt, loop_vinfo,
5189 NULL, &gtemp, &def, &dts[2]);
5190 }
5191 if (reduc_index == 2)
5192 vec_else_clause = reduc_def;
5193 else
5194 {
5195 vec_else_clause = vect_get_vec_def_for_operand (else_clause,
5196 stmt, NULL);
5197 vect_is_simple_use (else_clause, stmt, loop_vinfo,
5198 NULL, &gtemp, &def, &dts[3]);
5199 }
5200 }
5201 }
5202 else
5203 {
5204 vec_cond_lhs = vect_get_vec_def_for_stmt_copy (dts[0],
5205 VEC_pop (tree, vec_oprnds0));
5206 vec_cond_rhs = vect_get_vec_def_for_stmt_copy (dts[1],
5207 VEC_pop (tree, vec_oprnds1));
5208 vec_then_clause = vect_get_vec_def_for_stmt_copy (dts[2],
5209 VEC_pop (tree, vec_oprnds2));
5210 vec_else_clause = vect_get_vec_def_for_stmt_copy (dts[3],
5211 VEC_pop (tree, vec_oprnds3));
5212 }
5213
5214 if (!slp_node)
5215 {
5216 VEC_quick_push (tree, vec_oprnds0, vec_cond_lhs);
5217 VEC_quick_push (tree, vec_oprnds1, vec_cond_rhs);
5218 VEC_quick_push (tree, vec_oprnds2, vec_then_clause);
5219 VEC_quick_push (tree, vec_oprnds3, vec_else_clause);
5220 }
5221
5222 /* Arguments are ready. Create the new vector stmt. */
5223 FOR_EACH_VEC_ELT (tree, vec_oprnds0, i, vec_cond_lhs)
5224 {
5225 vec_cond_rhs = VEC_index (tree, vec_oprnds1, i);
5226 vec_then_clause = VEC_index (tree, vec_oprnds2, i);
5227 vec_else_clause = VEC_index (tree, vec_oprnds3, i);
5228
5229 vec_compare = build2 (TREE_CODE (cond_expr), vectype,
5230 vec_cond_lhs, vec_cond_rhs);
5231 vec_cond_expr = build3 (VEC_COND_EXPR, vectype,
5232 vec_compare, vec_then_clause, vec_else_clause);
5233
5234 new_stmt = gimple_build_assign (vec_dest, vec_cond_expr);
5235 new_temp = make_ssa_name (vec_dest, new_stmt);
5236 gimple_assign_set_lhs (new_stmt, new_temp);
5237 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5238 if (slp_node)
5239 VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node), new_stmt);
5240 }
5241
5242 if (slp_node)
5243 continue;
5244
5245 if (j == 0)
5246 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
5247 else
5248 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
5249
5250 prev_stmt_info = vinfo_for_stmt (new_stmt);
5251 }
5252
5253 VEC_free (tree, heap, vec_oprnds0);
5254 VEC_free (tree, heap, vec_oprnds1);
5255 VEC_free (tree, heap, vec_oprnds2);
5256 VEC_free (tree, heap, vec_oprnds3);
5257
5258 return true;
5259 }
5260
5261
5262 /* Make sure the statement is vectorizable. */
5263
5264 bool
5265 vect_analyze_stmt (gimple stmt, bool *need_to_vectorize, slp_tree node)
5266 {
5267 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
5268 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
5269 enum vect_relevant relevance = STMT_VINFO_RELEVANT (stmt_info);
5270 bool ok;
5271 tree scalar_type, vectype;
5272 gimple pattern_stmt;
5273 gimple_seq pattern_def_seq;
5274
5275 if (vect_print_dump_info (REPORT_DETAILS))
5276 {
5277 fprintf (vect_dump, "==> examining statement: ");
5278 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
5279 }
5280
5281 if (gimple_has_volatile_ops (stmt))
5282 {
5283 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
5284 fprintf (vect_dump, "not vectorized: stmt has volatile operands");
5285
5286 return false;
5287 }
5288
5289 /* Skip stmts that do not need to be vectorized. In loops this is expected
5290 to include:
5291 - the COND_EXPR which is the loop exit condition
5292 - any LABEL_EXPRs in the loop
5293 - computations that are used only for array indexing or loop control.
5294 In basic blocks we only analyze statements that are a part of some SLP
5295 instance; therefore, all the statements are relevant.
5296
5297 A pattern statement needs to be analyzed instead of the original statement
5298 if the original statement is not relevant. Otherwise, we analyze both
5299 statements. */
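/* For instance (purely illustrative), in

     for (i = 0; i < n; i++)
       a[i] = b[i] * 7;

   the induction-variable increment is used only for array indexing and
   loop control, so it is neither relevant nor live and is skipped here;
   the multiplication, on the other hand, may have been replaced by a
   recognized pattern stmt (e.g. a widening multiplication), in which case
   that pattern stmt is the one examined below.  */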
5300
5301 pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
5302 if (!STMT_VINFO_RELEVANT_P (stmt_info)
5303 && !STMT_VINFO_LIVE_P (stmt_info))
5304 {
5305 if (STMT_VINFO_IN_PATTERN_P (stmt_info)
5306 && pattern_stmt
5307 && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
5308 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
5309 {
5310 /* Analyze PATTERN_STMT instead of the original stmt. */
5311 stmt = pattern_stmt;
5312 stmt_info = vinfo_for_stmt (pattern_stmt);
5313 if (vect_print_dump_info (REPORT_DETAILS))
5314 {
5315 fprintf (vect_dump, "==> examining pattern statement: ");
5316 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
5317 }
5318 }
5319 else
5320 {
5321 if (vect_print_dump_info (REPORT_DETAILS))
5322 fprintf (vect_dump, "irrelevant.");
5323
5324 return true;
5325 }
5326 }
5327 else if (STMT_VINFO_IN_PATTERN_P (stmt_info)
5328 && pattern_stmt
5329 && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
5330 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
5331 {
5332 /* Analyze PATTERN_STMT too. */
5333 if (vect_print_dump_info (REPORT_DETAILS))
5334 {
5335 fprintf (vect_dump, "==> examining pattern statement: ");
5336 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
5337 }
5338
5339 if (!vect_analyze_stmt (pattern_stmt, need_to_vectorize, node))
5340 return false;
5341 }
5342
5343 if (is_pattern_stmt_p (stmt_info)
5344 && (pattern_def_seq = STMT_VINFO_PATTERN_DEF_SEQ (stmt_info)))
5345 {
5346 gimple_stmt_iterator si;
5347
5348 for (si = gsi_start (pattern_def_seq); !gsi_end_p (si); gsi_next (&si))
5349 {
5350 gimple pattern_def_stmt = gsi_stmt (si);
5351 if (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_def_stmt))
5352 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_def_stmt)))
5353 {
5354 /* Analyze def stmt of STMT if it's a pattern stmt. */
5355 if (vect_print_dump_info (REPORT_DETAILS))
5356 {
5357 fprintf (vect_dump, "==> examining pattern def statement: ");
5358 print_gimple_stmt (vect_dump, pattern_def_stmt, 0, TDF_SLIM);
5359 }
5360
5361 if (!vect_analyze_stmt (pattern_def_stmt,
5362 need_to_vectorize, node))
5363 return false;
5364 }
5365 }
5366 }
5367
5368 switch (STMT_VINFO_DEF_TYPE (stmt_info))
5369 {
5370 case vect_internal_def:
5371 break;
5372
5373 case vect_reduction_def:
5374 case vect_nested_cycle:
5375 gcc_assert (!bb_vinfo && (relevance == vect_used_in_outer
5376 || relevance == vect_used_in_outer_by_reduction
5377 || relevance == vect_unused_in_scope));
5378 break;
5379
5380 case vect_induction_def:
5381 case vect_constant_def:
5382 case vect_external_def:
5383 case vect_unknown_def_type:
5384 default:
5385 gcc_unreachable ();
5386 }
5387
5388 if (bb_vinfo)
5389 {
5390 gcc_assert (PURE_SLP_STMT (stmt_info));
5391
5392 scalar_type = TREE_TYPE (gimple_get_lhs (stmt));
5393 if (vect_print_dump_info (REPORT_DETAILS))
5394 {
5395 fprintf (vect_dump, "get vectype for scalar type: ");
5396 print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
5397 }
5398
5399 vectype = get_vectype_for_scalar_type (scalar_type);
5400 if (!vectype)
5401 {
5402 if (vect_print_dump_info (REPORT_DETAILS))
5403 {
5404 fprintf (vect_dump, "not SLPed: unsupported data-type ");
5405 print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
5406 }
5407 return false;
5408 }
5409
5410 if (vect_print_dump_info (REPORT_DETAILS))
5411 {
5412 fprintf (vect_dump, "vectype: ");
5413 print_generic_expr (vect_dump, vectype, TDF_SLIM);
5414 }
5415
5416 STMT_VINFO_VECTYPE (stmt_info) = vectype;
5417 }
5418
5419 if (STMT_VINFO_RELEVANT_P (stmt_info))
5420 {
5421 gcc_assert (!VECTOR_MODE_P (TYPE_MODE (gimple_expr_type (stmt))));
5422 gcc_assert (STMT_VINFO_VECTYPE (stmt_info));
5423 *need_to_vectorize = true;
5424 }
5425
5426 ok = true;
5427 if (!bb_vinfo
5428 && (STMT_VINFO_RELEVANT_P (stmt_info)
5429 || STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def))
5430 ok = (vectorizable_conversion (stmt, NULL, NULL, NULL)
5431 || vectorizable_shift (stmt, NULL, NULL, NULL)
5432 || vectorizable_operation (stmt, NULL, NULL, NULL)
5433 || vectorizable_assignment (stmt, NULL, NULL, NULL)
5434 || vectorizable_load (stmt, NULL, NULL, NULL, NULL)
5435 || vectorizable_call (stmt, NULL, NULL, NULL)
5436 || vectorizable_store (stmt, NULL, NULL, NULL)
5437 || vectorizable_reduction (stmt, NULL, NULL, NULL)
5438 || vectorizable_condition (stmt, NULL, NULL, NULL, 0, NULL));
5439 else
5440 {
5441 if (bb_vinfo)
5442 ok = (vectorizable_conversion (stmt, NULL, NULL, node)
5443 || vectorizable_shift (stmt, NULL, NULL, node)
5444 || vectorizable_operation (stmt, NULL, NULL, node)
5445 || vectorizable_assignment (stmt, NULL, NULL, node)
5446 || vectorizable_load (stmt, NULL, NULL, node, NULL)
5447 || vectorizable_call (stmt, NULL, NULL, node)
5448 || vectorizable_store (stmt, NULL, NULL, node)
5449 || vectorizable_condition (stmt, NULL, NULL, NULL, 0, node));
5450 }
5451
5452 if (!ok)
5453 {
5454 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
5455 {
5456 fprintf (vect_dump, "not vectorized: relevant stmt not ");
5457 fprintf (vect_dump, "supported: ");
5458 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
5459 }
5460
5461 return false;
5462 }
5463
5464 if (bb_vinfo)
5465 return true;
5466
5467 /* Stmts that are (also) "live" (i.e., used outside of the loop)
5468 need extra handling, except for vectorizable reductions. */
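/* A minimal illustration: in

     for (i = 0; i < n; i++)
       {
         a[i] = b[i] + 1;
         k = i * 4;
       }
     ... = k;

   the stmt computing K is "live": its value is read after the loop, so
   extra code must be generated to keep the correct scalar value of K
   available once the loop has been vectorized.  */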
5469 if (STMT_VINFO_LIVE_P (stmt_info)
5470 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type)
5471 ok = vectorizable_live_operation (stmt, NULL, NULL);
5472
5473 if (!ok)
5474 {
5475 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
5476 {
5477 fprintf (vect_dump, "not vectorized: live stmt not ");
5478 fprintf (vect_dump, "supported: ");
5479 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
5480 }
5481
5482 return false;
5483 }
5484
5485 return true;
5486 }
5487
5488
5489 /* Function vect_transform_stmt.
5490
5491 Create a vectorized stmt to replace STMT, and insert it at BSI. */
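/* The boolean returned indicates whether STMT was a store (so the caller
   knows the scalar store can be removed); for an interleaved group this is
   reported only once the last store of the group has been reached.  */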
5492
5493 bool
5494 vect_transform_stmt (gimple stmt, gimple_stmt_iterator *gsi,
5495 bool *strided_store, slp_tree slp_node,
5496 slp_instance slp_node_instance)
5497 {
5498 bool is_store = false;
5499 gimple vec_stmt = NULL;
5500 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
5501 bool done;
5502
5503 switch (STMT_VINFO_TYPE (stmt_info))
5504 {
5505 case type_demotion_vec_info_type:
5506 case type_promotion_vec_info_type:
5507 case type_conversion_vec_info_type:
5508 done = vectorizable_conversion (stmt, gsi, &vec_stmt, slp_node);
5509 gcc_assert (done);
5510 break;
5511
5512 case induc_vec_info_type:
5513 gcc_assert (!slp_node);
5514 done = vectorizable_induction (stmt, gsi, &vec_stmt);
5515 gcc_assert (done);
5516 break;
5517
5518 case shift_vec_info_type:
5519 done = vectorizable_shift (stmt, gsi, &vec_stmt, slp_node);
5520 gcc_assert (done);
5521 break;
5522
5523 case op_vec_info_type:
5524 done = vectorizable_operation (stmt, gsi, &vec_stmt, slp_node);
5525 gcc_assert (done);
5526 break;
5527
5528 case assignment_vec_info_type:
5529 done = vectorizable_assignment (stmt, gsi, &vec_stmt, slp_node);
5530 gcc_assert (done);
5531 break;
5532
5533 case load_vec_info_type:
5534 done = vectorizable_load (stmt, gsi, &vec_stmt, slp_node,
5535 slp_node_instance);
5536 gcc_assert (done);
5537 break;
5538
5539 case store_vec_info_type:
5540 done = vectorizable_store (stmt, gsi, &vec_stmt, slp_node);
5541 gcc_assert (done);
5542 if (STMT_VINFO_STRIDED_ACCESS (stmt_info) && !slp_node)
5543 {
5544 /* In case of interleaving, the whole chain is vectorized when the
5545 last store in the chain is reached. Store stmts before the last
5546 one are skipped, and their vec_stmt_info shouldn't be freed
5547 meanwhile. */
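/* Purely illustrative example: for the interleaved stores

     a[2*i]   = x;
     a[2*i+1] = y;

   nothing is emitted when the first scalar store is reached; the permuted
   vector stores for the whole group are generated only while processing
   the second (last) store of the group.  */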
5548 *strided_store = true;
5549 if (STMT_VINFO_VEC_STMT (stmt_info))
5550 is_store = true;
5551 }
5552 else
5553 is_store = true;
5554 break;
5555
5556 case condition_vec_info_type:
5557 done = vectorizable_condition (stmt, gsi, &vec_stmt, NULL, 0, slp_node);
5558 gcc_assert (done);
5559 break;
5560
5561 case call_vec_info_type:
5562 done = vectorizable_call (stmt, gsi, &vec_stmt, slp_node);
5563 stmt = gsi_stmt (*gsi);
5564 break;
5565
5566 case reduc_vec_info_type:
5567 done = vectorizable_reduction (stmt, gsi, &vec_stmt, slp_node);
5568 gcc_assert (done);
5569 break;
5570
5571 default:
5572 if (!STMT_VINFO_LIVE_P (stmt_info))
5573 {
5574 if (vect_print_dump_info (REPORT_DETAILS))
5575 fprintf (vect_dump, "stmt not supported.");
5576 gcc_unreachable ();
5577 }
5578 }
5579
5580 /* Handle inner-loop stmts whose DEF is used in the loop-nest that
5581 is being vectorized, but outside the immediately enclosing loop. */
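/* For example (illustrative), when vectorizing an outer loop, a stmt in
   the inner loop may define a value whose only uses are in the outer loop,
   reached through the inner loop's exit phi.  The vector def created here
   for that stmt is recorded on the exit phi below, so the outer-loop stmts
   can find it when they are vectorized in turn.  */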
5582 if (vec_stmt
5583 && STMT_VINFO_LOOP_VINFO (stmt_info)
5584 && nested_in_vect_loop_p (LOOP_VINFO_LOOP (
5585 STMT_VINFO_LOOP_VINFO (stmt_info)), stmt)
5586 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type
5587 && (STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_outer
5588 || STMT_VINFO_RELEVANT (stmt_info) ==
5589 vect_used_in_outer_by_reduction))
5590 {
5591 struct loop *innerloop = LOOP_VINFO_LOOP (
5592 STMT_VINFO_LOOP_VINFO (stmt_info))->inner;
5593 imm_use_iterator imm_iter;
5594 use_operand_p use_p;
5595 tree scalar_dest;
5596 gimple exit_phi;
5597
5598 if (vect_print_dump_info (REPORT_DETAILS))
5599 fprintf (vect_dump, "Record the vdef for outer-loop vectorization.");
5600
5601 /* Find the relevant loop-exit phi-node, and record the vec_stmt there
5602 (to be used when vectorizing outer-loop stmts that use the DEF of
5603 STMT). */
5604 if (gimple_code (stmt) == GIMPLE_PHI)
5605 scalar_dest = PHI_RESULT (stmt);
5606 else
5607 scalar_dest = gimple_assign_lhs (stmt);
5608
5609 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, scalar_dest)
5610 {
5611 if (!flow_bb_inside_loop_p (innerloop, gimple_bb (USE_STMT (use_p))))
5612 {
5613 exit_phi = USE_STMT (use_p);
5614 STMT_VINFO_VEC_STMT (vinfo_for_stmt (exit_phi)) = vec_stmt;
5615 }
5616 }
5617 }
5618
5619 /* Handle stmts whose DEF is used outside the loop-nest that is
5620 being vectorized. */
5621 if (STMT_VINFO_LIVE_P (stmt_info)
5622 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type)
5623 {
5624 done = vectorizable_live_operation (stmt, gsi, &vec_stmt);
5625 gcc_assert (done);
5626 }
5627
5628 if (vec_stmt)
5629 STMT_VINFO_VEC_STMT (stmt_info) = vec_stmt;
5630
5631 return is_store;
5632 }
5633
5634
5635 /* Remove a group of stores (for SLP or interleaving), free their
5636 stmt_vec_info. */
5637
5638 void
5639 vect_remove_stores (gimple first_stmt)
5640 {
5641 gimple next = first_stmt;
5642 gimple tmp;
5643 gimple_stmt_iterator next_si;
5644
5645 while (next)
5646 {
5647 stmt_vec_info stmt_info = vinfo_for_stmt (next);
5648
5649 tmp = GROUP_NEXT_ELEMENT (stmt_info);
5650 if (is_pattern_stmt_p (stmt_info))
5651 next = STMT_VINFO_RELATED_STMT (stmt_info);
5652 /* Free the attached stmt_vec_info and remove the stmt. */
5653 next_si = gsi_for_stmt (next);
5654 gsi_remove (&next_si, true);
5655 free_stmt_vec_info (next);
5656 next = tmp;
5657 }
5658 }
5659
5660
5661 /* Function new_stmt_vec_info.
5662
5663 Create and initialize a new stmt_vec_info struct for STMT. */
5664
5665 stmt_vec_info
5666 new_stmt_vec_info (gimple stmt, loop_vec_info loop_vinfo,
5667 bb_vec_info bb_vinfo)
5668 {
5669 stmt_vec_info res;
5670 res = (stmt_vec_info) xcalloc (1, sizeof (struct _stmt_vec_info));
5671
5672 STMT_VINFO_TYPE (res) = undef_vec_info_type;
5673 STMT_VINFO_STMT (res) = stmt;
5674 STMT_VINFO_LOOP_VINFO (res) = loop_vinfo;
5675 STMT_VINFO_BB_VINFO (res) = bb_vinfo;
5676 STMT_VINFO_RELEVANT (res) = vect_unused_in_scope;
5677 STMT_VINFO_LIVE_P (res) = false;
5678 STMT_VINFO_VECTYPE (res) = NULL;
5679 STMT_VINFO_VEC_STMT (res) = NULL;
5680 STMT_VINFO_VECTORIZABLE (res) = true;
5681 STMT_VINFO_IN_PATTERN_P (res) = false;
5682 STMT_VINFO_RELATED_STMT (res) = NULL;
5683 STMT_VINFO_PATTERN_DEF_SEQ (res) = NULL;
5684 STMT_VINFO_DATA_REF (res) = NULL;
5685
5686 STMT_VINFO_DR_BASE_ADDRESS (res) = NULL;
5687 STMT_VINFO_DR_OFFSET (res) = NULL;
5688 STMT_VINFO_DR_INIT (res) = NULL;
5689 STMT_VINFO_DR_STEP (res) = NULL;
5690 STMT_VINFO_DR_ALIGNED_TO (res) = NULL;
5691
5692 if (gimple_code (stmt) == GIMPLE_PHI
5693 && is_loop_header_bb_p (gimple_bb (stmt)))
5694 STMT_VINFO_DEF_TYPE (res) = vect_unknown_def_type;
5695 else
5696 STMT_VINFO_DEF_TYPE (res) = vect_internal_def;
5697
5698 STMT_VINFO_SAME_ALIGN_REFS (res) = VEC_alloc (dr_p, heap, 5);
5699 STMT_VINFO_INSIDE_OF_LOOP_COST (res) = 0;
5700 STMT_VINFO_OUTSIDE_OF_LOOP_COST (res) = 0;
5701 STMT_SLP_TYPE (res) = loop_vect;
5702 GROUP_FIRST_ELEMENT (res) = NULL;
5703 GROUP_NEXT_ELEMENT (res) = NULL;
5704 GROUP_SIZE (res) = 0;
5705 GROUP_STORE_COUNT (res) = 0;
5706 GROUP_GAP (res) = 0;
5707 GROUP_SAME_DR_STMT (res) = NULL;
5708 GROUP_READ_WRITE_DEPENDENCE (res) = false;
5709
5710 return res;
5711 }
5712
5713
5714 /* Allocate the vector that maps stmt UIDs to their stmt_vec_info. */
5715
5716 void
5717 init_stmt_vec_info_vec (void)
5718 {
5719 gcc_assert (!stmt_vec_info_vec);
5720 stmt_vec_info_vec = VEC_alloc (vec_void_p, heap, 50);
5721 }
5722
5723
5724 /* Free the vector that maps stmt UIDs to their stmt_vec_info. */
5725
5726 void
5727 free_stmt_vec_info_vec (void)
5728 {
5729 gcc_assert (stmt_vec_info_vec);
5730 VEC_free (vec_void_p, heap, stmt_vec_info_vec);
5731 }
5732
5733
5734 /* Free stmt vectorization related info. */
5735
5736 void
5737 free_stmt_vec_info (gimple stmt)
5738 {
5739 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
5740
5741 if (!stmt_info)
5742 return;
5743
5744 /* Check if this statement has a related "pattern stmt"
5745 (introduced by the vectorizer during the pattern recognition
5746 pass). Free pattern's stmt_vec_info and def stmt's stmt_vec_info
5747 too. */
5748 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
5749 {
5750 stmt_vec_info patt_info
5751 = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
5752 if (patt_info)
5753 {
5754 gimple_seq seq = STMT_VINFO_PATTERN_DEF_SEQ (patt_info);
5755 if (seq)
5756 {
5757 gimple_stmt_iterator si;
5758 for (si = gsi_start (seq); !gsi_end_p (si); gsi_next (&si))
5759 free_stmt_vec_info (gsi_stmt (si));
5760 }
5761 free_stmt_vec_info (STMT_VINFO_RELATED_STMT (stmt_info));
5762 }
5763 }
5764
5765 VEC_free (dr_p, heap, STMT_VINFO_SAME_ALIGN_REFS (stmt_info));
5766 set_vinfo_for_stmt (stmt, NULL);
5767 free (stmt_info);
5768 }
5769
5770
5771 /* Function get_vectype_for_scalar_type_and_size.
5772
5773 Returns the vector type corresponding to SCALAR_TYPE and SIZE as supported
5774 by the target. */
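/* For example (illustrative): on a target whose preferred SIMD mode for
   SImode is a 16-byte vector, SCALAR_TYPE 'int' with SIZE 16 (or SIZE 0,
   which lets the target choose) yields a 4-unit vector type such as V4SI,
   whereas an unsupported combination yields NULL_TREE.  */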
5775
5776 static tree
5777 get_vectype_for_scalar_type_and_size (tree scalar_type, unsigned size)
5778 {
5779 enum machine_mode inner_mode = TYPE_MODE (scalar_type);
5780 enum machine_mode simd_mode;
5781 unsigned int nbytes = GET_MODE_SIZE (inner_mode);
5782 int nunits;
5783 tree vectype;
5784
5785 if (nbytes == 0)
5786 return NULL_TREE;
5787
5788 if (GET_MODE_CLASS (inner_mode) != MODE_INT
5789 && GET_MODE_CLASS (inner_mode) != MODE_FLOAT)
5790 return NULL_TREE;
5791
5792 /* We can't build a vector type of elements with alignment bigger than
5793 their size. */
5794 if (nbytes < TYPE_ALIGN_UNIT (scalar_type))
5795 return NULL_TREE;
5796
5797 /* For vector types of elements whose mode precision doesn't
5798 match their type's precision we use an element type of mode
5799 precision. The vectorization routines will have to make sure
5800 they support the proper result truncation/extension.
5801 We also make sure to build vector types with INTEGER_TYPE
5802 component type only. */
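/* E.g. (illustrative): a _Bool element has QImode but a TYPE_PRECISION of
   1, and an enum is not an INTEGER_TYPE; both are replaced here by a
   nonstandard integer type of the mode's bitsize so that the precision of
   the vector elements matches their mode.  */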
5803 if (INTEGRAL_TYPE_P (scalar_type)
5804 && (GET_MODE_BITSIZE (inner_mode) != TYPE_PRECISION (scalar_type)
5805 || TREE_CODE (scalar_type) != INTEGER_TYPE))
5806 scalar_type = build_nonstandard_integer_type (GET_MODE_BITSIZE (inner_mode),
5807 TYPE_UNSIGNED (scalar_type));
5808
5809 /* We shouldn't end up building VECTOR_TYPEs of non-scalar components.
5810 When the component mode passes the above test simply use a type
5811 corresponding to that mode. The theory is that any use that
5812 would cause problems with this will disable vectorization anyway. */
5813 if (!SCALAR_FLOAT_TYPE_P (scalar_type)
5814 && !INTEGRAL_TYPE_P (scalar_type)
5815 && !POINTER_TYPE_P (scalar_type))
5816 scalar_type = lang_hooks.types.type_for_mode (inner_mode, 1);
5817
5818 /* If no size was supplied use the mode the target prefers. Otherwise
5819 look up a vector mode of the specified size. */
5820 if (size == 0)
5821 simd_mode = targetm.vectorize.preferred_simd_mode (inner_mode);
5822 else
5823 simd_mode = mode_for_vector (inner_mode, size / nbytes);
5824 nunits = GET_MODE_SIZE (simd_mode) / nbytes;
5825 if (nunits <= 1)
5826 return NULL_TREE;
5827
5828 vectype = build_vector_type (scalar_type, nunits);
5829 if (vect_print_dump_info (REPORT_DETAILS))
5830 {
5831 fprintf (vect_dump, "get vectype with %d units of type ", nunits);
5832 print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
5833 }
5834
5835 if (!vectype)
5836 return NULL_TREE;
5837
5838 if (vect_print_dump_info (REPORT_DETAILS))
5839 {
5840 fprintf (vect_dump, "vectype: ");
5841 print_generic_expr (vect_dump, vectype, TDF_SLIM);
5842 }
5843
5844 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
5845 && !INTEGRAL_MODE_P (TYPE_MODE (vectype)))
5846 {
5847 if (vect_print_dump_info (REPORT_DETAILS))
5848 fprintf (vect_dump, "mode not supported by target.");
5849 return NULL_TREE;
5850 }
5851
5852 return vectype;
5853 }
5854
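/* The vector size (in bytes) currently used for vectorization; it stays 0
   until the first successful call to get_vectype_for_scalar_type below
   fixes it from the chosen vector mode, after which subsequent vector
   types are looked up with this same size.  */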
5855 unsigned int current_vector_size;
5856
5857 /* Function get_vectype_for_scalar_type.
5858
5859 Returns the vector type corresponding to SCALAR_TYPE as supported
5860 by the target. */
5861
5862 tree
5863 get_vectype_for_scalar_type (tree scalar_type)
5864 {
5865 tree vectype;
5866 vectype = get_vectype_for_scalar_type_and_size (scalar_type,
5867 current_vector_size);
5868 if (vectype
5869 && current_vector_size == 0)
5870 current_vector_size = GET_MODE_SIZE (TYPE_MODE (vectype));
5871 return vectype;
5872 }
5873
5874 /* Function get_same_sized_vectype
5875
5876 Returns a vector type corresponding to SCALAR_TYPE of size
5877 VECTOR_TYPE if supported by the target. */
5878
5879 tree
5880 get_same_sized_vectype (tree scalar_type, tree vector_type)
5881 {
5882 return get_vectype_for_scalar_type_and_size
5883 (scalar_type, GET_MODE_SIZE (TYPE_MODE (vector_type)));
5884 }
5885
5886 /* Function vect_is_simple_use.
5887
5888 Input:
5889 LOOP_VINFO - the vect info of the loop that is being vectorized.
5890 BB_VINFO - the vect info of the basic block that is being vectorized.
5891 OPERAND - operand of STMT in the loop or bb.
5892 DEF - the defining stmt in case OPERAND is an SSA_NAME.
5893
5894 Returns whether a stmt with OPERAND can be vectorized.
5895 For loops, supportable operands are constants, loop invariants, and operands
5896 that are defined by the current iteration of the loop. Unsupportable
5897 operands are those that are defined by a previous iteration of the loop (as
5898 is the case in reduction/induction computations).
5899 For basic blocks, supportable operands are constants and bb invariants.
5900 For now, operands defined outside the basic block are not supported. */
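/* For example (illustrative): for a loop stmt such as

     x_1 = a[i] + y_2 * 2;

   the constant 2 is classified as vect_constant_def, the loop-invariant
   Y_2 (defined before the loop) as vect_external_def, and the SSA name
   holding the load of A[I] (defined by a stmt inside the loop) as
   vect_internal_def.  */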
5901
5902 bool
5903 vect_is_simple_use (tree operand, gimple stmt, loop_vec_info loop_vinfo,
5904 bb_vec_info bb_vinfo, gimple *def_stmt,
5905 tree *def, enum vect_def_type *dt)
5906 {
5907 basic_block bb;
5908 stmt_vec_info stmt_vinfo;
5909 struct loop *loop = NULL;
5910
5911 if (loop_vinfo)
5912 loop = LOOP_VINFO_LOOP (loop_vinfo);
5913
5914 *def_stmt = NULL;
5915 *def = NULL_TREE;
5916
5917 if (vect_print_dump_info (REPORT_DETAILS))
5918 {
5919 fprintf (vect_dump, "vect_is_simple_use: operand ");
5920 print_generic_expr (vect_dump, operand, TDF_SLIM);
5921 }
5922
5923 if (TREE_CODE (operand) == INTEGER_CST || TREE_CODE (operand) == REAL_CST)
5924 {
5925 *dt = vect_constant_def;
5926 return true;
5927 }
5928
5929 if (is_gimple_min_invariant (operand))
5930 {
5931 *def = operand;
5932 *dt = vect_external_def;
5933 return true;
5934 }
5935
5936 if (TREE_CODE (operand) == PAREN_EXPR)
5937 {
5938 if (vect_print_dump_info (REPORT_DETAILS))
5939 fprintf (vect_dump, "non-associatable copy.");
5940 operand = TREE_OPERAND (operand, 0);
5941 }
5942
5943 if (TREE_CODE (operand) != SSA_NAME)
5944 {
5945 if (vect_print_dump_info (REPORT_DETAILS))
5946 fprintf (vect_dump, "not ssa-name.");
5947 return false;
5948 }
5949
5950 *def_stmt = SSA_NAME_DEF_STMT (operand);
5951 if (*def_stmt == NULL)
5952 {
5953 if (vect_print_dump_info (REPORT_DETAILS))
5954 fprintf (vect_dump, "no def_stmt.");
5955 return false;
5956 }
5957
5958 if (vect_print_dump_info (REPORT_DETAILS))
5959 {
5960 fprintf (vect_dump, "def_stmt: ");
5961 print_gimple_stmt (vect_dump, *def_stmt, 0, TDF_SLIM);
5962 }
5963
5964 /* An empty stmt is expected only in the case of a function argument.
5965 (Otherwise we expect a phi_node or a GIMPLE_ASSIGN.) */
5966 if (gimple_nop_p (*def_stmt))
5967 {
5968 *def = operand;
5969 *dt = vect_external_def;
5970 return true;
5971 }
5972
5973 bb = gimple_bb (*def_stmt);
5974
5975 if ((loop && !flow_bb_inside_loop_p (loop, bb))
5976 || (!loop && bb != BB_VINFO_BB (bb_vinfo))
5977 || (!loop && gimple_code (*def_stmt) == GIMPLE_PHI))
5978 *dt = vect_external_def;
5979 else
5980 {
5981 stmt_vinfo = vinfo_for_stmt (*def_stmt);
5982 *dt = STMT_VINFO_DEF_TYPE (stmt_vinfo);
5983 }
5984
5985 if (*dt == vect_unknown_def_type
5986 || (stmt
5987 && *dt == vect_double_reduction_def
5988 && gimple_code (stmt) != GIMPLE_PHI))
5989 {
5990 if (vect_print_dump_info (REPORT_DETAILS))
5991 fprintf (vect_dump, "Unsupported pattern.");
5992 return false;
5993 }
5994
5995 if (vect_print_dump_info (REPORT_DETAILS))
5996 fprintf (vect_dump, "type of def: %d.",*dt);
5997
5998 switch (gimple_code (*def_stmt))
5999 {
6000 case GIMPLE_PHI:
6001 *def = gimple_phi_result (*def_stmt);
6002 break;
6003
6004 case GIMPLE_ASSIGN:
6005 *def = gimple_assign_lhs (*def_stmt);
6006 break;
6007
6008 case GIMPLE_CALL:
6009 *def = gimple_call_lhs (*def_stmt);
6010 if (*def != NULL)
6011 break;
6012 /* FALLTHRU */
6013 default:
6014 if (vect_print_dump_info (REPORT_DETAILS))
6015 fprintf (vect_dump, "unsupported defining stmt: ");
6016 return false;
6017 }
6018
6019 return true;
6020 }
6021
6022 /* Function vect_is_simple_use_1.
6023
6024 Same as vect_is_simple_use but also determines the vector operand
6025 type of OPERAND and stores it to *VECTYPE. If the definition of
6026 OPERAND is vect_uninitialized_def, vect_constant_def or
6027 vect_external_def *VECTYPE will be set to NULL_TREE and the caller
6028 is responsible for computing the best suited vector type for the
6029 scalar operand. */
6030
6031 bool
6032 vect_is_simple_use_1 (tree operand, gimple stmt, loop_vec_info loop_vinfo,
6033 bb_vec_info bb_vinfo, gimple *def_stmt,
6034 tree *def, enum vect_def_type *dt, tree *vectype)
6035 {
6036 if (!vect_is_simple_use (operand, stmt, loop_vinfo, bb_vinfo, def_stmt,
6037 def, dt))
6038 return false;
6039
6040 /* Now get a vector type if the def is internal, otherwise supply
6041 NULL_TREE and leave it up to the caller to figure out a proper
6042 type for the use stmt. */
6043 if (*dt == vect_internal_def
6044 || *dt == vect_induction_def
6045 || *dt == vect_reduction_def
6046 || *dt == vect_double_reduction_def
6047 || *dt == vect_nested_cycle)
6048 {
6049 stmt_vec_info stmt_info = vinfo_for_stmt (*def_stmt);
6050
6051 if (STMT_VINFO_IN_PATTERN_P (stmt_info)
6052 && !STMT_VINFO_RELEVANT (stmt_info)
6053 && !STMT_VINFO_LIVE_P (stmt_info))
6054 stmt_info = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
6055
6056 *vectype = STMT_VINFO_VECTYPE (stmt_info);
6057 gcc_assert (*vectype != NULL_TREE);
6058 }
6059 else if (*dt == vect_uninitialized_def
6060 || *dt == vect_constant_def
6061 || *dt == vect_external_def)
6062 *vectype = NULL_TREE;
6063 else
6064 gcc_unreachable ();
6065
6066 return true;
6067 }
6068
6069
6070 /* Function supportable_widening_operation
6071
6072 Check whether an operation represented by the code CODE is a
6073 widening operation that is supported by the target platform in
6074 vector form (i.e., when operating on arguments of type VECTYPE_IN
6075 producing a result of type VECTYPE_OUT).
6076
6077 Widening operations we currently support are NOP (CONVERT), FLOAT
6078 and WIDEN_MULT. This function checks if these operations are supported
6079 by the target platform either directly (via vector tree-codes), or via
6080 target builtins.
6081
6082 Output:
6083 - CODE1 and CODE2 are codes of vector operations to be used when
6084 vectorizing the operation, if available.
6085 - DECL1 and DECL2 are decls of target builtin functions to be used
6086 when vectorizing the operation, if available. In this case,
6087 CODE1 and CODE2 are CALL_EXPR.
6088 - MULTI_STEP_CVT determines the number of required intermediate steps in
6089 case of multi-step conversion (like char->short->int - in that case
6090 MULTI_STEP_CVT will be 1).
6091 - INTERM_TYPES contains the intermediate type required to perform the
6092 widening operation (short in the above example). */
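/* A worked example (illustrative, assuming 128-bit vectors): widening
   chars to ints takes two steps,

     V16QI -> V8HI lo/hi   via VEC_UNPACK_LO_EXPR / VEC_UNPACK_HI_EXPR
     V8HI  -> V4SI lo/hi   via VEC_UNPACK_LO_EXPR / VEC_UNPACK_HI_EXPR

   so on success *MULTI_STEP_CVT is 1 and *INTERM_TYPES holds the
   intermediate short vector type.  */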
6093
6094 bool
6095 supportable_widening_operation (enum tree_code code, gimple stmt,
6096 tree vectype_out, tree vectype_in,
6097 tree *decl1, tree *decl2,
6098 enum tree_code *code1, enum tree_code *code2,
6099 int *multi_step_cvt,
6100 VEC (tree, heap) **interm_types)
6101 {
6102 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
6103 loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_info);
6104 struct loop *vect_loop = NULL;
6105 bool ordered_p;
6106 enum machine_mode vec_mode;
6107 enum insn_code icode1, icode2;
6108 optab optab1, optab2;
6109 tree vectype = vectype_in;
6110 tree wide_vectype = vectype_out;
6111 enum tree_code c1, c2;
6112 int i;
6113 tree prev_type, intermediate_type;
6114 enum machine_mode intermediate_mode, prev_mode;
6115 optab optab3, optab4;
6116
6117 *multi_step_cvt = 0;
6118 if (loop_info)
6119 vect_loop = LOOP_VINFO_LOOP (loop_info);
6120
6121 /* The result of a vectorized widening operation usually requires two vectors
6122 (because the widened results do not fit into one vector). The generated
6123 vector results would normally be expected to appear in the same
6124 order as in the original scalar computation, i.e. if 8 results are
6125 generated in each vector iteration, they are to be organized as follows:
6126 vect1: [res1,res2,res3,res4], vect2: [res5,res6,res7,res8].
6127
6128 However, in the special case that the result of the widening operation is
6129 used in a reduction computation only, the order doesn't matter (because
6130 when vectorizing a reduction we change the order of the computation).
6131 Some targets can take advantage of this and generate more efficient code.
6132 For example, targets like Altivec, that support widen_mult using a sequence
6133 of {mult_even,mult_odd} generate the following vectors:
6134 vect1: [res1,res3,res5,res7], vect2: [res2,res4,res6,res8].
6135
6136 When vectorizing outer-loops, we execute the inner-loop sequentially
6137 (each vectorized inner-loop iteration contributes to VF outer-loop
6138 iterations in parallel). We therefore don't allow changing the order
6139 of the computation in the inner-loop during outer-loop vectorization. */
6140
6141 if (vect_loop
6142 && STMT_VINFO_RELEVANT (stmt_info) == vect_used_by_reduction
6143 && !nested_in_vect_loop_p (vect_loop, stmt))
6144 ordered_p = false;
6145 else
6146 ordered_p = true;
6147
6148 if (!ordered_p
6149 && code == WIDEN_MULT_EXPR
6150 && targetm.vectorize.builtin_mul_widen_even
6151 && targetm.vectorize.builtin_mul_widen_even (vectype)
6152 && targetm.vectorize.builtin_mul_widen_odd
6153 && targetm.vectorize.builtin_mul_widen_odd (vectype))
6154 {
6155 if (vect_print_dump_info (REPORT_DETAILS))
6156 fprintf (vect_dump, "Unordered widening operation detected.");
6157
6158 *code1 = *code2 = CALL_EXPR;
6159 *decl1 = targetm.vectorize.builtin_mul_widen_even (vectype);
6160 *decl2 = targetm.vectorize.builtin_mul_widen_odd (vectype);
6161 return true;
6162 }
6163
6164 switch (code)
6165 {
6166 case WIDEN_MULT_EXPR:
6167 c1 = VEC_WIDEN_MULT_LO_EXPR;
6168 c2 = VEC_WIDEN_MULT_HI_EXPR;
6169 break;
6170
6171 case WIDEN_LSHIFT_EXPR:
6172 c1 = VEC_WIDEN_LSHIFT_LO_EXPR;
6173 c2 = VEC_WIDEN_LSHIFT_HI_EXPR;
6174 break;
6175
6176 CASE_CONVERT:
6177 c1 = VEC_UNPACK_LO_EXPR;
6178 c2 = VEC_UNPACK_HI_EXPR;
6179 break;
6180
6181 case FLOAT_EXPR:
6182 c1 = VEC_UNPACK_FLOAT_LO_EXPR;
6183 c2 = VEC_UNPACK_FLOAT_HI_EXPR;
6184 break;
6185
6186 case FIX_TRUNC_EXPR:
6187 /* ??? Not yet implemented due to missing VEC_UNPACK_FIX_TRUNC_HI_EXPR/
6188 VEC_UNPACK_FIX_TRUNC_LO_EXPR tree codes and optabs used for
6189 computing the operation. */
6190 return false;
6191
6192 default:
6193 gcc_unreachable ();
6194 }
6195
6196 if (BYTES_BIG_ENDIAN)
6197 {
6198 enum tree_code ctmp = c1;
6199 c1 = c2;
6200 c2 = ctmp;
6201 }
6202
6203 if (code == FIX_TRUNC_EXPR)
6204 {
6205 /* The signedness is determined from output operand. */
6206 optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
6207 optab2 = optab_for_tree_code (c2, vectype_out, optab_default);
6208 }
6209 else
6210 {
6211 optab1 = optab_for_tree_code (c1, vectype, optab_default);
6212 optab2 = optab_for_tree_code (c2, vectype, optab_default);
6213 }
6214
6215 if (!optab1 || !optab2)
6216 return false;
6217
6218 vec_mode = TYPE_MODE (vectype);
6219 if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing
6220 || (icode2 = optab_handler (optab2, vec_mode)) == CODE_FOR_nothing)
6221 return false;
6222
6223 *code1 = c1;
6224 *code2 = c2;
6225
6226 if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
6227 && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
6228 return true;
6229
6230 /* Check if it's a multi-step conversion that can be done using intermediate
6231 types. */
6232
6233 prev_type = vectype;
6234 prev_mode = vec_mode;
6235
6236 if (!CONVERT_EXPR_CODE_P (code))
6237 return false;
6238
6239 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
6240 intermediate steps in the promotion sequence. We try
6241 MAX_INTERM_CVT_STEPS to get to WIDE_VECTYPE, and fail if we do
6242 not. */
6243 *interm_types = VEC_alloc (tree, heap, MAX_INTERM_CVT_STEPS);
6244 for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
6245 {
6246 intermediate_mode = insn_data[icode1].operand[0].mode;
6247 intermediate_type
6248 = lang_hooks.types.type_for_mode (intermediate_mode,
6249 TYPE_UNSIGNED (prev_type));
6250 optab3 = optab_for_tree_code (c1, intermediate_type, optab_default);
6251 optab4 = optab_for_tree_code (c2, intermediate_type, optab_default);
6252
6253 if (!optab3 || !optab4
6254 || (icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing
6255 || insn_data[icode1].operand[0].mode != intermediate_mode
6256 || (icode2 = optab_handler (optab2, prev_mode)) == CODE_FOR_nothing
6257 || insn_data[icode2].operand[0].mode != intermediate_mode
6258 || ((icode1 = optab_handler (optab3, intermediate_mode))
6259 == CODE_FOR_nothing)
6260 || ((icode2 = optab_handler (optab4, intermediate_mode))
6261 == CODE_FOR_nothing))
6262 break;
6263
6264 VEC_quick_push (tree, *interm_types, intermediate_type);
6265 (*multi_step_cvt)++;
6266
6267 if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
6268 && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
6269 return true;
6270
6271 prev_type = intermediate_type;
6272 prev_mode = intermediate_mode;
6273 }
6274
6275 VEC_free (tree, heap, *interm_types);
6276 return false;
6277 }
6278
6279
6280 /* Function supportable_narrowing_operation
6281
6282 Check whether an operation represented by the code CODE is a
6283 narrowing operation that is supported by the target platform in
6284 vector form (i.e., when operating on arguments of type VECTYPE_IN
6285 and producing a result of type VECTYPE_OUT).
6286
6287 Narrowing operations we currently support are NOP (CONVERT) and
6288 FIX_TRUNC. This function checks if these operations are supported by
6289 the target platform directly via vector tree-codes.
6290
6291 Output:
6292 - CODE1 is the code of a vector operation to be used when
6293 vectorizing the operation, if available.
6294 - MULTI_STEP_CVT determines the number of required intermediate steps in
6295 case of multi-step conversion (like int->short->char - in that case
6296 MULTI_STEP_CVT will be 1).
6297 - INTERM_TYPES contains the intermediate type required to perform the
6298 narrowing operation (short in the above example). */
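/* A worked example (illustrative, assuming 128-bit vectors): narrowing
   ints to chars takes two steps,

     2 x V4SI -> V8HI    via VEC_PACK_TRUNC_EXPR
     2 x V8HI -> V16QI   via VEC_PACK_TRUNC_EXPR

   so on success *CODE1 is VEC_PACK_TRUNC_EXPR, *MULTI_STEP_CVT is 1, and
   *INTERM_TYPES holds the intermediate short vector type.  */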
6299
6300 bool
6301 supportable_narrowing_operation (enum tree_code code,
6302 tree vectype_out, tree vectype_in,
6303 enum tree_code *code1, int *multi_step_cvt,
6304 VEC (tree, heap) **interm_types)
6305 {
6306 enum machine_mode vec_mode;
6307 enum insn_code icode1;
6308 optab optab1, interm_optab;
6309 tree vectype = vectype_in;
6310 tree narrow_vectype = vectype_out;
6311 enum tree_code c1;
6312 tree intermediate_type;
6313 enum machine_mode intermediate_mode, prev_mode;
6314 int i;
6315 bool uns;
6316
6317 *multi_step_cvt = 0;
6318 switch (code)
6319 {
6320 CASE_CONVERT:
6321 c1 = VEC_PACK_TRUNC_EXPR;
6322 break;
6323
6324 case FIX_TRUNC_EXPR:
6325 c1 = VEC_PACK_FIX_TRUNC_EXPR;
6326 break;
6327
6328 case FLOAT_EXPR:
6329 /* ??? Not yet implemented due to missing VEC_PACK_FLOAT_EXPR
6330 tree code and optabs used for computing the operation. */
6331 return false;
6332
6333 default:
6334 gcc_unreachable ();
6335 }
6336
6337 if (code == FIX_TRUNC_EXPR)
6338 /* The signedness is determined from output operand. */
6339 optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
6340 else
6341 optab1 = optab_for_tree_code (c1, vectype, optab_default);
6342
6343 if (!optab1)
6344 return false;
6345
6346 vec_mode = TYPE_MODE (vectype);
6347 if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing)
6348 return false;
6349
6350 *code1 = c1;
6351
6352 if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
6353 return true;
6354
6355 /* Check if it's a multi-step conversion that can be done using intermediate
6356 types. */
6357 prev_mode = vec_mode;
6358 if (code == FIX_TRUNC_EXPR)
6359 uns = TYPE_UNSIGNED (vectype_out);
6360 else
6361 uns = TYPE_UNSIGNED (vectype);
6362
6363 /* For multi-step FIX_TRUNC_EXPR prefer signed floating to integer
6364 conversion over unsigned, as unsigned FIX_TRUNC_EXPR is often more
6365 costly than signed. */
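/* For instance (illustrative): pre-AVX-512 x86 only provides signed packed
   float-to-integer conversions, so going through a signed intermediate
   vector type of the same mode is typically cheaper than emulating the
   unsigned conversion, provided it yields the same result mode.  */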
6366 if (code == FIX_TRUNC_EXPR && uns)
6367 {
6368 enum insn_code icode2;
6369
6370 intermediate_type
6371 = lang_hooks.types.type_for_mode (TYPE_MODE (vectype_out), 0);
6372 interm_optab
6373 = optab_for_tree_code (c1, intermediate_type, optab_default);
6374 if (interm_optab != NULL
6375 && (icode2 = optab_handler (interm_optab, vec_mode)) != CODE_FOR_nothing
6376 && insn_data[icode1].operand[0].mode
6377 == insn_data[icode2].operand[0].mode)
6378 {
6379 uns = false;
6380 optab1 = interm_optab;
6381 icode1 = icode2;
6382 }
6383 }
6384
6385 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
6386 intermediate steps in the narrowing sequence. We try
6387 MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do not. */
6388 *interm_types = VEC_alloc (tree, heap, MAX_INTERM_CVT_STEPS);
6389 for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
6390 {
6391 intermediate_mode = insn_data[icode1].operand[0].mode;
6392 intermediate_type
6393 = lang_hooks.types.type_for_mode (intermediate_mode, uns);
6394 interm_optab
6395 = optab_for_tree_code (VEC_PACK_TRUNC_EXPR, intermediate_type,
6396 optab_default);
6397 if (!interm_optab
6398 || ((icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing)
6399 || insn_data[icode1].operand[0].mode != intermediate_mode
6400 || ((icode1 = optab_handler (interm_optab, intermediate_mode))
6401 == CODE_FOR_nothing))
6402 break;
6403
6404 VEC_quick_push (tree, *interm_types, intermediate_type);
6405 (*multi_step_cvt)++;
6406
6407 if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
6408 return true;
6409
6410 prev_mode = intermediate_mode;
6411 optab1 = interm_optab;
6412 }
6413
6414 VEC_free (tree, heap, *interm_types);
6415 return false;
6416 }