1 /* Statement Analysis and Transformation for Vectorization
2 Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011
3 Free Software Foundation, Inc.
4 Contributed by Dorit Naishlos <dorit@il.ibm.com>
5 and Ira Rosen <irar@il.ibm.com>
6
7 This file is part of GCC.
8
9 GCC is free software; you can redistribute it and/or modify it under
10 the terms of the GNU General Public License as published by the Free
11 Software Foundation; either version 3, or (at your option) any later
12 version.
13
14 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
15 WARRANTY; without even the implied warranty of MERCHANTABILITY or
16 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 for more details.
18
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
22
23 #include "config.h"
24 #include "system.h"
25 #include "coretypes.h"
26 #include "tm.h"
27 #include "ggc.h"
28 #include "tree.h"
29 #include "target.h"
30 #include "basic-block.h"
31 #include "tree-pretty-print.h"
32 #include "gimple-pretty-print.h"
33 #include "tree-flow.h"
34 #include "tree-dump.h"
35 #include "cfgloop.h"
36 #include "cfglayout.h"
37 #include "expr.h"
38 #include "recog.h"
39 #include "optabs.h"
40 #include "diagnostic-core.h"
41 #include "tree-vectorizer.h"
42 #include "langhooks.h"
43
44
45 /* Return a variable of type ELEM_TYPE[NELEMS]. */
46
47 static tree
48 create_vector_array (tree elem_type, unsigned HOST_WIDE_INT nelems)
49 {
50 return create_tmp_var (build_array_type_nelts (elem_type, nelems),
51 "vect_array");
52 }
53
54 /* ARRAY is an array of vectors created by create_vector_array.
55 Return an SSA_NAME for the vector at index N. The reference
56 is part of the vectorization of STMT and the vector is associated
57 with scalar destination SCALAR_DEST. */
58
59 static tree
60 read_vector_array (gimple stmt, gimple_stmt_iterator *gsi, tree scalar_dest,
61 tree array, unsigned HOST_WIDE_INT n)
62 {
63 tree vect_type, vect, vect_name, array_ref;
64 gimple new_stmt;
65
66 gcc_assert (TREE_CODE (TREE_TYPE (array)) == ARRAY_TYPE);
67 vect_type = TREE_TYPE (TREE_TYPE (array));
68 vect = vect_create_destination_var (scalar_dest, vect_type);
69 array_ref = build4 (ARRAY_REF, vect_type, array,
70 build_int_cst (size_type_node, n),
71 NULL_TREE, NULL_TREE);
72
73 new_stmt = gimple_build_assign (vect, array_ref);
74 vect_name = make_ssa_name (vect, new_stmt);
75 gimple_assign_set_lhs (new_stmt, vect_name);
76 vect_finish_stmt_generation (stmt, new_stmt, gsi);
77 mark_symbols_for_renaming (new_stmt);
78
79 return vect_name;
80 }
81
82 /* ARRAY is an array of vectors created by create_vector_array.
83 Emit code to store SSA_NAME VECT at index N of the array.
84 The store is part of the vectorization of STMT. */
85
86 static void
87 write_vector_array (gimple stmt, gimple_stmt_iterator *gsi, tree vect,
88 tree array, unsigned HOST_WIDE_INT n)
89 {
90 tree array_ref;
91 gimple new_stmt;
92
93 array_ref = build4 (ARRAY_REF, TREE_TYPE (vect), array,
94 build_int_cst (size_type_node, n),
95 NULL_TREE, NULL_TREE);
96
97 new_stmt = gimple_build_assign (array_ref, vect);
98 vect_finish_stmt_generation (stmt, new_stmt, gsi);
99 mark_symbols_for_renaming (new_stmt);
100 }
101
102 /* PTR is a pointer to an array of type TYPE. Return a representation
103 of *PTR. The memory reference replaces those in FIRST_DR
104 (and its group). */
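   /* Informally: the result is equivalent to *(TYPE *) PTR, except that the
      reference uses the alias pointer type of FIRST_DR, so it keeps the
      alias and alignment information of the scalar accesses it replaces
      (illustrative restatement of the code below).  */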
105
106 static tree
107 create_array_ref (tree type, tree ptr, struct data_reference *first_dr)
108 {
109 struct ptr_info_def *pi;
110 tree mem_ref, alias_ptr_type;
111
112 alias_ptr_type = reference_alias_ptr_type (DR_REF (first_dr));
113 mem_ref = build2 (MEM_REF, type, ptr, build_int_cst (alias_ptr_type, 0));
114 /* Arrays have the same alignment as their type. */
115 pi = get_ptr_info (ptr);
116 pi->align = TYPE_ALIGN_UNIT (type);
117 pi->misalign = 0;
118 return mem_ref;
119 }
120
121 /* Utility functions used by vect_mark_stmts_to_be_vectorized. */
122
123 /* Function vect_mark_relevant.
124
125 Mark STMT as "relevant for vectorization" and add it to WORKLIST. */
126
127 static void
128 vect_mark_relevant (VEC(gimple,heap) **worklist, gimple stmt,
129 enum vect_relevant relevant, bool live_p,
130 bool used_in_pattern)
131 {
132 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
133 enum vect_relevant save_relevant = STMT_VINFO_RELEVANT (stmt_info);
134 bool save_live_p = STMT_VINFO_LIVE_P (stmt_info);
135 gimple pattern_stmt;
136
137 if (vect_print_dump_info (REPORT_DETAILS))
138 fprintf (vect_dump, "mark relevant %d, live %d.", relevant, live_p);
139
140 /* If this stmt is an original stmt in a pattern, we might need to mark its
141 related pattern stmt instead of the original stmt. However, such stmts
142 may have their own uses that are not in any pattern; in such cases the
143 stmt itself should be marked. */
144 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
145 {
146 bool found = false;
147 if (!used_in_pattern)
148 {
149 imm_use_iterator imm_iter;
150 use_operand_p use_p;
151 gimple use_stmt;
152 tree lhs;
153
154 if (is_gimple_assign (stmt))
155 lhs = gimple_assign_lhs (stmt);
156 else
157 lhs = gimple_call_lhs (stmt);
158
159 /* This is an out-of-pattern use; if LHS has other uses that are
160 pattern uses, we should mark the stmt itself, and not the pattern
161 stmt. */
162 if (TREE_CODE (lhs) == SSA_NAME)
163 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
164 {
165 if (is_gimple_debug (USE_STMT (use_p)))
166 continue;
167 use_stmt = USE_STMT (use_p);
168
169 if (vinfo_for_stmt (use_stmt)
170 && STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (use_stmt)))
171 {
172 found = true;
173 break;
174 }
175 }
176 }
177
178 if (!found)
179 {
180 /* This is the last stmt in a sequence that was detected as a
181 pattern that can potentially be vectorized. Don't mark the stmt
182 as relevant/live because it's not going to be vectorized.
183 Instead mark the pattern-stmt that replaces it. */
184
185 pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
186
187 if (vect_print_dump_info (REPORT_DETAILS))
188 fprintf (vect_dump, "last stmt in pattern. don't mark"
189 " relevant/live.");
190 stmt_info = vinfo_for_stmt (pattern_stmt);
191 gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info) == stmt);
192 save_relevant = STMT_VINFO_RELEVANT (stmt_info);
193 save_live_p = STMT_VINFO_LIVE_P (stmt_info);
194 stmt = pattern_stmt;
195 }
196 }
197
198 STMT_VINFO_LIVE_P (stmt_info) |= live_p;
199 if (relevant > STMT_VINFO_RELEVANT (stmt_info))
200 STMT_VINFO_RELEVANT (stmt_info) = relevant;
201
202 if (STMT_VINFO_RELEVANT (stmt_info) == save_relevant
203 && STMT_VINFO_LIVE_P (stmt_info) == save_live_p)
204 {
205 if (vect_print_dump_info (REPORT_DETAILS))
206 fprintf (vect_dump, "already marked relevant/live.");
207 return;
208 }
209
210 VEC_safe_push (gimple, heap, *worklist, stmt);
211 }
212
213
214 /* Function vect_stmt_relevant_p.
215
216 Return true if STMT in loop that is represented by LOOP_VINFO is
217 "relevant for vectorization".
218
219 A stmt is considered "relevant for vectorization" if:
220 - it has uses outside the loop.
221 - it has vdefs (it alters memory).
222 - control stmts in the loop (except for the exit condition).
223
224 CHECKME: what other side effects would the vectorizer allow? */
225
226 static bool
227 vect_stmt_relevant_p (gimple stmt, loop_vec_info loop_vinfo,
228 enum vect_relevant *relevant, bool *live_p)
229 {
230 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
231 ssa_op_iter op_iter;
232 imm_use_iterator imm_iter;
233 use_operand_p use_p;
234 def_operand_p def_p;
235
236 *relevant = vect_unused_in_scope;
237 *live_p = false;
238
239 /* cond stmt other than loop exit cond. */
240 if (is_ctrl_stmt (stmt)
241 && STMT_VINFO_TYPE (vinfo_for_stmt (stmt))
242 != loop_exit_ctrl_vec_info_type)
243 *relevant = vect_used_in_scope;
244
245 /* changing memory. */
246 if (gimple_code (stmt) != GIMPLE_PHI)
247 if (gimple_vdef (stmt))
248 {
249 if (vect_print_dump_info (REPORT_DETAILS))
250 fprintf (vect_dump, "vec_stmt_relevant_p: stmt has vdefs.");
251 *relevant = vect_used_in_scope;
252 }
253
254 /* uses outside the loop. */
255 FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt, op_iter, SSA_OP_DEF)
256 {
257 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, DEF_FROM_PTR (def_p))
258 {
259 basic_block bb = gimple_bb (USE_STMT (use_p));
260 if (!flow_bb_inside_loop_p (loop, bb))
261 {
262 if (vect_print_dump_info (REPORT_DETAILS))
263 fprintf (vect_dump, "vec_stmt_relevant_p: used out of loop.");
264
265 if (is_gimple_debug (USE_STMT (use_p)))
266 continue;
267
268 /* We expect all such uses to be in the loop exit phis
269 (because of loop-closed SSA form).  */
270 gcc_assert (gimple_code (USE_STMT (use_p)) == GIMPLE_PHI);
271 gcc_assert (bb == single_exit (loop)->dest);
272
273 *live_p = true;
274 }
275 }
276 }
277
278 return (*live_p || *relevant);
279 }
280
281
282 /* Function exist_non_indexing_operands_for_use_p
283
284 USE is one of the uses attached to STMT. Check if USE is
285 used in STMT for anything other than indexing an array. */
286
287 static bool
288 exist_non_indexing_operands_for_use_p (tree use, gimple stmt)
289 {
290 tree operand;
291 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
292
293 /* USE corresponds to some operand in STMT. If there is no data
294 reference in STMT, then any operand that corresponds to USE
295 is not indexing an array. */
296 if (!STMT_VINFO_DATA_REF (stmt_info))
297 return true;
298
299 /* STMT has a data_ref.  FORNOW this means that it is of one of
300 the following forms:
301 -1- ARRAY_REF = var
302 -2- var = ARRAY_REF
303 (This should have been verified in analyze_data_refs).
304
305 'var' in the second case corresponds to a def, not a use,
306 so USE cannot correspond to any operands that are not used
307 for array indexing.
308
309 Therefore, all we need to check is if STMT falls into the
310 first case, and whether var corresponds to USE. */
311
312 if (!gimple_assign_copy_p (stmt))
313 return false;
314 if (TREE_CODE (gimple_assign_lhs (stmt)) == SSA_NAME)
315 return false;
316 operand = gimple_assign_rhs1 (stmt);
317 if (TREE_CODE (operand) != SSA_NAME)
318 return false;
319
320 if (operand == use)
321 return true;
322
323 return false;
324 }
325
326
327 /*
328 Function process_use.
329
330 Inputs:
331 - a USE in STMT in a loop represented by LOOP_VINFO
332 - LIVE_P, RELEVANT - enum values to be set in the STMT_VINFO of the stmt
333 that defined USE. This is done by calling mark_relevant and passing it
334 the WORKLIST (to add DEF_STMT to the WORKLIST in case it is relevant).
335 - FORCE is true if exist_non_indexing_operands_for_use_p check shouldn't
336 be performed.
337
338 Outputs:
339 Generally, LIVE_P and RELEVANT are used to define the liveness and
340 relevance info of the DEF_STMT of this USE:
341 STMT_VINFO_LIVE_P (DEF_STMT_info) <-- live_p
342 STMT_VINFO_RELEVANT (DEF_STMT_info) <-- relevant
343 Exceptions:
344 - case 1: If USE is used only for address computations (e.g. array indexing),
345 which does not need to be directly vectorized, then the liveness/relevance
346 of the respective DEF_STMT is left unchanged.
347 - case 2: If STMT is a reduction phi and DEF_STMT is a reduction stmt, we
348 skip DEF_STMT because it has already been processed.
349 - case 3: If DEF_STMT and STMT are in different nests, then "relevant" will
350 be modified accordingly.
351
352 Return true if everything is as expected. Return false otherwise. */
353
354 static bool
355 process_use (gimple stmt, tree use, loop_vec_info loop_vinfo, bool live_p,
356 enum vect_relevant relevant, VEC(gimple,heap) **worklist,
357 bool force)
358 {
359 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
360 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
361 stmt_vec_info dstmt_vinfo;
362 basic_block bb, def_bb;
363 tree def;
364 gimple def_stmt;
365 enum vect_def_type dt;
366
367 /* case 1: we are only interested in uses that need to be vectorized. Uses
368 that are used for address computation are not considered relevant. */
369 if (!force && !exist_non_indexing_operands_for_use_p (use, stmt))
370 return true;
371
372 if (!vect_is_simple_use (use, loop_vinfo, NULL, &def_stmt, &def, &dt))
373 {
374 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
375 fprintf (vect_dump, "not vectorized: unsupported use in stmt.");
376 return false;
377 }
378
379 if (!def_stmt || gimple_nop_p (def_stmt))
380 return true;
381
382 def_bb = gimple_bb (def_stmt);
383 if (!flow_bb_inside_loop_p (loop, def_bb))
384 {
385 if (vect_print_dump_info (REPORT_DETAILS))
386 fprintf (vect_dump, "def_stmt is out of loop.");
387 return true;
388 }
389
390 /* case 2: A reduction phi (STMT) defined by a reduction stmt (DEF_STMT).
391 DEF_STMT must have already been processed, because this should be the
392 only way that STMT, which is a reduction-phi, was put in the worklist,
393 as there should be no other uses for DEF_STMT in the loop. So we just
394 check that everything is as expected, and we are done. */
395 dstmt_vinfo = vinfo_for_stmt (def_stmt);
396 bb = gimple_bb (stmt);
397 if (gimple_code (stmt) == GIMPLE_PHI
398 && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
399 && gimple_code (def_stmt) != GIMPLE_PHI
400 && STMT_VINFO_DEF_TYPE (dstmt_vinfo) == vect_reduction_def
401 && bb->loop_father == def_bb->loop_father)
402 {
403 if (vect_print_dump_info (REPORT_DETAILS))
404 fprintf (vect_dump, "reduc-stmt defining reduc-phi in the same nest.");
405 if (STMT_VINFO_IN_PATTERN_P (dstmt_vinfo))
406 dstmt_vinfo = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (dstmt_vinfo));
407 gcc_assert (STMT_VINFO_RELEVANT (dstmt_vinfo) < vect_used_by_reduction);
408 gcc_assert (STMT_VINFO_LIVE_P (dstmt_vinfo)
409 || STMT_VINFO_RELEVANT (dstmt_vinfo) > vect_unused_in_scope);
410 return true;
411 }
412
413 /* case 3a: outer-loop stmt defining an inner-loop stmt:
414 outer-loop-header-bb:
415 d = def_stmt
416 inner-loop:
417 stmt # use (d)
418 outer-loop-tail-bb:
419 ... */
420 if (flow_loop_nested_p (def_bb->loop_father, bb->loop_father))
421 {
422 if (vect_print_dump_info (REPORT_DETAILS))
423 fprintf (vect_dump, "outer-loop def-stmt defining inner-loop stmt.");
424
425 switch (relevant)
426 {
427 case vect_unused_in_scope:
428 relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_nested_cycle) ?
429 vect_used_in_scope : vect_unused_in_scope;
430 break;
431
432 case vect_used_in_outer_by_reduction:
433 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
434 relevant = vect_used_by_reduction;
435 break;
436
437 case vect_used_in_outer:
438 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
439 relevant = vect_used_in_scope;
440 break;
441
442 case vect_used_in_scope:
443 break;
444
445 default:
446 gcc_unreachable ();
447 }
448 }
449
450 /* case 3b: inner-loop stmt defining an outer-loop stmt:
451 outer-loop-header-bb:
452 ...
453 inner-loop:
454 d = def_stmt
455 outer-loop-tail-bb (or outer-loop-exit-bb in double reduction):
456 stmt # use (d) */
457 else if (flow_loop_nested_p (bb->loop_father, def_bb->loop_father))
458 {
459 if (vect_print_dump_info (REPORT_DETAILS))
460 fprintf (vect_dump, "inner-loop def-stmt defining outer-loop stmt.");
461
462 switch (relevant)
463 {
464 case vect_unused_in_scope:
465 relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
466 || STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_double_reduction_def) ?
467 vect_used_in_outer_by_reduction : vect_unused_in_scope;
468 break;
469
470 case vect_used_by_reduction:
471 relevant = vect_used_in_outer_by_reduction;
472 break;
473
474 case vect_used_in_scope:
475 relevant = vect_used_in_outer;
476 break;
477
478 default:
479 gcc_unreachable ();
480 }
481 }
482
483 vect_mark_relevant (worklist, def_stmt, relevant, live_p,
484 is_pattern_stmt_p (stmt_vinfo));
485 return true;
486 }
487
488
489 /* Function vect_mark_stmts_to_be_vectorized.
490
491 Not all stmts in the loop need to be vectorized. For example:
492
493 for i...
494 for j...
495 1. T0 = i + j
496 2. T1 = a[T0]
497
498 3. j = j + 1
499
500 Stmts 1 and 3 do not need to be vectorized, because loop control and
501 addressing of vectorized data-refs are handled differently.
502
503 This pass detects such stmts. */
504
505 bool
506 vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo)
507 {
508 VEC(gimple,heap) *worklist;
509 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
510 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
511 unsigned int nbbs = loop->num_nodes;
512 gimple_stmt_iterator si;
513 gimple stmt;
514 unsigned int i;
515 stmt_vec_info stmt_vinfo;
516 basic_block bb;
517 gimple phi;
518 bool live_p;
519 enum vect_relevant relevant, tmp_relevant;
520 enum vect_def_type def_type;
521
522 if (vect_print_dump_info (REPORT_DETAILS))
523 fprintf (vect_dump, "=== vect_mark_stmts_to_be_vectorized ===");
524
525 worklist = VEC_alloc (gimple, heap, 64);
526
527 /* 1. Init worklist. */
528 for (i = 0; i < nbbs; i++)
529 {
530 bb = bbs[i];
531 for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
532 {
533 phi = gsi_stmt (si);
534 if (vect_print_dump_info (REPORT_DETAILS))
535 {
536 fprintf (vect_dump, "init: phi relevant? ");
537 print_gimple_stmt (vect_dump, phi, 0, TDF_SLIM);
538 }
539
540 if (vect_stmt_relevant_p (phi, loop_vinfo, &relevant, &live_p))
541 vect_mark_relevant (&worklist, phi, relevant, live_p, false);
542 }
543 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
544 {
545 stmt = gsi_stmt (si);
546 if (vect_print_dump_info (REPORT_DETAILS))
547 {
548 fprintf (vect_dump, "init: stmt relevant? ");
549 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
550 }
551
552 if (vect_stmt_relevant_p (stmt, loop_vinfo, &relevant, &live_p))
553 vect_mark_relevant (&worklist, stmt, relevant, live_p, false);
554 }
555 }
556
557 /* 2. Process_worklist */
558 while (VEC_length (gimple, worklist) > 0)
559 {
560 use_operand_p use_p;
561 ssa_op_iter iter;
562
563 stmt = VEC_pop (gimple, worklist);
564 if (vect_print_dump_info (REPORT_DETAILS))
565 {
566 fprintf (vect_dump, "worklist: examine stmt: ");
567 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
568 }
569
570 /* Examine the USEs of STMT. For each USE, mark the stmt that defines it
571 (DEF_STMT) as relevant/irrelevant and live/dead according to the
572 liveness and relevance properties of STMT. */
573 stmt_vinfo = vinfo_for_stmt (stmt);
574 relevant = STMT_VINFO_RELEVANT (stmt_vinfo);
575 live_p = STMT_VINFO_LIVE_P (stmt_vinfo);
576
577 /* Generally, the liveness and relevance properties of STMT are
578 propagated as is to the DEF_STMTs of its USEs:
579 live_p <-- STMT_VINFO_LIVE_P (STMT_VINFO)
580 relevant <-- STMT_VINFO_RELEVANT (STMT_VINFO)
581
582 One exception is when STMT has been identified as defining a reduction
583 variable; in this case we set the liveness/relevance as follows:
584 live_p = false
585 relevant = vect_used_by_reduction
586 This is because we distinguish between two kinds of relevant stmts -
587 those that are used by a reduction computation, and those that are
588 (also) used by a regular computation. This allows us later on to
589 identify stmts that are used solely by a reduction, and therefore the
590 order of the results that they produce does not have to be kept. */
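      /* For instance, in a simple summation 's_next = s + a[i]' the addition
	 feeds only the reduction PHI, so it is marked vect_used_by_reduction
	 and the order of its partial results may be changed when the
	 reduction is finally vectorized (illustrative example).  */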
591
592 def_type = STMT_VINFO_DEF_TYPE (stmt_vinfo);
593 tmp_relevant = relevant;
594 switch (def_type)
595 {
596 case vect_reduction_def:
597 switch (tmp_relevant)
598 {
599 case vect_unused_in_scope:
600 relevant = vect_used_by_reduction;
601 break;
602
603 case vect_used_by_reduction:
604 if (gimple_code (stmt) == GIMPLE_PHI)
605 break;
606 /* fall through */
607
608 default:
609 if (vect_print_dump_info (REPORT_DETAILS))
610 fprintf (vect_dump, "unsupported use of reduction.");
611
612 VEC_free (gimple, heap, worklist);
613 return false;
614 }
615
616 live_p = false;
617 break;
618
619 case vect_nested_cycle:
620 if (tmp_relevant != vect_unused_in_scope
621 && tmp_relevant != vect_used_in_outer_by_reduction
622 && tmp_relevant != vect_used_in_outer)
623 {
624 if (vect_print_dump_info (REPORT_DETAILS))
625 fprintf (vect_dump, "unsupported use of nested cycle.");
626
627 VEC_free (gimple, heap, worklist);
628 return false;
629 }
630
631 live_p = false;
632 break;
633
634 case vect_double_reduction_def:
635 if (tmp_relevant != vect_unused_in_scope
636 && tmp_relevant != vect_used_by_reduction)
637 {
638 if (vect_print_dump_info (REPORT_DETAILS))
639 fprintf (vect_dump, "unsupported use of double reduction.");
640
641 VEC_free (gimple, heap, worklist);
642 return false;
643 }
644
645 live_p = false;
646 break;
647
648 default:
649 break;
650 }
651
652 if (is_pattern_stmt_p (stmt_vinfo))
653 {
654 /* Pattern statements are not inserted into the code, so
655 FOR_EACH_PHI_OR_STMT_USE optimizes their operands out, and we
656 have to scan the RHS or function arguments instead. */
657 if (is_gimple_assign (stmt))
658 {
659 enum tree_code rhs_code = gimple_assign_rhs_code (stmt);
660 tree op = gimple_assign_rhs1 (stmt);
661
662 i = 1;
663 if (rhs_code == COND_EXPR && COMPARISON_CLASS_P (op))
664 {
665 if (!process_use (stmt, TREE_OPERAND (op, 0), loop_vinfo,
666 live_p, relevant, &worklist, false)
667 || !process_use (stmt, TREE_OPERAND (op, 1), loop_vinfo,
668 live_p, relevant, &worklist, false))
669 {
670 VEC_free (gimple, heap, worklist);
671 return false;
672 }
673 i = 2;
674 }
675 for (; i < gimple_num_ops (stmt); i++)
676 {
677 op = gimple_op (stmt, i);
678 if (!process_use (stmt, op, loop_vinfo, live_p, relevant,
679 &worklist, false))
680 {
681 VEC_free (gimple, heap, worklist);
682 return false;
683 }
684 }
685 }
686 else if (is_gimple_call (stmt))
687 {
688 for (i = 0; i < gimple_call_num_args (stmt); i++)
689 {
690 tree arg = gimple_call_arg (stmt, i);
691 if (!process_use (stmt, arg, loop_vinfo, live_p, relevant,
692 &worklist, false))
693 {
694 VEC_free (gimple, heap, worklist);
695 return false;
696 }
697 }
698 }
699 }
700 else
701 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
702 {
703 tree op = USE_FROM_PTR (use_p);
704 if (!process_use (stmt, op, loop_vinfo, live_p, relevant,
705 &worklist, false))
706 {
707 VEC_free (gimple, heap, worklist);
708 return false;
709 }
710 }
711
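      /* For gather loads, the offset operand is only used for address
	 computation and would normally be skipped by
	 exist_non_indexing_operands_for_use_p; pass FORCE so that the stmt
	 defining the offset is still marked relevant.  */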
712 if (STMT_VINFO_GATHER_P (stmt_vinfo))
713 {
714 tree off;
715 tree decl = vect_check_gather (stmt, loop_vinfo, NULL, &off, NULL);
716 gcc_assert (decl);
717 if (!process_use (stmt, off, loop_vinfo, live_p, relevant,
718 &worklist, true))
719 {
720 VEC_free (gimple, heap, worklist);
721 return false;
722 }
723 }
724 } /* while worklist */
725
726 VEC_free (gimple, heap, worklist);
727 return true;
728 }
729
730
731 /* Get the cost of a statement by calling the target's vectorization cost hook. */
732
733 static inline
734 int vect_get_stmt_cost (enum vect_cost_for_stmt type_of_cost)
735 {
736 tree dummy_type = NULL;
737 int dummy = 0;
738
739 return targetm.vectorize.builtin_vectorization_cost (type_of_cost,
740 dummy_type, dummy);
741 }
742
743
744 /* Get cost for STMT. */
745
746 int
747 cost_for_stmt (gimple stmt)
748 {
749 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
750
751 switch (STMT_VINFO_TYPE (stmt_info))
752 {
753 case load_vec_info_type:
754 return vect_get_stmt_cost (scalar_load);
755 case store_vec_info_type:
756 return vect_get_stmt_cost (scalar_store);
757 case op_vec_info_type:
758 case condition_vec_info_type:
759 case assignment_vec_info_type:
760 case reduc_vec_info_type:
761 case induc_vec_info_type:
762 case type_promotion_vec_info_type:
763 case type_demotion_vec_info_type:
764 case type_conversion_vec_info_type:
765 case call_vec_info_type:
766 return vect_get_stmt_cost (scalar_stmt);
767 case undef_vec_info_type:
768 default:
769 gcc_unreachable ();
770 }
771 }
772
773 /* Function vect_model_simple_cost.
774
775 Models cost for simple operations, i.e. those that only emit ncopies of a
776 single op. Right now, this does not account for multiple insns that could
777 be generated for the single vector op. We will handle that shortly. */
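/* As a rough illustration (the actual numbers come from the target's cost
   hook): with ncopies = 2 and one constant operand, this charges two vector
   statements inside the loop and one statement outside of it for building
   the invariant vector.  */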
778
779 void
780 vect_model_simple_cost (stmt_vec_info stmt_info, int ncopies,
781 enum vect_def_type *dt, slp_tree slp_node)
782 {
783 int i;
784 int inside_cost = 0, outside_cost = 0;
785
786 /* The SLP costs were already calculated during SLP tree build. */
787 if (PURE_SLP_STMT (stmt_info))
788 return;
789
790 inside_cost = ncopies * vect_get_stmt_cost (vector_stmt);
791
792 /* FORNOW: Assuming maximum 2 args per stmt. */
793 for (i = 0; i < 2; i++)
794 {
795 if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
796 outside_cost += vect_get_stmt_cost (vector_stmt);
797 }
798
799 if (vect_print_dump_info (REPORT_COST))
800 fprintf (vect_dump, "vect_model_simple_cost: inside_cost = %d, "
801 "outside_cost = %d .", inside_cost, outside_cost);
802
803 /* Set the costs either in STMT_INFO or SLP_NODE (if exists). */
804 stmt_vinfo_set_inside_of_loop_cost (stmt_info, slp_node, inside_cost);
805 stmt_vinfo_set_outside_of_loop_cost (stmt_info, slp_node, outside_cost);
806 }
807
808
809 /* Function vect_cost_strided_group_size
810
811 For strided load or store, return the group_size only if it is the first
812 load or store of a group, else return 1. This ensures that group size is
813 only returned once per group. */
814
815 static int
816 vect_cost_strided_group_size (stmt_vec_info stmt_info)
817 {
818 gimple first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
819
820 if (first_stmt == STMT_VINFO_STMT (stmt_info))
821 return GROUP_SIZE (stmt_info);
822
823 return 1;
824 }
825
826
827 /* Function vect_model_store_cost
828
829 Models cost for stores. In the case of strided accesses, one access
830 has the overhead of the strided access attributed to it. */
831
832 void
833 vect_model_store_cost (stmt_vec_info stmt_info, int ncopies,
834 bool store_lanes_p, enum vect_def_type dt,
835 slp_tree slp_node)
836 {
837 int group_size;
838 unsigned int inside_cost = 0, outside_cost = 0;
839 struct data_reference *first_dr;
840 gimple first_stmt;
841
842 /* The SLP costs were already calculated during SLP tree build. */
843 if (PURE_SLP_STMT (stmt_info))
844 return;
845
846 if (dt == vect_constant_def || dt == vect_external_def)
847 outside_cost = vect_get_stmt_cost (scalar_to_vec);
848
849 /* Strided access? */
850 if (STMT_VINFO_STRIDED_ACCESS (stmt_info))
851 {
852 if (slp_node)
853 {
854 first_stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (slp_node), 0);
855 group_size = 1;
856 }
857 else
858 {
859 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
860 group_size = vect_cost_strided_group_size (stmt_info);
861 }
862
863 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
864 }
865 /* Not a strided access. */
866 else
867 {
868 group_size = 1;
869 first_dr = STMT_VINFO_DATA_REF (stmt_info);
870 }
871
872 /* We assume that the cost of a single store-lanes instruction is
873 equivalent to the cost of GROUP_SIZE separate stores. If a strided
874 access is instead being provided by a permute-and-store operation,
875 include the cost of the permutes. */
876 if (!store_lanes_p && group_size > 1)
877 {
878 /* Uses a high and low interleave operation for each needed permute. */
879 inside_cost = ncopies * exact_log2(group_size) * group_size
880 * vect_get_stmt_cost (vector_stmt);
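      /* E.g. with group_size = 4 and ncopies = 1 this charges
	 log2 (4) * 4 = 8 interleave operations, each costed as a generic
	 vector_stmt (illustrative figures).  */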
881
882 if (vect_print_dump_info (REPORT_COST))
883 fprintf (vect_dump, "vect_model_store_cost: strided group_size = %d .",
884 group_size);
885
886 }
887
888 /* Costs of the stores. */
889 vect_get_store_cost (first_dr, ncopies, &inside_cost);
890
891 if (vect_print_dump_info (REPORT_COST))
892 fprintf (vect_dump, "vect_model_store_cost: inside_cost = %d, "
893 "outside_cost = %d .", inside_cost, outside_cost);
894
895 /* Set the costs either in STMT_INFO or SLP_NODE (if exists). */
896 stmt_vinfo_set_inside_of_loop_cost (stmt_info, slp_node, inside_cost);
897 stmt_vinfo_set_outside_of_loop_cost (stmt_info, slp_node, outside_cost);
898 }
899
900
901 /* Calculate cost of DR's memory access. */
902 void
903 vect_get_store_cost (struct data_reference *dr, int ncopies,
904 unsigned int *inside_cost)
905 {
906 int alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
907
908 switch (alignment_support_scheme)
909 {
910 case dr_aligned:
911 {
912 *inside_cost += ncopies * vect_get_stmt_cost (vector_store);
913
914 if (vect_print_dump_info (REPORT_COST))
915 fprintf (vect_dump, "vect_model_store_cost: aligned.");
916
917 break;
918 }
919
920 case dr_unaligned_supported:
921 {
922 gimple stmt = DR_STMT (dr);
923 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
924 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
925
926 /* Here, we assign an additional cost for the unaligned store. */
927 *inside_cost += ncopies
928 * targetm.vectorize.builtin_vectorization_cost (unaligned_store,
929 vectype, DR_MISALIGNMENT (dr));
930
931 if (vect_print_dump_info (REPORT_COST))
932 fprintf (vect_dump, "vect_model_store_cost: unaligned supported by "
933 "hardware.");
934
935 break;
936 }
937
938 default:
939 gcc_unreachable ();
940 }
941 }
942
943
944 /* Function vect_model_load_cost
945
946 Models cost for loads.  In the case of strided accesses, one access
947 has the overhead of the strided access attributed to it. Since unaligned
948 accesses are supported for loads, we also account for the costs of the
949 access scheme chosen. */
950
951 void
952 vect_model_load_cost (stmt_vec_info stmt_info, int ncopies, bool load_lanes_p,
953 slp_tree slp_node)
954 {
955 int group_size;
956 gimple first_stmt;
957 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr;
958 unsigned int inside_cost = 0, outside_cost = 0;
959
960 /* The SLP costs were already calculated during SLP tree build. */
961 if (PURE_SLP_STMT (stmt_info))
962 return;
963
964 /* Strided accesses? */
965 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
966 if (STMT_VINFO_STRIDED_ACCESS (stmt_info) && first_stmt && !slp_node)
967 {
968 group_size = vect_cost_strided_group_size (stmt_info);
969 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
970 }
971 /* Not a strided access. */
972 else
973 {
974 group_size = 1;
975 first_dr = dr;
976 }
977
978 /* We assume that the cost of a single load-lanes instruction is
979 equivalent to the cost of GROUP_SIZE separate loads. If a strided
980 access is instead being provided by a load-and-permute operation,
981 include the cost of the permutes. */
982 if (!load_lanes_p && group_size > 1)
983 {
984 /* Uses even and odd extract operations for each needed permute. */
985 inside_cost = ncopies * exact_log2(group_size) * group_size
986 * vect_get_stmt_cost (vector_stmt);
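      /* For example, group_size = 8 with ncopies = 2 charges
	 2 * log2 (8) * 8 = 48 extract operations for the permutation
	 (illustrative figures).  */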
987
988 if (vect_print_dump_info (REPORT_COST))
989 fprintf (vect_dump, "vect_model_load_cost: strided group_size = %d .",
990 group_size);
991 }
992
993 /* The loads themselves. */
994 vect_get_load_cost (first_dr, ncopies,
995 ((!STMT_VINFO_STRIDED_ACCESS (stmt_info)) || group_size > 1
996 || slp_node),
997 &inside_cost, &outside_cost);
998
999 if (vect_print_dump_info (REPORT_COST))
1000 fprintf (vect_dump, "vect_model_load_cost: inside_cost = %d, "
1001 "outside_cost = %d .", inside_cost, outside_cost);
1002
1003 /* Set the costs either in STMT_INFO or SLP_NODE (if exists). */
1004 stmt_vinfo_set_inside_of_loop_cost (stmt_info, slp_node, inside_cost);
1005 stmt_vinfo_set_outside_of_loop_cost (stmt_info, slp_node, outside_cost);
1006 }
1007
1008
1009 /* Calculate cost of DR's memory access. */
1010 void
1011 vect_get_load_cost (struct data_reference *dr, int ncopies,
1012 bool add_realign_cost, unsigned int *inside_cost,
1013 unsigned int *outside_cost)
1014 {
1015 int alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
1016
1017 switch (alignment_support_scheme)
1018 {
1019 case dr_aligned:
1020 {
1021 *inside_cost += ncopies * vect_get_stmt_cost (vector_load);
1022
1023 if (vect_print_dump_info (REPORT_COST))
1024 fprintf (vect_dump, "vect_model_load_cost: aligned.");
1025
1026 break;
1027 }
1028 case dr_unaligned_supported:
1029 {
1030 gimple stmt = DR_STMT (dr);
1031 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1032 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1033
1034 /* Here, we assign an additional cost for the unaligned load. */
1035 *inside_cost += ncopies
1036 * targetm.vectorize.builtin_vectorization_cost (unaligned_load,
1037 vectype, DR_MISALIGNMENT (dr));
1038 if (vect_print_dump_info (REPORT_COST))
1039 fprintf (vect_dump, "vect_model_load_cost: unaligned supported by "
1040 "hardware.");
1041
1042 break;
1043 }
1044 case dr_explicit_realign:
1045 {
1046 *inside_cost += ncopies * (2 * vect_get_stmt_cost (vector_load)
1047 + vect_get_stmt_cost (vector_stmt));
1048
1049 /* FIXME: If the misalignment remains fixed across the iterations of
1050 the containing loop, the following cost should be added to the
1051 outside costs. */
1052 if (targetm.vectorize.builtin_mask_for_load)
1053 *inside_cost += vect_get_stmt_cost (vector_stmt);
1054
1055 break;
1056 }
1057 case dr_explicit_realign_optimized:
1058 {
1059 if (vect_print_dump_info (REPORT_COST))
1060 fprintf (vect_dump, "vect_model_load_cost: unaligned software "
1061 "pipelined.");
1062
1063 /* Unaligned software pipeline has a load of an address, an initial
1064 load, and possibly a mask operation to "prime" the loop. However,
1065 if this is an access in a group of loads, which provide strided
1066 access, then the above cost should only be considered for one
1067 access in the group. Inside the loop, there is a load op
1068 and a realignment op. */
1069
1070 if (add_realign_cost)
1071 {
1072 *outside_cost = 2 * vect_get_stmt_cost (vector_stmt);
1073 if (targetm.vectorize.builtin_mask_for_load)
1074 *outside_cost += vect_get_stmt_cost (vector_stmt);
1075 }
1076
1077 *inside_cost += ncopies * (vect_get_stmt_cost (vector_load)
1078 + vect_get_stmt_cost (vector_stmt));
1079 break;
1080 }
1081
1082 default:
1083 gcc_unreachable ();
1084 }
1085 }
1086
1087
1088 /* Function vect_init_vector.
1089
1090 Insert a new stmt (INIT_STMT) that initializes a new vector variable with
1091 the vector elements of VECTOR_VAR.  Place the initialization at GSI if it
1092 is not NULL. Otherwise, place the initialization at the loop preheader.
1093 Return the DEF of INIT_STMT.
1094 It will be used in the vectorization of STMT. */
1095
1096 tree
1097 vect_init_vector (gimple stmt, tree vector_var, tree vector_type,
1098 gimple_stmt_iterator *gsi)
1099 {
1100 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
1101 tree new_var;
1102 gimple init_stmt;
1103 tree vec_oprnd;
1104 edge pe;
1105 tree new_temp;
1106 basic_block new_bb;
1107
1108 new_var = vect_get_new_vect_var (vector_type, vect_simple_var, "cst_");
1109 add_referenced_var (new_var);
1110 init_stmt = gimple_build_assign (new_var, vector_var);
1111 new_temp = make_ssa_name (new_var, init_stmt);
1112 gimple_assign_set_lhs (init_stmt, new_temp);
1113
1114 if (gsi)
1115 vect_finish_stmt_generation (stmt, init_stmt, gsi);
1116 else
1117 {
1118 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1119
1120 if (loop_vinfo)
1121 {
1122 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1123
1124 if (nested_in_vect_loop_p (loop, stmt))
1125 loop = loop->inner;
1126
1127 pe = loop_preheader_edge (loop);
1128 new_bb = gsi_insert_on_edge_immediate (pe, init_stmt);
1129 gcc_assert (!new_bb);
1130 }
1131 else
1132 {
1133 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
1134 basic_block bb;
1135 gimple_stmt_iterator gsi_bb_start;
1136
1137 gcc_assert (bb_vinfo);
1138 bb = BB_VINFO_BB (bb_vinfo);
1139 gsi_bb_start = gsi_after_labels (bb);
1140 gsi_insert_before (&gsi_bb_start, init_stmt, GSI_SAME_STMT);
1141 }
1142 }
1143
1144 if (vect_print_dump_info (REPORT_DETAILS))
1145 {
1146 fprintf (vect_dump, "created new init_stmt: ");
1147 print_gimple_stmt (vect_dump, init_stmt, 0, TDF_SLIM);
1148 }
1149
1150 vec_oprnd = gimple_assign_lhs (init_stmt);
1151 return vec_oprnd;
1152 }
1153
1154
1155 /* Function vect_get_vec_def_for_operand.
1156
1157 OP is an operand in STMT. This function returns a (vector) def that will be
1158 used in the vectorized stmt for STMT.
1159
1160 In the case that OP is an SSA_NAME which is defined in the loop, then
1161 STMT_VINFO_VEC_STMT of the defining stmt holds the relevant def.
1162
1163 In case OP is an invariant or constant, a new stmt that creates a vector def
1164 needs to be introduced. */
1165
1166 tree
1167 vect_get_vec_def_for_operand (tree op, gimple stmt, tree *scalar_def)
1168 {
1169 tree vec_oprnd;
1170 gimple vec_stmt;
1171 gimple def_stmt;
1172 stmt_vec_info def_stmt_info = NULL;
1173 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
1174 unsigned int nunits;
1175 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1176 tree vec_inv;
1177 tree vec_cst;
1178 tree t = NULL_TREE;
1179 tree def;
1180 int i;
1181 enum vect_def_type dt;
1182 bool is_simple_use;
1183 tree vector_type;
1184
1185 if (vect_print_dump_info (REPORT_DETAILS))
1186 {
1187 fprintf (vect_dump, "vect_get_vec_def_for_operand: ");
1188 print_generic_expr (vect_dump, op, TDF_SLIM);
1189 }
1190
1191 is_simple_use = vect_is_simple_use (op, loop_vinfo, NULL, &def_stmt, &def,
1192 &dt);
1193 gcc_assert (is_simple_use);
1194 if (vect_print_dump_info (REPORT_DETAILS))
1195 {
1196 if (def)
1197 {
1198 fprintf (vect_dump, "def = ");
1199 print_generic_expr (vect_dump, def, TDF_SLIM);
1200 }
1201 if (def_stmt)
1202 {
1203 fprintf (vect_dump, " def_stmt = ");
1204 print_gimple_stmt (vect_dump, def_stmt, 0, TDF_SLIM);
1205 }
1206 }
1207
1208 switch (dt)
1209 {
1210 /* Case 1: operand is a constant. */
1211 case vect_constant_def:
1212 {
1213 vector_type = get_vectype_for_scalar_type (TREE_TYPE (op));
1214 gcc_assert (vector_type);
1215 nunits = TYPE_VECTOR_SUBPARTS (vector_type);
1216
1217 if (scalar_def)
1218 *scalar_def = op;
1219
1220 /* Create 'vect_cst_ = {cst,cst,...,cst}' */
1221 if (vect_print_dump_info (REPORT_DETAILS))
1222 fprintf (vect_dump, "Create vector_cst. nunits = %d", nunits);
1223
1224 vec_cst = build_vector_from_val (vector_type,
1225 fold_convert (TREE_TYPE (vector_type),
1226 op));
1227 return vect_init_vector (stmt, vec_cst, vector_type, NULL);
1228 }
1229
1230 /* Case 2: operand is defined outside the loop - loop invariant. */
1231 case vect_external_def:
1232 {
1233 vector_type = get_vectype_for_scalar_type (TREE_TYPE (def));
1234 gcc_assert (vector_type);
1235 nunits = TYPE_VECTOR_SUBPARTS (vector_type);
1236
1237 if (scalar_def)
1238 *scalar_def = def;
1239
1240 /* Create 'vec_inv = {inv,inv,..,inv}' */
1241 if (vect_print_dump_info (REPORT_DETAILS))
1242 fprintf (vect_dump, "Create vector_inv.");
1243
1244 for (i = nunits - 1; i >= 0; --i)
1245 {
1246 t = tree_cons (NULL_TREE, def, t);
1247 }
1248
1249 /* FIXME: use build_constructor directly. */
1250 vec_inv = build_constructor_from_list (vector_type, t);
1251 return vect_init_vector (stmt, vec_inv, vector_type, NULL);
1252 }
1253
1254 /* Case 3: operand is defined inside the loop. */
1255 case vect_internal_def:
1256 {
1257 if (scalar_def)
1258 *scalar_def = NULL/* FIXME tuples: def_stmt*/;
1259
1260 /* Get the def from the vectorized stmt. */
1261 def_stmt_info = vinfo_for_stmt (def_stmt);
1262
1263 vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
1264 /* Get vectorized pattern statement. */
1265 if (!vec_stmt
1266 && STMT_VINFO_IN_PATTERN_P (def_stmt_info)
1267 && !STMT_VINFO_RELEVANT (def_stmt_info))
1268 vec_stmt = STMT_VINFO_VEC_STMT (vinfo_for_stmt (
1269 STMT_VINFO_RELATED_STMT (def_stmt_info)));
1270 gcc_assert (vec_stmt);
1271 if (gimple_code (vec_stmt) == GIMPLE_PHI)
1272 vec_oprnd = PHI_RESULT (vec_stmt);
1273 else if (is_gimple_call (vec_stmt))
1274 vec_oprnd = gimple_call_lhs (vec_stmt);
1275 else
1276 vec_oprnd = gimple_assign_lhs (vec_stmt);
1277 return vec_oprnd;
1278 }
1279
1280 /* Case 4: operand is defined by a loop header phi - reduction */
1281 case vect_reduction_def:
1282 case vect_double_reduction_def:
1283 case vect_nested_cycle:
1284 {
1285 struct loop *loop;
1286
1287 gcc_assert (gimple_code (def_stmt) == GIMPLE_PHI);
1288 loop = (gimple_bb (def_stmt))->loop_father;
1289
1290 /* Get the def before the loop */
1291 op = PHI_ARG_DEF_FROM_EDGE (def_stmt, loop_preheader_edge (loop));
1292 return get_initial_def_for_reduction (stmt, op, scalar_def);
1293 }
1294
1295 /* Case 5: operand is defined by loop-header phi - induction. */
1296 case vect_induction_def:
1297 {
1298 gcc_assert (gimple_code (def_stmt) == GIMPLE_PHI);
1299
1300 /* Get the def from the vectorized stmt. */
1301 def_stmt_info = vinfo_for_stmt (def_stmt);
1302 vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
1303 if (gimple_code (vec_stmt) == GIMPLE_PHI)
1304 vec_oprnd = PHI_RESULT (vec_stmt);
1305 else
1306 vec_oprnd = gimple_get_lhs (vec_stmt);
1307 return vec_oprnd;
1308 }
1309
1310 default:
1311 gcc_unreachable ();
1312 }
1313 }
1314
1315
1316 /* Function vect_get_vec_def_for_stmt_copy
1317
1318 Return a vector-def for an operand. This function is used when the
1319 vectorized stmt to be created (by the caller to this function) is a "copy"
1320 created in case the vectorized result cannot fit in one vector, and several
1321 copies of the vector-stmt are required. In this case the vector-def is
1322 retrieved from the vector stmt recorded in the STMT_VINFO_RELATED_STMT field
1323 of the stmt that defines VEC_OPRND.
1324 DT is the type of the vector def VEC_OPRND.
1325
1326 Context:
1327 In case the vectorization factor (VF) is bigger than the number
1328 of elements that can fit in a vectype (nunits), we have to generate
1329 more than one vector stmt to vectorize the scalar stmt. This situation
1330 arises when there are multiple data-types operated upon in the loop; the
1331 smallest data-type determines the VF, and as a result, when vectorizing
1332 stmts operating on wider types we need to create 'VF/nunits' "copies" of the
1333 vector stmt (each computing a vector of 'nunits' results, and together
1334 computing 'VF' results in each iteration). This function is called when
1335 vectorizing such a stmt (e.g. vectorizing S2 in the illustration below, in
1336 which VF=16 and nunits=4, so the number of copies required is 4):
1337
1338 scalar stmt: vectorized into: STMT_VINFO_RELATED_STMT
1339
1340 S1: x = load VS1.0: vx.0 = memref0 VS1.1
1341 VS1.1: vx.1 = memref1 VS1.2
1342 VS1.2: vx.2 = memref2 VS1.3
1343 VS1.3: vx.3 = memref3
1344
1345 S2: z = x + ... VSnew.0: vz0 = vx.0 + ... VSnew.1
1346 VSnew.1: vz1 = vx.1 + ... VSnew.2
1347 VSnew.2: vz2 = vx.2 + ... VSnew.3
1348 VSnew.3: vz3 = vx.3 + ...
1349
1350 The vectorization of S1 is explained in vectorizable_load.
1351 The vectorization of S2:
1352 To create the first vector-stmt out of the 4 copies - VSnew.0 -
1353 the function 'vect_get_vec_def_for_operand' is called to
1354 get the relevant vector-def for each operand of S2. For operand x it
1355 returns the vector-def 'vx.0'.
1356
1357 To create the remaining copies of the vector-stmt (VSnew.j), this
1358 function is called to get the relevant vector-def for each operand. It is
1359 obtained from the respective VS1.j stmt, which is recorded in the
1360 STMT_VINFO_RELATED_STMT field of the stmt that defines VEC_OPRND.
1361
1362 For example, to obtain the vector-def 'vx.1' in order to create the
1363 vector stmt 'VSnew.1', this function is called with VEC_OPRND='vx.0'.
1364 Given 'vx.0' we obtain the stmt that defines it ('VS1.0'); from the
1365 STMT_VINFO_RELATED_STMT field of 'VS1.0' we obtain the next copy - 'VS1.1',
1366 and return its def ('vx.1').
1367 Overall, to create the above sequence this function will be called 3 times:
1368 vx.1 = vect_get_vec_def_for_stmt_copy (dt, vx.0);
1369 vx.2 = vect_get_vec_def_for_stmt_copy (dt, vx.1);
1370 vx.3 = vect_get_vec_def_for_stmt_copy (dt, vx.2); */
1371
1372 tree
1373 vect_get_vec_def_for_stmt_copy (enum vect_def_type dt, tree vec_oprnd)
1374 {
1375 gimple vec_stmt_for_operand;
1376 stmt_vec_info def_stmt_info;
1377
1378 /* Do nothing; can reuse same def. */
1379 if (dt == vect_external_def || dt == vect_constant_def)
1380 return vec_oprnd;
1381
1382 vec_stmt_for_operand = SSA_NAME_DEF_STMT (vec_oprnd);
1383 def_stmt_info = vinfo_for_stmt (vec_stmt_for_operand);
1384 gcc_assert (def_stmt_info);
1385 vec_stmt_for_operand = STMT_VINFO_RELATED_STMT (def_stmt_info);
1386 gcc_assert (vec_stmt_for_operand);
1388 if (gimple_code (vec_stmt_for_operand) == GIMPLE_PHI)
1389 vec_oprnd = PHI_RESULT (vec_stmt_for_operand);
1390 else
1391 vec_oprnd = gimple_get_lhs (vec_stmt_for_operand);
1392 return vec_oprnd;
1393 }
1394
1395
1396 /* Get vectorized definitions for the operands to create a copy of an original
1397 stmt. See vect_get_vec_def_for_stmt_copy () for details. */
1398
1399 static void
1400 vect_get_vec_defs_for_stmt_copy (enum vect_def_type *dt,
1401 VEC(tree,heap) **vec_oprnds0,
1402 VEC(tree,heap) **vec_oprnds1)
1403 {
1404 tree vec_oprnd = VEC_pop (tree, *vec_oprnds0);
1405
1406 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd);
1407 VEC_quick_push (tree, *vec_oprnds0, vec_oprnd);
1408
1409 if (vec_oprnds1 && *vec_oprnds1)
1410 {
1411 vec_oprnd = VEC_pop (tree, *vec_oprnds1);
1412 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[1], vec_oprnd);
1413 VEC_quick_push (tree, *vec_oprnds1, vec_oprnd);
1414 }
1415 }
1416
1417
1418 /* Get vectorized definitions for OP0 and OP1.
1419 REDUC_INDEX is the index of reduction operand in case of reduction,
1420 and -1 otherwise. */
1421
1422 void
1423 vect_get_vec_defs (tree op0, tree op1, gimple stmt,
1424 VEC (tree, heap) **vec_oprnds0,
1425 VEC (tree, heap) **vec_oprnds1,
1426 slp_tree slp_node, int reduc_index)
1427 {
1428 if (slp_node)
1429 {
1430 int nops = (op1 == NULL_TREE) ? 1 : 2;
1431 VEC (tree, heap) *ops = VEC_alloc (tree, heap, nops);
1432 VEC (slp_void_p, heap) *vec_defs = VEC_alloc (slp_void_p, heap, nops);
1433
1434 VEC_quick_push (tree, ops, op0);
1435 if (op1)
1436 VEC_quick_push (tree, ops, op1);
1437
1438 vect_get_slp_defs (ops, slp_node, &vec_defs, reduc_index);
1439
1440 *vec_oprnds0 = (VEC (tree, heap) *) VEC_index (slp_void_p, vec_defs, 0);
1441 if (op1)
1442 *vec_oprnds1 = (VEC (tree, heap) *) VEC_index (slp_void_p, vec_defs, 1);
1443
1444 VEC_free (tree, heap, ops);
1445 VEC_free (slp_void_p, heap, vec_defs);
1446 }
1447 else
1448 {
1449 tree vec_oprnd;
1450
1451 *vec_oprnds0 = VEC_alloc (tree, heap, 1);
1452 vec_oprnd = vect_get_vec_def_for_operand (op0, stmt, NULL);
1453 VEC_quick_push (tree, *vec_oprnds0, vec_oprnd);
1454
1455 if (op1)
1456 {
1457 *vec_oprnds1 = VEC_alloc (tree, heap, 1);
1458 vec_oprnd = vect_get_vec_def_for_operand (op1, stmt, NULL);
1459 VEC_quick_push (tree, *vec_oprnds1, vec_oprnd);
1460 }
1461 }
1462 }
1463
1464
1465 /* Function vect_finish_stmt_generation.
1466
1467 Insert a new stmt. */
1468
1469 void
1470 vect_finish_stmt_generation (gimple stmt, gimple vec_stmt,
1471 gimple_stmt_iterator *gsi)
1472 {
1473 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1474 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1475 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
1476
1477 gcc_assert (gimple_code (stmt) != GIMPLE_LABEL);
1478
1479 gsi_insert_before (gsi, vec_stmt, GSI_SAME_STMT);
1480
1481 set_vinfo_for_stmt (vec_stmt, new_stmt_vec_info (vec_stmt, loop_vinfo,
1482 bb_vinfo));
1483
1484 if (vect_print_dump_info (REPORT_DETAILS))
1485 {
1486 fprintf (vect_dump, "add new stmt: ");
1487 print_gimple_stmt (vect_dump, vec_stmt, 0, TDF_SLIM);
1488 }
1489
1490 gimple_set_location (vec_stmt, gimple_location (stmt));
1491 }
1492
1493 /* Checks if CALL can be vectorized in type VECTYPE. Returns
1494 a function declaration if the target has a vectorized version
1495 of the function, or NULL_TREE if the function cannot be vectorized. */
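/* For example, a call to a math builtin such as sqrt may be mapped to a
   target-provided vector variant operating on VECTYPE_IN/VECTYPE_OUT; which
   calls are supported is entirely up to the
   targetm.vectorize.builtin_vectorized_function hook used below
   (illustrative note).  */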
1496
1497 tree
1498 vectorizable_function (gimple call, tree vectype_out, tree vectype_in)
1499 {
1500 tree fndecl = gimple_call_fndecl (call);
1501
1502 /* We only handle functions that do not read or clobber memory -- i.e.
1503 const or novops ones. */
1504 if (!(gimple_call_flags (call) & (ECF_CONST | ECF_NOVOPS)))
1505 return NULL_TREE;
1506
1507 if (!fndecl
1508 || TREE_CODE (fndecl) != FUNCTION_DECL
1509 || !DECL_BUILT_IN (fndecl))
1510 return NULL_TREE;
1511
1512 return targetm.vectorize.builtin_vectorized_function (fndecl, vectype_out,
1513 vectype_in);
1514 }
1515
1516 /* Function vectorizable_call.
1517
1518 Check if STMT performs a function call that can be vectorized.
1519 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
1520 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
1521 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
1522
1523 static bool
1524 vectorizable_call (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
1525 slp_tree slp_node)
1526 {
1527 tree vec_dest;
1528 tree scalar_dest;
1529 tree op, type;
1530 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
1531 stmt_vec_info stmt_info = vinfo_for_stmt (stmt), prev_stmt_info;
1532 tree vectype_out, vectype_in;
1533 int nunits_in;
1534 int nunits_out;
1535 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1536 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
1537 tree fndecl, new_temp, def, rhs_type;
1538 gimple def_stmt;
1539 enum vect_def_type dt[3]
1540 = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
1541 gimple new_stmt = NULL;
1542 int ncopies, j;
1543 VEC(tree, heap) *vargs = NULL;
1544 enum { NARROW, NONE, WIDEN } modifier;
1545 size_t i, nargs;
1546 tree lhs;
1547
1548 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
1549 return false;
1550
1551 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
1552 return false;
1553
1554 /* Is STMT a vectorizable call? */
1555 if (!is_gimple_call (stmt))
1556 return false;
1557
1558 if (TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
1559 return false;
1560
1561 if (stmt_can_throw_internal (stmt))
1562 return false;
1563
1564 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
1565
1566 /* Process function arguments. */
1567 rhs_type = NULL_TREE;
1568 vectype_in = NULL_TREE;
1569 nargs = gimple_call_num_args (stmt);
1570
1571 /* Bail out if the function has more than three arguments; we do not have
1572 interesting builtin functions to vectorize with more than two arguments,
1573 except for fma.  Calls with no arguments are not handled either. */
1574 if (nargs == 0 || nargs > 3)
1575 return false;
1576
1577 for (i = 0; i < nargs; i++)
1578 {
1579 tree opvectype;
1580
1581 op = gimple_call_arg (stmt, i);
1582
1583 /* We can only handle calls with arguments of the same type. */
1584 if (rhs_type
1585 && !types_compatible_p (rhs_type, TREE_TYPE (op)))
1586 {
1587 if (vect_print_dump_info (REPORT_DETAILS))
1588 fprintf (vect_dump, "argument types differ.");
1589 return false;
1590 }
1591 if (!rhs_type)
1592 rhs_type = TREE_TYPE (op);
1593
1594 if (!vect_is_simple_use_1 (op, loop_vinfo, bb_vinfo,
1595 &def_stmt, &def, &dt[i], &opvectype))
1596 {
1597 if (vect_print_dump_info (REPORT_DETAILS))
1598 fprintf (vect_dump, "use not simple.");
1599 return false;
1600 }
1601
1602 if (!vectype_in)
1603 vectype_in = opvectype;
1604 else if (opvectype
1605 && opvectype != vectype_in)
1606 {
1607 if (vect_print_dump_info (REPORT_DETAILS))
1608 fprintf (vect_dump, "argument vector types differ.");
1609 return false;
1610 }
1611 }
1612 /* If all arguments are external or constant defs use a vector type with
1613 the same size as the output vector type. */
1614 if (!vectype_in)
1615 vectype_in = get_same_sized_vectype (rhs_type, vectype_out);
1616 if (vec_stmt)
1617 gcc_assert (vectype_in);
1618 if (!vectype_in)
1619 {
1620 if (vect_print_dump_info (REPORT_DETAILS))
1621 {
1622 fprintf (vect_dump, "no vectype for scalar type ");
1623 print_generic_expr (vect_dump, rhs_type, TDF_SLIM);
1624 }
1625
1626 return false;
1627 }
1628
1629 /* FORNOW */
1630 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
1631 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
1632 if (nunits_in == nunits_out / 2)
1633 modifier = NARROW;
1634 else if (nunits_out == nunits_in)
1635 modifier = NONE;
1636 else if (nunits_out == nunits_in / 2)
1637 modifier = WIDEN;
1638 else
1639 return false;
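
  /* Illustrative examples: a call taking V2DF arguments and producing a V4SF
     result has nunits_in == nunits_out / 2 and is classified as NARROW;
     equal unit counts give NONE, and the opposite ratio gives WIDEN.  */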
1640
1641 /* For now, we only vectorize functions if a target-specific builtin
1642 is available. TODO -- in some cases, it might be profitable to
1643 insert the calls for pieces of the vector, in order to be able
1644 to vectorize other operations in the loop. */
1645 fndecl = vectorizable_function (stmt, vectype_out, vectype_in);
1646 if (fndecl == NULL_TREE)
1647 {
1648 if (vect_print_dump_info (REPORT_DETAILS))
1649 fprintf (vect_dump, "function is not vectorizable.");
1650
1651 return false;
1652 }
1653
1654 gcc_assert (!gimple_vuse (stmt));
1655
1656 if (slp_node || PURE_SLP_STMT (stmt_info))
1657 ncopies = 1;
1658 else if (modifier == NARROW)
1659 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out;
1660 else
1661 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
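  /* E.g. with a vectorization factor of 8 and 4 units per vector, two copies
     of the vectorized call are generated for each scalar call (illustrative
     numbers).  */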
1662
1663 /* Sanity check: make sure that at least one copy of the vectorized stmt
1664 needs to be generated. */
1665 gcc_assert (ncopies >= 1);
1666
1667 if (!vec_stmt) /* transformation not required. */
1668 {
1669 STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
1670 if (vect_print_dump_info (REPORT_DETAILS))
1671 fprintf (vect_dump, "=== vectorizable_call ===");
1672 vect_model_simple_cost (stmt_info, ncopies, dt, NULL);
1673 return true;
1674 }
1675
1676 /** Transform. **/
1677
1678 if (vect_print_dump_info (REPORT_DETAILS))
1679 fprintf (vect_dump, "transform call.");
1680
1681 /* Handle def. */
1682 scalar_dest = gimple_call_lhs (stmt);
1683 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
1684
1685 prev_stmt_info = NULL;
1686 switch (modifier)
1687 {
1688 case NONE:
1689 for (j = 0; j < ncopies; ++j)
1690 {
1691 /* Build argument list for the vectorized call. */
1692 if (j == 0)
1693 vargs = VEC_alloc (tree, heap, nargs);
1694 else
1695 VEC_truncate (tree, vargs, 0);
1696
1697 if (slp_node)
1698 {
1699 VEC (slp_void_p, heap) *vec_defs
1700 = VEC_alloc (slp_void_p, heap, nargs);
1701 VEC (tree, heap) *vec_oprnds0;
1702
1703 for (i = 0; i < nargs; i++)
1704 VEC_quick_push (tree, vargs, gimple_call_arg (stmt, i));
1705 vect_get_slp_defs (vargs, slp_node, &vec_defs, -1);
1706 vec_oprnds0
1707 = (VEC (tree, heap) *) VEC_index (slp_void_p, vec_defs, 0);
1708
1709 /* Arguments are ready. Create the new vector stmt. */
1710 FOR_EACH_VEC_ELT (tree, vec_oprnds0, i, vec_oprnd0)
1711 {
1712 size_t k;
1713 for (k = 0; k < nargs; k++)
1714 {
1715 VEC (tree, heap) *vec_oprndsk
1716 = (VEC (tree, heap) *)
1717 VEC_index (slp_void_p, vec_defs, k);
1718 VEC_replace (tree, vargs, k,
1719 VEC_index (tree, vec_oprndsk, i));
1720 }
1721 new_stmt = gimple_build_call_vec (fndecl, vargs);
1722 new_temp = make_ssa_name (vec_dest, new_stmt);
1723 gimple_call_set_lhs (new_stmt, new_temp);
1724 vect_finish_stmt_generation (stmt, new_stmt, gsi);
1725 mark_symbols_for_renaming (new_stmt);
1726 VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node),
1727 new_stmt);
1728 }
1729
1730 for (i = 0; i < nargs; i++)
1731 {
1732 VEC (tree, heap) *vec_oprndsi
1733 = (VEC (tree, heap) *)
1734 VEC_index (slp_void_p, vec_defs, i);
1735 VEC_free (tree, heap, vec_oprndsi);
1736 }
1737 VEC_free (slp_void_p, heap, vec_defs);
1738 continue;
1739 }
1740
1741 for (i = 0; i < nargs; i++)
1742 {
1743 op = gimple_call_arg (stmt, i);
1744 if (j == 0)
1745 vec_oprnd0
1746 = vect_get_vec_def_for_operand (op, stmt, NULL);
1747 else
1748 {
1749 vec_oprnd0 = gimple_call_arg (new_stmt, i);
1750 vec_oprnd0
1751 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
1752 }
1753
1754 VEC_quick_push (tree, vargs, vec_oprnd0);
1755 }
1756
1757 new_stmt = gimple_build_call_vec (fndecl, vargs);
1758 new_temp = make_ssa_name (vec_dest, new_stmt);
1759 gimple_call_set_lhs (new_stmt, new_temp);
1760
1761 vect_finish_stmt_generation (stmt, new_stmt, gsi);
1762 mark_symbols_for_renaming (new_stmt);
1763
1764 if (j == 0)
1765 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
1766 else
1767 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
1768
1769 prev_stmt_info = vinfo_for_stmt (new_stmt);
1770 }
1771
1772 break;
1773
1774 case NARROW:
1775 for (j = 0; j < ncopies; ++j)
1776 {
1777 /* Build argument list for the vectorized call. */
1778 if (j == 0)
1779 vargs = VEC_alloc (tree, heap, nargs * 2);
1780 else
1781 VEC_truncate (tree, vargs, 0);
1782
1783 if (slp_node)
1784 {
1785 VEC (slp_void_p, heap) *vec_defs
1786 = VEC_alloc (slp_void_p, heap, nargs);
1787 VEC (tree, heap) *vec_oprnds0;
1788
1789 for (i = 0; i < nargs; i++)
1790 VEC_quick_push (tree, vargs, gimple_call_arg (stmt, i));
1791 vect_get_slp_defs (vargs, slp_node, &vec_defs, -1);
1792 vec_oprnds0
1793 = (VEC (tree, heap) *) VEC_index (slp_void_p, vec_defs, 0);
1794
1795 /* Arguments are ready. Create the new vector stmt. */
1796 for (i = 0; VEC_iterate (tree, vec_oprnds0, i, vec_oprnd0);
1797 i += 2)
1798 {
1799 size_t k;
1800 VEC_truncate (tree, vargs, 0);
1801 for (k = 0; k < nargs; k++)
1802 {
1803 VEC (tree, heap) *vec_oprndsk
1804 = (VEC (tree, heap) *)
1805 VEC_index (slp_void_p, vec_defs, k);
1806 VEC_quick_push (tree, vargs,
1807 VEC_index (tree, vec_oprndsk, i));
1808 VEC_quick_push (tree, vargs,
1809 VEC_index (tree, vec_oprndsk, i + 1));
1810 }
1811 new_stmt = gimple_build_call_vec (fndecl, vargs);
1812 new_temp = make_ssa_name (vec_dest, new_stmt);
1813 gimple_call_set_lhs (new_stmt, new_temp);
1814 vect_finish_stmt_generation (stmt, new_stmt, gsi);
1815 mark_symbols_for_renaming (new_stmt);
1816 VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node),
1817 new_stmt);
1818 }
1819
1820 for (i = 0; i < nargs; i++)
1821 {
1822 VEC (tree, heap) *vec_oprndsi
1823 = (VEC (tree, heap) *)
1824 VEC_index (slp_void_p, vec_defs, i);
1825 VEC_free (tree, heap, vec_oprndsi);
1826 }
1827 VEC_free (slp_void_p, heap, vec_defs);
1828 continue;
1829 }
1830
1831 for (i = 0; i < nargs; i++)
1832 {
1833 op = gimple_call_arg (stmt, i);
1834 if (j == 0)
1835 {
1836 vec_oprnd0
1837 = vect_get_vec_def_for_operand (op, stmt, NULL);
1838 vec_oprnd1
1839 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
1840 }
1841 else
1842 {
1843 vec_oprnd1 = gimple_call_arg (new_stmt, 2*i + 1);
1844 vec_oprnd0
1845 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd1);
1846 vec_oprnd1
1847 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
1848 }
1849
1850 VEC_quick_push (tree, vargs, vec_oprnd0);
1851 VEC_quick_push (tree, vargs, vec_oprnd1);
1852 }
1853
1854 new_stmt = gimple_build_call_vec (fndecl, vargs);
1855 new_temp = make_ssa_name (vec_dest, new_stmt);
1856 gimple_call_set_lhs (new_stmt, new_temp);
1857
1858 vect_finish_stmt_generation (stmt, new_stmt, gsi);
1859 mark_symbols_for_renaming (new_stmt);
1860
1861 if (j == 0)
1862 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
1863 else
1864 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
1865
1866 prev_stmt_info = vinfo_for_stmt (new_stmt);
1867 }
1868
1869 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
1870
1871 break;
1872
1873 case WIDEN:
1874 /* No current target implements this case. */
1875 return false;
1876 }
1877
1878 VEC_free (tree, heap, vargs);
1879
1880 /* Update the exception handling table with the vector stmt if necessary. */
1881 if (maybe_clean_or_replace_eh_stmt (stmt, *vec_stmt))
1882 gimple_purge_dead_eh_edges (gimple_bb (stmt));
1883
1884 /* The call in STMT might prevent it from being removed by DCE.
1885 We cannot remove it here, however, due to the way the SSA name
1886 it defines is mapped to the new definition. So just replace the
1887 rhs of the statement with something harmless. */
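/* Concretely, the code below turns the scalar call into an assignment of
   a zero constant of the call's result type to its lhs. */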
1888
1889 if (slp_node)
1890 return true;
1891
1892 type = TREE_TYPE (scalar_dest);
1893 if (is_pattern_stmt_p (stmt_info))
1894 lhs = gimple_call_lhs (STMT_VINFO_RELATED_STMT (stmt_info));
1895 else
1896 lhs = gimple_call_lhs (stmt);
1897 new_stmt = gimple_build_assign (lhs, build_zero_cst (type));
1898 set_vinfo_for_stmt (new_stmt, stmt_info);
1899 set_vinfo_for_stmt (stmt, NULL);
1900 STMT_VINFO_STMT (stmt_info) = new_stmt;
1901 gsi_replace (gsi, new_stmt, false);
1902 SSA_NAME_DEF_STMT (gimple_assign_lhs (new_stmt)) = new_stmt;
1903
1904 return true;
1905 }
1906
1907
1908 /* Function vect_gen_widened_results_half
1909
1910 Create a vector stmt whose code is CODE, whose number of arguments is
1911 given by OP_TYPE, and whose result variable is VEC_DEST; its arguments
1912 are VEC_OPRND0 and VEC_OPRND1. The new vector stmt is to be inserted at GSI.
1913 In the case that CODE is a CALL_EXPR, this means that a call to DECL
1914 needs to be created (DECL is a function-decl of a target-builtin).
1915 STMT is the original scalar stmt that we are vectorizing. */
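/* For example, on a target with 128-bit vectors, widening eight HImode
   elements to SImode cannot be expressed in a single vector stmt: the
   caller invokes this helper twice, and each invocation emits one stmt
   (using CODE, or a call to DECL when CODE is CALL_EXPR) that produces
   four of the eight widened results -- the "low" or the "high" half. */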
1916
1917 static gimple
1918 vect_gen_widened_results_half (enum tree_code code,
1919 tree decl,
1920 tree vec_oprnd0, tree vec_oprnd1, int op_type,
1921 tree vec_dest, gimple_stmt_iterator *gsi,
1922 gimple stmt)
1923 {
1924 gimple new_stmt;
1925 tree new_temp;
1926
1927 /* Generate half of the widened result: */
1928 if (code == CALL_EXPR)
1929 {
1930 /* Target specific support */
1931 if (op_type == binary_op)
1932 new_stmt = gimple_build_call (decl, 2, vec_oprnd0, vec_oprnd1);
1933 else
1934 new_stmt = gimple_build_call (decl, 1, vec_oprnd0);
1935 new_temp = make_ssa_name (vec_dest, new_stmt);
1936 gimple_call_set_lhs (new_stmt, new_temp);
1937 }
1938 else
1939 {
1940 /* Generic support */
1941 gcc_assert (op_type == TREE_CODE_LENGTH (code));
1942 if (op_type != binary_op)
1943 vec_oprnd1 = NULL;
1944 new_stmt = gimple_build_assign_with_ops (code, vec_dest, vec_oprnd0,
1945 vec_oprnd1);
1946 new_temp = make_ssa_name (vec_dest, new_stmt);
1947 gimple_assign_set_lhs (new_stmt, new_temp);
1948 }
1949 vect_finish_stmt_generation (stmt, new_stmt, gsi);
1950
1951 return new_stmt;
1952 }
1953
1954
1955 /* Get vectorized definitions for loop-based vectorization. For the first
1956 operand we call vect_get_vec_def_for_operand() (with OPRND containing
1957 the scalar operand), and for the rest we get a copy with
1958 vect_get_vec_def_for_stmt_copy() using the previous vector definition
1959 (stored in OPRND). See vect_get_vec_def_for_stmt_copy() for details.
1960 The vectors are collected into VEC_OPRNDS. */
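/* Note: each invocation pushes two vector defs onto VEC_OPRNDS and, when
   MULTI_STEP_CVT is nonzero, recurses with MULTI_STEP_CVT - 1, so a call
   with MULTI_STEP_CVT == N collects 2 * (N + 1) defs in total. */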
1961
1962 static void
1963 vect_get_loop_based_defs (tree *oprnd, gimple stmt, enum vect_def_type dt,
1964 VEC (tree, heap) **vec_oprnds, int multi_step_cvt)
1965 {
1966 tree vec_oprnd;
1967
1968 /* Get first vector operand. */
1969 /* All the vector operands except the very first one (which is the scalar
1970 OPRND) are stmt copies. */
1971 if (TREE_CODE (TREE_TYPE (*oprnd)) != VECTOR_TYPE)
1972 vec_oprnd = vect_get_vec_def_for_operand (*oprnd, stmt, NULL);
1973 else
1974 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, *oprnd);
1975
1976 VEC_quick_push (tree, *vec_oprnds, vec_oprnd);
1977
1978 /* Get second vector operand. */
1979 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, vec_oprnd);
1980 VEC_quick_push (tree, *vec_oprnds, vec_oprnd);
1981
1982 *oprnd = vec_oprnd;
1983
1984 /* For conversion in multiple steps, continue to get operands
1985 recursively. */
1986 if (multi_step_cvt)
1987 vect_get_loop_based_defs (oprnd, stmt, dt, vec_oprnds, multi_step_cvt - 1);
1988 }
1989
1990
1991 /* Create vectorized demotion statements for vector operands from VEC_OPRNDS.
1992 For multi-step conversions store the resulting vectors and call the function
1993 recursively. */
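/* For example, a two-step demotion from V4SI to V16QI first packs pairs
   of V4SI vectors into V8HI vectors and then packs pairs of those into
   V16QI vectors; each level of recursion therefore halves the number of
   operands left in VEC_OPRNDS. */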
1994
1995 static void
1996 vect_create_vectorized_demotion_stmts (VEC (tree, heap) **vec_oprnds,
1997 int multi_step_cvt, gimple stmt,
1998 VEC (tree, heap) *vec_dsts,
1999 gimple_stmt_iterator *gsi,
2000 slp_tree slp_node, enum tree_code code,
2001 stmt_vec_info *prev_stmt_info)
2002 {
2003 unsigned int i;
2004 tree vop0, vop1, new_tmp, vec_dest;
2005 gimple new_stmt;
2006 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2007
2008 vec_dest = VEC_pop (tree, vec_dsts);
2009
2010 for (i = 0; i < VEC_length (tree, *vec_oprnds); i += 2)
2011 {
2012 /* Create demotion operation. */
2013 vop0 = VEC_index (tree, *vec_oprnds, i);
2014 vop1 = VEC_index (tree, *vec_oprnds, i + 1);
2015 new_stmt = gimple_build_assign_with_ops (code, vec_dest, vop0, vop1);
2016 new_tmp = make_ssa_name (vec_dest, new_stmt);
2017 gimple_assign_set_lhs (new_stmt, new_tmp);
2018 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2019
2020 if (multi_step_cvt)
2021 /* Store the resulting vector for next recursive call. */
2022 VEC_replace (tree, *vec_oprnds, i/2, new_tmp);
2023 else
2024 {
2025 /* This is the last step of the conversion sequence. Store the
2026 vectors in SLP_NODE or in vector info of the scalar statement
2027 (or in STMT_VINFO_RELATED_STMT chain). */
2028 if (slp_node)
2029 VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node), new_stmt);
2030 else
2031 {
2032 if (!*prev_stmt_info)
2033 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
2034 else
2035 STMT_VINFO_RELATED_STMT (*prev_stmt_info) = new_stmt;
2036
2037 *prev_stmt_info = vinfo_for_stmt (new_stmt);
2038 }
2039 }
2040 }
2041
2042 /* For multi-step demotion operations we first generate demotion operations
2043 from the source type to the intermediate types, and then combine the
2044 results (stored in VEC_OPRNDS) in a demotion operation to the destination
2045 type. */
2046 if (multi_step_cvt)
2047 {
2048 /* At each level of recursion we have half of the operands we had at the
2049 previous level. */
2050 VEC_truncate (tree, *vec_oprnds, (i+1)/2);
2051 vect_create_vectorized_demotion_stmts (vec_oprnds, multi_step_cvt - 1,
2052 stmt, vec_dsts, gsi, slp_node,
2053 VEC_PACK_TRUNC_EXPR,
2054 prev_stmt_info);
2055 }
2056
2057 VEC_quick_push (tree, vec_dsts, vec_dest);
2058 }
2059
2060
2061 /* Create vectorized promotion statements for vector operands from VEC_OPRNDS0
2062 and VEC_OPRNDS1 (for binary operations). For multi-step conversions store
2063 the resulting vectors and call the function recursively. */
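/* Note: every vector in VEC_OPRNDS0 yields two result vectors (the low
   and the high half of the widened elements), so on return VEC_OPRNDS0
   holds twice as many entries as on entry. */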
2064
2065 static void
2066 vect_create_vectorized_promotion_stmts (VEC (tree, heap) **vec_oprnds0,
2067 VEC (tree, heap) **vec_oprnds1,
2068 gimple stmt, tree vec_dest,
2069 gimple_stmt_iterator *gsi,
2070 enum tree_code code1,
2071 enum tree_code code2, tree decl1,
2072 tree decl2, int op_type)
2073 {
2074 int i;
2075 tree vop0, vop1, new_tmp1, new_tmp2;
2076 gimple new_stmt1, new_stmt2;
2077 VEC (tree, heap) *vec_tmp = NULL;
2078
2079 vec_tmp = VEC_alloc (tree, heap, VEC_length (tree, *vec_oprnds0) * 2);
2080 FOR_EACH_VEC_ELT (tree, *vec_oprnds0, i, vop0)
2081 {
2082 if (op_type == binary_op)
2083 vop1 = VEC_index (tree, *vec_oprnds1, i);
2084 else
2085 vop1 = NULL_TREE;
2086
2087 /* Generate the two halves of promotion operation. */
2088 new_stmt1 = vect_gen_widened_results_half (code1, decl1, vop0, vop1,
2089 op_type, vec_dest, gsi, stmt);
2090 new_stmt2 = vect_gen_widened_results_half (code2, decl2, vop0, vop1,
2091 op_type, vec_dest, gsi, stmt);
2092 if (is_gimple_call (new_stmt1))
2093 {
2094 new_tmp1 = gimple_call_lhs (new_stmt1);
2095 new_tmp2 = gimple_call_lhs (new_stmt2);
2096 }
2097 else
2098 {
2099 new_tmp1 = gimple_assign_lhs (new_stmt1);
2100 new_tmp2 = gimple_assign_lhs (new_stmt2);
2101 }
2102
2103 /* Store the results for the next step. */
2104 VEC_quick_push (tree, vec_tmp, new_tmp1);
2105 VEC_quick_push (tree, vec_tmp, new_tmp2);
2106 }
2107
2108 VEC_free (tree, heap, *vec_oprnds0);
2109 *vec_oprnds0 = vec_tmp;
2110 }
2111
2112
2113 /* Check if STMT performs a conversion operation that can be vectorized.
2114 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2115 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
2116 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
2117
2118 static bool
2119 vectorizable_conversion (gimple stmt, gimple_stmt_iterator *gsi,
2120 gimple *vec_stmt, slp_tree slp_node)
2121 {
2122 tree vec_dest;
2123 tree scalar_dest;
2124 tree op0, op1 = NULL_TREE;
2125 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
2126 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2127 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2128 enum tree_code code, code1 = ERROR_MARK, code2 = ERROR_MARK;
2129 enum tree_code codecvt1 = ERROR_MARK, codecvt2 = ERROR_MARK;
2130 tree decl1 = NULL_TREE, decl2 = NULL_TREE;
2131 tree new_temp;
2132 tree def;
2133 gimple def_stmt;
2134 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
2135 gimple new_stmt = NULL;
2136 stmt_vec_info prev_stmt_info;
2137 int nunits_in;
2138 int nunits_out;
2139 tree vectype_out, vectype_in;
2140 int ncopies, i, j;
2141 tree lhs_type, rhs_type;
2142 enum { NARROW, NONE, WIDEN } modifier;
2143 VEC (tree,heap) *vec_oprnds0 = NULL, *vec_oprnds1 = NULL;
2144 tree vop0;
2145 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
2146 int multi_step_cvt = 0;
2147 VEC (tree, heap) *vec_dsts = NULL, *interm_types = NULL;
2148 tree last_oprnd, intermediate_type, cvt_type = NULL_TREE;
2149 int op_type;
2150 enum machine_mode rhs_mode;
2151 unsigned short fltsz;
2152
2153 /* Is STMT a vectorizable conversion? */
2154
2155 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
2156 return false;
2157
2158 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
2159 return false;
2160
2161 if (!is_gimple_assign (stmt))
2162 return false;
2163
2164 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
2165 return false;
2166
2167 code = gimple_assign_rhs_code (stmt);
2168 if (!CONVERT_EXPR_CODE_P (code)
2169 && code != FIX_TRUNC_EXPR
2170 && code != FLOAT_EXPR
2171 && code != WIDEN_MULT_EXPR
2172 && code != WIDEN_LSHIFT_EXPR)
2173 return false;
2174
2175 op_type = TREE_CODE_LENGTH (code);
2176
2177 /* Check types of lhs and rhs. */
2178 scalar_dest = gimple_assign_lhs (stmt);
2179 lhs_type = TREE_TYPE (scalar_dest);
2180 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
2181
2182 op0 = gimple_assign_rhs1 (stmt);
2183 rhs_type = TREE_TYPE (op0);
2184
2185 if ((code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
2186 && !((INTEGRAL_TYPE_P (lhs_type)
2187 && INTEGRAL_TYPE_P (rhs_type))
2188 || (SCALAR_FLOAT_TYPE_P (lhs_type)
2189 && SCALAR_FLOAT_TYPE_P (rhs_type))))
2190 return false;
2191
2192 if ((INTEGRAL_TYPE_P (lhs_type)
2193 && (TYPE_PRECISION (lhs_type)
2194 != GET_MODE_PRECISION (TYPE_MODE (lhs_type))))
2195 || (INTEGRAL_TYPE_P (rhs_type)
2196 && (TYPE_PRECISION (rhs_type)
2197 != GET_MODE_PRECISION (TYPE_MODE (rhs_type)))))
2198 {
2199 if (vect_print_dump_info (REPORT_DETAILS))
2200 fprintf (vect_dump,
2201 "type conversion to/from bit-precision unsupported.");
2202 return false;
2203 }
2204
2205 /* Check the operands of the operation. */
2206 if (!vect_is_simple_use_1 (op0, loop_vinfo, bb_vinfo,
2207 &def_stmt, &def, &dt[0], &vectype_in))
2208 {
2209 if (vect_print_dump_info (REPORT_DETAILS))
2210 fprintf (vect_dump, "use not simple.");
2211 return false;
2212 }
2213 if (op_type == binary_op)
2214 {
2215 bool ok;
2216
2217 op1 = gimple_assign_rhs2 (stmt);
2218 gcc_assert (code == WIDEN_MULT_EXPR || code == WIDEN_LSHIFT_EXPR);
2219 /* For WIDEN_MULT_EXPR, if OP0 is a constant, use the type of
2220 OP1. */
2221 if (CONSTANT_CLASS_P (op0))
2222 ok = vect_is_simple_use_1 (op1, loop_vinfo, NULL,
2223 &def_stmt, &def, &dt[1], &vectype_in);
2224 else
2225 ok = vect_is_simple_use (op1, loop_vinfo, NULL, &def_stmt, &def,
2226 &dt[1]);
2227
2228 if (!ok)
2229 {
2230 if (vect_print_dump_info (REPORT_DETAILS))
2231 fprintf (vect_dump, "use not simple.");
2232 return false;
2233 }
2234 }
2235
2236 /* If op0 is an external or constant def, use a vector type of
2237 the same size as the output vector type. */
2238 if (!vectype_in)
2239 vectype_in = get_same_sized_vectype (rhs_type, vectype_out);
2240 if (vec_stmt)
2241 gcc_assert (vectype_in);
2242 if (!vectype_in)
2243 {
2244 if (vect_print_dump_info (REPORT_DETAILS))
2245 {
2246 fprintf (vect_dump, "no vectype for scalar type ");
2247 print_generic_expr (vect_dump, rhs_type, TDF_SLIM);
2248 }
2249
2250 return false;
2251 }
2252
2253 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
2254 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
2255 if (nunits_in < nunits_out)
2256 modifier = NARROW;
2257 else if (nunits_out == nunits_in)
2258 modifier = NONE;
2259 else
2260 modifier = WIDEN;
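  /* E.g. on a target with 128-bit vectors, a short -> int conversion has
     nunits_in == 8 and nunits_out == 4 and is thus a WIDEN (promotion),
     while int -> short is the NARROW (demotion) case. */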
2261
2262 /* Multiple types in SLP are handled by creating the appropriate number of
2263 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
2264 case of SLP. */
2265 if (slp_node || PURE_SLP_STMT (stmt_info))
2266 ncopies = 1;
2267 else if (modifier == NARROW)
2268 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out;
2269 else
2270 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
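  /* E.g. with a vectorization factor of 8 and a V4SI input vector
     (nunits_in == 4), a NONE or WIDEN conversion needs ncopies == 2,
     whereas a NARROW conversion producing V8HI (nunits_out == 8) needs
     only a single copy. */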
2271
2272 /* Sanity check: make sure that at least one copy of the vectorized stmt
2273 needs to be generated. */
2274 gcc_assert (ncopies >= 1);
2275
2276 /* Supportable by target? */
2277 switch (modifier)
2278 {
2279 case NONE:
2280 if (code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
2281 return false;
2282 if (supportable_convert_operation (code, vectype_out, vectype_in,
2283 &decl1, &code1))
2284 break;
2285 /* FALLTHRU */
2286 unsupported:
2287 if (vect_print_dump_info (REPORT_DETAILS))
2288 fprintf (vect_dump, "conversion not supported by target.");
2289 return false;
2290
2291 case WIDEN:
2292 if (supportable_widening_operation (code, stmt, vectype_out, vectype_in,
2293 &decl1, &decl2, &code1, &code2,
2294 &multi_step_cvt, &interm_types))
2295 {
2296 /* Binary widening operation can only be supported directly by the
2297 architecture. */
2298 gcc_assert (!(multi_step_cvt && op_type == binary_op));
2299 break;
2300 }
2301
2302 if (code != FLOAT_EXPR
2303 || (GET_MODE_SIZE (TYPE_MODE (lhs_type))
2304 <= GET_MODE_SIZE (TYPE_MODE (rhs_type))))
2305 goto unsupported;
2306
2307 rhs_mode = TYPE_MODE (rhs_type);
2308 fltsz = GET_MODE_SIZE (TYPE_MODE (lhs_type));
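      /* E.g. a short -> double conversion cannot be done directly: the
         loop below searches for an intermediate integer mode (SImode in
         this example, assuming the usual mode sizes) such that
         short -> int is a supportable widening operation and
         int -> double is a supportable (possibly widening) conversion. */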
2309 for (rhs_mode = GET_MODE_2XWIDER_MODE (TYPE_MODE (rhs_type));
2310 rhs_mode != VOIDmode && GET_MODE_SIZE (rhs_mode) <= fltsz;
2311 rhs_mode = GET_MODE_2XWIDER_MODE (rhs_mode))
2312 {
2313 cvt_type
2314 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
2315 cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
2316 if (cvt_type == NULL_TREE)
2317 goto unsupported;
2318
2319 if (GET_MODE_SIZE (rhs_mode) == fltsz)
2320 {
2321 if (!supportable_convert_operation (code, vectype_out,
2322 cvt_type, &decl1, &codecvt1))
2323 goto unsupported;
2324 }
2325 else if (!supportable_widening_operation (code, stmt, vectype_out,
2326 cvt_type, &decl1, &decl2,
2327 &codecvt1, &codecvt2,
2328 &multi_step_cvt,
2329 &interm_types))
2330 continue;
2331 else
2332 gcc_assert (multi_step_cvt == 0);
2333
2334 if (supportable_widening_operation (NOP_EXPR, stmt, cvt_type,
2335 vectype_in, NULL, NULL, &code1,
2336 &code2, &multi_step_cvt,
2337 &interm_types))
2338 break;
2339 }
2340
2341 if (rhs_mode == VOIDmode || GET_MODE_SIZE (rhs_mode) > fltsz)
2342 goto unsupported;
2343
2344 if (GET_MODE_SIZE (rhs_mode) == fltsz)
2345 codecvt2 = ERROR_MARK;
2346 else
2347 {
2348 multi_step_cvt++;
2349 VEC_safe_push (tree, heap, interm_types, cvt_type);
2350 cvt_type = NULL_TREE;
2351 }
2352 break;
2353
2354 case NARROW:
2355 gcc_assert (op_type == unary_op);
2356 if (supportable_narrowing_operation (code, vectype_out, vectype_in,
2357 &code1, &multi_step_cvt,
2358 &interm_types))
2359 break;
2360
2361 if (code != FIX_TRUNC_EXPR
2362 || (GET_MODE_SIZE (TYPE_MODE (lhs_type))
2363 >= GET_MODE_SIZE (TYPE_MODE (rhs_type))))
2364 goto unsupported;
2365
2366 rhs_mode = TYPE_MODE (rhs_type);
2367 cvt_type
2368 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
2369 cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
2370 if (cvt_type == NULL_TREE)
2371 goto unsupported;
2372 if (!supportable_convert_operation (code, cvt_type, vectype_in,
2373 &decl1, &codecvt1))
2374 goto unsupported;
2375 if (supportable_narrowing_operation (NOP_EXPR, vectype_out, cvt_type,
2376 &code1, &multi_step_cvt,
2377 &interm_types))
2378 break;
2379 goto unsupported;
2380
2381 default:
2382 gcc_unreachable ();
2383 }
2384
2385 if (!vec_stmt) /* transformation not required. */
2386 {
2387 if (vect_print_dump_info (REPORT_DETAILS))
2388 fprintf (vect_dump, "=== vectorizable_conversion ===");
2389 if (code == FIX_TRUNC_EXPR || code == FLOAT_EXPR)
2390 STMT_VINFO_TYPE (stmt_info) = type_conversion_vec_info_type;
2391 else if (modifier == NARROW)
2392 {
2393 STMT_VINFO_TYPE (stmt_info) = type_demotion_vec_info_type;
2394 vect_model_simple_cost (stmt_info, ncopies, dt, NULL);
2395 }
2396 else
2397 {
2398 STMT_VINFO_TYPE (stmt_info) = type_promotion_vec_info_type;
2399 vect_model_simple_cost (stmt_info, 2 * ncopies, dt, NULL);
2400 }
2401 VEC_free (tree, heap, interm_types);
2402 return true;
2403 }
2404
2405 /** Transform. **/
2406 if (vect_print_dump_info (REPORT_DETAILS))
2407 fprintf (vect_dump, "transform conversion. ncopies = %d.", ncopies);
2408
2409 if (op_type == binary_op)
2410 {
2411 if (CONSTANT_CLASS_P (op0))
2412 op0 = fold_convert (TREE_TYPE (op1), op0);
2413 else if (CONSTANT_CLASS_P (op1))
2414 op1 = fold_convert (TREE_TYPE (op0), op1);
2415 }
2416
2417 /* In case of multi-step conversion, we first generate conversion operations
2418 to the intermediate types, and then from those types to the final one.
2419 We create vector destinations for the intermediate types (TYPES) received
2420 from supportable_*_operation, and store them in the correct order
2421 for future use in vect_create_vectorized_*_stmts (). */
2422 vec_dsts = VEC_alloc (tree, heap, multi_step_cvt + 1);
2423 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
2424 VEC_quick_push (tree, vec_dsts, vec_dest);
2425
2426 if (multi_step_cvt)
2427 {
2428 for (i = VEC_length (tree, interm_types) - 1;
2429 VEC_iterate (tree, interm_types, i, intermediate_type); i--)
2430 {
2431 vec_dest = vect_create_destination_var (scalar_dest,
2432 intermediate_type);
2433 VEC_quick_push (tree, vec_dsts, vec_dest);
2434 }
2435 }
2436
2437 if (cvt_type)
2438 vec_dest = vect_create_destination_var (scalar_dest, cvt_type);
2439
2440 if (!slp_node)
2441 {
2442 if (modifier == NONE)
2443 vec_oprnds0 = VEC_alloc (tree, heap, 1);
2444 else if (modifier == WIDEN)
2445 {
2446 vec_oprnds0 = VEC_alloc (tree, heap,
2447 (multi_step_cvt
2448 ? vect_pow2 (multi_step_cvt) : 1));
2449 if (op_type == binary_op)
2450 vec_oprnds1 = VEC_alloc (tree, heap, 1);
2451 }
2452 else
2453 vec_oprnds0 = VEC_alloc (tree, heap,
2454 2 * (multi_step_cvt
2455 ? vect_pow2 (multi_step_cvt) : 1));
2456 }
2457 else if (code == WIDEN_LSHIFT_EXPR)
2458 vec_oprnds1 = VEC_alloc (tree, heap, slp_node->vec_stmts_size);
2459
2460 last_oprnd = op0;
2461 prev_stmt_info = NULL;
2462 switch (modifier)
2463 {
2464 case NONE:
2465 for (j = 0; j < ncopies; j++)
2466 {
2467 if (j == 0)
2468 vect_get_vec_defs (op0, NULL, stmt, &vec_oprnds0, NULL, slp_node,
2469 -1);
2470 else
2471 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, NULL);
2472
2473 FOR_EACH_VEC_ELT (tree, vec_oprnds0, i, vop0)
2474 {
2475 /* Arguments are ready, create the new vector stmt. */
2476 if (code1 == CALL_EXPR)
2477 {
2478 new_stmt = gimple_build_call (decl1, 1, vop0);
2479 new_temp = make_ssa_name (vec_dest, new_stmt);
2480 gimple_call_set_lhs (new_stmt, new_temp);
2481 }
2482 else
2483 {
2484 gcc_assert (TREE_CODE_LENGTH (code1) == unary_op);
2485 new_stmt = gimple_build_assign_with_ops (code1, vec_dest,
2486 vop0, NULL);
2487 new_temp = make_ssa_name (vec_dest, new_stmt);
2488 gimple_assign_set_lhs (new_stmt, new_temp);
2489 }
2490
2491 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2492 if (slp_node)
2493 VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node),
2494 new_stmt);
2495 }
2496
2497 if (j == 0)
2498 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2499 else
2500 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2501 prev_stmt_info = vinfo_for_stmt (new_stmt);
2502 }
2503 break;
2504
2505 case WIDEN:
2506 /* In case the vectorization factor (VF) is bigger than the number
2507 of elements that we can fit in a vectype (nunits), we have to
2508 generate more than one vector stmt - i.e - we need to "unroll"
2509 the vector stmt by a factor VF/nunits. */
2510 for (j = 0; j < ncopies; j++)
2511 {
2512 /* Handle uses. */
2513 if (j == 0)
2514 {
2515 if (slp_node)
2516 {
2517 if (code == WIDEN_LSHIFT_EXPR)
2518 {
2519 unsigned int k;
2520
2521 vec_oprnd1 = op1;
2522 /* Store vec_oprnd1 for every vector stmt to be created
2523 for SLP_NODE. We check during the analysis that all
2524 the shift arguments are the same. */
2525 for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
2526 VEC_quick_push (tree, vec_oprnds1, vec_oprnd1);
2527
2528 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
2529 slp_node, -1);
2530 }
2531 else
2532 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0,
2533 &vec_oprnds1, slp_node, -1);
2534 }
2535 else
2536 {
2537 vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt, NULL);
2538 VEC_quick_push (tree, vec_oprnds0, vec_oprnd0);
2539 if (op_type == binary_op)
2540 {
2541 if (code == WIDEN_LSHIFT_EXPR)
2542 vec_oprnd1 = op1;
2543 else
2544 vec_oprnd1 = vect_get_vec_def_for_operand (op1, stmt,
2545 NULL);
2546 VEC_quick_push (tree, vec_oprnds1, vec_oprnd1);
2547 }
2548 }
2549 }
2550 else
2551 {
2552 vec_oprnd0 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd0);
2553 VEC_truncate (tree, vec_oprnds0, 0);
2554 VEC_quick_push (tree, vec_oprnds0, vec_oprnd0);
2555 if (op_type == binary_op)
2556 {
2557 if (code == WIDEN_LSHIFT_EXPR)
2558 vec_oprnd1 = op1;
2559 else
2560 vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt[1],
2561 vec_oprnd1);
2562 VEC_truncate (tree, vec_oprnds1, 0);
2563 VEC_quick_push (tree, vec_oprnds1, vec_oprnd1);
2564 }
2565 }
2566
2567 /* Arguments are ready. Create the new vector stmts. */
2568 for (i = multi_step_cvt; i >= 0; i--)
2569 {
2570 tree this_dest = VEC_index (tree, vec_dsts, i);
2571 enum tree_code c1 = code1, c2 = code2;
2572 if (i == 0 && codecvt2 != ERROR_MARK)
2573 {
2574 c1 = codecvt1;
2575 c2 = codecvt2;
2576 }
2577 vect_create_vectorized_promotion_stmts (&vec_oprnds0,
2578 &vec_oprnds1,
2579 stmt, this_dest, gsi,
2580 c1, c2, decl1, decl2,
2581 op_type);
2582 }
2583
2584 FOR_EACH_VEC_ELT (tree, vec_oprnds0, i, vop0)
2585 {
2586 if (cvt_type)
2587 {
2588 if (codecvt1 == CALL_EXPR)
2589 {
2590 new_stmt = gimple_build_call (decl1, 1, vop0);
2591 new_temp = make_ssa_name (vec_dest, new_stmt);
2592 gimple_call_set_lhs (new_stmt, new_temp);
2593 }
2594 else
2595 {
2596 gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
2597 new_temp = make_ssa_name (vec_dest, NULL);
2598 new_stmt = gimple_build_assign_with_ops (codecvt1,
2599 new_temp,
2600 vop0, NULL);
2601 }
2602
2603 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2604 }
2605 else
2606 new_stmt = SSA_NAME_DEF_STMT (vop0);
2607
2608 if (slp_node)
2609 VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node),
2610 new_stmt);
2611 else
2612 {
2613 if (!prev_stmt_info)
2614 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
2615 else
2616 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2617 prev_stmt_info = vinfo_for_stmt (new_stmt);
2618 }
2619 }
2620 }
2621
2622 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
2623 break;
2624
2625 case NARROW:
2626 /* In case the vectorization factor (VF) is bigger than the number
2627 of elements that we can fit in a vectype (nunits), we have to
2628 generate more than one vector stmt - i.e - we need to "unroll"
2629 the vector stmt by a factor VF/nunits. */
2630 for (j = 0; j < ncopies; j++)
2631 {
2632 /* Handle uses. */
2633 if (slp_node)
2634 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
2635 slp_node, -1);
2636 else
2637 {
2638 VEC_truncate (tree, vec_oprnds0, 0);
2639 vect_get_loop_based_defs (&last_oprnd, stmt, dt[0], &vec_oprnds0,
2640 vect_pow2 (multi_step_cvt) - 1);
2641 }
2642
2643 /* Arguments are ready. Create the new vector stmts. */
2644 if (cvt_type)
2645 FOR_EACH_VEC_ELT (tree, vec_oprnds0, i, vop0)
2646 {
2647 if (codecvt1 == CALL_EXPR)
2648 {
2649 new_stmt = gimple_build_call (decl1, 1, vop0);
2650 new_temp = make_ssa_name (vec_dest, new_stmt);
2651 gimple_call_set_lhs (new_stmt, new_temp);
2652 }
2653 else
2654 {
2655 gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
2656 new_temp = make_ssa_name (vec_dest, NULL);
2657 new_stmt = gimple_build_assign_with_ops (codecvt1, new_temp,
2658 vop0, NULL);
2659 }
2660
2661 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2662 VEC_replace (tree, vec_oprnds0, i, new_temp);
2663 }
2664
2665 vect_create_vectorized_demotion_stmts (&vec_oprnds0, multi_step_cvt,
2666 stmt, vec_dsts, gsi,
2667 slp_node, code1,
2668 &prev_stmt_info);
2669 }
2670
2671 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
2672 break;
2673 }
2674
2675 VEC_free (tree, heap, vec_oprnds0);
2676 VEC_free (tree, heap, vec_oprnds1);
2677 VEC_free (tree, heap, vec_dsts);
2678 VEC_free (tree, heap, interm_types);
2679
2680 return true;
2681 }
2682
2683
2684 /* Function vectorizable_assignment.
2685
2686 Check if STMT performs an assignment (copy) that can be vectorized.
2687 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2688 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
2689 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
2690
2691 static bool
2692 vectorizable_assignment (gimple stmt, gimple_stmt_iterator *gsi,
2693 gimple *vec_stmt, slp_tree slp_node)
2694 {
2695 tree vec_dest;
2696 tree scalar_dest;
2697 tree op;
2698 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2699 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2700 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2701 tree new_temp;
2702 tree def;
2703 gimple def_stmt;
2704 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
2705 unsigned int nunits = TYPE_VECTOR_SUBPARTS (vectype);
2706 int ncopies;
2707 int i, j;
2708 VEC(tree,heap) *vec_oprnds = NULL;
2709 tree vop;
2710 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
2711 gimple new_stmt = NULL;
2712 stmt_vec_info prev_stmt_info = NULL;
2713 enum tree_code code;
2714 tree vectype_in;
2715
2716 /* Multiple types in SLP are handled by creating the appropriate number of
2717 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
2718 case of SLP. */
2719 if (slp_node || PURE_SLP_STMT (stmt_info))
2720 ncopies = 1;
2721 else
2722 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
2723
2724 gcc_assert (ncopies >= 1);
2725
2726 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
2727 return false;
2728
2729 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
2730 return false;
2731
2732 /* Is vectorizable assignment? */
2733 if (!is_gimple_assign (stmt))
2734 return false;
2735
2736 scalar_dest = gimple_assign_lhs (stmt);
2737 if (TREE_CODE (scalar_dest) != SSA_NAME)
2738 return false;
2739
2740 code = gimple_assign_rhs_code (stmt);
2741 if (gimple_assign_single_p (stmt)
2742 || code == PAREN_EXPR
2743 || CONVERT_EXPR_CODE_P (code))
2744 op = gimple_assign_rhs1 (stmt);
2745 else
2746 return false;
2747
2748 if (code == VIEW_CONVERT_EXPR)
2749 op = TREE_OPERAND (op, 0);
2750
2751 if (!vect_is_simple_use_1 (op, loop_vinfo, bb_vinfo,
2752 &def_stmt, &def, &dt[0], &vectype_in))
2753 {
2754 if (vect_print_dump_info (REPORT_DETAILS))
2755 fprintf (vect_dump, "use not simple.");
2756 return false;
2757 }
2758
2759 /* We can handle NOP_EXPR conversions that do not change the number
2760 of elements or the vector size. */
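  /* For instance, a conversion between int and unsigned int, or a
     VIEW_CONVERT_EXPR between V4SI and V4SF, changes neither the number
     of elements nor the vector size and can be handled here. */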
2761 if ((CONVERT_EXPR_CODE_P (code)
2762 || code == VIEW_CONVERT_EXPR)
2763 && (!vectype_in
2764 || TYPE_VECTOR_SUBPARTS (vectype_in) != nunits
2765 || (GET_MODE_SIZE (TYPE_MODE (vectype))
2766 != GET_MODE_SIZE (TYPE_MODE (vectype_in)))))
2767 return false;
2768
2769 /* We do not handle bit-precision changes. */
2770 if ((CONVERT_EXPR_CODE_P (code)
2771 || code == VIEW_CONVERT_EXPR)
2772 && INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest))
2773 && ((TYPE_PRECISION (TREE_TYPE (scalar_dest))
2774 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest))))
2775 || ((TYPE_PRECISION (TREE_TYPE (op))
2776 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (op))))))
2777 /* But a conversion that does not change the bit-pattern is ok. */
2778 && !((TYPE_PRECISION (TREE_TYPE (scalar_dest))
2779 > TYPE_PRECISION (TREE_TYPE (op)))
2780 && TYPE_UNSIGNED (TREE_TYPE (op))))
2781 {
2782 if (vect_print_dump_info (REPORT_DETAILS))
2783 fprintf (vect_dump, "type conversion to/from bit-precision "
2784 "unsupported.");
2785 return false;
2786 }
2787
2788 if (!vec_stmt) /* transformation not required. */
2789 {
2790 STMT_VINFO_TYPE (stmt_info) = assignment_vec_info_type;
2791 if (vect_print_dump_info (REPORT_DETAILS))
2792 fprintf (vect_dump, "=== vectorizable_assignment ===");
2793 vect_model_simple_cost (stmt_info, ncopies, dt, NULL);
2794 return true;
2795 }
2796
2797 /** Transform. **/
2798 if (vect_print_dump_info (REPORT_DETAILS))
2799 fprintf (vect_dump, "transform assignment.");
2800
2801 /* Handle def. */
2802 vec_dest = vect_create_destination_var (scalar_dest, vectype);
2803
2804 /* Handle use. */
2805 for (j = 0; j < ncopies; j++)
2806 {
2807 /* Handle uses. */
2808 if (j == 0)
2809 vect_get_vec_defs (op, NULL, stmt, &vec_oprnds, NULL, slp_node, -1);
2810 else
2811 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds, NULL);
2812
2813 /* Arguments are ready. Create the new vector stmt. */
2814 FOR_EACH_VEC_ELT (tree, vec_oprnds, i, vop)
2815 {
2816 if (CONVERT_EXPR_CODE_P (code)
2817 || code == VIEW_CONVERT_EXPR)
2818 vop = build1 (VIEW_CONVERT_EXPR, vectype, vop);
2819 new_stmt = gimple_build_assign (vec_dest, vop);
2820 new_temp = make_ssa_name (vec_dest, new_stmt);
2821 gimple_assign_set_lhs (new_stmt, new_temp);
2822 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2823 if (slp_node)
2824 VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node), new_stmt);
2825 }
2826
2827 if (slp_node)
2828 continue;
2829
2830 if (j == 0)
2831 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2832 else
2833 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2834
2835 prev_stmt_info = vinfo_for_stmt (new_stmt);
2836 }
2837
2838 VEC_free (tree, heap, vec_oprnds);
2839 return true;
2840 }
2841
2842
2843 /* Return TRUE if CODE (a shift operation) is supported for SCALAR_TYPE
2844 either as shift by a scalar or by a vector. */
2845
2846 bool
2847 vect_supportable_shift (enum tree_code code, tree scalar_type)
2848 {
2849
2850 enum machine_mode vec_mode;
2851 optab optab;
2852 int icode;
2853 tree vectype;
2854
2855 vectype = get_vectype_for_scalar_type (scalar_type);
2856 if (!vectype)
2857 return false;
2858
2859 optab = optab_for_tree_code (code, vectype, optab_scalar);
2860 if (!optab
2861 || optab_handler (optab, TYPE_MODE (vectype)) == CODE_FOR_nothing)
2862 {
2863 optab = optab_for_tree_code (code, vectype, optab_vector);
2864 if (!optab
2865 || (optab_handler (optab, TYPE_MODE (vectype))
2866 == CODE_FOR_nothing))
2867 return false;
2868 }
2869
2870 vec_mode = TYPE_MODE (vectype);
2871 icode = (int) optab_handler (optab, vec_mode);
2872 if (icode == CODE_FOR_nothing)
2873 return false;
2874
2875 return true;
2876 }
2877
2878
2879 /* Function vectorizable_shift.
2880
2881 Check if STMT performs a shift operation that can be vectorized.
2882 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2883 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
2884 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
2885
2886 static bool
2887 vectorizable_shift (gimple stmt, gimple_stmt_iterator *gsi,
2888 gimple *vec_stmt, slp_tree slp_node)
2889 {
2890 tree vec_dest;
2891 tree scalar_dest;
2892 tree op0, op1 = NULL;
2893 tree vec_oprnd1 = NULL_TREE;
2894 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2895 tree vectype;
2896 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2897 enum tree_code code;
2898 enum machine_mode vec_mode;
2899 tree new_temp;
2900 optab optab;
2901 int icode;
2902 enum machine_mode optab_op2_mode;
2903 tree def;
2904 gimple def_stmt;
2905 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
2906 gimple new_stmt = NULL;
2907 stmt_vec_info prev_stmt_info;
2908 int nunits_in;
2909 int nunits_out;
2910 tree vectype_out;
2911 tree op1_vectype;
2912 int ncopies;
2913 int j, i;
2914 VEC (tree, heap) *vec_oprnds0 = NULL, *vec_oprnds1 = NULL;
2915 tree vop0, vop1;
2916 unsigned int k;
2917 bool scalar_shift_arg = true;
2918 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
2919 int vf;
2920
2921 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
2922 return false;
2923
2924 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
2925 return false;
2926
2927 /* Is STMT a vectorizable binary/unary operation? */
2928 if (!is_gimple_assign (stmt))
2929 return false;
2930
2931 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
2932 return false;
2933
2934 code = gimple_assign_rhs_code (stmt);
2935
2936 if (!(code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
2937 || code == RROTATE_EXPR))
2938 return false;
2939
2940 scalar_dest = gimple_assign_lhs (stmt);
2941 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
2942 if (TYPE_PRECISION (TREE_TYPE (scalar_dest))
2943 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest))))
2944 {
2945 if (vect_print_dump_info (REPORT_DETAILS))
2946 fprintf (vect_dump, "bit-precision shifts not supported.");
2947 return false;
2948 }
2949
2950 op0 = gimple_assign_rhs1 (stmt);
2951 if (!vect_is_simple_use_1 (op0, loop_vinfo, bb_vinfo,
2952 &def_stmt, &def, &dt[0], &vectype))
2953 {
2954 if (vect_print_dump_info (REPORT_DETAILS))
2955 fprintf (vect_dump, "use not simple.");
2956 return false;
2957 }
2958 /* If op0 is an external or constant def use a vector type with
2959 the same size as the output vector type. */
2960 if (!vectype)
2961 vectype = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
2962 if (vec_stmt)
2963 gcc_assert (vectype);
2964 if (!vectype)
2965 {
2966 if (vect_print_dump_info (REPORT_DETAILS))
2967 {
2968 fprintf (vect_dump, "no vectype for scalar type ");
2969 print_generic_expr (vect_dump, TREE_TYPE (op0), TDF_SLIM);
2970 }
2971
2972 return false;
2973 }
2974
2975 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
2976 nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
2977 if (nunits_out != nunits_in)
2978 return false;
2979
2980 op1 = gimple_assign_rhs2 (stmt);
2981 if (!vect_is_simple_use_1 (op1, loop_vinfo, bb_vinfo, &def_stmt, &def,
2982 &dt[1], &op1_vectype))
2983 {
2984 if (vect_print_dump_info (REPORT_DETAILS))
2985 fprintf (vect_dump, "use not simple.");
2986 return false;
2987 }
2988
2989 if (loop_vinfo)
2990 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
2991 else
2992 vf = 1;
2993
2994 /* Multiple types in SLP are handled by creating the appropriate number of
2995 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
2996 case of SLP. */
2997 if (slp_node || PURE_SLP_STMT (stmt_info))
2998 ncopies = 1;
2999 else
3000 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
3001
3002 gcc_assert (ncopies >= 1);
3003
3004 /* Determine whether the shift amount is a vector, or scalar. If the
3005 shift/rotate amount is a vector, use the vector/vector shift optabs. */
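  /* E.g. a shift count defined inside the loop forces the vector/vector
     form, whereas a constant count such as 3 or a loop-invariant count
     can normally be treated as a scalar shift argument. */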
3006
3007 if (dt[1] == vect_internal_def && !slp_node)
3008 scalar_shift_arg = false;
3009 else if (dt[1] == vect_constant_def
3010 || dt[1] == vect_external_def
3011 || dt[1] == vect_internal_def)
3012 {
3013 /* In SLP, we need to check whether the shift count is the same for all
3014 the statements; in loops, if it is a constant or invariant, it is
3015 always a scalar shift. */
3016 if (slp_node)
3017 {
3018 VEC (gimple, heap) *stmts = SLP_TREE_SCALAR_STMTS (slp_node);
3019 gimple slpstmt;
3020
3021 FOR_EACH_VEC_ELT (gimple, stmts, k, slpstmt)
3022 if (!operand_equal_p (gimple_assign_rhs2 (slpstmt), op1, 0))
3023 scalar_shift_arg = false;
3024 }
3025 }
3026 else
3027 {
3028 if (vect_print_dump_info (REPORT_DETAILS))
3029 fprintf (vect_dump, "operand mode requires invariant argument.");
3030 return false;
3031 }
3032
3033 /* Vector shifted by vector. */
3034 if (!scalar_shift_arg)
3035 {
3036 optab = optab_for_tree_code (code, vectype, optab_vector);
3037 if (vect_print_dump_info (REPORT_DETAILS))
3038 fprintf (vect_dump, "vector/vector shift/rotate found.");
3039 if (!op1_vectype)
3040 op1_vectype = get_same_sized_vectype (TREE_TYPE (op1), vectype_out);
3041 if (op1_vectype == NULL_TREE
3042 || TYPE_MODE (op1_vectype) != TYPE_MODE (vectype))
3043 {
3044 if (vect_print_dump_info (REPORT_DETAILS))
3045 fprintf (vect_dump, "unusable type for last operand in"
3046 " vector/vector shift/rotate.");
3047 return false;
3048 }
3049 }
3050 /* See if the machine has a vector shifted by scalar insn and if not
3051 then see if it has a vector shifted by vector insn. */
3052 else
3053 {
3054 optab = optab_for_tree_code (code, vectype, optab_scalar);
3055 if (optab
3056 && optab_handler (optab, TYPE_MODE (vectype)) != CODE_FOR_nothing)
3057 {
3058 if (vect_print_dump_info (REPORT_DETAILS))
3059 fprintf (vect_dump, "vector/scalar shift/rotate found.");
3060 }
3061 else
3062 {
3063 optab = optab_for_tree_code (code, vectype, optab_vector);
3064 if (optab
3065 && (optab_handler (optab, TYPE_MODE (vectype))
3066 != CODE_FOR_nothing))
3067 {
3068 scalar_shift_arg = false;
3069
3070 if (vect_print_dump_info (REPORT_DETAILS))
3071 fprintf (vect_dump, "vector/vector shift/rotate found.");
3072
3073 /* Unlike the other binary operators, shifts/rotates have
3074 an rhs of type int rather than the same type as the lhs,
3075 so make sure the scalar is of the right type if we are
3076 dealing with vectors of long long/long/short/char. */
3077 if (dt[1] == vect_constant_def)
3078 op1 = fold_convert (TREE_TYPE (vectype), op1);
3079 else if (!useless_type_conversion_p (TREE_TYPE (vectype),
3080 TREE_TYPE (op1)))
3081 {
3082 if (slp_node
3083 && TYPE_MODE (TREE_TYPE (vectype))
3084 != TYPE_MODE (TREE_TYPE (op1)))
3085 {
3086 if (vect_print_dump_info (REPORT_DETAILS))
3087 fprintf (vect_dump, "unusable type for last operand in"
3088 " vector/vector shift/rotate.");
3089 return false;
3090 }
3091 if (vec_stmt && !slp_node)
3092 {
3093 op1 = fold_convert (TREE_TYPE (vectype), op1);
3094 op1 = vect_init_vector (stmt, op1,
3095 TREE_TYPE (vectype), NULL);
3096 }
3097 }
3098 }
3099 }
3100 }
3101
3102 /* Supportable by target? */
3103 if (!optab)
3104 {
3105 if (vect_print_dump_info (REPORT_DETAILS))
3106 fprintf (vect_dump, "no optab.");
3107 return false;
3108 }
3109 vec_mode = TYPE_MODE (vectype);
3110 icode = (int) optab_handler (optab, vec_mode);
3111 if (icode == CODE_FOR_nothing)
3112 {
3113 if (vect_print_dump_info (REPORT_DETAILS))
3114 fprintf (vect_dump, "op not supported by target.");
3115 /* Check only during analysis. */
3116 if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD
3117 || (vf < vect_min_worthwhile_factor (code)
3118 && !vec_stmt))
3119 return false;
3120 if (vect_print_dump_info (REPORT_DETAILS))
3121 fprintf (vect_dump, "proceeding using word mode.");
3122 }
3123
3124 /* Worthwhile without SIMD support? Check only during analysis. */
3125 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
3126 && vf < vect_min_worthwhile_factor (code)
3127 && !vec_stmt)
3128 {
3129 if (vect_print_dump_info (REPORT_DETAILS))
3130 fprintf (vect_dump, "not worthwhile without SIMD support.");
3131 return false;
3132 }
3133
3134 if (!vec_stmt) /* transformation not required. */
3135 {
3136 STMT_VINFO_TYPE (stmt_info) = shift_vec_info_type;
3137 if (vect_print_dump_info (REPORT_DETAILS))
3138 fprintf (vect_dump, "=== vectorizable_shift ===");
3139 vect_model_simple_cost (stmt_info, ncopies, dt, NULL);
3140 return true;
3141 }
3142
3143 /** Transform. **/
3144
3145 if (vect_print_dump_info (REPORT_DETAILS))
3146 fprintf (vect_dump, "transform binary/unary operation.");
3147
3148 /* Handle def. */
3149 vec_dest = vect_create_destination_var (scalar_dest, vectype);
3150
3151 /* Allocate VECs for vector operands. In case of SLP, vector operands are
3152 created in the previous stages of the recursion, so no allocation is
3153 needed, except for the case of shift with scalar shift argument. In that
3154 case we store the scalar operand in VEC_OPRNDS1 for every vector stmt to
3155 be created to vectorize the SLP group, i.e., SLP_NODE->VEC_STMTS_SIZE.
3156 In case of loop-based vectorization we allocate VECs of size 1. We
3157 allocate VEC_OPRNDS1 only in case of binary operation. */
3158 if (!slp_node)
3159 {
3160 vec_oprnds0 = VEC_alloc (tree, heap, 1);
3161 vec_oprnds1 = VEC_alloc (tree, heap, 1);
3162 }
3163 else if (scalar_shift_arg)
3164 vec_oprnds1 = VEC_alloc (tree, heap, slp_node->vec_stmts_size);
3165
3166 prev_stmt_info = NULL;
3167 for (j = 0; j < ncopies; j++)
3168 {
3169 /* Handle uses. */
3170 if (j == 0)
3171 {
3172 if (scalar_shift_arg)
3173 {
3174 /* Vector shl and shr insn patterns can be defined with scalar
3175 operand 2 (shift operand). In this case, use constant or loop
3176 invariant op1 directly, without extending it to vector mode
3177 first. */
3178 optab_op2_mode = insn_data[icode].operand[2].mode;
3179 if (!VECTOR_MODE_P (optab_op2_mode))
3180 {
3181 if (vect_print_dump_info (REPORT_DETAILS))
3182 fprintf (vect_dump, "operand 1 using scalar mode.");
3183 vec_oprnd1 = op1;
3184 VEC_quick_push (tree, vec_oprnds1, vec_oprnd1);
3185 if (slp_node)
3186 {
3187 /* Store vec_oprnd1 for every vector stmt to be created
3188 for SLP_NODE. We check during the analysis that all
3189 the shift arguments are the same.
3190 TODO: Allow different constants for different vector
3191 stmts generated for an SLP instance. */
3192 for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
3193 VEC_quick_push (tree, vec_oprnds1, vec_oprnd1);
3194 }
3195 }
3196 }
3197
3198 /* vec_oprnd1 is available if operand 1 should be of a scalar type
3199 (a special case for certain kinds of vector shifts); otherwise,
3200 operand 1 should be of a vector type (the usual case). */
3201 if (vec_oprnd1)
3202 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
3203 slp_node, -1);
3204 else
3205 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
3206 slp_node, -1);
3207 }
3208 else
3209 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, &vec_oprnds1);
3210
3211 /* Arguments are ready. Create the new vector stmt. */
3212 FOR_EACH_VEC_ELT (tree, vec_oprnds0, i, vop0)
3213 {
3214 vop1 = VEC_index (tree, vec_oprnds1, i);
3215 new_stmt = gimple_build_assign_with_ops (code, vec_dest, vop0, vop1);
3216 new_temp = make_ssa_name (vec_dest, new_stmt);
3217 gimple_assign_set_lhs (new_stmt, new_temp);
3218 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3219 if (slp_node)
3220 VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node), new_stmt);
3221 }
3222
3223 if (slp_node)
3224 continue;
3225
3226 if (j == 0)
3227 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3228 else
3229 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3230 prev_stmt_info = vinfo_for_stmt (new_stmt);
3231 }
3232
3233 VEC_free (tree, heap, vec_oprnds0);
3234 VEC_free (tree, heap, vec_oprnds1);
3235
3236 return true;
3237 }
3238
3239
3240 /* Function vectorizable_operation.
3241
3242 Check if STMT performs a binary, unary or ternary operation that can
3243 be vectorized.
3244 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
3245 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
3246 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
3247
3248 static bool
3249 vectorizable_operation (gimple stmt, gimple_stmt_iterator *gsi,
3250 gimple *vec_stmt, slp_tree slp_node)
3251 {
3252 tree vec_dest;
3253 tree scalar_dest;
3254 tree op0, op1 = NULL_TREE, op2 = NULL_TREE;
3255 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3256 tree vectype;
3257 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3258 enum tree_code code;
3259 enum machine_mode vec_mode;
3260 tree new_temp;
3261 int op_type;
3262 optab optab;
3263 int icode;
3264 tree def;
3265 gimple def_stmt;
3266 enum vect_def_type dt[3]
3267 = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
3268 gimple new_stmt = NULL;
3269 stmt_vec_info prev_stmt_info;
3270 int nunits_in;
3271 int nunits_out;
3272 tree vectype_out;
3273 int ncopies;
3274 int j, i;
3275 VEC(tree,heap) *vec_oprnds0 = NULL, *vec_oprnds1 = NULL, *vec_oprnds2 = NULL;
3276 tree vop0, vop1, vop2;
3277 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
3278 int vf;
3279
3280 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
3281 return false;
3282
3283 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
3284 return false;
3285
3286 /* Is STMT a vectorizable binary/unary operation? */
3287 if (!is_gimple_assign (stmt))
3288 return false;
3289
3290 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
3291 return false;
3292
3293 code = gimple_assign_rhs_code (stmt);
3294
3295 /* For pointer addition, we should use the normal plus for
3296 the vector addition. */
3297 if (code == POINTER_PLUS_EXPR)
3298 code = PLUS_EXPR;
3299
3300 /* Support only unary, binary, and ternary operations. */
3301 op_type = TREE_CODE_LENGTH (code);
3302 if (op_type != unary_op && op_type != binary_op && op_type != ternary_op)
3303 {
3304 if (vect_print_dump_info (REPORT_DETAILS))
3305 fprintf (vect_dump, "num. args = %d (not unary/binary/ternary op).",
3306 op_type);
3307 return false;
3308 }
3309
3310 scalar_dest = gimple_assign_lhs (stmt);
3311 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
3312
3313 /* Most operations cannot handle bit-precision types without extra
3314 truncations. */
3315 if ((TYPE_PRECISION (TREE_TYPE (scalar_dest))
3316 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest))))
3317 /* Exceptions are bitwise binary operations. */
3318 && code != BIT_IOR_EXPR
3319 && code != BIT_XOR_EXPR
3320 && code != BIT_AND_EXPR)
3321 {
3322 if (vect_print_dump_info (REPORT_DETAILS))
3323 fprintf (vect_dump, "bit-precision arithmetic not supported.");
3324 return false;
3325 }
3326
3327 op0 = gimple_assign_rhs1 (stmt);
3328 if (!vect_is_simple_use_1 (op0, loop_vinfo, bb_vinfo,
3329 &def_stmt, &def, &dt[0], &vectype))
3330 {
3331 if (vect_print_dump_info (REPORT_DETAILS))
3332 fprintf (vect_dump, "use not simple.");
3333 return false;
3334 }
3335 /* If op0 is an external or constant def use a vector type with
3336 the same size as the output vector type. */
3337 if (!vectype)
3338 vectype = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
3339 if (vec_stmt)
3340 gcc_assert (vectype);
3341 if (!vectype)
3342 {
3343 if (vect_print_dump_info (REPORT_DETAILS))
3344 {
3345 fprintf (vect_dump, "no vectype for scalar type ");
3346 print_generic_expr (vect_dump, TREE_TYPE (op0), TDF_SLIM);
3347 }
3348
3349 return false;
3350 }
3351
3352 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
3353 nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
3354 if (nunits_out != nunits_in)
3355 return false;
3356
3357 if (op_type == binary_op || op_type == ternary_op)
3358 {
3359 op1 = gimple_assign_rhs2 (stmt);
3360 if (!vect_is_simple_use (op1, loop_vinfo, bb_vinfo, &def_stmt, &def,
3361 &dt[1]))
3362 {
3363 if (vect_print_dump_info (REPORT_DETAILS))
3364 fprintf (vect_dump, "use not simple.");
3365 return false;
3366 }
3367 }
3368 if (op_type == ternary_op)
3369 {
3370 op2 = gimple_assign_rhs3 (stmt);
3371 if (!vect_is_simple_use (op2, loop_vinfo, bb_vinfo, &def_stmt, &def,
3372 &dt[2]))
3373 {
3374 if (vect_print_dump_info (REPORT_DETAILS))
3375 fprintf (vect_dump, "use not simple.");
3376 return false;
3377 }
3378 }
3379
3380 if (loop_vinfo)
3381 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
3382 else
3383 vf = 1;
3384
3385 /* Multiple types in SLP are handled by creating the appropriate number of
3386 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
3387 case of SLP. */
3388 if (slp_node || PURE_SLP_STMT (stmt_info))
3389 ncopies = 1;
3390 else
3391 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
3392
3393 gcc_assert (ncopies >= 1);
3394
3395 /* Shifts are handled in vectorizable_shift (). */
3396 if (code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
3397 || code == RROTATE_EXPR)
3398 return false;
3399
3400 optab = optab_for_tree_code (code, vectype, optab_default);
3401
3402 /* Supportable by target? */
3403 if (!optab)
3404 {
3405 if (vect_print_dump_info (REPORT_DETAILS))
3406 fprintf (vect_dump, "no optab.");
3407 return false;
3408 }
3409 vec_mode = TYPE_MODE (vectype);
3410 icode = (int) optab_handler (optab, vec_mode);
3411 if (icode == CODE_FOR_nothing)
3412 {
3413 if (vect_print_dump_info (REPORT_DETAILS))
3414 fprintf (vect_dump, "op not supported by target.");
3415 /* Check only during analysis. */
3416 if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD
3417 || (vf < vect_min_worthwhile_factor (code)
3418 && !vec_stmt))
3419 return false;
3420 if (vect_print_dump_info (REPORT_DETAILS))
3421 fprintf (vect_dump, "proceeding using word mode.");
3422 }
3423
3424 /* Worthwhile without SIMD support? Check only during analysis. */
3425 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
3426 && vf < vect_min_worthwhile_factor (code)
3427 && !vec_stmt)
3428 {
3429 if (vect_print_dump_info (REPORT_DETAILS))
3430 fprintf (vect_dump, "not worthwhile without SIMD support.");
3431 return false;
3432 }
3433
3434 if (!vec_stmt) /* transformation not required. */
3435 {
3436 STMT_VINFO_TYPE (stmt_info) = op_vec_info_type;
3437 if (vect_print_dump_info (REPORT_DETAILS))
3438 fprintf (vect_dump, "=== vectorizable_operation ===");
3439 vect_model_simple_cost (stmt_info, ncopies, dt, NULL);
3440 return true;
3441 }
3442
3443 /** Transform. **/
3444
3445 if (vect_print_dump_info (REPORT_DETAILS))
3446 fprintf (vect_dump, "transform binary/unary operation.");
3447
3448 /* Handle def. */
3449 vec_dest = vect_create_destination_var (scalar_dest, vectype);
3450
3451 /* Allocate VECs for vector operands. In case of SLP, vector operands are
3452 created in the previous stages of the recursion, so no allocation is
3453 needed. (Shifts with a scalar shift argument are handled in
3454 vectorizable_shift (), not here.) In case of loop-based vectorization
3455 we allocate VECs of size 1. We allocate VEC_OPRNDS1 only for binary
3456 and ternary operations, and VEC_OPRNDS2 only for ternary
3457 operations. */
3458 if (!slp_node)
3459 {
3460 vec_oprnds0 = VEC_alloc (tree, heap, 1);
3461 if (op_type == binary_op || op_type == ternary_op)
3462 vec_oprnds1 = VEC_alloc (tree, heap, 1);
3463 if (op_type == ternary_op)
3464 vec_oprnds2 = VEC_alloc (tree, heap, 1);
3465 }
3466
3467 /* In case the vectorization factor (VF) is bigger than the number
3468 of elements that we can fit in a vectype (nunits), we have to generate
3469 more than one vector stmt - i.e - we need to "unroll" the
3470 vector stmt by a factor VF/nunits. In doing so, we record a pointer
3471 from one copy of the vector stmt to the next, in the field
3472 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
3473 stages to find the correct vector defs to be used when vectorizing
3474 stmts that use the defs of the current stmt. The example below
3475 illustrates the vectorization process when VF=16 and nunits=4 (i.e.,
3476 we need to create 4 vectorized stmts):
3477
3478 before vectorization:
3479 RELATED_STMT VEC_STMT
3480 S1: x = memref - -
3481 S2: z = x + 1 - -
3482
3483 step 1: vectorize stmt S1 (done in vectorizable_load. See more details
3484 there):
3485 RELATED_STMT VEC_STMT
3486 VS1_0: vx0 = memref0 VS1_1 -
3487 VS1_1: vx1 = memref1 VS1_2 -
3488 VS1_2: vx2 = memref2 VS1_3 -
3489 VS1_3: vx3 = memref3 - -
3490 S1: x = load - VS1_0
3491 S2: z = x + 1 - -
3492
3493 step2: vectorize stmt S2 (done here):
3494 To vectorize stmt S2 we first need to find the relevant vector
3495 def for the first operand 'x'. This is, as usual, obtained from
3496 the vector stmt recorded in the STMT_VINFO_VEC_STMT of the stmt
3497 that defines 'x' (S1). This way we find the stmt VS1_0, and the
3498 relevant vector def 'vx0'. Having found 'vx0' we can generate
3499 the vector stmt VS2_0, and as usual, record it in the
3500 STMT_VINFO_VEC_STMT of stmt S2.
3501 When creating the second copy (VS2_1), we obtain the relevant vector
3502 def from the vector stmt recorded in the STMT_VINFO_RELATED_STMT of
3503 stmt VS1_0. This way we find the stmt VS1_1 and the relevant
3504 vector def 'vx1'. Using 'vx1' we create stmt VS2_1 and record a
3505 pointer to it in the STMT_VINFO_RELATED_STMT of the vector stmt VS2_0.
3506 Similarly when creating stmts VS2_2 and VS2_3. This is the resulting
3507 chain of stmts and pointers:
3508 RELATED_STMT VEC_STMT
3509 VS1_0: vx0 = memref0 VS1_1 -
3510 VS1_1: vx1 = memref1 VS1_2 -
3511 VS1_2: vx2 = memref2 VS1_3 -
3512 VS1_3: vx3 = memref3 - -
3513 S1: x = load - VS1_0
3514 VS2_0: vz0 = vx0 + v1 VS2_1 -
3515 VS2_1: vz1 = vx1 + v1 VS2_2 -
3516 VS2_2: vz2 = vx2 + v1 VS2_3 -
3517 VS2_3: vz3 = vx3 + v1 - -
3518 S2: z = x + 1 - VS2_0 */
3519
3520 prev_stmt_info = NULL;
3521 for (j = 0; j < ncopies; j++)
3522 {
3523 /* Handle uses. */
3524 if (j == 0)
3525 {
3526 if (op_type == binary_op || op_type == ternary_op)
3527 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
3528 slp_node, -1);
3529 else
3530 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
3531 slp_node, -1);
3532 if (op_type == ternary_op)
3533 {
3534 vec_oprnds2 = VEC_alloc (tree, heap, 1);
3535 VEC_quick_push (tree, vec_oprnds2,
3536 vect_get_vec_def_for_operand (op2, stmt, NULL));
3537 }
3538 }
3539 else
3540 {
3541 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, &vec_oprnds1);
3542 if (op_type == ternary_op)
3543 {
3544 tree vec_oprnd = VEC_pop (tree, vec_oprnds2);
3545 VEC_quick_push (tree, vec_oprnds2,
3546 vect_get_vec_def_for_stmt_copy (dt[2],
3547 vec_oprnd));
3548 }
3549 }
3550
3551 /* Arguments are ready. Create the new vector stmt. */
3552 FOR_EACH_VEC_ELT (tree, vec_oprnds0, i, vop0)
3553 {
3554 vop1 = ((op_type == binary_op || op_type == ternary_op)
3555 ? VEC_index (tree, vec_oprnds1, i) : NULL_TREE);
3556 vop2 = ((op_type == ternary_op)
3557 ? VEC_index (tree, vec_oprnds2, i) : NULL_TREE);
3558 new_stmt = gimple_build_assign_with_ops3 (code, vec_dest,
3559 vop0, vop1, vop2);
3560 new_temp = make_ssa_name (vec_dest, new_stmt);
3561 gimple_assign_set_lhs (new_stmt, new_temp);
3562 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3563 if (slp_node)
3564 VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node), new_stmt);
3565 }
3566
3567 if (slp_node)
3568 continue;
3569
3570 if (j == 0)
3571 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3572 else
3573 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3574 prev_stmt_info = vinfo_for_stmt (new_stmt);
3575 }
3576
3577 VEC_free (tree, heap, vec_oprnds0);
3578 if (vec_oprnds1)
3579 VEC_free (tree, heap, vec_oprnds1);
3580 if (vec_oprnds2)
3581 VEC_free (tree, heap, vec_oprnds2);
3582
3583 return true;
3584 }
3585
3586
3587 /* Function vectorizable_store.
3588
3589 Check if STMT defines a non-scalar data-ref (array/pointer/structure) that
3590 can be vectorized.
3591 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
3592 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
3593 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
3594
3595 static bool
3596 vectorizable_store (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
3597 slp_tree slp_node)
3598 {
3599 tree scalar_dest;
3600 tree data_ref;
3601 tree op;
3602 tree vec_oprnd = NULL_TREE;
3603 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3604 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr = NULL;
3605 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
3606 tree elem_type;
3607 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3608 struct loop *loop = NULL;
3609 enum machine_mode vec_mode;
3610 tree dummy;
3611 enum dr_alignment_support alignment_support_scheme;
3612 tree def;
3613 gimple def_stmt;
3614 enum vect_def_type dt;
3615 stmt_vec_info prev_stmt_info = NULL;
3616 tree dataref_ptr = NULL_TREE;
3617 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
3618 int ncopies;
3619 int j;
3620 gimple next_stmt, first_stmt = NULL;
3621 bool strided_store = false;
3622 bool store_lanes_p = false;
3623 unsigned int group_size, i;
3624 VEC(tree,heap) *dr_chain = NULL, *oprnds = NULL, *result_chain = NULL;
3625 bool inv_p;
3626 VEC(tree,heap) *vec_oprnds = NULL;
3627 bool slp = (slp_node != NULL);
3628 unsigned int vec_num;
3629 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
3630 tree aggr_type;
3631
3632 if (loop_vinfo)
3633 loop = LOOP_VINFO_LOOP (loop_vinfo);
3634
3635 /* Multiple types in SLP are handled by creating the appropriate number of
3636 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
3637 case of SLP. */
3638 if (slp || PURE_SLP_STMT (stmt_info))
3639 ncopies = 1;
3640 else
3641 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
3642
3643 gcc_assert (ncopies >= 1);
3644
3645 /* FORNOW. This restriction should be relaxed. */
3646 if (loop && nested_in_vect_loop_p (loop, stmt) && ncopies > 1)
3647 {
3648 if (vect_print_dump_info (REPORT_DETAILS))
3649 fprintf (vect_dump, "multiple types in nested loop.");
3650 return false;
3651 }
3652
3653 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
3654 return false;
3655
3656 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
3657 return false;
3658
3659 /* Is vectorizable store? */
3660
3661 if (!is_gimple_assign (stmt))
3662 return false;
3663
3664 scalar_dest = gimple_assign_lhs (stmt);
3665 if (TREE_CODE (scalar_dest) == VIEW_CONVERT_EXPR
3666 && is_pattern_stmt_p (stmt_info))
3667 scalar_dest = TREE_OPERAND (scalar_dest, 0);
3668 if (TREE_CODE (scalar_dest) != ARRAY_REF
3669 && TREE_CODE (scalar_dest) != INDIRECT_REF
3670 && TREE_CODE (scalar_dest) != COMPONENT_REF
3671 && TREE_CODE (scalar_dest) != IMAGPART_EXPR
3672 && TREE_CODE (scalar_dest) != REALPART_EXPR
3673 && TREE_CODE (scalar_dest) != MEM_REF)
3674 return false;
3675
3676 gcc_assert (gimple_assign_single_p (stmt));
3677 op = gimple_assign_rhs1 (stmt);
3678 if (!vect_is_simple_use (op, loop_vinfo, bb_vinfo, &def_stmt, &def, &dt))
3679 {
3680 if (vect_print_dump_info (REPORT_DETAILS))
3681 fprintf (vect_dump, "use not simple.");
3682 return false;
3683 }
3684
3685 elem_type = TREE_TYPE (vectype);
3686 vec_mode = TYPE_MODE (vectype);
3687
3688 /* FORNOW. In some cases we can vectorize even if the data-type is not
3689 supported (e.g. array initialization with 0). */
3690 if (optab_handler (mov_optab, vec_mode) == CODE_FOR_nothing)
3691 return false;
3692
3693 if (!STMT_VINFO_DATA_REF (stmt_info))
3694 return false;
3695
3696 if (tree_int_cst_compare (DR_STEP (dr), size_zero_node) < 0)
3697 {
3698 if (vect_print_dump_info (REPORT_DETAILS))
3699 fprintf (vect_dump, "negative step for store.");
3700 return false;
3701 }
3702
3703 if (STMT_VINFO_STRIDED_ACCESS (stmt_info))
3704 {
3705 strided_store = true;
3706 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
3707 if (!slp && !PURE_SLP_STMT (stmt_info))
3708 {
3709 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
3710 if (vect_store_lanes_supported (vectype, group_size))
3711 store_lanes_p = true;
3712 else if (!vect_strided_store_supported (vectype, group_size))
3713 return false;
3714 }
3715
3716 if (first_stmt == stmt)
3717 {
3718 /* STMT is the leader of the group. Check the operands of all the
3719 stmts of the group. */
3720 next_stmt = GROUP_NEXT_ELEMENT (stmt_info);
3721 while (next_stmt)
3722 {
3723 gcc_assert (gimple_assign_single_p (next_stmt));
3724 op = gimple_assign_rhs1 (next_stmt);
3725 if (!vect_is_simple_use (op, loop_vinfo, bb_vinfo, &def_stmt,
3726 &def, &dt))
3727 {
3728 if (vect_print_dump_info (REPORT_DETAILS))
3729 fprintf (vect_dump, "use not simple.");
3730 return false;
3731 }
3732 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
3733 }
3734 }
3735 }
3736
3737 if (!vec_stmt) /* transformation not required. */
3738 {
3739 STMT_VINFO_TYPE (stmt_info) = store_vec_info_type;
3740 vect_model_store_cost (stmt_info, ncopies, store_lanes_p, dt, NULL);
3741 return true;
3742 }
3743
3744 /** Transform. **/
3745
3746 if (strided_store)
3747 {
3748 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
3749 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
3750
3751 GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt))++;
3752
3753 /* FORNOW */
3754 gcc_assert (!loop || !nested_in_vect_loop_p (loop, stmt));
3755
3756 /* We vectorize all the stmts of the interleaving group when we
3757 reach the last stmt in the group. */
3758 if (GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt))
3759 < GROUP_SIZE (vinfo_for_stmt (first_stmt))
3760 && !slp)
3761 {
3762 *vec_stmt = NULL;
3763 return true;
3764 }
3765
3766 if (slp)
3767 {
3768 strided_store = false;
3769 /* VEC_NUM is the number of vect stmts to be created for this
3770 group. */
3771 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
3772 first_stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (slp_node), 0);
3773 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
3774 op = gimple_assign_rhs1 (first_stmt);
3775 }
3776 else
3777 /* VEC_NUM is the number of vect stmts to be created for this
3778 group. */
3779 vec_num = group_size;
3780 }
3781 else
3782 {
3783 first_stmt = stmt;
3784 first_dr = dr;
3785 group_size = vec_num = 1;
3786 }
3787
3788 if (vect_print_dump_info (REPORT_DETAILS))
3789 fprintf (vect_dump, "transform store. ncopies = %d",ncopies);
3790
3791 dr_chain = VEC_alloc (tree, heap, group_size);
3792 oprnds = VEC_alloc (tree, heap, group_size);
3793
3794 alignment_support_scheme = vect_supportable_dr_alignment (first_dr, false);
3795 gcc_assert (alignment_support_scheme);
3796 /* Targets with store-lane instructions must not require explicit
3797 realignment. */
3798 gcc_assert (!store_lanes_p
3799 || alignment_support_scheme == dr_aligned
3800 || alignment_support_scheme == dr_unaligned_supported);
3801
3802 if (store_lanes_p)
3803 aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
3804 else
3805 aggr_type = vectype;
3806
3807 /* In case the vectorization factor (VF) is bigger than the number
3808 of elements that we can fit in a vectype (nunits), we have to generate
3809 more than one vector stmt - i.e - we need to "unroll" the
3810 vector stmt by a factor VF/nunits. For more details see documentation in
3811 vect_get_vec_def_for_stmt_copy. */
3812
3813 /* In case of interleaving (non-unit strided access):
3814
3815 S1: &base + 2 = x2
3816 S2: &base = x0
3817 S3: &base + 1 = x1
3818 S4: &base + 3 = x3
3819
3820 We create vectorized stores starting from the base address (the access of
3821 the first stmt in the chain, S2 in the above example) when the last store
3822 stmt of the chain (S4) is reached:
3823
3824 VS1: &base = vx2
3825 VS2: &base + vec_size*1 = vx0
3826 VS3: &base + vec_size*2 = vx1
3827 VS4: &base + vec_size*3 = vx3
3828
3829 Then permutation statements are generated:
3830
3831 VS5: vx5 = VEC_INTERLEAVE_HIGH_EXPR < vx0, vx3 >
3832 VS6: vx6 = VEC_INTERLEAVE_LOW_EXPR < vx0, vx3 >
3833 ...
3834
3835 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
3836 (the order of the data-refs in the output of vect_permute_store_chain
3837 corresponds to the order of scalar stmts in the interleaving chain - see
3838 the documentation of vect_permute_store_chain()).
3839
3840 In case of both multiple types and interleaving, the above vector stores and
3841 permutation stmts are created for every copy. The result vector stmts are
3842 put in STMT_VINFO_VEC_STMT for the first copy and in the corresponding
3843 STMT_VINFO_RELATED_STMT for the next copies.
3844 */
3845
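/* Illustrative scalar source (hypothetical names) producing the
   interleaved-store pattern above with GROUP_SIZE == 4; the four strided
   stores in the loop body correspond to S1..S4:

     for (i = 0; i < n; i++)
       {
         base[4 * i + 2] = x2[i];    S1
         base[4 * i]     = x0[i];    S2
         base[4 * i + 1] = x1[i];    S3
         base[4 * i + 3] = x3[i];    S4
       }   */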
3846 prev_stmt_info = NULL;
3847 for (j = 0; j < ncopies; j++)
3848 {
3849 gimple new_stmt;
3850 gimple ptr_incr;
3851
3852 if (j == 0)
3853 {
3854 if (slp)
3855 {
3856 /* Get vectorized arguments for SLP_NODE. */
3857 vect_get_vec_defs (op, NULL_TREE, stmt, &vec_oprnds,
3858 NULL, slp_node, -1);
3859
3860 vec_oprnd = VEC_index (tree, vec_oprnds, 0);
3861 }
3862 else
3863 {
3864 /* For interleaved stores we collect vectorized defs for all the
3865 stores in the group in DR_CHAIN and OPRNDS. DR_CHAIN is then
3866 used as an input to vect_permute_store_chain(), and OPRNDS as
3867 an input to vect_get_vec_def_for_stmt_copy() for the next copy.
3868
3869 If the store is not strided, GROUP_SIZE is 1, and DR_CHAIN and
3870 OPRNDS are of size 1. */
3871 next_stmt = first_stmt;
3872 for (i = 0; i < group_size; i++)
3873 {
3874 /* Since gaps are not supported for interleaved stores,
3875 GROUP_SIZE is the exact number of stmts in the chain.
3876 Therefore, NEXT_STMT can't be NULL_TREE. In case
3877 there is no interleaving, GROUP_SIZE is 1, and only one
3878 iteration of the loop will be executed. */
3879 gcc_assert (next_stmt
3880 && gimple_assign_single_p (next_stmt));
3881 op = gimple_assign_rhs1 (next_stmt);
3882
3883 vec_oprnd = vect_get_vec_def_for_operand (op, next_stmt,
3884 NULL);
3885 VEC_quick_push(tree, dr_chain, vec_oprnd);
3886 VEC_quick_push(tree, oprnds, vec_oprnd);
3887 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
3888 }
3889 }
3890
3891 /* We should have caught mismatched types earlier. */
3892 gcc_assert (useless_type_conversion_p (vectype,
3893 TREE_TYPE (vec_oprnd)));
3894 dataref_ptr = vect_create_data_ref_ptr (first_stmt, aggr_type, NULL,
3895 NULL_TREE, &dummy, gsi,
3896 &ptr_incr, false, &inv_p);
3897 gcc_assert (bb_vinfo || !inv_p);
3898 }
3899 else
3900 {
3901 /* For interleaved stores we created vectorized defs for all the
3902 defs stored in OPRNDS in the previous iteration (previous copy).
3903 DR_CHAIN is then used as an input to vect_permute_store_chain(),
3904 and OPRNDS as an input to vect_get_vec_def_for_stmt_copy() for the
3905 next copy.
3906 If the store is not strided, GROUP_SIZE is 1, and DR_CHAIN and
3907 OPRNDS are of size 1. */
3908 for (i = 0; i < group_size; i++)
3909 {
3910 op = VEC_index (tree, oprnds, i);
3911 vect_is_simple_use (op, loop_vinfo, bb_vinfo, &def_stmt, &def,
3912 &dt);
3913 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, op);
3914 VEC_replace(tree, dr_chain, i, vec_oprnd);
3915 VEC_replace(tree, oprnds, i, vec_oprnd);
3916 }
3917 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
3918 TYPE_SIZE_UNIT (aggr_type));
3919 }
3920
3921 if (store_lanes_p)
3922 {
3923 tree vec_array;
3924
3925 /* Combine all the vectors into an array. */
3926 vec_array = create_vector_array (vectype, vec_num);
3927 for (i = 0; i < vec_num; i++)
3928 {
3929 vec_oprnd = VEC_index (tree, dr_chain, i);
3930 write_vector_array (stmt, gsi, vec_oprnd, vec_array, i);
3931 }
3932
3933 /* Emit:
3934 MEM_REF[...all elements...] = STORE_LANES (VEC_ARRAY). */
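/* Note: on targets that provide the vec_store_lanes optab (e.g. the ARM
   NEON vst2/vst3/vst4 family) this single internal call stores the whole
   interleaved group at once, so no separate permutation statements from
   vect_permute_store_chain are needed here.  */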
3935 data_ref = create_array_ref (aggr_type, dataref_ptr, first_dr);
3936 new_stmt = gimple_build_call_internal (IFN_STORE_LANES, 1, vec_array);
3937 gimple_call_set_lhs (new_stmt, data_ref);
3938 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3939 mark_symbols_for_renaming (new_stmt);
3940 }
3941 else
3942 {
3943 new_stmt = NULL;
3944 if (strided_store)
3945 {
3946 result_chain = VEC_alloc (tree, heap, group_size);
3947 /* Permute. */
3948 vect_permute_store_chain (dr_chain, group_size, stmt, gsi,
3949 &result_chain);
3950 }
3951
3952 next_stmt = first_stmt;
3953 for (i = 0; i < vec_num; i++)
3954 {
3955 struct ptr_info_def *pi;
3956
3957 if (i > 0)
3958 /* Bump the vector pointer. */
3959 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
3960 stmt, NULL_TREE);
3961
3962 if (slp)
3963 vec_oprnd = VEC_index (tree, vec_oprnds, i);
3964 else if (strided_store)
3965 /* For strided stores vectorized defs are interleaved in
3966 vect_permute_store_chain(). */
3967 vec_oprnd = VEC_index (tree, result_chain, i);
3968
3969 data_ref = build2 (MEM_REF, TREE_TYPE (vec_oprnd), dataref_ptr,
3970 build_int_cst (reference_alias_ptr_type
3971 (DR_REF (first_dr)), 0));
3972 pi = get_ptr_info (dataref_ptr);
3973 pi->align = TYPE_ALIGN_UNIT (vectype);
3974 if (aligned_access_p (first_dr))
3975 pi->misalign = 0;
3976 else if (DR_MISALIGNMENT (first_dr) == -1)
3977 {
3978 TREE_TYPE (data_ref)
3979 = build_aligned_type (TREE_TYPE (data_ref),
3980 TYPE_ALIGN (elem_type));
3981 pi->align = TYPE_ALIGN_UNIT (elem_type);
3982 pi->misalign = 0;
3983 }
3984 else
3985 {
3986 TREE_TYPE (data_ref)
3987 = build_aligned_type (TREE_TYPE (data_ref),
3988 TYPE_ALIGN (elem_type));
3989 pi->misalign = DR_MISALIGNMENT (first_dr);
3990 }
3991
3992 /* Arguments are ready. Create the new vector stmt. */
3993 new_stmt = gimple_build_assign (data_ref, vec_oprnd);
3994 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3995 mark_symbols_for_renaming (new_stmt);
3996
3997 if (slp)
3998 continue;
3999
4000 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
4001 if (!next_stmt)
4002 break;
4003 }
4004 }
4005 if (!slp)
4006 {
4007 if (j == 0)
4008 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4009 else
4010 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4011 prev_stmt_info = vinfo_for_stmt (new_stmt);
4012 }
4013 }
4014
4015 VEC_free (tree, heap, dr_chain);
4016 VEC_free (tree, heap, oprnds);
4017 if (result_chain)
4018 VEC_free (tree, heap, result_chain);
4019 if (vec_oprnds)
4020 VEC_free (tree, heap, vec_oprnds);
4021
4022 return true;
4023 }
4024
4025 /* Given a vector type VECTYPE and a permutation SEL, return the
4026 VECTOR_CST mask that implements the permutation of the vector
4027 elements. If that is impossible to do, return NULL. */
4028
4029 static tree
4030 gen_perm_mask (tree vectype, unsigned char *sel)
4031 {
4032 tree mask_elt_type, mask_type, mask_vec;
4033 int i, nunits;
4034
4035 nunits = TYPE_VECTOR_SUBPARTS (vectype);
4036
4037 if (!can_vec_perm_p (TYPE_MODE (vectype), false, sel))
4038 return NULL;
4039
4040 mask_elt_type
4041 = lang_hooks.types.type_for_size
4042 (TREE_INT_CST_LOW (TYPE_SIZE (TREE_TYPE (vectype))), 1);
4043 mask_type = get_vectype_for_scalar_type (mask_elt_type);
4044
4045 mask_vec = NULL;
4046 for (i = nunits - 1; i >= 0; i--)
4047 mask_vec = tree_cons (NULL, build_int_cst (mask_elt_type, sel[i]),
4048 mask_vec);
4049 mask_vec = build_vector (mask_type, mask_vec);
4050
4051 return mask_vec;
4052 }
4053
4054 /* Given a vector type VECTYPE, return the VECTOR_CST mask that implements
4055 reversal of the vector elements. If that is impossible to do,
4056 return NULL. */
4057
4058 static tree
4059 perm_mask_for_reverse (tree vectype)
4060 {
4061 int i, nunits;
4062 unsigned char *sel;
4063
4064 nunits = TYPE_VECTOR_SUBPARTS (vectype);
4065 sel = XALLOCAVEC (unsigned char, nunits);
4066
4067 for (i = 0; i < nunits; ++i)
4068 sel[i] = nunits - 1 - i;
4069
4070 return gen_perm_mask (vectype, sel);
4071 }
4072
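/* Illustrative example: for a 4-element vector type the selector built
   above is {3, 2, 1, 0}; used as VEC_PERM_EXPR <x, x, {3, 2, 1, 0}> it
   yields the element-reversed vector, which vectorizable_load below uses
   for negative-step accesses.  */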
4073 /* Given vector variables X and Y that were generated for the scalar
4074 STMT, generate instructions to permute the vector elements of X and Y
4075 using the permutation mask MASK_VEC, insert them at *GSI and return the
4076 permuted vector variable. */
4077
4078 static tree
4079 permute_vec_elements (tree x, tree y, tree mask_vec, gimple stmt,
4080 gimple_stmt_iterator *gsi)
4081 {
4082 tree vectype = TREE_TYPE (x);
4083 tree perm_dest, data_ref;
4084 gimple perm_stmt;
4085
4086 perm_dest = vect_create_destination_var (gimple_assign_lhs (stmt), vectype);
4087 data_ref = make_ssa_name (perm_dest, NULL);
4088
4089 /* Generate the permute statement. */
4090 perm_stmt = gimple_build_assign_with_ops3 (VEC_PERM_EXPR, data_ref,
4091 x, y, mask_vec);
4092 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
4093
4094 return data_ref;
4095 }
4096
4097 /* vectorizable_load.
4098
4099 Check if STMT reads a non-scalar data-ref (array/pointer/structure) that
4100 can be vectorized.
4101 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4102 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
4103 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
4104
4105 static bool
4106 vectorizable_load (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
4107 slp_tree slp_node, slp_instance slp_node_instance)
4108 {
4109 tree scalar_dest;
4110 tree vec_dest = NULL;
4111 tree data_ref = NULL;
4112 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4113 stmt_vec_info prev_stmt_info;
4114 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4115 struct loop *loop = NULL;
4116 struct loop *containing_loop = (gimple_bb (stmt))->loop_father;
4117 bool nested_in_vect_loop = false;
4118 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr;
4119 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
4120 tree elem_type;
4121 tree new_temp;
4122 enum machine_mode mode;
4123 gimple new_stmt = NULL;
4124 tree dummy;
4125 enum dr_alignment_support alignment_support_scheme;
4126 tree dataref_ptr = NULL_TREE;
4127 gimple ptr_incr;
4128 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
4129 int ncopies;
4130 int i, j, group_size;
4131 tree msq = NULL_TREE, lsq;
4132 tree offset = NULL_TREE;
4133 tree realignment_token = NULL_TREE;
4134 gimple phi = NULL;
4135 VEC(tree,heap) *dr_chain = NULL;
4136 bool strided_load = false;
4137 bool load_lanes_p = false;
4138 gimple first_stmt;
4139 bool inv_p;
4140 bool negative;
4141 bool compute_in_loop = false;
4142 struct loop *at_loop;
4143 int vec_num;
4144 bool slp = (slp_node != NULL);
4145 bool slp_perm = false;
4146 enum tree_code code;
4147 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
4148 int vf;
4149 tree aggr_type;
4150 tree gather_base = NULL_TREE, gather_off = NULL_TREE;
4151 tree gather_off_vectype = NULL_TREE, gather_decl = NULL_TREE;
4152 int gather_scale = 1;
4153 enum vect_def_type gather_dt = vect_unknown_def_type;
4154
4155 if (loop_vinfo)
4156 {
4157 loop = LOOP_VINFO_LOOP (loop_vinfo);
4158 nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
4159 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
4160 }
4161 else
4162 vf = 1;
4163
4164 /* Multiple types in SLP are handled by creating the appropriate number of
4165 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
4166 case of SLP. */
4167 if (slp || PURE_SLP_STMT (stmt_info))
4168 ncopies = 1;
4169 else
4170 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
4171
4172 gcc_assert (ncopies >= 1);
4173
4174 /* FORNOW. This restriction should be relaxed. */
4175 if (nested_in_vect_loop && ncopies > 1)
4176 {
4177 if (vect_print_dump_info (REPORT_DETAILS))
4178 fprintf (vect_dump, "multiple types in nested loop.");
4179 return false;
4180 }
4181
4182 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
4183 return false;
4184
4185 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
4186 return false;
4187
4188 /* Is vectorizable load? */
4189 if (!is_gimple_assign (stmt))
4190 return false;
4191
4192 scalar_dest = gimple_assign_lhs (stmt);
4193 if (TREE_CODE (scalar_dest) != SSA_NAME)
4194 return false;
4195
4196 code = gimple_assign_rhs_code (stmt);
4197 if (code != ARRAY_REF
4198 && code != INDIRECT_REF
4199 && code != COMPONENT_REF
4200 && code != IMAGPART_EXPR
4201 && code != REALPART_EXPR
4202 && code != MEM_REF
4203 && TREE_CODE_CLASS (code) != tcc_declaration)
4204 return false;
4205
4206 if (!STMT_VINFO_DATA_REF (stmt_info))
4207 return false;
4208
4209 negative = tree_int_cst_compare (DR_STEP (dr), size_zero_node) < 0;
4210 if (negative && ncopies > 1)
4211 {
4212 if (vect_print_dump_info (REPORT_DETAILS))
4213 fprintf (vect_dump, "multiple types with negative step.");
4214 return false;
4215 }
4216
4217 elem_type = TREE_TYPE (vectype);
4218 mode = TYPE_MODE (vectype);
4219
4220 /* FORNOW. In some cases we can vectorize even if the data-type is not
4221 supported (e.g. data copies). */
4222 if (optab_handler (mov_optab, mode) == CODE_FOR_nothing)
4223 {
4224 if (vect_print_dump_info (REPORT_DETAILS))
4225 fprintf (vect_dump, "Aligned load, but unsupported type.");
4226 return false;
4227 }
4228
4229 /* Check if the load is a part of an interleaving chain. */
4230 if (STMT_VINFO_STRIDED_ACCESS (stmt_info))
4231 {
4232 strided_load = true;
4233 /* FORNOW */
4234 gcc_assert (! nested_in_vect_loop && !STMT_VINFO_GATHER_P (stmt_info));
4235
4236 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
4237 if (!slp && !PURE_SLP_STMT (stmt_info))
4238 {
4239 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
4240 if (vect_load_lanes_supported (vectype, group_size))
4241 load_lanes_p = true;
4242 else if (!vect_strided_load_supported (vectype, group_size))
4243 return false;
4244 }
4245 }
4246
4247 if (negative)
4248 {
4249 gcc_assert (!strided_load && !STMT_VINFO_GATHER_P (stmt_info));
4250 alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
4251 if (alignment_support_scheme != dr_aligned
4252 && alignment_support_scheme != dr_unaligned_supported)
4253 {
4254 if (vect_print_dump_info (REPORT_DETAILS))
4255 fprintf (vect_dump, "negative step but alignment required.");
4256 return false;
4257 }
4258 if (!perm_mask_for_reverse (vectype))
4259 {
4260 if (vect_print_dump_info (REPORT_DETAILS))
4261 fprintf (vect_dump, "negative step and reversing not supported.");
4262 return false;
4263 }
4264 }
4265
4266 if (STMT_VINFO_GATHER_P (stmt_info))
4267 {
4268 gimple def_stmt;
4269 tree def;
4270 gather_decl = vect_check_gather (stmt, loop_vinfo, &gather_base,
4271 &gather_off, &gather_scale);
4272 gcc_assert (gather_decl);
4273 if (!vect_is_simple_use_1 (gather_off, loop_vinfo, bb_vinfo,
4274 &def_stmt, &def, &gather_dt,
4275 &gather_off_vectype))
4276 {
4277 if (vect_print_dump_info (REPORT_DETAILS))
4278 fprintf (vect_dump, "gather index use not simple.");
4279 return false;
4280 }
4281 }
4282
4283 if (!vec_stmt) /* transformation not required. */
4284 {
4285 STMT_VINFO_TYPE (stmt_info) = load_vec_info_type;
4286 vect_model_load_cost (stmt_info, ncopies, load_lanes_p, NULL);
4287 return true;
4288 }
4289
4290 if (vect_print_dump_info (REPORT_DETAILS))
4291 fprintf (vect_dump, "transform load. ncopies = %d", ncopies);
4292
4293 /** Transform. **/
4294
4295 if (STMT_VINFO_GATHER_P (stmt_info))
4296 {
4297 tree vec_oprnd0 = NULL_TREE, op;
4298 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gather_decl));
4299 tree rettype, srctype, ptrtype, idxtype, masktype, scaletype;
4300 tree ptr, mask, var, scale, perm_mask = NULL_TREE, prev_res = NULL_TREE;
4301 edge pe = loop_preheader_edge (loop);
4302 gimple_seq seq;
4303 basic_block new_bb;
4304 enum { NARROW, NONE, WIDEN } modifier;
4305 int gather_off_nunits = TYPE_VECTOR_SUBPARTS (gather_off_vectype);
4306
4307 if (nunits == gather_off_nunits)
4308 modifier = NONE;
4309 else if (nunits == gather_off_nunits / 2)
4310 {
4311 unsigned char *sel = XALLOCAVEC (unsigned char, gather_off_nunits);
4312 modifier = WIDEN;
4313
4314 for (i = 0; i < gather_off_nunits; ++i)
4315 sel[i] = i | nunits;
4316
4317 perm_mask = gen_perm_mask (gather_off_vectype, sel);
4318 gcc_assert (perm_mask != NULL_TREE);
4319 }
4320 else if (nunits == gather_off_nunits * 2)
4321 {
4322 unsigned char *sel = XALLOCAVEC (unsigned char, nunits);
4323 modifier = NARROW;
4324
4325 for (i = 0; i < nunits; ++i)
4326 sel[i] = i < gather_off_nunits
4327 ? i : i + nunits - gather_off_nunits;
4328
4329 perm_mask = gen_perm_mask (vectype, sel);
4330 gcc_assert (perm_mask != NULL_TREE);
4331 ncopies *= 2;
4332 }
4333 else
4334 gcc_unreachable ();
4335
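/* Worked example (illustrative numbers only): with nunits == 4 and
   gather_off_nunits == 8 we take the WIDEN path; sel[i] = i | 4 builds the
   selector {4, 5, 6, 7, 4, 5, 6, 7}, so on odd copies the VEC_PERM_EXPR of
   the offset vector with itself exposes its high half to the gather call.
   Conversely, nunits == 8 and gather_off_nunits == 4 takes the NARROW
   path: NCOPIES is doubled and two gather results are combined per vector
   statement by the permutation further below.  */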
4336 rettype = TREE_TYPE (TREE_TYPE (gather_decl));
4337 srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
4338 ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
4339 idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
4340 masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
4341 scaletype = TREE_VALUE (arglist);
4342 gcc_checking_assert (types_compatible_p (srctype, rettype)
4343 && types_compatible_p (srctype, masktype));
4344
4345 vec_dest = vect_create_destination_var (scalar_dest, vectype);
4346
4347 ptr = fold_convert (ptrtype, gather_base);
4348 if (!is_gimple_min_invariant (ptr))
4349 {
4350 ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
4351 new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
4352 gcc_assert (!new_bb);
4353 }
4354
4355 /* Currently we support only unconditional gather loads,
4356 so mask should be all ones. */
4357 if (TREE_CODE (TREE_TYPE (masktype)) == INTEGER_TYPE)
4358 mask = build_int_cst (TREE_TYPE (masktype), -1);
4359 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (masktype)))
4360 {
4361 REAL_VALUE_TYPE r;
4362 long tmp[6];
4363 for (j = 0; j < 6; ++j)
4364 tmp[j] = -1;
4365 real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (masktype)));
4366 mask = build_real (TREE_TYPE (masktype), r);
4367 }
4368 else
4369 gcc_unreachable ();
4370 mask = build_vector_from_val (masktype, mask);
4371 mask = vect_init_vector (stmt, mask, masktype, NULL);
4372
4373 scale = build_int_cst (scaletype, gather_scale);
4374
4375 prev_stmt_info = NULL;
4376 for (j = 0; j < ncopies; ++j)
4377 {
4378 if (modifier == WIDEN && (j & 1))
4379 op = permute_vec_elements (vec_oprnd0, vec_oprnd0,
4380 perm_mask, stmt, gsi);
4381 else if (j == 0)
4382 op = vec_oprnd0
4383 = vect_get_vec_def_for_operand (gather_off, stmt, NULL);
4384 else
4385 op = vec_oprnd0
4386 = vect_get_vec_def_for_stmt_copy (gather_dt, vec_oprnd0);
4387
4388 if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
4389 {
4390 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op))
4391 == TYPE_VECTOR_SUBPARTS (idxtype));
4392 var = vect_get_new_vect_var (idxtype, vect_simple_var, NULL);
4393 add_referenced_var (var);
4394 var = make_ssa_name (var, NULL);
4395 op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
4396 new_stmt
4397 = gimple_build_assign_with_ops (VIEW_CONVERT_EXPR, var,
4398 op, NULL_TREE);
4399 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4400 op = var;
4401 }
4402
4403 new_stmt
4404 = gimple_build_call (gather_decl, 5, mask, ptr, op, mask, scale);
4405
4406 if (!useless_type_conversion_p (vectype, rettype))
4407 {
4408 gcc_assert (TYPE_VECTOR_SUBPARTS (vectype)
4409 == TYPE_VECTOR_SUBPARTS (rettype));
4410 var = vect_get_new_vect_var (rettype, vect_simple_var, NULL);
4411 add_referenced_var (var);
4412 op = make_ssa_name (var, new_stmt);
4413 gimple_call_set_lhs (new_stmt, op);
4414 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4415 var = make_ssa_name (vec_dest, NULL);
4416 op = build1 (VIEW_CONVERT_EXPR, vectype, op);
4417 new_stmt
4418 = gimple_build_assign_with_ops (VIEW_CONVERT_EXPR, var, op,
4419 NULL_TREE);
4420 }
4421 else
4422 {
4423 var = make_ssa_name (vec_dest, new_stmt);
4424 gimple_call_set_lhs (new_stmt, var);
4425 }
4426
4427 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4428
4429 if (modifier == NARROW)
4430 {
4431 if ((j & 1) == 0)
4432 {
4433 prev_res = var;
4434 continue;
4435 }
4436 var = permute_vec_elements (prev_res, var,
4437 perm_mask, stmt, gsi);
4438 new_stmt = SSA_NAME_DEF_STMT (var);
4439 }
4440
4441 if (prev_stmt_info == NULL)
4442 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4443 else
4444 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4445 prev_stmt_info = vinfo_for_stmt (new_stmt);
4446 }
4447 return true;
4448 }
4449
4450 if (strided_load)
4451 {
4452 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
4453 if (slp
4454 && !SLP_INSTANCE_LOAD_PERMUTATION (slp_node_instance)
4455 && first_stmt != VEC_index (gimple, SLP_TREE_SCALAR_STMTS (slp_node), 0))
4456 first_stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (slp_node), 0);
4457
4458 /* Check if the chain of loads is already vectorized. */
4459 if (STMT_VINFO_VEC_STMT (vinfo_for_stmt (first_stmt)))
4460 {
4461 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
4462 return true;
4463 }
4464 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
4465 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
4466
4467 /* VEC_NUM is the number of vect stmts to be created for this group. */
4468 if (slp)
4469 {
4470 strided_load = false;
4471 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
4472 if (SLP_INSTANCE_LOAD_PERMUTATION (slp_node_instance))
4473 slp_perm = true;
4474 }
4475 else
4476 vec_num = group_size;
4477 }
4478 else
4479 {
4480 first_stmt = stmt;
4481 first_dr = dr;
4482 group_size = vec_num = 1;
4483 }
4484
4485 alignment_support_scheme = vect_supportable_dr_alignment (first_dr, false);
4486 gcc_assert (alignment_support_scheme);
4487 /* Targets with load-lane instructions must not require explicit
4488 realignment. */
4489 gcc_assert (!load_lanes_p
4490 || alignment_support_scheme == dr_aligned
4491 || alignment_support_scheme == dr_unaligned_supported);
4492
4493 /* In case the vectorization factor (VF) is bigger than the number
4494 of elements that we can fit in a vectype (nunits), we have to generate
4495 more than one vector stmt - i.e - we need to "unroll" the
4496 vector stmt by a factor VF/nunits. In doing so, we record a pointer
4497 from one copy of the vector stmt to the next, in the field
4498 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
4499 stages to find the correct vector defs to be used when vectorizing
4500 stmts that use the defs of the current stmt. The example below
4501 illustrates the vectorization process when VF=16 and nunits=4 (i.e., we
4502 need to create 4 vectorized stmts):
4503
4504 before vectorization:
4505 RELATED_STMT VEC_STMT
4506 S1: x = memref - -
4507 S2: z = x + 1 - -
4508
4509 step 1: vectorize stmt S1:
4510 We first create the vector stmt VS1_0, and, as usual, record a
4511 pointer to it in the STMT_VINFO_VEC_STMT of the scalar stmt S1.
4512 Next, we create the vector stmt VS1_1, and record a pointer to
4513 it in the STMT_VINFO_RELATED_STMT of the vector stmt VS1_0.
4514 Similarly, for VS1_2 and VS1_3. This is the resulting chain of
4515 stmts and pointers:
4516 RELATED_STMT VEC_STMT
4517 VS1_0: vx0 = memref0 VS1_1 -
4518 VS1_1: vx1 = memref1 VS1_2 -
4519 VS1_2: vx2 = memref2 VS1_3 -
4520 VS1_3: vx3 = memref3 - -
4521 S1: x = load - VS1_0
4522 S2: z = x + 1 - -
4523
4524 See in documentation in vect_get_vec_def_for_stmt_copy for how the
4525 information we recorded in RELATED_STMT field is used to vectorize
4526 stmt S2. */
4527
4528 /* In case of interleaving (non-unit strided access):
4529
4530 S1: x2 = &base + 2
4531 S2: x0 = &base
4532 S3: x1 = &base + 1
4533 S4: x3 = &base + 3
4534
4535 Vectorized loads are created in the order of memory accesses
4536 starting from the access of the first stmt of the chain:
4537
4538 VS1: vx0 = &base
4539 VS2: vx1 = &base + vec_size*1
4540 VS3: vx3 = &base + vec_size*2
4541 VS4: vx4 = &base + vec_size*3
4542
4543 Then permutation statements are generated:
4544
4545 VS5: vx5 = VEC_EXTRACT_EVEN_EXPR < vx0, vx1 >
4546 VS6: vx6 = VEC_EXTRACT_ODD_EXPR < vx0, vx1 >
4547 ...
4548
4549 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
4550 (the order of the data-refs in the output of vect_permute_load_chain
4551 corresponds to the order of scalar stmts in the interleaving chain - see
4552 the documentation of vect_permute_load_chain()).
4553 The generation of permutation stmts and recording them in
4554 STMT_VINFO_VEC_STMT is done in vect_transform_strided_load().
4555
4556 In case of both multiple types and interleaving, the vector loads and
4557 permutation stmts above are created for every copy. The result vector
4558 stmts are put in STMT_VINFO_VEC_STMT for the first copy and in the
4559 corresponding STMT_VINFO_RELATED_STMT for the next copies. */
4560
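/* Illustrative scalar source (hypothetical names) for the load pattern
   above, GROUP_SIZE == 4; S1..S4 are the four strided loads of the chain:

     for (i = 0; i < n; i++)
       {
         x2 = base[4 * i + 2];    S1
         x0 = base[4 * i];        S2
         x1 = base[4 * i + 1];    S3
         x3 = base[4 * i + 3];    S4
       }   */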
4561 /* If the data reference is aligned (dr_aligned) or potentially unaligned
4562 on a target that supports unaligned accesses (dr_unaligned_supported)
4563 we generate the following code:
4564 p = initial_addr;
4565 indx = 0;
4566 loop {
4567 p = p + indx * vectype_size;
4568 vec_dest = *(p);
4569 indx = indx + 1;
4570 }
4571
4572 Otherwise, the data reference is potentially unaligned on a target that
4573 does not support unaligned accesses (dr_explicit_realign_optimized) -
4574 then generate the following code, in which the data in each iteration is
4575 obtained by two vector loads, one from the previous iteration, and one
4576 from the current iteration:
4577 p1 = initial_addr;
4578 msq_init = *(floor(p1))
4579 p2 = initial_addr + VS - 1;
4580 realignment_token = call target_builtin;
4581 indx = 0;
4582 loop {
4583 p2 = p2 + indx * vectype_size
4584 lsq = *(floor(p2))
4585 vec_dest = realign_load (msq, lsq, realignment_token)
4586 indx = indx + 1;
4587 msq = lsq;
4588 } */
4589
4590 /* If the misalignment remains the same throughout the execution of the
4591 loop, we can create the init_addr and permutation mask at the loop
4592 preheader. Otherwise, they need to be created inside the loop.
4593 This can only occur when vectorizing memory accesses in the inner-loop
4594 nested within an outer-loop that is being vectorized. */
4595
4596 if (loop && nested_in_vect_loop_p (loop, stmt)
4597 && (TREE_INT_CST_LOW (DR_STEP (dr))
4598 % GET_MODE_SIZE (TYPE_MODE (vectype)) != 0))
4599 {
4600 gcc_assert (alignment_support_scheme != dr_explicit_realign_optimized);
4601 compute_in_loop = true;
4602 }
4603
4604 if ((alignment_support_scheme == dr_explicit_realign_optimized
4605 || alignment_support_scheme == dr_explicit_realign)
4606 && !compute_in_loop)
4607 {
4608 msq = vect_setup_realignment (first_stmt, gsi, &realignment_token,
4609 alignment_support_scheme, NULL_TREE,
4610 &at_loop);
4611 if (alignment_support_scheme == dr_explicit_realign_optimized)
4612 {
4613 phi = SSA_NAME_DEF_STMT (msq);
4614 offset = size_int (TYPE_VECTOR_SUBPARTS (vectype) - 1);
4615 }
4616 }
4617 else
4618 at_loop = loop;
4619
4620 if (negative)
4621 offset = size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1);
4622
4623 if (load_lanes_p)
4624 aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
4625 else
4626 aggr_type = vectype;
4627
4628 prev_stmt_info = NULL;
4629 for (j = 0; j < ncopies; j++)
4630 {
4631 /* 1. Create the vector or array pointer update chain. */
4632 if (j == 0)
4633 dataref_ptr = vect_create_data_ref_ptr (first_stmt, aggr_type, at_loop,
4634 offset, &dummy, gsi,
4635 &ptr_incr, false, &inv_p);
4636 else
4637 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
4638 TYPE_SIZE_UNIT (aggr_type));
4639
4640 if (strided_load || slp_perm)
4641 dr_chain = VEC_alloc (tree, heap, vec_num);
4642
4643 if (load_lanes_p)
4644 {
4645 tree vec_array;
4646
4647 vec_array = create_vector_array (vectype, vec_num);
4648
4649 /* Emit:
4650 VEC_ARRAY = LOAD_LANES (MEM_REF[...all elements...]). */
4651 data_ref = create_array_ref (aggr_type, dataref_ptr, first_dr);
4652 new_stmt = gimple_build_call_internal (IFN_LOAD_LANES, 1, data_ref);
4653 gimple_call_set_lhs (new_stmt, vec_array);
4654 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4655 mark_symbols_for_renaming (new_stmt);
4656
4657 /* Extract each vector into an SSA_NAME. */
4658 for (i = 0; i < vec_num; i++)
4659 {
4660 new_temp = read_vector_array (stmt, gsi, scalar_dest,
4661 vec_array, i);
4662 VEC_quick_push (tree, dr_chain, new_temp);
4663 }
4664
4665 /* Record the mapping between SSA_NAMEs and statements. */
4666 vect_record_strided_load_vectors (stmt, dr_chain);
4667 }
4668 else
4669 {
4670 for (i = 0; i < vec_num; i++)
4671 {
4672 if (i > 0)
4673 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
4674 stmt, NULL_TREE);
4675
4676 /* 2. Create the vector-load in the loop. */
4677 switch (alignment_support_scheme)
4678 {
4679 case dr_aligned:
4680 case dr_unaligned_supported:
4681 {
4682 struct ptr_info_def *pi;
4683 data_ref
4684 = build2 (MEM_REF, vectype, dataref_ptr,
4685 build_int_cst (reference_alias_ptr_type
4686 (DR_REF (first_dr)), 0));
4687 pi = get_ptr_info (dataref_ptr);
4688 pi->align = TYPE_ALIGN_UNIT (vectype);
4689 if (alignment_support_scheme == dr_aligned)
4690 {
4691 gcc_assert (aligned_access_p (first_dr));
4692 pi->misalign = 0;
4693 }
4694 else if (DR_MISALIGNMENT (first_dr) == -1)
4695 {
4696 TREE_TYPE (data_ref)
4697 = build_aligned_type (TREE_TYPE (data_ref),
4698 TYPE_ALIGN (elem_type));
4699 pi->align = TYPE_ALIGN_UNIT (elem_type);
4700 pi->misalign = 0;
4701 }
4702 else
4703 {
4704 TREE_TYPE (data_ref)
4705 = build_aligned_type (TREE_TYPE (data_ref),
4706 TYPE_ALIGN (elem_type));
4707 pi->misalign = DR_MISALIGNMENT (first_dr);
4708 }
4709 break;
4710 }
4711 case dr_explicit_realign:
4712 {
4713 tree ptr, bump;
4714 tree vs_minus_1;
4715
4716 vs_minus_1 = size_int (TYPE_VECTOR_SUBPARTS (vectype) - 1);
4717
4718 if (compute_in_loop)
4719 msq = vect_setup_realignment (first_stmt, gsi,
4720 &realignment_token,
4721 dr_explicit_realign,
4722 dataref_ptr, NULL);
4723
4724 new_stmt = gimple_build_assign_with_ops
4725 (BIT_AND_EXPR, NULL_TREE, dataref_ptr,
4726 build_int_cst
4727 (TREE_TYPE (dataref_ptr),
4728 -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
4729 ptr = make_ssa_name (SSA_NAME_VAR (dataref_ptr), new_stmt);
4730 gimple_assign_set_lhs (new_stmt, ptr);
4731 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4732 data_ref
4733 = build2 (MEM_REF, vectype, ptr,
4734 build_int_cst (reference_alias_ptr_type
4735 (DR_REF (first_dr)), 0));
4736 vec_dest = vect_create_destination_var (scalar_dest,
4737 vectype);
4738 new_stmt = gimple_build_assign (vec_dest, data_ref);
4739 new_temp = make_ssa_name (vec_dest, new_stmt);
4740 gimple_assign_set_lhs (new_stmt, new_temp);
4741 gimple_set_vdef (new_stmt, gimple_vdef (stmt));
4742 gimple_set_vuse (new_stmt, gimple_vuse (stmt));
4743 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4744 msq = new_temp;
4745
4746 bump = size_binop (MULT_EXPR, vs_minus_1,
4747 TYPE_SIZE_UNIT (elem_type));
4748 ptr = bump_vector_ptr (dataref_ptr, NULL, gsi, stmt, bump);
4749 new_stmt = gimple_build_assign_with_ops
4750 (BIT_AND_EXPR, NULL_TREE, ptr,
4751 build_int_cst
4752 (TREE_TYPE (ptr),
4753 -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
4754 ptr = make_ssa_name (SSA_NAME_VAR (dataref_ptr), new_stmt);
4755 gimple_assign_set_lhs (new_stmt, ptr);
4756 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4757 data_ref
4758 = build2 (MEM_REF, vectype, ptr,
4759 build_int_cst (reference_alias_ptr_type
4760 (DR_REF (first_dr)), 0));
4761 break;
4762 }
4763 case dr_explicit_realign_optimized:
4764 new_stmt = gimple_build_assign_with_ops
4765 (BIT_AND_EXPR, NULL_TREE, dataref_ptr,
4766 build_int_cst
4767 (TREE_TYPE (dataref_ptr),
4768 -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
4769 new_temp = make_ssa_name (SSA_NAME_VAR (dataref_ptr),
4770 new_stmt);
4771 gimple_assign_set_lhs (new_stmt, new_temp);
4772 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4773 data_ref
4774 = build2 (MEM_REF, vectype, new_temp,
4775 build_int_cst (reference_alias_ptr_type
4776 (DR_REF (first_dr)), 0));
4777 break;
4778 default:
4779 gcc_unreachable ();
4780 }
4781 vec_dest = vect_create_destination_var (scalar_dest, vectype);
4782 new_stmt = gimple_build_assign (vec_dest, data_ref);
4783 new_temp = make_ssa_name (vec_dest, new_stmt);
4784 gimple_assign_set_lhs (new_stmt, new_temp);
4785 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4786 mark_symbols_for_renaming (new_stmt);
4787
4788 /* 3. Handle explicit realignment if necessary/supported.
4789 Create in loop:
4790 vec_dest = realign_load (msq, lsq, realignment_token) */
4791 if (alignment_support_scheme == dr_explicit_realign_optimized
4792 || alignment_support_scheme == dr_explicit_realign)
4793 {
4794 lsq = gimple_assign_lhs (new_stmt);
4795 if (!realignment_token)
4796 realignment_token = dataref_ptr;
4797 vec_dest = vect_create_destination_var (scalar_dest, vectype);
4798 new_stmt
4799 = gimple_build_assign_with_ops3 (REALIGN_LOAD_EXPR,
4800 vec_dest, msq, lsq,
4801 realignment_token);
4802 new_temp = make_ssa_name (vec_dest, new_stmt);
4803 gimple_assign_set_lhs (new_stmt, new_temp);
4804 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4805
4806 if (alignment_support_scheme == dr_explicit_realign_optimized)
4807 {
4808 gcc_assert (phi);
4809 if (i == vec_num - 1 && j == ncopies - 1)
4810 add_phi_arg (phi, lsq,
4811 loop_latch_edge (containing_loop),
4812 UNKNOWN_LOCATION);
4813 msq = lsq;
4814 }
4815 }
4816
4817 /* 4. Handle invariant-load. */
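/* Illustrative: if the scalar load reads a loop-invariant location
   (e.g. x = *q with Q unchanged inside the loop), the "vector load" is
   simply a splat {x, x, x, x} of the scalar value, built once with
   build_vector_from_val and materialized by vect_init_vector just after
   the scalar statement.  */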
4818 if (inv_p && !bb_vinfo)
4819 {
4820 tree tem, vec_inv;
4821 gimple_stmt_iterator gsi2 = *gsi;
4822 gcc_assert (!strided_load);
4823 gsi_next (&gsi2);
4824 tem = scalar_dest;
4825 if (!useless_type_conversion_p (TREE_TYPE (vectype),
4826 TREE_TYPE (tem)))
4827 {
4828 tem = fold_convert (TREE_TYPE (vectype), tem);
4829 tem = force_gimple_operand_gsi (&gsi2, tem, true,
4830 NULL_TREE, true,
4831 GSI_SAME_STMT);
4832 }
4833 vec_inv = build_vector_from_val (vectype, tem);
4834 new_temp = vect_init_vector (stmt, vec_inv,
4835 vectype, &gsi2);
4836 new_stmt = SSA_NAME_DEF_STMT (new_temp);
4837 }
4838
4839 if (negative)
4840 {
4841 tree perm_mask = perm_mask_for_reverse (vectype);
4842 new_temp = permute_vec_elements (new_temp, new_temp,
4843 perm_mask, stmt, gsi);
4844 new_stmt = SSA_NAME_DEF_STMT (new_temp);
4845 }
4846
4847 /* Collect vector loads and later create their permutation in
4848 vect_transform_strided_load (). */
4849 if (strided_load || slp_perm)
4850 VEC_quick_push (tree, dr_chain, new_temp);
4851
4852 /* Store vector loads in the corresponding SLP_NODE. */
4853 if (slp && !slp_perm)
4854 VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node),
4855 new_stmt);
4856 }
4857 }
4858
4859 if (slp && !slp_perm)
4860 continue;
4861
4862 if (slp_perm)
4863 {
4864 if (!vect_transform_slp_perm_load (stmt, dr_chain, gsi, vf,
4865 slp_node_instance, false))
4866 {
4867 VEC_free (tree, heap, dr_chain);
4868 return false;
4869 }
4870 }
4871 else
4872 {
4873 if (strided_load)
4874 {
4875 if (!load_lanes_p)
4876 vect_transform_strided_load (stmt, dr_chain, group_size, gsi);
4877 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
4878 }
4879 else
4880 {
4881 if (j == 0)
4882 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4883 else
4884 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4885 prev_stmt_info = vinfo_for_stmt (new_stmt);
4886 }
4887 }
4888 if (dr_chain)
4889 VEC_free (tree, heap, dr_chain);
4890 }
4891
4892 return true;
4893 }
4894
4895 /* Function vect_is_simple_cond.
4896
4897 Input:
4898 LOOP - the loop that is being vectorized.
4899 COND - Condition that is checked for simple use.
4900
4901 Output:
4902 *COMP_VECTYPE - the vector type for the comparison.
4903
4904 Returns whether a COND can be vectorized. Checks whether
4905 the condition operands are supportable using vect_is_simple_use_1. */
4906
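/* For example (illustrative SSA names), conditions such as a_1 < b_2,
   x_3 != 0 or y_4 <= 5.0 qualify: each operand must be either an SSA name
   with a vectorizable definition or an INTEGER_CST, REAL_CST or
   FIXED_CST.  */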
4907 static bool
4908 vect_is_simple_cond (tree cond, loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
4909 tree *comp_vectype)
4910 {
4911 tree lhs, rhs;
4912 tree def;
4913 enum vect_def_type dt;
4914 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
4915
4916 if (!COMPARISON_CLASS_P (cond))
4917 return false;
4918
4919 lhs = TREE_OPERAND (cond, 0);
4920 rhs = TREE_OPERAND (cond, 1);
4921
4922 if (TREE_CODE (lhs) == SSA_NAME)
4923 {
4924 gimple lhs_def_stmt = SSA_NAME_DEF_STMT (lhs);
4925 if (!vect_is_simple_use_1 (lhs, loop_vinfo, bb_vinfo, &lhs_def_stmt, &def,
4926 &dt, &vectype1))
4927 return false;
4928 }
4929 else if (TREE_CODE (lhs) != INTEGER_CST && TREE_CODE (lhs) != REAL_CST
4930 && TREE_CODE (lhs) != FIXED_CST)
4931 return false;
4932
4933 if (TREE_CODE (rhs) == SSA_NAME)
4934 {
4935 gimple rhs_def_stmt = SSA_NAME_DEF_STMT (rhs);
4936 if (!vect_is_simple_use_1 (rhs, loop_vinfo, bb_vinfo, &rhs_def_stmt, &def,
4937 &dt, &vectype2))
4938 return false;
4939 }
4940 else if (TREE_CODE (rhs) != INTEGER_CST && TREE_CODE (rhs) != REAL_CST
4941 && TREE_CODE (rhs) != FIXED_CST)
4942 return false;
4943
4944 *comp_vectype = vectype1 ? vectype1 : vectype2;
4945 return true;
4946 }
4947
4948 /* vectorizable_condition.
4949
4950 Check if STMT is conditional modify expression that can be vectorized.
4951 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4952 stmt using VEC_COND_EXPR to replace it, put it in VEC_STMT, and insert it
4953 at GSI.
4954
4955 When STMT is vectorized as a nested cycle, REDUC_DEF is the vector variable
4956 to be used at REDUC_INDEX (in the then clause if REDUC_INDEX is 1, and in
4957 the else clause if it is 2).
4958
4959 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
4960
4961 bool
4962 vectorizable_condition (gimple stmt, gimple_stmt_iterator *gsi,
4963 gimple *vec_stmt, tree reduc_def, int reduc_index,
4964 slp_tree slp_node)
4965 {
4966 tree scalar_dest = NULL_TREE;
4967 tree vec_dest = NULL_TREE;
4968 tree cond_expr, then_clause, else_clause;
4969 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4970 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
4971 tree comp_vectype;
4972 tree vec_cond_lhs = NULL_TREE, vec_cond_rhs = NULL_TREE;
4973 tree vec_then_clause = NULL_TREE, vec_else_clause = NULL_TREE;
4974 tree vec_compare, vec_cond_expr;
4975 tree new_temp;
4976 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4977 tree def;
4978 enum vect_def_type dt, dts[4];
4979 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
4980 int ncopies;
4981 enum tree_code code;
4982 stmt_vec_info prev_stmt_info = NULL;
4983 int i, j;
4984 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
4985 VEC (tree, heap) *vec_oprnds0 = NULL, *vec_oprnds1 = NULL;
4986 VEC (tree, heap) *vec_oprnds2 = NULL, *vec_oprnds3 = NULL;
4987
4988 if (slp_node || PURE_SLP_STMT (stmt_info))
4989 ncopies = 1;
4990 else
4991 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
4992
4993 gcc_assert (ncopies >= 1);
4994 if (reduc_index && ncopies > 1)
4995 return false; /* FORNOW */
4996
4997 if (reduc_index && STMT_SLP_TYPE (stmt_info))
4998 return false;
4999
5000 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
5001 return false;
5002
5003 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
5004 && !(STMT_VINFO_DEF_TYPE (stmt_info) == vect_nested_cycle
5005 && reduc_def))
5006 return false;
5007
5008 /* FORNOW: not yet supported. */
5009 if (STMT_VINFO_LIVE_P (stmt_info))
5010 {
5011 if (vect_print_dump_info (REPORT_DETAILS))
5012 fprintf (vect_dump, "value used after loop.");
5013 return false;
5014 }
5015
5016 /* Is vectorizable conditional operation? */
5017 if (!is_gimple_assign (stmt))
5018 return false;
5019
5020 code = gimple_assign_rhs_code (stmt);
5021
5022 if (code != COND_EXPR)
5023 return false;
5024
5025 cond_expr = gimple_assign_rhs1 (stmt);
5026 then_clause = gimple_assign_rhs2 (stmt);
5027 else_clause = gimple_assign_rhs3 (stmt);
5028
5029 if (!vect_is_simple_cond (cond_expr, loop_vinfo, bb_vinfo, &comp_vectype)
5030 || !comp_vectype)
5031 return false;
5032
5033 if (TREE_CODE (then_clause) == SSA_NAME)
5034 {
5035 gimple then_def_stmt = SSA_NAME_DEF_STMT (then_clause);
5036 if (!vect_is_simple_use (then_clause, loop_vinfo, bb_vinfo,
5037 &then_def_stmt, &def, &dt))
5038 return false;
5039 }
5040 else if (TREE_CODE (then_clause) != INTEGER_CST
5041 && TREE_CODE (then_clause) != REAL_CST
5042 && TREE_CODE (then_clause) != FIXED_CST)
5043 return false;
5044
5045 if (TREE_CODE (else_clause) == SSA_NAME)
5046 {
5047 gimple else_def_stmt = SSA_NAME_DEF_STMT (else_clause);
5048 if (!vect_is_simple_use (else_clause, loop_vinfo, bb_vinfo,
5049 &else_def_stmt, &def, &dt))
5050 return false;
5051 }
5052 else if (TREE_CODE (else_clause) != INTEGER_CST
5053 && TREE_CODE (else_clause) != REAL_CST
5054 && TREE_CODE (else_clause) != FIXED_CST)
5055 return false;
5056
5057 if (!vec_stmt)
5058 {
5059 STMT_VINFO_TYPE (stmt_info) = condition_vec_info_type;
5060 return expand_vec_cond_expr_p (vectype, comp_vectype);
5061 }
5062
5063 /* Transform. */
5064
5065 if (!slp_node)
5066 {
5067 vec_oprnds0 = VEC_alloc (tree, heap, 1);
5068 vec_oprnds1 = VEC_alloc (tree, heap, 1);
5069 vec_oprnds2 = VEC_alloc (tree, heap, 1);
5070 vec_oprnds3 = VEC_alloc (tree, heap, 1);
5071 }
5072
5073 /* Handle def. */
5074 scalar_dest = gimple_assign_lhs (stmt);
5075 vec_dest = vect_create_destination_var (scalar_dest, vectype);
5076
5077 /* Handle cond expr. */
5078 for (j = 0; j < ncopies; j++)
5079 {
5080 gimple new_stmt = NULL;
5081 if (j == 0)
5082 {
5083 if (slp_node)
5084 {
5085 VEC (tree, heap) *ops = VEC_alloc (tree, heap, 4);
5086 VEC (slp_void_p, heap) *vec_defs;
5087
5088 vec_defs = VEC_alloc (slp_void_p, heap, 4);
5089 VEC_safe_push (tree, heap, ops, TREE_OPERAND (cond_expr, 0));
5090 VEC_safe_push (tree, heap, ops, TREE_OPERAND (cond_expr, 1));
5091 VEC_safe_push (tree, heap, ops, then_clause);
5092 VEC_safe_push (tree, heap, ops, else_clause);
5093 vect_get_slp_defs (ops, slp_node, &vec_defs, -1);
5094 vec_oprnds3 = (VEC (tree, heap) *) VEC_pop (slp_void_p, vec_defs);
5095 vec_oprnds2 = (VEC (tree, heap) *) VEC_pop (slp_void_p, vec_defs);
5096 vec_oprnds1 = (VEC (tree, heap) *) VEC_pop (slp_void_p, vec_defs);
5097 vec_oprnds0 = (VEC (tree, heap) *) VEC_pop (slp_void_p, vec_defs);
5098
5099 VEC_free (tree, heap, ops);
5100 VEC_free (slp_void_p, heap, vec_defs);
5101 }
5102 else
5103 {
5104 gimple gtemp;
5105 vec_cond_lhs =
5106 vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 0),
5107 stmt, NULL);
5108 vect_is_simple_use (TREE_OPERAND (cond_expr, 0), loop_vinfo,
5109 NULL, &gtemp, &def, &dts[0]);
5110
5111 vec_cond_rhs =
5112 vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 1),
5113 stmt, NULL);
5114 vect_is_simple_use (TREE_OPERAND (cond_expr, 1), loop_vinfo,
5115 NULL, &gtemp, &def, &dts[1]);
5116 if (reduc_index == 1)
5117 vec_then_clause = reduc_def;
5118 else
5119 {
5120 vec_then_clause = vect_get_vec_def_for_operand (then_clause,
5121 stmt, NULL);
5122 vect_is_simple_use (then_clause, loop_vinfo,
5123 NULL, &gtemp, &def, &dts[2]);
5124 }
5125 if (reduc_index == 2)
5126 vec_else_clause = reduc_def;
5127 else
5128 {
5129 vec_else_clause = vect_get_vec_def_for_operand (else_clause,
5130 stmt, NULL);
5131 vect_is_simple_use (else_clause, loop_vinfo,
5132 NULL, &gtemp, &def, &dts[3]);
5133 }
5134 }
5135 }
5136 else
5137 {
5138 vec_cond_lhs = vect_get_vec_def_for_stmt_copy (dts[0],
5139 VEC_pop (tree, vec_oprnds0));
5140 vec_cond_rhs = vect_get_vec_def_for_stmt_copy (dts[1],
5141 VEC_pop (tree, vec_oprnds1));
5142 vec_then_clause = vect_get_vec_def_for_stmt_copy (dts[2],
5143 VEC_pop (tree, vec_oprnds2));
5144 vec_else_clause = vect_get_vec_def_for_stmt_copy (dts[3],
5145 VEC_pop (tree, vec_oprnds3));
5146 }
5147
5148 if (!slp_node)
5149 {
5150 VEC_quick_push (tree, vec_oprnds0, vec_cond_lhs);
5151 VEC_quick_push (tree, vec_oprnds1, vec_cond_rhs);
5152 VEC_quick_push (tree, vec_oprnds2, vec_then_clause);
5153 VEC_quick_push (tree, vec_oprnds3, vec_else_clause);
5154 }
5155
5156 /* Arguments are ready. Create the new vector stmt. */
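/* Illustratively, the scalar statement x = a_1 < b_2 ? c_3 : d_4 becomes
   vx = VEC_COND_EXPR <va < vb, vc, vd>, applying the comparison and the
   selection element-wise to the vector operands collected above.  */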
5157 FOR_EACH_VEC_ELT (tree, vec_oprnds0, i, vec_cond_lhs)
5158 {
5159 vec_cond_rhs = VEC_index (tree, vec_oprnds1, i);
5160 vec_then_clause = VEC_index (tree, vec_oprnds2, i);
5161 vec_else_clause = VEC_index (tree, vec_oprnds3, i);
5162
5163 vec_compare = build2 (TREE_CODE (cond_expr), vectype,
5164 vec_cond_lhs, vec_cond_rhs);
5165 vec_cond_expr = build3 (VEC_COND_EXPR, vectype,
5166 vec_compare, vec_then_clause, vec_else_clause);
5167
5168 new_stmt = gimple_build_assign (vec_dest, vec_cond_expr);
5169 new_temp = make_ssa_name (vec_dest, new_stmt);
5170 gimple_assign_set_lhs (new_stmt, new_temp);
5171 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5172 if (slp_node)
5173 VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node), new_stmt);
5174 }
5175
5176 if (slp_node)
5177 continue;
5178
5179 if (j == 0)
5180 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
5181 else
5182 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
5183
5184 prev_stmt_info = vinfo_for_stmt (new_stmt);
5185 }
5186
5187 VEC_free (tree, heap, vec_oprnds0);
5188 VEC_free (tree, heap, vec_oprnds1);
5189 VEC_free (tree, heap, vec_oprnds2);
5190 VEC_free (tree, heap, vec_oprnds3);
5191
5192 return true;
5193 }
5194
5195
5196 /* Make sure the statement is vectorizable. */
5197
5198 bool
5199 vect_analyze_stmt (gimple stmt, bool *need_to_vectorize, slp_tree node)
5200 {
5201 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
5202 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
5203 enum vect_relevant relevance = STMT_VINFO_RELEVANT (stmt_info);
5204 bool ok;
5205 tree scalar_type, vectype;
5206 gimple pattern_stmt, pattern_def_stmt;
5207
5208 if (vect_print_dump_info (REPORT_DETAILS))
5209 {
5210 fprintf (vect_dump, "==> examining statement: ");
5211 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
5212 }
5213
5214 if (gimple_has_volatile_ops (stmt))
5215 {
5216 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
5217 fprintf (vect_dump, "not vectorized: stmt has volatile operands");
5218
5219 return false;
5220 }
5221
5222 /* Skip stmts that do not need to be vectorized. In loops this is expected
5223 to include:
5224 - the COND_EXPR which is the loop exit condition
5225 - any LABEL_EXPRs in the loop
5226 - computations that are used only for array indexing or loop control.
5227 In basic blocks we only analyze statements that are a part of some SLP
5228 instance, therefore, all the statements are relevant.
5229
5230 Pattern statement needs to be analyzed instead of the original statement
5231 if the original statement is not relevant. Otherwise, we analyze both
5232 statements. */
5233
5234 pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
5235 if (!STMT_VINFO_RELEVANT_P (stmt_info)
5236 && !STMT_VINFO_LIVE_P (stmt_info))
5237 {
5238 if (STMT_VINFO_IN_PATTERN_P (stmt_info)
5239 && pattern_stmt
5240 && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
5241 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
5242 {
5243 /* Analyze PATTERN_STMT instead of the original stmt. */
5244 stmt = pattern_stmt;
5245 stmt_info = vinfo_for_stmt (pattern_stmt);
5246 if (vect_print_dump_info (REPORT_DETAILS))
5247 {
5248 fprintf (vect_dump, "==> examining pattern statement: ");
5249 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
5250 }
5251 }
5252 else
5253 {
5254 if (vect_print_dump_info (REPORT_DETAILS))
5255 fprintf (vect_dump, "irrelevant.");
5256
5257 return true;
5258 }
5259 }
5260 else if (STMT_VINFO_IN_PATTERN_P (stmt_info)
5261 && pattern_stmt
5262 && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
5263 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
5264 {
5265 /* Analyze PATTERN_STMT too. */
5266 if (vect_print_dump_info (REPORT_DETAILS))
5267 {
5268 fprintf (vect_dump, "==> examining pattern statement: ");
5269 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
5270 }
5271
5272 if (!vect_analyze_stmt (pattern_stmt, need_to_vectorize, node))
5273 return false;
5274 }
5275
5276 if (is_pattern_stmt_p (stmt_info)
5277 && (pattern_def_stmt = STMT_VINFO_PATTERN_DEF_STMT (stmt_info))
5278 && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_def_stmt))
5279 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_def_stmt))))
5280 {
5281 /* Analyze def stmt of STMT if it's a pattern stmt. */
5282 if (vect_print_dump_info (REPORT_DETAILS))
5283 {
5284 fprintf (vect_dump, "==> examining pattern def statement: ");
5285 print_gimple_stmt (vect_dump, pattern_def_stmt, 0, TDF_SLIM);
5286 }
5287
5288 if (!vect_analyze_stmt (pattern_def_stmt, need_to_vectorize, node))
5289 return false;
5290 }
5291
5292
5293 switch (STMT_VINFO_DEF_TYPE (stmt_info))
5294 {
5295 case vect_internal_def:
5296 break;
5297
5298 case vect_reduction_def:
5299 case vect_nested_cycle:
5300 gcc_assert (!bb_vinfo && (relevance == vect_used_in_outer
5301 || relevance == vect_used_in_outer_by_reduction
5302 || relevance == vect_unused_in_scope));
5303 break;
5304
5305 case vect_induction_def:
5306 case vect_constant_def:
5307 case vect_external_def:
5308 case vect_unknown_def_type:
5309 default:
5310 gcc_unreachable ();
5311 }
5312
5313 if (bb_vinfo)
5314 {
5315 gcc_assert (PURE_SLP_STMT (stmt_info));
5316
5317 scalar_type = TREE_TYPE (gimple_get_lhs (stmt));
5318 if (vect_print_dump_info (REPORT_DETAILS))
5319 {
5320 fprintf (vect_dump, "get vectype for scalar type: ");
5321 print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
5322 }
5323
5324 vectype = get_vectype_for_scalar_type (scalar_type);
5325 if (!vectype)
5326 {
5327 if (vect_print_dump_info (REPORT_DETAILS))
5328 {
5329 fprintf (vect_dump, "not SLPed: unsupported data-type ");
5330 print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
5331 }
5332 return false;
5333 }
5334
5335 if (vect_print_dump_info (REPORT_DETAILS))
5336 {
5337 fprintf (vect_dump, "vectype: ");
5338 print_generic_expr (vect_dump, vectype, TDF_SLIM);
5339 }
5340
5341 STMT_VINFO_VECTYPE (stmt_info) = vectype;
5342 }
5343
5344 if (STMT_VINFO_RELEVANT_P (stmt_info))
5345 {
5346 gcc_assert (!VECTOR_MODE_P (TYPE_MODE (gimple_expr_type (stmt))));
5347 gcc_assert (STMT_VINFO_VECTYPE (stmt_info));
5348 *need_to_vectorize = true;
5349 }
5350
5351 ok = true;
5352 if (!bb_vinfo
5353 && (STMT_VINFO_RELEVANT_P (stmt_info)
5354 || STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def))
5355 ok = (vectorizable_conversion (stmt, NULL, NULL, NULL)
5356 || vectorizable_shift (stmt, NULL, NULL, NULL)
5357 || vectorizable_operation (stmt, NULL, NULL, NULL)
5358 || vectorizable_assignment (stmt, NULL, NULL, NULL)
5359 || vectorizable_load (stmt, NULL, NULL, NULL, NULL)
5360 || vectorizable_call (stmt, NULL, NULL, NULL)
5361 || vectorizable_store (stmt, NULL, NULL, NULL)
5362 || vectorizable_reduction (stmt, NULL, NULL, NULL)
5363 || vectorizable_condition (stmt, NULL, NULL, NULL, 0, NULL));
5364 else
5365 {
5366 if (bb_vinfo)
5367 ok = (vectorizable_conversion (stmt, NULL, NULL, node)
5368 || vectorizable_shift (stmt, NULL, NULL, node)
5369 || vectorizable_operation (stmt, NULL, NULL, node)
5370 || vectorizable_assignment (stmt, NULL, NULL, node)
5371 || vectorizable_load (stmt, NULL, NULL, node, NULL)
5372 || vectorizable_call (stmt, NULL, NULL, node)
5373 || vectorizable_store (stmt, NULL, NULL, node)
5374 || vectorizable_condition (stmt, NULL, NULL, NULL, 0, node));
5375 }
5376
5377 if (!ok)
5378 {
5379 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
5380 {
5381 fprintf (vect_dump, "not vectorized: relevant stmt not ");
5382 fprintf (vect_dump, "supported: ");
5383 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
5384 }
5385
5386 return false;
5387 }
5388
5389 if (bb_vinfo)
5390 return true;
5391
5392 /* Stmts that are (also) "live" (i.e., used outside the loop)
5393 need extra handling, except for vectorizable reductions. */
5394 if (STMT_VINFO_LIVE_P (stmt_info)
5395 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type)
5396 ok = vectorizable_live_operation (stmt, NULL, NULL);
5397
5398 if (!ok)
5399 {
5400 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
5401 {
5402 fprintf (vect_dump, "not vectorized: live stmt not ");
5403 fprintf (vect_dump, "supported: ");
5404 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
5405 }
5406
5407 return false;
5408 }
5409
5410 return true;
5411 }
5412
5413
5414 /* Function vect_transform_stmt.
5415
5416 Create a vectorized stmt to replace STMT, and insert it at GSI. */
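
/* A sketch of the typical call pattern (illustrative only, loosely modeled
   on how the loop and SLP transform code drives this function; the exact
   arguments of real callers may differ):

     bool strided_store = false;
     bool is_store = vect_transform_stmt (stmt, &si, &strided_store,
                                          NULL, NULL);

   If IS_STORE is true, STMT was a store and its scalar form is removed
   later by the caller.  */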
5417
5418 bool
5419 vect_transform_stmt (gimple stmt, gimple_stmt_iterator *gsi,
5420 bool *strided_store, slp_tree slp_node,
5421 slp_instance slp_node_instance)
5422 {
5423 bool is_store = false;
5424 gimple vec_stmt = NULL;
5425 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
5426 bool done;
5427
5428 switch (STMT_VINFO_TYPE (stmt_info))
5429 {
5430 case type_demotion_vec_info_type:
5431 case type_promotion_vec_info_type:
5432 case type_conversion_vec_info_type:
5433 done = vectorizable_conversion (stmt, gsi, &vec_stmt, slp_node);
5434 gcc_assert (done);
5435 break;
5436
5437 case induc_vec_info_type:
5438 gcc_assert (!slp_node);
5439 done = vectorizable_induction (stmt, gsi, &vec_stmt);
5440 gcc_assert (done);
5441 break;
5442
5443 case shift_vec_info_type:
5444 done = vectorizable_shift (stmt, gsi, &vec_stmt, slp_node);
5445 gcc_assert (done);
5446 break;
5447
5448 case op_vec_info_type:
5449 done = vectorizable_operation (stmt, gsi, &vec_stmt, slp_node);
5450 gcc_assert (done);
5451 break;
5452
5453 case assignment_vec_info_type:
5454 done = vectorizable_assignment (stmt, gsi, &vec_stmt, slp_node);
5455 gcc_assert (done);
5456 break;
5457
5458 case load_vec_info_type:
5459 done = vectorizable_load (stmt, gsi, &vec_stmt, slp_node,
5460 slp_node_instance);
5461 gcc_assert (done);
5462 break;
5463
5464 case store_vec_info_type:
5465 done = vectorizable_store (stmt, gsi, &vec_stmt, slp_node);
5466 gcc_assert (done);
5467 if (STMT_VINFO_STRIDED_ACCESS (stmt_info) && !slp_node)
5468 {
5469 /* In case of interleaving, the whole chain is vectorized when the
5470 last store in the chain is reached. Store stmts before the last
5471 one are skipped, and their vec_stmt_info shouldn't be freed
5472 meanwhile. */
5473 *strided_store = true;
5474 if (STMT_VINFO_VEC_STMT (stmt_info))
5475 is_store = true;
5476 }
5477 else
5478 is_store = true;
5479 break;
5480
5481 case condition_vec_info_type:
5482 done = vectorizable_condition (stmt, gsi, &vec_stmt, NULL, 0, slp_node);
5483 gcc_assert (done);
5484 break;
5485
5486 case call_vec_info_type:
5487 done = vectorizable_call (stmt, gsi, &vec_stmt, slp_node);
5488 stmt = gsi_stmt (*gsi);
5489 break;
5490
5491 case reduc_vec_info_type:
5492 done = vectorizable_reduction (stmt, gsi, &vec_stmt, slp_node);
5493 gcc_assert (done);
5494 break;
5495
5496 default:
5497 if (!STMT_VINFO_LIVE_P (stmt_info))
5498 {
5499 if (vect_print_dump_info (REPORT_DETAILS))
5500 fprintf (vect_dump, "stmt not supported.");
5501 gcc_unreachable ();
5502 }
5503 }
5504
5505 /* Handle inner-loop stmts whose DEF is used in the loop-nest that
5506 is being vectorized, but outside the immediately enclosing loop. */
5507 if (vec_stmt
5508 && STMT_VINFO_LOOP_VINFO (stmt_info)
5509 && nested_in_vect_loop_p (LOOP_VINFO_LOOP (
5510 STMT_VINFO_LOOP_VINFO (stmt_info)), stmt)
5511 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type
5512 && (STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_outer
5513 || STMT_VINFO_RELEVANT (stmt_info) ==
5514 vect_used_in_outer_by_reduction))
5515 {
5516 struct loop *innerloop = LOOP_VINFO_LOOP (
5517 STMT_VINFO_LOOP_VINFO (stmt_info))->inner;
5518 imm_use_iterator imm_iter;
5519 use_operand_p use_p;
5520 tree scalar_dest;
5521 gimple exit_phi;
5522
5523 if (vect_print_dump_info (REPORT_DETAILS))
5524 fprintf (vect_dump, "Record the vdef for outer-loop vectorization.");
5525
5526 /* Find the relevant loop-exit phi-node, and record the vec_stmt there
5527 (to be used when vectorizing outer-loop stmts that use the DEF of
5528 STMT). */
5529 if (gimple_code (stmt) == GIMPLE_PHI)
5530 scalar_dest = PHI_RESULT (stmt);
5531 else
5532 scalar_dest = gimple_assign_lhs (stmt);
5533
5534 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, scalar_dest)
5535 {
5536 if (!flow_bb_inside_loop_p (innerloop, gimple_bb (USE_STMT (use_p))))
5537 {
5538 exit_phi = USE_STMT (use_p);
5539 STMT_VINFO_VEC_STMT (vinfo_for_stmt (exit_phi)) = vec_stmt;
5540 }
5541 }
5542 }
5543
5544 /* Handle stmts whose DEF is used outside the loop-nest that is
5545 being vectorized. */
5546 if (STMT_VINFO_LIVE_P (stmt_info)
5547 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type)
5548 {
5549 done = vectorizable_live_operation (stmt, gsi, &vec_stmt);
5550 gcc_assert (done);
5551 }
5552
5553 if (vec_stmt)
5554 STMT_VINFO_VEC_STMT (stmt_info) = vec_stmt;
5555
5556 return is_store;
5557 }
5558
5559
5560 /* Remove a group of stores (for SLP or interleaving) and free their
5561 stmt_vec_info. */
5562
5563 void
5564 vect_remove_stores (gimple first_stmt)
5565 {
5566 gimple next = first_stmt;
5567 gimple tmp;
5568 gimple_stmt_iterator next_si;
5569
5570 while (next)
5571 {
5572 stmt_vec_info stmt_info = vinfo_for_stmt (next);
5573
5574 tmp = GROUP_NEXT_ELEMENT (stmt_info);
5575 if (is_pattern_stmt_p (stmt_info))
5576 next = STMT_VINFO_RELATED_STMT (stmt_info);
5577 /* Free the attached stmt_vec_info and remove the stmt. */
5578 next_si = gsi_for_stmt (next);
5579 gsi_remove (&next_si, true);
5580 free_stmt_vec_info (next);
5581 next = tmp;
5582 }
5583 }
5584
5585
5586 /* Function new_stmt_vec_info.
5587
5588 Create and initialize a new stmt_vec_info struct for STMT. */
5589
5590 stmt_vec_info
5591 new_stmt_vec_info (gimple stmt, loop_vec_info loop_vinfo,
5592 bb_vec_info bb_vinfo)
5593 {
5594 stmt_vec_info res;
5595 res = (stmt_vec_info) xcalloc (1, sizeof (struct _stmt_vec_info));
5596
5597 STMT_VINFO_TYPE (res) = undef_vec_info_type;
5598 STMT_VINFO_STMT (res) = stmt;
5599 STMT_VINFO_LOOP_VINFO (res) = loop_vinfo;
5600 STMT_VINFO_BB_VINFO (res) = bb_vinfo;
5601 STMT_VINFO_RELEVANT (res) = vect_unused_in_scope;
5602 STMT_VINFO_LIVE_P (res) = false;
5603 STMT_VINFO_VECTYPE (res) = NULL;
5604 STMT_VINFO_VEC_STMT (res) = NULL;
5605 STMT_VINFO_VECTORIZABLE (res) = true;
5606 STMT_VINFO_IN_PATTERN_P (res) = false;
5607 STMT_VINFO_RELATED_STMT (res) = NULL;
5608 STMT_VINFO_PATTERN_DEF_STMT (res) = NULL;
5609 STMT_VINFO_DATA_REF (res) = NULL;
5610
5611 STMT_VINFO_DR_BASE_ADDRESS (res) = NULL;
5612 STMT_VINFO_DR_OFFSET (res) = NULL;
5613 STMT_VINFO_DR_INIT (res) = NULL;
5614 STMT_VINFO_DR_STEP (res) = NULL;
5615 STMT_VINFO_DR_ALIGNED_TO (res) = NULL;
5616
5617 if (gimple_code (stmt) == GIMPLE_PHI
5618 && is_loop_header_bb_p (gimple_bb (stmt)))
5619 STMT_VINFO_DEF_TYPE (res) = vect_unknown_def_type;
5620 else
5621 STMT_VINFO_DEF_TYPE (res) = vect_internal_def;
5622
5623 STMT_VINFO_SAME_ALIGN_REFS (res) = VEC_alloc (dr_p, heap, 5);
5624 STMT_VINFO_INSIDE_OF_LOOP_COST (res) = 0;
5625 STMT_VINFO_OUTSIDE_OF_LOOP_COST (res) = 0;
5626 STMT_SLP_TYPE (res) = loop_vect;
5627 GROUP_FIRST_ELEMENT (res) = NULL;
5628 GROUP_NEXT_ELEMENT (res) = NULL;
5629 GROUP_SIZE (res) = 0;
5630 GROUP_STORE_COUNT (res) = 0;
5631 GROUP_GAP (res) = 0;
5632 GROUP_SAME_DR_STMT (res) = NULL;
5633 GROUP_READ_WRITE_DEPENDENCE (res) = false;
5634
5635 return res;
5636 }
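
/* A minimal sketch of the usual allocation pattern (as used by the loop
   and basic-block analysis code elsewhere in the vectorizer; shown here
   only for illustration):

     set_vinfo_for_stmt (stmt, new_stmt_vec_info (stmt, loop_vinfo, NULL));

   Afterwards vinfo_for_stmt (stmt) returns the freshly initialized
   stmt_vec_info until free_stmt_vec_info is called for STMT.  */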
5637
5638
5639 /* Create a vector for stmt_vec_info structs. */
5640
5641 void
5642 init_stmt_vec_info_vec (void)
5643 {
5644 gcc_assert (!stmt_vec_info_vec);
5645 stmt_vec_info_vec = VEC_alloc (vec_void_p, heap, 50);
5646 }
5647
5648
5649 /* Free the vector of stmt_vec_info structs. */
5650
5651 void
5652 free_stmt_vec_info_vec (void)
5653 {
5654 gcc_assert (stmt_vec_info_vec);
5655 VEC_free (vec_void_p, heap, stmt_vec_info_vec);
5656 }
5657
5658
5659 /* Free stmt vectorization related info. */
5660
5661 void
5662 free_stmt_vec_info (gimple stmt)
5663 {
5664 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
5665
5666 if (!stmt_info)
5667 return;
5668
5669 /* Check if this statement has a related "pattern stmt"
5670 (introduced by the vectorizer during the pattern recognition
5671 pass). Free the pattern stmt's stmt_vec_info and its def stmt's
5672 stmt_vec_info too. */
5673 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
5674 {
5675 stmt_vec_info patt_info
5676 = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
5677 if (patt_info)
5678 {
5679 if (STMT_VINFO_PATTERN_DEF_STMT (patt_info))
5680 free_stmt_vec_info (STMT_VINFO_PATTERN_DEF_STMT (patt_info));
5681 free_stmt_vec_info (STMT_VINFO_RELATED_STMT (stmt_info));
5682 }
5683 }
5684
5685 VEC_free (dr_p, heap, STMT_VINFO_SAME_ALIGN_REFS (stmt_info));
5686 set_vinfo_for_stmt (stmt, NULL);
5687 free (stmt_info);
5688 }
5689
5690
5691 /* Function get_vectype_for_scalar_type_and_size.
5692
5693 Returns the vector type corresponding to SCALAR_TYPE and SIZE as supported
5694 by the target. */
5695
5696 static tree
5697 get_vectype_for_scalar_type_and_size (tree scalar_type, unsigned size)
5698 {
5699 enum machine_mode inner_mode = TYPE_MODE (scalar_type);
5700 enum machine_mode simd_mode;
5701 unsigned int nbytes = GET_MODE_SIZE (inner_mode);
5702 int nunits;
5703 tree vectype;
5704
5705 if (nbytes == 0)
5706 return NULL_TREE;
5707
5708 /* We can't build a vector type of elements with alignment bigger than
5709 their size. */
5710 if (nbytes < TYPE_ALIGN_UNIT (scalar_type))
5711 return NULL_TREE;
5712
5713 /* For vector types of elements whose mode precision doesn't
5714 match their type's precision we use an element type of mode
5715 precision. The vectorization routines will have to make sure
5716 they support the proper result truncation/extension. */
5717 if (INTEGRAL_TYPE_P (scalar_type)
5718 && GET_MODE_BITSIZE (inner_mode) != TYPE_PRECISION (scalar_type))
5719 scalar_type = build_nonstandard_integer_type (GET_MODE_BITSIZE (inner_mode),
5720 TYPE_UNSIGNED (scalar_type));
5721
5722 if (GET_MODE_CLASS (inner_mode) != MODE_INT
5723 && GET_MODE_CLASS (inner_mode) != MODE_FLOAT)
5724 return NULL_TREE;
5725
5726 /* We shouldn't end up building VECTOR_TYPEs of non-scalar components.
5727 When the component mode passes the above test simply use a type
5728 corresponding to that mode. The theory is that any use that
5729 would cause problems with this will disable vectorization anyway. */
5730 if (!SCALAR_FLOAT_TYPE_P (scalar_type)
5731 && !INTEGRAL_TYPE_P (scalar_type)
5732 && !POINTER_TYPE_P (scalar_type))
5733 scalar_type = lang_hooks.types.type_for_mode (inner_mode, 1);
5734
5735 /* If no size was supplied use the mode the target prefers. Otherwise
5736 look up a vector mode of the specified size. */
5737 if (size == 0)
5738 simd_mode = targetm.vectorize.preferred_simd_mode (inner_mode);
5739 else
5740 simd_mode = mode_for_vector (inner_mode, size / nbytes);
5741 nunits = GET_MODE_SIZE (simd_mode) / nbytes;
5742 if (nunits <= 1)
5743 return NULL_TREE;
5744
5745 vectype = build_vector_type (scalar_type, nunits);
5746 if (vect_print_dump_info (REPORT_DETAILS))
5747 {
5748 fprintf (vect_dump, "get vectype with %d units of type ", nunits);
5749 print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
5750 }
5751
5752 if (!vectype)
5753 return NULL_TREE;
5754
5755 if (vect_print_dump_info (REPORT_DETAILS))
5756 {
5757 fprintf (vect_dump, "vectype: ");
5758 print_generic_expr (vect_dump, vectype, TDF_SLIM);
5759 }
5760
5761 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
5762 && !INTEGRAL_MODE_P (TYPE_MODE (vectype)))
5763 {
5764 if (vect_print_dump_info (REPORT_DETAILS))
5765 fprintf (vect_dump, "mode not supported by target.");
5766 return NULL_TREE;
5767 }
5768
5769 return vectype;
5770 }
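
/* A worked example (hedged; the exact modes depend on the target): for
   SCALAR_TYPE 'int' (SImode, nbytes == 4) and SIZE == 16, mode_for_vector
   returns a 16-byte vector mode, so nunits == 16 / 4 == 4 and the function
   builds a 4-unit vector of 'int' (V4SImode where the target supports it).
   With SIZE == 0 the target's preferred SIMD mode is used instead.  */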
5771
5772 unsigned int current_vector_size;
5773
5774 /* Function get_vectype_for_scalar_type.
5775
5776 Returns the vector type corresponding to SCALAR_TYPE as supported
5777 by the target. */
5778
5779 tree
5780 get_vectype_for_scalar_type (tree scalar_type)
5781 {
5782 tree vectype;
5783 vectype = get_vectype_for_scalar_type_and_size (scalar_type,
5784 current_vector_size);
5785 if (vectype
5786 && current_vector_size == 0)
5787 current_vector_size = GET_MODE_SIZE (TYPE_MODE (vectype));
5788 return vectype;
5789 }
5790
5791 /* Function get_same_sized_vectype
5792
5793 Returns a vector type corresponding to SCALAR_TYPE with the same
5794 size as VECTOR_TYPE, if supported by the target. */
5795
5796 tree
5797 get_same_sized_vectype (tree scalar_type, tree vector_type)
5798 {
5799 return get_vectype_for_scalar_type_and_size
5800 (scalar_type, GET_MODE_SIZE (TYPE_MODE (vector_type)));
5801 }
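
/* For instance (illustrative only): with SCALAR_TYPE 'short' and
   VECTOR_TYPE a 16-byte float vector (V4SFmode), the result is the
   16-byte vector type with 8 'short' elements (V8HImode), provided the
   target supports that mode.  */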
5802
5803 /* Function vect_is_simple_use.
5804
5805 Input:
5806 LOOP_VINFO - the vect info of the loop that is being vectorized.
5807 BB_VINFO - the vect info of the basic block that is being vectorized.
5808 OPERAND - operand of a stmt in the loop or bb.
5809 DEF - the defining stmt in case OPERAND is an SSA_NAME.
5810
5811 Returns whether a stmt with OPERAND can be vectorized.
5812 For loops, supportable operands are constants, loop invariants, and operands
5813 that are defined by the current iteration of the loop. Unsupportable
5814 operands are those that are defined by a previous iteration of the loop (as
5815 is the case in reduction/induction computations).
5816 For basic blocks, supportable operands are constants and bb invariants.
5817 For now, operands defined outside the basic block are not supported. */
5818
5819 bool
5820 vect_is_simple_use (tree operand, loop_vec_info loop_vinfo,
5821 bb_vec_info bb_vinfo, gimple *def_stmt,
5822 tree *def, enum vect_def_type *dt)
5823 {
5824 basic_block bb;
5825 stmt_vec_info stmt_vinfo;
5826 struct loop *loop = NULL;
5827
5828 if (loop_vinfo)
5829 loop = LOOP_VINFO_LOOP (loop_vinfo);
5830
5831 *def_stmt = NULL;
5832 *def = NULL_TREE;
5833
5834 if (vect_print_dump_info (REPORT_DETAILS))
5835 {
5836 fprintf (vect_dump, "vect_is_simple_use: operand ");
5837 print_generic_expr (vect_dump, operand, TDF_SLIM);
5838 }
5839
5840 if (TREE_CODE (operand) == INTEGER_CST || TREE_CODE (operand) == REAL_CST)
5841 {
5842 *dt = vect_constant_def;
5843 return true;
5844 }
5845
5846 if (is_gimple_min_invariant (operand))
5847 {
5848 *def = operand;
5849 *dt = vect_external_def;
5850 return true;
5851 }
5852
5853 if (TREE_CODE (operand) == PAREN_EXPR)
5854 {
5855 if (vect_print_dump_info (REPORT_DETAILS))
5856 fprintf (vect_dump, "non-associatable copy.");
5857 operand = TREE_OPERAND (operand, 0);
5858 }
5859
5860 if (TREE_CODE (operand) != SSA_NAME)
5861 {
5862 if (vect_print_dump_info (REPORT_DETAILS))
5863 fprintf (vect_dump, "not ssa-name.");
5864 return false;
5865 }
5866
5867 *def_stmt = SSA_NAME_DEF_STMT (operand);
5868 if (*def_stmt == NULL)
5869 {
5870 if (vect_print_dump_info (REPORT_DETAILS))
5871 fprintf (vect_dump, "no def_stmt.");
5872 return false;
5873 }
5874
5875 if (vect_print_dump_info (REPORT_DETAILS))
5876 {
5877 fprintf (vect_dump, "def_stmt: ");
5878 print_gimple_stmt (vect_dump, *def_stmt, 0, TDF_SLIM);
5879 }
5880
5881 /* An empty stmt is expected only in the case of a function argument
5882 (otherwise we expect a PHI node or a GIMPLE_ASSIGN). */
5883 if (gimple_nop_p (*def_stmt))
5884 {
5885 *def = operand;
5886 *dt = vect_external_def;
5887 return true;
5888 }
5889
5890 bb = gimple_bb (*def_stmt);
5891
5892 if ((loop && !flow_bb_inside_loop_p (loop, bb))
5893 || (!loop && bb != BB_VINFO_BB (bb_vinfo))
5894 || (!loop && gimple_code (*def_stmt) == GIMPLE_PHI))
5895 *dt = vect_external_def;
5896 else
5897 {
5898 stmt_vinfo = vinfo_for_stmt (*def_stmt);
5899 *dt = STMT_VINFO_DEF_TYPE (stmt_vinfo);
5900 }
5901
5902 if (*dt == vect_unknown_def_type)
5903 {
5904 if (vect_print_dump_info (REPORT_DETAILS))
5905 fprintf (vect_dump, "Unsupported pattern.");
5906 return false;
5907 }
5908
5909 if (vect_print_dump_info (REPORT_DETAILS))
5910 fprintf (vect_dump, "type of def: %d.", *dt);
5911
5912 switch (gimple_code (*def_stmt))
5913 {
5914 case GIMPLE_PHI:
5915 *def = gimple_phi_result (*def_stmt);
5916 break;
5917
5918 case GIMPLE_ASSIGN:
5919 *def = gimple_assign_lhs (*def_stmt);
5920 break;
5921
5922 case GIMPLE_CALL:
5923 *def = gimple_call_lhs (*def_stmt);
5924 if (*def != NULL)
5925 break;
5926 /* FALLTHRU */
5927 default:
5928 if (vect_print_dump_info (REPORT_DETAILS))
5929 fprintf (vect_dump, "unsupported defining stmt: ");
5930 return false;
5931 }
5932
5933 return true;
5934 }
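
/* A sketch of how this predicate is typically consumed (hypothetical
   caller; OP is an operand of a stmt in the loop described by LOOP_VINFO):

     gimple def_stmt;
     tree def;
     enum vect_def_type dt;

     if (!vect_is_simple_use (op, loop_vinfo, NULL, &def_stmt, &def, &dt))
       return false;

   A constant or external DT means the operand is loop-invariant and can
   be broadcast outside the loop; an internal DT means the vectorized
   definition of DEF_STMT is used instead.  */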
5935
5936 /* Function vect_is_simple_use_1.
5937
5938 Same as vect_is_simple_use but also determines the vector operand
5939 type of OPERAND and stores it to *VECTYPE. If the definition of
5940 OPERAND is vect_uninitialized_def, vect_constant_def or
5941 vect_external_def *VECTYPE will be set to NULL_TREE and the caller
5942 is responsible to compute the best suited vector type for the
5943 scalar operand. */
5944
5945 bool
5946 vect_is_simple_use_1 (tree operand, loop_vec_info loop_vinfo,
5947 bb_vec_info bb_vinfo, gimple *def_stmt,
5948 tree *def, enum vect_def_type *dt, tree *vectype)
5949 {
5950 if (!vect_is_simple_use (operand, loop_vinfo, bb_vinfo, def_stmt, def, dt))
5951 return false;
5952
5953 /* Now get a vector type if the def is internal, otherwise supply
5954 NULL_TREE and leave it up to the caller to figure out a proper
5955 type for the use stmt. */
5956 if (*dt == vect_internal_def
5957 || *dt == vect_induction_def
5958 || *dt == vect_reduction_def
5959 || *dt == vect_double_reduction_def
5960 || *dt == vect_nested_cycle)
5961 {
5962 stmt_vec_info stmt_info = vinfo_for_stmt (*def_stmt);
5963
5964 if (STMT_VINFO_IN_PATTERN_P (stmt_info)
5965 && !STMT_VINFO_RELEVANT (stmt_info)
5966 && !STMT_VINFO_LIVE_P (stmt_info))
5967 stmt_info = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
5968
5969 *vectype = STMT_VINFO_VECTYPE (stmt_info);
5970 gcc_assert (*vectype != NULL_TREE);
5971 }
5972 else if (*dt == vect_uninitialized_def
5973 || *dt == vect_constant_def
5974 || *dt == vect_external_def)
5975 *vectype = NULL_TREE;
5976 else
5977 gcc_unreachable ();
5978
5979 return true;
5980 }
5981
5982
5983 /* Function supportable_widening_operation
5984
5985 Check whether an operation represented by the code CODE is a
5986 widening operation that is supported by the target platform in
5987 vector form (i.e., when operating on arguments of type VECTYPE_IN
5988 producing a result of type VECTYPE_OUT).
5989
5990 Widening operations we currently support are NOP (CONVERT), FLOAT,
5991 WIDEN_MULT and WIDEN_LSHIFT. This function checks if these operations
5992 are supported by the target platform either directly (via vector
5993 tree-codes), or via target builtins.
5994
5995 Output:
5996 - CODE1 and CODE2 are codes of vector operations to be used when
5997 vectorizing the operation, if available.
5998 - DECL1 and DECL2 are decls of target builtin functions to be used
5999 when vectorizing the operation, if available. In this case,
6000 CODE1 and CODE2 are CALL_EXPR.
6001 - MULTI_STEP_CVT determines the number of required intermediate steps in
6002 case of multi-step conversion (like char->short->int - in that case
6003 MULTI_STEP_CVT will be 1).
6004 - INTERM_TYPES contains the intermediate type required to perform the
6005 widening operation (short in the above example). */
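
/* For instance (an illustrative case, assuming 128-bit vectors): a
   char -> int conversion with VECTYPE_IN == V16QImode and
   VECTYPE_OUT == V4SImode on a target providing the vec_unpack optabs
   would return CODE1 = VEC_UNPACK_LO_EXPR, CODE2 = VEC_UNPACK_HI_EXPR,
   MULTI_STEP_CVT = 1 and a single 8 x short intermediate type (V8HImode)
   in INTERM_TYPES.  */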
6006
6007 bool
6008 supportable_widening_operation (enum tree_code code, gimple stmt,
6009 tree vectype_out, tree vectype_in,
6010 tree *decl1, tree *decl2,
6011 enum tree_code *code1, enum tree_code *code2,
6012 int *multi_step_cvt,
6013 VEC (tree, heap) **interm_types)
6014 {
6015 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
6016 loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_info);
6017 struct loop *vect_loop = NULL;
6018 bool ordered_p;
6019 enum machine_mode vec_mode;
6020 enum insn_code icode1, icode2;
6021 optab optab1, optab2;
6022 tree vectype = vectype_in;
6023 tree wide_vectype = vectype_out;
6024 enum tree_code c1, c2;
6025 int i;
6026 tree prev_type, intermediate_type;
6027 enum machine_mode intermediate_mode, prev_mode;
6028 optab optab3, optab4;
6029
6030 *multi_step_cvt = 0;
6031 if (loop_info)
6032 vect_loop = LOOP_VINFO_LOOP (loop_info);
6033
6034 /* The result of a vectorized widening operation usually requires two vectors
6035 (because the widened results do not fit into one vector). The generated
6036 vector results would normally be expected to be generated in the same
6037 order as in the original scalar computation, i.e. if 8 results are
6038 generated in each vector iteration, they are to be organized as follows:
6039 vect1: [res1,res2,res3,res4], vect2: [res5,res6,res7,res8].
6040
6041 However, in the special case that the result of the widening operation is
6042 used in a reduction computation only, the order doesn't matter (because
6043 when vectorizing a reduction we change the order of the computation).
6044 Some targets can take advantage of this and generate more efficient code.
6045 For example, targets like Altivec that support widen_mult using a sequence
6046 of {mult_even,mult_odd} generate the following vectors:
6047 vect1: [res1,res3,res5,res7], vect2: [res2,res4,res6,res8].
6048
6049 When vectorizing outer-loops, we execute the inner-loop sequentially
6050 (each vectorized inner-loop iteration contributes to VF outer-loop
6051 iterations in parallel). We therefore don't allow changing the order
6052 of the computation in the inner-loop during outer-loop vectorization. */
6053
6054 if (vect_loop
6055 && STMT_VINFO_RELEVANT (stmt_info) == vect_used_by_reduction
6056 && !nested_in_vect_loop_p (vect_loop, stmt))
6057 ordered_p = false;
6058 else
6059 ordered_p = true;
6060
6061 if (!ordered_p
6062 && code == WIDEN_MULT_EXPR
6063 && targetm.vectorize.builtin_mul_widen_even
6064 && targetm.vectorize.builtin_mul_widen_even (vectype)
6065 && targetm.vectorize.builtin_mul_widen_odd
6066 && targetm.vectorize.builtin_mul_widen_odd (vectype))
6067 {
6068 if (vect_print_dump_info (REPORT_DETAILS))
6069 fprintf (vect_dump, "Unordered widening operation detected.");
6070
6071 *code1 = *code2 = CALL_EXPR;
6072 *decl1 = targetm.vectorize.builtin_mul_widen_even (vectype);
6073 *decl2 = targetm.vectorize.builtin_mul_widen_odd (vectype);
6074 return true;
6075 }
6076
6077 switch (code)
6078 {
6079 case WIDEN_MULT_EXPR:
6080 c1 = VEC_WIDEN_MULT_LO_EXPR;
6081 c2 = VEC_WIDEN_MULT_HI_EXPR;
6082 break;
6083
6084 case WIDEN_LSHIFT_EXPR:
6085 c1 = VEC_WIDEN_LSHIFT_LO_EXPR;
6086 c2 = VEC_WIDEN_LSHIFT_HI_EXPR;
6087 break;
6088
6089 CASE_CONVERT:
6090 c1 = VEC_UNPACK_LO_EXPR;
6091 c2 = VEC_UNPACK_HI_EXPR;
6092 break;
6093
6094 case FLOAT_EXPR:
6095 c1 = VEC_UNPACK_FLOAT_LO_EXPR;
6096 c2 = VEC_UNPACK_FLOAT_HI_EXPR;
6097 break;
6098
6099 case FIX_TRUNC_EXPR:
6100 /* ??? Not yet implemented due to missing VEC_UNPACK_FIX_TRUNC_HI_EXPR/
6101 VEC_UNPACK_FIX_TRUNC_LO_EXPR tree codes and optabs used for
6102 computing the operation. */
6103 return false;
6104
6105 default:
6106 gcc_unreachable ();
6107 }
6108
6109 if (BYTES_BIG_ENDIAN)
6110 {
6111 enum tree_code ctmp = c1;
6112 c1 = c2;
6113 c2 = ctmp;
6114 }
6115
6116 if (code == FIX_TRUNC_EXPR)
6117 {
6118 /* The signedness is determined from the output operand. */
6119 optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
6120 optab2 = optab_for_tree_code (c2, vectype_out, optab_default);
6121 }
6122 else
6123 {
6124 optab1 = optab_for_tree_code (c1, vectype, optab_default);
6125 optab2 = optab_for_tree_code (c2, vectype, optab_default);
6126 }
6127
6128 if (!optab1 || !optab2)
6129 return false;
6130
6131 vec_mode = TYPE_MODE (vectype);
6132 if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing
6133 || (icode2 = optab_handler (optab2, vec_mode)) == CODE_FOR_nothing)
6134 return false;
6135
6136 *code1 = c1;
6137 *code2 = c2;
6138
6139 if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
6140 && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
6141 return true;
6142
6143 /* Check if it's a multi-step conversion that can be done using intermediate
6144 types. */
6145
6146 prev_type = vectype;
6147 prev_mode = vec_mode;
6148
6149 if (!CONVERT_EXPR_CODE_P (code))
6150 return false;
6151
6152 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
6153 intermediate steps in the promotion sequence. We try
6154 MAX_INTERM_CVT_STEPS to get to WIDE_VECTYPE, and fail if we do
6155 not. */
6156 *interm_types = VEC_alloc (tree, heap, MAX_INTERM_CVT_STEPS);
6157 for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
6158 {
6159 intermediate_mode = insn_data[icode1].operand[0].mode;
6160 intermediate_type
6161 = lang_hooks.types.type_for_mode (intermediate_mode,
6162 TYPE_UNSIGNED (prev_type));
6163 optab3 = optab_for_tree_code (c1, intermediate_type, optab_default);
6164 optab4 = optab_for_tree_code (c2, intermediate_type, optab_default);
6165
6166 if (!optab3 || !optab4
6167 || (icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing
6168 || insn_data[icode1].operand[0].mode != intermediate_mode
6169 || (icode2 = optab_handler (optab2, prev_mode)) == CODE_FOR_nothing
6170 || insn_data[icode2].operand[0].mode != intermediate_mode
6171 || ((icode1 = optab_handler (optab3, intermediate_mode))
6172 == CODE_FOR_nothing)
6173 || ((icode2 = optab_handler (optab4, intermediate_mode))
6174 == CODE_FOR_nothing))
6175 break;
6176
6177 VEC_quick_push (tree, *interm_types, intermediate_type);
6178 (*multi_step_cvt)++;
6179
6180 if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
6181 && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
6182 return true;
6183
6184 prev_type = intermediate_type;
6185 prev_mode = intermediate_mode;
6186 }
6187
6188 VEC_free (tree, heap, *interm_types);
6189 return false;
6190 }
6191
6192
6193 /* Function supportable_narrowing_operation
6194
6195 Check whether an operation represented by the code CODE is a
6196 narrowing operation that is supported by the target platform in
6197 vector form (i.e., when operating on arguments of type VECTYPE_IN
6198 and producing a result of type VECTYPE_OUT).
6199
6200 Narrowing operations we currently support are NOP (CONVERT) and
6201 FIX_TRUNC. This function checks if these operations are supported by
6202 the target platform directly via vector tree-codes.
6203
6204 Output:
6205 - CODE1 is the code of a vector operation to be used when
6206 vectorizing the operation, if available.
6207 - MULTI_STEP_CVT determines the number of required intermediate steps in
6208 case of multi-step conversion (like int->short->char - in that case
6209 MULTI_STEP_CVT will be 1).
6210 - INTERM_TYPES contains the intermediate type required to perform the
6211 narrowing operation (short in the above example). */
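
/* For instance (an illustrative case, assuming 128-bit vectors): an
   int -> char conversion with VECTYPE_IN == V4SImode and
   VECTYPE_OUT == V16QImode on a target providing the vec_pack_trunc
   optabs would return CODE1 = VEC_PACK_TRUNC_EXPR, MULTI_STEP_CVT = 1
   and a single 8 x short intermediate type (V8HImode) in INTERM_TYPES.  */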
6212
6213 bool
6214 supportable_narrowing_operation (enum tree_code code,
6215 tree vectype_out, tree vectype_in,
6216 enum tree_code *code1, int *multi_step_cvt,
6217 VEC (tree, heap) **interm_types)
6218 {
6219 enum machine_mode vec_mode;
6220 enum insn_code icode1;
6221 optab optab1, interm_optab;
6222 tree vectype = vectype_in;
6223 tree narrow_vectype = vectype_out;
6224 enum tree_code c1;
6225 tree intermediate_type;
6226 enum machine_mode intermediate_mode, prev_mode;
6227 int i;
6228 bool uns;
6229
6230 *multi_step_cvt = 0;
6231 switch (code)
6232 {
6233 CASE_CONVERT:
6234 c1 = VEC_PACK_TRUNC_EXPR;
6235 break;
6236
6237 case FIX_TRUNC_EXPR:
6238 c1 = VEC_PACK_FIX_TRUNC_EXPR;
6239 break;
6240
6241 case FLOAT_EXPR:
6242 /* ??? Not yet implemented due to missing VEC_PACK_FLOAT_EXPR
6243 tree code and optabs used for computing the operation. */
6244 return false;
6245
6246 default:
6247 gcc_unreachable ();
6248 }
6249
6250 if (code == FIX_TRUNC_EXPR)
6251 /* The signedness is determined from the output operand. */
6252 optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
6253 else
6254 optab1 = optab_for_tree_code (c1, vectype, optab_default);
6255
6256 if (!optab1)
6257 return false;
6258
6259 vec_mode = TYPE_MODE (vectype);
6260 if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing)
6261 return false;
6262
6263 *code1 = c1;
6264
6265 if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
6266 return true;
6267
6268 /* Check if it's a multi-step conversion that can be done using intermediate
6269 types. */
6270 prev_mode = vec_mode;
6271 if (code == FIX_TRUNC_EXPR)
6272 uns = TYPE_UNSIGNED (vectype_out);
6273 else
6274 uns = TYPE_UNSIGNED (vectype);
6275
6276 /* For multi-step FIX_TRUNC_EXPR prefer signed floating to integer
6277 conversion over unsigned, as unsigned FIX_TRUNC_EXPR is often more
6278 costly than signed. */
6279 if (code == FIX_TRUNC_EXPR && uns)
6280 {
6281 enum insn_code icode2;
6282
6283 intermediate_type
6284 = lang_hooks.types.type_for_mode (TYPE_MODE (vectype_out), 0);
6285 interm_optab
6286 = optab_for_tree_code (c1, intermediate_type, optab_default);
6287 if (interm_optab != NULL
6288 && (icode2 = optab_handler (optab1, vec_mode)) != CODE_FOR_nothing
6289 && insn_data[icode1].operand[0].mode
6290 == insn_data[icode2].operand[0].mode)
6291 {
6292 uns = false;
6293 optab1 = interm_optab;
6294 icode1 = icode2;
6295 }
6296 }
6297
6298 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
6299 intermediate steps in the narrowing sequence. We try
6300 MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do not. */
6301 *interm_types = VEC_alloc (tree, heap, MAX_INTERM_CVT_STEPS);
6302 for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
6303 {
6304 intermediate_mode = insn_data[icode1].operand[0].mode;
6305 intermediate_type
6306 = lang_hooks.types.type_for_mode (intermediate_mode, uns);
6307 interm_optab
6308 = optab_for_tree_code (VEC_PACK_TRUNC_EXPR, intermediate_type,
6309 optab_default);
6310 if (!interm_optab
6311 || ((icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing)
6312 || insn_data[icode1].operand[0].mode != intermediate_mode
6313 || ((icode1 = optab_handler (interm_optab, intermediate_mode))
6314 == CODE_FOR_nothing))
6315 break;
6316
6317 VEC_quick_push (tree, *interm_types, intermediate_type);
6318 (*multi_step_cvt)++;
6319
6320 if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
6321 return true;
6322
6323 prev_mode = intermediate_mode;
6324 optab1 = interm_optab;
6325 }
6326
6327 VEC_free (tree, heap, *interm_types);
6328 return false;
6329 }