[gcc.git] / gcc / tree-vect-stmts.c
1 /* Statement Analysis and Transformation for Vectorization
2 Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011
3 Free Software Foundation, Inc.
4 Contributed by Dorit Naishlos <dorit@il.ibm.com>
5 and Ira Rosen <irar@il.ibm.com>
6
7 This file is part of GCC.
8
9 GCC is free software; you can redistribute it and/or modify it under
10 the terms of the GNU General Public License as published by the Free
11 Software Foundation; either version 3, or (at your option) any later
12 version.
13
14 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
15 WARRANTY; without even the implied warranty of MERCHANTABILITY or
16 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 for more details.
18
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
22
23 #include "config.h"
24 #include "system.h"
25 #include "coretypes.h"
26 #include "tm.h"
27 #include "ggc.h"
28 #include "tree.h"
29 #include "target.h"
30 #include "basic-block.h"
31 #include "tree-pretty-print.h"
32 #include "gimple-pretty-print.h"
33 #include "tree-flow.h"
34 #include "tree-dump.h"
35 #include "cfgloop.h"
36 #include "cfglayout.h"
37 #include "expr.h"
38 #include "recog.h"
39 #include "optabs.h"
40 #include "diagnostic-core.h"
41 #include "tree-vectorizer.h"
42 #include "langhooks.h"
43
44
45 /* Return a variable of type ELEM_TYPE[NELEMS]. */
46
47 static tree
48 create_vector_array (tree elem_type, unsigned HOST_WIDE_INT nelems)
49 {
50 return create_tmp_var (build_array_type_nelts (elem_type, nelems),
51 "vect_array");
52 }
53
54 /* ARRAY is an array of vectors created by create_vector_array.
55 Return an SSA_NAME for the vector in index N. The reference
56 is part of the vectorization of STMT and the vector is associated
57 with scalar destination SCALAR_DEST. */
58
59 static tree
60 read_vector_array (gimple stmt, gimple_stmt_iterator *gsi, tree scalar_dest,
61 tree array, unsigned HOST_WIDE_INT n)
62 {
63 tree vect_type, vect, vect_name, array_ref;
64 gimple new_stmt;
65
66 gcc_assert (TREE_CODE (TREE_TYPE (array)) == ARRAY_TYPE);
67 vect_type = TREE_TYPE (TREE_TYPE (array));
68 vect = vect_create_destination_var (scalar_dest, vect_type);
69 array_ref = build4 (ARRAY_REF, vect_type, array,
70 build_int_cst (size_type_node, n),
71 NULL_TREE, NULL_TREE);
72
73 new_stmt = gimple_build_assign (vect, array_ref);
74 vect_name = make_ssa_name (vect, new_stmt);
75 gimple_assign_set_lhs (new_stmt, vect_name);
76 vect_finish_stmt_generation (stmt, new_stmt, gsi);
77 mark_symbols_for_renaming (new_stmt);
78
79 return vect_name;
80 }
81
82 /* ARRAY is an array of vectors created by create_vector_array.
83 Emit code to store SSA_NAME VECT in index N of the array.
84 The store is part of the vectorization of STMT. */
85
86 static void
87 write_vector_array (gimple stmt, gimple_stmt_iterator *gsi, tree vect,
88 tree array, unsigned HOST_WIDE_INT n)
89 {
90 tree array_ref;
91 gimple new_stmt;
92
93 array_ref = build4 (ARRAY_REF, TREE_TYPE (vect), array,
94 build_int_cst (size_type_node, n),
95 NULL_TREE, NULL_TREE);
96
97 new_stmt = gimple_build_assign (array_ref, vect);
98 vect_finish_stmt_generation (stmt, new_stmt, gsi);
99 mark_symbols_for_renaming (new_stmt);
100 }
101
102 /* PTR is a pointer to an array of type TYPE. Return a representation
103 of *PTR. The memory reference replaces those in FIRST_DR
104 (and its group). */
105
106 static tree
107 create_array_ref (tree type, tree ptr, struct data_reference *first_dr)
108 {
109 struct ptr_info_def *pi;
110 tree mem_ref, alias_ptr_type;
111
112 alias_ptr_type = reference_alias_ptr_type (DR_REF (first_dr));
113 mem_ref = build2 (MEM_REF, type, ptr, build_int_cst (alias_ptr_type, 0));
114 /* Arrays have the same alignment as their type. */
115 pi = get_ptr_info (ptr);
116 pi->align = TYPE_ALIGN_UNIT (type);
117 pi->misalign = 0;
118 return mem_ref;
119 }
120
121 /* Utility functions used by vect_mark_stmts_to_be_vectorized. */
122
123 /* Function vect_mark_relevant.
124
125 Mark STMT as "relevant for vectorization" and add it to WORKLIST. */
126
127 static void
128 vect_mark_relevant (VEC(gimple,heap) **worklist, gimple stmt,
129 enum vect_relevant relevant, bool live_p,
130 bool used_in_pattern)
131 {
132 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
133 enum vect_relevant save_relevant = STMT_VINFO_RELEVANT (stmt_info);
134 bool save_live_p = STMT_VINFO_LIVE_P (stmt_info);
135 gimple pattern_stmt;
136
137 if (vect_print_dump_info (REPORT_DETAILS))
138 fprintf (vect_dump, "mark relevant %d, live %d.", relevant, live_p);
139
140 /* If this stmt is an original stmt in a pattern, we might need to mark its
141 related pattern stmt instead of the original stmt. However, such stmts
142 may have their own uses that are not in any pattern; in such cases the
143 stmt itself should be marked. */
144 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
145 {
146 bool found = false;
147 if (!used_in_pattern)
148 {
149 imm_use_iterator imm_iter;
150 use_operand_p use_p;
151 gimple use_stmt;
152 tree lhs;
153
154 if (is_gimple_assign (stmt))
155 lhs = gimple_assign_lhs (stmt);
156 else
157 lhs = gimple_call_lhs (stmt);
158
159 /* This use is outside the pattern. If LHS has other uses that are
160 pattern uses, we should mark the stmt itself, and not the pattern
161 stmt. */
162 if (TREE_CODE (lhs) == SSA_NAME)
163 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
164 {
165 if (is_gimple_debug (USE_STMT (use_p)))
166 continue;
167 use_stmt = USE_STMT (use_p);
168
169 if (vinfo_for_stmt (use_stmt)
170 && STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (use_stmt)))
171 {
172 found = true;
173 break;
174 }
175 }
176 }
177
178 if (!found)
179 {
180 /* This is the last stmt in a sequence that was detected as a
181 pattern that can potentially be vectorized. Don't mark the stmt
182 as relevant/live because it's not going to be vectorized.
183 Instead mark the pattern-stmt that replaces it. */
184
185 pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
186
187 if (vect_print_dump_info (REPORT_DETAILS))
188 fprintf (vect_dump, "last stmt in pattern. don't mark"
189 " relevant/live.");
190 stmt_info = vinfo_for_stmt (pattern_stmt);
191 gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info) == stmt);
192 save_relevant = STMT_VINFO_RELEVANT (stmt_info);
193 save_live_p = STMT_VINFO_LIVE_P (stmt_info);
194 stmt = pattern_stmt;
195 }
196 }
197
198 STMT_VINFO_LIVE_P (stmt_info) |= live_p;
199 if (relevant > STMT_VINFO_RELEVANT (stmt_info))
200 STMT_VINFO_RELEVANT (stmt_info) = relevant;
201
202 if (STMT_VINFO_RELEVANT (stmt_info) == save_relevant
203 && STMT_VINFO_LIVE_P (stmt_info) == save_live_p)
204 {
205 if (vect_print_dump_info (REPORT_DETAILS))
206 fprintf (vect_dump, "already marked relevant/live.");
207 return;
208 }
209
210 VEC_safe_push (gimple, heap, *worklist, stmt);
211 }
212
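/* Illustrative sketch, not part of the original file: the kind of loop in
   which pattern recognition replaces the original scalar stmts, so that
   vect_mark_relevant above marks the generated pattern stmt rather than
   the stmt it replaces.  The function name is hypothetical.  */

static void
example_widen_mult (int *out, const short *a, const short *b, int n)
{
  int i;

  /* The multiplication of two shorts promoted to int is the kind of stmt
     the widening-multiplication pattern recognizer may replace; the scalar
     mult is then flagged STMT_VINFO_IN_PATTERN_P.  */
  for (i = 0; i < n; i++)
    out[i] = a[i] * b[i];
}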
213
214 /* Function vect_stmt_relevant_p.
215
216 Return true if STMT in loop that is represented by LOOP_VINFO is
217 "relevant for vectorization".
218
219 A stmt is considered "relevant for vectorization" if:
220 - it has uses outside the loop.
221 - it has vdefs (it alters memory).
222 - it is a control stmt in the loop (except for the exit condition).
223
224 CHECKME: what other side effects would the vectorizer allow? */
225
226 static bool
227 vect_stmt_relevant_p (gimple stmt, loop_vec_info loop_vinfo,
228 enum vect_relevant *relevant, bool *live_p)
229 {
230 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
231 ssa_op_iter op_iter;
232 imm_use_iterator imm_iter;
233 use_operand_p use_p;
234 def_operand_p def_p;
235
236 *relevant = vect_unused_in_scope;
237 *live_p = false;
238
239 /* cond stmt other than loop exit cond. */
240 if (is_ctrl_stmt (stmt)
241 && STMT_VINFO_TYPE (vinfo_for_stmt (stmt))
242 != loop_exit_ctrl_vec_info_type)
243 *relevant = vect_used_in_scope;
244
245 /* changing memory. */
246 if (gimple_code (stmt) != GIMPLE_PHI)
247 if (gimple_vdef (stmt))
248 {
249 if (vect_print_dump_info (REPORT_DETAILS))
250 fprintf (vect_dump, "vec_stmt_relevant_p: stmt has vdefs.");
251 *relevant = vect_used_in_scope;
252 }
253
254 /* uses outside the loop. */
255 FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt, op_iter, SSA_OP_DEF)
256 {
257 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, DEF_FROM_PTR (def_p))
258 {
259 basic_block bb = gimple_bb (USE_STMT (use_p));
260 if (!flow_bb_inside_loop_p (loop, bb))
261 {
262 if (vect_print_dump_info (REPORT_DETAILS))
263 fprintf (vect_dump, "vec_stmt_relevant_p: used out of loop.");
264
265 if (is_gimple_debug (USE_STMT (use_p)))
266 continue;
267
268 /* We expect all such uses to be in the loop exit phis
269 (because of loop closed form) */
270 gcc_assert (gimple_code (USE_STMT (use_p)) == GIMPLE_PHI);
271 gcc_assert (bb == single_exit (loop)->dest);
272
273 *live_p = true;
274 }
275 }
276 }
277
278 return (*live_p || *relevant);
279 }
280
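/* Illustrative sketch, not part of the original file, of the criteria
   tested by vect_stmt_relevant_p above.  Names are hypothetical.  */

static int example_data[256];

static int
example_relevant_stmts (int n)
{
  int i;
  int sum = 0;

  for (i = 0; i < n; i++)
    {
      example_data[i] = 2 * i;   /* Alters memory (has a vdef): relevant.  */
      sum += example_data[i];    /* Final value used after the loop: the
                                    def of SUM is "live".  */
    }
  return sum;
}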
281
282 /* Function exist_non_indexing_operands_for_use_p
283
284 USE is one of the uses attached to STMT. Check if USE is
285 used in STMT for anything other than indexing an array. */
286
287 static bool
288 exist_non_indexing_operands_for_use_p (tree use, gimple stmt)
289 {
290 tree operand;
291 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
292
293 /* USE corresponds to some operand in STMT. If there is no data
294 reference in STMT, then any operand that corresponds to USE
295 is not indexing an array. */
296 if (!STMT_VINFO_DATA_REF (stmt_info))
297 return true;
298
299 /* STMT has a data_ref. FORNOW this means that it is of one of
300 the following forms:
301 -1- ARRAY_REF = var
302 -2- var = ARRAY_REF
303 (This should have been verified in analyze_data_refs).
304
305 'var' in the second case corresponds to a def, not a use,
306 so USE cannot correspond to any operands that are not used
307 for array indexing.
308
309 Therefore, all we need to check is if STMT falls into the
310 first case, and whether var corresponds to USE. */
311
312 if (!gimple_assign_copy_p (stmt))
313 return false;
314 if (TREE_CODE (gimple_assign_lhs (stmt)) == SSA_NAME)
315 return false;
316 operand = gimple_assign_rhs1 (stmt);
317 if (TREE_CODE (operand) != SSA_NAME)
318 return false;
319
320 if (operand == use)
321 return true;
322
323 return false;
324 }
325
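/* Illustrative sketch, not part of the original file: in the store below
   the use of I only indexes the array, so exist_non_indexing_operands_for_use_p
   above returns false for it, whereas X is a real data operand of the
   "-1- ARRAY_REF = var" form and the function returns true for X.
   Names are hypothetical.  */

static void
example_indexing_use (int *a, int x, int n)
{
  int i;

  for (i = 0; i < n; i++)
    a[i] = x;
}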
326
327 /*
328 Function process_use.
329
330 Inputs:
331 - a USE in STMT in a loop represented by LOOP_VINFO
332 - LIVE_P, RELEVANT - enum values to be set in the STMT_VINFO of the stmt
333 that defined USE. This is done by calling mark_relevant and passing it
334 the WORKLIST (to add DEF_STMT to the WORKLIST in case it is relevant).
335 - FORCE is true if exist_non_indexing_operands_for_use_p check shouldn't
336 be performed.
337
338 Outputs:
339 Generally, LIVE_P and RELEVANT are used to define the liveness and
340 relevance info of the DEF_STMT of this USE:
341 STMT_VINFO_LIVE_P (DEF_STMT_info) <-- live_p
342 STMT_VINFO_RELEVANT (DEF_STMT_info) <-- relevant
343 Exceptions:
344 - case 1: If USE is used only for address computations (e.g. array indexing),
345 which does not need to be directly vectorized, then the liveness/relevance
346 of the respective DEF_STMT is left unchanged.
347 - case 2: If STMT is a reduction phi and DEF_STMT is a reduction stmt, we
348 skip DEF_STMT because it has already been processed.
349 - case 3: If DEF_STMT and STMT are in different nests, then "relevant" will
350 be modified accordingly.
351
352 Return true if everything is as expected. Return false otherwise. */
353
354 static bool
355 process_use (gimple stmt, tree use, loop_vec_info loop_vinfo, bool live_p,
356 enum vect_relevant relevant, VEC(gimple,heap) **worklist,
357 bool force)
358 {
359 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
360 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
361 stmt_vec_info dstmt_vinfo;
362 basic_block bb, def_bb;
363 tree def;
364 gimple def_stmt;
365 enum vect_def_type dt;
366
367 /* case 1: we are only interested in uses that need to be vectorized. Uses
368 that are used for address computation are not considered relevant. */
369 if (!force && !exist_non_indexing_operands_for_use_p (use, stmt))
370 return true;
371
372 if (!vect_is_simple_use (use, loop_vinfo, NULL, &def_stmt, &def, &dt))
373 {
374 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
375 fprintf (vect_dump, "not vectorized: unsupported use in stmt.");
376 return false;
377 }
378
379 if (!def_stmt || gimple_nop_p (def_stmt))
380 return true;
381
382 def_bb = gimple_bb (def_stmt);
383 if (!flow_bb_inside_loop_p (loop, def_bb))
384 {
385 if (vect_print_dump_info (REPORT_DETAILS))
386 fprintf (vect_dump, "def_stmt is out of loop.");
387 return true;
388 }
389
390 /* case 2: A reduction phi (STMT) defined by a reduction stmt (DEF_STMT).
391 DEF_STMT must have already been processed, because this should be the
392 only way that STMT, which is a reduction-phi, was put in the worklist,
393 as there should be no other uses for DEF_STMT in the loop. So we just
394 check that everything is as expected, and we are done. */
395 dstmt_vinfo = vinfo_for_stmt (def_stmt);
396 bb = gimple_bb (stmt);
397 if (gimple_code (stmt) == GIMPLE_PHI
398 && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
399 && gimple_code (def_stmt) != GIMPLE_PHI
400 && STMT_VINFO_DEF_TYPE (dstmt_vinfo) == vect_reduction_def
401 && bb->loop_father == def_bb->loop_father)
402 {
403 if (vect_print_dump_info (REPORT_DETAILS))
404 fprintf (vect_dump, "reduc-stmt defining reduc-phi in the same nest.");
405 if (STMT_VINFO_IN_PATTERN_P (dstmt_vinfo))
406 dstmt_vinfo = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (dstmt_vinfo));
407 gcc_assert (STMT_VINFO_RELEVANT (dstmt_vinfo) < vect_used_by_reduction);
408 gcc_assert (STMT_VINFO_LIVE_P (dstmt_vinfo)
409 || STMT_VINFO_RELEVANT (dstmt_vinfo) > vect_unused_in_scope);
410 return true;
411 }
412
413 /* case 3a: outer-loop stmt defining an inner-loop stmt:
414 outer-loop-header-bb:
415 d = def_stmt
416 inner-loop:
417 stmt # use (d)
418 outer-loop-tail-bb:
419 ... */
420 if (flow_loop_nested_p (def_bb->loop_father, bb->loop_father))
421 {
422 if (vect_print_dump_info (REPORT_DETAILS))
423 fprintf (vect_dump, "outer-loop def-stmt defining inner-loop stmt.");
424
425 switch (relevant)
426 {
427 case vect_unused_in_scope:
428 relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_nested_cycle) ?
429 vect_used_in_scope : vect_unused_in_scope;
430 break;
431
432 case vect_used_in_outer_by_reduction:
433 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
434 relevant = vect_used_by_reduction;
435 break;
436
437 case vect_used_in_outer:
438 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
439 relevant = vect_used_in_scope;
440 break;
441
442 case vect_used_in_scope:
443 break;
444
445 default:
446 gcc_unreachable ();
447 }
448 }
449
450 /* case 3b: inner-loop stmt defining an outer-loop stmt:
451 outer-loop-header-bb:
452 ...
453 inner-loop:
454 d = def_stmt
455 outer-loop-tail-bb (or outer-loop-exit-bb in double reduction):
456 stmt # use (d) */
457 else if (flow_loop_nested_p (bb->loop_father, def_bb->loop_father))
458 {
459 if (vect_print_dump_info (REPORT_DETAILS))
460 fprintf (vect_dump, "inner-loop def-stmt defining outer-loop stmt.");
461
462 switch (relevant)
463 {
464 case vect_unused_in_scope:
465 relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
466 || STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_double_reduction_def) ?
467 vect_used_in_outer_by_reduction : vect_unused_in_scope;
468 break;
469
470 case vect_used_by_reduction:
471 relevant = vect_used_in_outer_by_reduction;
472 break;
473
474 case vect_used_in_scope:
475 relevant = vect_used_in_outer;
476 break;
477
478 default:
479 gcc_unreachable ();
480 }
481 }
482
483 vect_mark_relevant (worklist, def_stmt, relevant, live_p,
484 is_pattern_stmt_p (stmt_vinfo));
485 return true;
486 }
487
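/* Illustrative sketch, not part of the original file, of the loop-nest
   cases 3a and 3b handled in process_use above: D is defined in the
   outer-loop header and used by an inner-loop stmt (case 3a), while the
   value of S produced by the inner loop is used by an outer-loop stmt
   (case 3b).  Names are hypothetical.  */

static void
example_outer_inner (int *out, const int *in, int n, int m)
{
  int i, j;

  for (i = 0; i < n; i++)
    {
      int d = in[i] * 2;   /* Outer-loop def ...  */
      int s = 0;

      for (j = 0; j < m; j++)
        s += d;            /* ... used in the inner loop (case 3a).  */

      out[i] = s;          /* Inner-loop result used in the outer loop
                              (case 3b).  */
    }
}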
488
489 /* Function vect_mark_stmts_to_be_vectorized.
490
491 Not all stmts in the loop need to be vectorized. For example:
492
493 for i...
494 for j...
495 1. T0 = i + j
496 2. T1 = a[T0]
497
498 3. j = j + 1
499
500 Stmts 1 and 3 do not need to be vectorized, because loop control and
501 addressing of vectorized data-refs are handled differently.
502
503 This pass detects such stmts. */
504
505 bool
506 vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo)
507 {
508 VEC(gimple,heap) *worklist;
509 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
510 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
511 unsigned int nbbs = loop->num_nodes;
512 gimple_stmt_iterator si;
513 gimple stmt;
514 unsigned int i;
515 stmt_vec_info stmt_vinfo;
516 basic_block bb;
517 gimple phi;
518 bool live_p;
519 enum vect_relevant relevant, tmp_relevant;
520 enum vect_def_type def_type;
521
522 if (vect_print_dump_info (REPORT_DETAILS))
523 fprintf (vect_dump, "=== vect_mark_stmts_to_be_vectorized ===");
524
525 worklist = VEC_alloc (gimple, heap, 64);
526
527 /* 1. Init worklist. */
528 for (i = 0; i < nbbs; i++)
529 {
530 bb = bbs[i];
531 for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
532 {
533 phi = gsi_stmt (si);
534 if (vect_print_dump_info (REPORT_DETAILS))
535 {
536 fprintf (vect_dump, "init: phi relevant? ");
537 print_gimple_stmt (vect_dump, phi, 0, TDF_SLIM);
538 }
539
540 if (vect_stmt_relevant_p (phi, loop_vinfo, &relevant, &live_p))
541 vect_mark_relevant (&worklist, phi, relevant, live_p, false);
542 }
543 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
544 {
545 stmt = gsi_stmt (si);
546 if (vect_print_dump_info (REPORT_DETAILS))
547 {
548 fprintf (vect_dump, "init: stmt relevant? ");
549 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
550 }
551
552 if (vect_stmt_relevant_p (stmt, loop_vinfo, &relevant, &live_p))
553 vect_mark_relevant (&worklist, stmt, relevant, live_p, false);
554 }
555 }
556
557 /* 2. Process_worklist */
558 while (VEC_length (gimple, worklist) > 0)
559 {
560 use_operand_p use_p;
561 ssa_op_iter iter;
562
563 stmt = VEC_pop (gimple, worklist);
564 if (vect_print_dump_info (REPORT_DETAILS))
565 {
566 fprintf (vect_dump, "worklist: examine stmt: ");
567 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
568 }
569
570 /* Examine the USEs of STMT. For each USE, mark the stmt that defines it
571 (DEF_STMT) as relevant/irrelevant and live/dead according to the
572 liveness and relevance properties of STMT. */
573 stmt_vinfo = vinfo_for_stmt (stmt);
574 relevant = STMT_VINFO_RELEVANT (stmt_vinfo);
575 live_p = STMT_VINFO_LIVE_P (stmt_vinfo);
576
577 /* Generally, the liveness and relevance properties of STMT are
578 propagated as is to the DEF_STMTs of its USEs:
579 live_p <-- STMT_VINFO_LIVE_P (STMT_VINFO)
580 relevant <-- STMT_VINFO_RELEVANT (STMT_VINFO)
581
582 One exception is when STMT has been identified as defining a reduction
583 variable; in this case we set the liveness/relevance as follows:
584 live_p = false
585 relevant = vect_used_by_reduction
586 This is because we distinguish between two kinds of relevant stmts -
587 those that are used by a reduction computation, and those that are
588 (also) used by a regular computation. This allows us later on to
589 identify stmts that are used solely by a reduction, and therefore the
590 order of the results that they produce does not have to be kept. */
591
592 def_type = STMT_VINFO_DEF_TYPE (stmt_vinfo);
593 tmp_relevant = relevant;
594 switch (def_type)
595 {
596 case vect_reduction_def:
597 switch (tmp_relevant)
598 {
599 case vect_unused_in_scope:
600 relevant = vect_used_by_reduction;
601 break;
602
603 case vect_used_by_reduction:
604 if (gimple_code (stmt) == GIMPLE_PHI)
605 break;
606 /* fall through */
607
608 default:
609 if (vect_print_dump_info (REPORT_DETAILS))
610 fprintf (vect_dump, "unsupported use of reduction.");
611
612 VEC_free (gimple, heap, worklist);
613 return false;
614 }
615
616 live_p = false;
617 break;
618
619 case vect_nested_cycle:
620 if (tmp_relevant != vect_unused_in_scope
621 && tmp_relevant != vect_used_in_outer_by_reduction
622 && tmp_relevant != vect_used_in_outer)
623 {
624 if (vect_print_dump_info (REPORT_DETAILS))
625 fprintf (vect_dump, "unsupported use of nested cycle.");
626
627 VEC_free (gimple, heap, worklist);
628 return false;
629 }
630
631 live_p = false;
632 break;
633
634 case vect_double_reduction_def:
635 if (tmp_relevant != vect_unused_in_scope
636 && tmp_relevant != vect_used_by_reduction)
637 {
638 if (vect_print_dump_info (REPORT_DETAILS))
639 fprintf (vect_dump, "unsupported use of double reduction.");
640
641 VEC_free (gimple, heap, worklist);
642 return false;
643 }
644
645 live_p = false;
646 break;
647
648 default:
649 break;
650 }
651
652 if (is_pattern_stmt_p (stmt_vinfo))
653 {
654 /* Pattern statements are not inserted into the code, so
655 FOR_EACH_PHI_OR_STMT_USE optimizes their operands out, and we
656 have to scan the RHS or function arguments instead. */
657 if (is_gimple_assign (stmt))
658 {
659 enum tree_code rhs_code = gimple_assign_rhs_code (stmt);
660 tree op = gimple_assign_rhs1 (stmt);
661
662 i = 1;
663 if (rhs_code == COND_EXPR && COMPARISON_CLASS_P (op))
664 {
665 if (!process_use (stmt, TREE_OPERAND (op, 0), loop_vinfo,
666 live_p, relevant, &worklist, false)
667 || !process_use (stmt, TREE_OPERAND (op, 1), loop_vinfo,
668 live_p, relevant, &worklist, false))
669 {
670 VEC_free (gimple, heap, worklist);
671 return false;
672 }
673 i = 2;
674 }
675 for (; i < gimple_num_ops (stmt); i++)
676 {
677 op = gimple_op (stmt, i);
678 if (!process_use (stmt, op, loop_vinfo, live_p, relevant,
679 &worklist, false))
680 {
681 VEC_free (gimple, heap, worklist);
682 return false;
683 }
684 }
685 }
686 else if (is_gimple_call (stmt))
687 {
688 for (i = 0; i < gimple_call_num_args (stmt); i++)
689 {
690 tree arg = gimple_call_arg (stmt, i);
691 if (!process_use (stmt, arg, loop_vinfo, live_p, relevant,
692 &worklist, false))
693 {
694 VEC_free (gimple, heap, worklist);
695 return false;
696 }
697 }
698 }
699 }
700 else
701 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
702 {
703 tree op = USE_FROM_PTR (use_p);
704 if (!process_use (stmt, op, loop_vinfo, live_p, relevant,
705 &worklist, false))
706 {
707 VEC_free (gimple, heap, worklist);
708 return false;
709 }
710 }
711
712 if (STMT_VINFO_GATHER_P (stmt_vinfo))
713 {
714 tree off;
715 tree decl = vect_check_gather (stmt, loop_vinfo, NULL, &off, NULL);
716 gcc_assert (decl);
717 if (!process_use (stmt, off, loop_vinfo, live_p, relevant,
718 &worklist, true))
719 {
720 VEC_free (gimple, heap, worklist);
721 return false;
722 }
723 }
724 } /* while worklist */
725
726 VEC_free (gimple, heap, worklist);
727 return true;
728 }
729
730
731 /* Get the cost by calling the target's cost builtin. */
732
733 static inline
734 int vect_get_stmt_cost (enum vect_cost_for_stmt type_of_cost)
735 {
736 tree dummy_type = NULL;
737 int dummy = 0;
738
739 return targetm.vectorize.builtin_vectorization_cost (type_of_cost,
740 dummy_type, dummy);
741 }
742
743
744 /* Get cost for STMT. */
745
746 int
747 cost_for_stmt (gimple stmt)
748 {
749 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
750
751 switch (STMT_VINFO_TYPE (stmt_info))
752 {
753 case load_vec_info_type:
754 return vect_get_stmt_cost (scalar_load);
755 case store_vec_info_type:
756 return vect_get_stmt_cost (scalar_store);
757 case op_vec_info_type:
758 case condition_vec_info_type:
759 case assignment_vec_info_type:
760 case reduc_vec_info_type:
761 case induc_vec_info_type:
762 case type_promotion_vec_info_type:
763 case type_demotion_vec_info_type:
764 case type_conversion_vec_info_type:
765 case call_vec_info_type:
766 return vect_get_stmt_cost (scalar_stmt);
767 case undef_vec_info_type:
768 default:
769 gcc_unreachable ();
770 }
771 }
772
773 /* Function vect_model_simple_cost.
774
775 Models cost for simple operations, i.e. those that only emit ncopies of a
776 single op. Right now, this does not account for multiple insns that could
777 be generated for the single vector op. We will handle that shortly. */
778
779 void
780 vect_model_simple_cost (stmt_vec_info stmt_info, int ncopies,
781 enum vect_def_type *dt, slp_tree slp_node)
782 {
783 int i;
784 int inside_cost = 0, outside_cost = 0;
785
786 /* The SLP costs were already calculated during SLP tree build. */
787 if (PURE_SLP_STMT (stmt_info))
788 return;
789
790 inside_cost = ncopies * vect_get_stmt_cost (vector_stmt);
791
792 /* FORNOW: Assuming maximum 2 args per stmt. */
793 for (i = 0; i < 2; i++)
794 {
795 if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
796 outside_cost += vect_get_stmt_cost (vector_stmt);
797 }
798
799 if (vect_print_dump_info (REPORT_COST))
800 fprintf (vect_dump, "vect_model_simple_cost: inside_cost = %d, "
801 "outside_cost = %d .", inside_cost, outside_cost);
802
803 /* Set the costs either in STMT_INFO or SLP_NODE (if exists). */
804 stmt_vinfo_set_inside_of_loop_cost (stmt_info, slp_node, inside_cost);
805 stmt_vinfo_set_outside_of_loop_cost (stmt_info, slp_node, outside_cost);
806 }
807
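/* Worked example, not from the original source, assuming the default cost
   of 1 that the target hook returns for a vector_stmt: a simple vector
   operation with ncopies == 2 and one constant operand gets
     inside_cost  = 2 * 1 = 2
     outside_cost = 1
   where the outside cost accounts for broadcasting the constant once
   before the loop.  */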
808
809 /* Function vect_cost_strided_group_size
810
811 For strided load or store, return the group_size only if it is the first
812 load or store of a group, else return 1. This ensures that group size is
813 only returned once per group. */
814
815 static int
816 vect_cost_strided_group_size (stmt_vec_info stmt_info)
817 {
818 gimple first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
819
820 if (first_stmt == STMT_VINFO_STMT (stmt_info))
821 return GROUP_SIZE (stmt_info);
822
823 return 1;
824 }
825
826
827 /* Function vect_model_store_cost
828
829 Models cost for stores. In the case of strided accesses, one access
830 has the overhead of the strided access attributed to it. */
831
832 void
833 vect_model_store_cost (stmt_vec_info stmt_info, int ncopies,
834 bool store_lanes_p, enum vect_def_type dt,
835 slp_tree slp_node)
836 {
837 int group_size;
838 unsigned int inside_cost = 0, outside_cost = 0;
839 struct data_reference *first_dr;
840 gimple first_stmt;
841
842 /* The SLP costs were already calculated during SLP tree build. */
843 if (PURE_SLP_STMT (stmt_info))
844 return;
845
846 if (dt == vect_constant_def || dt == vect_external_def)
847 outside_cost = vect_get_stmt_cost (scalar_to_vec);
848
849 /* Strided access? */
850 if (STMT_VINFO_STRIDED_ACCESS (stmt_info))
851 {
852 if (slp_node)
853 {
854 first_stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (slp_node), 0);
855 group_size = 1;
856 }
857 else
858 {
859 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
860 group_size = vect_cost_strided_group_size (stmt_info);
861 }
862
863 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
864 }
865 /* Not a strided access. */
866 else
867 {
868 group_size = 1;
869 first_dr = STMT_VINFO_DATA_REF (stmt_info);
870 }
871
872 /* We assume that the cost of a single store-lanes instruction is
873 equivalent to the cost of GROUP_SIZE separate stores. If a strided
874 access is instead being provided by a permute-and-store operation,
875 include the cost of the permutes. */
876 if (!store_lanes_p && group_size > 1)
877 {
878 /* Uses high and low interleave operations for each needed permute. */
879 inside_cost = ncopies * exact_log2 (group_size) * group_size
880 * vect_get_stmt_cost (vector_stmt);
881
882 if (vect_print_dump_info (REPORT_COST))
883 fprintf (vect_dump, "vect_model_store_cost: strided group_size = %d .",
884 group_size);
885
886 }
887
888 /* Costs of the stores. */
889 vect_get_store_cost (first_dr, ncopies, &inside_cost);
890
891 if (vect_print_dump_info (REPORT_COST))
892 fprintf (vect_dump, "vect_model_store_cost: inside_cost = %d, "
893 "outside_cost = %d .", inside_cost, outside_cost);
894
895 /* Set the costs either in STMT_INFO or SLP_NODE (if exists). */
896 stmt_vinfo_set_inside_of_loop_cost (stmt_info, slp_node, inside_cost);
897 stmt_vinfo_set_outside_of_loop_cost (stmt_info, slp_node, outside_cost);
898 }
899
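/* Worked example, not from the original source, assuming a cost of 1 per
   vector_stmt: a strided store group with GROUP_SIZE == 4 that is realized
   by permute-and-store (no store-lanes) with ncopies == 1 adds
     1 * exact_log2 (4) * 4 * 1 = 8
   interleave stmts to inside_cost, on top of the cost of the stores
   themselves added by vect_get_store_cost.  */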
900
901 /* Calculate cost of DR's memory access. */
902 void
903 vect_get_store_cost (struct data_reference *dr, int ncopies,
904 unsigned int *inside_cost)
905 {
906 int alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
907
908 switch (alignment_support_scheme)
909 {
910 case dr_aligned:
911 {
912 *inside_cost += ncopies * vect_get_stmt_cost (vector_store);
913
914 if (vect_print_dump_info (REPORT_COST))
915 fprintf (vect_dump, "vect_model_store_cost: aligned.");
916
917 break;
918 }
919
920 case dr_unaligned_supported:
921 {
922 gimple stmt = DR_STMT (dr);
923 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
924 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
925
926 /* Here, we assign an additional cost for the unaligned store. */
927 *inside_cost += ncopies
928 * targetm.vectorize.builtin_vectorization_cost (unaligned_store,
929 vectype, DR_MISALIGNMENT (dr));
930
931 if (vect_print_dump_info (REPORT_COST))
932 fprintf (vect_dump, "vect_model_store_cost: unaligned supported by "
933 "hardware.");
934
935 break;
936 }
937
938 default:
939 gcc_unreachable ();
940 }
941 }
942
943
944 /* Function vect_model_load_cost
945
946 Models cost for loads. In the case of strided accesses, the last access
947 has the overhead of the strided access attributed to it. Since unaligned
948 accesses are supported for loads, we also account for the costs of the
949 access scheme chosen. */
950
951 void
952 vect_model_load_cost (stmt_vec_info stmt_info, int ncopies, bool load_lanes_p,
953 slp_tree slp_node)
954 {
955 int group_size;
956 gimple first_stmt;
957 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr;
958 unsigned int inside_cost = 0, outside_cost = 0;
959
960 /* The SLP costs were already calculated during SLP tree build. */
961 if (PURE_SLP_STMT (stmt_info))
962 return;
963
964 /* Strided accesses? */
965 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
966 if (STMT_VINFO_STRIDED_ACCESS (stmt_info) && first_stmt && !slp_node)
967 {
968 group_size = vect_cost_strided_group_size (stmt_info);
969 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
970 }
971 /* Not a strided access. */
972 else
973 {
974 group_size = 1;
975 first_dr = dr;
976 }
977
978 /* We assume that the cost of a single load-lanes instruction is
979 equivalent to the cost of GROUP_SIZE separate loads. If a strided
980 access is instead being provided by a load-and-permute operation,
981 include the cost of the permutes. */
982 if (!load_lanes_p && group_size > 1)
983 {
984 /* Uses even and odd extract operations for each needed permute. */
985 inside_cost = ncopies * exact_log2 (group_size) * group_size
986 * vect_get_stmt_cost (vector_stmt);
987
988 if (vect_print_dump_info (REPORT_COST))
989 fprintf (vect_dump, "vect_model_load_cost: strided group_size = %d .",
990 group_size);
991 }
992
993 /* The loads themselves. */
994 vect_get_load_cost (first_dr, ncopies,
995 ((!STMT_VINFO_STRIDED_ACCESS (stmt_info)) || group_size > 1
996 || slp_node),
997 &inside_cost, &outside_cost);
998
999 if (vect_print_dump_info (REPORT_COST))
1000 fprintf (vect_dump, "vect_model_load_cost: inside_cost = %d, "
1001 "outside_cost = %d .", inside_cost, outside_cost);
1002
1003 /* Set the costs either in STMT_INFO or SLP_NODE (if exists). */
1004 stmt_vinfo_set_inside_of_loop_cost (stmt_info, slp_node, inside_cost);
1005 stmt_vinfo_set_outside_of_loop_cost (stmt_info, slp_node, outside_cost);
1006 }
1007
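/* Worked example, not from the original source, assuming a cost of 1 per
   vector_stmt: a strided load group with GROUP_SIZE == 8 realized by
   load-and-permute (no load-lanes) with ncopies == 1 adds
     1 * exact_log2 (8) * 8 * 1 = 24
   even/odd extract stmts to inside_cost, before vect_get_load_cost adds
   the cost of the loads themselves.  */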
1008
1009 /* Calculate cost of DR's memory access. */
1010 void
1011 vect_get_load_cost (struct data_reference *dr, int ncopies,
1012 bool add_realign_cost, unsigned int *inside_cost,
1013 unsigned int *outside_cost)
1014 {
1015 int alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
1016
1017 switch (alignment_support_scheme)
1018 {
1019 case dr_aligned:
1020 {
1021 *inside_cost += ncopies * vect_get_stmt_cost (vector_load);
1022
1023 if (vect_print_dump_info (REPORT_COST))
1024 fprintf (vect_dump, "vect_model_load_cost: aligned.");
1025
1026 break;
1027 }
1028 case dr_unaligned_supported:
1029 {
1030 gimple stmt = DR_STMT (dr);
1031 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1032 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1033
1034 /* Here, we assign an additional cost for the unaligned load. */
1035 *inside_cost += ncopies
1036 * targetm.vectorize.builtin_vectorization_cost (unaligned_load,
1037 vectype, DR_MISALIGNMENT (dr));
1038 if (vect_print_dump_info (REPORT_COST))
1039 fprintf (vect_dump, "vect_model_load_cost: unaligned supported by "
1040 "hardware.");
1041
1042 break;
1043 }
1044 case dr_explicit_realign:
1045 {
1046 *inside_cost += ncopies * (2 * vect_get_stmt_cost (vector_load)
1047 + vect_get_stmt_cost (vector_stmt));
1048
1049 /* FIXME: If the misalignment remains fixed across the iterations of
1050 the containing loop, the following cost should be added to the
1051 outside costs. */
1052 if (targetm.vectorize.builtin_mask_for_load)
1053 *inside_cost += vect_get_stmt_cost (vector_stmt);
1054
1055 break;
1056 }
1057 case dr_explicit_realign_optimized:
1058 {
1059 if (vect_print_dump_info (REPORT_COST))
1060 fprintf (vect_dump, "vect_model_load_cost: unaligned software "
1061 "pipelined.");
1062
1063 /* Unaligned software pipeline has a load of an address, an initial
1064 load, and possibly a mask operation to "prime" the loop. However,
1065 if this is an access in a group of loads, which provide strided
1066 access, then the above cost should only be considered for one
1067 access in the group. Inside the loop, there is a load op
1068 and a realignment op. */
1069
1070 if (add_realign_cost)
1071 {
1072 *outside_cost = 2 * vect_get_stmt_cost (vector_stmt);
1073 if (targetm.vectorize.builtin_mask_for_load)
1074 *outside_cost += vect_get_stmt_cost (vector_stmt);
1075 }
1076
1077 *inside_cost += ncopies * (vect_get_stmt_cost (vector_load)
1078 + vect_get_stmt_cost (vector_stmt));
1079 break;
1080 }
1081
1082 default:
1083 gcc_unreachable ();
1084 }
1085 }
1086
1087
1088 /* Function vect_init_vector.
1089
1090 Insert a new stmt (INIT_STMT) that initializes a new vector variable with
1091 the vector elements of VECTOR_VAR. Place the initialization at GSI if it
1092 is not NULL. Otherwise, place the initialization at the loop preheader.
1093 Return the DEF of INIT_STMT.
1094 It will be used in the vectorization of STMT. */
1095
1096 tree
1097 vect_init_vector (gimple stmt, tree vector_var, tree vector_type,
1098 gimple_stmt_iterator *gsi)
1099 {
1100 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
1101 tree new_var;
1102 gimple init_stmt;
1103 tree vec_oprnd;
1104 edge pe;
1105 tree new_temp;
1106 basic_block new_bb;
1107
1108 new_var = vect_get_new_vect_var (vector_type, vect_simple_var, "cst_");
1109 add_referenced_var (new_var);
1110 init_stmt = gimple_build_assign (new_var, vector_var);
1111 new_temp = make_ssa_name (new_var, init_stmt);
1112 gimple_assign_set_lhs (init_stmt, new_temp);
1113
1114 if (gsi)
1115 vect_finish_stmt_generation (stmt, init_stmt, gsi);
1116 else
1117 {
1118 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1119
1120 if (loop_vinfo)
1121 {
1122 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1123
1124 if (nested_in_vect_loop_p (loop, stmt))
1125 loop = loop->inner;
1126
1127 pe = loop_preheader_edge (loop);
1128 new_bb = gsi_insert_on_edge_immediate (pe, init_stmt);
1129 gcc_assert (!new_bb);
1130 }
1131 else
1132 {
1133 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
1134 basic_block bb;
1135 gimple_stmt_iterator gsi_bb_start;
1136
1137 gcc_assert (bb_vinfo);
1138 bb = BB_VINFO_BB (bb_vinfo);
1139 gsi_bb_start = gsi_after_labels (bb);
1140 gsi_insert_before (&gsi_bb_start, init_stmt, GSI_SAME_STMT);
1141 }
1142 }
1143
1144 if (vect_print_dump_info (REPORT_DETAILS))
1145 {
1146 fprintf (vect_dump, "created new init_stmt: ");
1147 print_gimple_stmt (vect_dump, init_stmt, 0, TDF_SLIM);
1148 }
1149
1150 vec_oprnd = gimple_assign_lhs (init_stmt);
1151 return vec_oprnd;
1152 }
1153
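/* Illustrative sketch, not part of the original file, written with the GNU
   vector extension: conceptually, for a loop-invariant operand the
   broadcast vector is built once in the loop preheader (what
   vect_init_vector above does when GSI is NULL) instead of on every
   iteration.  The names and the 4-element vector are hypothetical, and OUT
   is assumed to be suitably aligned.  */

typedef int example_v4si __attribute__ ((vector_size (16)));

static void
example_preheader_broadcast (int *out, int c, int n)
{
  example_v4si vc = { c, c, c, c };   /* Hoisted to the preheader.  */
  int i;

  for (i = 0; i < n; i += 4)
    *(example_v4si *) (out + i) = vc;
}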
1154
1155 /* Function vect_get_vec_def_for_operand.
1156
1157 OP is an operand in STMT. This function returns a (vector) def that will be
1158 used in the vectorized stmt for STMT.
1159
1160 In the case that OP is an SSA_NAME which is defined in the loop, then
1161 STMT_VINFO_VEC_STMT of the defining stmt holds the relevant def.
1162
1163 In case OP is an invariant or constant, a new stmt that creates a vector def
1164 needs to be introduced. */
1165
1166 tree
1167 vect_get_vec_def_for_operand (tree op, gimple stmt, tree *scalar_def)
1168 {
1169 tree vec_oprnd;
1170 gimple vec_stmt;
1171 gimple def_stmt;
1172 stmt_vec_info def_stmt_info = NULL;
1173 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
1174 unsigned int nunits;
1175 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1176 tree vec_inv;
1177 tree vec_cst;
1178 tree t = NULL_TREE;
1179 tree def;
1180 int i;
1181 enum vect_def_type dt;
1182 bool is_simple_use;
1183 tree vector_type;
1184
1185 if (vect_print_dump_info (REPORT_DETAILS))
1186 {
1187 fprintf (vect_dump, "vect_get_vec_def_for_operand: ");
1188 print_generic_expr (vect_dump, op, TDF_SLIM);
1189 }
1190
1191 is_simple_use = vect_is_simple_use (op, loop_vinfo, NULL, &def_stmt, &def,
1192 &dt);
1193 gcc_assert (is_simple_use);
1194 if (vect_print_dump_info (REPORT_DETAILS))
1195 {
1196 if (def)
1197 {
1198 fprintf (vect_dump, "def = ");
1199 print_generic_expr (vect_dump, def, TDF_SLIM);
1200 }
1201 if (def_stmt)
1202 {
1203 fprintf (vect_dump, " def_stmt = ");
1204 print_gimple_stmt (vect_dump, def_stmt, 0, TDF_SLIM);
1205 }
1206 }
1207
1208 switch (dt)
1209 {
1210 /* Case 1: operand is a constant. */
1211 case vect_constant_def:
1212 {
1213 vector_type = get_vectype_for_scalar_type (TREE_TYPE (op));
1214 gcc_assert (vector_type);
1215 nunits = TYPE_VECTOR_SUBPARTS (vector_type);
1216
1217 if (scalar_def)
1218 *scalar_def = op;
1219
1220 /* Create 'vect_cst_ = {cst,cst,...,cst}' */
1221 if (vect_print_dump_info (REPORT_DETAILS))
1222 fprintf (vect_dump, "Create vector_cst. nunits = %d", nunits);
1223
1224 vec_cst = build_vector_from_val (vector_type,
1225 fold_convert (TREE_TYPE (vector_type),
1226 op));
1227 return vect_init_vector (stmt, vec_cst, vector_type, NULL);
1228 }
1229
1230 /* Case 2: operand is defined outside the loop - loop invariant. */
1231 case vect_external_def:
1232 {
1233 vector_type = get_vectype_for_scalar_type (TREE_TYPE (def));
1234 gcc_assert (vector_type);
1235 nunits = TYPE_VECTOR_SUBPARTS (vector_type);
1236
1237 if (scalar_def)
1238 *scalar_def = def;
1239
1240 /* Create 'vec_inv = {inv,inv,..,inv}' */
1241 if (vect_print_dump_info (REPORT_DETAILS))
1242 fprintf (vect_dump, "Create vector_inv.");
1243
1244 for (i = nunits - 1; i >= 0; --i)
1245 {
1246 t = tree_cons (NULL_TREE, def, t);
1247 }
1248
1249 /* FIXME: use build_constructor directly. */
1250 vec_inv = build_constructor_from_list (vector_type, t);
1251 return vect_init_vector (stmt, vec_inv, vector_type, NULL);
1252 }
1253
1254 /* Case 3: operand is defined inside the loop. */
1255 case vect_internal_def:
1256 {
1257 if (scalar_def)
1258 *scalar_def = NULL/* FIXME tuples: def_stmt*/;
1259
1260 /* Get the def from the vectorized stmt. */
1261 def_stmt_info = vinfo_for_stmt (def_stmt);
1262
1263 vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
1264 /* Get vectorized pattern statement. */
1265 if (!vec_stmt
1266 && STMT_VINFO_IN_PATTERN_P (def_stmt_info)
1267 && !STMT_VINFO_RELEVANT (def_stmt_info))
1268 vec_stmt = STMT_VINFO_VEC_STMT (vinfo_for_stmt (
1269 STMT_VINFO_RELATED_STMT (def_stmt_info)));
1270 gcc_assert (vec_stmt);
1271 if (gimple_code (vec_stmt) == GIMPLE_PHI)
1272 vec_oprnd = PHI_RESULT (vec_stmt);
1273 else if (is_gimple_call (vec_stmt))
1274 vec_oprnd = gimple_call_lhs (vec_stmt);
1275 else
1276 vec_oprnd = gimple_assign_lhs (vec_stmt);
1277 return vec_oprnd;
1278 }
1279
1280 /* Case 4: operand is defined by a loop header phi - reduction */
1281 case vect_reduction_def:
1282 case vect_double_reduction_def:
1283 case vect_nested_cycle:
1284 {
1285 struct loop *loop;
1286
1287 gcc_assert (gimple_code (def_stmt) == GIMPLE_PHI);
1288 loop = (gimple_bb (def_stmt))->loop_father;
1289
1290 /* Get the def before the loop */
1291 op = PHI_ARG_DEF_FROM_EDGE (def_stmt, loop_preheader_edge (loop));
1292 return get_initial_def_for_reduction (stmt, op, scalar_def);
1293 }
1294
1295 /* Case 5: operand is defined by loop-header phi - induction. */
1296 case vect_induction_def:
1297 {
1298 gcc_assert (gimple_code (def_stmt) == GIMPLE_PHI);
1299
1300 /* Get the def from the vectorized stmt. */
1301 def_stmt_info = vinfo_for_stmt (def_stmt);
1302 vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
1303 if (gimple_code (vec_stmt) == GIMPLE_PHI)
1304 vec_oprnd = PHI_RESULT (vec_stmt);
1305 else
1306 vec_oprnd = gimple_get_lhs (vec_stmt);
1307 return vec_oprnd;
1308 }
1309
1310 default:
1311 gcc_unreachable ();
1312 }
1313 }
1314
1315
1316 /* Function vect_get_vec_def_for_stmt_copy
1317
1318 Return a vector-def for an operand. This function is used when the
1319 vectorized stmt to be created (by the caller to this function) is a "copy"
1320 created in case the vectorized result cannot fit in one vector, and several
1321 copies of the vector-stmt are required. In this case the vector-def is
1322 retrieved from the vector stmt recorded in the STMT_VINFO_RELATED_STMT field
1323 of the stmt that defines VEC_OPRND.
1324 DT is the type of the vector def VEC_OPRND.
1325
1326 Context:
1327 In case the vectorization factor (VF) is bigger than the number
1328 of elements that can fit in a vectype (nunits), we have to generate
1329 more than one vector stmt to vectorize the scalar stmt. This situation
1330 arises when there are multiple data-types operated upon in the loop; the
1331 smallest data-type determines the VF, and as a result, when vectorizing
1332 stmts operating on wider types we need to create 'VF/nunits' "copies" of the
1333 vector stmt (each computing a vector of 'nunits' results, and together
1334 computing 'VF' results in each iteration). This function is called when
1335 vectorizing such a stmt (e.g. vectorizing S2 in the illustration below, in
1336 which VF=16 and nunits=4, so the number of copies required is 4):
1337
1338 scalar stmt: vectorized into: STMT_VINFO_RELATED_STMT
1339
1340 S1: x = load VS1.0: vx.0 = memref0 VS1.1
1341 VS1.1: vx.1 = memref1 VS1.2
1342 VS1.2: vx.2 = memref2 VS1.3
1343 VS1.3: vx.3 = memref3
1344
1345 S2: z = x + ... VSnew.0: vz0 = vx.0 + ... VSnew.1
1346 VSnew.1: vz1 = vx.1 + ... VSnew.2
1347 VSnew.2: vz2 = vx.2 + ... VSnew.3
1348 VSnew.3: vz3 = vx.3 + ...
1349
1350 The vectorization of S1 is explained in vectorizable_load.
1351 The vectorization of S2:
1352 To create the first vector-stmt out of the 4 copies - VSnew.0 -
1353 the function 'vect_get_vec_def_for_operand' is called to
1354 get the relevant vector-def for each operand of S2. For operand x it
1355 returns the vector-def 'vx.0'.
1356
1357 To create the remaining copies of the vector-stmt (VSnew.j), this
1358 function is called to get the relevant vector-def for each operand. It is
1359 obtained from the respective VS1.j stmt, which is recorded in the
1360 STMT_VINFO_RELATED_STMT field of the stmt that defines VEC_OPRND.
1361
1362 For example, to obtain the vector-def 'vx.1' in order to create the
1363 vector stmt 'VSnew.1', this function is called with VEC_OPRND='vx.0'.
1364 Given 'vx0' we obtain the stmt that defines it ('VS1.0'); from the
1365 STMT_VINFO_RELATED_STMT field of 'VS1.0' we obtain the next copy - 'VS1.1',
1366 and return its def ('vx.1').
1367 Overall, to create the above sequence this function will be called 3 times:
1368 vx.1 = vect_get_vec_def_for_stmt_copy (dt, vx.0);
1369 vx.2 = vect_get_vec_def_for_stmt_copy (dt, vx.1);
1370 vx.3 = vect_get_vec_def_for_stmt_copy (dt, vx.2); */
1371
1372 tree
1373 vect_get_vec_def_for_stmt_copy (enum vect_def_type dt, tree vec_oprnd)
1374 {
1375 gimple vec_stmt_for_operand;
1376 stmt_vec_info def_stmt_info;
1377
1378 /* Do nothing; can reuse same def. */
1379 if (dt == vect_external_def || dt == vect_constant_def )
1380 return vec_oprnd;
1381
1382 vec_stmt_for_operand = SSA_NAME_DEF_STMT (vec_oprnd);
1383 def_stmt_info = vinfo_for_stmt (vec_stmt_for_operand);
1384 gcc_assert (def_stmt_info);
1385 vec_stmt_for_operand = STMT_VINFO_RELATED_STMT (def_stmt_info);
1386 gcc_assert (vec_stmt_for_operand);
1387 vec_oprnd = gimple_get_lhs (vec_stmt_for_operand);
1388 if (gimple_code (vec_stmt_for_operand) == GIMPLE_PHI)
1389 vec_oprnd = PHI_RESULT (vec_stmt_for_operand);
1390 else
1391 vec_oprnd = gimple_get_lhs (vec_stmt_for_operand);
1392 return vec_oprnd;
1393 }
1394
1395
1396 /* Get vectorized definitions for the operands to create a copy of an original
1397 stmt. See vect_get_vec_def_for_stmt_copy () for details. */
1398
1399 static void
1400 vect_get_vec_defs_for_stmt_copy (enum vect_def_type *dt,
1401 VEC(tree,heap) **vec_oprnds0,
1402 VEC(tree,heap) **vec_oprnds1)
1403 {
1404 tree vec_oprnd = VEC_pop (tree, *vec_oprnds0);
1405
1406 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd);
1407 VEC_quick_push (tree, *vec_oprnds0, vec_oprnd);
1408
1409 if (vec_oprnds1 && *vec_oprnds1)
1410 {
1411 vec_oprnd = VEC_pop (tree, *vec_oprnds1);
1412 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[1], vec_oprnd);
1413 VEC_quick_push (tree, *vec_oprnds1, vec_oprnd);
1414 }
1415 }
1416
1417
1418 /* Get vectorized definitions for OP0 and OP1.
1419 REDUC_INDEX is the index of reduction operand in case of reduction,
1420 and -1 otherwise. */
1421
1422 void
1423 vect_get_vec_defs (tree op0, tree op1, gimple stmt,
1424 VEC (tree, heap) **vec_oprnds0,
1425 VEC (tree, heap) **vec_oprnds1,
1426 slp_tree slp_node, int reduc_index)
1427 {
1428 if (slp_node)
1429 {
1430 int nops = (op1 == NULL_TREE) ? 1 : 2;
1431 VEC (tree, heap) *ops = VEC_alloc (tree, heap, nops);
1432 VEC (slp_void_p, heap) *vec_defs = VEC_alloc (slp_void_p, heap, nops);
1433
1434 VEC_quick_push (tree, ops, op0);
1435 if (op1)
1436 VEC_quick_push (tree, ops, op1);
1437
1438 vect_get_slp_defs (ops, slp_node, &vec_defs, reduc_index);
1439
1440 *vec_oprnds0 = (VEC (tree, heap) *) VEC_index (slp_void_p, vec_defs, 0);
1441 if (op1)
1442 *vec_oprnds1 = (VEC (tree, heap) *) VEC_index (slp_void_p, vec_defs, 1);
1443
1444 VEC_free (tree, heap, ops);
1445 VEC_free (slp_void_p, heap, vec_defs);
1446 }
1447 else
1448 {
1449 tree vec_oprnd;
1450
1451 *vec_oprnds0 = VEC_alloc (tree, heap, 1);
1452 vec_oprnd = vect_get_vec_def_for_operand (op0, stmt, NULL);
1453 VEC_quick_push (tree, *vec_oprnds0, vec_oprnd);
1454
1455 if (op1)
1456 {
1457 *vec_oprnds1 = VEC_alloc (tree, heap, 1);
1458 vec_oprnd = vect_get_vec_def_for_operand (op1, stmt, NULL);
1459 VEC_quick_push (tree, *vec_oprnds1, vec_oprnd);
1460 }
1461 }
1462 }
1463
1464
1465 /* Function vect_finish_stmt_generation.
1466
1467 Insert a new stmt. */
1468
1469 void
1470 vect_finish_stmt_generation (gimple stmt, gimple vec_stmt,
1471 gimple_stmt_iterator *gsi)
1472 {
1473 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1474 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1475 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
1476
1477 gcc_assert (gimple_code (stmt) != GIMPLE_LABEL);
1478
1479 gsi_insert_before (gsi, vec_stmt, GSI_SAME_STMT);
1480
1481 set_vinfo_for_stmt (vec_stmt, new_stmt_vec_info (vec_stmt, loop_vinfo,
1482 bb_vinfo));
1483
1484 if (vect_print_dump_info (REPORT_DETAILS))
1485 {
1486 fprintf (vect_dump, "add new stmt: ");
1487 print_gimple_stmt (vect_dump, vec_stmt, 0, TDF_SLIM);
1488 }
1489
1490 gimple_set_location (vec_stmt, gimple_location (stmt));
1491 }
1492
1493 /* Checks if CALL can be vectorized in type VECTYPE. Returns
1494 a function declaration if the target has a vectorized version
1495 of the function, or NULL_TREE if the function cannot be vectorized. */
1496
1497 tree
1498 vectorizable_function (gimple call, tree vectype_out, tree vectype_in)
1499 {
1500 tree fndecl = gimple_call_fndecl (call);
1501
1502 /* We only handle functions that do not read or clobber memory -- i.e.
1503 const or novops ones. */
1504 if (!(gimple_call_flags (call) & (ECF_CONST | ECF_NOVOPS)))
1505 return NULL_TREE;
1506
1507 if (!fndecl
1508 || TREE_CODE (fndecl) != FUNCTION_DECL
1509 || !DECL_BUILT_IN (fndecl))
1510 return NULL_TREE;
1511
1512 return targetm.vectorize.builtin_vectorized_function (fndecl, vectype_out,
1513 vectype_in);
1514 }
1515
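/* Illustrative sketch, not part of the original file: a loop with the kind
   of call that vectorizable_function above can map to a target builtin and
   that vectorizable_call below vectorizes.  The call must not read or
   clobber memory, so this example assumes -fno-math-errno (or -ffast-math)
   makes the sqrtf call const.  Names are hypothetical.  */

static void
example_call_loop (float *out, const float *in, int n)
{
  int i;

  for (i = 0; i < n; i++)
    out[i] = __builtin_sqrtf (in[i]);
}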
1516 /* Function vectorizable_call.
1517
1518 Check if STMT performs a function call that can be vectorized.
1519 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
1520 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
1521 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
1522
1523 static bool
1524 vectorizable_call (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
1525 slp_tree slp_node)
1526 {
1527 tree vec_dest;
1528 tree scalar_dest;
1529 tree op, type;
1530 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
1531 stmt_vec_info stmt_info = vinfo_for_stmt (stmt), prev_stmt_info;
1532 tree vectype_out, vectype_in;
1533 int nunits_in;
1534 int nunits_out;
1535 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1536 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
1537 tree fndecl, new_temp, def, rhs_type;
1538 gimple def_stmt;
1539 enum vect_def_type dt[3]
1540 = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
1541 gimple new_stmt = NULL;
1542 int ncopies, j;
1543 VEC(tree, heap) *vargs = NULL;
1544 enum { NARROW, NONE, WIDEN } modifier;
1545 size_t i, nargs;
1546 tree lhs;
1547
1548 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
1549 return false;
1550
1551 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
1552 return false;
1553
1554 /* Is STMT a vectorizable call? */
1555 if (!is_gimple_call (stmt))
1556 return false;
1557
1558 if (TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
1559 return false;
1560
1561 if (stmt_can_throw_internal (stmt))
1562 return false;
1563
1564 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
1565
1566 /* Process function arguments. */
1567 rhs_type = NULL_TREE;
1568 vectype_in = NULL_TREE;
1569 nargs = gimple_call_num_args (stmt);
1570
1571 /* Bail out if the function has more than three arguments; we do not have
1572 interesting builtin functions to vectorize with more than two arguments
1573 except for fma. Calls with no arguments are not handled either. */
1574 if (nargs == 0 || nargs > 3)
1575 return false;
1576
1577 for (i = 0; i < nargs; i++)
1578 {
1579 tree opvectype;
1580
1581 op = gimple_call_arg (stmt, i);
1582
1583 /* We can only handle calls with arguments of the same type. */
1584 if (rhs_type
1585 && !types_compatible_p (rhs_type, TREE_TYPE (op)))
1586 {
1587 if (vect_print_dump_info (REPORT_DETAILS))
1588 fprintf (vect_dump, "argument types differ.");
1589 return false;
1590 }
1591 if (!rhs_type)
1592 rhs_type = TREE_TYPE (op);
1593
1594 if (!vect_is_simple_use_1 (op, loop_vinfo, bb_vinfo,
1595 &def_stmt, &def, &dt[i], &opvectype))
1596 {
1597 if (vect_print_dump_info (REPORT_DETAILS))
1598 fprintf (vect_dump, "use not simple.");
1599 return false;
1600 }
1601
1602 if (!vectype_in)
1603 vectype_in = opvectype;
1604 else if (opvectype
1605 && opvectype != vectype_in)
1606 {
1607 if (vect_print_dump_info (REPORT_DETAILS))
1608 fprintf (vect_dump, "argument vector types differ.");
1609 return false;
1610 }
1611 }
1612 /* If all arguments are external or constant defs use a vector type with
1613 the same size as the output vector type. */
1614 if (!vectype_in)
1615 vectype_in = get_same_sized_vectype (rhs_type, vectype_out);
1616 if (vec_stmt)
1617 gcc_assert (vectype_in);
1618 if (!vectype_in)
1619 {
1620 if (vect_print_dump_info (REPORT_DETAILS))
1621 {
1622 fprintf (vect_dump, "no vectype for scalar type ");
1623 print_generic_expr (vect_dump, rhs_type, TDF_SLIM);
1624 }
1625
1626 return false;
1627 }
1628
1629 /* FORNOW */
1630 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
1631 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
1632 if (nunits_in == nunits_out / 2)
1633 modifier = NARROW;
1634 else if (nunits_out == nunits_in)
1635 modifier = NONE;
1636 else if (nunits_out == nunits_in / 2)
1637 modifier = WIDEN;
1638 else
1639 return false;
1640
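  /* Worked example, not from the original source: with 128-bit vectors, a
     call taking V2DF arguments and producing a V4SF result has
     nunits_in == 2 and nunits_out == 4, so nunits_in == nunits_out / 2 and
     MODIFIER is NARROW (each vectorized call consumes two vectors per
     argument); the opposite ratio gives WIDEN and equal counts give NONE.  */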
1641 /* For now, we only vectorize functions if a target specific builtin
1642 is available. TODO -- in some cases, it might be profitable to
1643 insert the calls for pieces of the vector, in order to be able
1644 to vectorize other operations in the loop. */
1645 fndecl = vectorizable_function (stmt, vectype_out, vectype_in);
1646 if (fndecl == NULL_TREE)
1647 {
1648 if (vect_print_dump_info (REPORT_DETAILS))
1649 fprintf (vect_dump, "function is not vectorizable.");
1650
1651 return false;
1652 }
1653
1654 gcc_assert (!gimple_vuse (stmt));
1655
1656 if (slp_node || PURE_SLP_STMT (stmt_info))
1657 ncopies = 1;
1658 else if (modifier == NARROW)
1659 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out;
1660 else
1661 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
1662
1663 /* Sanity check: make sure that at least one copy of the vectorized stmt
1664 needs to be generated. */
1665 gcc_assert (ncopies >= 1);
1666
1667 if (!vec_stmt) /* transformation not required. */
1668 {
1669 STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
1670 if (vect_print_dump_info (REPORT_DETAILS))
1671 fprintf (vect_dump, "=== vectorizable_call ===");
1672 vect_model_simple_cost (stmt_info, ncopies, dt, NULL);
1673 return true;
1674 }
1675
1676 /** Transform. **/
1677
1678 if (vect_print_dump_info (REPORT_DETAILS))
1679 fprintf (vect_dump, "transform call.");
1680
1681 /* Handle def. */
1682 scalar_dest = gimple_call_lhs (stmt);
1683 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
1684
1685 prev_stmt_info = NULL;
1686 switch (modifier)
1687 {
1688 case NONE:
1689 for (j = 0; j < ncopies; ++j)
1690 {
1691 /* Build argument list for the vectorized call. */
1692 if (j == 0)
1693 vargs = VEC_alloc (tree, heap, nargs);
1694 else
1695 VEC_truncate (tree, vargs, 0);
1696
1697 if (slp_node)
1698 {
1699 VEC (slp_void_p, heap) *vec_defs
1700 = VEC_alloc (slp_void_p, heap, nargs);
1701 VEC (tree, heap) *vec_oprnds0;
1702
1703 for (i = 0; i < nargs; i++)
1704 VEC_quick_push (tree, vargs, gimple_call_arg (stmt, i));
1705 vect_get_slp_defs (vargs, slp_node, &vec_defs, -1);
1706 vec_oprnds0
1707 = (VEC (tree, heap) *) VEC_index (slp_void_p, vec_defs, 0);
1708
1709 /* Arguments are ready. Create the new vector stmt. */
1710 FOR_EACH_VEC_ELT (tree, vec_oprnds0, i, vec_oprnd0)
1711 {
1712 size_t k;
1713 for (k = 0; k < nargs; k++)
1714 {
1715 VEC (tree, heap) *vec_oprndsk
1716 = (VEC (tree, heap) *)
1717 VEC_index (slp_void_p, vec_defs, k);
1718 VEC_replace (tree, vargs, k,
1719 VEC_index (tree, vec_oprndsk, i));
1720 }
1721 new_stmt = gimple_build_call_vec (fndecl, vargs);
1722 new_temp = make_ssa_name (vec_dest, new_stmt);
1723 gimple_call_set_lhs (new_stmt, new_temp);
1724 vect_finish_stmt_generation (stmt, new_stmt, gsi);
1725 mark_symbols_for_renaming (new_stmt);
1726 VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node),
1727 new_stmt);
1728 }
1729
1730 for (i = 0; i < nargs; i++)
1731 {
1732 VEC (tree, heap) *vec_oprndsi
1733 = (VEC (tree, heap) *)
1734 VEC_index (slp_void_p, vec_defs, i);
1735 VEC_free (tree, heap, vec_oprndsi);
1736 }
1737 VEC_free (slp_void_p, heap, vec_defs);
1738 continue;
1739 }
1740
1741 for (i = 0; i < nargs; i++)
1742 {
1743 op = gimple_call_arg (stmt, i);
1744 if (j == 0)
1745 vec_oprnd0
1746 = vect_get_vec_def_for_operand (op, stmt, NULL);
1747 else
1748 {
1749 vec_oprnd0 = gimple_call_arg (new_stmt, i);
1750 vec_oprnd0
1751 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
1752 }
1753
1754 VEC_quick_push (tree, vargs, vec_oprnd0);
1755 }
1756
1757 new_stmt = gimple_build_call_vec (fndecl, vargs);
1758 new_temp = make_ssa_name (vec_dest, new_stmt);
1759 gimple_call_set_lhs (new_stmt, new_temp);
1760
1761 vect_finish_stmt_generation (stmt, new_stmt, gsi);
1762 mark_symbols_for_renaming (new_stmt);
1763
1764 if (j == 0)
1765 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
1766 else
1767 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
1768
1769 prev_stmt_info = vinfo_for_stmt (new_stmt);
1770 }
1771
1772 break;
1773
1774 case NARROW:
1775 for (j = 0; j < ncopies; ++j)
1776 {
1777 /* Build argument list for the vectorized call. */
1778 if (j == 0)
1779 vargs = VEC_alloc (tree, heap, nargs * 2);
1780 else
1781 VEC_truncate (tree, vargs, 0);
1782
1783 if (slp_node)
1784 {
1785 VEC (slp_void_p, heap) *vec_defs
1786 = VEC_alloc (slp_void_p, heap, nargs);
1787 VEC (tree, heap) *vec_oprnds0;
1788
1789 for (i = 0; i < nargs; i++)
1790 VEC_quick_push (tree, vargs, gimple_call_arg (stmt, i));
1791 vect_get_slp_defs (vargs, slp_node, &vec_defs, -1);
1792 vec_oprnds0
1793 = (VEC (tree, heap) *) VEC_index (slp_void_p, vec_defs, 0);
1794
1795 /* Arguments are ready. Create the new vector stmt. */
1796 for (i = 0; VEC_iterate (tree, vec_oprnds0, i, vec_oprnd0);
1797 i += 2)
1798 {
1799 size_t k;
1800 VEC_truncate (tree, vargs, 0);
1801 for (k = 0; k < nargs; k++)
1802 {
1803 VEC (tree, heap) *vec_oprndsk
1804 = (VEC (tree, heap) *)
1805 VEC_index (slp_void_p, vec_defs, k);
1806 VEC_quick_push (tree, vargs,
1807 VEC_index (tree, vec_oprndsk, i));
1808 VEC_quick_push (tree, vargs,
1809 VEC_index (tree, vec_oprndsk, i + 1));
1810 }
1811 new_stmt = gimple_build_call_vec (fndecl, vargs);
1812 new_temp = make_ssa_name (vec_dest, new_stmt);
1813 gimple_call_set_lhs (new_stmt, new_temp);
1814 vect_finish_stmt_generation (stmt, new_stmt, gsi);
1815 mark_symbols_for_renaming (new_stmt);
1816 VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node),
1817 new_stmt);
1818 }
1819
1820 for (i = 0; i < nargs; i++)
1821 {
1822 VEC (tree, heap) *vec_oprndsi
1823 = (VEC (tree, heap) *)
1824 VEC_index (slp_void_p, vec_defs, i);
1825 VEC_free (tree, heap, vec_oprndsi);
1826 }
1827 VEC_free (slp_void_p, heap, vec_defs);
1828 continue;
1829 }
1830
1831 for (i = 0; i < nargs; i++)
1832 {
1833 op = gimple_call_arg (stmt, i);
1834 if (j == 0)
1835 {
1836 vec_oprnd0
1837 = vect_get_vec_def_for_operand (op, stmt, NULL);
1838 vec_oprnd1
1839 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
1840 }
1841 else
1842 {
1843 vec_oprnd1 = gimple_call_arg (new_stmt, 2*i + 1);
1844 vec_oprnd0
1845 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd1);
1846 vec_oprnd1
1847 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
1848 }
1849
1850 VEC_quick_push (tree, vargs, vec_oprnd0);
1851 VEC_quick_push (tree, vargs, vec_oprnd1);
1852 }
1853
1854 new_stmt = gimple_build_call_vec (fndecl, vargs);
1855 new_temp = make_ssa_name (vec_dest, new_stmt);
1856 gimple_call_set_lhs (new_stmt, new_temp);
1857
1858 vect_finish_stmt_generation (stmt, new_stmt, gsi);
1859 mark_symbols_for_renaming (new_stmt);
1860
1861 if (j == 0)
1862 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
1863 else
1864 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
1865
1866 prev_stmt_info = vinfo_for_stmt (new_stmt);
1867 }
1868
1869 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
1870
1871 break;
1872
1873 case WIDEN:
1874 /* No current target implements this case. */
1875 return false;
1876 }
1877
1878 VEC_free (tree, heap, vargs);
1879
1880 /* Update the exception handling table with the vector stmt if necessary. */
1881 if (maybe_clean_or_replace_eh_stmt (stmt, *vec_stmt))
1882 gimple_purge_dead_eh_edges (gimple_bb (stmt));
1883
1884 /* The call in STMT might prevent it from being removed in dce.
1885 However, we cannot remove it here, due to the way the ssa name
1886 it defines is mapped to the new definition. So just replace the
1887 rhs of the statement with something harmless. */
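/* Concretely (a sketch): the original "lhs_1 = foo (...)" is rewritten in
   place as "lhs_1 = 0" (a zero constant of LHS's type), so a later DCE pass
   can delete it once lhs_1 has no remaining uses.  */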
1888
1889 type = TREE_TYPE (scalar_dest);
1890 if (is_pattern_stmt_p (stmt_info))
1891 lhs = gimple_call_lhs (STMT_VINFO_RELATED_STMT (stmt_info));
1892 else
1893 lhs = gimple_call_lhs (stmt);
1894 new_stmt = gimple_build_assign (lhs, build_zero_cst (type));
1895 set_vinfo_for_stmt (new_stmt, stmt_info);
1896 if (!slp_node)
1897 set_vinfo_for_stmt (stmt, NULL);
1898 STMT_VINFO_STMT (stmt_info) = new_stmt;
1899 gsi_replace (gsi, new_stmt, false);
1900 SSA_NAME_DEF_STMT (gimple_assign_lhs (new_stmt)) = new_stmt;
1901
1902 return true;
1903 }
1904
1905
1906 /* Function vect_gen_widened_results_half
1907
1908 Create a vector stmt whose code, number of operands, and result
1909 variable are CODE, OP_TYPE, and VEC_DEST, and whose arguments are
1910 VEC_OPRND0 and VEC_OPRND1. The new vector stmt is to be inserted at GSI.
1911 In the case that CODE is a CALL_EXPR, this means that a call to DECL
1912 needs to be created (DECL is a function-decl of a target-builtin).
1913 STMT is the original scalar stmt that we are vectorizing. */
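/* For example (an illustrative sketch, not tied to a particular target):
   a widening multiplication of V8HI operands into V4SI results is emitted
   as two halves along the lines of

       vres_lo = VEC_WIDEN_MULT_LO_EXPR <vop0, vop1>;
       vres_hi = VEC_WIDEN_MULT_HI_EXPR <vop0, vop1>;

   or, when the target provides builtins instead, as two calls to the
   corresponding DECLs; each invocation of this function generates one
   such half.  */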
1914
1915 static gimple
1916 vect_gen_widened_results_half (enum tree_code code,
1917 tree decl,
1918 tree vec_oprnd0, tree vec_oprnd1, int op_type,
1919 tree vec_dest, gimple_stmt_iterator *gsi,
1920 gimple stmt)
1921 {
1922 gimple new_stmt;
1923 tree new_temp;
1924
1925 /* Generate half of the widened result: */
1926 if (code == CALL_EXPR)
1927 {
1928 /* Target specific support */
1929 if (op_type == binary_op)
1930 new_stmt = gimple_build_call (decl, 2, vec_oprnd0, vec_oprnd1);
1931 else
1932 new_stmt = gimple_build_call (decl, 1, vec_oprnd0);
1933 new_temp = make_ssa_name (vec_dest, new_stmt);
1934 gimple_call_set_lhs (new_stmt, new_temp);
1935 }
1936 else
1937 {
1938 /* Generic support */
1939 gcc_assert (op_type == TREE_CODE_LENGTH (code));
1940 if (op_type != binary_op)
1941 vec_oprnd1 = NULL;
1942 new_stmt = gimple_build_assign_with_ops (code, vec_dest, vec_oprnd0,
1943 vec_oprnd1);
1944 new_temp = make_ssa_name (vec_dest, new_stmt);
1945 gimple_assign_set_lhs (new_stmt, new_temp);
1946 }
1947 vect_finish_stmt_generation (stmt, new_stmt, gsi);
1948
1949 return new_stmt;
1950 }
1951
1952
1953 /* Get vectorized definitions for loop-based vectorization. For the first
1954 operand we call vect_get_vec_def_for_operand() (with OPRND containing
1955 the scalar operand), and for the rest we get a copy with
1956 vect_get_vec_def_for_stmt_copy() using the previous vector definition
1957 (stored in OPRND). See vect_get_vec_def_for_stmt_copy() for details.
1958 The vectors are collected into VEC_OPRNDS. */
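/* E.g. (a sketch): with MULTI_STEP_CVT == 1 this pushes four vector defs
   into VEC_OPRNDS -- two at each recursion level -- which is what a
   two-step narrowing, halving the number of vectors per step, consumes.  */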
1959
1960 static void
1961 vect_get_loop_based_defs (tree *oprnd, gimple stmt, enum vect_def_type dt,
1962 VEC (tree, heap) **vec_oprnds, int multi_step_cvt)
1963 {
1964 tree vec_oprnd;
1965
1966 /* Get first vector operand. */
1967 /* All the vector operands except the very first one (which is the scalar
1968 oprnd) are stmt copies. */
1969 if (TREE_CODE (TREE_TYPE (*oprnd)) != VECTOR_TYPE)
1970 vec_oprnd = vect_get_vec_def_for_operand (*oprnd, stmt, NULL);
1971 else
1972 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, *oprnd);
1973
1974 VEC_quick_push (tree, *vec_oprnds, vec_oprnd);
1975
1976 /* Get second vector operand. */
1977 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, vec_oprnd);
1978 VEC_quick_push (tree, *vec_oprnds, vec_oprnd);
1979
1980 *oprnd = vec_oprnd;
1981
1982 /* For conversion in multiple steps, continue to get operands
1983 recursively. */
1984 if (multi_step_cvt)
1985 vect_get_loop_based_defs (oprnd, stmt, dt, vec_oprnds, multi_step_cvt - 1);
1986 }
1987
1988
1989 /* Create vectorized demotion statements for vector operands from VEC_OPRNDS.
1990 For multi-step conversions store the resulting vectors and call the function
1991 recursively. */
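/* A sketch of the flow (illustrative types only): narrowing V4SI operands
   down to V16QI results takes two levels; the first level packs pairs of
   V4SI vectors into V8HI intermediates using CODE, and the recursive call
   (always using VEC_PACK_TRUNC_EXPR) packs those into the final V16QI
   vectors.  */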
1992
1993 static void
1994 vect_create_vectorized_demotion_stmts (VEC (tree, heap) **vec_oprnds,
1995 int multi_step_cvt, gimple stmt,
1996 VEC (tree, heap) *vec_dsts,
1997 gimple_stmt_iterator *gsi,
1998 slp_tree slp_node, enum tree_code code,
1999 stmt_vec_info *prev_stmt_info)
2000 {
2001 unsigned int i;
2002 tree vop0, vop1, new_tmp, vec_dest;
2003 gimple new_stmt;
2004 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2005
2006 vec_dest = VEC_pop (tree, vec_dsts);
2007
2008 for (i = 0; i < VEC_length (tree, *vec_oprnds); i += 2)
2009 {
2010 /* Create demotion operation. */
2011 vop0 = VEC_index (tree, *vec_oprnds, i);
2012 vop1 = VEC_index (tree, *vec_oprnds, i + 1);
2013 new_stmt = gimple_build_assign_with_ops (code, vec_dest, vop0, vop1);
2014 new_tmp = make_ssa_name (vec_dest, new_stmt);
2015 gimple_assign_set_lhs (new_stmt, new_tmp);
2016 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2017
2018 if (multi_step_cvt)
2019 /* Store the resulting vector for next recursive call. */
2020 VEC_replace (tree, *vec_oprnds, i/2, new_tmp);
2021 else
2022 {
2023 /* This is the last step of the conversion sequence. Store the
2024 vectors in SLP_NODE or in the vector info of the scalar statement
2025 (or in the STMT_VINFO_RELATED_STMT chain). */
2026 if (slp_node)
2027 VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node), new_stmt);
2028 else
2029 {
2030 if (!*prev_stmt_info)
2031 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
2032 else
2033 STMT_VINFO_RELATED_STMT (*prev_stmt_info) = new_stmt;
2034
2035 *prev_stmt_info = vinfo_for_stmt (new_stmt);
2036 }
2037 }
2038 }
2039
2040 /* For multi-step demotion operations we first generate demotion operations
2041 from the source type to the intermediate types, and then combine the
2042 results (stored in VEC_OPRNDS) in a demotion operation to the destination
2043 type. */
2044 if (multi_step_cvt)
2045 {
2046 /* At each level of recursion we have half of the operands we had at the
2047 previous level. */
2048 VEC_truncate (tree, *vec_oprnds, (i+1)/2);
2049 vect_create_vectorized_demotion_stmts (vec_oprnds, multi_step_cvt - 1,
2050 stmt, vec_dsts, gsi, slp_node,
2051 VEC_PACK_TRUNC_EXPR,
2052 prev_stmt_info);
2053 }
2054
2055 VEC_quick_push (tree, vec_dsts, vec_dest);
2056 }
2057
2058
2059 /* Create vectorized promotion statements for vector operands from VEC_OPRNDS0
2060 and VEC_OPRNDS1 (for binary operations). For multi-step conversions store
2061 the resulting vectors and call the function recursively. */
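/* For instance (a sketch with hypothetical types): promoting V8HI operands
   to V4SI results turns each input vector VOP0 into two output vectors,
   produced by vect_gen_widened_results_half with CODE1/DECL1 for one half
   and CODE2/DECL2 for the other; the results replace *VEC_OPRNDS0 so that
   a further widening step can consume them directly.  */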
2062
2063 static void
2064 vect_create_vectorized_promotion_stmts (VEC (tree, heap) **vec_oprnds0,
2065 VEC (tree, heap) **vec_oprnds1,
2066 gimple stmt, tree vec_dest,
2067 gimple_stmt_iterator *gsi,
2068 enum tree_code code1,
2069 enum tree_code code2, tree decl1,
2070 tree decl2, int op_type)
2071 {
2072 int i;
2073 tree vop0, vop1, new_tmp1, new_tmp2;
2074 gimple new_stmt1, new_stmt2;
2075 VEC (tree, heap) *vec_tmp = NULL;
2076
2077 vec_tmp = VEC_alloc (tree, heap, VEC_length (tree, *vec_oprnds0) * 2);
2078 FOR_EACH_VEC_ELT (tree, *vec_oprnds0, i, vop0)
2079 {
2080 if (op_type == binary_op)
2081 vop1 = VEC_index (tree, *vec_oprnds1, i);
2082 else
2083 vop1 = NULL_TREE;
2084
2085 /* Generate the two halves of promotion operation. */
2086 new_stmt1 = vect_gen_widened_results_half (code1, decl1, vop0, vop1,
2087 op_type, vec_dest, gsi, stmt);
2088 new_stmt2 = vect_gen_widened_results_half (code2, decl2, vop0, vop1,
2089 op_type, vec_dest, gsi, stmt);
2090 if (is_gimple_call (new_stmt1))
2091 {
2092 new_tmp1 = gimple_call_lhs (new_stmt1);
2093 new_tmp2 = gimple_call_lhs (new_stmt2);
2094 }
2095 else
2096 {
2097 new_tmp1 = gimple_assign_lhs (new_stmt1);
2098 new_tmp2 = gimple_assign_lhs (new_stmt2);
2099 }
2100
2101 /* Store the results for the next step. */
2102 VEC_quick_push (tree, vec_tmp, new_tmp1);
2103 VEC_quick_push (tree, vec_tmp, new_tmp2);
2104 }
2105
2106 VEC_free (tree, heap, *vec_oprnds0);
2107 *vec_oprnds0 = vec_tmp;
2108 }
2109
2110
2111 /* Check if STMT performs a conversion operation, that can be vectorized.
2112 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2113 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
2114 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
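/* Illustrative cases (not an exhaustive list): a NONE conversion such as
   V4SI <-> V4SF, handled by a single FLOAT_EXPR/FIX_TRUNC_EXPR per vector;
   a WIDEN conversion such as short -> int, or short -> double through an
   intermediate integer type; and a NARROW conversion such as double ->
   short, i.e. a conversion followed by one or more packing steps.  */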
2115
2116 static bool
2117 vectorizable_conversion (gimple stmt, gimple_stmt_iterator *gsi,
2118 gimple *vec_stmt, slp_tree slp_node)
2119 {
2120 tree vec_dest;
2121 tree scalar_dest;
2122 tree op0, op1 = NULL_TREE;
2123 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
2124 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2125 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2126 enum tree_code code, code1 = ERROR_MARK, code2 = ERROR_MARK;
2127 enum tree_code codecvt1 = ERROR_MARK, codecvt2 = ERROR_MARK;
2128 tree decl1 = NULL_TREE, decl2 = NULL_TREE;
2129 tree new_temp;
2130 tree def;
2131 gimple def_stmt;
2132 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
2133 gimple new_stmt = NULL;
2134 stmt_vec_info prev_stmt_info;
2135 int nunits_in;
2136 int nunits_out;
2137 tree vectype_out, vectype_in;
2138 int ncopies, i, j;
2139 tree lhs_type, rhs_type;
2140 enum { NARROW, NONE, WIDEN } modifier;
2141 VEC (tree,heap) *vec_oprnds0 = NULL, *vec_oprnds1 = NULL;
2142 tree vop0;
2143 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
2144 int multi_step_cvt = 0;
2145 VEC (tree, heap) *vec_dsts = NULL, *interm_types = NULL;
2146 tree last_oprnd, intermediate_type, cvt_type = NULL_TREE;
2147 int op_type;
2148 enum machine_mode rhs_mode;
2149 unsigned short fltsz;
2150
2151 /* Is STMT a vectorizable conversion? */
2152
2153 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
2154 return false;
2155
2156 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
2157 return false;
2158
2159 if (!is_gimple_assign (stmt))
2160 return false;
2161
2162 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
2163 return false;
2164
2165 code = gimple_assign_rhs_code (stmt);
2166 if (!CONVERT_EXPR_CODE_P (code)
2167 && code != FIX_TRUNC_EXPR
2168 && code != FLOAT_EXPR
2169 && code != WIDEN_MULT_EXPR
2170 && code != WIDEN_LSHIFT_EXPR)
2171 return false;
2172
2173 op_type = TREE_CODE_LENGTH (code);
2174
2175 /* Check types of lhs and rhs. */
2176 scalar_dest = gimple_assign_lhs (stmt);
2177 lhs_type = TREE_TYPE (scalar_dest);
2178 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
2179
2180 op0 = gimple_assign_rhs1 (stmt);
2181 rhs_type = TREE_TYPE (op0);
2182
2183 if ((code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
2184 && !((INTEGRAL_TYPE_P (lhs_type)
2185 && INTEGRAL_TYPE_P (rhs_type))
2186 || (SCALAR_FLOAT_TYPE_P (lhs_type)
2187 && SCALAR_FLOAT_TYPE_P (rhs_type))))
2188 return false;
2189
2190 if ((INTEGRAL_TYPE_P (lhs_type)
2191 && (TYPE_PRECISION (lhs_type)
2192 != GET_MODE_PRECISION (TYPE_MODE (lhs_type))))
2193 || (INTEGRAL_TYPE_P (rhs_type)
2194 && (TYPE_PRECISION (rhs_type)
2195 != GET_MODE_PRECISION (TYPE_MODE (rhs_type)))))
2196 {
2197 if (vect_print_dump_info (REPORT_DETAILS))
2198 fprintf (vect_dump,
2199 "type conversion to/from bit-precision unsupported.");
2200 return false;
2201 }
2202
2203 /* Check the operands of the operation. */
2204 if (!vect_is_simple_use_1 (op0, loop_vinfo, bb_vinfo,
2205 &def_stmt, &def, &dt[0], &vectype_in))
2206 {
2207 if (vect_print_dump_info (REPORT_DETAILS))
2208 fprintf (vect_dump, "use not simple.");
2209 return false;
2210 }
2211 if (op_type == binary_op)
2212 {
2213 bool ok;
2214
2215 op1 = gimple_assign_rhs2 (stmt);
2216 gcc_assert (code == WIDEN_MULT_EXPR || code == WIDEN_LSHIFT_EXPR);
2217 /* For WIDEN_MULT_EXPR, if OP0 is a constant, use the type of
2218 OP1. */
2219 if (CONSTANT_CLASS_P (op0))
2220 ok = vect_is_simple_use_1 (op1, loop_vinfo, NULL,
2221 &def_stmt, &def, &dt[1], &vectype_in);
2222 else
2223 ok = vect_is_simple_use (op1, loop_vinfo, NULL, &def_stmt, &def,
2224 &dt[1]);
2225
2226 if (!ok)
2227 {
2228 if (vect_print_dump_info (REPORT_DETAILS))
2229 fprintf (vect_dump, "use not simple.");
2230 return false;
2231 }
2232 }
2233
2234 /* If op0 is an external or constant def, use a vector type of
2235 the same size as the output vector type. */
2236 if (!vectype_in)
2237 vectype_in = get_same_sized_vectype (rhs_type, vectype_out);
2238 if (vec_stmt)
2239 gcc_assert (vectype_in);
2240 if (!vectype_in)
2241 {
2242 if (vect_print_dump_info (REPORT_DETAILS))
2243 {
2244 fprintf (vect_dump, "no vectype for scalar type ");
2245 print_generic_expr (vect_dump, rhs_type, TDF_SLIM);
2246 }
2247
2248 return false;
2249 }
2250
2251 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
2252 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
2253 if (nunits_in < nunits_out)
2254 modifier = NARROW;
2255 else if (nunits_out == nunits_in)
2256 modifier = NONE;
2257 else
2258 modifier = WIDEN;
2259
2260 /* Multiple types in SLP are handled by creating the appropriate number of
2261 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
2262 case of SLP. */
2263 if (slp_node || PURE_SLP_STMT (stmt_info))
2264 ncopies = 1;
2265 else if (modifier == NARROW)
2266 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out;
2267 else
2268 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
2269
2270 /* Sanity check: make sure that at least one copy of the vectorized stmt
2271 needs to be generated. */
2272 gcc_assert (ncopies >= 1);
2273
2274 /* Supportable by target? */
2275 switch (modifier)
2276 {
2277 case NONE:
2278 if (code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
2279 return false;
2280 if (supportable_convert_operation (code, vectype_out, vectype_in,
2281 &decl1, &code1))
2282 break;
2283 /* FALLTHRU */
2284 unsupported:
2285 if (vect_print_dump_info (REPORT_DETAILS))
2286 fprintf (vect_dump, "conversion not supported by target.");
2287 return false;
2288
2289 case WIDEN:
2290 if (supportable_widening_operation (code, stmt, vectype_out, vectype_in,
2291 &decl1, &decl2, &code1, &code2,
2292 &multi_step_cvt, &interm_types))
2293 {
2294 /* A binary widening operation can only be supported directly by the
2295 architecture. */
2296 gcc_assert (!(multi_step_cvt && op_type == binary_op));
2297 break;
2298 }
2299
2300 if (code != FLOAT_EXPR
2301 || (GET_MODE_SIZE (TYPE_MODE (lhs_type))
2302 <= GET_MODE_SIZE (TYPE_MODE (rhs_type))))
2303 goto unsupported;
2304
2305 rhs_mode = TYPE_MODE (rhs_type);
2306 fltsz = GET_MODE_SIZE (TYPE_MODE (lhs_type));
2307 for (rhs_mode = GET_MODE_2XWIDER_MODE (TYPE_MODE (rhs_type));
2308 rhs_mode != VOIDmode && GET_MODE_SIZE (rhs_mode) <= fltsz;
2309 rhs_mode = GET_MODE_2XWIDER_MODE (rhs_mode))
2310 {
2311 cvt_type
2312 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
2313 cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
2314 if (cvt_type == NULL_TREE)
2315 goto unsupported;
2316
2317 if (GET_MODE_SIZE (rhs_mode) == fltsz)
2318 {
2319 if (!supportable_convert_operation (code, vectype_out,
2320 cvt_type, &decl1, &codecvt1))
2321 goto unsupported;
2322 }
2323 else if (!supportable_widening_operation (code, stmt, vectype_out,
2324 cvt_type, &decl1, &decl2,
2325 &codecvt1, &codecvt2,
2326 &multi_step_cvt,
2327 &interm_types))
2328 continue;
2329 else
2330 gcc_assert (multi_step_cvt == 0);
2331
2332 if (supportable_widening_operation (NOP_EXPR, stmt, cvt_type,
2333 vectype_in, NULL, NULL, &code1,
2334 &code2, &multi_step_cvt,
2335 &interm_types))
2336 break;
2337 }
2338
2339 if (rhs_mode == VOIDmode || GET_MODE_SIZE (rhs_mode) > fltsz)
2340 goto unsupported;
2341
2342 if (GET_MODE_SIZE (rhs_mode) == fltsz)
2343 codecvt2 = ERROR_MARK;
2344 else
2345 {
2346 multi_step_cvt++;
2347 VEC_safe_push (tree, heap, interm_types, cvt_type);
2348 cvt_type = NULL_TREE;
2349 }
2350 break;
2351
2352 case NARROW:
2353 gcc_assert (op_type == unary_op);
2354 if (supportable_narrowing_operation (code, vectype_out, vectype_in,
2355 &code1, &multi_step_cvt,
2356 &interm_types))
2357 break;
2358
2359 if (code != FIX_TRUNC_EXPR
2360 || (GET_MODE_SIZE (TYPE_MODE (lhs_type))
2361 >= GET_MODE_SIZE (TYPE_MODE (rhs_type))))
2362 goto unsupported;
2363
2364 rhs_mode = TYPE_MODE (rhs_type);
2365 cvt_type
2366 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
2367 cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
2368 if (cvt_type == NULL_TREE)
2369 goto unsupported;
2370 if (!supportable_convert_operation (code, cvt_type, vectype_in,
2371 &decl1, &codecvt1))
2372 goto unsupported;
2373 if (supportable_narrowing_operation (NOP_EXPR, vectype_out, cvt_type,
2374 &code1, &multi_step_cvt,
2375 &interm_types))
2376 break;
2377 goto unsupported;
2378
2379 default:
2380 gcc_unreachable ();
2381 }
2382
2383 if (!vec_stmt) /* transformation not required. */
2384 {
2385 if (vect_print_dump_info (REPORT_DETAILS))
2386 fprintf (vect_dump, "=== vectorizable_conversion ===");
2387 if (code == FIX_TRUNC_EXPR || code == FLOAT_EXPR)
2388 STMT_VINFO_TYPE (stmt_info) = type_conversion_vec_info_type;
2389 else if (modifier == NARROW)
2390 {
2391 STMT_VINFO_TYPE (stmt_info) = type_demotion_vec_info_type;
2392 vect_model_simple_cost (stmt_info, ncopies, dt, NULL);
2393 }
2394 else
2395 {
2396 STMT_VINFO_TYPE (stmt_info) = type_promotion_vec_info_type;
2397 vect_model_simple_cost (stmt_info, 2 * ncopies, dt, NULL);
2398 }
2399 VEC_free (tree, heap, interm_types);
2400 return true;
2401 }
2402
2403 /** Transform. **/
2404 if (vect_print_dump_info (REPORT_DETAILS))
2405 fprintf (vect_dump, "transform conversion. ncopies = %d.", ncopies);
2406
2407 if (op_type == binary_op)
2408 {
2409 if (CONSTANT_CLASS_P (op0))
2410 op0 = fold_convert (TREE_TYPE (op1), op0);
2411 else if (CONSTANT_CLASS_P (op1))
2412 op1 = fold_convert (TREE_TYPE (op0), op1);
2413 }
2414
2415 /* In case of multi-step conversion, we first generate conversion operations
2416 to the intermediate types, and then from those types to the final one.
2417 We create vector destinations for the intermediate types (INTERM_TYPES)
2418 received from supportable_*_operation, and store them in the correct order
2419 for future use in vect_create_vectorized_*_stmts (). */
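/* E.g. (a sketch): VEC_DSTS[0] holds the destination for the final
   VECTYPE_OUT and the following entries hold destinations for the
   intermediate types, so the WIDEN loop below can index it with a step
   counter that counts down to zero.  */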
2420 vec_dsts = VEC_alloc (tree, heap, multi_step_cvt + 1);
2421 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
2422 VEC_quick_push (tree, vec_dsts, vec_dest);
2423
2424 if (multi_step_cvt)
2425 {
2426 for (i = VEC_length (tree, interm_types) - 1;
2427 VEC_iterate (tree, interm_types, i, intermediate_type); i--)
2428 {
2429 vec_dest = vect_create_destination_var (scalar_dest,
2430 intermediate_type);
2431 VEC_quick_push (tree, vec_dsts, vec_dest);
2432 }
2433 }
2434
2435 if (cvt_type)
2436 vec_dest = vect_create_destination_var (scalar_dest, cvt_type);
2437
2438 if (!slp_node)
2439 {
2440 if (modifier == NONE)
2441 vec_oprnds0 = VEC_alloc (tree, heap, 1);
2442 else if (modifier == WIDEN)
2443 {
2444 vec_oprnds0 = VEC_alloc (tree, heap,
2445 (multi_step_cvt
2446 ? vect_pow2 (multi_step_cvt) : 1));
2447 if (op_type == binary_op)
2448 vec_oprnds1 = VEC_alloc (tree, heap, 1);
2449 }
2450 else
2451 vec_oprnds0 = VEC_alloc (tree, heap,
2452 2 * (multi_step_cvt
2453 ? vect_pow2 (multi_step_cvt) : 1));
2454 }
2455 else if (code == WIDEN_LSHIFT_EXPR)
2456 vec_oprnds1 = VEC_alloc (tree, heap, slp_node->vec_stmts_size);
2457
2458 last_oprnd = op0;
2459 prev_stmt_info = NULL;
2460 switch (modifier)
2461 {
2462 case NONE:
2463 for (j = 0; j < ncopies; j++)
2464 {
2465 if (j == 0)
2466 vect_get_vec_defs (op0, NULL, stmt, &vec_oprnds0, NULL, slp_node,
2467 -1);
2468 else
2469 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, NULL);
2470
2471 FOR_EACH_VEC_ELT (tree, vec_oprnds0, i, vop0)
2472 {
2473 /* Arguments are ready. Create the new vector stmt. */
2474 if (code1 == CALL_EXPR)
2475 {
2476 new_stmt = gimple_build_call (decl1, 1, vop0);
2477 new_temp = make_ssa_name (vec_dest, new_stmt);
2478 gimple_call_set_lhs (new_stmt, new_temp);
2479 }
2480 else
2481 {
2482 gcc_assert (TREE_CODE_LENGTH (code1) == unary_op);
2483 new_stmt = gimple_build_assign_with_ops (code1, vec_dest,
2484 vop0, NULL);
2485 new_temp = make_ssa_name (vec_dest, new_stmt);
2486 gimple_assign_set_lhs (new_stmt, new_temp);
2487 }
2488
2489 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2490 if (slp_node)
2491 VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node),
2492 new_stmt);
2493 }
2494
2495 if (j == 0)
2496 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2497 else
2498 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2499 prev_stmt_info = vinfo_for_stmt (new_stmt);
2500 }
2501 break;
2502
2503 case WIDEN:
2504 /* In case the vectorization factor (VF) is bigger than the number
2505 of elements that we can fit in a vectype (nunits), we have to
2506 generate more than one vector stmt, i.e., we need to "unroll"
2507 the vector stmt by a factor of VF/nunits. */
2508 for (j = 0; j < ncopies; j++)
2509 {
2510 /* Handle uses. */
2511 if (j == 0)
2512 {
2513 if (slp_node)
2514 {
2515 if (code == WIDEN_LSHIFT_EXPR)
2516 {
2517 unsigned int k;
2518
2519 vec_oprnd1 = op1;
2520 /* Store vec_oprnd1 for every vector stmt to be created
2521 for SLP_NODE. We check during the analysis that all
2522 the shift arguments are the same. */
2523 for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
2524 VEC_quick_push (tree, vec_oprnds1, vec_oprnd1);
2525
2526 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
2527 slp_node, -1);
2528 }
2529 else
2530 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0,
2531 &vec_oprnds1, slp_node, -1);
2532 }
2533 else
2534 {
2535 vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt, NULL);
2536 VEC_quick_push (tree, vec_oprnds0, vec_oprnd0);
2537 if (op_type == binary_op)
2538 {
2539 if (code == WIDEN_LSHIFT_EXPR)
2540 vec_oprnd1 = op1;
2541 else
2542 vec_oprnd1 = vect_get_vec_def_for_operand (op1, stmt,
2543 NULL);
2544 VEC_quick_push (tree, vec_oprnds1, vec_oprnd1);
2545 }
2546 }
2547 }
2548 else
2549 {
2550 vec_oprnd0 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd0);
2551 VEC_truncate (tree, vec_oprnds0, 0);
2552 VEC_quick_push (tree, vec_oprnds0, vec_oprnd0);
2553 if (op_type == binary_op)
2554 {
2555 if (code == WIDEN_LSHIFT_EXPR)
2556 vec_oprnd1 = op1;
2557 else
2558 vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt[1],
2559 vec_oprnd1);
2560 VEC_truncate (tree, vec_oprnds1, 0);
2561 VEC_quick_push (tree, vec_oprnds1, vec_oprnd1);
2562 }
2563 }
2564
2565 /* Arguments are ready. Create the new vector stmts. */
2566 for (i = multi_step_cvt; i >= 0; i--)
2567 {
2568 tree this_dest = VEC_index (tree, vec_dsts, i);
2569 enum tree_code c1 = code1, c2 = code2;
2570 if (i == 0 && codecvt2 != ERROR_MARK)
2571 {
2572 c1 = codecvt1;
2573 c2 = codecvt2;
2574 }
2575 vect_create_vectorized_promotion_stmts (&vec_oprnds0,
2576 &vec_oprnds1,
2577 stmt, this_dest, gsi,
2578 c1, c2, decl1, decl2,
2579 op_type);
2580 }
2581
2582 FOR_EACH_VEC_ELT (tree, vec_oprnds0, i, vop0)
2583 {
2584 if (cvt_type)
2585 {
2586 if (codecvt1 == CALL_EXPR)
2587 {
2588 new_stmt = gimple_build_call (decl1, 1, vop0);
2589 new_temp = make_ssa_name (vec_dest, new_stmt);
2590 gimple_call_set_lhs (new_stmt, new_temp);
2591 }
2592 else
2593 {
2594 gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
2595 new_temp = make_ssa_name (vec_dest, NULL);
2596 new_stmt = gimple_build_assign_with_ops (codecvt1,
2597 new_temp,
2598 vop0, NULL);
2599 }
2600
2601 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2602 }
2603 else
2604 new_stmt = SSA_NAME_DEF_STMT (vop0);
2605
2606 if (slp_node)
2607 VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node),
2608 new_stmt);
2609 else
2610 {
2611 if (!prev_stmt_info)
2612 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
2613 else
2614 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2615 prev_stmt_info = vinfo_for_stmt (new_stmt);
2616 }
2617 }
2618 }
2619
2620 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
2621 break;
2622
2623 case NARROW:
2624 /* In case the vectorization factor (VF) is bigger than the number
2625 of elements that we can fit in a vectype (nunits), we have to
2626 generate more than one vector stmt, i.e., we need to "unroll"
2627 the vector stmt by a factor of VF/nunits. */
2628 for (j = 0; j < ncopies; j++)
2629 {
2630 /* Handle uses. */
2631 if (slp_node)
2632 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
2633 slp_node, -1);
2634 else
2635 {
2636 VEC_truncate (tree, vec_oprnds0, 0);
2637 vect_get_loop_based_defs (&last_oprnd, stmt, dt[0], &vec_oprnds0,
2638 vect_pow2 (multi_step_cvt) - 1);
2639 }
2640
2641 /* Arguments are ready. Create the new vector stmts. */
2642 if (cvt_type)
2643 FOR_EACH_VEC_ELT (tree, vec_oprnds0, i, vop0)
2644 {
2645 if (codecvt1 == CALL_EXPR)
2646 {
2647 new_stmt = gimple_build_call (decl1, 1, vop0);
2648 new_temp = make_ssa_name (vec_dest, new_stmt);
2649 gimple_call_set_lhs (new_stmt, new_temp);
2650 }
2651 else
2652 {
2653 gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
2654 new_temp = make_ssa_name (vec_dest, NULL);
2655 new_stmt = gimple_build_assign_with_ops (codecvt1, new_temp,
2656 vop0, NULL);
2657 }
2658
2659 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2660 VEC_replace (tree, vec_oprnds0, i, new_temp);
2661 }
2662
2663 vect_create_vectorized_demotion_stmts (&vec_oprnds0, multi_step_cvt,
2664 stmt, vec_dsts, gsi,
2665 slp_node, code1,
2666 &prev_stmt_info);
2667 }
2668
2669 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
2670 break;
2671 }
2672
2673 VEC_free (tree, heap, vec_oprnds0);
2674 VEC_free (tree, heap, vec_oprnds1);
2675 VEC_free (tree, heap, vec_dsts);
2676 VEC_free (tree, heap, interm_types);
2677
2678 return true;
2679 }
2680
2681
2682 /* Function vectorizable_assignment.
2683
2684 Check if STMT performs an assignment (copy) that can be vectorized.
2685 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2686 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
2687 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
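/* Typical cases handled here (illustrative): a plain SSA copy "a_1 = b_2",
   a PAREN_EXPR, or a conversion that changes neither the number of lanes
   nor the vector size, e.g. int <-> unsigned int, which is emitted as a
   single VIEW_CONVERT_EXPR on the vector operand.  */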
2688
2689 static bool
2690 vectorizable_assignment (gimple stmt, gimple_stmt_iterator *gsi,
2691 gimple *vec_stmt, slp_tree slp_node)
2692 {
2693 tree vec_dest;
2694 tree scalar_dest;
2695 tree op;
2696 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2697 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2698 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2699 tree new_temp;
2700 tree def;
2701 gimple def_stmt;
2702 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
2703 unsigned int nunits = TYPE_VECTOR_SUBPARTS (vectype);
2704 int ncopies;
2705 int i, j;
2706 VEC(tree,heap) *vec_oprnds = NULL;
2707 tree vop;
2708 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
2709 gimple new_stmt = NULL;
2710 stmt_vec_info prev_stmt_info = NULL;
2711 enum tree_code code;
2712 tree vectype_in;
2713
2714 /* Multiple types in SLP are handled by creating the appropriate number of
2715 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
2716 case of SLP. */
2717 if (slp_node || PURE_SLP_STMT (stmt_info))
2718 ncopies = 1;
2719 else
2720 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
2721
2722 gcc_assert (ncopies >= 1);
2723
2724 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
2725 return false;
2726
2727 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
2728 return false;
2729
2730 /* Is vectorizable assignment? */
2731 if (!is_gimple_assign (stmt))
2732 return false;
2733
2734 scalar_dest = gimple_assign_lhs (stmt);
2735 if (TREE_CODE (scalar_dest) != SSA_NAME)
2736 return false;
2737
2738 code = gimple_assign_rhs_code (stmt);
2739 if (gimple_assign_single_p (stmt)
2740 || code == PAREN_EXPR
2741 || CONVERT_EXPR_CODE_P (code))
2742 op = gimple_assign_rhs1 (stmt);
2743 else
2744 return false;
2745
2746 if (code == VIEW_CONVERT_EXPR)
2747 op = TREE_OPERAND (op, 0);
2748
2749 if (!vect_is_simple_use_1 (op, loop_vinfo, bb_vinfo,
2750 &def_stmt, &def, &dt[0], &vectype_in))
2751 {
2752 if (vect_print_dump_info (REPORT_DETAILS))
2753 fprintf (vect_dump, "use not simple.");
2754 return false;
2755 }
2756
2757 /* We can handle NOP_EXPR conversions that do not change the number
2758 of elements or the vector size. */
2759 if ((CONVERT_EXPR_CODE_P (code)
2760 || code == VIEW_CONVERT_EXPR)
2761 && (!vectype_in
2762 || TYPE_VECTOR_SUBPARTS (vectype_in) != nunits
2763 || (GET_MODE_SIZE (TYPE_MODE (vectype))
2764 != GET_MODE_SIZE (TYPE_MODE (vectype_in)))))
2765 return false;
2766
2767 /* We do not handle bit-precision changes. */
2768 if ((CONVERT_EXPR_CODE_P (code)
2769 || code == VIEW_CONVERT_EXPR)
2770 && INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest))
2771 && ((TYPE_PRECISION (TREE_TYPE (scalar_dest))
2772 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest))))
2773 || ((TYPE_PRECISION (TREE_TYPE (op))
2774 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (op))))))
2775 /* But a conversion that does not change the bit-pattern is ok. */
2776 && !((TYPE_PRECISION (TREE_TYPE (scalar_dest))
2777 > TYPE_PRECISION (TREE_TYPE (op)))
2778 && TYPE_UNSIGNED (TREE_TYPE (op))))
2779 {
2780 if (vect_print_dump_info (REPORT_DETAILS))
2781 fprintf (vect_dump, "type conversion to/from bit-precision "
2782 "unsupported.");
2783 return false;
2784 }
2785
2786 if (!vec_stmt) /* transformation not required. */
2787 {
2788 STMT_VINFO_TYPE (stmt_info) = assignment_vec_info_type;
2789 if (vect_print_dump_info (REPORT_DETAILS))
2790 fprintf (vect_dump, "=== vectorizable_assignment ===");
2791 vect_model_simple_cost (stmt_info, ncopies, dt, NULL);
2792 return true;
2793 }
2794
2795 /** Transform. **/
2796 if (vect_print_dump_info (REPORT_DETAILS))
2797 fprintf (vect_dump, "transform assignment.");
2798
2799 /* Handle def. */
2800 vec_dest = vect_create_destination_var (scalar_dest, vectype);
2801
2802 /* Handle use. */
2803 for (j = 0; j < ncopies; j++)
2804 {
2805 /* Handle uses. */
2806 if (j == 0)
2807 vect_get_vec_defs (op, NULL, stmt, &vec_oprnds, NULL, slp_node, -1);
2808 else
2809 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds, NULL);
2810
2811 /* Arguments are ready. Create the new vector stmt. */
2812 FOR_EACH_VEC_ELT (tree, vec_oprnds, i, vop)
2813 {
2814 if (CONVERT_EXPR_CODE_P (code)
2815 || code == VIEW_CONVERT_EXPR)
2816 vop = build1 (VIEW_CONVERT_EXPR, vectype, vop);
2817 new_stmt = gimple_build_assign (vec_dest, vop);
2818 new_temp = make_ssa_name (vec_dest, new_stmt);
2819 gimple_assign_set_lhs (new_stmt, new_temp);
2820 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2821 if (slp_node)
2822 VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node), new_stmt);
2823 }
2824
2825 if (slp_node)
2826 continue;
2827
2828 if (j == 0)
2829 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2830 else
2831 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2832
2833 prev_stmt_info = vinfo_for_stmt (new_stmt);
2834 }
2835
2836 VEC_free (tree, heap, vec_oprnds);
2837 return true;
2838 }
2839
2840
2841 /* Return TRUE if CODE (a shift operation) is supported for SCALAR_TYPE
2842 either as shift by a scalar or by a vector. */
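/* In other words (a sketch of the intent): the shift is supportable if the
   target provides either the vector-by-scalar form (optab_scalar) or, as a
   fallback, the vector-by-vector form (optab_vector) for VECTYPE's mode.  */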
2843
2844 bool
2845 vect_supportable_shift (enum tree_code code, tree scalar_type)
2846 {
2847
2848 enum machine_mode vec_mode;
2849 optab optab;
2850 int icode;
2851 tree vectype;
2852
2853 vectype = get_vectype_for_scalar_type (scalar_type);
2854 if (!vectype)
2855 return false;
2856
2857 optab = optab_for_tree_code (code, vectype, optab_scalar);
2858 if (!optab
2859 || optab_handler (optab, TYPE_MODE (vectype)) == CODE_FOR_nothing)
2860 {
2861 optab = optab_for_tree_code (code, vectype, optab_vector);
2862 if (!optab
2863 || (optab_handler (optab, TYPE_MODE (vectype))
2864 == CODE_FOR_nothing))
2865 return false;
2866 }
2867
2868 vec_mode = TYPE_MODE (vectype);
2869 icode = (int) optab_handler (optab, vec_mode);
2870 if (icode == CODE_FOR_nothing)
2871 return false;
2872
2873 return true;
2874 }
2875
2876
2877 /* Function vectorizable_shift.
2878
2879 Check if STMT performs a shift operation that can be vectorized.
2880 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2881 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
2882 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
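/* For example (illustrative): "x_1 = y_2 << 3", where the shift amount is
   a constant or loop invariant, can use the vector-by-scalar optab and keep
   3 as operand 2, whereas "x_1 = y_2 << z_3" with a loop-varying z_3 needs
   the vector-by-vector optab and a vectorized shift-count operand.  */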
2883
2884 static bool
2885 vectorizable_shift (gimple stmt, gimple_stmt_iterator *gsi,
2886 gimple *vec_stmt, slp_tree slp_node)
2887 {
2888 tree vec_dest;
2889 tree scalar_dest;
2890 tree op0, op1 = NULL;
2891 tree vec_oprnd1 = NULL_TREE;
2892 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2893 tree vectype;
2894 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2895 enum tree_code code;
2896 enum machine_mode vec_mode;
2897 tree new_temp;
2898 optab optab;
2899 int icode;
2900 enum machine_mode optab_op2_mode;
2901 tree def;
2902 gimple def_stmt;
2903 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
2904 gimple new_stmt = NULL;
2905 stmt_vec_info prev_stmt_info;
2906 int nunits_in;
2907 int nunits_out;
2908 tree vectype_out;
2909 tree op1_vectype;
2910 int ncopies;
2911 int j, i;
2912 VEC (tree, heap) *vec_oprnds0 = NULL, *vec_oprnds1 = NULL;
2913 tree vop0, vop1;
2914 unsigned int k;
2915 bool scalar_shift_arg = true;
2916 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
2917 int vf;
2918
2919 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
2920 return false;
2921
2922 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
2923 return false;
2924
2925 /* Is STMT a vectorizable binary/unary operation? */
2926 if (!is_gimple_assign (stmt))
2927 return false;
2928
2929 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
2930 return false;
2931
2932 code = gimple_assign_rhs_code (stmt);
2933
2934 if (!(code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
2935 || code == RROTATE_EXPR))
2936 return false;
2937
2938 scalar_dest = gimple_assign_lhs (stmt);
2939 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
2940 if (TYPE_PRECISION (TREE_TYPE (scalar_dest))
2941 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest))))
2942 {
2943 if (vect_print_dump_info (REPORT_DETAILS))
2944 fprintf (vect_dump, "bit-precision shifts not supported.");
2945 return false;
2946 }
2947
2948 op0 = gimple_assign_rhs1 (stmt);
2949 if (!vect_is_simple_use_1 (op0, loop_vinfo, bb_vinfo,
2950 &def_stmt, &def, &dt[0], &vectype))
2951 {
2952 if (vect_print_dump_info (REPORT_DETAILS))
2953 fprintf (vect_dump, "use not simple.");
2954 return false;
2955 }
2956 /* If op0 is an external or constant def use a vector type with
2957 the same size as the output vector type. */
2958 if (!vectype)
2959 vectype = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
2960 if (vec_stmt)
2961 gcc_assert (vectype);
2962 if (!vectype)
2963 {
2964 if (vect_print_dump_info (REPORT_DETAILS))
2965 {
2966 fprintf (vect_dump, "no vectype for scalar type ");
2967 print_generic_expr (vect_dump, TREE_TYPE (op0), TDF_SLIM);
2968 }
2969
2970 return false;
2971 }
2972
2973 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
2974 nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
2975 if (nunits_out != nunits_in)
2976 return false;
2977
2978 op1 = gimple_assign_rhs2 (stmt);
2979 if (!vect_is_simple_use_1 (op1, loop_vinfo, bb_vinfo, &def_stmt, &def,
2980 &dt[1], &op1_vectype))
2981 {
2982 if (vect_print_dump_info (REPORT_DETAILS))
2983 fprintf (vect_dump, "use not simple.");
2984 return false;
2985 }
2986
2987 if (loop_vinfo)
2988 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
2989 else
2990 vf = 1;
2991
2992 /* Multiple types in SLP are handled by creating the appropriate number of
2993 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
2994 case of SLP. */
2995 if (slp_node || PURE_SLP_STMT (stmt_info))
2996 ncopies = 1;
2997 else
2998 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
2999
3000 gcc_assert (ncopies >= 1);
3001
3002 /* Determine whether the shift amount is a vector or a scalar. If the
3003 shift/rotate amount is a vector, use the vector/vector shift optabs. */
3004
3005 if (dt[1] == vect_internal_def && !slp_node)
3006 scalar_shift_arg = false;
3007 else if (dt[1] == vect_constant_def
3008 || dt[1] == vect_external_def
3009 || dt[1] == vect_internal_def)
3010 {
3011 /* In SLP, we need to check whether the shift count is the same for all
3012 statements; in loops, if it is a constant or invariant, it is always
3013 a scalar shift. */
3014 if (slp_node)
3015 {
3016 VEC (gimple, heap) *stmts = SLP_TREE_SCALAR_STMTS (slp_node);
3017 gimple slpstmt;
3018
3019 FOR_EACH_VEC_ELT (gimple, stmts, k, slpstmt)
3020 if (!operand_equal_p (gimple_assign_rhs2 (slpstmt), op1, 0))
3021 scalar_shift_arg = false;
3022 }
3023 }
3024 else
3025 {
3026 if (vect_print_dump_info (REPORT_DETAILS))
3027 fprintf (vect_dump, "operand mode requires invariant argument.");
3028 return false;
3029 }
3030
3031 /* Vector shifted by vector. */
3032 if (!scalar_shift_arg)
3033 {
3034 optab = optab_for_tree_code (code, vectype, optab_vector);
3035 if (vect_print_dump_info (REPORT_DETAILS))
3036 fprintf (vect_dump, "vector/vector shift/rotate found.");
3037 if (!op1_vectype)
3038 op1_vectype = get_same_sized_vectype (TREE_TYPE (op1), vectype_out);
3039 if (op1_vectype == NULL_TREE
3040 || TYPE_MODE (op1_vectype) != TYPE_MODE (vectype))
3041 {
3042 if (vect_print_dump_info (REPORT_DETAILS))
3043 fprintf (vect_dump, "unusable type for last operand in"
3044 " vector/vector shift/rotate.");
3045 return false;
3046 }
3047 }
3048 /* See if the machine has a vector-shifted-by-scalar insn and, if not,
3049 see if it has a vector-shifted-by-vector insn. */
3050 else
3051 {
3052 optab = optab_for_tree_code (code, vectype, optab_scalar);
3053 if (optab
3054 && optab_handler (optab, TYPE_MODE (vectype)) != CODE_FOR_nothing)
3055 {
3056 if (vect_print_dump_info (REPORT_DETAILS))
3057 fprintf (vect_dump, "vector/scalar shift/rotate found.");
3058 }
3059 else
3060 {
3061 optab = optab_for_tree_code (code, vectype, optab_vector);
3062 if (optab
3063 && (optab_handler (optab, TYPE_MODE (vectype))
3064 != CODE_FOR_nothing))
3065 {
3066 scalar_shift_arg = false;
3067
3068 if (vect_print_dump_info (REPORT_DETAILS))
3069 fprintf (vect_dump, "vector/vector shift/rotate found.");
3070
3071 /* Unlike the other binary operators, shifts/rotates take an int
3072 rhs rather than one of the same type as the lhs, so make sure
3073 the scalar is the right type if we are dealing with vectors of
3074 long long/long/short/char. */
3075 if (dt[1] == vect_constant_def)
3076 op1 = fold_convert (TREE_TYPE (vectype), op1);
3077 else if (!useless_type_conversion_p (TREE_TYPE (vectype),
3078 TREE_TYPE (op1)))
3079 {
3080 if (slp_node
3081 && TYPE_MODE (TREE_TYPE (vectype))
3082 != TYPE_MODE (TREE_TYPE (op1)))
3083 {
3084 if (vect_print_dump_info (REPORT_DETAILS))
3085 fprintf (vect_dump, "unusable type for last operand in"
3086 " vector/vector shift/rotate.");
3087 return false;
3088 }
3089 if (vec_stmt && !slp_node)
3090 {
3091 op1 = fold_convert (TREE_TYPE (vectype), op1);
3092 op1 = vect_init_vector (stmt, op1,
3093 TREE_TYPE (vectype), NULL);
3094 }
3095 }
3096 }
3097 }
3098 }
3099
3100 /* Supportable by target? */
3101 if (!optab)
3102 {
3103 if (vect_print_dump_info (REPORT_DETAILS))
3104 fprintf (vect_dump, "no optab.");
3105 return false;
3106 }
3107 vec_mode = TYPE_MODE (vectype);
3108 icode = (int) optab_handler (optab, vec_mode);
3109 if (icode == CODE_FOR_nothing)
3110 {
3111 if (vect_print_dump_info (REPORT_DETAILS))
3112 fprintf (vect_dump, "op not supported by target.");
3113 /* Check only during analysis. */
3114 if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD
3115 || (vf < vect_min_worthwhile_factor (code)
3116 && !vec_stmt))
3117 return false;
3118 if (vect_print_dump_info (REPORT_DETAILS))
3119 fprintf (vect_dump, "proceeding using word mode.");
3120 }
3121
3122 /* Worthwhile without SIMD support? Check only during analysis. */
3123 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
3124 && vf < vect_min_worthwhile_factor (code)
3125 && !vec_stmt)
3126 {
3127 if (vect_print_dump_info (REPORT_DETAILS))
3128 fprintf (vect_dump, "not worthwhile without SIMD support.");
3129 return false;
3130 }
3131
3132 if (!vec_stmt) /* transformation not required. */
3133 {
3134 STMT_VINFO_TYPE (stmt_info) = shift_vec_info_type;
3135 if (vect_print_dump_info (REPORT_DETAILS))
3136 fprintf (vect_dump, "=== vectorizable_shift ===");
3137 vect_model_simple_cost (stmt_info, ncopies, dt, NULL);
3138 return true;
3139 }
3140
3141 /** Transform. **/
3142
3143 if (vect_print_dump_info (REPORT_DETAILS))
3144 fprintf (vect_dump, "transform binary/unary operation.");
3145
3146 /* Handle def. */
3147 vec_dest = vect_create_destination_var (scalar_dest, vectype);
3148
3149 /* Allocate VECs for vector operands. In case of SLP, vector operands are
3150 created in the previous stages of the recursion, so no allocation is
3151 needed, except for the case of shift with scalar shift argument. In that
3152 case we store the scalar operand in VEC_OPRNDS1 for every vector stmt to
3153 be created to vectorize the SLP group, i.e., SLP_NODE->VEC_STMTS_SIZE.
3154 In case of loop-based vectorization we allocate VECs of size 1. We
3155 allocate VEC_OPRNDS1 only in case of binary operation. */
3156 if (!slp_node)
3157 {
3158 vec_oprnds0 = VEC_alloc (tree, heap, 1);
3159 vec_oprnds1 = VEC_alloc (tree, heap, 1);
3160 }
3161 else if (scalar_shift_arg)
3162 vec_oprnds1 = VEC_alloc (tree, heap, slp_node->vec_stmts_size);
3163
3164 prev_stmt_info = NULL;
3165 for (j = 0; j < ncopies; j++)
3166 {
3167 /* Handle uses. */
3168 if (j == 0)
3169 {
3170 if (scalar_shift_arg)
3171 {
3172 /* Vector shl and shr insn patterns can be defined with scalar
3173 operand 2 (shift operand). In this case, use constant or loop
3174 invariant op1 directly, without extending it to vector mode
3175 first. */
3176 optab_op2_mode = insn_data[icode].operand[2].mode;
3177 if (!VECTOR_MODE_P (optab_op2_mode))
3178 {
3179 if (vect_print_dump_info (REPORT_DETAILS))
3180 fprintf (vect_dump, "operand 1 using scalar mode.");
3181 vec_oprnd1 = op1;
3182 VEC_quick_push (tree, vec_oprnds1, vec_oprnd1);
3183 if (slp_node)
3184 {
3185 /* Store vec_oprnd1 for every vector stmt to be created
3186 for SLP_NODE. We check during the analysis that all
3187 the shift arguments are the same.
3188 TODO: Allow different constants for different vector
3189 stmts generated for an SLP instance. */
3190 for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
3191 VEC_quick_push (tree, vec_oprnds1, vec_oprnd1);
3192 }
3193 }
3194 }
3195
3196 /* vec_oprnd1 is available if operand 1 should be of a scalar type
3197 (a special case for certain kinds of vector shifts); otherwise,
3198 operand 1 should be of a vector type (the usual case). */
3199 if (vec_oprnd1)
3200 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
3201 slp_node, -1);
3202 else
3203 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
3204 slp_node, -1);
3205 }
3206 else
3207 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, &vec_oprnds1);
3208
3209 /* Arguments are ready. Create the new vector stmt. */
3210 FOR_EACH_VEC_ELT (tree, vec_oprnds0, i, vop0)
3211 {
3212 vop1 = VEC_index (tree, vec_oprnds1, i);
3213 new_stmt = gimple_build_assign_with_ops (code, vec_dest, vop0, vop1);
3214 new_temp = make_ssa_name (vec_dest, new_stmt);
3215 gimple_assign_set_lhs (new_stmt, new_temp);
3216 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3217 if (slp_node)
3218 VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node), new_stmt);
3219 }
3220
3221 if (slp_node)
3222 continue;
3223
3224 if (j == 0)
3225 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3226 else
3227 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3228 prev_stmt_info = vinfo_for_stmt (new_stmt);
3229 }
3230
3231 VEC_free (tree, heap, vec_oprnds0);
3232 VEC_free (tree, heap, vec_oprnds1);
3233
3234 return true;
3235 }
3236
3237
3238 /* Function vectorizable_operation.
3239
3240 Check if STMT performs a binary, unary or ternary operation that can
3241 be vectorized.
3242 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
3243 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
3244 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
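/* Illustrative examples of what is handled here: binary operations such as
   "z_1 = x_2 + y_3", unary operations such as "z_1 = -x_2", and ternary
   operations with a three-operand rhs (e.g. an FMA); shifts and rotates
   are rejected below and left to vectorizable_shift.  */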
3245
3246 static bool
3247 vectorizable_operation (gimple stmt, gimple_stmt_iterator *gsi,
3248 gimple *vec_stmt, slp_tree slp_node)
3249 {
3250 tree vec_dest;
3251 tree scalar_dest;
3252 tree op0, op1 = NULL_TREE, op2 = NULL_TREE;
3253 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3254 tree vectype;
3255 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3256 enum tree_code code;
3257 enum machine_mode vec_mode;
3258 tree new_temp;
3259 int op_type;
3260 optab optab;
3261 int icode;
3262 tree def;
3263 gimple def_stmt;
3264 enum vect_def_type dt[3]
3265 = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
3266 gimple new_stmt = NULL;
3267 stmt_vec_info prev_stmt_info;
3268 int nunits_in;
3269 int nunits_out;
3270 tree vectype_out;
3271 int ncopies;
3272 int j, i;
3273 VEC(tree,heap) *vec_oprnds0 = NULL, *vec_oprnds1 = NULL, *vec_oprnds2 = NULL;
3274 tree vop0, vop1, vop2;
3275 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
3276 int vf;
3277
3278 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
3279 return false;
3280
3281 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
3282 return false;
3283
3284 /* Is STMT a vectorizable binary/unary operation? */
3285 if (!is_gimple_assign (stmt))
3286 return false;
3287
3288 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
3289 return false;
3290
3291 code = gimple_assign_rhs_code (stmt);
3292
3293 /* For pointer addition, we should use the normal plus for
3294 the vector addition. */
3295 if (code == POINTER_PLUS_EXPR)
3296 code = PLUS_EXPR;
3297
3298 /* Support only unary, binary and ternary operations. */
3299 op_type = TREE_CODE_LENGTH (code);
3300 if (op_type != unary_op && op_type != binary_op && op_type != ternary_op)
3301 {
3302 if (vect_print_dump_info (REPORT_DETAILS))
3303 fprintf (vect_dump, "num. args = %d (not unary/binary/ternary op).",
3304 op_type);
3305 return false;
3306 }
3307
3308 scalar_dest = gimple_assign_lhs (stmt);
3309 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
3310
3311 /* Most operations cannot handle bit-precision types without extra
3312 truncations. */
3313 if ((TYPE_PRECISION (TREE_TYPE (scalar_dest))
3314 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest))))
3315 /* Exceptions are bitwise binary operations. */
3316 && code != BIT_IOR_EXPR
3317 && code != BIT_XOR_EXPR
3318 && code != BIT_AND_EXPR)
3319 {
3320 if (vect_print_dump_info (REPORT_DETAILS))
3321 fprintf (vect_dump, "bit-precision arithmetic not supported.");
3322 return false;
3323 }
3324
3325 op0 = gimple_assign_rhs1 (stmt);
3326 if (!vect_is_simple_use_1 (op0, loop_vinfo, bb_vinfo,
3327 &def_stmt, &def, &dt[0], &vectype))
3328 {
3329 if (vect_print_dump_info (REPORT_DETAILS))
3330 fprintf (vect_dump, "use not simple.");
3331 return false;
3332 }
3333 /* If op0 is an external or constant def use a vector type with
3334 the same size as the output vector type. */
3335 if (!vectype)
3336 vectype = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
3337 if (vec_stmt)
3338 gcc_assert (vectype);
3339 if (!vectype)
3340 {
3341 if (vect_print_dump_info (REPORT_DETAILS))
3342 {
3343 fprintf (vect_dump, "no vectype for scalar type ");
3344 print_generic_expr (vect_dump, TREE_TYPE (op0), TDF_SLIM);
3345 }
3346
3347 return false;
3348 }
3349
3350 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
3351 nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
3352 if (nunits_out != nunits_in)
3353 return false;
3354
3355 if (op_type == binary_op || op_type == ternary_op)
3356 {
3357 op1 = gimple_assign_rhs2 (stmt);
3358 if (!vect_is_simple_use (op1, loop_vinfo, bb_vinfo, &def_stmt, &def,
3359 &dt[1]))
3360 {
3361 if (vect_print_dump_info (REPORT_DETAILS))
3362 fprintf (vect_dump, "use not simple.");
3363 return false;
3364 }
3365 }
3366 if (op_type == ternary_op)
3367 {
3368 op2 = gimple_assign_rhs3 (stmt);
3369 if (!vect_is_simple_use (op2, loop_vinfo, bb_vinfo, &def_stmt, &def,
3370 &dt[2]))
3371 {
3372 if (vect_print_dump_info (REPORT_DETAILS))
3373 fprintf (vect_dump, "use not simple.");
3374 return false;
3375 }
3376 }
3377
3378 if (loop_vinfo)
3379 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
3380 else
3381 vf = 1;
3382
3383 /* Multiple types in SLP are handled by creating the appropriate number of
3384 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
3385 case of SLP. */
3386 if (slp_node || PURE_SLP_STMT (stmt_info))
3387 ncopies = 1;
3388 else
3389 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
3390
3391 gcc_assert (ncopies >= 1);
3392
3393 /* Shifts are handled in vectorizable_shift (). */
3394 if (code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
3395 || code == RROTATE_EXPR)
3396 return false;
3397
3398 optab = optab_for_tree_code (code, vectype, optab_default);
3399
3400 /* Supportable by target? */
3401 if (!optab)
3402 {
3403 if (vect_print_dump_info (REPORT_DETAILS))
3404 fprintf (vect_dump, "no optab.");
3405 return false;
3406 }
3407 vec_mode = TYPE_MODE (vectype);
3408 icode = (int) optab_handler (optab, vec_mode);
3409 if (icode == CODE_FOR_nothing)
3410 {
3411 if (vect_print_dump_info (REPORT_DETAILS))
3412 fprintf (vect_dump, "op not supported by target.");
3413 /* Check only during analysis. */
3414 if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD
3415 || (vf < vect_min_worthwhile_factor (code)
3416 && !vec_stmt))
3417 return false;
3418 if (vect_print_dump_info (REPORT_DETAILS))
3419 fprintf (vect_dump, "proceeding using word mode.");
3420 }
3421
3422 /* Worthwhile without SIMD support? Check only during analysis. */
3423 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
3424 && vf < vect_min_worthwhile_factor (code)
3425 && !vec_stmt)
3426 {
3427 if (vect_print_dump_info (REPORT_DETAILS))
3428 fprintf (vect_dump, "not worthwhile without SIMD support.");
3429 return false;
3430 }
3431
3432 if (!vec_stmt) /* transformation not required. */
3433 {
3434 STMT_VINFO_TYPE (stmt_info) = op_vec_info_type;
3435 if (vect_print_dump_info (REPORT_DETAILS))
3436 fprintf (vect_dump, "=== vectorizable_operation ===");
3437 vect_model_simple_cost (stmt_info, ncopies, dt, NULL);
3438 return true;
3439 }
3440
3441 /** Transform. **/
3442
3443 if (vect_print_dump_info (REPORT_DETAILS))
3444 fprintf (vect_dump, "transform binary/unary operation.");
3445
3446 /* Handle def. */
3447 vec_dest = vect_create_destination_var (scalar_dest, vectype);
3448
3449 /* Allocate VECs for vector operands. In case of SLP, vector operands are
3450 created in the previous stages of the recursion, so no allocation is
3451 needed (shifts with a scalar shift argument are handled in
3452 vectorizable_shift, not here). In case of loop-based vectorization we
3453 allocate VECs of size 1. VEC_OPRNDS1 is allocated only for binary and
3454 ternary operations, and VEC_OPRNDS2 is allocated only for ternary
3455 operations. */
3456 if (!slp_node)
3457 {
3458 vec_oprnds0 = VEC_alloc (tree, heap, 1);
3459 if (op_type == binary_op || op_type == ternary_op)
3460 vec_oprnds1 = VEC_alloc (tree, heap, 1);
3461 if (op_type == ternary_op)
3462 vec_oprnds2 = VEC_alloc (tree, heap, 1);
3463 }
3464
3465 /* In case the vectorization factor (VF) is bigger than the number
3466 of elements that we can fit in a vectype (nunits), we have to generate
3467 more than one vector stmt, i.e., we need to "unroll" the
3468 vector stmt by a factor of VF/nunits. In doing so, we record a pointer
3469 from one copy of the vector stmt to the next, in the field
3470 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
3471 stages to find the correct vector defs to be used when vectorizing
3472 stmts that use the defs of the current stmt. The example below
3473 illustrates the vectorization process when VF=16 and nunits=4 (i.e.,
3474 we need to create 4 vectorized stmts):
3475
3476 before vectorization:
3477 RELATED_STMT VEC_STMT
3478 S1: x = memref - -
3479 S2: z = x + 1 - -
3480
3481 step 1: vectorize stmt S1 (done in vectorizable_load. See more details
3482 there):
3483 RELATED_STMT VEC_STMT
3484 VS1_0: vx0 = memref0 VS1_1 -
3485 VS1_1: vx1 = memref1 VS1_2 -
3486 VS1_2: vx2 = memref2 VS1_3 -
3487 VS1_3: vx3 = memref3 - -
3488 S1: x = load - VS1_0
3489 S2: z = x + 1 - -
3490
3491 step2: vectorize stmt S2 (done here):
3492 To vectorize stmt S2 we first need to find the relevant vector
3493 def for the first operand 'x'. This is, as usual, obtained from
3494 the vector stmt recorded in the STMT_VINFO_VEC_STMT of the stmt
3495 that defines 'x' (S1). This way we find the stmt VS1_0, and the
3496 relevant vector def 'vx0'. Having found 'vx0' we can generate
3497 the vector stmt VS2_0, and as usual, record it in the
3498 STMT_VINFO_VEC_STMT of stmt S2.
3499 When creating the second copy (VS2_1), we obtain the relevant vector
3500 def from the vector stmt recorded in the STMT_VINFO_RELATED_STMT of
3501 stmt VS1_0. This way we find the stmt VS1_1 and the relevant
3502 vector def 'vx1'. Using 'vx1' we create stmt VS2_1 and record a
3503 pointer to it in the STMT_VINFO_RELATED_STMT of the vector stmt VS2_0.
3504 Similarly when creating stmts VS2_2 and VS2_3. This is the resulting
3505 chain of stmts and pointers:
3506 RELATED_STMT VEC_STMT
3507 VS1_0: vx0 = memref0 VS1_1 -
3508 VS1_1: vx1 = memref1 VS1_2 -
3509 VS1_2: vx2 = memref2 VS1_3 -
3510 VS1_3: vx3 = memref3 - -
3511 S1: x = load - VS1_0
3512 VS2_0: vz0 = vx0 + v1 VS2_1 -
3513 VS2_1: vz1 = vx1 + v1 VS2_2 -
3514 VS2_2: vz2 = vx2 + v1 VS2_3 -
3515 VS2_3: vz3 = vx3 + v1 - -
3516 S2: z = x + 1 - VS2_0 */
3517
3518 prev_stmt_info = NULL;
3519 for (j = 0; j < ncopies; j++)
3520 {
3521 /* Handle uses. */
3522 if (j == 0)
3523 {
3524 if (op_type == binary_op || op_type == ternary_op)
3525 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
3526 slp_node, -1);
3527 else
3528 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
3529 slp_node, -1);
3530 if (op_type == ternary_op)
3531 {
3532 vec_oprnds2 = VEC_alloc (tree, heap, 1);
3533 VEC_quick_push (tree, vec_oprnds2,
3534 vect_get_vec_def_for_operand (op2, stmt, NULL));
3535 }
3536 }
3537 else
3538 {
3539 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, &vec_oprnds1);
3540 if (op_type == ternary_op)
3541 {
3542 tree vec_oprnd = VEC_pop (tree, vec_oprnds2);
3543 VEC_quick_push (tree, vec_oprnds2,
3544 vect_get_vec_def_for_stmt_copy (dt[2],
3545 vec_oprnd));
3546 }
3547 }
3548
3549 /* Arguments are ready. Create the new vector stmt. */
3550 FOR_EACH_VEC_ELT (tree, vec_oprnds0, i, vop0)
3551 {
3552 vop1 = ((op_type == binary_op || op_type == ternary_op)
3553 ? VEC_index (tree, vec_oprnds1, i) : NULL_TREE);
3554 vop2 = ((op_type == ternary_op)
3555 ? VEC_index (tree, vec_oprnds2, i) : NULL_TREE);
3556 new_stmt = gimple_build_assign_with_ops3 (code, vec_dest,
3557 vop0, vop1, vop2);
3558 new_temp = make_ssa_name (vec_dest, new_stmt);
3559 gimple_assign_set_lhs (new_stmt, new_temp);
3560 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3561 if (slp_node)
3562 VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node), new_stmt);
3563 }
3564
3565 if (slp_node)
3566 continue;
3567
3568 if (j == 0)
3569 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3570 else
3571 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3572 prev_stmt_info = vinfo_for_stmt (new_stmt);
3573 }
3574
3575 VEC_free (tree, heap, vec_oprnds0);
3576 if (vec_oprnds1)
3577 VEC_free (tree, heap, vec_oprnds1);
3578 if (vec_oprnds2)
3579 VEC_free (tree, heap, vec_oprnds2);
3580
3581 return true;
3582 }
3583
3584
3585 /* Function vectorizable_store.
3586
3587    Check if STMT defines a non-scalar data-ref (array/pointer/structure) that
3588    can be vectorized.
3589    If VEC_STMT is also passed, vectorize the STMT: create a vectorized
3590    stmt to replace it, put it in VEC_STMT, and insert it at GSI.
3591 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
3592
3593 static bool
3594 vectorizable_store (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
3595 slp_tree slp_node)
3596 {
3597 tree scalar_dest;
3598 tree data_ref;
3599 tree op;
3600 tree vec_oprnd = NULL_TREE;
3601 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3602 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr = NULL;
3603 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
3604 tree elem_type;
3605 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3606 struct loop *loop = NULL;
3607 enum machine_mode vec_mode;
3608 tree dummy;
3609 enum dr_alignment_support alignment_support_scheme;
3610 tree def;
3611 gimple def_stmt;
3612 enum vect_def_type dt;
3613 stmt_vec_info prev_stmt_info = NULL;
3614 tree dataref_ptr = NULL_TREE;
3615 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
3616 int ncopies;
3617 int j;
3618 gimple next_stmt, first_stmt = NULL;
3619 bool strided_store = false;
3620 bool store_lanes_p = false;
3621 unsigned int group_size, i;
3622 VEC(tree,heap) *dr_chain = NULL, *oprnds = NULL, *result_chain = NULL;
3623 bool inv_p;
3624 VEC(tree,heap) *vec_oprnds = NULL;
3625 bool slp = (slp_node != NULL);
3626 unsigned int vec_num;
3627 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
3628 tree aggr_type;
3629
3630 if (loop_vinfo)
3631 loop = LOOP_VINFO_LOOP (loop_vinfo);
3632
3633 /* Multiple types in SLP are handled by creating the appropriate number of
3634 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
3635 case of SLP. */
3636 if (slp || PURE_SLP_STMT (stmt_info))
3637 ncopies = 1;
3638 else
3639 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
3640
3641 gcc_assert (ncopies >= 1);
3642
3643 /* FORNOW. This restriction should be relaxed. */
3644 if (loop && nested_in_vect_loop_p (loop, stmt) && ncopies > 1)
3645 {
3646 if (vect_print_dump_info (REPORT_DETAILS))
3647 fprintf (vect_dump, "multiple types in nested loop.");
3648 return false;
3649 }
3650
3651 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
3652 return false;
3653
3654 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
3655 return false;
3656
3657 /* Is vectorizable store? */
3658
3659 if (!is_gimple_assign (stmt))
3660 return false;
3661
3662 scalar_dest = gimple_assign_lhs (stmt);
3663 if (TREE_CODE (scalar_dest) == VIEW_CONVERT_EXPR
3664 && is_pattern_stmt_p (stmt_info))
3665 scalar_dest = TREE_OPERAND (scalar_dest, 0);
3666 if (TREE_CODE (scalar_dest) != ARRAY_REF
3667 && TREE_CODE (scalar_dest) != INDIRECT_REF
3668 && TREE_CODE (scalar_dest) != COMPONENT_REF
3669 && TREE_CODE (scalar_dest) != IMAGPART_EXPR
3670 && TREE_CODE (scalar_dest) != REALPART_EXPR
3671 && TREE_CODE (scalar_dest) != MEM_REF)
3672 return false;
3673
3674 gcc_assert (gimple_assign_single_p (stmt));
3675 op = gimple_assign_rhs1 (stmt);
3676 if (!vect_is_simple_use (op, loop_vinfo, bb_vinfo, &def_stmt, &def, &dt))
3677 {
3678 if (vect_print_dump_info (REPORT_DETAILS))
3679 fprintf (vect_dump, "use not simple.");
3680 return false;
3681 }
3682
3683 elem_type = TREE_TYPE (vectype);
3684 vec_mode = TYPE_MODE (vectype);
3685
3686   /* FORNOW. In some cases we can vectorize even if the data-type is not
3687      supported (e.g. array initialization with 0).  */
3688 if (optab_handler (mov_optab, vec_mode) == CODE_FOR_nothing)
3689 return false;
3690
3691 if (!STMT_VINFO_DATA_REF (stmt_info))
3692 return false;
3693
3694 if (tree_int_cst_compare (DR_STEP (dr), size_zero_node) < 0)
3695 {
3696 if (vect_print_dump_info (REPORT_DETAILS))
3697 fprintf (vect_dump, "negative step for store.");
3698 return false;
3699 }
3700
3701 if (STMT_VINFO_STRIDED_ACCESS (stmt_info))
3702 {
3703 strided_store = true;
3704 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
3705 if (!slp && !PURE_SLP_STMT (stmt_info))
3706 {
3707 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
3708 if (vect_store_lanes_supported (vectype, group_size))
3709 store_lanes_p = true;
3710 else if (!vect_strided_store_supported (vectype, group_size))
3711 return false;
3712 }
3713
3714 if (first_stmt == stmt)
3715 {
3716 /* STMT is the leader of the group. Check the operands of all the
3717 stmts of the group. */
3718 next_stmt = GROUP_NEXT_ELEMENT (stmt_info);
3719 while (next_stmt)
3720 {
3721 gcc_assert (gimple_assign_single_p (next_stmt));
3722 op = gimple_assign_rhs1 (next_stmt);
3723 if (!vect_is_simple_use (op, loop_vinfo, bb_vinfo, &def_stmt,
3724 &def, &dt))
3725 {
3726 if (vect_print_dump_info (REPORT_DETAILS))
3727 fprintf (vect_dump, "use not simple.");
3728 return false;
3729 }
3730 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
3731 }
3732 }
3733 }
3734
3735 if (!vec_stmt) /* transformation not required. */
3736 {
3737 STMT_VINFO_TYPE (stmt_info) = store_vec_info_type;
3738 vect_model_store_cost (stmt_info, ncopies, store_lanes_p, dt, NULL);
3739 return true;
3740 }
3741
3742 /** Transform. **/
3743
3744 if (strided_store)
3745 {
3746 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
3747 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
3748
3749 GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt))++;
3750
3751 /* FORNOW */
3752 gcc_assert (!loop || !nested_in_vect_loop_p (loop, stmt));
3753
3754 /* We vectorize all the stmts of the interleaving group when we
3755 reach the last stmt in the group. */
3756 if (GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt))
3757 < GROUP_SIZE (vinfo_for_stmt (first_stmt))
3758 && !slp)
3759 {
3760 *vec_stmt = NULL;
3761 return true;
3762 }
3763
3764 if (slp)
3765 {
3766 strided_store = false;
3767 /* VEC_NUM is the number of vect stmts to be created for this
3768 group. */
3769 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
3770 first_stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (slp_node), 0);
3771 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
3772 op = gimple_assign_rhs1 (first_stmt);
3773 }
3774 else
3775 /* VEC_NUM is the number of vect stmts to be created for this
3776 group. */
3777 vec_num = group_size;
3778 }
3779 else
3780 {
3781 first_stmt = stmt;
3782 first_dr = dr;
3783 group_size = vec_num = 1;
3784 }
3785
3786 if (vect_print_dump_info (REPORT_DETAILS))
3787     fprintf (vect_dump, "transform store. ncopies = %d", ncopies);
3788
3789 dr_chain = VEC_alloc (tree, heap, group_size);
3790 oprnds = VEC_alloc (tree, heap, group_size);
3791
3792 alignment_support_scheme = vect_supportable_dr_alignment (first_dr, false);
3793 gcc_assert (alignment_support_scheme);
3794 /* Targets with store-lane instructions must not require explicit
3795 realignment. */
3796 gcc_assert (!store_lanes_p
3797 || alignment_support_scheme == dr_aligned
3798 || alignment_support_scheme == dr_unaligned_supported);
3799
3800 if (store_lanes_p)
3801 aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
3802 else
3803 aggr_type = vectype;
3804
3805 /* In case the vectorization factor (VF) is bigger than the number
3806 of elements that we can fit in a vectype (nunits), we have to generate
3807      more than one vector stmt - i.e., we need to "unroll" the
3808 vector stmt by a factor VF/nunits. For more details see documentation in
3809 vect_get_vec_def_for_copy_stmt. */
3810
3811 /* In case of interleaving (non-unit strided access):
3812
3813 S1: &base + 2 = x2
3814 S2: &base = x0
3815 S3: &base + 1 = x1
3816 S4: &base + 3 = x3
3817
3818      We create vectorized stores starting from the base address (the access of
3819      the first stmt in the chain, S2 in the above example), when the last store
3820      stmt of the chain (S4) is reached:
3821
3822 VS1: &base = vx2
3823 VS2: &base + vec_size*1 = vx0
3824 VS3: &base + vec_size*2 = vx1
3825 VS4: &base + vec_size*3 = vx3
3826
3827 Then permutation statements are generated:
3828
3829 VS5: vx5 = VEC_INTERLEAVE_HIGH_EXPR < vx0, vx3 >
3830 VS6: vx6 = VEC_INTERLEAVE_LOW_EXPR < vx0, vx3 >
3831 ...
3832
3833 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
3834 (the order of the data-refs in the output of vect_permute_store_chain
3835 corresponds to the order of scalar stmts in the interleaving chain - see
3836 the documentation of vect_permute_store_chain()).
3837
3838 In case of both multiple types and interleaving, above vector stores and
3839 permutation stmts are created for every copy. The result vector stmts are
3840 put in STMT_VINFO_VEC_STMT for the first copy and in the corresponding
3841 STMT_VINFO_RELATED_STMT for the next copies.
3842 */
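     Illustrative example (an assumption added for exposition, not from the
     original sources): with two stores per group and 4-element vectors,
     vect_permute_store_chain interleaves

       vx0 = {a0, a1, a2, a3}   vx1 = {b0, b1, b2, b3}

     into the memory-order vectors

       {a0, b0, a1, b1}  and  {a2, b2, a3, b3}

     (which half comes from VEC_INTERLEAVE_HIGH_EXPR and which from
     VEC_INTERLEAVE_LOW_EXPR depends on the target's lane numbering).  */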
3843
3844 prev_stmt_info = NULL;
3845 for (j = 0; j < ncopies; j++)
3846 {
3847 gimple new_stmt;
3848 gimple ptr_incr;
3849
3850 if (j == 0)
3851 {
3852 if (slp)
3853 {
3854 /* Get vectorized arguments for SLP_NODE. */
3855 vect_get_vec_defs (op, NULL_TREE, stmt, &vec_oprnds,
3856 NULL, slp_node, -1);
3857
3858 vec_oprnd = VEC_index (tree, vec_oprnds, 0);
3859 }
3860 else
3861 {
3862 /* For interleaved stores we collect vectorized defs for all the
3863 stores in the group in DR_CHAIN and OPRNDS. DR_CHAIN is then
3864 used as an input to vect_permute_store_chain(), and OPRNDS as
3865 an input to vect_get_vec_def_for_stmt_copy() for the next copy.
3866
3867 If the store is not strided, GROUP_SIZE is 1, and DR_CHAIN and
3868 OPRNDS are of size 1. */
3869 next_stmt = first_stmt;
3870 for (i = 0; i < group_size; i++)
3871 {
3872 /* Since gaps are not supported for interleaved stores,
3873 GROUP_SIZE is the exact number of stmts in the chain.
3874                  Therefore, NEXT_STMT can't be NULL_TREE.  In case
3875 there is no interleaving, GROUP_SIZE is 1, and only one
3876 iteration of the loop will be executed. */
3877 gcc_assert (next_stmt
3878 && gimple_assign_single_p (next_stmt));
3879 op = gimple_assign_rhs1 (next_stmt);
3880
3881 vec_oprnd = vect_get_vec_def_for_operand (op, next_stmt,
3882 NULL);
3883                  VEC_quick_push (tree, dr_chain, vec_oprnd);
3884                  VEC_quick_push (tree, oprnds, vec_oprnd);
3885 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
3886 }
3887 }
3888
3889          /* We should have caught mismatched types earlier.  */
3890 gcc_assert (useless_type_conversion_p (vectype,
3891 TREE_TYPE (vec_oprnd)));
3892 dataref_ptr = vect_create_data_ref_ptr (first_stmt, aggr_type, NULL,
3893 NULL_TREE, &dummy, gsi,
3894 &ptr_incr, false, &inv_p);
3895 gcc_assert (bb_vinfo || !inv_p);
3896 }
3897 else
3898 {
3899 /* For interleaved stores we created vectorized defs for all the
3900 defs stored in OPRNDS in the previous iteration (previous copy).
3901 DR_CHAIN is then used as an input to vect_permute_store_chain(),
3902 and OPRNDS as an input to vect_get_vec_def_for_stmt_copy() for the
3903 next copy.
3904 If the store is not strided, GROUP_SIZE is 1, and DR_CHAIN and
3905 OPRNDS are of size 1. */
3906 for (i = 0; i < group_size; i++)
3907 {
3908 op = VEC_index (tree, oprnds, i);
3909 vect_is_simple_use (op, loop_vinfo, bb_vinfo, &def_stmt, &def,
3910 &dt);
3911 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, op);
3912              VEC_replace (tree, dr_chain, i, vec_oprnd);
3913              VEC_replace (tree, oprnds, i, vec_oprnd);
3914 }
3915 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
3916 TYPE_SIZE_UNIT (aggr_type));
3917 }
3918
3919 if (store_lanes_p)
3920 {
3921 tree vec_array;
3922
3923 /* Combine all the vectors into an array. */
3924 vec_array = create_vector_array (vectype, vec_num);
3925 for (i = 0; i < vec_num; i++)
3926 {
3927 vec_oprnd = VEC_index (tree, dr_chain, i);
3928 write_vector_array (stmt, gsi, vec_oprnd, vec_array, i);
3929 }
3930
3931 /* Emit:
3932 MEM_REF[...all elements...] = STORE_LANES (VEC_ARRAY). */
3933 data_ref = create_array_ref (aggr_type, dataref_ptr, first_dr);
3934 new_stmt = gimple_build_call_internal (IFN_STORE_LANES, 1, vec_array);
3935 gimple_call_set_lhs (new_stmt, data_ref);
3936 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3937 mark_symbols_for_renaming (new_stmt);
3938 }
3939 else
3940 {
3941 new_stmt = NULL;
3942 if (strided_store)
3943 {
3944 result_chain = VEC_alloc (tree, heap, group_size);
3945 /* Permute. */
3946 vect_permute_store_chain (dr_chain, group_size, stmt, gsi,
3947 &result_chain);
3948 }
3949
3950 next_stmt = first_stmt;
3951 for (i = 0; i < vec_num; i++)
3952 {
3953 struct ptr_info_def *pi;
3954
3955 if (i > 0)
3956 /* Bump the vector pointer. */
3957 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
3958 stmt, NULL_TREE);
3959
3960 if (slp)
3961 vec_oprnd = VEC_index (tree, vec_oprnds, i);
3962 else if (strided_store)
3963 /* For strided stores vectorized defs are interleaved in
3964 vect_permute_store_chain(). */
3965 vec_oprnd = VEC_index (tree, result_chain, i);
3966
3967 data_ref = build2 (MEM_REF, TREE_TYPE (vec_oprnd), dataref_ptr,
3968 build_int_cst (reference_alias_ptr_type
3969 (DR_REF (first_dr)), 0));
3970 pi = get_ptr_info (dataref_ptr);
3971 pi->align = TYPE_ALIGN_UNIT (vectype);
3972 if (aligned_access_p (first_dr))
3973 pi->misalign = 0;
3974 else if (DR_MISALIGNMENT (first_dr) == -1)
3975 {
3976 TREE_TYPE (data_ref)
3977 = build_aligned_type (TREE_TYPE (data_ref),
3978 TYPE_ALIGN (elem_type));
3979 pi->align = TYPE_ALIGN_UNIT (elem_type);
3980 pi->misalign = 0;
3981 }
3982 else
3983 {
3984 TREE_TYPE (data_ref)
3985 = build_aligned_type (TREE_TYPE (data_ref),
3986 TYPE_ALIGN (elem_type));
3987 pi->misalign = DR_MISALIGNMENT (first_dr);
3988 }
3989
3990 /* Arguments are ready. Create the new vector stmt. */
3991 new_stmt = gimple_build_assign (data_ref, vec_oprnd);
3992 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3993 mark_symbols_for_renaming (new_stmt);
3994
3995 if (slp)
3996 continue;
3997
3998 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
3999 if (!next_stmt)
4000 break;
4001 }
4002 }
4003 if (!slp)
4004 {
4005 if (j == 0)
4006 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4007 else
4008 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4009 prev_stmt_info = vinfo_for_stmt (new_stmt);
4010 }
4011 }
4012
4013 VEC_free (tree, heap, dr_chain);
4014 VEC_free (tree, heap, oprnds);
4015 if (result_chain)
4016 VEC_free (tree, heap, result_chain);
4017 if (vec_oprnds)
4018 VEC_free (tree, heap, vec_oprnds);
4019
4020 return true;
4021 }
4022
4023 /* Given a vector type VECTYPE and permutation SEL returns
4024 the VECTOR_CST mask that implements the permutation of the
4025 vector elements. If that is impossible to do, returns NULL. */
4026
4027 static tree
4028 gen_perm_mask (tree vectype, unsigned char *sel)
4029 {
4030 tree mask_elt_type, mask_type, mask_vec;
4031 int i, nunits;
4032
4033 nunits = TYPE_VECTOR_SUBPARTS (vectype);
4034
4035 if (!can_vec_perm_p (TYPE_MODE (vectype), false, sel))
4036 return NULL;
4037
4038 mask_elt_type
4039 = lang_hooks.types.type_for_size
4040 (TREE_INT_CST_LOW (TYPE_SIZE (TREE_TYPE (vectype))), 1);
4041 mask_type = get_vectype_for_scalar_type (mask_elt_type);
4042
4043 mask_vec = NULL;
4044 for (i = nunits - 1; i >= 0; i--)
4045 mask_vec = tree_cons (NULL, build_int_cst (mask_elt_type, sel[i]),
4046 mask_vec);
4047 mask_vec = build_vector (mask_type, mask_vec);
4048
4049 return mask_vec;
4050 }
4051
4052 /* Given a vector type VECTYPE returns the VECTOR_CST mask that implements
4053 reversal of the vector elements. If that is impossible to do,
4054 returns NULL. */
4055
4056 static tree
4057 perm_mask_for_reverse (tree vectype)
4058 {
4059 int i, nunits;
4060 unsigned char *sel;
4061
4062 nunits = TYPE_VECTOR_SUBPARTS (vectype);
4063 sel = XALLOCAVEC (unsigned char, nunits);
4064
4065 for (i = 0; i < nunits; ++i)
4066 sel[i] = nunits - 1 - i;
4067
4068 return gen_perm_mask (vectype, sel);
4069 }
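/* For example (illustrative, assuming a 4-element vector type), the selector
   built above is {3, 2, 1, 0}, so the resulting mask reverses
   {x0, x1, x2, x3} into {x3, x2, x1, x0} when used in a VEC_PERM_EXPR whose
   two input operands are the same vector.  */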
4070
4071 /* Given vector variables X and Y that were generated for the scalar
4072    STMT, generate instructions to permute the vector elements of X and Y
4073 using permutation mask MASK_VEC, insert them at *GSI and return the
4074 permuted vector variable. */
4075
4076 static tree
4077 permute_vec_elements (tree x, tree y, tree mask_vec, gimple stmt,
4078 gimple_stmt_iterator *gsi)
4079 {
4080 tree vectype = TREE_TYPE (x);
4081 tree perm_dest, data_ref;
4082 gimple perm_stmt;
4083
4084 perm_dest = vect_create_destination_var (gimple_assign_lhs (stmt), vectype);
4085 data_ref = make_ssa_name (perm_dest, NULL);
4086
4087 /* Generate the permute statement. */
4088 perm_stmt = gimple_build_assign_with_ops3 (VEC_PERM_EXPR, data_ref,
4089 x, y, mask_vec);
4090 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
4091
4092 return data_ref;
4093 }
4094
4095 /* vectorizable_load.
4096
4097    Check if STMT reads a non-scalar data-ref (array/pointer/structure) that
4098    can be vectorized.
4099    If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4100    stmt to replace it, put it in VEC_STMT, and insert it at GSI.
4101 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
4102
4103 static bool
4104 vectorizable_load (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
4105 slp_tree slp_node, slp_instance slp_node_instance)
4106 {
4107 tree scalar_dest;
4108 tree vec_dest = NULL;
4109 tree data_ref = NULL;
4110 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4111 stmt_vec_info prev_stmt_info;
4112 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4113 struct loop *loop = NULL;
4114 struct loop *containing_loop = (gimple_bb (stmt))->loop_father;
4115 bool nested_in_vect_loop = false;
4116 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr;
4117 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
4118 tree elem_type;
4119 tree new_temp;
4120 enum machine_mode mode;
4121 gimple new_stmt = NULL;
4122 tree dummy;
4123 enum dr_alignment_support alignment_support_scheme;
4124 tree dataref_ptr = NULL_TREE;
4125 gimple ptr_incr;
4126 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
4127 int ncopies;
4128 int i, j, group_size;
4129 tree msq = NULL_TREE, lsq;
4130 tree offset = NULL_TREE;
4131 tree realignment_token = NULL_TREE;
4132 gimple phi = NULL;
4133 VEC(tree,heap) *dr_chain = NULL;
4134 bool strided_load = false;
4135 bool load_lanes_p = false;
4136 gimple first_stmt;
4137 bool inv_p;
4138 bool negative;
4139 bool compute_in_loop = false;
4140 struct loop *at_loop;
4141 int vec_num;
4142 bool slp = (slp_node != NULL);
4143 bool slp_perm = false;
4144 enum tree_code code;
4145 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
4146 int vf;
4147 tree aggr_type;
4148 tree gather_base = NULL_TREE, gather_off = NULL_TREE;
4149 tree gather_off_vectype = NULL_TREE, gather_decl = NULL_TREE;
4150 int gather_scale = 1;
4151 enum vect_def_type gather_dt = vect_unknown_def_type;
4152
4153 if (loop_vinfo)
4154 {
4155 loop = LOOP_VINFO_LOOP (loop_vinfo);
4156 nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
4157 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
4158 }
4159 else
4160 vf = 1;
4161
4162 /* Multiple types in SLP are handled by creating the appropriate number of
4163 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
4164 case of SLP. */
4165 if (slp || PURE_SLP_STMT (stmt_info))
4166 ncopies = 1;
4167 else
4168 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
4169
4170 gcc_assert (ncopies >= 1);
4171
4172 /* FORNOW. This restriction should be relaxed. */
4173 if (nested_in_vect_loop && ncopies > 1)
4174 {
4175 if (vect_print_dump_info (REPORT_DETAILS))
4176 fprintf (vect_dump, "multiple types in nested loop.");
4177 return false;
4178 }
4179
4180 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
4181 return false;
4182
4183 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
4184 return false;
4185
4186 /* Is vectorizable load? */
4187 if (!is_gimple_assign (stmt))
4188 return false;
4189
4190 scalar_dest = gimple_assign_lhs (stmt);
4191 if (TREE_CODE (scalar_dest) != SSA_NAME)
4192 return false;
4193
4194 code = gimple_assign_rhs_code (stmt);
4195 if (code != ARRAY_REF
4196 && code != INDIRECT_REF
4197 && code != COMPONENT_REF
4198 && code != IMAGPART_EXPR
4199 && code != REALPART_EXPR
4200 && code != MEM_REF
4201 && TREE_CODE_CLASS (code) != tcc_declaration)
4202 return false;
4203
4204 if (!STMT_VINFO_DATA_REF (stmt_info))
4205 return false;
4206
4207 negative = tree_int_cst_compare (DR_STEP (dr), size_zero_node) < 0;
4208 if (negative && ncopies > 1)
4209 {
4210 if (vect_print_dump_info (REPORT_DETAILS))
4211 fprintf (vect_dump, "multiple types with negative step.");
4212 return false;
4213 }
4214
4215 elem_type = TREE_TYPE (vectype);
4216 mode = TYPE_MODE (vectype);
4217
4218   /* FORNOW. In some cases we can vectorize even if the data-type is not
4219      supported (e.g. data copies).  */
4220 if (optab_handler (mov_optab, mode) == CODE_FOR_nothing)
4221 {
4222 if (vect_print_dump_info (REPORT_DETAILS))
4223 fprintf (vect_dump, "Aligned load, but unsupported type.");
4224 return false;
4225 }
4226
4227 /* Check if the load is a part of an interleaving chain. */
4228 if (STMT_VINFO_STRIDED_ACCESS (stmt_info))
4229 {
4230 strided_load = true;
4231 /* FORNOW */
4232       gcc_assert (!nested_in_vect_loop && !STMT_VINFO_GATHER_P (stmt_info));
4233
4234 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
4235 if (!slp && !PURE_SLP_STMT (stmt_info))
4236 {
4237 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
4238 if (vect_load_lanes_supported (vectype, group_size))
4239 load_lanes_p = true;
4240 else if (!vect_strided_load_supported (vectype, group_size))
4241 return false;
4242 }
4243 }
4244
4245 if (negative)
4246 {
4247 gcc_assert (!strided_load && !STMT_VINFO_GATHER_P (stmt_info));
4248 alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
4249 if (alignment_support_scheme != dr_aligned
4250 && alignment_support_scheme != dr_unaligned_supported)
4251 {
4252 if (vect_print_dump_info (REPORT_DETAILS))
4253 fprintf (vect_dump, "negative step but alignment required.");
4254 return false;
4255 }
4256 if (!perm_mask_for_reverse (vectype))
4257 {
4258 if (vect_print_dump_info (REPORT_DETAILS))
4259 fprintf (vect_dump, "negative step and reversing not supported.");
4260 return false;
4261 }
4262 }
4263
4264 if (STMT_VINFO_GATHER_P (stmt_info))
4265 {
4266 gimple def_stmt;
4267 tree def;
4268 gather_decl = vect_check_gather (stmt, loop_vinfo, &gather_base,
4269 &gather_off, &gather_scale);
4270 gcc_assert (gather_decl);
4271 if (!vect_is_simple_use_1 (gather_off, loop_vinfo, bb_vinfo,
4272 &def_stmt, &def, &gather_dt,
4273 &gather_off_vectype))
4274 {
4275 if (vect_print_dump_info (REPORT_DETAILS))
4276 fprintf (vect_dump, "gather index use not simple.");
4277 return false;
4278 }
4279 }
4280
4281 if (!vec_stmt) /* transformation not required. */
4282 {
4283 STMT_VINFO_TYPE (stmt_info) = load_vec_info_type;
4284 vect_model_load_cost (stmt_info, ncopies, load_lanes_p, NULL);
4285 return true;
4286 }
4287
4288 if (vect_print_dump_info (REPORT_DETAILS))
4289 fprintf (vect_dump, "transform load. ncopies = %d", ncopies);
4290
4291 /** Transform. **/
4292
4293 if (STMT_VINFO_GATHER_P (stmt_info))
4294 {
4295 tree vec_oprnd0 = NULL_TREE, op;
4296 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gather_decl));
4297 tree rettype, srctype, ptrtype, idxtype, masktype, scaletype;
4298 tree ptr, mask, var, scale, perm_mask = NULL_TREE, prev_res = NULL_TREE;
4299 edge pe = loop_preheader_edge (loop);
4300 gimple_seq seq;
4301 basic_block new_bb;
4302 enum { NARROW, NONE, WIDEN } modifier;
4303 int gather_off_nunits = TYPE_VECTOR_SUBPARTS (gather_off_vectype);
4304
4305 if (nunits == gather_off_nunits)
4306 modifier = NONE;
4307 else if (nunits == gather_off_nunits / 2)
4308 {
4309 unsigned char *sel = XALLOCAVEC (unsigned char, gather_off_nunits);
4310 modifier = WIDEN;
4311
4312 for (i = 0; i < gather_off_nunits; ++i)
4313 sel[i] = i | nunits;
4314
4315 perm_mask = gen_perm_mask (gather_off_vectype, sel);
4316 gcc_assert (perm_mask != NULL_TREE);
4317 }
4318 else if (nunits == gather_off_nunits * 2)
4319 {
4320 unsigned char *sel = XALLOCAVEC (unsigned char, nunits);
4321 modifier = NARROW;
4322
4323 for (i = 0; i < nunits; ++i)
4324 sel[i] = i < gather_off_nunits
4325 ? i : i + nunits - gather_off_nunits;
4326
4327 perm_mask = gen_perm_mask (vectype, sel);
4328 gcc_assert (perm_mask != NULL_TREE);
4329 ncopies *= 2;
4330 }
4331 else
4332 gcc_unreachable ();
4333
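      /* Illustrative values (an assumption added for exposition, not from the
         original sources): with nunits == 4 and gather_off_nunits == 8 the
         WIDEN selector built above is {4, 5, 6, 7, 4, 5, 6, 7}, so odd copies
         can reuse the second half of an offset vector; with nunits == 8 and
         gather_off_nunits == 4 the NARROW selector is
         {0, 1, 2, 3, 8, 9, 10, 11}, which lets two successive gather results
         be combined into one vector of type VECTYPE.  */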
4334 rettype = TREE_TYPE (TREE_TYPE (gather_decl));
4335 srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
4336 ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
4337 idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
4338 masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
4339 scaletype = TREE_VALUE (arglist);
4340 gcc_checking_assert (types_compatible_p (srctype, rettype)
4341 && types_compatible_p (srctype, masktype));
4342
4343 vec_dest = vect_create_destination_var (scalar_dest, vectype);
4344
4345 ptr = fold_convert (ptrtype, gather_base);
4346 if (!is_gimple_min_invariant (ptr))
4347 {
4348 ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
4349 new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
4350 gcc_assert (!new_bb);
4351 }
4352
4353 /* Currently we support only unconditional gather loads,
4354 so mask should be all ones. */
4355 if (TREE_CODE (TREE_TYPE (masktype)) == INTEGER_TYPE)
4356 mask = build_int_cst (TREE_TYPE (masktype), -1);
4357 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (masktype)))
4358 {
4359 REAL_VALUE_TYPE r;
4360 long tmp[6];
4361 for (j = 0; j < 6; ++j)
4362 tmp[j] = -1;
4363 real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (masktype)));
4364 mask = build_real (TREE_TYPE (masktype), r);
4365 }
4366 else
4367 gcc_unreachable ();
4368 mask = build_vector_from_val (masktype, mask);
4369 mask = vect_init_vector (stmt, mask, masktype, NULL);
4370
4371 scale = build_int_cst (scaletype, gather_scale);
4372
4373 prev_stmt_info = NULL;
4374 for (j = 0; j < ncopies; ++j)
4375 {
4376 if (modifier == WIDEN && (j & 1))
4377 op = permute_vec_elements (vec_oprnd0, vec_oprnd0,
4378 perm_mask, stmt, gsi);
4379 else if (j == 0)
4380 op = vec_oprnd0
4381 = vect_get_vec_def_for_operand (gather_off, stmt, NULL);
4382 else
4383 op = vec_oprnd0
4384 = vect_get_vec_def_for_stmt_copy (gather_dt, vec_oprnd0);
4385
4386 if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
4387 {
4388 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op))
4389 == TYPE_VECTOR_SUBPARTS (idxtype));
4390 var = vect_get_new_vect_var (idxtype, vect_simple_var, NULL);
4391 add_referenced_var (var);
4392 var = make_ssa_name (var, NULL);
4393 op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
4394 new_stmt
4395 = gimple_build_assign_with_ops (VIEW_CONVERT_EXPR, var,
4396 op, NULL_TREE);
4397 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4398 op = var;
4399 }
4400
4401 new_stmt
4402 = gimple_build_call (gather_decl, 5, mask, ptr, op, mask, scale);
4403
4404 if (!useless_type_conversion_p (vectype, rettype))
4405 {
4406 gcc_assert (TYPE_VECTOR_SUBPARTS (vectype)
4407 == TYPE_VECTOR_SUBPARTS (rettype));
4408 var = vect_get_new_vect_var (rettype, vect_simple_var, NULL);
4409 add_referenced_var (var);
4410 op = make_ssa_name (var, new_stmt);
4411 gimple_call_set_lhs (new_stmt, op);
4412 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4413 var = make_ssa_name (vec_dest, NULL);
4414 op = build1 (VIEW_CONVERT_EXPR, vectype, op);
4415 new_stmt
4416 = gimple_build_assign_with_ops (VIEW_CONVERT_EXPR, var, op,
4417 NULL_TREE);
4418 }
4419 else
4420 {
4421 var = make_ssa_name (vec_dest, new_stmt);
4422 gimple_call_set_lhs (new_stmt, var);
4423 }
4424
4425 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4426
4427 if (modifier == NARROW)
4428 {
4429 if ((j & 1) == 0)
4430 {
4431 prev_res = var;
4432 continue;
4433 }
4434 var = permute_vec_elements (prev_res, var,
4435 perm_mask, stmt, gsi);
4436 new_stmt = SSA_NAME_DEF_STMT (var);
4437 }
4438
4439 if (prev_stmt_info == NULL)
4440 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4441 else
4442 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4443 prev_stmt_info = vinfo_for_stmt (new_stmt);
4444 }
4445 return true;
4446 }
4447
4448 if (strided_load)
4449 {
4450 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
4451 if (slp
4452 && !SLP_INSTANCE_LOAD_PERMUTATION (slp_node_instance)
4453 && first_stmt != VEC_index (gimple, SLP_TREE_SCALAR_STMTS (slp_node), 0))
4454 first_stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (slp_node), 0);
4455
4456 /* Check if the chain of loads is already vectorized. */
4457 if (STMT_VINFO_VEC_STMT (vinfo_for_stmt (first_stmt)))
4458 {
4459 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
4460 return true;
4461 }
4462 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
4463 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
4464
4465 /* VEC_NUM is the number of vect stmts to be created for this group. */
4466 if (slp)
4467 {
4468 strided_load = false;
4469 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
4470 if (SLP_INSTANCE_LOAD_PERMUTATION (slp_node_instance))
4471 slp_perm = true;
4472 }
4473 else
4474 vec_num = group_size;
4475 }
4476 else
4477 {
4478 first_stmt = stmt;
4479 first_dr = dr;
4480 group_size = vec_num = 1;
4481 }
4482
4483 alignment_support_scheme = vect_supportable_dr_alignment (first_dr, false);
4484 gcc_assert (alignment_support_scheme);
4485 /* Targets with load-lane instructions must not require explicit
4486 realignment. */
4487 gcc_assert (!load_lanes_p
4488 || alignment_support_scheme == dr_aligned
4489 || alignment_support_scheme == dr_unaligned_supported);
4490
4491 /* In case the vectorization factor (VF) is bigger than the number
4492 of elements that we can fit in a vectype (nunits), we have to generate
4493      more than one vector stmt - i.e., we need to "unroll" the
4494 vector stmt by a factor VF/nunits. In doing so, we record a pointer
4495 from one copy of the vector stmt to the next, in the field
4496 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
4497 stages to find the correct vector defs to be used when vectorizing
4498 stmts that use the defs of the current stmt. The example below
4499 illustrates the vectorization process when VF=16 and nunits=4 (i.e., we
4500 need to create 4 vectorized stmts):
4501
4502 before vectorization:
4503 RELATED_STMT VEC_STMT
4504 S1: x = memref - -
4505 S2: z = x + 1 - -
4506
4507 step 1: vectorize stmt S1:
4508 We first create the vector stmt VS1_0, and, as usual, record a
4509 pointer to it in the STMT_VINFO_VEC_STMT of the scalar stmt S1.
4510 Next, we create the vector stmt VS1_1, and record a pointer to
4511 it in the STMT_VINFO_RELATED_STMT of the vector stmt VS1_0.
4512 Similarly, for VS1_2 and VS1_3. This is the resulting chain of
4513 stmts and pointers:
4514 RELATED_STMT VEC_STMT
4515 VS1_0: vx0 = memref0 VS1_1 -
4516 VS1_1: vx1 = memref1 VS1_2 -
4517 VS1_2: vx2 = memref2 VS1_3 -
4518 VS1_3: vx3 = memref3 - -
4519 S1: x = load - VS1_0
4520 S2: z = x + 1 - -
4521
4522 See in documentation in vect_get_vec_def_for_stmt_copy for how the
4523 information we recorded in RELATED_STMT field is used to vectorize
4524 stmt S2. */
4525
4526 /* In case of interleaving (non-unit strided access):
4527
4528 S1: x2 = &base + 2
4529 S2: x0 = &base
4530 S3: x1 = &base + 1
4531 S4: x3 = &base + 3
4532
4533 Vectorized loads are created in the order of memory accesses
4534 starting from the access of the first stmt of the chain:
4535
4536 VS1: vx0 = &base
4537 VS2: vx1 = &base + vec_size*1
4538 VS3: vx3 = &base + vec_size*2
4539 VS4: vx4 = &base + vec_size*3
4540
4541 Then permutation statements are generated:
4542
4543 VS5: vx5 = VEC_EXTRACT_EVEN_EXPR < vx0, vx1 >
4544 VS6: vx6 = VEC_EXTRACT_ODD_EXPR < vx0, vx1 >
4545 ...
4546
4547 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
4548 (the order of the data-refs in the output of vect_permute_load_chain
4549 corresponds to the order of scalar stmts in the interleaving chain - see
4550 the documentation of vect_permute_load_chain()).
4551 The generation of permutation stmts and recording them in
4552 STMT_VINFO_VEC_STMT is done in vect_transform_strided_load().
4553
4554 In case of both multiple types and interleaving, the vector loads and
4555 permutation stmts above are created for every copy. The result vector
4556 stmts are put in STMT_VINFO_VEC_STMT for the first copy and in the
4557 corresponding STMT_VINFO_RELATED_STMT for the next copies. */
4558
4559 /* If the data reference is aligned (dr_aligned) or potentially unaligned
4560 on a target that supports unaligned accesses (dr_unaligned_supported)
4561 we generate the following code:
4562 p = initial_addr;
4563 indx = 0;
4564 loop {
4565 p = p + indx * vectype_size;
4566 vec_dest = *(p);
4567 indx = indx + 1;
4568 }
4569
4570 Otherwise, the data reference is potentially unaligned on a target that
4571 does not support unaligned accesses (dr_explicit_realign_optimized) -
4572 then generate the following code, in which the data in each iteration is
4573 obtained by two vector loads, one from the previous iteration, and one
4574 from the current iteration:
4575 p1 = initial_addr;
4576 msq_init = *(floor(p1))
4577 p2 = initial_addr + VS - 1;
4578 realignment_token = call target_builtin;
4579 indx = 0;
4580 loop {
4581 p2 = p2 + indx * vectype_size
4582 lsq = *(floor(p2))
4583 vec_dest = realign_load (msq, lsq, realignment_token)
4584 indx = indx + 1;
4585 msq = lsq;
4586 } */
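     Illustrative example (an assumption added for exposition, not from the
     original sources): with 16-byte vectors and a pointer misaligned by 4
     bytes, msq is the aligned load covering bytes 0..15 of the enclosing
     aligned block and lsq the aligned load covering bytes 16..31, so

       vec_dest = realign_load (msq, lsq, realignment_token)

     extracts bytes 4..19, i.e. the vector a plain unaligned load would have
     produced.  */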
4587
4588 /* If the misalignment remains the same throughout the execution of the
4589 loop, we can create the init_addr and permutation mask at the loop
4590 preheader. Otherwise, it needs to be created inside the loop.
4591 This can only occur when vectorizing memory accesses in the inner-loop
4592 nested within an outer-loop that is being vectorized. */
4593
4594 if (loop && nested_in_vect_loop_p (loop, stmt)
4595 && (TREE_INT_CST_LOW (DR_STEP (dr))
4596 % GET_MODE_SIZE (TYPE_MODE (vectype)) != 0))
4597 {
4598 gcc_assert (alignment_support_scheme != dr_explicit_realign_optimized);
4599 compute_in_loop = true;
4600 }
4601
4602 if ((alignment_support_scheme == dr_explicit_realign_optimized
4603 || alignment_support_scheme == dr_explicit_realign)
4604 && !compute_in_loop)
4605 {
4606 msq = vect_setup_realignment (first_stmt, gsi, &realignment_token,
4607 alignment_support_scheme, NULL_TREE,
4608 &at_loop);
4609 if (alignment_support_scheme == dr_explicit_realign_optimized)
4610 {
4611 phi = SSA_NAME_DEF_STMT (msq);
4612 offset = size_int (TYPE_VECTOR_SUBPARTS (vectype) - 1);
4613 }
4614 }
4615 else
4616 at_loop = loop;
4617
4618 if (negative)
4619 offset = size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1);
4620
4621 if (load_lanes_p)
4622 aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
4623 else
4624 aggr_type = vectype;
4625
4626 prev_stmt_info = NULL;
4627 for (j = 0; j < ncopies; j++)
4628 {
4629 /* 1. Create the vector or array pointer update chain. */
4630 if (j == 0)
4631 dataref_ptr = vect_create_data_ref_ptr (first_stmt, aggr_type, at_loop,
4632 offset, &dummy, gsi,
4633 &ptr_incr, false, &inv_p);
4634 else
4635 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
4636 TYPE_SIZE_UNIT (aggr_type));
4637
4638 if (strided_load || slp_perm)
4639 dr_chain = VEC_alloc (tree, heap, vec_num);
4640
4641 if (load_lanes_p)
4642 {
4643 tree vec_array;
4644
4645 vec_array = create_vector_array (vectype, vec_num);
4646
4647 /* Emit:
4648 VEC_ARRAY = LOAD_LANES (MEM_REF[...all elements...]). */
4649 data_ref = create_array_ref (aggr_type, dataref_ptr, first_dr);
4650 new_stmt = gimple_build_call_internal (IFN_LOAD_LANES, 1, data_ref);
4651 gimple_call_set_lhs (new_stmt, vec_array);
4652 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4653 mark_symbols_for_renaming (new_stmt);
4654
4655 /* Extract each vector into an SSA_NAME. */
4656 for (i = 0; i < vec_num; i++)
4657 {
4658 new_temp = read_vector_array (stmt, gsi, scalar_dest,
4659 vec_array, i);
4660 VEC_quick_push (tree, dr_chain, new_temp);
4661 }
4662
4663 /* Record the mapping between SSA_NAMEs and statements. */
4664 vect_record_strided_load_vectors (stmt, dr_chain);
4665 }
4666 else
4667 {
4668 for (i = 0; i < vec_num; i++)
4669 {
4670 if (i > 0)
4671 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
4672 stmt, NULL_TREE);
4673
4674 /* 2. Create the vector-load in the loop. */
4675 switch (alignment_support_scheme)
4676 {
4677 case dr_aligned:
4678 case dr_unaligned_supported:
4679 {
4680 struct ptr_info_def *pi;
4681 data_ref
4682 = build2 (MEM_REF, vectype, dataref_ptr,
4683 build_int_cst (reference_alias_ptr_type
4684 (DR_REF (first_dr)), 0));
4685 pi = get_ptr_info (dataref_ptr);
4686 pi->align = TYPE_ALIGN_UNIT (vectype);
4687 if (alignment_support_scheme == dr_aligned)
4688 {
4689 gcc_assert (aligned_access_p (first_dr));
4690 pi->misalign = 0;
4691 }
4692 else if (DR_MISALIGNMENT (first_dr) == -1)
4693 {
4694 TREE_TYPE (data_ref)
4695 = build_aligned_type (TREE_TYPE (data_ref),
4696 TYPE_ALIGN (elem_type));
4697 pi->align = TYPE_ALIGN_UNIT (elem_type);
4698 pi->misalign = 0;
4699 }
4700 else
4701 {
4702 TREE_TYPE (data_ref)
4703 = build_aligned_type (TREE_TYPE (data_ref),
4704 TYPE_ALIGN (elem_type));
4705 pi->misalign = DR_MISALIGNMENT (first_dr);
4706 }
4707 break;
4708 }
4709 case dr_explicit_realign:
4710 {
4711 tree ptr, bump;
4712 tree vs_minus_1;
4713
4714 vs_minus_1 = size_int (TYPE_VECTOR_SUBPARTS (vectype) - 1);
4715
4716 if (compute_in_loop)
4717 msq = vect_setup_realignment (first_stmt, gsi,
4718 &realignment_token,
4719 dr_explicit_realign,
4720 dataref_ptr, NULL);
4721
4722 new_stmt = gimple_build_assign_with_ops
4723 (BIT_AND_EXPR, NULL_TREE, dataref_ptr,
4724 build_int_cst
4725 (TREE_TYPE (dataref_ptr),
4726 -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
4727 ptr = make_ssa_name (SSA_NAME_VAR (dataref_ptr), new_stmt);
4728 gimple_assign_set_lhs (new_stmt, ptr);
4729 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4730 data_ref
4731 = build2 (MEM_REF, vectype, ptr,
4732 build_int_cst (reference_alias_ptr_type
4733 (DR_REF (first_dr)), 0));
4734 vec_dest = vect_create_destination_var (scalar_dest,
4735 vectype);
4736 new_stmt = gimple_build_assign (vec_dest, data_ref);
4737 new_temp = make_ssa_name (vec_dest, new_stmt);
4738 gimple_assign_set_lhs (new_stmt, new_temp);
4739 gimple_set_vdef (new_stmt, gimple_vdef (stmt));
4740 gimple_set_vuse (new_stmt, gimple_vuse (stmt));
4741 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4742 msq = new_temp;
4743
4744 bump = size_binop (MULT_EXPR, vs_minus_1,
4745 TYPE_SIZE_UNIT (elem_type));
4746 ptr = bump_vector_ptr (dataref_ptr, NULL, gsi, stmt, bump);
4747 new_stmt = gimple_build_assign_with_ops
4748 (BIT_AND_EXPR, NULL_TREE, ptr,
4749 build_int_cst
4750 (TREE_TYPE (ptr),
4751 -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
4752 ptr = make_ssa_name (SSA_NAME_VAR (dataref_ptr), new_stmt);
4753 gimple_assign_set_lhs (new_stmt, ptr);
4754 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4755 data_ref
4756 = build2 (MEM_REF, vectype, ptr,
4757 build_int_cst (reference_alias_ptr_type
4758 (DR_REF (first_dr)), 0));
4759 break;
4760 }
4761 case dr_explicit_realign_optimized:
4762 new_stmt = gimple_build_assign_with_ops
4763 (BIT_AND_EXPR, NULL_TREE, dataref_ptr,
4764 build_int_cst
4765 (TREE_TYPE (dataref_ptr),
4766 -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
4767 new_temp = make_ssa_name (SSA_NAME_VAR (dataref_ptr),
4768 new_stmt);
4769 gimple_assign_set_lhs (new_stmt, new_temp);
4770 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4771 data_ref
4772 = build2 (MEM_REF, vectype, new_temp,
4773 build_int_cst (reference_alias_ptr_type
4774 (DR_REF (first_dr)), 0));
4775 break;
4776 default:
4777 gcc_unreachable ();
4778 }
4779 vec_dest = vect_create_destination_var (scalar_dest, vectype);
4780 new_stmt = gimple_build_assign (vec_dest, data_ref);
4781 new_temp = make_ssa_name (vec_dest, new_stmt);
4782 gimple_assign_set_lhs (new_stmt, new_temp);
4783 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4784 mark_symbols_for_renaming (new_stmt);
4785
4786 /* 3. Handle explicit realignment if necessary/supported.
4787 Create in loop:
4788 vec_dest = realign_load (msq, lsq, realignment_token) */
4789 if (alignment_support_scheme == dr_explicit_realign_optimized
4790 || alignment_support_scheme == dr_explicit_realign)
4791 {
4792 lsq = gimple_assign_lhs (new_stmt);
4793 if (!realignment_token)
4794 realignment_token = dataref_ptr;
4795 vec_dest = vect_create_destination_var (scalar_dest, vectype);
4796 new_stmt
4797 = gimple_build_assign_with_ops3 (REALIGN_LOAD_EXPR,
4798 vec_dest, msq, lsq,
4799 realignment_token);
4800 new_temp = make_ssa_name (vec_dest, new_stmt);
4801 gimple_assign_set_lhs (new_stmt, new_temp);
4802 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4803
4804 if (alignment_support_scheme == dr_explicit_realign_optimized)
4805 {
4806 gcc_assert (phi);
4807 if (i == vec_num - 1 && j == ncopies - 1)
4808 add_phi_arg (phi, lsq,
4809 loop_latch_edge (containing_loop),
4810 UNKNOWN_LOCATION);
4811 msq = lsq;
4812 }
4813 }
4814
4815 /* 4. Handle invariant-load. */
4816 if (inv_p && !bb_vinfo)
4817 {
4818 tree tem, vec_inv;
4819 gimple_stmt_iterator gsi2 = *gsi;
4820 gcc_assert (!strided_load);
4821 gsi_next (&gsi2);
4822 tem = scalar_dest;
4823 if (!useless_type_conversion_p (TREE_TYPE (vectype),
4824 TREE_TYPE (tem)))
4825 {
4826 tem = fold_convert (TREE_TYPE (vectype), tem);
4827 tem = force_gimple_operand_gsi (&gsi2, tem, true,
4828 NULL_TREE, true,
4829 GSI_SAME_STMT);
4830 }
4831 vec_inv = build_vector_from_val (vectype, tem);
4832 new_temp = vect_init_vector (stmt, vec_inv,
4833 vectype, &gsi2);
4834 new_stmt = SSA_NAME_DEF_STMT (new_temp);
4835 }
4836
4837 if (negative)
4838 {
4839 tree perm_mask = perm_mask_for_reverse (vectype);
4840 new_temp = permute_vec_elements (new_temp, new_temp,
4841 perm_mask, stmt, gsi);
4842 new_stmt = SSA_NAME_DEF_STMT (new_temp);
4843 }
4844
4845 /* Collect vector loads and later create their permutation in
4846 vect_transform_strided_load (). */
4847 if (strided_load || slp_perm)
4848 VEC_quick_push (tree, dr_chain, new_temp);
4849
4850 /* Store vector loads in the corresponding SLP_NODE. */
4851 if (slp && !slp_perm)
4852 VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node),
4853 new_stmt);
4854 }
4855 }
4856
4857 if (slp && !slp_perm)
4858 continue;
4859
4860 if (slp_perm)
4861 {
4862 if (!vect_transform_slp_perm_load (stmt, dr_chain, gsi, vf,
4863 slp_node_instance, false))
4864 {
4865 VEC_free (tree, heap, dr_chain);
4866 return false;
4867 }
4868 }
4869 else
4870 {
4871 if (strided_load)
4872 {
4873 if (!load_lanes_p)
4874 vect_transform_strided_load (stmt, dr_chain, group_size, gsi);
4875 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
4876 }
4877 else
4878 {
4879 if (j == 0)
4880 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4881 else
4882 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4883 prev_stmt_info = vinfo_for_stmt (new_stmt);
4884 }
4885 }
4886 if (dr_chain)
4887 VEC_free (tree, heap, dr_chain);
4888 }
4889
4890 return true;
4891 }
4892
4893 /* Function vect_is_simple_cond.
4894
4895 Input:
4896 LOOP - the loop that is being vectorized.
4897 COND - Condition that is checked for simple use.
4898
4899 Output:
4900 *COMP_VECTYPE - the vector type for the comparison.
4901
4902 Returns whether a COND can be vectorized. Checks whether
4903    condition operands are supportable using vect_is_simple_use.  */
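
   For instance (illustrative, not from the original sources), for a condition
   like 'a_5 < b_7' where both operands are loop-defined ints vectorized with
   V4SI, vect_is_simple_use_1 reports V4SI for each operand and *COMP_VECTYPE
   is set to V4SI.  */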
4904
4905 static bool
4906 vect_is_simple_cond (tree cond, loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
4907 tree *comp_vectype)
4908 {
4909 tree lhs, rhs;
4910 tree def;
4911 enum vect_def_type dt;
4912 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
4913
4914 if (!COMPARISON_CLASS_P (cond))
4915 return false;
4916
4917 lhs = TREE_OPERAND (cond, 0);
4918 rhs = TREE_OPERAND (cond, 1);
4919
4920 if (TREE_CODE (lhs) == SSA_NAME)
4921 {
4922 gimple lhs_def_stmt = SSA_NAME_DEF_STMT (lhs);
4923 if (!vect_is_simple_use_1 (lhs, loop_vinfo, bb_vinfo, &lhs_def_stmt, &def,
4924 &dt, &vectype1))
4925 return false;
4926 }
4927 else if (TREE_CODE (lhs) != INTEGER_CST && TREE_CODE (lhs) != REAL_CST
4928 && TREE_CODE (lhs) != FIXED_CST)
4929 return false;
4930
4931 if (TREE_CODE (rhs) == SSA_NAME)
4932 {
4933 gimple rhs_def_stmt = SSA_NAME_DEF_STMT (rhs);
4934 if (!vect_is_simple_use_1 (rhs, loop_vinfo, bb_vinfo, &rhs_def_stmt, &def,
4935 &dt, &vectype2))
4936 return false;
4937 }
4938 else if (TREE_CODE (rhs) != INTEGER_CST && TREE_CODE (rhs) != REAL_CST
4939 && TREE_CODE (rhs) != FIXED_CST)
4940 return false;
4941
4942 *comp_vectype = vectype1 ? vectype1 : vectype2;
4943 return true;
4944 }
4945
4946 /* vectorizable_condition.
4947
4948    Check if STMT is a conditional modify expression that can be vectorized.
4949 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4950 stmt using VEC_COND_EXPR to replace it, put it in VEC_STMT, and insert it
4951 at GSI.
4952
4953    When STMT is vectorized as a nested cycle, REDUC_DEF is the vector variable
4954    to be used at REDUC_INDEX (in the then clause if REDUC_INDEX is 1, and in
4955    the else clause if it is 2).
4956
4957 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
4958
4959 bool
4960 vectorizable_condition (gimple stmt, gimple_stmt_iterator *gsi,
4961 gimple *vec_stmt, tree reduc_def, int reduc_index,
4962 slp_tree slp_node)
4963 {
4964 tree scalar_dest = NULL_TREE;
4965 tree vec_dest = NULL_TREE;
4966 tree cond_expr, then_clause, else_clause;
4967 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4968 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
4969 tree comp_vectype;
4970 tree vec_cond_lhs = NULL_TREE, vec_cond_rhs = NULL_TREE;
4971 tree vec_then_clause = NULL_TREE, vec_else_clause = NULL_TREE;
4972 tree vec_compare, vec_cond_expr;
4973 tree new_temp;
4974 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4975 tree def;
4976 enum vect_def_type dt, dts[4];
4977 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
4978 int ncopies;
4979 enum tree_code code;
4980 stmt_vec_info prev_stmt_info = NULL;
4981 int i, j;
4982 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
4983 VEC (tree, heap) *vec_oprnds0 = NULL, *vec_oprnds1 = NULL;
4984 VEC (tree, heap) *vec_oprnds2 = NULL, *vec_oprnds3 = NULL;
4985
4986 if (slp_node || PURE_SLP_STMT (stmt_info))
4987 ncopies = 1;
4988 else
4989 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
4990
4991 gcc_assert (ncopies >= 1);
4992 if (reduc_index && ncopies > 1)
4993 return false; /* FORNOW */
4994
4995 if (reduc_index && STMT_SLP_TYPE (stmt_info))
4996 return false;
4997
4998 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
4999 return false;
5000
5001 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
5002 && !(STMT_VINFO_DEF_TYPE (stmt_info) == vect_nested_cycle
5003 && reduc_def))
5004 return false;
5005
5006 /* FORNOW: not yet supported. */
5007 if (STMT_VINFO_LIVE_P (stmt_info))
5008 {
5009 if (vect_print_dump_info (REPORT_DETAILS))
5010 fprintf (vect_dump, "value used after loop.");
5011 return false;
5012 }
5013
5014 /* Is vectorizable conditional operation? */
5015 if (!is_gimple_assign (stmt))
5016 return false;
5017
5018 code = gimple_assign_rhs_code (stmt);
5019
5020 if (code != COND_EXPR)
5021 return false;
5022
5023 cond_expr = gimple_assign_rhs1 (stmt);
5024 then_clause = gimple_assign_rhs2 (stmt);
5025 else_clause = gimple_assign_rhs3 (stmt);
5026
5027 if (!vect_is_simple_cond (cond_expr, loop_vinfo, bb_vinfo, &comp_vectype)
5028 || !comp_vectype)
5029 return false;
5030
5031 if (TREE_CODE (then_clause) == SSA_NAME)
5032 {
5033 gimple then_def_stmt = SSA_NAME_DEF_STMT (then_clause);
5034 if (!vect_is_simple_use (then_clause, loop_vinfo, bb_vinfo,
5035 &then_def_stmt, &def, &dt))
5036 return false;
5037 }
5038 else if (TREE_CODE (then_clause) != INTEGER_CST
5039 && TREE_CODE (then_clause) != REAL_CST
5040 && TREE_CODE (then_clause) != FIXED_CST)
5041 return false;
5042
5043 if (TREE_CODE (else_clause) == SSA_NAME)
5044 {
5045 gimple else_def_stmt = SSA_NAME_DEF_STMT (else_clause);
5046 if (!vect_is_simple_use (else_clause, loop_vinfo, bb_vinfo,
5047 &else_def_stmt, &def, &dt))
5048 return false;
5049 }
5050 else if (TREE_CODE (else_clause) != INTEGER_CST
5051 && TREE_CODE (else_clause) != REAL_CST
5052 && TREE_CODE (else_clause) != FIXED_CST)
5053 return false;
5054
5055 if (!vec_stmt)
5056 {
5057 STMT_VINFO_TYPE (stmt_info) = condition_vec_info_type;
5058 return expand_vec_cond_expr_p (vectype, comp_vectype);
5059 }
5060
5061 /* Transform. */
5062
5063 if (!slp_node)
5064 {
5065 vec_oprnds0 = VEC_alloc (tree, heap, 1);
5066 vec_oprnds1 = VEC_alloc (tree, heap, 1);
5067 vec_oprnds2 = VEC_alloc (tree, heap, 1);
5068 vec_oprnds3 = VEC_alloc (tree, heap, 1);
5069 }
5070
5071 /* Handle def. */
5072 scalar_dest = gimple_assign_lhs (stmt);
5073 vec_dest = vect_create_destination_var (scalar_dest, vectype);
5074
5075 /* Handle cond expr. */
5076 for (j = 0; j < ncopies; j++)
5077 {
5078 gimple new_stmt = NULL;
5079 if (j == 0)
5080 {
5081 if (slp_node)
5082 {
5083 VEC (tree, heap) *ops = VEC_alloc (tree, heap, 4);
5084 VEC (slp_void_p, heap) *vec_defs;
5085
5086 vec_defs = VEC_alloc (slp_void_p, heap, 4);
5087 VEC_safe_push (tree, heap, ops, TREE_OPERAND (cond_expr, 0));
5088 VEC_safe_push (tree, heap, ops, TREE_OPERAND (cond_expr, 1));
5089 VEC_safe_push (tree, heap, ops, then_clause);
5090 VEC_safe_push (tree, heap, ops, else_clause);
5091 vect_get_slp_defs (ops, slp_node, &vec_defs, -1);
5092 vec_oprnds3 = (VEC (tree, heap) *) VEC_pop (slp_void_p, vec_defs);
5093 vec_oprnds2 = (VEC (tree, heap) *) VEC_pop (slp_void_p, vec_defs);
5094 vec_oprnds1 = (VEC (tree, heap) *) VEC_pop (slp_void_p, vec_defs);
5095 vec_oprnds0 = (VEC (tree, heap) *) VEC_pop (slp_void_p, vec_defs);
5096
5097 VEC_free (tree, heap, ops);
5098 VEC_free (slp_void_p, heap, vec_defs);
5099 }
5100 else
5101 {
5102 gimple gtemp;
5103 vec_cond_lhs =
5104 vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 0),
5105 stmt, NULL);
5106 vect_is_simple_use (TREE_OPERAND (cond_expr, 0), loop_vinfo,
5107 NULL, &gtemp, &def, &dts[0]);
5108
5109 vec_cond_rhs =
5110 vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 1),
5111 stmt, NULL);
5112 vect_is_simple_use (TREE_OPERAND (cond_expr, 1), loop_vinfo,
5113 NULL, &gtemp, &def, &dts[1]);
5114 if (reduc_index == 1)
5115 vec_then_clause = reduc_def;
5116 else
5117 {
5118 vec_then_clause = vect_get_vec_def_for_operand (then_clause,
5119 stmt, NULL);
5120 vect_is_simple_use (then_clause, loop_vinfo,
5121 NULL, &gtemp, &def, &dts[2]);
5122 }
5123 if (reduc_index == 2)
5124 vec_else_clause = reduc_def;
5125 else
5126 {
5127 vec_else_clause = vect_get_vec_def_for_operand (else_clause,
5128 stmt, NULL);
5129 vect_is_simple_use (else_clause, loop_vinfo,
5130 NULL, &gtemp, &def, &dts[3]);
5131 }
5132 }
5133 }
5134 else
5135 {
5136 vec_cond_lhs = vect_get_vec_def_for_stmt_copy (dts[0],
5137 VEC_pop (tree, vec_oprnds0));
5138 vec_cond_rhs = vect_get_vec_def_for_stmt_copy (dts[1],
5139 VEC_pop (tree, vec_oprnds1));
5140 vec_then_clause = vect_get_vec_def_for_stmt_copy (dts[2],
5141 VEC_pop (tree, vec_oprnds2));
5142 vec_else_clause = vect_get_vec_def_for_stmt_copy (dts[3],
5143 VEC_pop (tree, vec_oprnds3));
5144 }
5145
5146 if (!slp_node)
5147 {
5148 VEC_quick_push (tree, vec_oprnds0, vec_cond_lhs);
5149 VEC_quick_push (tree, vec_oprnds1, vec_cond_rhs);
5150 VEC_quick_push (tree, vec_oprnds2, vec_then_clause);
5151 VEC_quick_push (tree, vec_oprnds3, vec_else_clause);
5152 }
5153
5154 /* Arguments are ready. Create the new vector stmt. */
5155 FOR_EACH_VEC_ELT (tree, vec_oprnds0, i, vec_cond_lhs)
5156 {
5157 vec_cond_rhs = VEC_index (tree, vec_oprnds1, i);
5158 vec_then_clause = VEC_index (tree, vec_oprnds2, i);
5159 vec_else_clause = VEC_index (tree, vec_oprnds3, i);
5160
5161 vec_compare = build2 (TREE_CODE (cond_expr), vectype,
5162 vec_cond_lhs, vec_cond_rhs);
5163 vec_cond_expr = build3 (VEC_COND_EXPR, vectype,
5164 vec_compare, vec_then_clause, vec_else_clause);
5165
5166 new_stmt = gimple_build_assign (vec_dest, vec_cond_expr);
5167 new_temp = make_ssa_name (vec_dest, new_stmt);
5168 gimple_assign_set_lhs (new_stmt, new_temp);
5169 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5170 if (slp_node)
5171 VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node), new_stmt);
5172 }
5173
5174 if (slp_node)
5175 continue;
5176
5177 if (j == 0)
5178 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
5179 else
5180 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
5181
5182 prev_stmt_info = vinfo_for_stmt (new_stmt);
5183 }
5184
5185 VEC_free (tree, heap, vec_oprnds0);
5186 VEC_free (tree, heap, vec_oprnds1);
5187 VEC_free (tree, heap, vec_oprnds2);
5188 VEC_free (tree, heap, vec_oprnds3);
5189
5190 return true;
5191 }
5192
5193
5194 /* Make sure the statement is vectorizable. */
5195
5196 bool
5197 vect_analyze_stmt (gimple stmt, bool *need_to_vectorize, slp_tree node)
5198 {
5199 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
5200 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
5201 enum vect_relevant relevance = STMT_VINFO_RELEVANT (stmt_info);
5202 bool ok;
5203 tree scalar_type, vectype;
5204 gimple pattern_stmt, pattern_def_stmt;
5205
5206 if (vect_print_dump_info (REPORT_DETAILS))
5207 {
5208 fprintf (vect_dump, "==> examining statement: ");
5209 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
5210 }
5211
5212 if (gimple_has_volatile_ops (stmt))
5213 {
5214 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
5215 fprintf (vect_dump, "not vectorized: stmt has volatile operands");
5216
5217 return false;
5218 }
5219
5220 /* Skip stmts that do not need to be vectorized. In loops this is expected
5221 to include:
5222 - the COND_EXPR which is the loop exit condition
5223 - any LABEL_EXPRs in the loop
5224 - computations that are used only for array indexing or loop control.
5225 In basic blocks we only analyze statements that are a part of some SLP
5226      instance; therefore, all the statements are relevant.
5227
5228      A pattern statement needs to be analyzed instead of the original statement
5229 if the original statement is not relevant. Otherwise, we analyze both
5230 statements. */
5231
5232 pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
5233 if (!STMT_VINFO_RELEVANT_P (stmt_info)
5234 && !STMT_VINFO_LIVE_P (stmt_info))
5235 {
5236 if (STMT_VINFO_IN_PATTERN_P (stmt_info)
5237 && pattern_stmt
5238 && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
5239 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
5240 {
5241 /* Analyze PATTERN_STMT instead of the original stmt. */
5242 stmt = pattern_stmt;
5243 stmt_info = vinfo_for_stmt (pattern_stmt);
5244 if (vect_print_dump_info (REPORT_DETAILS))
5245 {
5246 fprintf (vect_dump, "==> examining pattern statement: ");
5247 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
5248 }
5249 }
5250 else
5251 {
5252 if (vect_print_dump_info (REPORT_DETAILS))
5253 fprintf (vect_dump, "irrelevant.");
5254
5255 return true;
5256 }
5257 }
5258 else if (STMT_VINFO_IN_PATTERN_P (stmt_info)
5259 && pattern_stmt
5260 && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
5261 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
5262 {
5263 /* Analyze PATTERN_STMT too. */
5264 if (vect_print_dump_info (REPORT_DETAILS))
5265 {
5266 fprintf (vect_dump, "==> examining pattern statement: ");
5267 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
5268 }
5269
5270 if (!vect_analyze_stmt (pattern_stmt, need_to_vectorize, node))
5271 return false;
5272 }
5273
5274 if (is_pattern_stmt_p (stmt_info)
5275 && (pattern_def_stmt = STMT_VINFO_PATTERN_DEF_STMT (stmt_info))
5276 && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_def_stmt))
5277 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_def_stmt))))
5278 {
5279 /* Analyze def stmt of STMT if it's a pattern stmt. */
5280 if (vect_print_dump_info (REPORT_DETAILS))
5281 {
5282 fprintf (vect_dump, "==> examining pattern def statement: ");
5283 print_gimple_stmt (vect_dump, pattern_def_stmt, 0, TDF_SLIM);
5284 }
5285
5286 if (!vect_analyze_stmt (pattern_def_stmt, need_to_vectorize, node))
5287 return false;
5288 }
5289
5290
5291 switch (STMT_VINFO_DEF_TYPE (stmt_info))
5292 {
5293 case vect_internal_def:
5294 break;
5295
5296 case vect_reduction_def:
5297 case vect_nested_cycle:
5298 gcc_assert (!bb_vinfo && (relevance == vect_used_in_outer
5299 || relevance == vect_used_in_outer_by_reduction
5300 || relevance == vect_unused_in_scope));
5301 break;
5302
5303 case vect_induction_def:
5304 case vect_constant_def:
5305 case vect_external_def:
5306 case vect_unknown_def_type:
5307 default:
5308 gcc_unreachable ();
5309 }
5310
5311 if (bb_vinfo)
5312 {
5313 gcc_assert (PURE_SLP_STMT (stmt_info));
5314
5315 scalar_type = TREE_TYPE (gimple_get_lhs (stmt));
5316 if (vect_print_dump_info (REPORT_DETAILS))
5317 {
5318 fprintf (vect_dump, "get vectype for scalar type: ");
5319 print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
5320 }
5321
5322 vectype = get_vectype_for_scalar_type (scalar_type);
5323 if (!vectype)
5324 {
5325 if (vect_print_dump_info (REPORT_DETAILS))
5326 {
5327 fprintf (vect_dump, "not SLPed: unsupported data-type ");
5328 print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
5329 }
5330 return false;
5331 }
5332
5333 if (vect_print_dump_info (REPORT_DETAILS))
5334 {
5335 fprintf (vect_dump, "vectype: ");
5336 print_generic_expr (vect_dump, vectype, TDF_SLIM);
5337 }
5338
5339 STMT_VINFO_VECTYPE (stmt_info) = vectype;
5340 }
5341
5342 if (STMT_VINFO_RELEVANT_P (stmt_info))
5343 {
5344 gcc_assert (!VECTOR_MODE_P (TYPE_MODE (gimple_expr_type (stmt))));
5345 gcc_assert (STMT_VINFO_VECTYPE (stmt_info));
5346 *need_to_vectorize = true;
5347 }
5348
5349 ok = true;
5350 if (!bb_vinfo
5351 && (STMT_VINFO_RELEVANT_P (stmt_info)
5352 || STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def))
5353 ok = (vectorizable_conversion (stmt, NULL, NULL, NULL)
5354 || vectorizable_shift (stmt, NULL, NULL, NULL)
5355 || vectorizable_operation (stmt, NULL, NULL, NULL)
5356 || vectorizable_assignment (stmt, NULL, NULL, NULL)
5357 || vectorizable_load (stmt, NULL, NULL, NULL, NULL)
5358 || vectorizable_call (stmt, NULL, NULL, NULL)
5359 || vectorizable_store (stmt, NULL, NULL, NULL)
5360 || vectorizable_reduction (stmt, NULL, NULL, NULL)
5361 || vectorizable_condition (stmt, NULL, NULL, NULL, 0, NULL));
5362 else
5363 {
5364 if (bb_vinfo)
5365 ok = (vectorizable_conversion (stmt, NULL, NULL, node)
5366 || vectorizable_shift (stmt, NULL, NULL, node)
5367 || vectorizable_operation (stmt, NULL, NULL, node)
5368 || vectorizable_assignment (stmt, NULL, NULL, node)
5369 || vectorizable_load (stmt, NULL, NULL, node, NULL)
5370 || vectorizable_call (stmt, NULL, NULL, node)
5371 || vectorizable_store (stmt, NULL, NULL, node)
5372 || vectorizable_condition (stmt, NULL, NULL, NULL, 0, node));
5373 }
5374
5375 if (!ok)
5376 {
5377 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
5378 {
5379 fprintf (vect_dump, "not vectorized: relevant stmt not ");
5380 fprintf (vect_dump, "supported: ");
5381 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
5382 }
5383
5384 return false;
5385 }
5386
5387 if (bb_vinfo)
5388 return true;
5389
5390 /* Stmts that are (also) "live" (i.e., that are used outside the loop)
5391 need extra handling, except for vectorizable reductions. */
5392 if (STMT_VINFO_LIVE_P (stmt_info)
5393 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type)
5394 ok = vectorizable_live_operation (stmt, NULL, NULL);
5395
5396 if (!ok)
5397 {
5398 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
5399 {
5400 fprintf (vect_dump, "not vectorized: live stmt not ");
5401 fprintf (vect_dump, "supported: ");
5402 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
5403 }
5404
5405 return false;
5406 }
5407
5408 return true;
5409 }
5410
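/* Illustrative sketch (not part of the original source): the analysis
   drivers in tree-vect-loop.c and tree-vect-slp.c call vect_analyze_stmt
   for every statement in the region being vectorized and give up on the
   first failure, roughly along the lines of

       bool need_to_vectorize = false;
       gimple_stmt_iterator si;

       for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
         if (!vect_analyze_stmt (gsi_stmt (si), &need_to_vectorize, NULL))
           return false;

   The gsi_* routines are the usual GIMPLE statement iterators; the exact
   driver loops and the SLP_TREE node argument differ per caller.  */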
5411
5412 /* Function vect_transform_stmt.
5413
5414 Create a vectorized stmt to replace STMT, and insert it at GSI. */
5415
5416 bool
5417 vect_transform_stmt (gimple stmt, gimple_stmt_iterator *gsi,
5418 bool *strided_store, slp_tree slp_node,
5419 slp_instance slp_node_instance)
5420 {
5421 bool is_store = false;
5422 gimple vec_stmt = NULL;
5423 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
5424 bool done;
5425
5426 switch (STMT_VINFO_TYPE (stmt_info))
5427 {
5428 case type_demotion_vec_info_type:
5429 case type_promotion_vec_info_type:
5430 case type_conversion_vec_info_type:
5431 done = vectorizable_conversion (stmt, gsi, &vec_stmt, slp_node);
5432 gcc_assert (done);
5433 break;
5434
5435 case induc_vec_info_type:
5436 gcc_assert (!slp_node);
5437 done = vectorizable_induction (stmt, gsi, &vec_stmt);
5438 gcc_assert (done);
5439 break;
5440
5441 case shift_vec_info_type:
5442 done = vectorizable_shift (stmt, gsi, &vec_stmt, slp_node);
5443 gcc_assert (done);
5444 break;
5445
5446 case op_vec_info_type:
5447 done = vectorizable_operation (stmt, gsi, &vec_stmt, slp_node);
5448 gcc_assert (done);
5449 break;
5450
5451 case assignment_vec_info_type:
5452 done = vectorizable_assignment (stmt, gsi, &vec_stmt, slp_node);
5453 gcc_assert (done);
5454 break;
5455
5456 case load_vec_info_type:
5457 done = vectorizable_load (stmt, gsi, &vec_stmt, slp_node,
5458 slp_node_instance);
5459 gcc_assert (done);
5460 break;
5461
5462 case store_vec_info_type:
5463 done = vectorizable_store (stmt, gsi, &vec_stmt, slp_node);
5464 gcc_assert (done);
5465 if (STMT_VINFO_STRIDED_ACCESS (stmt_info) && !slp_node)
5466 {
5467 /* In case of interleaving, the whole chain is vectorized when the
5468 last store in the chain is reached. Store stmts before the last
5469 one are skipped, and their vec_stmt_info shouldn't be freed
5470 meanwhile. */
5471 *strided_store = true;
5472 if (STMT_VINFO_VEC_STMT (stmt_info))
5473 is_store = true;
5474 }
5475 else
5476 is_store = true;
5477 break;
5478
5479 case condition_vec_info_type:
5480 done = vectorizable_condition (stmt, gsi, &vec_stmt, NULL, 0, slp_node);
5481 gcc_assert (done);
5482 break;
5483
5484 case call_vec_info_type:
5485 done = vectorizable_call (stmt, gsi, &vec_stmt, slp_node);
5486 stmt = gsi_stmt (*gsi);
5487 break;
5488
5489 case reduc_vec_info_type:
5490 done = vectorizable_reduction (stmt, gsi, &vec_stmt, slp_node);
5491 gcc_assert (done);
5492 break;
5493
5494 default:
5495 if (!STMT_VINFO_LIVE_P (stmt_info))
5496 {
5497 if (vect_print_dump_info (REPORT_DETAILS))
5498 fprintf (vect_dump, "stmt not supported.");
5499 gcc_unreachable ();
5500 }
5501 }
5502
5503 /* Handle inner-loop stmts whose DEF is used in the loop-nest that
5504 is being vectorized, but outside the immediately enclosing loop. */
5505 if (vec_stmt
5506 && STMT_VINFO_LOOP_VINFO (stmt_info)
5507 && nested_in_vect_loop_p (LOOP_VINFO_LOOP (
5508 STMT_VINFO_LOOP_VINFO (stmt_info)), stmt)
5509 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type
5510 && (STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_outer
5511 || STMT_VINFO_RELEVANT (stmt_info) ==
5512 vect_used_in_outer_by_reduction))
5513 {
5514 struct loop *innerloop = LOOP_VINFO_LOOP (
5515 STMT_VINFO_LOOP_VINFO (stmt_info))->inner;
5516 imm_use_iterator imm_iter;
5517 use_operand_p use_p;
5518 tree scalar_dest;
5519 gimple exit_phi;
5520
5521 if (vect_print_dump_info (REPORT_DETAILS))
5522 fprintf (vect_dump, "Record the vdef for outer-loop vectorization.");
5523
5524 /* Find the relevant loop-exit phi-node, and record the vec_stmt there
5525 (to be used when vectorizing outer-loop stmts that use the DEF of
5526 STMT). */
5527 if (gimple_code (stmt) == GIMPLE_PHI)
5528 scalar_dest = PHI_RESULT (stmt);
5529 else
5530 scalar_dest = gimple_assign_lhs (stmt);
5531
5532 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, scalar_dest)
5533 {
5534 if (!flow_bb_inside_loop_p (innerloop, gimple_bb (USE_STMT (use_p))))
5535 {
5536 exit_phi = USE_STMT (use_p);
5537 STMT_VINFO_VEC_STMT (vinfo_for_stmt (exit_phi)) = vec_stmt;
5538 }
5539 }
5540 }
5541
5542 /* Handle stmts whose DEF is used outside the loop-nest that is
5543 being vectorized. */
5544 if (STMT_VINFO_LIVE_P (stmt_info)
5545 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type)
5546 {
5547 done = vectorizable_live_operation (stmt, gsi, &vec_stmt);
5548 gcc_assert (done);
5549 }
5550
5551 if (vec_stmt)
5552 STMT_VINFO_VEC_STMT (stmt_info) = vec_stmt;
5553
5554 return is_store;
5555 }
5556
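/* Illustrative sketch (not part of the original source): callers use the
   return value of vect_transform_stmt to decide when the original scalar
   stores can be removed.  An interleaved store group is removed only once
   the last store in the chain has been vectorized, roughly as follows

       bool strided_store = false;
       bool is_store
         = vect_transform_stmt (stmt, &gsi, &strided_store, NULL, NULL);

       if (is_store)
         {
           if (strided_store)
             vect_remove_stores (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)));
           ...
         }

   The real drivers in tree-vect-loop.c and tree-vect-slp.c also free the
   stmt_vec_infos and advance the statement iterator appropriately.  */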
5557
5558 /* Remove a group of stores (for SLP or interleaving) and free their
5559 stmt_vec_info. */
5560
5561 void
5562 vect_remove_stores (gimple first_stmt)
5563 {
5564 gimple next = first_stmt;
5565 gimple tmp;
5566 gimple_stmt_iterator next_si;
5567
5568 while (next)
5569 {
5570 stmt_vec_info stmt_info = vinfo_for_stmt (next);
5571
5572 tmp = GROUP_NEXT_ELEMENT (stmt_info);
5573 if (is_pattern_stmt_p (stmt_info))
5574 next = STMT_VINFO_RELATED_STMT (stmt_info);
5575 /* Free the attached stmt_vec_info and remove the stmt. */
5576 next_si = gsi_for_stmt (next);
5577 gsi_remove (&next_si, true);
5578 free_stmt_vec_info (next);
5579 next = tmp;
5580 }
5581 }
5582
5583
5584 /* Function new_stmt_vec_info.
5585
5586 Create and initialize a new stmt_vec_info struct for STMT. */
5587
5588 stmt_vec_info
5589 new_stmt_vec_info (gimple stmt, loop_vec_info loop_vinfo,
5590 bb_vec_info bb_vinfo)
5591 {
5592 stmt_vec_info res;
5593 res = (stmt_vec_info) xcalloc (1, sizeof (struct _stmt_vec_info));
5594
5595 STMT_VINFO_TYPE (res) = undef_vec_info_type;
5596 STMT_VINFO_STMT (res) = stmt;
5597 STMT_VINFO_LOOP_VINFO (res) = loop_vinfo;
5598 STMT_VINFO_BB_VINFO (res) = bb_vinfo;
5599 STMT_VINFO_RELEVANT (res) = vect_unused_in_scope;
5600 STMT_VINFO_LIVE_P (res) = false;
5601 STMT_VINFO_VECTYPE (res) = NULL;
5602 STMT_VINFO_VEC_STMT (res) = NULL;
5603 STMT_VINFO_VECTORIZABLE (res) = true;
5604 STMT_VINFO_IN_PATTERN_P (res) = false;
5605 STMT_VINFO_RELATED_STMT (res) = NULL;
5606 STMT_VINFO_PATTERN_DEF_STMT (res) = NULL;
5607 STMT_VINFO_DATA_REF (res) = NULL;
5608
5609 STMT_VINFO_DR_BASE_ADDRESS (res) = NULL;
5610 STMT_VINFO_DR_OFFSET (res) = NULL;
5611 STMT_VINFO_DR_INIT (res) = NULL;
5612 STMT_VINFO_DR_STEP (res) = NULL;
5613 STMT_VINFO_DR_ALIGNED_TO (res) = NULL;
5614
5615 if (gimple_code (stmt) == GIMPLE_PHI
5616 && is_loop_header_bb_p (gimple_bb (stmt)))
5617 STMT_VINFO_DEF_TYPE (res) = vect_unknown_def_type;
5618 else
5619 STMT_VINFO_DEF_TYPE (res) = vect_internal_def;
5620
5621 STMT_VINFO_SAME_ALIGN_REFS (res) = VEC_alloc (dr_p, heap, 5);
5622 STMT_VINFO_INSIDE_OF_LOOP_COST (res) = 0;
5623 STMT_VINFO_OUTSIDE_OF_LOOP_COST (res) = 0;
5624 STMT_SLP_TYPE (res) = loop_vect;
5625 GROUP_FIRST_ELEMENT (res) = NULL;
5626 GROUP_NEXT_ELEMENT (res) = NULL;
5627 GROUP_SIZE (res) = 0;
5628 GROUP_STORE_COUNT (res) = 0;
5629 GROUP_GAP (res) = 0;
5630 GROUP_SAME_DR_STMT (res) = NULL;
5631 GROUP_READ_WRITE_DEPENDENCE (res) = false;
5632
5633 return res;
5634 }
5635
5636
5637 /* Create a vector for stmt_vec_info. */
5638
5639 void
5640 init_stmt_vec_info_vec (void)
5641 {
5642 gcc_assert (!stmt_vec_info_vec);
5643 stmt_vec_info_vec = VEC_alloc (vec_void_p, heap, 50);
5644 }
5645
5646
5647 /* Free the vector of stmt_vec_info. */
5648
5649 void
5650 free_stmt_vec_info_vec (void)
5651 {
5652 gcc_assert (stmt_vec_info_vec);
5653 VEC_free (vec_void_p, heap, stmt_vec_info_vec);
5654 }
5655
5656
5657 /* Free stmt vectorization related info. */
5658
5659 void
5660 free_stmt_vec_info (gimple stmt)
5661 {
5662 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
5663
5664 if (!stmt_info)
5665 return;
5666
5667 /* Check if this statement has a related "pattern stmt"
5668 (introduced by the vectorizer during the pattern recognition
5669 pass). Free pattern's stmt_vec_info and def stmt's stmt_vec_info
5670 too. */
5671 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
5672 {
5673 stmt_vec_info patt_info
5674 = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
5675 if (patt_info)
5676 {
5677 if (STMT_VINFO_PATTERN_DEF_STMT (patt_info))
5678 free_stmt_vec_info (STMT_VINFO_PATTERN_DEF_STMT (patt_info));
5679 free_stmt_vec_info (STMT_VINFO_RELATED_STMT (stmt_info));
5680 }
5681 }
5682
5683 VEC_free (dr_p, heap, STMT_VINFO_SAME_ALIGN_REFS (stmt_info));
5684 set_vinfo_for_stmt (stmt, NULL);
5685 free (stmt_info);
5686 }
5687
5688
5689 /* Function get_vectype_for_scalar_type_and_size.
5690
5691 Returns the vector type corresponding to SCALAR_TYPE and SIZE as supported
5692 by the target. */
5693
5694 static tree
5695 get_vectype_for_scalar_type_and_size (tree scalar_type, unsigned size)
5696 {
5697 enum machine_mode inner_mode = TYPE_MODE (scalar_type);
5698 enum machine_mode simd_mode;
5699 unsigned int nbytes = GET_MODE_SIZE (inner_mode);
5700 int nunits;
5701 tree vectype;
5702
5703 if (nbytes == 0)
5704 return NULL_TREE;
5705
5706 /* We can't build a vector type of elements with alignment bigger than
5707 their size. */
5708 if (nbytes < TYPE_ALIGN_UNIT (scalar_type))
5709 return NULL_TREE;
5710
5711 /* For vector types of elements whose mode precision doesn't
5712 match their type's precision we use an element type of mode
5713 precision. The vectorization routines will have to make sure
5714 they support the proper result truncation/extension. */
5715 if (INTEGRAL_TYPE_P (scalar_type)
5716 && GET_MODE_BITSIZE (inner_mode) != TYPE_PRECISION (scalar_type))
5717 scalar_type = build_nonstandard_integer_type (GET_MODE_BITSIZE (inner_mode),
5718 TYPE_UNSIGNED (scalar_type));
5719
5720 if (GET_MODE_CLASS (inner_mode) != MODE_INT
5721 && GET_MODE_CLASS (inner_mode) != MODE_FLOAT)
5722 return NULL_TREE;
5723
5724 /* We shouldn't end up building VECTOR_TYPEs of non-scalar components.
5725 When the component mode passes the above test simply use a type
5726 corresponding to that mode. The theory is that any use that
5727 would cause problems with this will disable vectorization anyway. */
5728 if (!SCALAR_FLOAT_TYPE_P (scalar_type)
5729 && !INTEGRAL_TYPE_P (scalar_type)
5730 && !POINTER_TYPE_P (scalar_type))
5731 scalar_type = lang_hooks.types.type_for_mode (inner_mode, 1);
5732
5733 /* If no size was supplied use the mode the target prefers. Otherwise
5734 lookup a vector mode of the specified size. */
5735 if (size == 0)
5736 simd_mode = targetm.vectorize.preferred_simd_mode (inner_mode);
5737 else
5738 simd_mode = mode_for_vector (inner_mode, size / nbytes);
5739 nunits = GET_MODE_SIZE (simd_mode) / nbytes;
5740 if (nunits <= 1)
5741 return NULL_TREE;
5742
5743 vectype = build_vector_type (scalar_type, nunits);
5744 if (vect_print_dump_info (REPORT_DETAILS))
5745 {
5746 fprintf (vect_dump, "get vectype with %d units of type ", nunits);
5747 print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
5748 }
5749
5750 if (!vectype)
5751 return NULL_TREE;
5752
5753 if (vect_print_dump_info (REPORT_DETAILS))
5754 {
5755 fprintf (vect_dump, "vectype: ");
5756 print_generic_expr (vect_dump, vectype, TDF_SLIM);
5757 }
5758
5759 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
5760 && !INTEGRAL_MODE_P (TYPE_MODE (vectype)))
5761 {
5762 if (vect_print_dump_info (REPORT_DETAILS))
5763 fprintf (vect_dump, "mode not supported by target.");
5764 return NULL_TREE;
5765 }
5766
5767 return vectype;
5768 }
5769
5770 unsigned int current_vector_size;
5771
5772 /* Function get_vectype_for_scalar_type.
5773
5774 Returns the vector type corresponding to SCALAR_TYPE as supported
5775 by the target. */
5776
5777 tree
5778 get_vectype_for_scalar_type (tree scalar_type)
5779 {
5780 tree vectype;
5781 vectype = get_vectype_for_scalar_type_and_size (scalar_type,
5782 current_vector_size);
5783 if (vectype
5784 && current_vector_size == 0)
5785 current_vector_size = GET_MODE_SIZE (TYPE_MODE (vectype));
5786 return vectype;
5787 }
5788
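/* Illustrative sketch (not part of the original source): a typical query,
   as done during statement analysis above, derives the vector type from
   the scalar type of a statement's result

       tree scalar_type = TREE_TYPE (gimple_get_lhs (stmt));
       tree vectype = get_vectype_for_scalar_type (scalar_type);

       if (!vectype)
         return false;

   Note that the first successful query latches current_vector_size to the
   size of the returned vector type, so later queries in the same
   vectorization attempt stick to that vector size.  */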
5789 /* Function get_same_sized_vectype
5790
5791 Returns a vector type corresponding to SCALAR_TYPE of size
5792 VECTOR_TYPE if supported by the target. */
5793
5794 tree
5795 get_same_sized_vectype (tree scalar_type, tree vector_type)
5796 {
5797 return get_vectype_for_scalar_type_and_size
5798 (scalar_type, GET_MODE_SIZE (TYPE_MODE (vector_type)));
5799 }
5800
5801 /* Function vect_is_simple_use.
5802
5803 Input:
5804 LOOP_VINFO - the vect info of the loop that is being vectorized.
5805 BB_VINFO - the vect info of the basic block that is being vectorized.
5806 OPERAND - operand of a stmt in the loop or bb.
5807 DEF - the defining stmt in case OPERAND is an SSA_NAME.
5808
5809 Returns whether a stmt with OPERAND can be vectorized.
5810 For loops, supportable operands are constants, loop invariants, and operands
5811 that are defined by the current iteration of the loop. Unsupportable
5812 operands are those that are defined by a previous iteration of the loop (as
5813 is the case in reduction/induction computations).
5814 For basic blocks, supportable operands are constants and bb invariants.
5815 For now, operands defined outside the basic block are not supported. */
5816
5817 bool
5818 vect_is_simple_use (tree operand, loop_vec_info loop_vinfo,
5819 bb_vec_info bb_vinfo, gimple *def_stmt,
5820 tree *def, enum vect_def_type *dt)
5821 {
5822 basic_block bb;
5823 stmt_vec_info stmt_vinfo;
5824 struct loop *loop = NULL;
5825
5826 if (loop_vinfo)
5827 loop = LOOP_VINFO_LOOP (loop_vinfo);
5828
5829 *def_stmt = NULL;
5830 *def = NULL_TREE;
5831
5832 if (vect_print_dump_info (REPORT_DETAILS))
5833 {
5834 fprintf (vect_dump, "vect_is_simple_use: operand ");
5835 print_generic_expr (vect_dump, operand, TDF_SLIM);
5836 }
5837
5838 if (TREE_CODE (operand) == INTEGER_CST || TREE_CODE (operand) == REAL_CST)
5839 {
5840 *dt = vect_constant_def;
5841 return true;
5842 }
5843
5844 if (is_gimple_min_invariant (operand))
5845 {
5846 *def = operand;
5847 *dt = vect_external_def;
5848 return true;
5849 }
5850
5851 if (TREE_CODE (operand) == PAREN_EXPR)
5852 {
5853 if (vect_print_dump_info (REPORT_DETAILS))
5854 fprintf (vect_dump, "non-associatable copy.");
5855 operand = TREE_OPERAND (operand, 0);
5856 }
5857
5858 if (TREE_CODE (operand) != SSA_NAME)
5859 {
5860 if (vect_print_dump_info (REPORT_DETAILS))
5861 fprintf (vect_dump, "not ssa-name.");
5862 return false;
5863 }
5864
5865 *def_stmt = SSA_NAME_DEF_STMT (operand);
5866 if (*def_stmt == NULL)
5867 {
5868 if (vect_print_dump_info (REPORT_DETAILS))
5869 fprintf (vect_dump, "no def_stmt.");
5870 return false;
5871 }
5872
5873 if (vect_print_dump_info (REPORT_DETAILS))
5874 {
5875 fprintf (vect_dump, "def_stmt: ");
5876 print_gimple_stmt (vect_dump, *def_stmt, 0, TDF_SLIM);
5877 }
5878
5879 /* Empty stmt is expected only in case of a function argument.
5880 (Otherwise - we expect a phi_node or a GIMPLE_ASSIGN). */
5881 if (gimple_nop_p (*def_stmt))
5882 {
5883 *def = operand;
5884 *dt = vect_external_def;
5885 return true;
5886 }
5887
5888 bb = gimple_bb (*def_stmt);
5889
5890 if ((loop && !flow_bb_inside_loop_p (loop, bb))
5891 || (!loop && bb != BB_VINFO_BB (bb_vinfo))
5892 || (!loop && gimple_code (*def_stmt) == GIMPLE_PHI))
5893 *dt = vect_external_def;
5894 else
5895 {
5896 stmt_vinfo = vinfo_for_stmt (*def_stmt);
5897 *dt = STMT_VINFO_DEF_TYPE (stmt_vinfo);
5898 }
5899
5900 if (*dt == vect_unknown_def_type)
5901 {
5902 if (vect_print_dump_info (REPORT_DETAILS))
5903 fprintf (vect_dump, "Unsupported pattern.");
5904 return false;
5905 }
5906
5907 if (vect_print_dump_info (REPORT_DETAILS))
5908 fprintf (vect_dump, "type of def: %d.",*dt);
5909
5910 switch (gimple_code (*def_stmt))
5911 {
5912 case GIMPLE_PHI:
5913 *def = gimple_phi_result (*def_stmt);
5914 break;
5915
5916 case GIMPLE_ASSIGN:
5917 *def = gimple_assign_lhs (*def_stmt);
5918 break;
5919
5920 case GIMPLE_CALL:
5921 *def = gimple_call_lhs (*def_stmt);
5922 if (*def != NULL)
5923 break;
5924 /* FALLTHRU */
5925 default:
5926 if (vect_print_dump_info (REPORT_DETAILS))
5927 fprintf (vect_dump, "unsupported defining stmt: ");
5928 return false;
5929 }
5930
5931 return true;
5932 }
5933
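/* Illustrative sketch (not part of the original source): operands of a
   statement under analysis are usually checked like this

       gimple def_stmt;
       tree def;
       enum vect_def_type dt;

       if (!vect_is_simple_use (op, loop_vinfo, bb_vinfo,
                                &def_stmt, &def, &dt))
         return false;

   where OP is one operand of the scalar statement and DT then tells the
   caller whether the definition is a constant, loop-invariant, or an
   internal def whose vectorized counterpart must be looked up.  */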
5934 /* Function vect_is_simple_use_1.
5935
5936 Same as vect_is_simple_use but also determines the vector operand
5937 type of OPERAND and stores it to *VECTYPE. If the definition of
5938 OPERAND is vect_uninitialized_def, vect_constant_def or
5939 vect_external_def, *VECTYPE will be set to NULL_TREE and the caller
5940 is responsible for computing the best suited vector type for the
5941 scalar operand. */
5942
5943 bool
5944 vect_is_simple_use_1 (tree operand, loop_vec_info loop_vinfo,
5945 bb_vec_info bb_vinfo, gimple *def_stmt,
5946 tree *def, enum vect_def_type *dt, tree *vectype)
5947 {
5948 if (!vect_is_simple_use (operand, loop_vinfo, bb_vinfo, def_stmt, def, dt))
5949 return false;
5950
5951 /* Now get a vector type if the def is internal, otherwise supply
5952 NULL_TREE and leave it up to the caller to figure out a proper
5953 type for the use stmt. */
5954 if (*dt == vect_internal_def
5955 || *dt == vect_induction_def
5956 || *dt == vect_reduction_def
5957 || *dt == vect_double_reduction_def
5958 || *dt == vect_nested_cycle)
5959 {
5960 stmt_vec_info stmt_info = vinfo_for_stmt (*def_stmt);
5961
5962 if (STMT_VINFO_IN_PATTERN_P (stmt_info)
5963 && !STMT_VINFO_RELEVANT (stmt_info)
5964 && !STMT_VINFO_LIVE_P (stmt_info))
5965 stmt_info = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
5966
5967 *vectype = STMT_VINFO_VECTYPE (stmt_info);
5968 gcc_assert (*vectype != NULL_TREE);
5969 }
5970 else if (*dt == vect_uninitialized_def
5971 || *dt == vect_constant_def
5972 || *dt == vect_external_def)
5973 *vectype = NULL_TREE;
5974 else
5975 gcc_unreachable ();
5976
5977 return true;
5978 }
5979
5980
5981 /* Function supportable_widening_operation
5982
5983 Check whether an operation represented by the code CODE is a
5984 widening operation that is supported by the target platform in
5985 vector form (i.e., when operating on arguments of type VECTYPE_IN
5986 producing a result of type VECTYPE_OUT).
5987
5988 Widening operations we currently support are NOP (CONVERT), FLOAT,
5989 WIDEN_MULT and WIDEN_LSHIFT. This function checks if these are supported
5990 by the target platform either directly (via vector tree-codes), or via
5991 target builtins.
5992
5993 Output:
5994 - CODE1 and CODE2 are codes of vector operations to be used when
5995 vectorizing the operation, if available.
5996 - DECL1 and DECL2 are decls of target builtin functions to be used
5997 when vectorizing the operation, if available. In this case,
5998 CODE1 and CODE2 are CALL_EXPR.
5999 - MULTI_STEP_CVT determines the number of required intermediate steps in
6000 case of multi-step conversion (like char->short->int - in that case
6001 MULTI_STEP_CVT will be 1).
6002 - INTERM_TYPES contains the intermediate type required to perform the
6003 widening operation (short in the above example). */
6004
6005 bool
6006 supportable_widening_operation (enum tree_code code, gimple stmt,
6007 tree vectype_out, tree vectype_in,
6008 tree *decl1, tree *decl2,
6009 enum tree_code *code1, enum tree_code *code2,
6010 int *multi_step_cvt,
6011 VEC (tree, heap) **interm_types)
6012 {
6013 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
6014 loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_info);
6015 struct loop *vect_loop = NULL;
6016 bool ordered_p;
6017 enum machine_mode vec_mode;
6018 enum insn_code icode1, icode2;
6019 optab optab1, optab2;
6020 tree vectype = vectype_in;
6021 tree wide_vectype = vectype_out;
6022 enum tree_code c1, c2;
6023 int i;
6024 tree prev_type, intermediate_type;
6025 enum machine_mode intermediate_mode, prev_mode;
6026 optab optab3, optab4;
6027
6028 *multi_step_cvt = 0;
6029 if (loop_info)
6030 vect_loop = LOOP_VINFO_LOOP (loop_info);
6031
6032 /* The result of a vectorized widening operation usually requires two vectors
6033 (because the widened results do not fit into one vector). The generated
6034 vector results would normally be expected to be generated in the same
6035 order as in the original scalar computation, i.e. if 8 results are
6036 generated in each vector iteration, they are to be organized as follows:
6037 vect1: [res1,res2,res3,res4], vect2: [res5,res6,res7,res8].
6038
6039 However, in the special case that the result of the widening operation is
6040 used in a reduction computation only, the order doesn't matter (because
6041 when vectorizing a reduction we change the order of the computation).
6042 Some targets can take advantage of this and generate more efficient code.
6043 For example, targets like Altivec, that support widen_mult using a sequence
6044 of {mult_even,mult_odd} generate the following vectors:
6045 vect1: [res1,res3,res5,res7], vect2: [res2,res4,res6,res8].
6046
6047 When vectorizing outer-loops, we execute the inner-loop sequentially
6048 (each vectorized inner-loop iteration contributes to VF outer-loop
6049 iterations in parallel). We therefore don't allow changing the order
6050 of the computation in the inner-loop during outer-loop vectorization. */
6051
6052 if (vect_loop
6053 && STMT_VINFO_RELEVANT (stmt_info) == vect_used_by_reduction
6054 && !nested_in_vect_loop_p (vect_loop, stmt))
6055 ordered_p = false;
6056 else
6057 ordered_p = true;
6058
6059 if (!ordered_p
6060 && code == WIDEN_MULT_EXPR
6061 && targetm.vectorize.builtin_mul_widen_even
6062 && targetm.vectorize.builtin_mul_widen_even (vectype)
6063 && targetm.vectorize.builtin_mul_widen_odd
6064 && targetm.vectorize.builtin_mul_widen_odd (vectype))
6065 {
6066 if (vect_print_dump_info (REPORT_DETAILS))
6067 fprintf (vect_dump, "Unordered widening operation detected.");
6068
6069 *code1 = *code2 = CALL_EXPR;
6070 *decl1 = targetm.vectorize.builtin_mul_widen_even (vectype);
6071 *decl2 = targetm.vectorize.builtin_mul_widen_odd (vectype);
6072 return true;
6073 }
6074
6075 switch (code)
6076 {
6077 case WIDEN_MULT_EXPR:
6078 c1 = VEC_WIDEN_MULT_LO_EXPR;
6079 c2 = VEC_WIDEN_MULT_HI_EXPR;
6080 break;
6081
6082 case WIDEN_LSHIFT_EXPR:
6083 c1 = VEC_WIDEN_LSHIFT_LO_EXPR;
6084 c2 = VEC_WIDEN_LSHIFT_HI_EXPR;
6085 break;
6086
6087 CASE_CONVERT:
6088 c1 = VEC_UNPACK_LO_EXPR;
6089 c2 = VEC_UNPACK_HI_EXPR;
6090 break;
6091
6092 case FLOAT_EXPR:
6093 c1 = VEC_UNPACK_FLOAT_LO_EXPR;
6094 c2 = VEC_UNPACK_FLOAT_HI_EXPR;
6095 break;
6096
6097 case FIX_TRUNC_EXPR:
6098 /* ??? Not yet implemented due to missing VEC_UNPACK_FIX_TRUNC_HI_EXPR/
6099 VEC_UNPACK_FIX_TRUNC_LO_EXPR tree codes and optabs used for
6100 computing the operation. */
6101 return false;
6102
6103 default:
6104 gcc_unreachable ();
6105 }
6106
6107 if (BYTES_BIG_ENDIAN)
6108 {
6109 enum tree_code ctmp = c1;
6110 c1 = c2;
6111 c2 = ctmp;
6112 }
6113
6114 if (code == FIX_TRUNC_EXPR)
6115 {
6116 /* The signedness is determined from output operand. */
6117 optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
6118 optab2 = optab_for_tree_code (c2, vectype_out, optab_default);
6119 }
6120 else
6121 {
6122 optab1 = optab_for_tree_code (c1, vectype, optab_default);
6123 optab2 = optab_for_tree_code (c2, vectype, optab_default);
6124 }
6125
6126 if (!optab1 || !optab2)
6127 return false;
6128
6129 vec_mode = TYPE_MODE (vectype);
6130 if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing
6131 || (icode2 = optab_handler (optab2, vec_mode)) == CODE_FOR_nothing)
6132 return false;
6133
6134 *code1 = c1;
6135 *code2 = c2;
6136
6137 if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
6138 && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
6139 return true;
6140
6141 /* Check if it's a multi-step conversion that can be done using intermediate
6142 types. */
6143
6144 prev_type = vectype;
6145 prev_mode = vec_mode;
6146
6147 if (!CONVERT_EXPR_CODE_P (code))
6148 return false;
6149
6150 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
6151 intermediate steps in the promotion sequence. We try
6152 MAX_INTERM_CVT_STEPS to get to WIDE_VECTYPE, and fail if we do
6153 not. */
6154 *interm_types = VEC_alloc (tree, heap, MAX_INTERM_CVT_STEPS);
6155 for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
6156 {
6157 intermediate_mode = insn_data[icode1].operand[0].mode;
6158 intermediate_type
6159 = lang_hooks.types.type_for_mode (intermediate_mode,
6160 TYPE_UNSIGNED (prev_type));
6161 optab3 = optab_for_tree_code (c1, intermediate_type, optab_default);
6162 optab4 = optab_for_tree_code (c2, intermediate_type, optab_default);
6163
6164 if (!optab3 || !optab4
6165 || (icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing
6166 || insn_data[icode1].operand[0].mode != intermediate_mode
6167 || (icode2 = optab_handler (optab2, prev_mode)) == CODE_FOR_nothing
6168 || insn_data[icode2].operand[0].mode != intermediate_mode
6169 || ((icode1 = optab_handler (optab3, intermediate_mode))
6170 == CODE_FOR_nothing)
6171 || ((icode2 = optab_handler (optab4, intermediate_mode))
6172 == CODE_FOR_nothing))
6173 break;
6174
6175 VEC_quick_push (tree, *interm_types, intermediate_type);
6176 (*multi_step_cvt)++;
6177
6178 if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
6179 && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
6180 return true;
6181
6182 prev_type = intermediate_type;
6183 prev_mode = intermediate_mode;
6184 }
6185
6186 VEC_free (tree, heap, *interm_types);
6187 return false;
6188 }
6189
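/* Illustrative sketch (not part of the original source): for a char -> int
   conversion on a target with 128-bit vectors the widening typically needs
   one intermediate step through short, so a caller would see something like

       enum tree_code code1, code2;
       tree decl1, decl2;
       int multi_step_cvt;
       VEC (tree, heap) *interm_types = NULL;

       if (supportable_widening_operation (NOP_EXPR, stmt, vectype_out,
                                           vectype_in, &decl1, &decl2,
                                           &code1, &code2, &multi_step_cvt,
                                           &interm_types))
         gcc_assert (multi_step_cvt <= MAX_INTERM_CVT_STEPS);

   with MULTI_STEP_CVT == 1, INTERM_TYPES holding the intermediate short
   vector type, and CODE1/CODE2 set to the VEC_UNPACK_LO/HI codes (swapped
   on big-endian targets).  */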
6190
6191 /* Function supportable_narrowing_operation
6192
6193 Check whether an operation represented by the code CODE is a
6194 narrowing operation that is supported by the target platform in
6195 vector form (i.e., when operating on arguments of type VECTYPE_IN
6196 and producing a result of type VECTYPE_OUT).
6197
6198 Narrowing operations we currently support are NOP (CONVERT) and
6199 FIX_TRUNC. This function checks if these operations are supported by
6200 the target platform directly via vector tree-codes.
6201
6202 Output:
6203 - CODE1 is the code of a vector operation to be used when
6204 vectorizing the operation, if available.
6205 - MULTI_STEP_CVT determines the number of required intermediate steps in
6206 case of multi-step conversion (like int->short->char - in that case
6207 MULTI_STEP_CVT will be 1).
6208 - INTERM_TYPES contains the intermediate type required to perform the
6209 narrowing operation (short in the above example). */
6210
6211 bool
6212 supportable_narrowing_operation (enum tree_code code,
6213 tree vectype_out, tree vectype_in,
6214 enum tree_code *code1, int *multi_step_cvt,
6215 VEC (tree, heap) **interm_types)
6216 {
6217 enum machine_mode vec_mode;
6218 enum insn_code icode1;
6219 optab optab1, interm_optab;
6220 tree vectype = vectype_in;
6221 tree narrow_vectype = vectype_out;
6222 enum tree_code c1;
6223 tree intermediate_type;
6224 enum machine_mode intermediate_mode, prev_mode;
6225 int i;
6226 bool uns;
6227
6228 *multi_step_cvt = 0;
6229 switch (code)
6230 {
6231 CASE_CONVERT:
6232 c1 = VEC_PACK_TRUNC_EXPR;
6233 break;
6234
6235 case FIX_TRUNC_EXPR:
6236 c1 = VEC_PACK_FIX_TRUNC_EXPR;
6237 break;
6238
6239 case FLOAT_EXPR:
6240 /* ??? Not yet implemented due to missing VEC_PACK_FLOAT_EXPR
6241 tree code and optabs used for computing the operation. */
6242 return false;
6243
6244 default:
6245 gcc_unreachable ();
6246 }
6247
6248 if (code == FIX_TRUNC_EXPR)
6249 /* The signedness is determined from output operand. */
6250 optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
6251 else
6252 optab1 = optab_for_tree_code (c1, vectype, optab_default);
6253
6254 if (!optab1)
6255 return false;
6256
6257 vec_mode = TYPE_MODE (vectype);
6258 if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing)
6259 return false;
6260
6261 *code1 = c1;
6262
6263 if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
6264 return true;
6265
6266 /* Check if it's a multi-step conversion that can be done using intermediate
6267 types. */
6268 prev_mode = vec_mode;
6269 if (code == FIX_TRUNC_EXPR)
6270 uns = TYPE_UNSIGNED (vectype_out);
6271 else
6272 uns = TYPE_UNSIGNED (vectype);
6273
6274 /* For multi-step FIX_TRUNC_EXPR prefer signed floating to integer
6275 conversion over unsigned, as unsigned FIX_TRUNC_EXPR is often more
6276 costly than signed. */
6277 if (code == FIX_TRUNC_EXPR && uns)
6278 {
6279 enum insn_code icode2;
6280
6281 intermediate_type
6282 = lang_hooks.types.type_for_mode (TYPE_MODE (vectype_out), 0);
6283 interm_optab
6284 = optab_for_tree_code (c1, intermediate_type, optab_default);
6285 if (interm_optab != NULL
6286 && (icode2 = optab_handler (optab1, vec_mode)) != CODE_FOR_nothing
6287 && insn_data[icode1].operand[0].mode
6288 == insn_data[icode2].operand[0].mode)
6289 {
6290 uns = false;
6291 optab1 = interm_optab;
6292 icode1 = icode2;
6293 }
6294 }
6295
6296 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
6297 intermediate steps in the narrowing sequence. We try
6298 MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do not. */
6299 *interm_types = VEC_alloc (tree, heap, MAX_INTERM_CVT_STEPS);
6300 for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
6301 {
6302 intermediate_mode = insn_data[icode1].operand[0].mode;
6303 intermediate_type
6304 = lang_hooks.types.type_for_mode (intermediate_mode, uns);
6305 interm_optab
6306 = optab_for_tree_code (VEC_PACK_TRUNC_EXPR, intermediate_type,
6307 optab_default);
6308 if (!interm_optab
6309 || ((icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing)
6310 || insn_data[icode1].operand[0].mode != intermediate_mode
6311 || ((icode1 = optab_handler (interm_optab, intermediate_mode))
6312 == CODE_FOR_nothing))
6313 break;
6314
6315 VEC_quick_push (tree, *interm_types, intermediate_type);
6316 (*multi_step_cvt)++;
6317
6318 if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
6319 return true;
6320
6321 prev_mode = intermediate_mode;
6322 optab1 = interm_optab;
6323 }
6324
6325 VEC_free (tree, heap, *interm_types);
6326 return false;
6327 }
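
/* Illustrative sketch (not part of the original source): the narrowing
   counterpart is queried the same way, e.g. for an int -> char conversion

       enum tree_code code1;
       int multi_step_cvt;
       VEC (tree, heap) *interm_types = NULL;

       if (supportable_narrowing_operation (NOP_EXPR, vectype_out, vectype_in,
                                            &code1, &multi_step_cvt,
                                            &interm_types))
         ...

   which typically reports one intermediate step through short and sets
   CODE1 to VEC_PACK_TRUNC_EXPR for each packing step.  */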