1 /* Statement Analysis and Transformation for Vectorization
2 Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011
3 Free Software Foundation, Inc.
4 Contributed by Dorit Naishlos <dorit@il.ibm.com>
5 and Ira Rosen <irar@il.ibm.com>
6
7 This file is part of GCC.
8
9 GCC is free software; you can redistribute it and/or modify it under
10 the terms of the GNU General Public License as published by the Free
11 Software Foundation; either version 3, or (at your option) any later
12 version.
13
14 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
15 WARRANTY; without even the implied warranty of MERCHANTABILITY or
16 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 for more details.
18
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
22
23 #include "config.h"
24 #include "system.h"
25 #include "coretypes.h"
26 #include "tm.h"
27 #include "ggc.h"
28 #include "tree.h"
29 #include "target.h"
30 #include "basic-block.h"
31 #include "tree-pretty-print.h"
32 #include "gimple-pretty-print.h"
33 #include "tree-flow.h"
34 #include "tree-dump.h"
35 #include "cfgloop.h"
36 #include "cfglayout.h"
37 #include "expr.h"
38 #include "recog.h"
39 #include "optabs.h"
40 #include "diagnostic-core.h"
41 #include "tree-vectorizer.h"
42 #include "langhooks.h"
43
44
45 /* Return a variable of type ELEM_TYPE[NELEMS]. */
46
47 static tree
48 create_vector_array (tree elem_type, unsigned HOST_WIDE_INT nelems)
49 {
50 return create_tmp_var (build_array_type_nelts (elem_type, nelems),
51 "vect_array");
52 }
53
54 /* ARRAY is an array of vectors created by create_vector_array.
55 Return an SSA_NAME for the vector in index N. The reference
56 is part of the vectorization of STMT and the vector is associated
57 with scalar destination SCALAR_DEST. */
58
59 static tree
60 read_vector_array (gimple stmt, gimple_stmt_iterator *gsi, tree scalar_dest,
61 tree array, unsigned HOST_WIDE_INT n)
62 {
63 tree vect_type, vect, vect_name, array_ref;
64 gimple new_stmt;
65
66 gcc_assert (TREE_CODE (TREE_TYPE (array)) == ARRAY_TYPE);
67 vect_type = TREE_TYPE (TREE_TYPE (array));
68 vect = vect_create_destination_var (scalar_dest, vect_type);
69 array_ref = build4 (ARRAY_REF, vect_type, array,
70 build_int_cst (size_type_node, n),
71 NULL_TREE, NULL_TREE);
72
73 new_stmt = gimple_build_assign (vect, array_ref);
74 vect_name = make_ssa_name (vect, new_stmt);
75 gimple_assign_set_lhs (new_stmt, vect_name);
76 vect_finish_stmt_generation (stmt, new_stmt, gsi);
77 mark_symbols_for_renaming (new_stmt);
78
79 return vect_name;
80 }
81
82 /* ARRAY is an array of vectors created by create_vector_array.
83 Emit code to store SSA_NAME VECT in index N of the array.
84 The store is part of the vectorization of STMT. */
85
86 static void
87 write_vector_array (gimple stmt, gimple_stmt_iterator *gsi, tree vect,
88 tree array, unsigned HOST_WIDE_INT n)
89 {
90 tree array_ref;
91 gimple new_stmt;
92
93 array_ref = build4 (ARRAY_REF, TREE_TYPE (vect), array,
94 build_int_cst (size_type_node, n),
95 NULL_TREE, NULL_TREE);
96
97 new_stmt = gimple_build_assign (array_ref, vect);
98 vect_finish_stmt_generation (stmt, new_stmt, gsi);
99 mark_symbols_for_renaming (new_stmt);
100 }
101
102 /* PTR is a pointer to an array of type TYPE. Return a representation
103 of *PTR. The memory reference replaces those in FIRST_DR
104 (and its group). */
105
106 static tree
107 create_array_ref (tree type, tree ptr, struct data_reference *first_dr)
108 {
109 struct ptr_info_def *pi;
110 tree mem_ref, alias_ptr_type;
111
112 alias_ptr_type = reference_alias_ptr_type (DR_REF (first_dr));
113 mem_ref = build2 (MEM_REF, type, ptr, build_int_cst (alias_ptr_type, 0));
114 /* Arrays have the same alignment as their type. */
115 pi = get_ptr_info (ptr);
116 pi->align = TYPE_ALIGN_UNIT (type);
117 pi->misalign = 0;
118 return mem_ref;
119 }
120
121 /* Utility functions used by vect_mark_stmts_to_be_vectorized. */
122
123 /* Function vect_mark_relevant.
124
125 Mark STMT as "relevant for vectorization" and add it to WORKLIST. */
126
127 static void
128 vect_mark_relevant (VEC(gimple,heap) **worklist, gimple stmt,
129 enum vect_relevant relevant, bool live_p,
130 bool used_in_pattern)
131 {
132 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
133 enum vect_relevant save_relevant = STMT_VINFO_RELEVANT (stmt_info);
134 bool save_live_p = STMT_VINFO_LIVE_P (stmt_info);
135 gimple pattern_stmt;
136
137 if (vect_print_dump_info (REPORT_DETAILS))
138 fprintf (vect_dump, "mark relevant %d, live %d.", relevant, live_p);
139
140 /* If this stmt is an original stmt in a pattern, we might need to mark its
141 related pattern stmt instead of the original stmt. However, such stmts
142 may have their own uses that are not in any pattern; in such cases the
143 stmt itself should be marked. */
144 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
145 {
146 bool found = false;
147 if (!used_in_pattern)
148 {
149 imm_use_iterator imm_iter;
150 use_operand_p use_p;
151 gimple use_stmt;
152 tree lhs;
153
154 if (is_gimple_assign (stmt))
155 lhs = gimple_assign_lhs (stmt);
156 else
157 lhs = gimple_call_lhs (stmt);
158
159 /* This use is outside of any pattern; if LHS has other uses that are
160 pattern uses, we should mark the stmt itself, and not the pattern
161 stmt. */
162 if (TREE_CODE (lhs) == SSA_NAME)
163 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
164 {
165 if (is_gimple_debug (USE_STMT (use_p)))
166 continue;
167 use_stmt = USE_STMT (use_p);
168
169 if (vinfo_for_stmt (use_stmt)
170 && STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (use_stmt)))
171 {
172 found = true;
173 break;
174 }
175 }
176 }
177
178 if (!found)
179 {
180 /* This is the last stmt in a sequence that was detected as a
181 pattern that can potentially be vectorized. Don't mark the stmt
182 as relevant/live because it's not going to be vectorized.
183 Instead mark the pattern-stmt that replaces it. */
184
185 pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
186
187 if (vect_print_dump_info (REPORT_DETAILS))
188 fprintf (vect_dump, "last stmt in pattern. don't mark"
189 " relevant/live.");
190 stmt_info = vinfo_for_stmt (pattern_stmt);
191 gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info) == stmt);
192 save_relevant = STMT_VINFO_RELEVANT (stmt_info);
193 save_live_p = STMT_VINFO_LIVE_P (stmt_info);
194 stmt = pattern_stmt;
195 }
196 }
197
198 STMT_VINFO_LIVE_P (stmt_info) |= live_p;
199 if (relevant > STMT_VINFO_RELEVANT (stmt_info))
200 STMT_VINFO_RELEVANT (stmt_info) = relevant;
201
202 if (STMT_VINFO_RELEVANT (stmt_info) == save_relevant
203 && STMT_VINFO_LIVE_P (stmt_info) == save_live_p)
204 {
205 if (vect_print_dump_info (REPORT_DETAILS))
206 fprintf (vect_dump, "already marked relevant/live.");
207 return;
208 }
209
210 VEC_safe_push (gimple, heap, *worklist, stmt);
211 }
212
213
214 /* Function vect_stmt_relevant_p.
215
216 Return true if STMT in loop that is represented by LOOP_VINFO is
217 "relevant for vectorization".
218
219 A stmt is considered "relevant for vectorization" if:
220 - it has uses outside the loop.
221 - it has vdefs (it alters memory).
222 - it is a control stmt in the loop (except for the exit condition).
223
224 CHECKME: what other side effects would the vectorizer allow? */
225
226 static bool
227 vect_stmt_relevant_p (gimple stmt, loop_vec_info loop_vinfo,
228 enum vect_relevant *relevant, bool *live_p)
229 {
230 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
231 ssa_op_iter op_iter;
232 imm_use_iterator imm_iter;
233 use_operand_p use_p;
234 def_operand_p def_p;
235
236 *relevant = vect_unused_in_scope;
237 *live_p = false;
238
239 /* cond stmt other than loop exit cond. */
240 if (is_ctrl_stmt (stmt)
241 && STMT_VINFO_TYPE (vinfo_for_stmt (stmt))
242 != loop_exit_ctrl_vec_info_type)
243 *relevant = vect_used_in_scope;
244
245 /* changing memory. */
246 if (gimple_code (stmt) != GIMPLE_PHI)
247 if (gimple_vdef (stmt))
248 {
249 if (vect_print_dump_info (REPORT_DETAILS))
250 fprintf (vect_dump, "vec_stmt_relevant_p: stmt has vdefs.");
251 *relevant = vect_used_in_scope;
252 }
253
254 /* uses outside the loop. */
255 FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt, op_iter, SSA_OP_DEF)
256 {
257 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, DEF_FROM_PTR (def_p))
258 {
259 basic_block bb = gimple_bb (USE_STMT (use_p));
260 if (!flow_bb_inside_loop_p (loop, bb))
261 {
262 if (vect_print_dump_info (REPORT_DETAILS))
263 fprintf (vect_dump, "vec_stmt_relevant_p: used out of loop.");
264
265 if (is_gimple_debug (USE_STMT (use_p)))
266 continue;
267
268 /* We expect all such uses to be in the loop exit phis
269 (because of loop closed form). */
270 gcc_assert (gimple_code (USE_STMT (use_p)) == GIMPLE_PHI);
271 gcc_assert (bb == single_exit (loop)->dest);
272
273 *live_p = true;
274 }
275 }
276 }
277
278 return (*live_p || *relevant);
279 }
280
281
282 /* Function exist_non_indexing_operands_for_use_p
283
284 USE is one of the uses attached to STMT. Check if USE is
285 used in STMT for anything other than indexing an array. */
286
287 static bool
288 exist_non_indexing_operands_for_use_p (tree use, gimple stmt)
289 {
290 tree operand;
291 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
292
293 /* USE corresponds to some operand in STMT. If there is no data
294 reference in STMT, then any operand that corresponds to USE
295 is not indexing an array. */
296 if (!STMT_VINFO_DATA_REF (stmt_info))
297 return true;
298
299 /* STMT has a data_ref. FORNOW this means that it's of one of
300 the following forms:
301 -1- ARRAY_REF = var
302 -2- var = ARRAY_REF
303 (This should have been verified in analyze_data_refs).
304
305 'var' in the second case corresponds to a def, not a use,
306 so USE cannot correspond to any operands that are not used
307 for array indexing.
308
309 Therefore, all we need to check is if STMT falls into the
310 first case, and whether var corresponds to USE. */
311
312 if (!gimple_assign_copy_p (stmt))
313 return false;
314 if (TREE_CODE (gimple_assign_lhs (stmt)) == SSA_NAME)
315 return false;
316 operand = gimple_assign_rhs1 (stmt);
317 if (TREE_CODE (operand) != SSA_NAME)
318 return false;
319
320 if (operand == use)
321 return true;
322
323 return false;
324 }
325
326
327 /*
328 Function process_use.
329
330 Inputs:
331 - a USE in STMT in a loop represented by LOOP_VINFO
332 - LIVE_P, RELEVANT - enum values to be set in the STMT_VINFO of the stmt
333 that defined USE. This is done by calling mark_relevant and passing it
334 the WORKLIST (to add DEF_STMT to the WORKLIST in case it is relevant).
335
336 Outputs:
337 Generally, LIVE_P and RELEVANT are used to define the liveness and
338 relevance info of the DEF_STMT of this USE:
339 STMT_VINFO_LIVE_P (DEF_STMT_info) <-- live_p
340 STMT_VINFO_RELEVANT (DEF_STMT_info) <-- relevant
341 Exceptions:
342 - case 1: If USE is used only for address computations (e.g. array indexing),
343 which does not need to be directly vectorized, then the liveness/relevance
344 of the respective DEF_STMT is left unchanged.
345 - case 2: If STMT is a reduction phi and DEF_STMT is a reduction stmt, we
346 skip DEF_STMT because it has already been processed.
347 - case 3: If DEF_STMT and STMT are in different nests, then "relevant" will
348 be modified accordingly.
349
350 Return true if everything is as expected. Return false otherwise. */
351
352 static bool
353 process_use (gimple stmt, tree use, loop_vec_info loop_vinfo, bool live_p,
354 enum vect_relevant relevant, VEC(gimple,heap) **worklist)
355 {
356 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
357 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
358 stmt_vec_info dstmt_vinfo;
359 basic_block bb, def_bb;
360 tree def;
361 gimple def_stmt;
362 enum vect_def_type dt;
363
364 /* case 1: we are only interested in uses that need to be vectorized. Uses
365 that are used for address computation are not considered relevant. */
366 if (!exist_non_indexing_operands_for_use_p (use, stmt))
367 return true;
368
369 if (!vect_is_simple_use (use, loop_vinfo, NULL, &def_stmt, &def, &dt))
370 {
371 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
372 fprintf (vect_dump, "not vectorized: unsupported use in stmt.");
373 return false;
374 }
375
376 if (!def_stmt || gimple_nop_p (def_stmt))
377 return true;
378
379 def_bb = gimple_bb (def_stmt);
380 if (!flow_bb_inside_loop_p (loop, def_bb))
381 {
382 if (vect_print_dump_info (REPORT_DETAILS))
383 fprintf (vect_dump, "def_stmt is out of loop.");
384 return true;
385 }
386
387 /* case 2: A reduction phi (STMT) defined by a reduction stmt (DEF_STMT).
388 DEF_STMT must have already been processed, because this should be the
389 only way that STMT, which is a reduction-phi, was put in the worklist,
390 as there should be no other uses for DEF_STMT in the loop. So we just
391 check that everything is as expected, and we are done. */
392 dstmt_vinfo = vinfo_for_stmt (def_stmt);
393 bb = gimple_bb (stmt);
394 if (gimple_code (stmt) == GIMPLE_PHI
395 && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
396 && gimple_code (def_stmt) != GIMPLE_PHI
397 && STMT_VINFO_DEF_TYPE (dstmt_vinfo) == vect_reduction_def
398 && bb->loop_father == def_bb->loop_father)
399 {
400 if (vect_print_dump_info (REPORT_DETAILS))
401 fprintf (vect_dump, "reduc-stmt defining reduc-phi in the same nest.");
402 if (STMT_VINFO_IN_PATTERN_P (dstmt_vinfo))
403 dstmt_vinfo = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (dstmt_vinfo));
404 gcc_assert (STMT_VINFO_RELEVANT (dstmt_vinfo) < vect_used_by_reduction);
405 gcc_assert (STMT_VINFO_LIVE_P (dstmt_vinfo)
406 || STMT_VINFO_RELEVANT (dstmt_vinfo) > vect_unused_in_scope);
407 return true;
408 }
409
410 /* case 3a: outer-loop stmt defining an inner-loop stmt:
411 outer-loop-header-bb:
412 d = def_stmt
413 inner-loop:
414 stmt # use (d)
415 outer-loop-tail-bb:
416 ... */
417 if (flow_loop_nested_p (def_bb->loop_father, bb->loop_father))
418 {
419 if (vect_print_dump_info (REPORT_DETAILS))
420 fprintf (vect_dump, "outer-loop def-stmt defining inner-loop stmt.");
421
422 switch (relevant)
423 {
424 case vect_unused_in_scope:
425 relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_nested_cycle) ?
426 vect_used_in_scope : vect_unused_in_scope;
427 break;
428
429 case vect_used_in_outer_by_reduction:
430 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
431 relevant = vect_used_by_reduction;
432 break;
433
434 case vect_used_in_outer:
435 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
436 relevant = vect_used_in_scope;
437 break;
438
439 case vect_used_in_scope:
440 break;
441
442 default:
443 gcc_unreachable ();
444 }
445 }
446
447 /* case 3b: inner-loop stmt defining an outer-loop stmt:
448 outer-loop-header-bb:
449 ...
450 inner-loop:
451 d = def_stmt
452 outer-loop-tail-bb (or outer-loop-exit-bb in double reduction):
453 stmt # use (d) */
454 else if (flow_loop_nested_p (bb->loop_father, def_bb->loop_father))
455 {
456 if (vect_print_dump_info (REPORT_DETAILS))
457 fprintf (vect_dump, "inner-loop def-stmt defining outer-loop stmt.");
458
459 switch (relevant)
460 {
461 case vect_unused_in_scope:
462 relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
463 || STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_double_reduction_def) ?
464 vect_used_in_outer_by_reduction : vect_unused_in_scope;
465 break;
466
467 case vect_used_by_reduction:
468 relevant = vect_used_in_outer_by_reduction;
469 break;
470
471 case vect_used_in_scope:
472 relevant = vect_used_in_outer;
473 break;
474
475 default:
476 gcc_unreachable ();
477 }
478 }
479
480 vect_mark_relevant (worklist, def_stmt, relevant, live_p,
481 is_pattern_stmt_p (stmt_vinfo));
482 return true;
483 }
484
485
486 /* Function vect_mark_stmts_to_be_vectorized.
487
488 Not all stmts in the loop need to be vectorized. For example:
489
490 for i...
491 for j...
492 1. T0 = i + j
493 2. T1 = a[T0]
494
495 3. j = j + 1
496
497 Stmts 1 and 3 do not need to be vectorized, because loop control and
498 addressing of vectorized data-refs are handled differently.
499
500 This pass detects such stmts. */
501
502 bool
503 vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo)
504 {
505 VEC(gimple,heap) *worklist;
506 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
507 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
508 unsigned int nbbs = loop->num_nodes;
509 gimple_stmt_iterator si;
510 gimple stmt;
511 unsigned int i;
512 stmt_vec_info stmt_vinfo;
513 basic_block bb;
514 gimple phi;
515 bool live_p;
516 enum vect_relevant relevant, tmp_relevant;
517 enum vect_def_type def_type;
518
519 if (vect_print_dump_info (REPORT_DETAILS))
520 fprintf (vect_dump, "=== vect_mark_stmts_to_be_vectorized ===");
521
522 worklist = VEC_alloc (gimple, heap, 64);
523
524 /* 1. Init worklist. */
525 for (i = 0; i < nbbs; i++)
526 {
527 bb = bbs[i];
528 for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
529 {
530 phi = gsi_stmt (si);
531 if (vect_print_dump_info (REPORT_DETAILS))
532 {
533 fprintf (vect_dump, "init: phi relevant? ");
534 print_gimple_stmt (vect_dump, phi, 0, TDF_SLIM);
535 }
536
537 if (vect_stmt_relevant_p (phi, loop_vinfo, &relevant, &live_p))
538 vect_mark_relevant (&worklist, phi, relevant, live_p, false);
539 }
540 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
541 {
542 stmt = gsi_stmt (si);
543 if (vect_print_dump_info (REPORT_DETAILS))
544 {
545 fprintf (vect_dump, "init: stmt relevant? ");
546 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
547 }
548
549 if (vect_stmt_relevant_p (stmt, loop_vinfo, &relevant, &live_p))
550 vect_mark_relevant (&worklist, stmt, relevant, live_p, false);
551 }
552 }
553
554 /* 2. Process_worklist */
555 while (VEC_length (gimple, worklist) > 0)
556 {
557 use_operand_p use_p;
558 ssa_op_iter iter;
559
560 stmt = VEC_pop (gimple, worklist);
561 if (vect_print_dump_info (REPORT_DETAILS))
562 {
563 fprintf (vect_dump, "worklist: examine stmt: ");
564 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
565 }
566
567 /* Examine the USEs of STMT. For each USE, mark the stmt that defines it
568 (DEF_STMT) as relevant/irrelevant and live/dead according to the
569 liveness and relevance properties of STMT. */
570 stmt_vinfo = vinfo_for_stmt (stmt);
571 relevant = STMT_VINFO_RELEVANT (stmt_vinfo);
572 live_p = STMT_VINFO_LIVE_P (stmt_vinfo);
573
574 /* Generally, the liveness and relevance properties of STMT are
575 propagated as is to the DEF_STMTs of its USEs:
576 live_p <-- STMT_VINFO_LIVE_P (STMT_VINFO)
577 relevant <-- STMT_VINFO_RELEVANT (STMT_VINFO)
578
579 One exception is when STMT has been identified as defining a reduction
580 variable; in this case we set the liveness/relevance as follows:
581 live_p = false
582 relevant = vect_used_by_reduction
583 This is because we distinguish between two kinds of relevant stmts -
584 those that are used by a reduction computation, and those that are
585 (also) used by a regular computation. This allows us later on to
586 identify stmts that are used solely by a reduction, and therefore the
587 order of the results that they produce does not have to be kept. */
588
589 def_type = STMT_VINFO_DEF_TYPE (stmt_vinfo);
590 tmp_relevant = relevant;
591 switch (def_type)
592 {
593 case vect_reduction_def:
594 switch (tmp_relevant)
595 {
596 case vect_unused_in_scope:
597 relevant = vect_used_by_reduction;
598 break;
599
600 case vect_used_by_reduction:
601 if (gimple_code (stmt) == GIMPLE_PHI)
602 break;
603 /* fall through */
604
605 default:
606 if (vect_print_dump_info (REPORT_DETAILS))
607 fprintf (vect_dump, "unsupported use of reduction.");
608
609 VEC_free (gimple, heap, worklist);
610 return false;
611 }
612
613 live_p = false;
614 break;
615
616 case vect_nested_cycle:
617 if (tmp_relevant != vect_unused_in_scope
618 && tmp_relevant != vect_used_in_outer_by_reduction
619 && tmp_relevant != vect_used_in_outer)
620 {
621 if (vect_print_dump_info (REPORT_DETAILS))
622 fprintf (vect_dump, "unsupported use of nested cycle.");
623
624 VEC_free (gimple, heap, worklist);
625 return false;
626 }
627
628 live_p = false;
629 break;
630
631 case vect_double_reduction_def:
632 if (tmp_relevant != vect_unused_in_scope
633 && tmp_relevant != vect_used_by_reduction)
634 {
635 if (vect_print_dump_info (REPORT_DETAILS))
636 fprintf (vect_dump, "unsupported use of double reduction.");
637
638 VEC_free (gimple, heap, worklist);
639 return false;
640 }
641
642 live_p = false;
643 break;
644
645 default:
646 break;
647 }
648
649 if (is_pattern_stmt_p (vinfo_for_stmt (stmt)))
650 {
651 /* Pattern statements are not inserted into the code, so
652 FOR_EACH_PHI_OR_STMT_USE optimizes their operands out, and we
653 have to scan the RHS or function arguments instead. */
654 if (is_gimple_assign (stmt))
655 {
656 enum tree_code rhs_code = gimple_assign_rhs_code (stmt);
657 tree op = gimple_assign_rhs1 (stmt);
658
659 i = 1;
660 if (rhs_code == COND_EXPR && COMPARISON_CLASS_P (op))
661 {
662 if (!process_use (stmt, TREE_OPERAND (op, 0), loop_vinfo,
663 live_p, relevant, &worklist)
664 || !process_use (stmt, TREE_OPERAND (op, 1), loop_vinfo,
665 live_p, relevant, &worklist))
666 {
667 VEC_free (gimple, heap, worklist);
668 return false;
669 }
670 i = 2;
671 }
672 for (; i < gimple_num_ops (stmt); i++)
673 {
674 op = gimple_op (stmt, i);
675 if (!process_use (stmt, op, loop_vinfo, live_p, relevant,
676 &worklist))
677 {
678 VEC_free (gimple, heap, worklist);
679 return false;
680 }
681 }
682 }
683 else if (is_gimple_call (stmt))
684 {
685 for (i = 0; i < gimple_call_num_args (stmt); i++)
686 {
687 tree arg = gimple_call_arg (stmt, i);
688 if (!process_use (stmt, arg, loop_vinfo, live_p, relevant,
689 &worklist))
690 {
691 VEC_free (gimple, heap, worklist);
692 return false;
693 }
694 }
695 }
696 }
697 else
698 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
699 {
700 tree op = USE_FROM_PTR (use_p);
701 if (!process_use (stmt, op, loop_vinfo, live_p, relevant,
702 &worklist))
703 {
704 VEC_free (gimple, heap, worklist);
705 return false;
706 }
707 }
708 } /* while worklist */
709
710 VEC_free (gimple, heap, worklist);
711 return true;
712 }
713
714
715 /* Get the cost of TYPE_OF_COST by calling the target cost builtin. */
716
717 static inline
718 int vect_get_stmt_cost (enum vect_cost_for_stmt type_of_cost)
719 {
720 tree dummy_type = NULL;
721 int dummy = 0;
722
723 return targetm.vectorize.builtin_vectorization_cost (type_of_cost,
724 dummy_type, dummy);
725 }
726
727
728 /* Get cost for STMT. */
729
730 int
731 cost_for_stmt (gimple stmt)
732 {
733 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
734
735 switch (STMT_VINFO_TYPE (stmt_info))
736 {
737 case load_vec_info_type:
738 return vect_get_stmt_cost (scalar_load);
739 case store_vec_info_type:
740 return vect_get_stmt_cost (scalar_store);
741 case op_vec_info_type:
742 case condition_vec_info_type:
743 case assignment_vec_info_type:
744 case reduc_vec_info_type:
745 case induc_vec_info_type:
746 case type_promotion_vec_info_type:
747 case type_demotion_vec_info_type:
748 case type_conversion_vec_info_type:
749 case call_vec_info_type:
750 return vect_get_stmt_cost (scalar_stmt);
751 case undef_vec_info_type:
752 default:
753 gcc_unreachable ();
754 }
755 }
756
757 /* Function vect_model_simple_cost.
758
759 Models cost for simple operations, i.e. those that only emit ncopies of a
760 single op. Right now, this does not account for multiple insns that could
761 be generated for the single vector op. We will handle that shortly. */
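/* In other words: the inside-of-loop cost below is NCOPIES vector statements,
   and each constant or external operand adds one statement of cost outside
   the loop to build the corresponding invariant vector operand (at most two
   operands are considered for now).  */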
762
763 void
764 vect_model_simple_cost (stmt_vec_info stmt_info, int ncopies,
765 enum vect_def_type *dt, slp_tree slp_node)
766 {
767 int i;
768 int inside_cost = 0, outside_cost = 0;
769
770 /* The SLP costs were already calculated during SLP tree build. */
771 if (PURE_SLP_STMT (stmt_info))
772 return;
773
774 inside_cost = ncopies * vect_get_stmt_cost (vector_stmt);
775
776 /* FORNOW: Assuming maximum 2 args per stmts. */
777 for (i = 0; i < 2; i++)
778 {
779 if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
780 outside_cost += vect_get_stmt_cost (vector_stmt);
781 }
782
783 if (vect_print_dump_info (REPORT_COST))
784 fprintf (vect_dump, "vect_model_simple_cost: inside_cost = %d, "
785 "outside_cost = %d .", inside_cost, outside_cost);
786
787 /* Set the costs either in STMT_INFO or SLP_NODE (if exists). */
788 stmt_vinfo_set_inside_of_loop_cost (stmt_info, slp_node, inside_cost);
789 stmt_vinfo_set_outside_of_loop_cost (stmt_info, slp_node, outside_cost);
790 }
791
792
793 /* Function vect_cost_strided_group_size
794
795 For strided load or store, return the group_size only if it is the first
796 load or store of a group, else return 1. This ensures that group size is
797 only returned once per group. */
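/* For instance, for a group of four interleaved stores the first store
   reports a group size of 4 and the other three report 1, so the per-group
   overhead is charged exactly once.  */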
798
799 static int
800 vect_cost_strided_group_size (stmt_vec_info stmt_info)
801 {
802 gimple first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
803
804 if (first_stmt == STMT_VINFO_STMT (stmt_info))
805 return GROUP_SIZE (stmt_info);
806
807 return 1;
808 }
809
810
811 /* Function vect_model_store_cost
812
813 Models cost for stores. In the case of strided accesses, one access
814 has the overhead of the strided access attributed to it. */
815
816 void
817 vect_model_store_cost (stmt_vec_info stmt_info, int ncopies,
818 bool store_lanes_p, enum vect_def_type dt,
819 slp_tree slp_node)
820 {
821 int group_size;
822 unsigned int inside_cost = 0, outside_cost = 0;
823 struct data_reference *first_dr;
824 gimple first_stmt;
825
826 /* The SLP costs were already calculated during SLP tree build. */
827 if (PURE_SLP_STMT (stmt_info))
828 return;
829
830 if (dt == vect_constant_def || dt == vect_external_def)
831 outside_cost = vect_get_stmt_cost (scalar_to_vec);
832
833 /* Strided access? */
834 if (STMT_VINFO_STRIDED_ACCESS (stmt_info))
835 {
836 if (slp_node)
837 {
838 first_stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (slp_node), 0);
839 group_size = 1;
840 }
841 else
842 {
843 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
844 group_size = vect_cost_strided_group_size (stmt_info);
845 }
846
847 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
848 }
849 /* Not a strided access. */
850 else
851 {
852 group_size = 1;
853 first_dr = STMT_VINFO_DATA_REF (stmt_info);
854 }
855
856 /* We assume that the cost of a single store-lanes instruction is
857 equivalent to the cost of GROUP_SIZE separate stores. If a strided
858 access is instead being provided by a permute-and-store operation,
859 include the cost of the permutes. */
860 if (!store_lanes_p && group_size > 1)
861 {
862 /* Uses a high and low interleave operation for each needed permute. */
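/* E.g., ncopies = 1 with group_size = 4 charges log2 (4) * 4 = 8 vector
   statements for the interleaving permutes.  */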
863 inside_cost = ncopies * exact_log2 (group_size) * group_size
864 * vect_get_stmt_cost (vector_stmt);
865
866 if (vect_print_dump_info (REPORT_COST))
867 fprintf (vect_dump, "vect_model_store_cost: strided group_size = %d .",
868 group_size);
869
870 }
871
872 /* Costs of the stores. */
873 vect_get_store_cost (first_dr, ncopies, &inside_cost);
874
875 if (vect_print_dump_info (REPORT_COST))
876 fprintf (vect_dump, "vect_model_store_cost: inside_cost = %d, "
877 "outside_cost = %d .", inside_cost, outside_cost);
878
879 /* Set the costs either in STMT_INFO or SLP_NODE (if exists). */
880 stmt_vinfo_set_inside_of_loop_cost (stmt_info, slp_node, inside_cost);
881 stmt_vinfo_set_outside_of_loop_cost (stmt_info, slp_node, outside_cost);
882 }
883
884
885 /* Calculate cost of DR's memory access. */
886 void
887 vect_get_store_cost (struct data_reference *dr, int ncopies,
888 unsigned int *inside_cost)
889 {
890 int alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
891
892 switch (alignment_support_scheme)
893 {
894 case dr_aligned:
895 {
896 *inside_cost += ncopies * vect_get_stmt_cost (vector_store);
897
898 if (vect_print_dump_info (REPORT_COST))
899 fprintf (vect_dump, "vect_model_store_cost: aligned.");
900
901 break;
902 }
903
904 case dr_unaligned_supported:
905 {
906 gimple stmt = DR_STMT (dr);
907 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
908 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
909
910 /* Here, we assign an additional cost for the unaligned store. */
911 *inside_cost += ncopies
912 * targetm.vectorize.builtin_vectorization_cost (unaligned_store,
913 vectype, DR_MISALIGNMENT (dr));
914
915 if (vect_print_dump_info (REPORT_COST))
916 fprintf (vect_dump, "vect_model_store_cost: unaligned supported by "
917 "hardware.");
918
919 break;
920 }
921
922 default:
923 gcc_unreachable ();
924 }
925 }
926
927
928 /* Function vect_model_load_cost
929
930 Models cost for loads. In the case of strided accesses, the last access
931 has the overhead of the strided access attributed to it. Since unaligned
932 accesses are supported for loads, we also account for the costs of the
933 access scheme chosen. */
934
935 void
936 vect_model_load_cost (stmt_vec_info stmt_info, int ncopies, bool load_lanes_p,
937 slp_tree slp_node)
938 {
939 int group_size;
940 gimple first_stmt;
941 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr;
942 unsigned int inside_cost = 0, outside_cost = 0;
943
944 /* The SLP costs were already calculated during SLP tree build. */
945 if (PURE_SLP_STMT (stmt_info))
946 return;
947
948 /* Strided accesses? */
949 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
950 if (STMT_VINFO_STRIDED_ACCESS (stmt_info) && first_stmt && !slp_node)
951 {
952 group_size = vect_cost_strided_group_size (stmt_info);
953 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
954 }
955 /* Not a strided access. */
956 else
957 {
958 group_size = 1;
959 first_dr = dr;
960 }
961
962 /* We assume that the cost of a single load-lanes instruction is
963 equivalent to the cost of GROUP_SIZE separate loads. If a strided
964 access is instead being provided by a load-and-permute operation,
965 include the cost of the permutes. */
966 if (!load_lanes_p && group_size > 1)
967 {
968 /* Uses an even and an odd extract operation for each needed permute. */
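/* E.g., ncopies = 2 with group_size = 2 charges 2 * log2 (2) * 2 = 4 vector
   statements for the even/odd extract permutes.  */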
969 inside_cost = ncopies * exact_log2 (group_size) * group_size
970 * vect_get_stmt_cost (vector_stmt);
971
972 if (vect_print_dump_info (REPORT_COST))
973 fprintf (vect_dump, "vect_model_load_cost: strided group_size = %d .",
974 group_size);
975 }
976
977 /* The loads themselves. */
978 vect_get_load_cost (first_dr, ncopies,
979 ((!STMT_VINFO_STRIDED_ACCESS (stmt_info)) || group_size > 1
980 || slp_node),
981 &inside_cost, &outside_cost);
982
983 if (vect_print_dump_info (REPORT_COST))
984 fprintf (vect_dump, "vect_model_load_cost: inside_cost = %d, "
985 "outside_cost = %d .", inside_cost, outside_cost);
986
987 /* Set the costs either in STMT_INFO or SLP_NODE (if exists). */
988 stmt_vinfo_set_inside_of_loop_cost (stmt_info, slp_node, inside_cost);
989 stmt_vinfo_set_outside_of_loop_cost (stmt_info, slp_node, outside_cost);
990 }
991
992
993 /* Calculate cost of DR's memory access. */
994 void
995 vect_get_load_cost (struct data_reference *dr, int ncopies,
996 bool add_realign_cost, unsigned int *inside_cost,
997 unsigned int *outside_cost)
998 {
999 int alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
1000
1001 switch (alignment_support_scheme)
1002 {
1003 case dr_aligned:
1004 {
1005 *inside_cost += ncopies * vect_get_stmt_cost (vector_load);
1006
1007 if (vect_print_dump_info (REPORT_COST))
1008 fprintf (vect_dump, "vect_model_load_cost: aligned.");
1009
1010 break;
1011 }
1012 case dr_unaligned_supported:
1013 {
1014 gimple stmt = DR_STMT (dr);
1015 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1016 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1017
1018 /* Here, we assign an additional cost for the unaligned load. */
1019 *inside_cost += ncopies
1020 * targetm.vectorize.builtin_vectorization_cost (unaligned_load,
1021 vectype, DR_MISALIGNMENT (dr));
1022 if (vect_print_dump_info (REPORT_COST))
1023 fprintf (vect_dump, "vect_model_load_cost: unaligned supported by "
1024 "hardware.");
1025
1026 break;
1027 }
1028 case dr_explicit_realign:
1029 {
1030 *inside_cost += ncopies * (2 * vect_get_stmt_cost (vector_load)
1031 + vect_get_stmt_cost (vector_stmt));
1032
1033 /* FIXME: If the misalignment remains fixed across the iterations of
1034 the containing loop, the following cost should be added to the
1035 outside costs. */
1036 if (targetm.vectorize.builtin_mask_for_load)
1037 *inside_cost += vect_get_stmt_cost (vector_stmt);
1038
1039 break;
1040 }
1041 case dr_explicit_realign_optimized:
1042 {
1043 if (vect_print_dump_info (REPORT_COST))
1044 fprintf (vect_dump, "vect_model_load_cost: unaligned software "
1045 "pipelined.");
1046
1047 /* Unaligned software pipeline has a load of an address, an initial
1048 load, and possibly a mask operation to "prime" the loop. However,
1049 if this is an access in a group of loads, which provide strided
1050 access, then the above cost should only be considered for one
1051 access in the group. Inside the loop, there is a load op
1052 and a realignment op. */
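/* Concretely: when ADD_REALIGN_COST, two vector statements (plus one more
   if a mask-for-load builtin exists) are charged outside the loop, and each
   copy inside the loop costs one vector load plus one realignment
   statement.  */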
1053
1054 if (add_realign_cost)
1055 {
1056 *outside_cost = 2 * vect_get_stmt_cost (vector_stmt);
1057 if (targetm.vectorize.builtin_mask_for_load)
1058 *outside_cost += vect_get_stmt_cost (vector_stmt);
1059 }
1060
1061 *inside_cost += ncopies * (vect_get_stmt_cost (vector_load)
1062 + vect_get_stmt_cost (vector_stmt));
1063 break;
1064 }
1065
1066 default:
1067 gcc_unreachable ();
1068 }
1069 }
1070
1071
1072 /* Function vect_init_vector.
1073
1074 Insert a new stmt (INIT_STMT) that initializes a new vector variable with
1075 the vector elements of VECTOR_VAR. Place the initialization at GSI if it
1076 is not NULL. Otherwise, place the initialization at the loop preheader.
1077 Return the DEF of INIT_STMT.
1078 It will be used in the vectorization of STMT. */
1079
1080 tree
1081 vect_init_vector (gimple stmt, tree vector_var, tree vector_type,
1082 gimple_stmt_iterator *gsi)
1083 {
1084 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
1085 tree new_var;
1086 gimple init_stmt;
1087 tree vec_oprnd;
1088 edge pe;
1089 tree new_temp;
1090 basic_block new_bb;
1091
1092 new_var = vect_get_new_vect_var (vector_type, vect_simple_var, "cst_");
1093 add_referenced_var (new_var);
1094 init_stmt = gimple_build_assign (new_var, vector_var);
1095 new_temp = make_ssa_name (new_var, init_stmt);
1096 gimple_assign_set_lhs (init_stmt, new_temp);
1097
1098 if (gsi)
1099 vect_finish_stmt_generation (stmt, init_stmt, gsi);
1100 else
1101 {
1102 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1103
1104 if (loop_vinfo)
1105 {
1106 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1107
1108 if (nested_in_vect_loop_p (loop, stmt))
1109 loop = loop->inner;
1110
1111 pe = loop_preheader_edge (loop);
1112 new_bb = gsi_insert_on_edge_immediate (pe, init_stmt);
1113 gcc_assert (!new_bb);
1114 }
1115 else
1116 {
1117 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
1118 basic_block bb;
1119 gimple_stmt_iterator gsi_bb_start;
1120
1121 gcc_assert (bb_vinfo);
1122 bb = BB_VINFO_BB (bb_vinfo);
1123 gsi_bb_start = gsi_after_labels (bb);
1124 gsi_insert_before (&gsi_bb_start, init_stmt, GSI_SAME_STMT);
1125 }
1126 }
1127
1128 if (vect_print_dump_info (REPORT_DETAILS))
1129 {
1130 fprintf (vect_dump, "created new init_stmt: ");
1131 print_gimple_stmt (vect_dump, init_stmt, 0, TDF_SLIM);
1132 }
1133
1134 vec_oprnd = gimple_assign_lhs (init_stmt);
1135 return vec_oprnd;
1136 }
1137
1138
1139 /* Function vect_get_vec_def_for_operand.
1140
1141 OP is an operand in STMT. This function returns a (vector) def that will be
1142 used in the vectorized stmt for STMT.
1143
1144 In the case that OP is an SSA_NAME which is defined in the loop, then
1145 STMT_VINFO_VEC_STMT of the defining stmt holds the relevant def.
1146
1147 In case OP is an invariant or constant, a new stmt that creates a vector def
1148 needs to be introduced. */
1149
1150 tree
1151 vect_get_vec_def_for_operand (tree op, gimple stmt, tree *scalar_def)
1152 {
1153 tree vec_oprnd;
1154 gimple vec_stmt;
1155 gimple def_stmt;
1156 stmt_vec_info def_stmt_info = NULL;
1157 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
1158 unsigned int nunits;
1159 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1160 tree vec_inv;
1161 tree vec_cst;
1162 tree t = NULL_TREE;
1163 tree def;
1164 int i;
1165 enum vect_def_type dt;
1166 bool is_simple_use;
1167 tree vector_type;
1168
1169 if (vect_print_dump_info (REPORT_DETAILS))
1170 {
1171 fprintf (vect_dump, "vect_get_vec_def_for_operand: ");
1172 print_generic_expr (vect_dump, op, TDF_SLIM);
1173 }
1174
1175 is_simple_use = vect_is_simple_use (op, loop_vinfo, NULL, &def_stmt, &def,
1176 &dt);
1177 gcc_assert (is_simple_use);
1178 if (vect_print_dump_info (REPORT_DETAILS))
1179 {
1180 if (def)
1181 {
1182 fprintf (vect_dump, "def = ");
1183 print_generic_expr (vect_dump, def, TDF_SLIM);
1184 }
1185 if (def_stmt)
1186 {
1187 fprintf (vect_dump, " def_stmt = ");
1188 print_gimple_stmt (vect_dump, def_stmt, 0, TDF_SLIM);
1189 }
1190 }
1191
1192 switch (dt)
1193 {
1194 /* Case 1: operand is a constant. */
1195 case vect_constant_def:
1196 {
1197 vector_type = get_vectype_for_scalar_type (TREE_TYPE (op));
1198 gcc_assert (vector_type);
1199 nunits = TYPE_VECTOR_SUBPARTS (vector_type);
1200
1201 if (scalar_def)
1202 *scalar_def = op;
1203
1204 /* Create 'vect_cst_ = {cst,cst,...,cst}' */
1205 if (vect_print_dump_info (REPORT_DETAILS))
1206 fprintf (vect_dump, "Create vector_cst. nunits = %d", nunits);
1207
1208 vec_cst = build_vector_from_val (vector_type,
1209 fold_convert (TREE_TYPE (vector_type),
1210 op));
1211 return vect_init_vector (stmt, vec_cst, vector_type, NULL);
1212 }
1213
1214 /* Case 2: operand is defined outside the loop - loop invariant. */
1215 case vect_external_def:
1216 {
1217 vector_type = get_vectype_for_scalar_type (TREE_TYPE (def));
1218 gcc_assert (vector_type);
1219 nunits = TYPE_VECTOR_SUBPARTS (vector_type);
1220
1221 if (scalar_def)
1222 *scalar_def = def;
1223
1224 /* Create 'vec_inv = {inv,inv,..,inv}' */
1225 if (vect_print_dump_info (REPORT_DETAILS))
1226 fprintf (vect_dump, "Create vector_inv.");
1227
1228 for (i = nunits - 1; i >= 0; --i)
1229 {
1230 t = tree_cons (NULL_TREE, def, t);
1231 }
1232
1233 /* FIXME: use build_constructor directly. */
1234 vec_inv = build_constructor_from_list (vector_type, t);
1235 return vect_init_vector (stmt, vec_inv, vector_type, NULL);
1236 }
1237
1238 /* Case 3: operand is defined inside the loop. */
1239 case vect_internal_def:
1240 {
1241 if (scalar_def)
1242 *scalar_def = NULL/* FIXME tuples: def_stmt*/;
1243
1244 /* Get the def from the vectorized stmt. */
1245 def_stmt_info = vinfo_for_stmt (def_stmt);
1246
1247 vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
1248 /* Get vectorized pattern statement. */
1249 if (!vec_stmt
1250 && STMT_VINFO_IN_PATTERN_P (def_stmt_info)
1251 && !STMT_VINFO_RELEVANT (def_stmt_info))
1252 vec_stmt = STMT_VINFO_VEC_STMT (vinfo_for_stmt (
1253 STMT_VINFO_RELATED_STMT (def_stmt_info)));
1254 gcc_assert (vec_stmt);
1255 if (gimple_code (vec_stmt) == GIMPLE_PHI)
1256 vec_oprnd = PHI_RESULT (vec_stmt);
1257 else if (is_gimple_call (vec_stmt))
1258 vec_oprnd = gimple_call_lhs (vec_stmt);
1259 else
1260 vec_oprnd = gimple_assign_lhs (vec_stmt);
1261 return vec_oprnd;
1262 }
1263
1264 /* Case 4: operand is defined by a loop header phi - reduction */
1265 case vect_reduction_def:
1266 case vect_double_reduction_def:
1267 case vect_nested_cycle:
1268 {
1269 struct loop *loop;
1270
1271 gcc_assert (gimple_code (def_stmt) == GIMPLE_PHI);
1272 loop = (gimple_bb (def_stmt))->loop_father;
1273
1274 /* Get the def before the loop */
1275 op = PHI_ARG_DEF_FROM_EDGE (def_stmt, loop_preheader_edge (loop));
1276 return get_initial_def_for_reduction (stmt, op, scalar_def);
1277 }
1278
1279 /* Case 5: operand is defined by loop-header phi - induction. */
1280 case vect_induction_def:
1281 {
1282 gcc_assert (gimple_code (def_stmt) == GIMPLE_PHI);
1283
1284 /* Get the def from the vectorized stmt. */
1285 def_stmt_info = vinfo_for_stmt (def_stmt);
1286 vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
1287 if (gimple_code (vec_stmt) == GIMPLE_PHI)
1288 vec_oprnd = PHI_RESULT (vec_stmt);
1289 else
1290 vec_oprnd = gimple_get_lhs (vec_stmt);
1291 return vec_oprnd;
1292 }
1293
1294 default:
1295 gcc_unreachable ();
1296 }
1297 }
1298
1299
1300 /* Function vect_get_vec_def_for_stmt_copy
1301
1302 Return a vector-def for an operand. This function is used when the
1303 vectorized stmt to be created (by the caller to this function) is a "copy"
1304 created in case the vectorized result cannot fit in one vector, and several
1305 copies of the vector-stmt are required. In this case the vector-def is
1306 retrieved from the vector stmt recorded in the STMT_VINFO_RELATED_STMT field
1307 of the stmt that defines VEC_OPRND.
1308 DT is the type of the vector def VEC_OPRND.
1309
1310 Context:
1311 In case the vectorization factor (VF) is bigger than the number
1312 of elements that can fit in a vectype (nunits), we have to generate
1313 more than one vector stmt to vectorize the scalar stmt. This situation
1314 arises when there are multiple data-types operated upon in the loop; the
1315 smallest data-type determines the VF, and as a result, when vectorizing
1316 stmts operating on wider types we need to create 'VF/nunits' "copies" of the
1317 vector stmt (each computing a vector of 'nunits' results, and together
1318 computing 'VF' results in each iteration). This function is called when
1319 vectorizing such a stmt (e.g. vectorizing S2 in the illustration below, in
1320 which VF=16 and nunits=4, so the number of copies required is 4):
1321
1322 scalar stmt: vectorized into: STMT_VINFO_RELATED_STMT
1323
1324 S1: x = load VS1.0: vx.0 = memref0 VS1.1
1325 VS1.1: vx.1 = memref1 VS1.2
1326 VS1.2: vx.2 = memref2 VS1.3
1327 VS1.3: vx.3 = memref3
1328
1329 S2: z = x + ... VSnew.0: vz0 = vx.0 + ... VSnew.1
1330 VSnew.1: vz1 = vx.1 + ... VSnew.2
1331 VSnew.2: vz2 = vx.2 + ... VSnew.3
1332 VSnew.3: vz3 = vx.3 + ...
1333
1334 The vectorization of S1 is explained in vectorizable_load.
1335 The vectorization of S2:
1336 To create the first vector-stmt out of the 4 copies - VSnew.0 -
1337 the function 'vect_get_vec_def_for_operand' is called to
1338 get the relevant vector-def for each operand of S2. For operand x it
1339 returns the vector-def 'vx.0'.
1340
1341 To create the remaining copies of the vector-stmt (VSnew.j), this
1342 function is called to get the relevant vector-def for each operand. It is
1343 obtained from the respective VS1.j stmt, which is recorded in the
1344 STMT_VINFO_RELATED_STMT field of the stmt that defines VEC_OPRND.
1345
1346 For example, to obtain the vector-def 'vx.1' in order to create the
1347 vector stmt 'VSnew.1', this function is called with VEC_OPRND='vx.0'.
1348 Given 'vx0' we obtain the stmt that defines it ('VS1.0'); from the
1349 STMT_VINFO_RELATED_STMT field of 'VS1.0' we obtain the next copy - 'VS1.1',
1350 and return its def ('vx.1').
1351 Overall, to create the above sequence this function will be called 3 times:
1352 vx.1 = vect_get_vec_def_for_stmt_copy (dt, vx.0);
1353 vx.2 = vect_get_vec_def_for_stmt_copy (dt, vx.1);
1354 vx.3 = vect_get_vec_def_for_stmt_copy (dt, vx.2); */
1355
1356 tree
1357 vect_get_vec_def_for_stmt_copy (enum vect_def_type dt, tree vec_oprnd)
1358 {
1359 gimple vec_stmt_for_operand;
1360 stmt_vec_info def_stmt_info;
1361
1362 /* Do nothing; can reuse same def. */
1363 if (dt == vect_external_def || dt == vect_constant_def )
1364 return vec_oprnd;
1365
1366 vec_stmt_for_operand = SSA_NAME_DEF_STMT (vec_oprnd);
1367 def_stmt_info = vinfo_for_stmt (vec_stmt_for_operand);
1368 gcc_assert (def_stmt_info);
1369 vec_stmt_for_operand = STMT_VINFO_RELATED_STMT (def_stmt_info);
1370 gcc_assert (vec_stmt_for_operand);
1371 vec_oprnd = gimple_get_lhs (vec_stmt_for_operand);
1372 if (gimple_code (vec_stmt_for_operand) == GIMPLE_PHI)
1373 vec_oprnd = PHI_RESULT (vec_stmt_for_operand);
1374 else
1375 vec_oprnd = gimple_get_lhs (vec_stmt_for_operand);
1376 return vec_oprnd;
1377 }
1378
1379
1380 /* Get vectorized definitions for the operands to create a copy of an original
1381 stmt. See vect_get_vec_def_for_stmt_copy () for details. */
1382
1383 static void
1384 vect_get_vec_defs_for_stmt_copy (enum vect_def_type *dt,
1385 VEC(tree,heap) **vec_oprnds0,
1386 VEC(tree,heap) **vec_oprnds1)
1387 {
1388 tree vec_oprnd = VEC_pop (tree, *vec_oprnds0);
1389
1390 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd);
1391 VEC_quick_push (tree, *vec_oprnds0, vec_oprnd);
1392
1393 if (vec_oprnds1 && *vec_oprnds1)
1394 {
1395 vec_oprnd = VEC_pop (tree, *vec_oprnds1);
1396 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[1], vec_oprnd);
1397 VEC_quick_push (tree, *vec_oprnds1, vec_oprnd);
1398 }
1399 }
1400
1401
1402 /* Get vectorized definitions for OP0 and OP1, from SLP_NODE if it is not
1403 NULL. */
1404
1405 static void
1406 vect_get_vec_defs (tree op0, tree op1, gimple stmt,
1407 VEC(tree,heap) **vec_oprnds0, VEC(tree,heap) **vec_oprnds1,
1408 slp_tree slp_node)
1409 {
1410 if (slp_node)
1411 vect_get_slp_defs (op0, op1, slp_node, vec_oprnds0, vec_oprnds1, -1);
1412 else
1413 {
1414 tree vec_oprnd;
1415
1416 *vec_oprnds0 = VEC_alloc (tree, heap, 1);
1417 vec_oprnd = vect_get_vec_def_for_operand (op0, stmt, NULL);
1418 VEC_quick_push (tree, *vec_oprnds0, vec_oprnd);
1419
1420 if (op1)
1421 {
1422 *vec_oprnds1 = VEC_alloc (tree, heap, 1);
1423 vec_oprnd = vect_get_vec_def_for_operand (op1, stmt, NULL);
1424 VEC_quick_push (tree, *vec_oprnds1, vec_oprnd);
1425 }
1426 }
1427 }
1428
1429
1430 /* Function vect_finish_stmt_generation.
1431
1432 Insert a new stmt. */
1433
1434 void
1435 vect_finish_stmt_generation (gimple stmt, gimple vec_stmt,
1436 gimple_stmt_iterator *gsi)
1437 {
1438 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1439 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1440 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
1441
1442 gcc_assert (gimple_code (stmt) != GIMPLE_LABEL);
1443
1444 gsi_insert_before (gsi, vec_stmt, GSI_SAME_STMT);
1445
1446 set_vinfo_for_stmt (vec_stmt, new_stmt_vec_info (vec_stmt, loop_vinfo,
1447 bb_vinfo));
1448
1449 if (vect_print_dump_info (REPORT_DETAILS))
1450 {
1451 fprintf (vect_dump, "add new stmt: ");
1452 print_gimple_stmt (vect_dump, vec_stmt, 0, TDF_SLIM);
1453 }
1454
1455 gimple_set_location (vec_stmt, gimple_location (stmt));
1456 }
1457
1458 /* Checks if CALL can be vectorized with input vector type VECTYPE_IN and
1459 output vector type VECTYPE_OUT. Returns a function declaration if the
1460 target has a vectorized version of the function, or NULL_TREE otherwise. */
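/* For example, some targets map a call to a floating-point sqrt builtin to
   their vector square-root builtin when a suitable one exists for these
   vector types; which calls are handled is entirely target-specific.  */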
1461
1462 tree
1463 vectorizable_function (gimple call, tree vectype_out, tree vectype_in)
1464 {
1465 tree fndecl = gimple_call_fndecl (call);
1466
1467 /* We only handle functions that do not read or clobber memory -- i.e.
1468 const or novops ones. */
1469 if (!(gimple_call_flags (call) & (ECF_CONST | ECF_NOVOPS)))
1470 return NULL_TREE;
1471
1472 if (!fndecl
1473 || TREE_CODE (fndecl) != FUNCTION_DECL
1474 || !DECL_BUILT_IN (fndecl))
1475 return NULL_TREE;
1476
1477 return targetm.vectorize.builtin_vectorized_function (fndecl, vectype_out,
1478 vectype_in);
1479 }
1480
1481 /* Function vectorizable_call.
1482
1483 Check if STMT performs a function call that can be vectorized.
1484 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
1485 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
1486 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
1487
1488 static bool
1489 vectorizable_call (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt)
1490 {
1491 tree vec_dest;
1492 tree scalar_dest;
1493 tree op, type;
1494 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
1495 stmt_vec_info stmt_info = vinfo_for_stmt (stmt), prev_stmt_info;
1496 tree vectype_out, vectype_in;
1497 int nunits_in;
1498 int nunits_out;
1499 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1500 tree fndecl, new_temp, def, rhs_type;
1501 gimple def_stmt;
1502 enum vect_def_type dt[3]
1503 = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
1504 gimple new_stmt = NULL;
1505 int ncopies, j;
1506 VEC(tree, heap) *vargs = NULL;
1507 enum { NARROW, NONE, WIDEN } modifier;
1508 size_t i, nargs;
1509 tree lhs;
1510
1511 /* FORNOW: unsupported in basic block SLP. */
1512 gcc_assert (loop_vinfo);
1513
1514 if (!STMT_VINFO_RELEVANT_P (stmt_info))
1515 return false;
1516
1517 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
1518 return false;
1519
1520 /* FORNOW: SLP not supported. */
1521 if (STMT_SLP_TYPE (stmt_info))
1522 return false;
1523
1524 /* Is STMT a vectorizable call? */
1525 if (!is_gimple_call (stmt))
1526 return false;
1527
1528 if (TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
1529 return false;
1530
1531 if (stmt_can_throw_internal (stmt))
1532 return false;
1533
1534 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
1535
1536 /* Process function arguments. */
1537 rhs_type = NULL_TREE;
1538 vectype_in = NULL_TREE;
1539 nargs = gimple_call_num_args (stmt);
1540
1541 /* Bail out if the function has more than three arguments; we do not have
1542 interesting builtin functions to vectorize with more than two arguments
1543 except for fma. A call with no arguments is not handled either. */
1544 if (nargs == 0 || nargs > 3)
1545 return false;
1546
1547 for (i = 0; i < nargs; i++)
1548 {
1549 tree opvectype;
1550
1551 op = gimple_call_arg (stmt, i);
1552
1553 /* We can only handle calls with arguments of the same type. */
1554 if (rhs_type
1555 && !types_compatible_p (rhs_type, TREE_TYPE (op)))
1556 {
1557 if (vect_print_dump_info (REPORT_DETAILS))
1558 fprintf (vect_dump, "argument types differ.");
1559 return false;
1560 }
1561 if (!rhs_type)
1562 rhs_type = TREE_TYPE (op);
1563
1564 if (!vect_is_simple_use_1 (op, loop_vinfo, NULL,
1565 &def_stmt, &def, &dt[i], &opvectype))
1566 {
1567 if (vect_print_dump_info (REPORT_DETAILS))
1568 fprintf (vect_dump, "use not simple.");
1569 return false;
1570 }
1571
1572 if (!vectype_in)
1573 vectype_in = opvectype;
1574 else if (opvectype
1575 && opvectype != vectype_in)
1576 {
1577 if (vect_print_dump_info (REPORT_DETAILS))
1578 fprintf (vect_dump, "argument vector types differ.");
1579 return false;
1580 }
1581 }
1582 /* If all arguments are external or constant defs, use a vector type with
1583 the same size as the output vector type. */
1584 if (!vectype_in)
1585 vectype_in = get_same_sized_vectype (rhs_type, vectype_out);
1586 if (vec_stmt)
1587 gcc_assert (vectype_in);
1588 if (!vectype_in)
1589 {
1590 if (vect_print_dump_info (REPORT_DETAILS))
1591 {
1592 fprintf (vect_dump, "no vectype for scalar type ");
1593 print_generic_expr (vect_dump, rhs_type, TDF_SLIM);
1594 }
1595
1596 return false;
1597 }
1598
1599 /* FORNOW */
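/* Classify the call by comparing element counts: twice as many elements in
   the output vector as in the input vector means the result elements are
   narrower (NARROW), half as many means they are wider (WIDEN), and equal
   counts need no modifier (NONE); any other ratio is rejected.  */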
1600 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
1601 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
1602 if (nunits_in == nunits_out / 2)
1603 modifier = NARROW;
1604 else if (nunits_out == nunits_in)
1605 modifier = NONE;
1606 else if (nunits_out == nunits_in / 2)
1607 modifier = WIDEN;
1608 else
1609 return false;
1610
1611 /* For now, we only vectorize functions if a target specific builtin
1612 is available. TODO -- in some cases, it might be profitable to
1613 insert the calls for pieces of the vector, in order to be able
1614 to vectorize other operations in the loop. */
1615 fndecl = vectorizable_function (stmt, vectype_out, vectype_in);
1616 if (fndecl == NULL_TREE)
1617 {
1618 if (vect_print_dump_info (REPORT_DETAILS))
1619 fprintf (vect_dump, "function is not vectorizable.");
1620
1621 return false;
1622 }
1623
1624 gcc_assert (!gimple_vuse (stmt));
1625
1626 if (modifier == NARROW)
1627 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out;
1628 else
1629 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
1630
1631 /* Sanity check: make sure that at least one copy of the vectorized stmt
1632 needs to be generated. */
1633 gcc_assert (ncopies >= 1);
1634
1635 if (!vec_stmt) /* transformation not required. */
1636 {
1637 STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
1638 if (vect_print_dump_info (REPORT_DETAILS))
1639 fprintf (vect_dump, "=== vectorizable_call ===");
1640 vect_model_simple_cost (stmt_info, ncopies, dt, NULL);
1641 return true;
1642 }
1643
1644 /** Transform. **/
1645
1646 if (vect_print_dump_info (REPORT_DETAILS))
1647 fprintf (vect_dump, "transform call.");
1648
1649 /* Handle def. */
1650 scalar_dest = gimple_call_lhs (stmt);
1651 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
1652
1653 prev_stmt_info = NULL;
1654 switch (modifier)
1655 {
1656 case NONE:
1657 for (j = 0; j < ncopies; ++j)
1658 {
1659 /* Build argument list for the vectorized call. */
1660 if (j == 0)
1661 vargs = VEC_alloc (tree, heap, nargs);
1662 else
1663 VEC_truncate (tree, vargs, 0);
1664
1665 for (i = 0; i < nargs; i++)
1666 {
1667 op = gimple_call_arg (stmt, i);
1668 if (j == 0)
1669 vec_oprnd0
1670 = vect_get_vec_def_for_operand (op, stmt, NULL);
1671 else
1672 {
1673 vec_oprnd0 = gimple_call_arg (new_stmt, i);
1674 vec_oprnd0
1675 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
1676 }
1677
1678 VEC_quick_push (tree, vargs, vec_oprnd0);
1679 }
1680
1681 new_stmt = gimple_build_call_vec (fndecl, vargs);
1682 new_temp = make_ssa_name (vec_dest, new_stmt);
1683 gimple_call_set_lhs (new_stmt, new_temp);
1684
1685 vect_finish_stmt_generation (stmt, new_stmt, gsi);
1686 mark_symbols_for_renaming (new_stmt);
1687
1688 if (j == 0)
1689 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
1690 else
1691 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
1692
1693 prev_stmt_info = vinfo_for_stmt (new_stmt);
1694 }
1695
1696 break;
1697
1698 case NARROW:
1699 for (j = 0; j < ncopies; ++j)
1700 {
1701 /* Build argument list for the vectorized call. */
1702 if (j == 0)
1703 vargs = VEC_alloc (tree, heap, nargs * 2);
1704 else
1705 VEC_truncate (tree, vargs, 0);
1706
1707 for (i = 0; i < nargs; i++)
1708 {
1709 op = gimple_call_arg (stmt, i);
1710 if (j == 0)
1711 {
1712 vec_oprnd0
1713 = vect_get_vec_def_for_operand (op, stmt, NULL);
1714 vec_oprnd1
1715 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
1716 }
1717 else
1718 {
1719 vec_oprnd1 = gimple_call_arg (new_stmt, 2*i + 1);
1720 vec_oprnd0
1721 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd1);
1722 vec_oprnd1
1723 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
1724 }
1725
1726 VEC_quick_push (tree, vargs, vec_oprnd0);
1727 VEC_quick_push (tree, vargs, vec_oprnd1);
1728 }
1729
1730 new_stmt = gimple_build_call_vec (fndecl, vargs);
1731 new_temp = make_ssa_name (vec_dest, new_stmt);
1732 gimple_call_set_lhs (new_stmt, new_temp);
1733
1734 vect_finish_stmt_generation (stmt, new_stmt, gsi);
1735 mark_symbols_for_renaming (new_stmt);
1736
1737 if (j == 0)
1738 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
1739 else
1740 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
1741
1742 prev_stmt_info = vinfo_for_stmt (new_stmt);
1743 }
1744
1745 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
1746
1747 break;
1748
1749 case WIDEN:
1750 /* No current target implements this case. */
1751 return false;
1752 }
1753
1754 VEC_free (tree, heap, vargs);
1755
1756 /* Update the exception handling table with the vector stmt if necessary. */
1757 if (maybe_clean_or_replace_eh_stmt (stmt, *vec_stmt))
1758 gimple_purge_dead_eh_edges (gimple_bb (stmt));
1759
1760 /* The call in STMT might prevent it from being removed in dce.
1761 We however cannot remove it here, due to the way the ssa name
1762 it defines is mapped to the new definition. So just replace
1763 the rhs of the statement with something harmless. */
1764
1765 type = TREE_TYPE (scalar_dest);
1766 if (is_pattern_stmt_p (stmt_info))
1767 lhs = gimple_call_lhs (STMT_VINFO_RELATED_STMT (stmt_info));
1768 else
1769 lhs = gimple_call_lhs (stmt);
1770 new_stmt = gimple_build_assign (lhs, build_zero_cst (type));
1771 set_vinfo_for_stmt (new_stmt, stmt_info);
1772 set_vinfo_for_stmt (stmt, NULL);
1773 STMT_VINFO_STMT (stmt_info) = new_stmt;
1774 gsi_replace (gsi, new_stmt, false);
1775 SSA_NAME_DEF_STMT (gimple_assign_lhs (new_stmt)) = new_stmt;
1776
1777 return true;
1778 }
1779
1780
1781 /* Function vect_gen_widened_results_half
1782
1783 Create a vector stmt whose code, number of operands, and result
1784 variable are CODE, OP_TYPE, and VEC_DEST, and whose arguments are
1785 VEC_OPRND0 and VEC_OPRND1. The new vector stmt is to be inserted at GSI.
1786 In the case that CODE is a CALL_EXPR, this means that a call to DECL
1787 needs to be created (DECL is a function-decl of a target-builtin).
1788 STMT is the original scalar stmt that we are vectorizing. */
1789
1790 static gimple
1791 vect_gen_widened_results_half (enum tree_code code,
1792 tree decl,
1793 tree vec_oprnd0, tree vec_oprnd1, int op_type,
1794 tree vec_dest, gimple_stmt_iterator *gsi,
1795 gimple stmt)
1796 {
1797 gimple new_stmt;
1798 tree new_temp;
1799
1800 /* Generate half of the widened result: */
1801 if (code == CALL_EXPR)
1802 {
1803 /* Target specific support */
1804 if (op_type == binary_op)
1805 new_stmt = gimple_build_call (decl, 2, vec_oprnd0, vec_oprnd1);
1806 else
1807 new_stmt = gimple_build_call (decl, 1, vec_oprnd0);
1808 new_temp = make_ssa_name (vec_dest, new_stmt);
1809 gimple_call_set_lhs (new_stmt, new_temp);
1810 }
1811 else
1812 {
1813 /* Generic support */
1814 gcc_assert (op_type == TREE_CODE_LENGTH (code));
1815 if (op_type != binary_op)
1816 vec_oprnd1 = NULL;
1817 new_stmt = gimple_build_assign_with_ops (code, vec_dest, vec_oprnd0,
1818 vec_oprnd1);
1819 new_temp = make_ssa_name (vec_dest, new_stmt);
1820 gimple_assign_set_lhs (new_stmt, new_temp);
1821 }
1822 vect_finish_stmt_generation (stmt, new_stmt, gsi);
1823
1824 return new_stmt;
1825 }
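/* As an illustration (a sketch only; the vector widths below are an
   assumption, not something this file mandates): for a scalar statement

     short s;
     int   i;
     i = (int) s;

   with 128-bit vectors one vector of eight shorts widens into two
   vectors of four ints, so the caller invokes this function twice -
   once with CODE1/DECL1 for the low half of the result and once with
   CODE2/DECL2 for the high half.  */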
1826
1827
1828 /* Check if STMT performs a conversion operation that can be vectorized.
1829 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
1830 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
1831 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
1832
1833 static bool
1834 vectorizable_conversion (gimple stmt, gimple_stmt_iterator *gsi,
1835 gimple *vec_stmt, slp_tree slp_node)
1836 {
1837 tree vec_dest;
1838 tree scalar_dest;
1839 tree op0;
1840 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
1841 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1842 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1843 enum tree_code code, code1 = ERROR_MARK, code2 = ERROR_MARK;
1844 tree decl1 = NULL_TREE, decl2 = NULL_TREE;
1845 tree new_temp;
1846 tree def;
1847 gimple def_stmt;
1848 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
1849 gimple new_stmt = NULL;
1850 stmt_vec_info prev_stmt_info;
1851 int nunits_in;
1852 int nunits_out;
1853 tree vectype_out, vectype_in;
1854 int ncopies, j;
1855 tree rhs_type;
1856 tree builtin_decl;
1857 enum { NARROW, NONE, WIDEN } modifier;
1858 int i;
1859 VEC(tree,heap) *vec_oprnds0 = NULL;
1860 tree vop0;
1861 VEC(tree,heap) *dummy = NULL;
1862 int dummy_int;
1863
1864 /* Is STMT a vectorizable conversion? */
1865
1866 /* FORNOW: unsupported in basic block SLP. */
1867 gcc_assert (loop_vinfo);
1868
1869 if (!STMT_VINFO_RELEVANT_P (stmt_info))
1870 return false;
1871
1872 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
1873 return false;
1874
1875 if (!is_gimple_assign (stmt))
1876 return false;
1877
1878 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
1879 return false;
1880
1881 code = gimple_assign_rhs_code (stmt);
1882 if (code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
1883 return false;
1884
1885 /* Check types of lhs and rhs. */
1886 scalar_dest = gimple_assign_lhs (stmt);
1887 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
1888
1889 op0 = gimple_assign_rhs1 (stmt);
1890 rhs_type = TREE_TYPE (op0);
1891 /* Check the operands of the operation. */
1892 if (!vect_is_simple_use_1 (op0, loop_vinfo, NULL,
1893 &def_stmt, &def, &dt[0], &vectype_in))
1894 {
1895 if (vect_print_dump_info (REPORT_DETAILS))
1896 fprintf (vect_dump, "use not simple.");
1897 return false;
1898 }
1899 /* If op0 is an external or constant def, use a vector type of
1900 the same size as the output vector type. */
1901 if (!vectype_in)
1902 vectype_in = get_same_sized_vectype (rhs_type, vectype_out);
1903 if (vec_stmt)
1904 gcc_assert (vectype_in);
1905 if (!vectype_in)
1906 {
1907 if (vect_print_dump_info (REPORT_DETAILS))
1908 {
1909 fprintf (vect_dump, "no vectype for scalar type ");
1910 print_generic_expr (vect_dump, rhs_type, TDF_SLIM);
1911 }
1912
1913 return false;
1914 }
1915
1916 /* FORNOW */
1917 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
1918 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
1919 if (nunits_in == nunits_out / 2)
1920 modifier = NARROW;
1921 else if (nunits_out == nunits_in)
1922 modifier = NONE;
1923 else if (nunits_out == nunits_in / 2)
1924 modifier = WIDEN;
1925 else
1926 return false;
1927
1928 if (modifier == NARROW)
1929 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out;
1930 else
1931 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
1932
1933 /* Multiple types in SLP are handled by creating the appropriate number of
1934 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
1935 case of SLP. */
1936 if (slp_node || PURE_SLP_STMT (stmt_info))
1937 ncopies = 1;
1938
1939 /* Sanity check: make sure that at least one copy of the vectorized stmt
1940 needs to be generated. */
1941 gcc_assert (ncopies >= 1);
1942
1943 /* Supportable by target? */
1944 if ((modifier == NONE
1945 && !targetm.vectorize.builtin_conversion (code, vectype_out, vectype_in))
1946 || (modifier == WIDEN
1947 && !supportable_widening_operation (code, stmt,
1948 vectype_out, vectype_in,
1949 &decl1, &decl2,
1950 &code1, &code2,
1951 &dummy_int, &dummy))
1952 || (modifier == NARROW
1953 && !supportable_narrowing_operation (code, vectype_out, vectype_in,
1954 &code1, &dummy_int, &dummy)))
1955 {
1956 if (vect_print_dump_info (REPORT_DETAILS))
1957 fprintf (vect_dump, "conversion not supported by target.");
1958 return false;
1959 }
1960
1961 if (modifier != NONE)
1962 {
1963 /* FORNOW: SLP not supported. */
1964 if (STMT_SLP_TYPE (stmt_info))
1965 return false;
1966 }
1967
1968 if (!vec_stmt) /* transformation not required. */
1969 {
1970 STMT_VINFO_TYPE (stmt_info) = type_conversion_vec_info_type;
1971 return true;
1972 }
1973
1974 /** Transform. **/
1975 if (vect_print_dump_info (REPORT_DETAILS))
1976 fprintf (vect_dump, "transform conversion.");
1977
1978 /* Handle def. */
1979 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
1980
1981 if (modifier == NONE && !slp_node)
1982 vec_oprnds0 = VEC_alloc (tree, heap, 1);
1983
1984 prev_stmt_info = NULL;
1985 switch (modifier)
1986 {
1987 case NONE:
1988 for (j = 0; j < ncopies; j++)
1989 {
1990 if (j == 0)
1991 vect_get_vec_defs (op0, NULL, stmt, &vec_oprnds0, NULL, slp_node);
1992 else
1993 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, NULL);
1994
1995 builtin_decl =
1996 targetm.vectorize.builtin_conversion (code,
1997 vectype_out, vectype_in);
1998 FOR_EACH_VEC_ELT (tree, vec_oprnds0, i, vop0)
1999 {
2000 /* Arguments are ready. Create the new vector stmt. */
2001 new_stmt = gimple_build_call (builtin_decl, 1, vop0);
2002 new_temp = make_ssa_name (vec_dest, new_stmt);
2003 gimple_call_set_lhs (new_stmt, new_temp);
2004 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2005 if (slp_node)
2006 VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node), new_stmt);
2007 }
2008
2009 if (j == 0)
2010 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2011 else
2012 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2013 prev_stmt_info = vinfo_for_stmt (new_stmt);
2014 }
2015 break;
2016
2017 case WIDEN:
2018 /* In case the vectorization factor (VF) is bigger than the number
2019 of elements that we can fit in a vectype (nunits), we have to
2020 generate more than one vector stmt - i.e - we need to "unroll"
2021 the vector stmt by a factor VF/nunits. */
2022 for (j = 0; j < ncopies; j++)
2023 {
2024 if (j == 0)
2025 vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt, NULL);
2026 else
2027 vec_oprnd0 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd0);
2028
2029 /* Generate first half of the widened result: */
2030 new_stmt
2031 = vect_gen_widened_results_half (code1, decl1,
2032 vec_oprnd0, vec_oprnd1,
2033 unary_op, vec_dest, gsi, stmt);
2034 if (j == 0)
2035 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
2036 else
2037 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2038 prev_stmt_info = vinfo_for_stmt (new_stmt);
2039
2040 /* Generate second half of the widened result: */
2041 new_stmt
2042 = vect_gen_widened_results_half (code2, decl2,
2043 vec_oprnd0, vec_oprnd1,
2044 unary_op, vec_dest, gsi, stmt);
2045 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2046 prev_stmt_info = vinfo_for_stmt (new_stmt);
2047 }
2048 break;
2049
2050 case NARROW:
2051 /* In case the vectorization factor (VF) is bigger than the number
2052 of elements that we can fit in a vectype (nunits), we have to
2053 generate more than one vector stmt - i.e - we need to "unroll"
2054 the vector stmt by a factor VF/nunits. */
2055 for (j = 0; j < ncopies; j++)
2056 {
2057 /* Handle uses. */
2058 if (j == 0)
2059 {
2060 vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt, NULL);
2061 vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd0);
2062 }
2063 else
2064 {
2065 vec_oprnd0 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd1);
2066 vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd0);
2067 }
2068
2069 /* Arguments are ready. Create the new vector stmt. */
2070 new_stmt = gimple_build_assign_with_ops (code1, vec_dest, vec_oprnd0,
2071 vec_oprnd1);
2072 new_temp = make_ssa_name (vec_dest, new_stmt);
2073 gimple_assign_set_lhs (new_stmt, new_temp);
2074 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2075
2076 if (j == 0)
2077 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
2078 else
2079 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2080
2081 prev_stmt_info = vinfo_for_stmt (new_stmt);
2082 }
2083
2084 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
2085 }
2086
2087 if (vec_oprnds0)
2088 VEC_free (tree, heap, vec_oprnds0);
2089
2090 return true;
2091 }
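/* A minimal scalar example of the kind of statement handled above
   (illustrative only; whether it is actually vectorized depends on the
   target's builtin_conversion / widening / narrowing support):

     void
     f (float *restrict a, int *restrict b, int n)
     {
       int i;
       for (i = 0; i < n; i++)
         a[i] = (float) b[i];           FLOAT_EXPR, modifier == NONE

     }

   Assuming 128-bit vectors, float and int vectors hold the same number
   of elements, so the NONE path uses the target's builtin conversion;
   converting int to double would take the WIDEN path, and converting
   double to int (FIX_TRUNC_EXPR) the NARROW path.  */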
2092
2093
2094 /* Function vectorizable_assignment.
2095
2096 Check if STMT performs an assignment (copy) that can be vectorized.
2097 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2098 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
2099 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
2100
2101 static bool
2102 vectorizable_assignment (gimple stmt, gimple_stmt_iterator *gsi,
2103 gimple *vec_stmt, slp_tree slp_node)
2104 {
2105 tree vec_dest;
2106 tree scalar_dest;
2107 tree op;
2108 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2109 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2110 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2111 tree new_temp;
2112 tree def;
2113 gimple def_stmt;
2114 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
2115 unsigned int nunits = TYPE_VECTOR_SUBPARTS (vectype);
2116 int ncopies;
2117 int i, j;
2118 VEC(tree,heap) *vec_oprnds = NULL;
2119 tree vop;
2120 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
2121 gimple new_stmt = NULL;
2122 stmt_vec_info prev_stmt_info = NULL;
2123 enum tree_code code;
2124 tree vectype_in;
2125
2126 /* Multiple types in SLP are handled by creating the appropriate number of
2127 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
2128 case of SLP. */
2129 if (slp_node || PURE_SLP_STMT (stmt_info))
2130 ncopies = 1;
2131 else
2132 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
2133
2134 gcc_assert (ncopies >= 1);
2135
2136 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
2137 return false;
2138
2139 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
2140 return false;
2141
2142 /* Is vectorizable assignment? */
2143 if (!is_gimple_assign (stmt))
2144 return false;
2145
2146 scalar_dest = gimple_assign_lhs (stmt);
2147 if (TREE_CODE (scalar_dest) != SSA_NAME)
2148 return false;
2149
2150 code = gimple_assign_rhs_code (stmt);
2151 if (gimple_assign_single_p (stmt)
2152 || code == PAREN_EXPR
2153 || CONVERT_EXPR_CODE_P (code))
2154 op = gimple_assign_rhs1 (stmt);
2155 else
2156 return false;
2157
2158 if (code == VIEW_CONVERT_EXPR)
2159 op = TREE_OPERAND (op, 0);
2160
2161 if (!vect_is_simple_use_1 (op, loop_vinfo, bb_vinfo,
2162 &def_stmt, &def, &dt[0], &vectype_in))
2163 {
2164 if (vect_print_dump_info (REPORT_DETAILS))
2165 fprintf (vect_dump, "use not simple.");
2166 return false;
2167 }
2168
2169 /* We can handle NOP_EXPR conversions that do not change the number
2170 of elements or the vector size. */
2171 if ((CONVERT_EXPR_CODE_P (code)
2172 || code == VIEW_CONVERT_EXPR)
2173 && (!vectype_in
2174 || TYPE_VECTOR_SUBPARTS (vectype_in) != nunits
2175 || (GET_MODE_SIZE (TYPE_MODE (vectype))
2176 != GET_MODE_SIZE (TYPE_MODE (vectype_in)))))
2177 return false;
2178
2179 /* We do not handle bit-precision changes. */
2180 if ((CONVERT_EXPR_CODE_P (code)
2181 || code == VIEW_CONVERT_EXPR)
2182 && INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest))
2183 && ((TYPE_PRECISION (TREE_TYPE (scalar_dest))
2184 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest))))
2185 || ((TYPE_PRECISION (TREE_TYPE (op))
2186 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (op))))))
2187 /* But a conversion that does not change the bit-pattern is ok. */
2188 && !((TYPE_PRECISION (TREE_TYPE (scalar_dest))
2189 > TYPE_PRECISION (TREE_TYPE (op)))
2190 && TYPE_UNSIGNED (TREE_TYPE (op))))
2191 {
2192 if (vect_print_dump_info (REPORT_DETAILS))
2193 fprintf (vect_dump, "type conversion to/from bit-precision "
2194 "unsupported.");
2195 return false;
2196 }
2197
2198 if (!vec_stmt) /* transformation not required. */
2199 {
2200 STMT_VINFO_TYPE (stmt_info) = assignment_vec_info_type;
2201 if (vect_print_dump_info (REPORT_DETAILS))
2202 fprintf (vect_dump, "=== vectorizable_assignment ===");
2203 vect_model_simple_cost (stmt_info, ncopies, dt, NULL);
2204 return true;
2205 }
2206
2207 /** Transform. **/
2208 if (vect_print_dump_info (REPORT_DETAILS))
2209 fprintf (vect_dump, "transform assignment.");
2210
2211 /* Handle def. */
2212 vec_dest = vect_create_destination_var (scalar_dest, vectype);
2213
2214 /* Handle use. */
2215 for (j = 0; j < ncopies; j++)
2216 {
2217 /* Handle uses. */
2218 if (j == 0)
2219 vect_get_vec_defs (op, NULL, stmt, &vec_oprnds, NULL, slp_node);
2220 else
2221 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds, NULL);
2222
2223 /* Arguments are ready. Create the new vector stmt. */
2224 FOR_EACH_VEC_ELT (tree, vec_oprnds, i, vop)
2225 {
2226 if (CONVERT_EXPR_CODE_P (code)
2227 || code == VIEW_CONVERT_EXPR)
2228 vop = build1 (VIEW_CONVERT_EXPR, vectype, vop);
2229 new_stmt = gimple_build_assign (vec_dest, vop);
2230 new_temp = make_ssa_name (vec_dest, new_stmt);
2231 gimple_assign_set_lhs (new_stmt, new_temp);
2232 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2233 if (slp_node)
2234 VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node), new_stmt);
2235 }
2236
2237 if (slp_node)
2238 continue;
2239
2240 if (j == 0)
2241 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2242 else
2243 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2244
2245 prev_stmt_info = vinfo_for_stmt (new_stmt);
2246 }
2247
2248 VEC_free (tree, heap, vec_oprnds);
2249 return true;
2250 }
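/* A minimal scalar example of what vectorizable_assignment accepts
   (illustrative; the cast must keep the number of elements and the
   vector size unchanged, and bit-precision changes are rejected):

     void
     f (unsigned int *restrict a, int *restrict b, int n)
     {
       int i;
       for (i = 0; i < n; i++)
         a[i] = (unsigned int) b[i];    same-width NOP_EXPR copy
     }

   The generated vector statements are plain copies through a
   VIEW_CONVERT_EXPR to the destination vector type.  */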
2251
2252
2253 /* Return TRUE if CODE (a shift operation) is supported for SCALAR_TYPE
2254 either as shift by a scalar or by a vector. */
2255
2256 bool
2257 vect_supportable_shift (enum tree_code code, tree scalar_type)
2258 {
2259
2260 enum machine_mode vec_mode;
2261 optab optab;
2262 int icode;
2263 tree vectype;
2264
2265 vectype = get_vectype_for_scalar_type (scalar_type);
2266 if (!vectype)
2267 return false;
2268
2269 optab = optab_for_tree_code (code, vectype, optab_scalar);
2270 if (!optab
2271 || optab_handler (optab, TYPE_MODE (vectype)) == CODE_FOR_nothing)
2272 {
2273 optab = optab_for_tree_code (code, vectype, optab_vector);
2274 if (!optab
2275 || (optab_handler (optab, TYPE_MODE (vectype))
2276 == CODE_FOR_nothing))
2277 return false;
2278 }
2279
2280 vec_mode = TYPE_MODE (vectype);
2281 icode = (int) optab_handler (optab, vec_mode);
2282 if (icode == CODE_FOR_nothing)
2283 return false;
2284
2285 return true;
2286 }
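/* Example use (a sketch; the caller shown here is hypothetical): a
   pattern recognizer that wants to emit a left shift on short elements
   could guard its transformation with

     if (vect_supportable_shift (LSHIFT_EXPR, short_integer_type_node))
       ...

   which succeeds if the target provides either a vector/scalar or a
   vector/vector shift for the corresponding vector mode.  */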
2287
2288
2289 /* Function vectorizable_shift.
2290
2291 Check if STMT performs a shift operation that can be vectorized.
2292 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2293 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
2294 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
2295
2296 static bool
2297 vectorizable_shift (gimple stmt, gimple_stmt_iterator *gsi,
2298 gimple *vec_stmt, slp_tree slp_node)
2299 {
2300 tree vec_dest;
2301 tree scalar_dest;
2302 tree op0, op1 = NULL;
2303 tree vec_oprnd1 = NULL_TREE;
2304 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2305 tree vectype;
2306 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2307 enum tree_code code;
2308 enum machine_mode vec_mode;
2309 tree new_temp;
2310 optab optab;
2311 int icode;
2312 enum machine_mode optab_op2_mode;
2313 tree def;
2314 gimple def_stmt;
2315 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
2316 gimple new_stmt = NULL;
2317 stmt_vec_info prev_stmt_info;
2318 int nunits_in;
2319 int nunits_out;
2320 tree vectype_out;
2321 tree op1_vectype;
2322 int ncopies;
2323 int j, i;
2324 VEC (tree, heap) *vec_oprnds0 = NULL, *vec_oprnds1 = NULL;
2325 tree vop0, vop1;
2326 unsigned int k;
2327 bool scalar_shift_arg = true;
2328 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
2329 int vf;
2330
2331 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
2332 return false;
2333
2334 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
2335 return false;
2336
2337 /* Is STMT a vectorizable binary/unary operation? */
2338 if (!is_gimple_assign (stmt))
2339 return false;
2340
2341 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
2342 return false;
2343
2344 code = gimple_assign_rhs_code (stmt);
2345
2346 if (!(code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
2347 || code == RROTATE_EXPR))
2348 return false;
2349
2350 scalar_dest = gimple_assign_lhs (stmt);
2351 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
2352 if (TYPE_PRECISION (TREE_TYPE (scalar_dest))
2353 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest))))
2354 {
2355 if (vect_print_dump_info (REPORT_DETAILS))
2356 fprintf (vect_dump, "bit-precision shifts not supported.");
2357 return false;
2358 }
2359
2360 op0 = gimple_assign_rhs1 (stmt);
2361 if (!vect_is_simple_use_1 (op0, loop_vinfo, bb_vinfo,
2362 &def_stmt, &def, &dt[0], &vectype))
2363 {
2364 if (vect_print_dump_info (REPORT_DETAILS))
2365 fprintf (vect_dump, "use not simple.");
2366 return false;
2367 }
2368 /* If op0 is an external or constant def use a vector type with
2369 the same size as the output vector type. */
2370 if (!vectype)
2371 vectype = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
2372 if (vec_stmt)
2373 gcc_assert (vectype);
2374 if (!vectype)
2375 {
2376 if (vect_print_dump_info (REPORT_DETAILS))
2377 {
2378 fprintf (vect_dump, "no vectype for scalar type ");
2379 print_generic_expr (vect_dump, TREE_TYPE (op0), TDF_SLIM);
2380 }
2381
2382 return false;
2383 }
2384
2385 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
2386 nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
2387 if (nunits_out != nunits_in)
2388 return false;
2389
2390 op1 = gimple_assign_rhs2 (stmt);
2391 if (!vect_is_simple_use_1 (op1, loop_vinfo, bb_vinfo, &def_stmt, &def,
2392 &dt[1], &op1_vectype))
2393 {
2394 if (vect_print_dump_info (REPORT_DETAILS))
2395 fprintf (vect_dump, "use not simple.");
2396 return false;
2397 }
2398
2399 if (loop_vinfo)
2400 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
2401 else
2402 vf = 1;
2403
2404 /* Multiple types in SLP are handled by creating the appropriate number of
2405 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
2406 case of SLP. */
2407 if (slp_node || PURE_SLP_STMT (stmt_info))
2408 ncopies = 1;
2409 else
2410 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
2411
2412 gcc_assert (ncopies >= 1);
2413
2414 /* Determine whether the shift amount is a vector, or scalar. If the
2415 shift/rotate amount is a vector, use the vector/vector shift optabs. */
2416
2417 if (dt[1] == vect_internal_def && !slp_node)
2418 scalar_shift_arg = false;
2419 else if (dt[1] == vect_constant_def
2420 || dt[1] == vect_external_def
2421 || dt[1] == vect_internal_def)
2422 {
2423 /* In SLP, we need to check whether the shift count is the same
2424 in all the statements of the node. In loops, a constant or
2425 invariant shift count always gives a scalar shift. */
2426 if (slp_node)
2427 {
2428 VEC (gimple, heap) *stmts = SLP_TREE_SCALAR_STMTS (slp_node);
2429 gimple slpstmt;
2430
2431 FOR_EACH_VEC_ELT (gimple, stmts, k, slpstmt)
2432 if (!operand_equal_p (gimple_assign_rhs2 (slpstmt), op1, 0))
2433 scalar_shift_arg = false;
2434 }
2435 }
2436 else
2437 {
2438 if (vect_print_dump_info (REPORT_DETAILS))
2439 fprintf (vect_dump, "operand mode requires invariant argument.");
2440 return false;
2441 }
2442
2443 /* Vector shifted by vector. */
2444 if (!scalar_shift_arg)
2445 {
2446 optab = optab_for_tree_code (code, vectype, optab_vector);
2447 if (vect_print_dump_info (REPORT_DETAILS))
2448 fprintf (vect_dump, "vector/vector shift/rotate found.");
2449 if (TYPE_MODE (op1_vectype) != TYPE_MODE (vectype))
2450 {
2451 if (vect_print_dump_info (REPORT_DETAILS))
2452 fprintf (vect_dump, "unusable type for last operand in"
2453 " vector/vector shift/rotate.");
2454 return false;
2455 }
2456 }
2457 /* See if the machine has a vector shifted by scalar insn and if not
2458 then see if it has a vector shifted by vector insn. */
2459 else
2460 {
2461 optab = optab_for_tree_code (code, vectype, optab_scalar);
2462 if (optab
2463 && optab_handler (optab, TYPE_MODE (vectype)) != CODE_FOR_nothing)
2464 {
2465 if (vect_print_dump_info (REPORT_DETAILS))
2466 fprintf (vect_dump, "vector/scalar shift/rotate found.");
2467 }
2468 else
2469 {
2470 optab = optab_for_tree_code (code, vectype, optab_vector);
2471 if (optab
2472 && (optab_handler (optab, TYPE_MODE (vectype))
2473 != CODE_FOR_nothing))
2474 {
2475 scalar_shift_arg = false;
2476
2477 if (vect_print_dump_info (REPORT_DETAILS))
2478 fprintf (vect_dump, "vector/vector shift/rotate found.");
2479
2480 /* Unlike the other binary operators, shifts/rotates have
2481 an rhs of integer type rather than the same type as the lhs,
2482 so make sure the scalar is of the right type if we are
2483 dealing with vectors of short/char. */
2484 if (dt[1] == vect_constant_def)
2485 op1 = fold_convert (TREE_TYPE (vectype), op1);
2486 }
2487 }
2488 }
2489
2490 /* Supportable by target? */
2491 if (!optab)
2492 {
2493 if (vect_print_dump_info (REPORT_DETAILS))
2494 fprintf (vect_dump, "no optab.");
2495 return false;
2496 }
2497 vec_mode = TYPE_MODE (vectype);
2498 icode = (int) optab_handler (optab, vec_mode);
2499 if (icode == CODE_FOR_nothing)
2500 {
2501 if (vect_print_dump_info (REPORT_DETAILS))
2502 fprintf (vect_dump, "op not supported by target.");
2503 /* Check only during analysis. */
2504 if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD
2505 || (vf < vect_min_worthwhile_factor (code)
2506 && !vec_stmt))
2507 return false;
2508 if (vect_print_dump_info (REPORT_DETAILS))
2509 fprintf (vect_dump, "proceeding using word mode.");
2510 }
2511
2512 /* Worthwhile without SIMD support? Check only during analysis. */
2513 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
2514 && vf < vect_min_worthwhile_factor (code)
2515 && !vec_stmt)
2516 {
2517 if (vect_print_dump_info (REPORT_DETAILS))
2518 fprintf (vect_dump, "not worthwhile without SIMD support.");
2519 return false;
2520 }
2521
2522 if (!vec_stmt) /* transformation not required. */
2523 {
2524 STMT_VINFO_TYPE (stmt_info) = shift_vec_info_type;
2525 if (vect_print_dump_info (REPORT_DETAILS))
2526 fprintf (vect_dump, "=== vectorizable_shift ===");
2527 vect_model_simple_cost (stmt_info, ncopies, dt, NULL);
2528 return true;
2529 }
2530
2531 /** Transform. **/
2532
2533 if (vect_print_dump_info (REPORT_DETAILS))
2534 fprintf (vect_dump, "transform binary/unary operation.");
2535
2536 /* Handle def. */
2537 vec_dest = vect_create_destination_var (scalar_dest, vectype);
2538
2539 /* Allocate VECs for vector operands. In case of SLP, vector operands are
2540 created in the previous stages of the recursion, so no allocation is
2541 needed, except for the case of shift with scalar shift argument. In that
2542 case we store the scalar operand in VEC_OPRNDS1 for every vector stmt to
2543 be created to vectorize the SLP group, i.e., SLP_NODE->VEC_STMTS_SIZE.
2544 In case of loop-based vectorization we allocate VECs of size 1. We
2545 allocate VEC_OPRNDS1 only in case of binary operation. */
2546 if (!slp_node)
2547 {
2548 vec_oprnds0 = VEC_alloc (tree, heap, 1);
2549 vec_oprnds1 = VEC_alloc (tree, heap, 1);
2550 }
2551 else if (scalar_shift_arg)
2552 vec_oprnds1 = VEC_alloc (tree, heap, slp_node->vec_stmts_size);
2553
2554 prev_stmt_info = NULL;
2555 for (j = 0; j < ncopies; j++)
2556 {
2557 /* Handle uses. */
2558 if (j == 0)
2559 {
2560 if (scalar_shift_arg)
2561 {
2562 /* Vector shl and shr insn patterns can be defined with scalar
2563 operand 2 (shift operand). In this case, use constant or loop
2564 invariant op1 directly, without extending it to vector mode
2565 first. */
2566 optab_op2_mode = insn_data[icode].operand[2].mode;
2567 if (!VECTOR_MODE_P (optab_op2_mode))
2568 {
2569 if (vect_print_dump_info (REPORT_DETAILS))
2570 fprintf (vect_dump, "operand 1 using scalar mode.");
2571 vec_oprnd1 = op1;
2572 VEC_quick_push (tree, vec_oprnds1, vec_oprnd1);
2573 if (slp_node)
2574 {
2575 /* Store vec_oprnd1 for every vector stmt to be created
2576 for SLP_NODE. We check during the analysis that all
2577 the shift arguments are the same.
2578 TODO: Allow different constants for different vector
2579 stmts generated for an SLP instance. */
2580 for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
2581 VEC_quick_push (tree, vec_oprnds1, vec_oprnd1);
2582 }
2583 }
2584 }
2585
2586 /* vec_oprnd1 is available if operand 1 should be of a scalar type
2587 (a special case for certain kinds of vector shifts); otherwise,
2588 operand 1 should be of a vector type (the usual case). */
2589 if (vec_oprnd1)
2590 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
2591 slp_node);
2592 else
2593 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
2594 slp_node);
2595 }
2596 else
2597 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, &vec_oprnds1);
2598
2599 /* Arguments are ready. Create the new vector stmt. */
2600 FOR_EACH_VEC_ELT (tree, vec_oprnds0, i, vop0)
2601 {
2602 vop1 = VEC_index (tree, vec_oprnds1, i);
2603 new_stmt = gimple_build_assign_with_ops (code, vec_dest, vop0, vop1);
2604 new_temp = make_ssa_name (vec_dest, new_stmt);
2605 gimple_assign_set_lhs (new_stmt, new_temp);
2606 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2607 if (slp_node)
2608 VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node), new_stmt);
2609 }
2610
2611 if (slp_node)
2612 continue;
2613
2614 if (j == 0)
2615 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2616 else
2617 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2618 prev_stmt_info = vinfo_for_stmt (new_stmt);
2619 }
2620
2621 VEC_free (tree, heap, vec_oprnds0);
2622 VEC_free (tree, heap, vec_oprnds1);
2623
2624 return true;
2625 }
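/* Two minimal scalar examples of the shift kinds distinguished above
   (illustrative only):

     void
     by_scalar (int *restrict a, int *restrict b, int n)
     {
       int i;
       for (i = 0; i < n; i++)
         a[i] = b[i] << 3;              invariant count: scalar_shift_arg
     }

     void
     by_vector (int *restrict a, int *restrict b, int *restrict c, int n)
     {
       int i;
       for (i = 0; i < n; i++)
         a[i] = b[i] << c[i];           variant count: vector/vector optab
     }

   The first form prefers the target's vector-shifted-by-scalar pattern
   and falls back to the vector/vector one; the second requires the
   vector/vector optab and that op1's vector mode match op0's.  */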
2626
2627
2628 /* Function vectorizable_operation.
2629
2630 Check if STMT performs a binary, unary or ternary operation that can
2631 be vectorized.
2632 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2633 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
2634 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
2635
2636 static bool
2637 vectorizable_operation (gimple stmt, gimple_stmt_iterator *gsi,
2638 gimple *vec_stmt, slp_tree slp_node)
2639 {
2640 tree vec_dest;
2641 tree scalar_dest;
2642 tree op0, op1 = NULL_TREE, op2 = NULL_TREE;
2643 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2644 tree vectype;
2645 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2646 enum tree_code code;
2647 enum machine_mode vec_mode;
2648 tree new_temp;
2649 int op_type;
2650 optab optab;
2651 int icode;
2652 tree def;
2653 gimple def_stmt;
2654 enum vect_def_type dt[3]
2655 = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
2656 gimple new_stmt = NULL;
2657 stmt_vec_info prev_stmt_info;
2658 int nunits_in;
2659 int nunits_out;
2660 tree vectype_out;
2661 int ncopies;
2662 int j, i;
2663 VEC(tree,heap) *vec_oprnds0 = NULL, *vec_oprnds1 = NULL, *vec_oprnds2 = NULL;
2664 tree vop0, vop1, vop2;
2665 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
2666 int vf;
2667
2668 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
2669 return false;
2670
2671 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
2672 return false;
2673
2674 /* Is STMT a vectorizable binary/unary operation? */
2675 if (!is_gimple_assign (stmt))
2676 return false;
2677
2678 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
2679 return false;
2680
2681 code = gimple_assign_rhs_code (stmt);
2682
2683 /* For pointer addition, we should use the normal plus for
2684 the vector addition. */
2685 if (code == POINTER_PLUS_EXPR)
2686 code = PLUS_EXPR;
2687
2688 /* Support only unary or binary operations. */
2689 op_type = TREE_CODE_LENGTH (code);
2690 if (op_type != unary_op && op_type != binary_op && op_type != ternary_op)
2691 {
2692 if (vect_print_dump_info (REPORT_DETAILS))
2693 fprintf (vect_dump, "num. args = %d (not unary/binary/ternary op).",
2694 op_type);
2695 return false;
2696 }
2697
2698 scalar_dest = gimple_assign_lhs (stmt);
2699 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
2700
2701 /* Most operations cannot handle bit-precision types without extra
2702 truncations. */
2703 if ((TYPE_PRECISION (TREE_TYPE (scalar_dest))
2704 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest))))
2705 /* Exceptions are bitwise binary operations. */
2706 && code != BIT_IOR_EXPR
2707 && code != BIT_XOR_EXPR
2708 && code != BIT_AND_EXPR)
2709 {
2710 if (vect_print_dump_info (REPORT_DETAILS))
2711 fprintf (vect_dump, "bit-precision arithmetic not supported.");
2712 return false;
2713 }
2714
2715 op0 = gimple_assign_rhs1 (stmt);
2716 if (!vect_is_simple_use_1 (op0, loop_vinfo, bb_vinfo,
2717 &def_stmt, &def, &dt[0], &vectype))
2718 {
2719 if (vect_print_dump_info (REPORT_DETAILS))
2720 fprintf (vect_dump, "use not simple.");
2721 return false;
2722 }
2723 /* If op0 is an external or constant def use a vector type with
2724 the same size as the output vector type. */
2725 if (!vectype)
2726 vectype = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
2727 if (vec_stmt)
2728 gcc_assert (vectype);
2729 if (!vectype)
2730 {
2731 if (vect_print_dump_info (REPORT_DETAILS))
2732 {
2733 fprintf (vect_dump, "no vectype for scalar type ");
2734 print_generic_expr (vect_dump, TREE_TYPE (op0), TDF_SLIM);
2735 }
2736
2737 return false;
2738 }
2739
2740 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
2741 nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
2742 if (nunits_out != nunits_in)
2743 return false;
2744
2745 if (op_type == binary_op || op_type == ternary_op)
2746 {
2747 op1 = gimple_assign_rhs2 (stmt);
2748 if (!vect_is_simple_use (op1, loop_vinfo, bb_vinfo, &def_stmt, &def,
2749 &dt[1]))
2750 {
2751 if (vect_print_dump_info (REPORT_DETAILS))
2752 fprintf (vect_dump, "use not simple.");
2753 return false;
2754 }
2755 }
2756 if (op_type == ternary_op)
2757 {
2758 op2 = gimple_assign_rhs3 (stmt);
2759 if (!vect_is_simple_use (op2, loop_vinfo, bb_vinfo, &def_stmt, &def,
2760 &dt[2]))
2761 {
2762 if (vect_print_dump_info (REPORT_DETAILS))
2763 fprintf (vect_dump, "use not simple.");
2764 return false;
2765 }
2766 }
2767
2768 if (loop_vinfo)
2769 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
2770 else
2771 vf = 1;
2772
2773 /* Multiple types in SLP are handled by creating the appropriate number of
2774 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
2775 case of SLP. */
2776 if (slp_node || PURE_SLP_STMT (stmt_info))
2777 ncopies = 1;
2778 else
2779 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
2780
2781 gcc_assert (ncopies >= 1);
2782
2783 /* Shifts are handled in vectorizable_shift (). */
2784 if (code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
2785 || code == RROTATE_EXPR)
2786 return false;
2787
2788 optab = optab_for_tree_code (code, vectype, optab_default);
2789
2790 /* Supportable by target? */
2791 if (!optab)
2792 {
2793 if (vect_print_dump_info (REPORT_DETAILS))
2794 fprintf (vect_dump, "no optab.");
2795 return false;
2796 }
2797 vec_mode = TYPE_MODE (vectype);
2798 icode = (int) optab_handler (optab, vec_mode);
2799 if (icode == CODE_FOR_nothing)
2800 {
2801 if (vect_print_dump_info (REPORT_DETAILS))
2802 fprintf (vect_dump, "op not supported by target.");
2803 /* Check only during analysis. */
2804 if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD
2805 || (vf < vect_min_worthwhile_factor (code)
2806 && !vec_stmt))
2807 return false;
2808 if (vect_print_dump_info (REPORT_DETAILS))
2809 fprintf (vect_dump, "proceeding using word mode.");
2810 }
2811
2812 /* Worthwhile without SIMD support? Check only during analysis. */
2813 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
2814 && vf < vect_min_worthwhile_factor (code)
2815 && !vec_stmt)
2816 {
2817 if (vect_print_dump_info (REPORT_DETAILS))
2818 fprintf (vect_dump, "not worthwhile without SIMD support.");
2819 return false;
2820 }
2821
2822 if (!vec_stmt) /* transformation not required. */
2823 {
2824 STMT_VINFO_TYPE (stmt_info) = op_vec_info_type;
2825 if (vect_print_dump_info (REPORT_DETAILS))
2826 fprintf (vect_dump, "=== vectorizable_operation ===");
2827 vect_model_simple_cost (stmt_info, ncopies, dt, NULL);
2828 return true;
2829 }
2830
2831 /** Transform. **/
2832
2833 if (vect_print_dump_info (REPORT_DETAILS))
2834 fprintf (vect_dump, "transform binary/unary operation.");
2835
2836 /* Handle def. */
2837 vec_dest = vect_create_destination_var (scalar_dest, vectype);
2838
2839 /* Allocate VECs for vector operands. In case of SLP, vector operands are
2840 created in the previous stages of the recursion, so no allocation is
2841 needed. (Shifts with a scalar shift argument are handled separately
2842 in vectorizable_shift.) In case of loop-based vectorization we
2843 allocate VECs of size 1. We allocate VEC_OPRNDS1 only in case of a
2844 binary or ternary operation, and VEC_OPRNDS2 only in case of a
2845 ternary operation. */
2846 if (!slp_node)
2847 {
2848 vec_oprnds0 = VEC_alloc (tree, heap, 1);
2849 if (op_type == binary_op || op_type == ternary_op)
2850 vec_oprnds1 = VEC_alloc (tree, heap, 1);
2851 if (op_type == ternary_op)
2852 vec_oprnds2 = VEC_alloc (tree, heap, 1);
2853 }
2854
2855 /* In case the vectorization factor (VF) is bigger than the number
2856 of elements that we can fit in a vectype (nunits), we have to generate
2857 more than one vector stmt - i.e - we need to "unroll" the
2858 vector stmt by a factor VF/nunits. In doing so, we record a pointer
2859 from one copy of the vector stmt to the next, in the field
2860 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
2861 stages to find the correct vector defs to be used when vectorizing
2862 stmts that use the defs of the current stmt. The example below
2863 illustrates the vectorization process when VF=16 and nunits=4 (i.e.,
2864 we need to create 4 vectorized stmts):
2865
2866 before vectorization:
2867 RELATED_STMT VEC_STMT
2868 S1: x = memref - -
2869 S2: z = x + 1 - -
2870
2871 step 1: vectorize stmt S1 (done in vectorizable_load. See more details
2872 there):
2873 RELATED_STMT VEC_STMT
2874 VS1_0: vx0 = memref0 VS1_1 -
2875 VS1_1: vx1 = memref1 VS1_2 -
2876 VS1_2: vx2 = memref2 VS1_3 -
2877 VS1_3: vx3 = memref3 - -
2878 S1: x = load - VS1_0
2879 S2: z = x + 1 - -
2880
2881 step2: vectorize stmt S2 (done here):
2882 To vectorize stmt S2 we first need to find the relevant vector
2883 def for the first operand 'x'. This is, as usual, obtained from
2884 the vector stmt recorded in the STMT_VINFO_VEC_STMT of the stmt
2885 that defines 'x' (S1). This way we find the stmt VS1_0, and the
2886 relevant vector def 'vx0'. Having found 'vx0' we can generate
2887 the vector stmt VS2_0, and as usual, record it in the
2888 STMT_VINFO_VEC_STMT of stmt S2.
2889 When creating the second copy (VS2_1), we obtain the relevant vector
2890 def from the vector stmt recorded in the STMT_VINFO_RELATED_STMT of
2891 stmt VS1_0. This way we find the stmt VS1_1 and the relevant
2892 vector def 'vx1'. Using 'vx1' we create stmt VS2_1 and record a
2893 pointer to it in the STMT_VINFO_RELATED_STMT of the vector stmt VS2_0.
2894 Similarly when creating stmts VS2_2 and VS2_3. This is the resulting
2895 chain of stmts and pointers:
2896 RELATED_STMT VEC_STMT
2897 VS1_0: vx0 = memref0 VS1_1 -
2898 VS1_1: vx1 = memref1 VS1_2 -
2899 VS1_2: vx2 = memref2 VS1_3 -
2900 VS1_3: vx3 = memref3 - -
2901 S1: x = load - VS1_0
2902 VS2_0: vz0 = vx0 + v1 VS2_1 -
2903 VS2_1: vz1 = vx1 + v1 VS2_2 -
2904 VS2_2: vz2 = vx2 + v1 VS2_3 -
2905 VS2_3: vz3 = vx3 + v1 - -
2906 S2: z = x + 1 - VS2_0 */
2907
2908 prev_stmt_info = NULL;
2909 for (j = 0; j < ncopies; j++)
2910 {
2911 /* Handle uses. */
2912 if (j == 0)
2913 {
2914 if (op_type == binary_op || op_type == ternary_op)
2915 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
2916 slp_node);
2917 else
2918 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
2919 slp_node);
2920 if (op_type == ternary_op)
2921 {
2922 vec_oprnds2 = VEC_alloc (tree, heap, 1);
2923 VEC_quick_push (tree, vec_oprnds2,
2924 vect_get_vec_def_for_operand (op2, stmt, NULL));
2925 }
2926 }
2927 else
2928 {
2929 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, &vec_oprnds1);
2930 if (op_type == ternary_op)
2931 {
2932 tree vec_oprnd = VEC_pop (tree, vec_oprnds2);
2933 VEC_quick_push (tree, vec_oprnds2,
2934 vect_get_vec_def_for_stmt_copy (dt[2],
2935 vec_oprnd));
2936 }
2937 }
2938
2939 /* Arguments are ready. Create the new vector stmt. */
2940 FOR_EACH_VEC_ELT (tree, vec_oprnds0, i, vop0)
2941 {
2942 vop1 = ((op_type == binary_op || op_type == ternary_op)
2943 ? VEC_index (tree, vec_oprnds1, i) : NULL_TREE);
2944 vop2 = ((op_type == ternary_op)
2945 ? VEC_index (tree, vec_oprnds2, i) : NULL_TREE);
2946 new_stmt = gimple_build_assign_with_ops3 (code, vec_dest,
2947 vop0, vop1, vop2);
2948 new_temp = make_ssa_name (vec_dest, new_stmt);
2949 gimple_assign_set_lhs (new_stmt, new_temp);
2950 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2951 if (slp_node)
2952 VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node), new_stmt);
2953 }
2954
2955 if (slp_node)
2956 continue;
2957
2958 if (j == 0)
2959 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2960 else
2961 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2962 prev_stmt_info = vinfo_for_stmt (new_stmt);
2963 }
2964
2965 VEC_free (tree, heap, vec_oprnds0);
2966 if (vec_oprnds1)
2967 VEC_free (tree, heap, vec_oprnds1);
2968 if (vec_oprnds2)
2969 VEC_free (tree, heap, vec_oprnds2);
2970
2971 return true;
2972 }
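/* A minimal scalar example for the binary case (illustrative only;
   ternary codes such as FMA_EXPR follow the same scheme with a third
   operand vector):

     void
     f (int *restrict a, int *restrict b, int *restrict c, int n)
     {
       int i;
       for (i = 0; i < n; i++)
         a[i] = b[i] + c[i];            PLUS_EXPR, op_type == binary_op
     }

   With VF == 8 and 4 elements per vector this produces ncopies == 2
   vector additions, chained through STMT_VINFO_RELATED_STMT exactly as
   in the example above.  */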
2973
2974
2975 /* Get vectorized definitions for loop-based vectorization. For the first
2976 operand we call vect_get_vec_def_for_operand() (with OPRND containing
2977 scalar operand), and for the rest we get a copy with
2978 vect_get_vec_def_for_stmt_copy() using the previous vector definition
2979 (stored in OPRND). See vect_get_vec_def_for_stmt_copy() for details.
2980 The vectors are collected into VEC_OPRNDS. */
2981
2982 static void
2983 vect_get_loop_based_defs (tree *oprnd, gimple stmt, enum vect_def_type dt,
2984 VEC (tree, heap) **vec_oprnds, int multi_step_cvt)
2985 {
2986 tree vec_oprnd;
2987
2988 /* Get first vector operand. */
2989 /* All the vector operands except the very first one (that is scalar oprnd)
2990 are stmt copies. */
2991 if (TREE_CODE (TREE_TYPE (*oprnd)) != VECTOR_TYPE)
2992 vec_oprnd = vect_get_vec_def_for_operand (*oprnd, stmt, NULL);
2993 else
2994 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, *oprnd);
2995
2996 VEC_quick_push (tree, *vec_oprnds, vec_oprnd);
2997
2998 /* Get second vector operand. */
2999 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, vec_oprnd);
3000 VEC_quick_push (tree, *vec_oprnds, vec_oprnd);
3001
3002 *oprnd = vec_oprnd;
3003
3004 /* For conversion in multiple steps, continue to get operands
3005 recursively. */
3006 if (multi_step_cvt)
3007 vect_get_loop_based_defs (oprnd, stmt, dt, vec_oprnds, multi_step_cvt - 1);
3008 }
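/* For example (a sketch): when the caller passes 1 for MULTI_STEP_CVT
   (a two-step narrowing such as int to char), the function recurses
   once and pushes four vector definitions onto VEC_OPRNDS - the four
   source vectors needed to produce one destination vector.  */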
3009
3010
3011 /* Create vectorized demotion statements for vector operands from VEC_OPRNDS.
3012 For multi-step conversions store the resulting vectors and call the function
3013 recursively. */
3014
3015 static void
3016 vect_create_vectorized_demotion_stmts (VEC (tree, heap) **vec_oprnds,
3017 int multi_step_cvt, gimple stmt,
3018 VEC (tree, heap) *vec_dsts,
3019 gimple_stmt_iterator *gsi,
3020 slp_tree slp_node, enum tree_code code,
3021 stmt_vec_info *prev_stmt_info)
3022 {
3023 unsigned int i;
3024 tree vop0, vop1, new_tmp, vec_dest;
3025 gimple new_stmt;
3026 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3027
3028 vec_dest = VEC_pop (tree, vec_dsts);
3029
3030 for (i = 0; i < VEC_length (tree, *vec_oprnds); i += 2)
3031 {
3032 /* Create demotion operation. */
3033 vop0 = VEC_index (tree, *vec_oprnds, i);
3034 vop1 = VEC_index (tree, *vec_oprnds, i + 1);
3035 new_stmt = gimple_build_assign_with_ops (code, vec_dest, vop0, vop1);
3036 new_tmp = make_ssa_name (vec_dest, new_stmt);
3037 gimple_assign_set_lhs (new_stmt, new_tmp);
3038 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3039
3040 if (multi_step_cvt)
3041 /* Store the resulting vector for next recursive call. */
3042 VEC_replace (tree, *vec_oprnds, i/2, new_tmp);
3043 else
3044 {
3045 /* This is the last step of the conversion sequence. Store the
3046 vectors in SLP_NODE or in vector info of the scalar statement
3047 (or in STMT_VINFO_RELATED_STMT chain). */
3048 if (slp_node)
3049 VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node), new_stmt);
3050 else
3051 {
3052 if (!*prev_stmt_info)
3053 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
3054 else
3055 STMT_VINFO_RELATED_STMT (*prev_stmt_info) = new_stmt;
3056
3057 *prev_stmt_info = vinfo_for_stmt (new_stmt);
3058 }
3059 }
3060 }
3061
3062 /* For multi-step demotion operations we first generate demotion operations
3063 from the source type to the intermediate types, and then combine the
3064 results (stored in VEC_OPRNDS) in a demotion operation to the destination
3065 type. */
3066 if (multi_step_cvt)
3067 {
3068 /* At each level of recursion we have half of the operands we had at the
3069 previous level. */
3070 VEC_truncate (tree, *vec_oprnds, (i+1)/2);
3071 vect_create_vectorized_demotion_stmts (vec_oprnds, multi_step_cvt - 1,
3072 stmt, vec_dsts, gsi, slp_node,
3073 code, prev_stmt_info);
3074 }
3075 }
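/* Worked example (illustrative, assuming 128-bit vectors): demoting
   int to char goes through short, so with MULTI_STEP_CVT == 1 the
   first invocation packs four int vectors pairwise into two short
   vectors, truncates VEC_OPRNDS to those two results, and the
   recursive call packs them into the single char vector that is
   finally recorded in the SLP node or the RELATED_STMT chain.  */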
3076
3077
3078 /* Function vectorizable_type_demotion
3079
3080 Check if STMT performs a binary or unary operation that involves
3081 type demotion, and if it can be vectorized.
3082 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
3083 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
3084 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
3085
3086 static bool
3087 vectorizable_type_demotion (gimple stmt, gimple_stmt_iterator *gsi,
3088 gimple *vec_stmt, slp_tree slp_node)
3089 {
3090 tree vec_dest;
3091 tree scalar_dest;
3092 tree op0;
3093 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3094 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3095 enum tree_code code, code1 = ERROR_MARK;
3096 tree def;
3097 gimple def_stmt;
3098 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
3099 stmt_vec_info prev_stmt_info;
3100 int nunits_in;
3101 int nunits_out;
3102 tree vectype_out;
3103 int ncopies;
3104 int j, i;
3105 tree vectype_in;
3106 int multi_step_cvt = 0;
3107 VEC (tree, heap) *vec_oprnds0 = NULL;
3108 VEC (tree, heap) *vec_dsts = NULL, *interm_types = NULL, *tmp_vec_dsts = NULL;
3109 tree last_oprnd, intermediate_type;
3110 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
3111
3112 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
3113 return false;
3114
3115 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
3116 return false;
3117
3118 /* Is STMT a vectorizable type-demotion operation? */
3119 if (!is_gimple_assign (stmt))
3120 return false;
3121
3122 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
3123 return false;
3124
3125 code = gimple_assign_rhs_code (stmt);
3126 if (!CONVERT_EXPR_CODE_P (code))
3127 return false;
3128
3129 scalar_dest = gimple_assign_lhs (stmt);
3130 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
3131
3132 /* Check the operands of the operation. */
3133 op0 = gimple_assign_rhs1 (stmt);
3134 if (! ((INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest))
3135 && INTEGRAL_TYPE_P (TREE_TYPE (op0)))
3136 || (SCALAR_FLOAT_TYPE_P (TREE_TYPE (scalar_dest))
3137 && SCALAR_FLOAT_TYPE_P (TREE_TYPE (op0)))))
3138 return false;
3139
3140 if (INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest))
3141 && ((TYPE_PRECISION (TREE_TYPE (scalar_dest))
3142 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest))))
3143 || ((TYPE_PRECISION (TREE_TYPE (op0))
3144 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (op0)))))))
3145 {
3146 if (vect_print_dump_info (REPORT_DETAILS))
3147 fprintf (vect_dump, "type demotion to/from bit-precision unsupported.");
3148 return false;
3149 }
3150
3151 if (!vect_is_simple_use_1 (op0, loop_vinfo, bb_vinfo,
3152 &def_stmt, &def, &dt[0], &vectype_in))
3153 {
3154 if (vect_print_dump_info (REPORT_DETAILS))
3155 fprintf (vect_dump, "use not simple.");
3156 return false;
3157 }
3158 /* If op0 is an external def use a vector type with the
3159 same size as the output vector type if possible. */
3160 if (!vectype_in)
3161 vectype_in = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
3162 if (vec_stmt)
3163 gcc_assert (vectype_in);
3164 if (!vectype_in)
3165 {
3166 if (vect_print_dump_info (REPORT_DETAILS))
3167 {
3168 fprintf (vect_dump, "no vectype for scalar type ");
3169 print_generic_expr (vect_dump, TREE_TYPE (op0), TDF_SLIM);
3170 }
3171
3172 return false;
3173 }
3174
3175 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
3176 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
3177 if (nunits_in >= nunits_out)
3178 return false;
3179
3180 /* Multiple types in SLP are handled by creating the appropriate number of
3181 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
3182 case of SLP. */
3183 if (slp_node || PURE_SLP_STMT (stmt_info))
3184 ncopies = 1;
3185 else
3186 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out;
3187 gcc_assert (ncopies >= 1);
3188
3189 /* Supportable by target? */
3190 if (!supportable_narrowing_operation (code, vectype_out, vectype_in,
3191 &code1, &multi_step_cvt, &interm_types))
3192 return false;
3193
3194 if (!vec_stmt) /* transformation not required. */
3195 {
3196 STMT_VINFO_TYPE (stmt_info) = type_demotion_vec_info_type;
3197 if (vect_print_dump_info (REPORT_DETAILS))
3198 fprintf (vect_dump, "=== vectorizable_demotion ===");
3199 vect_model_simple_cost (stmt_info, ncopies, dt, NULL);
3200 return true;
3201 }
3202
3203 /** Transform. **/
3204 if (vect_print_dump_info (REPORT_DETAILS))
3205 fprintf (vect_dump, "transform type demotion operation. ncopies = %d.",
3206 ncopies);
3207
3208 /* In case of multi-step demotion, we first generate demotion operations to
3209 the intermediate types, and then from those types to the final one.
3210 We create vector destinations for the intermediate type (TYPES) received
3211 from supportable_narrowing_operation, and store them in the correct order
3212 for future use in vect_create_vectorized_demotion_stmts(). */
3213 if (multi_step_cvt)
3214 vec_dsts = VEC_alloc (tree, heap, multi_step_cvt + 1);
3215 else
3216 vec_dsts = VEC_alloc (tree, heap, 1);
3217
3218 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
3219 VEC_quick_push (tree, vec_dsts, vec_dest);
3220
3221 if (multi_step_cvt)
3222 {
3223 for (i = VEC_length (tree, interm_types) - 1;
3224 VEC_iterate (tree, interm_types, i, intermediate_type); i--)
3225 {
3226 vec_dest = vect_create_destination_var (scalar_dest,
3227 intermediate_type);
3228 VEC_quick_push (tree, vec_dsts, vec_dest);
3229 }
3230 }
3231
3232 /* In case the vectorization factor (VF) is bigger than the number
3233 of elements that we can fit in a vectype (nunits), we have to generate
3234 more than one vector stmt - i.e - we need to "unroll" the
3235 vector stmt by a factor VF/nunits. */
3236 last_oprnd = op0;
3237 prev_stmt_info = NULL;
3238 for (j = 0; j < ncopies; j++)
3239 {
3240 /* Handle uses. */
3241 if (slp_node)
3242 vect_get_slp_defs (op0, NULL_TREE, slp_node, &vec_oprnds0, NULL, -1);
3243 else
3244 {
3245 VEC_free (tree, heap, vec_oprnds0);
3246 vec_oprnds0 = VEC_alloc (tree, heap,
3247 (multi_step_cvt ? vect_pow2 (multi_step_cvt) * 2 : 2));
3248 vect_get_loop_based_defs (&last_oprnd, stmt, dt[0], &vec_oprnds0,
3249 vect_pow2 (multi_step_cvt) - 1);
3250 }
3251
3252 /* Arguments are ready. Create the new vector stmts. */
3253 tmp_vec_dsts = VEC_copy (tree, heap, vec_dsts);
3254 vect_create_vectorized_demotion_stmts (&vec_oprnds0,
3255 multi_step_cvt, stmt, tmp_vec_dsts,
3256 gsi, slp_node, code1,
3257 &prev_stmt_info);
3258 }
3259
3260 VEC_free (tree, heap, vec_oprnds0);
3261 VEC_free (tree, heap, vec_dsts);
3262 VEC_free (tree, heap, tmp_vec_dsts);
3263 VEC_free (tree, heap, interm_types);
3264
3265 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
3266 return true;
3267 }
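/* A minimal scalar example of a demotion handled above (illustrative;
   with 128-bit vectors this is the two-step int -> short -> char case
   described before vect_create_vectorized_demotion_stmts):

     void
     f (char *restrict a, int *restrict b, int n)
     {
       int i;
       for (i = 0; i < n; i++)
         a[i] = (char) b[i];
     }
*/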
3268
3269
3270 /* Create vectorized promotion statements for vector operands from VEC_OPRNDS0
3271 and VEC_OPRNDS1 (for binary operations). For multi-step conversions store
3272 the resulting vectors and call the function recursively. */
3273
3274 static void
3275 vect_create_vectorized_promotion_stmts (VEC (tree, heap) **vec_oprnds0,
3276 VEC (tree, heap) **vec_oprnds1,
3277 int multi_step_cvt, gimple stmt,
3278 VEC (tree, heap) *vec_dsts,
3279 gimple_stmt_iterator *gsi,
3280 slp_tree slp_node, enum tree_code code1,
3281 enum tree_code code2, tree decl1,
3282 tree decl2, int op_type,
3283 stmt_vec_info *prev_stmt_info)
3284 {
3285 int i;
3286 tree vop0, vop1, new_tmp1, new_tmp2, vec_dest;
3287 gimple new_stmt1, new_stmt2;
3288 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3289 VEC (tree, heap) *vec_tmp;
3290
3291 vec_dest = VEC_pop (tree, vec_dsts);
3292 vec_tmp = VEC_alloc (tree, heap, VEC_length (tree, *vec_oprnds0) * 2);
3293
3294 FOR_EACH_VEC_ELT (tree, *vec_oprnds0, i, vop0)
3295 {
3296 if (op_type == binary_op)
3297 vop1 = VEC_index (tree, *vec_oprnds1, i);
3298 else
3299 vop1 = NULL_TREE;
3300
3301 /* Generate the two halves of promotion operation. */
3302 new_stmt1 = vect_gen_widened_results_half (code1, decl1, vop0, vop1,
3303 op_type, vec_dest, gsi, stmt);
3304 new_stmt2 = vect_gen_widened_results_half (code2, decl2, vop0, vop1,
3305 op_type, vec_dest, gsi, stmt);
3306 if (is_gimple_call (new_stmt1))
3307 {
3308 new_tmp1 = gimple_call_lhs (new_stmt1);
3309 new_tmp2 = gimple_call_lhs (new_stmt2);
3310 }
3311 else
3312 {
3313 new_tmp1 = gimple_assign_lhs (new_stmt1);
3314 new_tmp2 = gimple_assign_lhs (new_stmt2);
3315 }
3316
3317 if (multi_step_cvt)
3318 {
3319 /* Store the results for the recursive call. */
3320 VEC_quick_push (tree, vec_tmp, new_tmp1);
3321 VEC_quick_push (tree, vec_tmp, new_tmp2);
3322 }
3323 else
3324 {
3325 /* Last step of the promotion sequence - store the results. */
3326 if (slp_node)
3327 {
3328 VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node), new_stmt1);
3329 VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node), new_stmt2);
3330 }
3331 else
3332 {
3333 if (!*prev_stmt_info)
3334 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt1;
3335 else
3336 STMT_VINFO_RELATED_STMT (*prev_stmt_info) = new_stmt1;
3337
3338 *prev_stmt_info = vinfo_for_stmt (new_stmt1);
3339 STMT_VINFO_RELATED_STMT (*prev_stmt_info) = new_stmt2;
3340 *prev_stmt_info = vinfo_for_stmt (new_stmt2);
3341 }
3342 }
3343 }
3344
3345 if (multi_step_cvt)
3346 {
3347 /* For multi-step promotion operations we call the function
3348 recursively for every stage. We start from the input type,
3349 create promotion operations to the intermediate types, and then
3350 create promotions to the output type. */
3351 *vec_oprnds0 = VEC_copy (tree, heap, vec_tmp);
3352 vect_create_vectorized_promotion_stmts (vec_oprnds0, vec_oprnds1,
3353 multi_step_cvt - 1, stmt,
3354 vec_dsts, gsi, slp_node, code1,
3355 code2, decl2, decl2, op_type,
3356 prev_stmt_info);
3357 }
3358
3359 VEC_free (tree, heap, vec_tmp);
3360 }
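/* Worked example (illustrative, assuming 128-bit vectors): promoting
   char to int goes through short, so the first invocation unpacks each
   char vector into a low and a high short vector (new_stmt1/new_stmt2),
   collects them in VEC_TMP, and the recursive call unpacks those again
   into the four int vectors that form the final result.  */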
3361
3362
3363 /* Function vectorizable_type_promotion
3364
3365 Check if STMT performs a binary or unary operation that involves
3366 type promotion, and if it can be vectorized.
3367 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
3368 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
3369 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
3370
3371 static bool
3372 vectorizable_type_promotion (gimple stmt, gimple_stmt_iterator *gsi,
3373 gimple *vec_stmt, slp_tree slp_node)
3374 {
3375 tree vec_dest;
3376 tree scalar_dest;
3377 tree op0, op1 = NULL;
3378 tree vec_oprnd0=NULL, vec_oprnd1=NULL;
3379 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3380 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3381 enum tree_code code, code1 = ERROR_MARK, code2 = ERROR_MARK;
3382 tree decl1 = NULL_TREE, decl2 = NULL_TREE;
3383 int op_type;
3384 tree def;
3385 gimple def_stmt;
3386 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
3387 stmt_vec_info prev_stmt_info;
3388 int nunits_in;
3389 int nunits_out;
3390 tree vectype_out;
3391 int ncopies;
3392 int j, i;
3393 tree vectype_in;
3394 tree intermediate_type = NULL_TREE;
3395 int multi_step_cvt = 0;
3396 VEC (tree, heap) *vec_oprnds0 = NULL, *vec_oprnds1 = NULL;
3397 VEC (tree, heap) *vec_dsts = NULL, *interm_types = NULL, *tmp_vec_dsts = NULL;
3398 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
3399 unsigned int k;
3400
3401 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
3402 return false;
3403
3404 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
3405 return false;
3406
3407 /* Is STMT a vectorizable type-promotion operation? */
3408 if (!is_gimple_assign (stmt))
3409 return false;
3410
3411 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
3412 return false;
3413
3414 code = gimple_assign_rhs_code (stmt);
3415 if (!CONVERT_EXPR_CODE_P (code)
3416 && code != WIDEN_MULT_EXPR
3417 && code != WIDEN_LSHIFT_EXPR)
3418 return false;
3419
3420 scalar_dest = gimple_assign_lhs (stmt);
3421 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
3422
3423 /* Check the operands of the operation. */
3424 op0 = gimple_assign_rhs1 (stmt);
3425 if (! ((INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest))
3426 && INTEGRAL_TYPE_P (TREE_TYPE (op0)))
3427 || (SCALAR_FLOAT_TYPE_P (TREE_TYPE (scalar_dest))
3428 && SCALAR_FLOAT_TYPE_P (TREE_TYPE (op0))
3429 && CONVERT_EXPR_CODE_P (code))))
3430 return false;
3431
3432 if (INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest))
3433 && ((TYPE_PRECISION (TREE_TYPE (scalar_dest))
3434 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest))))
3435 || ((TYPE_PRECISION (TREE_TYPE (op0))
3436 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (op0)))))))
3437 {
3438 if (vect_print_dump_info (REPORT_DETAILS))
3439 fprintf (vect_dump, "type promotion to/from bit-precision "
3440 "unsupported.");
3441 return false;
3442 }
3443
3444 if (!vect_is_simple_use_1 (op0, loop_vinfo, bb_vinfo,
3445 &def_stmt, &def, &dt[0], &vectype_in))
3446 {
3447 if (vect_print_dump_info (REPORT_DETAILS))
3448 fprintf (vect_dump, "use not simple.");
3449 return false;
3450 }
3451
3452 op_type = TREE_CODE_LENGTH (code);
3453 if (op_type == binary_op)
3454 {
3455 bool ok;
3456
3457 op1 = gimple_assign_rhs2 (stmt);
3458 if (code == WIDEN_MULT_EXPR || code == WIDEN_LSHIFT_EXPR)
3459 {
3460 /* For WIDEN_MULT_EXPR, if OP0 is a constant, use the type of
3461 OP1. */
3462 if (CONSTANT_CLASS_P (op0))
3463 ok = vect_is_simple_use_1 (op1, loop_vinfo, NULL,
3464 &def_stmt, &def, &dt[1], &vectype_in);
3465 else
3466 ok = vect_is_simple_use (op1, loop_vinfo, NULL, &def_stmt, &def,
3467 &dt[1]);
3468
3469 if (!ok)
3470 {
3471 if (vect_print_dump_info (REPORT_DETAILS))
3472 fprintf (vect_dump, "use not simple.");
3473 return false;
3474 }
3475 }
3476 }
3477
3478 /* If op0 is an external or constant def use a vector type with
3479 the same size as the output vector type. */
3480 if (!vectype_in)
3481 vectype_in = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
3482 if (vec_stmt)
3483 gcc_assert (vectype_in);
3484 if (!vectype_in)
3485 {
3486 if (vect_print_dump_info (REPORT_DETAILS))
3487 {
3488 fprintf (vect_dump, "no vectype for scalar type ");
3489 print_generic_expr (vect_dump, TREE_TYPE (op0), TDF_SLIM);
3490 }
3491
3492 return false;
3493 }
3494
3495 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
3496 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
3497 if (nunits_in <= nunits_out)
3498 return false;
3499
3500 /* Multiple types in SLP are handled by creating the appropriate number of
3501 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
3502 case of SLP. */
3503 if (slp_node || PURE_SLP_STMT (stmt_info))
3504 ncopies = 1;
3505 else
3506 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
3507
3508 gcc_assert (ncopies >= 1);
3509
3510 /* Supportable by target? */
3511 if (!supportable_widening_operation (code, stmt, vectype_out, vectype_in,
3512 &decl1, &decl2, &code1, &code2,
3513 &multi_step_cvt, &interm_types))
3514 return false;
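/* CODE1/CODE2 (or DECL1/DECL2 for targets that use builtins) now describe
   the two half operations; for WIDEN_MULT_EXPR these are typically
   VEC_WIDEN_MULT_LO_EXPR and VEC_WIDEN_MULT_HI_EXPR, each producing half
   of the widened results from one pair of input vectors.  */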
3515
3516 /* A binary widening operation can only be supported directly by the
3517 architecture. */
3518 gcc_assert (!(multi_step_cvt && op_type == binary_op));
3519
3520 if (!vec_stmt) /* transformation not required. */
3521 {
3522 STMT_VINFO_TYPE (stmt_info) = type_promotion_vec_info_type;
3523 if (vect_print_dump_info (REPORT_DETAILS))
3524 fprintf (vect_dump, "=== vectorizable_promotion ===");
3525 vect_model_simple_cost (stmt_info, 2*ncopies, dt, NULL);
3526 return true;
3527 }
3528
3529 /** Transform. **/
3530
3531 if (vect_print_dump_info (REPORT_DETAILS))
3532 fprintf (vect_dump, "transform type promotion operation. ncopies = %d.",
3533 ncopies);
3534
3535 if (code == WIDEN_MULT_EXPR || code == WIDEN_LSHIFT_EXPR)
3536 {
3537 if (CONSTANT_CLASS_P (op0))
3538 op0 = fold_convert (TREE_TYPE (op1), op0);
3539 else if (CONSTANT_CLASS_P (op1))
3540 op1 = fold_convert (TREE_TYPE (op0), op1);
3541 }
3542
3543 /* Handle def. */
3544 /* In case of multi-step promotion, we first generate promotion operations
3545 to the intermediate types, and then from those types to the final one.
3546 We store the vector destinations in VEC_DSTS in the correct order for
3547 recursive creation of promotion operations in
3548 vect_create_vectorized_promotion_stmts(). Vector destinations are created
3549 according to TYPES received from supportable_widening_operation(). */
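/* For instance, promoting a vector of chars to a vector of ints on a
   target that only provides char->short and short->int widening operations
   takes two steps; VEC_DSTS then holds the final int destination followed
   by the intermediate short destination.  */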
3550 if (multi_step_cvt)
3551 vec_dsts = VEC_alloc (tree, heap, multi_step_cvt + 1);
3552 else
3553 vec_dsts = VEC_alloc (tree, heap, 1);
3554
3555 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
3556 VEC_quick_push (tree, vec_dsts, vec_dest);
3557
3558 if (multi_step_cvt)
3559 {
3560 for (i = VEC_length (tree, interm_types) - 1;
3561 VEC_iterate (tree, interm_types, i, intermediate_type); i--)
3562 {
3563 vec_dest = vect_create_destination_var (scalar_dest,
3564 intermediate_type);
3565 VEC_quick_push (tree, vec_dsts, vec_dest);
3566 }
3567 }
3568
3569 if (!slp_node)
3570 {
3571 vec_oprnds0 = VEC_alloc (tree, heap,
3572 (multi_step_cvt ? vect_pow2 (multi_step_cvt) : 1));
3573 if (op_type == binary_op)
3574 vec_oprnds1 = VEC_alloc (tree, heap, 1);
3575 }
3576 else if (code == WIDEN_LSHIFT_EXPR)
3577 vec_oprnds1 = VEC_alloc (tree, heap, slp_node->vec_stmts_size);
3578
3579 /* In case the vectorization factor (VF) is bigger than the number
3580 of elements that we can fit in a vectype (nunits), we have to generate
3581 more than one vector stmt - i.e., we need to "unroll" the
3582 vector stmt by a factor VF/nunits. */
3583
3584 prev_stmt_info = NULL;
3585 for (j = 0; j < ncopies; j++)
3586 {
3587 /* Handle uses. */
3588 if (j == 0)
3589 {
3590 if (slp_node)
3591 {
3592 if (code == WIDEN_LSHIFT_EXPR)
3593 {
3594 vec_oprnd1 = op1;
3595 /* Store vec_oprnd1 for every vector stmt to be created
3596 for SLP_NODE. We check during the analysis that all
3597 the shift arguments are the same. */
3598 for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
3599 VEC_quick_push (tree, vec_oprnds1, vec_oprnd1);
3600
3601 vect_get_slp_defs (op0, NULL_TREE, slp_node, &vec_oprnds0, NULL,
3602 -1);
3603 }
3604 else
3605 vect_get_slp_defs (op0, op1, slp_node, &vec_oprnds0,
3606 &vec_oprnds1, -1);
3607 }
3608 else
3609 {
3610 vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt, NULL);
3611 VEC_quick_push (tree, vec_oprnds0, vec_oprnd0);
3612 if (op_type == binary_op)
3613 {
3614 if (code == WIDEN_LSHIFT_EXPR)
3615 vec_oprnd1 = op1;
3616 else
3617 vec_oprnd1 = vect_get_vec_def_for_operand (op1, stmt, NULL);
3618 VEC_quick_push (tree, vec_oprnds1, vec_oprnd1);
3619 }
3620 }
3621 }
3622 else
3623 {
3624 vec_oprnd0 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd0);
3625 VEC_replace (tree, vec_oprnds0, 0, vec_oprnd0);
3626 if (op_type == binary_op)
3627 {
3628 if (code == WIDEN_LSHIFT_EXPR)
3629 vec_oprnd1 = op1;
3630 else
3631 vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt[1], vec_oprnd1);
3632 VEC_replace (tree, vec_oprnds1, 0, vec_oprnd1);
3633 }
3634 }
3635
3636 /* Arguments are ready. Create the new vector stmts. */
3637 tmp_vec_dsts = VEC_copy (tree, heap, vec_dsts);
3638 vect_create_vectorized_promotion_stmts (&vec_oprnds0, &vec_oprnds1,
3639 multi_step_cvt, stmt,
3640 tmp_vec_dsts,
3641 gsi, slp_node, code1, code2,
3642 decl1, decl2, op_type,
3643 &prev_stmt_info);
3644 }
3645
3646 VEC_free (tree, heap, vec_dsts);
3647 VEC_free (tree, heap, tmp_vec_dsts);
3648 VEC_free (tree, heap, interm_types);
3649 VEC_free (tree, heap, vec_oprnds0);
3650 VEC_free (tree, heap, vec_oprnds1);
3651
3652 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
3653 return true;
3654 }
3655
3656
3657 /* Function vectorizable_store.
3658
3659 Check if STMT defines a non-scalar data-ref (array/pointer/structure) that
3660 can be vectorized.
3661 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
3662 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
3663 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
3664
3665 static bool
3666 vectorizable_store (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
3667 slp_tree slp_node)
3668 {
3669 tree scalar_dest;
3670 tree data_ref;
3671 tree op;
3672 tree vec_oprnd = NULL_TREE;
3673 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3674 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr = NULL;
3675 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
3676 tree elem_type;
3677 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3678 struct loop *loop = NULL;
3679 enum machine_mode vec_mode;
3680 tree dummy;
3681 enum dr_alignment_support alignment_support_scheme;
3682 tree def;
3683 gimple def_stmt;
3684 enum vect_def_type dt;
3685 stmt_vec_info prev_stmt_info = NULL;
3686 tree dataref_ptr = NULL_TREE;
3687 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
3688 int ncopies;
3689 int j;
3690 gimple next_stmt, first_stmt = NULL;
3691 bool strided_store = false;
3692 bool store_lanes_p = false;
3693 unsigned int group_size, i;
3694 VEC(tree,heap) *dr_chain = NULL, *oprnds = NULL, *result_chain = NULL;
3695 bool inv_p;
3696 VEC(tree,heap) *vec_oprnds = NULL;
3697 bool slp = (slp_node != NULL);
3698 unsigned int vec_num;
3699 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
3700 tree aggr_type;
3701
3702 if (loop_vinfo)
3703 loop = LOOP_VINFO_LOOP (loop_vinfo);
3704
3705 /* Multiple types in SLP are handled by creating the appropriate number of
3706 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
3707 case of SLP. */
3708 if (slp || PURE_SLP_STMT (stmt_info))
3709 ncopies = 1;
3710 else
3711 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
3712
3713 gcc_assert (ncopies >= 1);
3714
3715 /* FORNOW. This restriction should be relaxed. */
3716 if (loop && nested_in_vect_loop_p (loop, stmt) && ncopies > 1)
3717 {
3718 if (vect_print_dump_info (REPORT_DETAILS))
3719 fprintf (vect_dump, "multiple types in nested loop.");
3720 return false;
3721 }
3722
3723 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
3724 return false;
3725
3726 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
3727 return false;
3728
3729 /* Is vectorizable store? */
3730
3731 if (!is_gimple_assign (stmt))
3732 return false;
3733
3734 scalar_dest = gimple_assign_lhs (stmt);
3735 if (TREE_CODE (scalar_dest) == VIEW_CONVERT_EXPR
3736 && is_pattern_stmt_p (stmt_info))
3737 scalar_dest = TREE_OPERAND (scalar_dest, 0);
3738 if (TREE_CODE (scalar_dest) != ARRAY_REF
3739 && TREE_CODE (scalar_dest) != INDIRECT_REF
3740 && TREE_CODE (scalar_dest) != COMPONENT_REF
3741 && TREE_CODE (scalar_dest) != IMAGPART_EXPR
3742 && TREE_CODE (scalar_dest) != REALPART_EXPR
3743 && TREE_CODE (scalar_dest) != MEM_REF)
3744 return false;
3745
3746 gcc_assert (gimple_assign_single_p (stmt));
3747 op = gimple_assign_rhs1 (stmt);
3748 if (!vect_is_simple_use (op, loop_vinfo, bb_vinfo, &def_stmt, &def, &dt))
3749 {
3750 if (vect_print_dump_info (REPORT_DETAILS))
3751 fprintf (vect_dump, "use not simple.");
3752 return false;
3753 }
3754
3755 elem_type = TREE_TYPE (vectype);
3756 vec_mode = TYPE_MODE (vectype);
3757
3758 /* FORNOW. In some cases we can vectorize even if the data-type is not
3759 supported (e.g. array initialization with 0). */
3760 if (optab_handler (mov_optab, vec_mode) == CODE_FOR_nothing)
3761 return false;
3762
3763 if (!STMT_VINFO_DATA_REF (stmt_info))
3764 return false;
3765
3766 if (tree_int_cst_compare (DR_STEP (dr), size_zero_node) < 0)
3767 {
3768 if (vect_print_dump_info (REPORT_DETAILS))
3769 fprintf (vect_dump, "negative step for store.");
3770 return false;
3771 }
3772
3773 if (STMT_VINFO_STRIDED_ACCESS (stmt_info))
3774 {
3775 strided_store = true;
3776 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
3777 if (!slp && !PURE_SLP_STMT (stmt_info))
3778 {
3779 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
3780 if (vect_store_lanes_supported (vectype, group_size))
3781 store_lanes_p = true;
3782 else if (!vect_strided_store_supported (vectype, group_size))
3783 return false;
3784 }
3785
3786 if (first_stmt == stmt)
3787 {
3788 /* STMT is the leader of the group. Check the operands of all the
3789 stmts of the group. */
3790 next_stmt = GROUP_NEXT_ELEMENT (stmt_info);
3791 while (next_stmt)
3792 {
3793 gcc_assert (gimple_assign_single_p (next_stmt));
3794 op = gimple_assign_rhs1 (next_stmt);
3795 if (!vect_is_simple_use (op, loop_vinfo, bb_vinfo, &def_stmt,
3796 &def, &dt))
3797 {
3798 if (vect_print_dump_info (REPORT_DETAILS))
3799 fprintf (vect_dump, "use not simple.");
3800 return false;
3801 }
3802 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
3803 }
3804 }
3805 }
3806
3807 if (!vec_stmt) /* transformation not required. */
3808 {
3809 STMT_VINFO_TYPE (stmt_info) = store_vec_info_type;
3810 vect_model_store_cost (stmt_info, ncopies, store_lanes_p, dt, NULL);
3811 return true;
3812 }
3813
3814 /** Transform. **/
3815
3816 if (strided_store)
3817 {
3818 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
3819 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
3820
3821 GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt))++;
3822
3823 /* FORNOW */
3824 gcc_assert (!loop || !nested_in_vect_loop_p (loop, stmt));
3825
3826 /* We vectorize all the stmts of the interleaving group when we
3827 reach the last stmt in the group. */
3828 if (GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt))
3829 < GROUP_SIZE (vinfo_for_stmt (first_stmt))
3830 && !slp)
3831 {
3832 *vec_stmt = NULL;
3833 return true;
3834 }
3835
3836 if (slp)
3837 {
3838 strided_store = false;
3839 /* VEC_NUM is the number of vect stmts to be created for this
3840 group. */
3841 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
3842 first_stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (slp_node), 0);
3843 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
3844 }
3845 else
3846 /* VEC_NUM is the number of vect stmts to be created for this
3847 group. */
3848 vec_num = group_size;
3849 }
3850 else
3851 {
3852 first_stmt = stmt;
3853 first_dr = dr;
3854 group_size = vec_num = 1;
3855 }
3856
3857 if (vect_print_dump_info (REPORT_DETAILS))
3858 fprintf (vect_dump, "transform store. ncopies = %d",ncopies);
3859
3860 dr_chain = VEC_alloc (tree, heap, group_size);
3861 oprnds = VEC_alloc (tree, heap, group_size);
3862
3863 alignment_support_scheme = vect_supportable_dr_alignment (first_dr, false);
3864 gcc_assert (alignment_support_scheme);
3865 /* Targets with store-lane instructions must not require explicit
3866 realignment. */
3867 gcc_assert (!store_lanes_p
3868 || alignment_support_scheme == dr_aligned
3869 || alignment_support_scheme == dr_unaligned_supported);
3870
3871 if (store_lanes_p)
3872 aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
3873 else
3874 aggr_type = vectype;
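/* For example, for a group of two V4SI stores the store-lanes path uses an
   int[8] aggregate, written below by a single IFN_STORE_LANES call, while
   the fallback path stores one vector at a time.  */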
3875
3876 /* In case the vectorization factor (VF) is bigger than the number
3877 of elements that we can fit in a vectype (nunits), we have to generate
3878 more than one vector stmt - i.e., we need to "unroll" the
3879 vector stmt by a factor VF/nunits. For more details see documentation in
3880 vect_get_vec_def_for_copy_stmt. */
3881
3882 /* In case of interleaving (non-unit strided access):
3883
3884 S1: &base + 2 = x2
3885 S2: &base = x0
3886 S3: &base + 1 = x1
3887 S4: &base + 3 = x3
3888
3889 We create vectorized stores starting from the base address (the access of
3890 the first stmt in the chain - S2 in the above example), when the last store
3891 stmt of the chain (S4) is reached:
3892
3893 VS1: &base = vx2
3894 VS2: &base + vec_size*1 = vx0
3895 VS3: &base + vec_size*2 = vx1
3896 VS4: &base + vec_size*3 = vx3
3897
3898 Then permutation statements are generated:
3899
3900 VS5: vx5 = VEC_INTERLEAVE_HIGH_EXPR < vx0, vx3 >
3901 VS6: vx6 = VEC_INTERLEAVE_LOW_EXPR < vx0, vx3 >
3902 ...
3903
3904 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
3905 (the order of the data-refs in the output of vect_permute_store_chain
3906 corresponds to the order of scalar stmts in the interleaving chain - see
3907 the documentation of vect_permute_store_chain()).
3908
3909 In case of both multiple types and interleaving, above vector stores and
3910 permutation stmts are created for every copy. The result vector stmts are
3911 put in STMT_VINFO_VEC_STMT for the first copy and in the corresponding
3912 STMT_VINFO_RELATED_STMT for the next copies.
3913 */
3914
3915 prev_stmt_info = NULL;
3916 for (j = 0; j < ncopies; j++)
3917 {
3918 gimple new_stmt;
3919 gimple ptr_incr;
3920
3921 if (j == 0)
3922 {
3923 if (slp)
3924 {
3925 /* Get vectorized arguments for SLP_NODE. */
3926 vect_get_slp_defs (NULL_TREE, NULL_TREE, slp_node, &vec_oprnds,
3927 NULL, -1);
3928
3929 vec_oprnd = VEC_index (tree, vec_oprnds, 0);
3930 }
3931 else
3932 {
3933 /* For interleaved stores we collect vectorized defs for all the
3934 stores in the group in DR_CHAIN and OPRNDS. DR_CHAIN is then
3935 used as an input to vect_permute_store_chain(), and OPRNDS as
3936 an input to vect_get_vec_def_for_stmt_copy() for the next copy.
3937
3938 If the store is not strided, GROUP_SIZE is 1, and DR_CHAIN and
3939 OPRNDS are of size 1. */
3940 next_stmt = first_stmt;
3941 for (i = 0; i < group_size; i++)
3942 {
3943 /* Since gaps are not supported for interleaved stores,
3944 GROUP_SIZE is the exact number of stmts in the chain.
3945 Therefore, NEXT_STMT can't be NULL_TREE. If there is no
3946 interleaving, GROUP_SIZE is 1, and only one
3947 iteration of the loop will be executed. */
3948 gcc_assert (next_stmt
3949 && gimple_assign_single_p (next_stmt));
3950 op = gimple_assign_rhs1 (next_stmt);
3951
3952 vec_oprnd = vect_get_vec_def_for_operand (op, next_stmt,
3953 NULL);
3954 VEC_quick_push (tree, dr_chain, vec_oprnd);
3955 VEC_quick_push (tree, oprnds, vec_oprnd);
3956 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
3957 }
3958 }
3959
3960 /* We should have caught mismatched types earlier. */
3961 gcc_assert (useless_type_conversion_p (vectype,
3962 TREE_TYPE (vec_oprnd)));
3963 dataref_ptr = vect_create_data_ref_ptr (first_stmt, aggr_type, NULL,
3964 NULL_TREE, &dummy, gsi,
3965 &ptr_incr, false, &inv_p);
3966 gcc_assert (bb_vinfo || !inv_p);
3967 }
3968 else
3969 {
3970 /* For interleaved stores we created vectorized defs for all the
3971 defs stored in OPRNDS in the previous iteration (previous copy).
3972 DR_CHAIN is then used as an input to vect_permute_store_chain(),
3973 and OPRNDS as an input to vect_get_vec_def_for_stmt_copy() for the
3974 next copy.
3975 If the store is not strided, GROUP_SIZE is 1, and DR_CHAIN and
3976 OPRNDS are of size 1. */
3977 for (i = 0; i < group_size; i++)
3978 {
3979 op = VEC_index (tree, oprnds, i);
3980 vect_is_simple_use (op, loop_vinfo, bb_vinfo, &def_stmt, &def,
3981 &dt);
3982 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, op);
3983 VEC_replace (tree, dr_chain, i, vec_oprnd);
3984 VEC_replace (tree, oprnds, i, vec_oprnd);
3985 }
3986 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
3987 TYPE_SIZE_UNIT (aggr_type));
3988 }
3989
3990 if (store_lanes_p)
3991 {
3992 tree vec_array;
3993
3994 /* Combine all the vectors into an array. */
3995 vec_array = create_vector_array (vectype, vec_num);
3996 for (i = 0; i < vec_num; i++)
3997 {
3998 vec_oprnd = VEC_index (tree, dr_chain, i);
3999 write_vector_array (stmt, gsi, vec_oprnd, vec_array, i);
4000 }
4001
4002 /* Emit:
4003 MEM_REF[...all elements...] = STORE_LANES (VEC_ARRAY). */
4004 data_ref = create_array_ref (aggr_type, dataref_ptr, first_dr);
4005 new_stmt = gimple_build_call_internal (IFN_STORE_LANES, 1, vec_array);
4006 gimple_call_set_lhs (new_stmt, data_ref);
4007 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4008 mark_symbols_for_renaming (new_stmt);
4009 }
4010 else
4011 {
4012 new_stmt = NULL;
4013 if (strided_store)
4014 {
4015 result_chain = VEC_alloc (tree, heap, group_size);
4016 /* Permute. */
4017 vect_permute_store_chain (dr_chain, group_size, stmt, gsi,
4018 &result_chain);
4019 }
4020
4021 next_stmt = first_stmt;
4022 for (i = 0; i < vec_num; i++)
4023 {
4024 struct ptr_info_def *pi;
4025
4026 if (i > 0)
4027 /* Bump the vector pointer. */
4028 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
4029 stmt, NULL_TREE);
4030
4031 if (slp)
4032 vec_oprnd = VEC_index (tree, vec_oprnds, i);
4033 else if (strided_store)
4034 /* For strided stores vectorized defs are interleaved in
4035 vect_permute_store_chain(). */
4036 vec_oprnd = VEC_index (tree, result_chain, i);
4037
4038 data_ref = build2 (MEM_REF, TREE_TYPE (vec_oprnd), dataref_ptr,
4039 build_int_cst (reference_alias_ptr_type
4040 (DR_REF (first_dr)), 0));
4041 pi = get_ptr_info (dataref_ptr);
4042 pi->align = TYPE_ALIGN_UNIT (vectype);
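/* Record what is known about the alignment of the access: a
   DR_MISALIGNMENT of -1 means the misalignment is not known at compile
   time, so only the element alignment can be guaranteed.  */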
4043 if (aligned_access_p (first_dr))
4044 pi->misalign = 0;
4045 else if (DR_MISALIGNMENT (first_dr) == -1)
4046 {
4047 TREE_TYPE (data_ref)
4048 = build_aligned_type (TREE_TYPE (data_ref),
4049 TYPE_ALIGN (elem_type));
4050 pi->align = TYPE_ALIGN_UNIT (elem_type);
4051 pi->misalign = 0;
4052 }
4053 else
4054 {
4055 TREE_TYPE (data_ref)
4056 = build_aligned_type (TREE_TYPE (data_ref),
4057 TYPE_ALIGN (elem_type));
4058 pi->misalign = DR_MISALIGNMENT (first_dr);
4059 }
4060
4061 /* Arguments are ready. Create the new vector stmt. */
4062 new_stmt = gimple_build_assign (data_ref, vec_oprnd);
4063 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4064 mark_symbols_for_renaming (new_stmt);
4065
4066 if (slp)
4067 continue;
4068
4069 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
4070 if (!next_stmt)
4071 break;
4072 }
4073 }
4074 if (!slp)
4075 {
4076 if (j == 0)
4077 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4078 else
4079 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4080 prev_stmt_info = vinfo_for_stmt (new_stmt);
4081 }
4082 }
4083
4084 VEC_free (tree, heap, dr_chain);
4085 VEC_free (tree, heap, oprnds);
4086 if (result_chain)
4087 VEC_free (tree, heap, result_chain);
4088 if (vec_oprnds)
4089 VEC_free (tree, heap, vec_oprnds);
4090
4091 return true;
4092 }
4093
4094 /* Given a vector type VECTYPE, return the constant permutation mask
4095 that implements reversal of the vector elements, for use in a
4096 VEC_PERM_EXPR. If that is impossible to do,
4097 return NULL. */
4098
4099 static tree
4100 perm_mask_for_reverse (tree vectype)
4101 {
4102 tree mask_elt_type, mask_type, mask_vec;
4103 int i, nunits;
4104 unsigned char *sel;
4105
4106 nunits = TYPE_VECTOR_SUBPARTS (vectype);
4107 sel = XALLOCAVEC (unsigned char, nunits);
4108
4109 for (i = 0; i < nunits; ++i)
4110 sel[i] = nunits - 1 - i;
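/* For example, with four elements SEL is now { 3, 2, 1, 0 }, i.e. element
   0 of the result is taken from element 3 of the input.  */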
4111
4112 if (!can_vec_perm_p (TYPE_MODE (vectype), false, sel))
4113 return NULL;
4114
4115 mask_elt_type
4116 = lang_hooks.types.type_for_size
4117 (TREE_INT_CST_LOW (TYPE_SIZE (TREE_TYPE (vectype))), 1);
4118 mask_type = get_vectype_for_scalar_type (mask_elt_type);
4119
4120 mask_vec = NULL;
4121 for (i = 0; i < nunits; i++)
4122 mask_vec = tree_cons (NULL, build_int_cst (mask_elt_type, i), mask_vec);
4123 mask_vec = build_vector (mask_type, mask_vec);
4124
4125 return mask_vec;
4126 }
4127
4128 /* Given a vector variable X that was generated for the scalar LHS of
4129 STMT, generate instructions to reverse the vector elements of X,
4130 insert them at *GSI and return the permuted vector variable. */
4131
4132 static tree
4133 reverse_vec_elements (tree x, gimple stmt, gimple_stmt_iterator *gsi)
4134 {
4135 tree vectype = TREE_TYPE (x);
4136 tree mask_vec, perm_dest, data_ref;
4137 gimple perm_stmt;
4138
4139 mask_vec = perm_mask_for_reverse (vectype);
4140
4141 perm_dest = vect_create_destination_var (gimple_assign_lhs (stmt), vectype);
4142
4143 /* Generate the permute statement. */
4144 perm_stmt = gimple_build_assign_with_ops3 (VEC_PERM_EXPR, perm_dest,
4145 x, x, mask_vec);
4146 data_ref = make_ssa_name (perm_dest, perm_stmt);
4147 gimple_set_lhs (perm_stmt, data_ref);
4148 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
4149
4150 return data_ref;
4151 }
4152
4153 /* vectorizable_load.
4154
4155 Check if STMT reads a non-scalar data-ref (array/pointer/structure) that
4156 can be vectorized.
4157 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4158 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
4159 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
4160
4161 static bool
4162 vectorizable_load (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
4163 slp_tree slp_node, slp_instance slp_node_instance)
4164 {
4165 tree scalar_dest;
4166 tree vec_dest = NULL;
4167 tree data_ref = NULL;
4168 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4169 stmt_vec_info prev_stmt_info;
4170 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4171 struct loop *loop = NULL;
4172 struct loop *containing_loop = (gimple_bb (stmt))->loop_father;
4173 bool nested_in_vect_loop = false;
4174 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr;
4175 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
4176 tree elem_type;
4177 tree new_temp;
4178 enum machine_mode mode;
4179 gimple new_stmt = NULL;
4180 tree dummy;
4181 enum dr_alignment_support alignment_support_scheme;
4182 tree dataref_ptr = NULL_TREE;
4183 gimple ptr_incr;
4184 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
4185 int ncopies;
4186 int i, j, group_size;
4187 tree msq = NULL_TREE, lsq;
4188 tree offset = NULL_TREE;
4189 tree realignment_token = NULL_TREE;
4190 gimple phi = NULL;
4191 VEC(tree,heap) *dr_chain = NULL;
4192 bool strided_load = false;
4193 bool load_lanes_p = false;
4194 gimple first_stmt;
4195 bool inv_p;
4196 bool negative;
4197 bool compute_in_loop = false;
4198 struct loop *at_loop;
4199 int vec_num;
4200 bool slp = (slp_node != NULL);
4201 bool slp_perm = false;
4202 enum tree_code code;
4203 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
4204 int vf;
4205 tree aggr_type;
4206
4207 if (loop_vinfo)
4208 {
4209 loop = LOOP_VINFO_LOOP (loop_vinfo);
4210 nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
4211 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
4212 }
4213 else
4214 vf = 1;
4215
4216 /* Multiple types in SLP are handled by creating the appropriate number of
4217 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
4218 case of SLP. */
4219 if (slp || PURE_SLP_STMT (stmt_info))
4220 ncopies = 1;
4221 else
4222 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
4223
4224 gcc_assert (ncopies >= 1);
4225
4226 /* FORNOW. This restriction should be relaxed. */
4227 if (nested_in_vect_loop && ncopies > 1)
4228 {
4229 if (vect_print_dump_info (REPORT_DETAILS))
4230 fprintf (vect_dump, "multiple types in nested loop.");
4231 return false;
4232 }
4233
4234 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
4235 return false;
4236
4237 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
4238 return false;
4239
4240 /* Is vectorizable load? */
4241 if (!is_gimple_assign (stmt))
4242 return false;
4243
4244 scalar_dest = gimple_assign_lhs (stmt);
4245 if (TREE_CODE (scalar_dest) != SSA_NAME)
4246 return false;
4247
4248 code = gimple_assign_rhs_code (stmt);
4249 if (code != ARRAY_REF
4250 && code != INDIRECT_REF
4251 && code != COMPONENT_REF
4252 && code != IMAGPART_EXPR
4253 && code != REALPART_EXPR
4254 && code != MEM_REF
4255 && TREE_CODE_CLASS (code) != tcc_declaration)
4256 return false;
4257
4258 if (!STMT_VINFO_DATA_REF (stmt_info))
4259 return false;
4260
4261 negative = tree_int_cst_compare (DR_STEP (dr), size_zero_node) < 0;
4262 if (negative && ncopies > 1)
4263 {
4264 if (vect_print_dump_info (REPORT_DETAILS))
4265 fprintf (vect_dump, "multiple types with negative step.");
4266 return false;
4267 }
4268
4269 elem_type = TREE_TYPE (vectype);
4270 mode = TYPE_MODE (vectype);
4271
4272 /* FORNOW. In some cases we can vectorize even if the data-type is not
4273 supported (e.g. data copies). */
4274 if (optab_handler (mov_optab, mode) == CODE_FOR_nothing)
4275 {
4276 if (vect_print_dump_info (REPORT_DETAILS))
4277 fprintf (vect_dump, "Aligned load, but unsupported type.");
4278 return false;
4279 }
4280
4281 /* Check if the load is a part of an interleaving chain. */
4282 if (STMT_VINFO_STRIDED_ACCESS (stmt_info))
4283 {
4284 strided_load = true;
4285 /* FORNOW */
4286 gcc_assert (! nested_in_vect_loop);
4287
4288 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
4289 if (!slp && !PURE_SLP_STMT (stmt_info))
4290 {
4291 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
4292 if (vect_load_lanes_supported (vectype, group_size))
4293 load_lanes_p = true;
4294 else if (!vect_strided_load_supported (vectype, group_size))
4295 return false;
4296 }
4297 }
4298
4299 if (negative)
4300 {
4301 gcc_assert (!strided_load);
4302 alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
4303 if (alignment_support_scheme != dr_aligned
4304 && alignment_support_scheme != dr_unaligned_supported)
4305 {
4306 if (vect_print_dump_info (REPORT_DETAILS))
4307 fprintf (vect_dump, "negative step but alignment required.");
4308 return false;
4309 }
4310 if (!perm_mask_for_reverse (vectype))
4311 {
4312 if (vect_print_dump_info (REPORT_DETAILS))
4313 fprintf (vect_dump, "negative step and reversing not supported.");
4314 return false;
4315 }
4316 }
4317
4318 if (!vec_stmt) /* transformation not required. */
4319 {
4320 STMT_VINFO_TYPE (stmt_info) = load_vec_info_type;
4321 vect_model_load_cost (stmt_info, ncopies, load_lanes_p, NULL);
4322 return true;
4323 }
4324
4325 if (vect_print_dump_info (REPORT_DETAILS))
4326 fprintf (vect_dump, "transform load. ncopies = %d", ncopies);
4327
4328 /** Transform. **/
4329
4330 if (strided_load)
4331 {
4332 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
4333 if (slp
4334 && !SLP_INSTANCE_LOAD_PERMUTATION (slp_node_instance)
4335 && first_stmt != VEC_index (gimple, SLP_TREE_SCALAR_STMTS (slp_node), 0))
4336 first_stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (slp_node), 0);
4337
4338 /* Check if the chain of loads is already vectorized. */
4339 if (STMT_VINFO_VEC_STMT (vinfo_for_stmt (first_stmt)))
4340 {
4341 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
4342 return true;
4343 }
4344 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
4345 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
4346
4347 /* VEC_NUM is the number of vect stmts to be created for this group. */
4348 if (slp)
4349 {
4350 strided_load = false;
4351 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
4352 if (SLP_INSTANCE_LOAD_PERMUTATION (slp_node_instance))
4353 slp_perm = true;
4354 }
4355 else
4356 vec_num = group_size;
4357 }
4358 else
4359 {
4360 first_stmt = stmt;
4361 first_dr = dr;
4362 group_size = vec_num = 1;
4363 }
4364
4365 alignment_support_scheme = vect_supportable_dr_alignment (first_dr, false);
4366 gcc_assert (alignment_support_scheme);
4367 /* Targets with load-lane instructions must not require explicit
4368 realignment. */
4369 gcc_assert (!load_lanes_p
4370 || alignment_support_scheme == dr_aligned
4371 || alignment_support_scheme == dr_unaligned_supported);
4372
4373 /* In case the vectorization factor (VF) is bigger than the number
4374 of elements that we can fit in a vectype (nunits), we have to generate
4375 more than one vector stmt - i.e., we need to "unroll" the
4376 vector stmt by a factor VF/nunits. In doing so, we record a pointer
4377 from one copy of the vector stmt to the next, in the field
4378 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
4379 stages to find the correct vector defs to be used when vectorizing
4380 stmts that use the defs of the current stmt. The example below
4381 illustrates the vectorization process when VF=16 and nunits=4 (i.e., we
4382 need to create 4 vectorized stmts):
4383
4384 before vectorization:
4385 RELATED_STMT VEC_STMT
4386 S1: x = memref - -
4387 S2: z = x + 1 - -
4388
4389 step 1: vectorize stmt S1:
4390 We first create the vector stmt VS1_0, and, as usual, record a
4391 pointer to it in the STMT_VINFO_VEC_STMT of the scalar stmt S1.
4392 Next, we create the vector stmt VS1_1, and record a pointer to
4393 it in the STMT_VINFO_RELATED_STMT of the vector stmt VS1_0.
4394 Similarly, for VS1_2 and VS1_3. This is the resulting chain of
4395 stmts and pointers:
4396 RELATED_STMT VEC_STMT
4397 VS1_0: vx0 = memref0 VS1_1 -
4398 VS1_1: vx1 = memref1 VS1_2 -
4399 VS1_2: vx2 = memref2 VS1_3 -
4400 VS1_3: vx3 = memref3 - -
4401 S1: x = load - VS1_0
4402 S2: z = x + 1 - -
4403
4404 See the documentation in vect_get_vec_def_for_stmt_copy for how the
4405 information we recorded in the RELATED_STMT field is used to vectorize
4406 stmt S2. */
4407
4408 /* In case of interleaving (non-unit strided access):
4409
4410 S1: x2 = &base + 2
4411 S2: x0 = &base
4412 S3: x1 = &base + 1
4413 S4: x3 = &base + 3
4414
4415 Vectorized loads are created in the order of memory accesses
4416 starting from the access of the first stmt of the chain:
4417
4418 VS1: vx0 = &base
4419 VS2: vx1 = &base + vec_size*1
4420 VS3: vx2 = &base + vec_size*2
4421 VS4: vx3 = &base + vec_size*3
4422
4423 Then permutation statements are generated:
4424
4425 VS5: vx5 = VEC_EXTRACT_EVEN_EXPR < vx0, vx1 >
4426 VS6: vx6 = VEC_EXTRACT_ODD_EXPR < vx0, vx1 >
4427 ...
4428
4429 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
4430 (the order of the data-refs in the output of vect_permute_load_chain
4431 corresponds to the order of scalar stmts in the interleaving chain - see
4432 the documentation of vect_permute_load_chain()).
4433 The generation of permutation stmts and recording them in
4434 STMT_VINFO_VEC_STMT is done in vect_transform_strided_load().
4435
4436 In case of both multiple types and interleaving, the vector loads and
4437 permutation stmts above are created for every copy. The result vector
4438 stmts are put in STMT_VINFO_VEC_STMT for the first copy and in the
4439 corresponding STMT_VINFO_RELATED_STMT for the next copies. */
4440
4441 /* If the data reference is aligned (dr_aligned) or potentially unaligned
4442 on a target that supports unaligned accesses (dr_unaligned_supported)
4443 we generate the following code:
4444 p = initial_addr;
4445 indx = 0;
4446 loop {
4447 p = p + indx * vectype_size;
4448 vec_dest = *(p);
4449 indx = indx + 1;
4450 }
4451
4452 Otherwise, the data reference is potentially unaligned on a target that
4453 does not support unaligned accesses (dr_explicit_realign_optimized) -
4454 then generate the following code, in which the data in each iteration is
4455 obtained by two vector loads, one from the previous iteration, and one
4456 from the current iteration:
4457 p1 = initial_addr;
4458 msq_init = *(floor(p1))
4459 p2 = initial_addr + VS - 1;
4460 realignment_token = call target_builtin;
4461 indx = 0;
4462 loop {
4463 p2 = p2 + indx * vectype_size
4464 lsq = *(floor(p2))
4465 vec_dest = realign_load (msq, lsq, realignment_token)
4466 indx = indx + 1;
4467 msq = lsq;
4468 } */
4469
4470 /* If the misalignment remains the same throughout the execution of the
4471 loop, we can create the init_addr and permutation mask at the loop
4472 preheader. Otherwise, they need to be created inside the loop.
4473 This can only occur when vectorizing memory accesses in the inner-loop
4474 nested within an outer-loop that is being vectorized. */
4475
4476 if (loop && nested_in_vect_loop_p (loop, stmt)
4477 && (TREE_INT_CST_LOW (DR_STEP (dr))
4478 % GET_MODE_SIZE (TYPE_MODE (vectype)) != 0))
4479 {
4480 gcc_assert (alignment_support_scheme != dr_explicit_realign_optimized);
4481 compute_in_loop = true;
4482 }
4483
4484 if ((alignment_support_scheme == dr_explicit_realign_optimized
4485 || alignment_support_scheme == dr_explicit_realign)
4486 && !compute_in_loop)
4487 {
4488 msq = vect_setup_realignment (first_stmt, gsi, &realignment_token,
4489 alignment_support_scheme, NULL_TREE,
4490 &at_loop);
4491 if (alignment_support_scheme == dr_explicit_realign_optimized)
4492 {
4493 phi = SSA_NAME_DEF_STMT (msq);
4494 offset = size_int (TYPE_VECTOR_SUBPARTS (vectype) - 1);
4495 }
4496 }
4497 else
4498 at_loop = loop;
4499
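/* For a negative-step access, e.g. a[n-1-i], bias the initial address back
   by nunits-1 elements so that each vector load covers the right elements;
   the loaded vectors are reversed further below.  */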
4500 if (negative)
4501 offset = size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1);
4502
4503 if (load_lanes_p)
4504 aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
4505 else
4506 aggr_type = vectype;
4507
4508 prev_stmt_info = NULL;
4509 for (j = 0; j < ncopies; j++)
4510 {
4511 /* 1. Create the vector or array pointer update chain. */
4512 if (j == 0)
4513 dataref_ptr = vect_create_data_ref_ptr (first_stmt, aggr_type, at_loop,
4514 offset, &dummy, gsi,
4515 &ptr_incr, false, &inv_p);
4516 else
4517 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
4518 TYPE_SIZE_UNIT (aggr_type));
4519
4520 if (strided_load || slp_perm)
4521 dr_chain = VEC_alloc (tree, heap, vec_num);
4522
4523 if (load_lanes_p)
4524 {
4525 tree vec_array;
4526
4527 vec_array = create_vector_array (vectype, vec_num);
4528
4529 /* Emit:
4530 VEC_ARRAY = LOAD_LANES (MEM_REF[...all elements...]). */
4531 data_ref = create_array_ref (aggr_type, dataref_ptr, first_dr);
4532 new_stmt = gimple_build_call_internal (IFN_LOAD_LANES, 1, data_ref);
4533 gimple_call_set_lhs (new_stmt, vec_array);
4534 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4535 mark_symbols_for_renaming (new_stmt);
4536
4537 /* Extract each vector into an SSA_NAME. */
4538 for (i = 0; i < vec_num; i++)
4539 {
4540 new_temp = read_vector_array (stmt, gsi, scalar_dest,
4541 vec_array, i);
4542 VEC_quick_push (tree, dr_chain, new_temp);
4543 }
4544
4545 /* Record the mapping between SSA_NAMEs and statements. */
4546 vect_record_strided_load_vectors (stmt, dr_chain);
4547 }
4548 else
4549 {
4550 for (i = 0; i < vec_num; i++)
4551 {
4552 if (i > 0)
4553 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
4554 stmt, NULL_TREE);
4555
4556 /* 2. Create the vector-load in the loop. */
4557 switch (alignment_support_scheme)
4558 {
4559 case dr_aligned:
4560 case dr_unaligned_supported:
4561 {
4562 struct ptr_info_def *pi;
4563 data_ref
4564 = build2 (MEM_REF, vectype, dataref_ptr,
4565 build_int_cst (reference_alias_ptr_type
4566 (DR_REF (first_dr)), 0));
4567 pi = get_ptr_info (dataref_ptr);
4568 pi->align = TYPE_ALIGN_UNIT (vectype);
4569 if (alignment_support_scheme == dr_aligned)
4570 {
4571 gcc_assert (aligned_access_p (first_dr));
4572 pi->misalign = 0;
4573 }
4574 else if (DR_MISALIGNMENT (first_dr) == -1)
4575 {
4576 TREE_TYPE (data_ref)
4577 = build_aligned_type (TREE_TYPE (data_ref),
4578 TYPE_ALIGN (elem_type));
4579 pi->align = TYPE_ALIGN_UNIT (elem_type);
4580 pi->misalign = 0;
4581 }
4582 else
4583 {
4584 TREE_TYPE (data_ref)
4585 = build_aligned_type (TREE_TYPE (data_ref),
4586 TYPE_ALIGN (elem_type));
4587 pi->misalign = DR_MISALIGNMENT (first_dr);
4588 }
4589 break;
4590 }
4591 case dr_explicit_realign:
4592 {
4593 tree ptr, bump;
4594 tree vs_minus_1;
4595
4596 vs_minus_1 = size_int (TYPE_VECTOR_SUBPARTS (vectype) - 1);
4597
4598 if (compute_in_loop)
4599 msq = vect_setup_realignment (first_stmt, gsi,
4600 &realignment_token,
4601 dr_explicit_realign,
4602 dataref_ptr, NULL);
4603
4604 new_stmt = gimple_build_assign_with_ops
4605 (BIT_AND_EXPR, NULL_TREE, dataref_ptr,
4606 build_int_cst
4607 (TREE_TYPE (dataref_ptr),
4608 -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
4609 ptr = make_ssa_name (SSA_NAME_VAR (dataref_ptr), new_stmt);
4610 gimple_assign_set_lhs (new_stmt, ptr);
4611 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4612 data_ref
4613 = build2 (MEM_REF, vectype, ptr,
4614 build_int_cst (reference_alias_ptr_type
4615 (DR_REF (first_dr)), 0));
4616 vec_dest = vect_create_destination_var (scalar_dest,
4617 vectype);
4618 new_stmt = gimple_build_assign (vec_dest, data_ref);
4619 new_temp = make_ssa_name (vec_dest, new_stmt);
4620 gimple_assign_set_lhs (new_stmt, new_temp);
4621 gimple_set_vdef (new_stmt, gimple_vdef (stmt));
4622 gimple_set_vuse (new_stmt, gimple_vuse (stmt));
4623 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4624 msq = new_temp;
4625
4626 bump = size_binop (MULT_EXPR, vs_minus_1,
4627 TYPE_SIZE_UNIT (elem_type));
4628 ptr = bump_vector_ptr (dataref_ptr, NULL, gsi, stmt, bump);
4629 new_stmt = gimple_build_assign_with_ops
4630 (BIT_AND_EXPR, NULL_TREE, ptr,
4631 build_int_cst
4632 (TREE_TYPE (ptr),
4633 -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
4634 ptr = make_ssa_name (SSA_NAME_VAR (dataref_ptr), new_stmt);
4635 gimple_assign_set_lhs (new_stmt, ptr);
4636 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4637 data_ref
4638 = build2 (MEM_REF, vectype, ptr,
4639 build_int_cst (reference_alias_ptr_type
4640 (DR_REF (first_dr)), 0));
4641 break;
4642 }
4643 case dr_explicit_realign_optimized:
4644 new_stmt = gimple_build_assign_with_ops
4645 (BIT_AND_EXPR, NULL_TREE, dataref_ptr,
4646 build_int_cst
4647 (TREE_TYPE (dataref_ptr),
4648 -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
4649 new_temp = make_ssa_name (SSA_NAME_VAR (dataref_ptr),
4650 new_stmt);
4651 gimple_assign_set_lhs (new_stmt, new_temp);
4652 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4653 data_ref
4654 = build2 (MEM_REF, vectype, new_temp,
4655 build_int_cst (reference_alias_ptr_type
4656 (DR_REF (first_dr)), 0));
4657 break;
4658 default:
4659 gcc_unreachable ();
4660 }
4661 vec_dest = vect_create_destination_var (scalar_dest, vectype);
4662 new_stmt = gimple_build_assign (vec_dest, data_ref);
4663 new_temp = make_ssa_name (vec_dest, new_stmt);
4664 gimple_assign_set_lhs (new_stmt, new_temp);
4665 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4666 mark_symbols_for_renaming (new_stmt);
4667
4668 /* 3. Handle explicit realignment if necessary/supported.
4669 Create in loop:
4670 vec_dest = realign_load (msq, lsq, realignment_token) */
4671 if (alignment_support_scheme == dr_explicit_realign_optimized
4672 || alignment_support_scheme == dr_explicit_realign)
4673 {
4674 lsq = gimple_assign_lhs (new_stmt);
4675 if (!realignment_token)
4676 realignment_token = dataref_ptr;
4677 vec_dest = vect_create_destination_var (scalar_dest, vectype);
4678 new_stmt
4679 = gimple_build_assign_with_ops3 (REALIGN_LOAD_EXPR,
4680 vec_dest, msq, lsq,
4681 realignment_token);
4682 new_temp = make_ssa_name (vec_dest, new_stmt);
4683 gimple_assign_set_lhs (new_stmt, new_temp);
4684 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4685
4686 if (alignment_support_scheme == dr_explicit_realign_optimized)
4687 {
4688 gcc_assert (phi);
4689 if (i == vec_num - 1 && j == ncopies - 1)
4690 add_phi_arg (phi, lsq,
4691 loop_latch_edge (containing_loop),
4692 UNKNOWN_LOCATION);
4693 msq = lsq;
4694 }
4695 }
4696
4697 /* 4. Handle invariant-load. */
4698 if (inv_p && !bb_vinfo)
4699 {
4700 tree vec_inv;
4701 gimple_stmt_iterator gsi2 = *gsi;
4702 gcc_assert (!strided_load);
4703 gsi_next (&gsi2);
4704 vec_inv = build_vector_from_val (vectype, scalar_dest);
4705 new_temp = vect_init_vector (stmt, vec_inv,
4706 vectype, &gsi2);
4707 new_stmt = SSA_NAME_DEF_STMT (new_temp);
4708 }
4709
4710 if (negative)
4711 {
4712 new_temp = reverse_vec_elements (new_temp, stmt, gsi);
4713 new_stmt = SSA_NAME_DEF_STMT (new_temp);
4714 }
4715
4716 /* Collect vector loads and later create their permutation in
4717 vect_transform_strided_load (). */
4718 if (strided_load || slp_perm)
4719 VEC_quick_push (tree, dr_chain, new_temp);
4720
4721 /* Store vector loads in the corresponding SLP_NODE. */
4722 if (slp && !slp_perm)
4723 VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node),
4724 new_stmt);
4725 }
4726 }
4727
4728 if (slp && !slp_perm)
4729 continue;
4730
4731 if (slp_perm)
4732 {
4733 if (!vect_transform_slp_perm_load (stmt, dr_chain, gsi, vf,
4734 slp_node_instance, false))
4735 {
4736 VEC_free (tree, heap, dr_chain);
4737 return false;
4738 }
4739 }
4740 else
4741 {
4742 if (strided_load)
4743 {
4744 if (!load_lanes_p)
4745 vect_transform_strided_load (stmt, dr_chain, group_size, gsi);
4746 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
4747 }
4748 else
4749 {
4750 if (j == 0)
4751 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4752 else
4753 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4754 prev_stmt_info = vinfo_for_stmt (new_stmt);
4755 }
4756 }
4757 if (dr_chain)
4758 VEC_free (tree, heap, dr_chain);
4759 }
4760
4761 return true;
4762 }
4763
4764 /* Function vect_is_simple_cond.
4765
4766 Input:
4767 LOOP_VINFO - loop information of the loop that is being vectorized.
4768 COND - Condition that is checked for simple use.
4769
4770 Output:
4771 *COMP_VECTYPE - the vector type for the comparison.
4772
4773 Returns whether a COND can be vectorized. Checks whether
4774 condition operands are supportable using vect_is_simple_use. */
4775
4776 static bool
4777 vect_is_simple_cond (tree cond, loop_vec_info loop_vinfo, tree *comp_vectype)
4778 {
4779 tree lhs, rhs;
4780 tree def;
4781 enum vect_def_type dt;
4782 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
4783
4784 if (!COMPARISON_CLASS_P (cond))
4785 return false;
4786
4787 lhs = TREE_OPERAND (cond, 0);
4788 rhs = TREE_OPERAND (cond, 1);
4789
4790 if (TREE_CODE (lhs) == SSA_NAME)
4791 {
4792 gimple lhs_def_stmt = SSA_NAME_DEF_STMT (lhs);
4793 if (!vect_is_simple_use_1 (lhs, loop_vinfo, NULL, &lhs_def_stmt, &def,
4794 &dt, &vectype1))
4795 return false;
4796 }
4797 else if (TREE_CODE (lhs) != INTEGER_CST && TREE_CODE (lhs) != REAL_CST
4798 && TREE_CODE (lhs) != FIXED_CST)
4799 return false;
4800
4801 if (TREE_CODE (rhs) == SSA_NAME)
4802 {
4803 gimple rhs_def_stmt = SSA_NAME_DEF_STMT (rhs);
4804 if (!vect_is_simple_use_1 (rhs, loop_vinfo, NULL, &rhs_def_stmt, &def,
4805 &dt, &vectype2))
4806 return false;
4807 }
4808 else if (TREE_CODE (rhs) != INTEGER_CST && TREE_CODE (rhs) != REAL_CST
4809 && TREE_CODE (rhs) != FIXED_CST)
4810 return false;
4811
4812 *comp_vectype = vectype1 ? vectype1 : vectype2;
4813 return true;
4814 }
4815
4816 /* vectorizable_condition.
4817
4818 Check if STMT is conditional modify expression that can be vectorized.
4819 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4820 stmt using VEC_COND_EXPR to replace it, put it in VEC_STMT, and insert it
4821 at GSI.
4822
4823 When STMT is vectorized as a nested cycle, REDUC_DEF is the vector variable
4824 to be used at REDUC_INDEX (in the then clause if REDUC_INDEX is 1, and in
4825 the else clause if it is 2).
4826
4827 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
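/* For example, a scalar statement

       x = a < b ? c : d;

   is vectorized into

       vx = VEC_COND_EXPR <va < vb, vc, vd>;

   with one such statement per copy when ncopies > 1.  */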
4828
4829 bool
4830 vectorizable_condition (gimple stmt, gimple_stmt_iterator *gsi,
4831 gimple *vec_stmt, tree reduc_def, int reduc_index)
4832 {
4833 tree scalar_dest = NULL_TREE;
4834 tree vec_dest = NULL_TREE;
4835 tree cond_expr, then_clause, else_clause;
4836 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4837 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
4838 tree comp_vectype;
4839 tree vec_cond_lhs = NULL_TREE, vec_cond_rhs = NULL_TREE;
4840 tree vec_then_clause = NULL_TREE, vec_else_clause = NULL_TREE;
4841 tree vec_compare, vec_cond_expr;
4842 tree new_temp;
4843 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4844 tree def;
4845 enum vect_def_type dt, dts[4];
4846 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
4847 int ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
4848 enum tree_code code;
4849 stmt_vec_info prev_stmt_info = NULL;
4850 int j;
4851
4852 /* FORNOW: unsupported in basic block SLP. */
4853 gcc_assert (loop_vinfo);
4854
4855 /* FORNOW: SLP not supported. */
4856 if (STMT_SLP_TYPE (stmt_info))
4857 return false;
4858
4859 gcc_assert (ncopies >= 1);
4860 if (reduc_index && ncopies > 1)
4861 return false; /* FORNOW */
4862
4863 if (!STMT_VINFO_RELEVANT_P (stmt_info))
4864 return false;
4865
4866 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
4867 && !(STMT_VINFO_DEF_TYPE (stmt_info) == vect_nested_cycle
4868 && reduc_def))
4869 return false;
4870
4871 /* FORNOW: not yet supported. */
4872 if (STMT_VINFO_LIVE_P (stmt_info))
4873 {
4874 if (vect_print_dump_info (REPORT_DETAILS))
4875 fprintf (vect_dump, "value used after loop.");
4876 return false;
4877 }
4878
4879 /* Is vectorizable conditional operation? */
4880 if (!is_gimple_assign (stmt))
4881 return false;
4882
4883 code = gimple_assign_rhs_code (stmt);
4884
4885 if (code != COND_EXPR)
4886 return false;
4887
4888 cond_expr = gimple_assign_rhs1 (stmt);
4889 then_clause = gimple_assign_rhs2 (stmt);
4890 else_clause = gimple_assign_rhs3 (stmt);
4891
4892 if (!vect_is_simple_cond (cond_expr, loop_vinfo, &comp_vectype)
4893 || !comp_vectype)
4894 return false;
4895
4896 if (TREE_CODE (then_clause) == SSA_NAME)
4897 {
4898 gimple then_def_stmt = SSA_NAME_DEF_STMT (then_clause);
4899 if (!vect_is_simple_use (then_clause, loop_vinfo, NULL,
4900 &then_def_stmt, &def, &dt))
4901 return false;
4902 }
4903 else if (TREE_CODE (then_clause) != INTEGER_CST
4904 && TREE_CODE (then_clause) != REAL_CST
4905 && TREE_CODE (then_clause) != FIXED_CST)
4906 return false;
4907
4908 if (TREE_CODE (else_clause) == SSA_NAME)
4909 {
4910 gimple else_def_stmt = SSA_NAME_DEF_STMT (else_clause);
4911 if (!vect_is_simple_use (else_clause, loop_vinfo, NULL,
4912 &else_def_stmt, &def, &dt))
4913 return false;
4914 }
4915 else if (TREE_CODE (else_clause) != INTEGER_CST
4916 && TREE_CODE (else_clause) != REAL_CST
4917 && TREE_CODE (else_clause) != FIXED_CST)
4918 return false;
4919
4920 if (!vec_stmt)
4921 {
4922 STMT_VINFO_TYPE (stmt_info) = condition_vec_info_type;
4923 return expand_vec_cond_expr_p (vectype, comp_vectype);
4924 }
4925
4926 /* Transform */
4927
4928 /* Handle def. */
4929 scalar_dest = gimple_assign_lhs (stmt);
4930 vec_dest = vect_create_destination_var (scalar_dest, vectype);
4931
4932 /* Handle cond expr. */
4933 for (j = 0; j < ncopies; j++)
4934 {
4935 gimple new_stmt;
4936 if (j == 0)
4937 {
4938 gimple gtemp;
4939 vec_cond_lhs =
4940 vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 0),
4941 stmt, NULL);
4942 vect_is_simple_use (TREE_OPERAND (cond_expr, 0), loop_vinfo,
4943 NULL, &gtemp, &def, &dts[0]);
4944 vec_cond_rhs =
4945 vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 1),
4946 stmt, NULL);
4947 vect_is_simple_use (TREE_OPERAND (cond_expr, 1), loop_vinfo,
4948 NULL, &gtemp, &def, &dts[1]);
4949 if (reduc_index == 1)
4950 vec_then_clause = reduc_def;
4951 else
4952 {
4953 vec_then_clause = vect_get_vec_def_for_operand (then_clause,
4954 stmt, NULL);
4955 vect_is_simple_use (then_clause, loop_vinfo,
4956 NULL, &gtemp, &def, &dts[2]);
4957 }
4958 if (reduc_index == 2)
4959 vec_else_clause = reduc_def;
4960 else
4961 {
4962 vec_else_clause = vect_get_vec_def_for_operand (else_clause,
4963 stmt, NULL);
4964 vect_is_simple_use (else_clause, loop_vinfo,
4965 NULL, &gtemp, &def, &dts[3]);
4966 }
4967 }
4968 else
4969 {
4970 vec_cond_lhs = vect_get_vec_def_for_stmt_copy (dts[0], vec_cond_lhs);
4971 vec_cond_rhs = vect_get_vec_def_for_stmt_copy (dts[1], vec_cond_rhs);
4972 vec_then_clause = vect_get_vec_def_for_stmt_copy (dts[2],
4973 vec_then_clause);
4974 vec_else_clause = vect_get_vec_def_for_stmt_copy (dts[3],
4975 vec_else_clause);
4976 }
4977
4978 /* Arguments are ready. Create the new vector stmt. */
4979 vec_compare = build2 (TREE_CODE (cond_expr), vectype,
4980 vec_cond_lhs, vec_cond_rhs);
4981 vec_cond_expr = build3 (VEC_COND_EXPR, vectype,
4982 vec_compare, vec_then_clause, vec_else_clause);
4983
4984 new_stmt = gimple_build_assign (vec_dest, vec_cond_expr);
4985 new_temp = make_ssa_name (vec_dest, new_stmt);
4986 gimple_assign_set_lhs (new_stmt, new_temp);
4987 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4988 if (j == 0)
4989 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4990 else
4991 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4992
4993 prev_stmt_info = vinfo_for_stmt (new_stmt);
4994 }
4995
4996 return true;
4997 }
4998
4999
5000 /* Make sure the statement is vectorizable. */
5001
5002 bool
5003 vect_analyze_stmt (gimple stmt, bool *need_to_vectorize, slp_tree node)
5004 {
5005 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
5006 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
5007 enum vect_relevant relevance = STMT_VINFO_RELEVANT (stmt_info);
5008 bool ok;
5009 tree scalar_type, vectype;
5010 gimple pattern_stmt, pattern_def_stmt;
5011
5012 if (vect_print_dump_info (REPORT_DETAILS))
5013 {
5014 fprintf (vect_dump, "==> examining statement: ");
5015 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
5016 }
5017
5018 if (gimple_has_volatile_ops (stmt))
5019 {
5020 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
5021 fprintf (vect_dump, "not vectorized: stmt has volatile operands");
5022
5023 return false;
5024 }
5025
5026 /* Skip stmts that do not need to be vectorized. In loops this is expected
5027 to include:
5028 - the COND_EXPR which is the loop exit condition
5029 - any LABEL_EXPRs in the loop
5030 - computations that are used only for array indexing or loop control.
5031 In basic blocks we only analyze statements that are a part of some SLP
5032 instance, therefore, all the statements are relevant.
5033
5034 The pattern statement needs to be analyzed instead of the original statement
5035 if the original statement is not relevant. Otherwise, we analyze both
5036 statements. */
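/* For example, a multiplication of two shorts that are widened to int may
   have a related WIDEN_MULT_EXPR pattern statement; in that case the
   pattern statement is the one that gets analyzed and vectorized.  */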
5037
5038 pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
5039 if (!STMT_VINFO_RELEVANT_P (stmt_info)
5040 && !STMT_VINFO_LIVE_P (stmt_info))
5041 {
5042 if (STMT_VINFO_IN_PATTERN_P (stmt_info)
5043 && pattern_stmt
5044 && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
5045 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
5046 {
5047 /* Analyze PATTERN_STMT instead of the original stmt. */
5048 stmt = pattern_stmt;
5049 stmt_info = vinfo_for_stmt (pattern_stmt);
5050 if (vect_print_dump_info (REPORT_DETAILS))
5051 {
5052 fprintf (vect_dump, "==> examining pattern statement: ");
5053 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
5054 }
5055 }
5056 else
5057 {
5058 if (vect_print_dump_info (REPORT_DETAILS))
5059 fprintf (vect_dump, "irrelevant.");
5060
5061 return true;
5062 }
5063 }
5064 else if (STMT_VINFO_IN_PATTERN_P (stmt_info)
5065 && pattern_stmt
5066 && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
5067 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
5068 {
5069 /* Analyze PATTERN_STMT too. */
5070 if (vect_print_dump_info (REPORT_DETAILS))
5071 {
5072 fprintf (vect_dump, "==> examining pattern statement: ");
5073 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
5074 }
5075
5076 if (!vect_analyze_stmt (pattern_stmt, need_to_vectorize, node))
5077 return false;
5078 }
5079
5080 if (is_pattern_stmt_p (stmt_info)
5081 && (pattern_def_stmt = STMT_VINFO_PATTERN_DEF_STMT (stmt_info))
5082 && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_def_stmt))
5083 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_def_stmt))))
5084 {
5085 /* Analyze def stmt of STMT if it's a pattern stmt. */
5086 if (vect_print_dump_info (REPORT_DETAILS))
5087 {
5088 fprintf (vect_dump, "==> examining pattern def statement: ");
5089 print_gimple_stmt (vect_dump, pattern_def_stmt, 0, TDF_SLIM);
5090 }
5091
5092 if (!vect_analyze_stmt (pattern_def_stmt, need_to_vectorize, node))
5093 return false;
5094 }
5095
5096
5097 switch (STMT_VINFO_DEF_TYPE (stmt_info))
5098 {
5099 case vect_internal_def:
5100 break;
5101
5102 case vect_reduction_def:
5103 case vect_nested_cycle:
5104 gcc_assert (!bb_vinfo && (relevance == vect_used_in_outer
5105 || relevance == vect_used_in_outer_by_reduction
5106 || relevance == vect_unused_in_scope));
5107 break;
5108
5109 case vect_induction_def:
5110 case vect_constant_def:
5111 case vect_external_def:
5112 case vect_unknown_def_type:
5113 default:
5114 gcc_unreachable ();
5115 }
5116
5117 if (bb_vinfo)
5118 {
5119 gcc_assert (PURE_SLP_STMT (stmt_info));
5120
5121 scalar_type = TREE_TYPE (gimple_get_lhs (stmt));
5122 if (vect_print_dump_info (REPORT_DETAILS))
5123 {
5124 fprintf (vect_dump, "get vectype for scalar type: ");
5125 print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
5126 }
5127
5128 vectype = get_vectype_for_scalar_type (scalar_type);
5129 if (!vectype)
5130 {
5131 if (vect_print_dump_info (REPORT_DETAILS))
5132 {
5133 fprintf (vect_dump, "not SLPed: unsupported data-type ");
5134 print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
5135 }
5136 return false;
5137 }
5138
5139 if (vect_print_dump_info (REPORT_DETAILS))
5140 {
5141 fprintf (vect_dump, "vectype: ");
5142 print_generic_expr (vect_dump, vectype, TDF_SLIM);
5143 }
5144
5145 STMT_VINFO_VECTYPE (stmt_info) = vectype;
5146 }
5147
5148 if (STMT_VINFO_RELEVANT_P (stmt_info))
5149 {
5150 gcc_assert (!VECTOR_MODE_P (TYPE_MODE (gimple_expr_type (stmt))));
5151 gcc_assert (STMT_VINFO_VECTYPE (stmt_info));
5152 *need_to_vectorize = true;
5153 }
5154
5155 ok = true;
5156 if (!bb_vinfo
5157 && (STMT_VINFO_RELEVANT_P (stmt_info)
5158 || STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def))
5159 ok = (vectorizable_type_promotion (stmt, NULL, NULL, NULL)
5160 || vectorizable_type_demotion (stmt, NULL, NULL, NULL)
5161 || vectorizable_conversion (stmt, NULL, NULL, NULL)
5162 || vectorizable_shift (stmt, NULL, NULL, NULL)
5163 || vectorizable_operation (stmt, NULL, NULL, NULL)
5164 || vectorizable_assignment (stmt, NULL, NULL, NULL)
5165 || vectorizable_load (stmt, NULL, NULL, NULL, NULL)
5166 || vectorizable_call (stmt, NULL, NULL)
5167 || vectorizable_store (stmt, NULL, NULL, NULL)
5168 || vectorizable_reduction (stmt, NULL, NULL, NULL)
5169 || vectorizable_condition (stmt, NULL, NULL, NULL, 0));
5170 else
5171 {
5172 if (bb_vinfo)
5173 ok = (vectorizable_type_promotion (stmt, NULL, NULL, node)
5174 || vectorizable_type_demotion (stmt, NULL, NULL, node)
5175 || vectorizable_shift (stmt, NULL, NULL, node)
5176 || vectorizable_operation (stmt, NULL, NULL, node)
5177 || vectorizable_assignment (stmt, NULL, NULL, node)
5178 || vectorizable_load (stmt, NULL, NULL, node, NULL)
5179 || vectorizable_store (stmt, NULL, NULL, node));
5180 }
5181
5182 if (!ok)
5183 {
5184 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
5185 {
5186 fprintf (vect_dump, "not vectorized: relevant stmt not ");
5187 fprintf (vect_dump, "supported: ");
5188 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
5189 }
5190
5191 return false;
5192 }
5193
5194 if (bb_vinfo)
5195 return true;
5196
5197 /* Stmts that are (also) "live" (i.e. - that are used out of the loop)
5198 need extra handling, except for vectorizable reductions. */
5199 if (STMT_VINFO_LIVE_P (stmt_info)
5200 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type)
5201 ok = vectorizable_live_operation (stmt, NULL, NULL);
5202
5203 if (!ok)
5204 {
5205 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
5206 {
5207 fprintf (vect_dump, "not vectorized: live stmt not ");
5208 fprintf (vect_dump, "supported: ");
5209 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
5210 }
5211
5212 return false;
5213 }
5214
5215 return true;
5216 }
5217
5218
5219 /* Function vect_transform_stmt.
5220
5221 Create a vectorized stmt to replace STMT, and insert it at BSI. */
5222
5223 bool
5224 vect_transform_stmt (gimple stmt, gimple_stmt_iterator *gsi,
5225 bool *strided_store, slp_tree slp_node,
5226 slp_instance slp_node_instance)
5227 {
5228 bool is_store = false;
5229 gimple vec_stmt = NULL;
5230 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
5231 bool done;
5232
5233 switch (STMT_VINFO_TYPE (stmt_info))
5234 {
5235 case type_demotion_vec_info_type:
5236 done = vectorizable_type_demotion (stmt, gsi, &vec_stmt, slp_node);
5237 gcc_assert (done);
5238 break;
5239
5240 case type_promotion_vec_info_type:
5241 done = vectorizable_type_promotion (stmt, gsi, &vec_stmt, slp_node);
5242 gcc_assert (done);
5243 break;
5244
5245 case type_conversion_vec_info_type:
5246 done = vectorizable_conversion (stmt, gsi, &vec_stmt, slp_node);
5247 gcc_assert (done);
5248 break;
5249
5250 case induc_vec_info_type:
5251 gcc_assert (!slp_node);
5252 done = vectorizable_induction (stmt, gsi, &vec_stmt);
5253 gcc_assert (done);
5254 break;
5255
5256 case shift_vec_info_type:
5257 done = vectorizable_shift (stmt, gsi, &vec_stmt, slp_node);
5258 gcc_assert (done);
5259 break;
5260
5261 case op_vec_info_type:
5262 done = vectorizable_operation (stmt, gsi, &vec_stmt, slp_node);
5263 gcc_assert (done);
5264 break;
5265
5266 case assignment_vec_info_type:
5267 done = vectorizable_assignment (stmt, gsi, &vec_stmt, slp_node);
5268 gcc_assert (done);
5269 break;
5270
5271 case load_vec_info_type:
5272 done = vectorizable_load (stmt, gsi, &vec_stmt, slp_node,
5273 slp_node_instance);
5274 gcc_assert (done);
5275 break;
5276
5277 case store_vec_info_type:
5278 done = vectorizable_store (stmt, gsi, &vec_stmt, slp_node);
5279 gcc_assert (done);
5280 if (STMT_VINFO_STRIDED_ACCESS (stmt_info) && !slp_node)
5281 {
5282 /* In case of interleaving, the whole chain is vectorized when the
5283 last store in the chain is reached. Store stmts before the last
5284 one are skipped, and there vec_stmt_info shouldn't be freed
5285 meanwhile. */
5286 *strided_store = true;
5287 if (STMT_VINFO_VEC_STMT (stmt_info))
5288 is_store = true;
5289 }
5290 else
5291 is_store = true;
5292 break;
5293
5294 case condition_vec_info_type:
5295 gcc_assert (!slp_node);
5296 done = vectorizable_condition (stmt, gsi, &vec_stmt, NULL, 0);
5297 gcc_assert (done);
5298 break;
5299
5300 case call_vec_info_type:
5301 gcc_assert (!slp_node);
5302 done = vectorizable_call (stmt, gsi, &vec_stmt);
5303 stmt = gsi_stmt (*gsi);
5304 break;
5305
5306 case reduc_vec_info_type:
5307 done = vectorizable_reduction (stmt, gsi, &vec_stmt, slp_node);
5308 gcc_assert (done);
5309 break;
5310
5311 default:
5312 if (!STMT_VINFO_LIVE_P (stmt_info))
5313 {
5314 if (vect_print_dump_info (REPORT_DETAILS))
5315 fprintf (vect_dump, "stmt not supported.");
5316 gcc_unreachable ();
5317 }
5318 }
5319
5320 /* Handle inner-loop stmts whose DEF is used in the loop-nest that
5321 is being vectorized, but outside the immediately enclosing loop. */
5322 if (vec_stmt
5323 && STMT_VINFO_LOOP_VINFO (stmt_info)
5324 && nested_in_vect_loop_p (LOOP_VINFO_LOOP (
5325 STMT_VINFO_LOOP_VINFO (stmt_info)), stmt)
5326 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type
5327 && (STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_outer
5328 || STMT_VINFO_RELEVANT (stmt_info) ==
5329 vect_used_in_outer_by_reduction))
5330 {
5331 struct loop *innerloop = LOOP_VINFO_LOOP (
5332 STMT_VINFO_LOOP_VINFO (stmt_info))->inner;
5333 imm_use_iterator imm_iter;
5334 use_operand_p use_p;
5335 tree scalar_dest;
5336 gimple exit_phi;
5337
5338 if (vect_print_dump_info (REPORT_DETAILS))
5339 fprintf (vect_dump, "Record the vdef for outer-loop vectorization.");
5340
5341       /* Find the relevant loop-exit phi-node, and record the vec_stmt there
5342 (to be used when vectorizing outer-loop stmts that use the DEF of
5343 STMT). */
5344 if (gimple_code (stmt) == GIMPLE_PHI)
5345 scalar_dest = PHI_RESULT (stmt);
5346 else
5347 scalar_dest = gimple_assign_lhs (stmt);
5348
5349 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, scalar_dest)
5350 {
5351 if (!flow_bb_inside_loop_p (innerloop, gimple_bb (USE_STMT (use_p))))
5352 {
5353 exit_phi = USE_STMT (use_p);
5354 STMT_VINFO_VEC_STMT (vinfo_for_stmt (exit_phi)) = vec_stmt;
5355 }
5356 }
5357 }
5358
5359 /* Handle stmts whose DEF is used outside the loop-nest that is
5360 being vectorized. */
5361 if (STMT_VINFO_LIVE_P (stmt_info)
5362 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type)
5363 {
5364 done = vectorizable_live_operation (stmt, gsi, &vec_stmt);
5365 gcc_assert (done);
5366 }
5367
5368 if (vec_stmt)
5369 STMT_VINFO_VEC_STMT (stmt_info) = vec_stmt;
5370
5371 return is_store;
5372 }
5373
5374
5375 /* Remove a group of stores (for SLP or interleaving), free their
5376 stmt_vec_info. */
5377
5378 void
5379 vect_remove_stores (gimple first_stmt)
5380 {
5381 gimple next = first_stmt;
5382 gimple tmp;
5383 gimple_stmt_iterator next_si;
5384
5385 while (next)
5386 {
5387 /* Free the attached stmt_vec_info and remove the stmt. */
5388 next_si = gsi_for_stmt (next);
5389 gsi_remove (&next_si, true);
5390 tmp = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next));
5391 free_stmt_vec_info (next);
5392 next = tmp;
5393 }
5394 }
5395
5396
5397 /* Function new_stmt_vec_info.
5398
5399 Create and initialize a new stmt_vec_info struct for STMT. */
5400
5401 stmt_vec_info
5402 new_stmt_vec_info (gimple stmt, loop_vec_info loop_vinfo,
5403 bb_vec_info bb_vinfo)
5404 {
5405 stmt_vec_info res;
5406 res = (stmt_vec_info) xcalloc (1, sizeof (struct _stmt_vec_info));
5407
5408 STMT_VINFO_TYPE (res) = undef_vec_info_type;
5409 STMT_VINFO_STMT (res) = stmt;
5410 STMT_VINFO_LOOP_VINFO (res) = loop_vinfo;
5411 STMT_VINFO_BB_VINFO (res) = bb_vinfo;
5412 STMT_VINFO_RELEVANT (res) = vect_unused_in_scope;
5413 STMT_VINFO_LIVE_P (res) = false;
5414 STMT_VINFO_VECTYPE (res) = NULL;
5415 STMT_VINFO_VEC_STMT (res) = NULL;
5416 STMT_VINFO_VECTORIZABLE (res) = true;
5417 STMT_VINFO_IN_PATTERN_P (res) = false;
5418 STMT_VINFO_RELATED_STMT (res) = NULL;
5419 STMT_VINFO_PATTERN_DEF_STMT (res) = NULL;
5420 STMT_VINFO_DATA_REF (res) = NULL;
5421
5422 STMT_VINFO_DR_BASE_ADDRESS (res) = NULL;
5423 STMT_VINFO_DR_OFFSET (res) = NULL;
5424 STMT_VINFO_DR_INIT (res) = NULL;
5425 STMT_VINFO_DR_STEP (res) = NULL;
5426 STMT_VINFO_DR_ALIGNED_TO (res) = NULL;
5427
5428 if (gimple_code (stmt) == GIMPLE_PHI
5429 && is_loop_header_bb_p (gimple_bb (stmt)))
5430 STMT_VINFO_DEF_TYPE (res) = vect_unknown_def_type;
5431 else
5432 STMT_VINFO_DEF_TYPE (res) = vect_internal_def;
5433
5434 STMT_VINFO_SAME_ALIGN_REFS (res) = VEC_alloc (dr_p, heap, 5);
5435 STMT_VINFO_INSIDE_OF_LOOP_COST (res) = 0;
5436 STMT_VINFO_OUTSIDE_OF_LOOP_COST (res) = 0;
5437 STMT_SLP_TYPE (res) = loop_vect;
5438 GROUP_FIRST_ELEMENT (res) = NULL;
5439 GROUP_NEXT_ELEMENT (res) = NULL;
5440 GROUP_SIZE (res) = 0;
5441 GROUP_STORE_COUNT (res) = 0;
5442 GROUP_GAP (res) = 0;
5443 GROUP_SAME_DR_STMT (res) = NULL;
5444 GROUP_READ_WRITE_DEPENDENCE (res) = false;
5445
5446 return res;
5447 }
5448
5449
5450 /* Create a vector for stmt_vec_info structures. */
5451
5452 void
5453 init_stmt_vec_info_vec (void)
5454 {
5455 gcc_assert (!stmt_vec_info_vec);
5456 stmt_vec_info_vec = VEC_alloc (vec_void_p, heap, 50);
5457 }
5458
5459
5460 /* Free the vector of stmt_vec_info structures. */
5461
5462 void
5463 free_stmt_vec_info_vec (void)
5464 {
5465 gcc_assert (stmt_vec_info_vec);
5466 VEC_free (vec_void_p, heap, stmt_vec_info_vec);
5467 }
5468
5469
5470 /* Free stmt vectorization related info. */
5471
5472 void
5473 free_stmt_vec_info (gimple stmt)
5474 {
5475 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
5476
5477 if (!stmt_info)
5478 return;
5479
5480 VEC_free (dr_p, heap, STMT_VINFO_SAME_ALIGN_REFS (stmt_info));
5481 set_vinfo_for_stmt (stmt, NULL);
5482 free (stmt_info);
5483 }
5484
5485
5486 /* Function get_vectype_for_scalar_type_and_size.
5487
5488 Returns the vector type corresponding to SCALAR_TYPE and SIZE as supported
5489 by the target. */
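/* A worked example (illustrative only, assuming a target where int is
   4 bytes / SImode and a 16-byte integer vector mode exists): with
   SCALAR_TYPE == int and SIZE == 16, nbytes is 4, mode_for_vector asks
   for a 4-element SImode vector, nunits becomes 16 / 4 == 4, and the
   function returns the V4SI vector type.  With SIZE == 0 the target's
   preferred SIMD mode for SImode is used instead.  */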
5490
5491 static tree
5492 get_vectype_for_scalar_type_and_size (tree scalar_type, unsigned size)
5493 {
5494 enum machine_mode inner_mode = TYPE_MODE (scalar_type);
5495 enum machine_mode simd_mode;
5496 unsigned int nbytes = GET_MODE_SIZE (inner_mode);
5497 int nunits;
5498 tree vectype;
5499
5500 if (nbytes == 0)
5501 return NULL_TREE;
5502
5503 /* We can't build a vector type of elements with alignment bigger than
5504 their size. */
5505 if (nbytes < TYPE_ALIGN_UNIT (scalar_type))
5506 return NULL_TREE;
5507
5508 /* For vector types of elements whose mode precision doesn't
5509      match their type's precision we use an element type of mode
5510 precision. The vectorization routines will have to make sure
5511 they support the proper result truncation/extension. */
5512 if (INTEGRAL_TYPE_P (scalar_type)
5513 && GET_MODE_BITSIZE (inner_mode) != TYPE_PRECISION (scalar_type))
5514 scalar_type = build_nonstandard_integer_type (GET_MODE_BITSIZE (inner_mode),
5515 TYPE_UNSIGNED (scalar_type));
5516
5517 if (GET_MODE_CLASS (inner_mode) != MODE_INT
5518 && GET_MODE_CLASS (inner_mode) != MODE_FLOAT)
5519 return NULL_TREE;
5520
5521 /* We shouldn't end up building VECTOR_TYPEs of non-scalar components.
5522      When the component mode passes the above test, simply use a type
5523 corresponding to that mode. The theory is that any use that
5524 would cause problems with this will disable vectorization anyway. */
5525 if (!SCALAR_FLOAT_TYPE_P (scalar_type)
5526 && !INTEGRAL_TYPE_P (scalar_type)
5527 && !POINTER_TYPE_P (scalar_type))
5528 scalar_type = lang_hooks.types.type_for_mode (inner_mode, 1);
5529
5530 /* If no size was supplied use the mode the target prefers. Otherwise
5531      look up a vector mode of the specified size. */
5532 if (size == 0)
5533 simd_mode = targetm.vectorize.preferred_simd_mode (inner_mode);
5534 else
5535 simd_mode = mode_for_vector (inner_mode, size / nbytes);
5536 nunits = GET_MODE_SIZE (simd_mode) / nbytes;
5537 if (nunits <= 1)
5538 return NULL_TREE;
5539
5540 vectype = build_vector_type (scalar_type, nunits);
5541 if (vect_print_dump_info (REPORT_DETAILS))
5542 {
5543 fprintf (vect_dump, "get vectype with %d units of type ", nunits);
5544 print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
5545 }
5546
5547 if (!vectype)
5548 return NULL_TREE;
5549
5550 if (vect_print_dump_info (REPORT_DETAILS))
5551 {
5552 fprintf (vect_dump, "vectype: ");
5553 print_generic_expr (vect_dump, vectype, TDF_SLIM);
5554 }
5555
5556 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
5557 && !INTEGRAL_MODE_P (TYPE_MODE (vectype)))
5558 {
5559 if (vect_print_dump_info (REPORT_DETAILS))
5560 fprintf (vect_dump, "mode not supported by target.");
5561 return NULL_TREE;
5562 }
5563
5564 return vectype;
5565 }
5566
5567 unsigned int current_vector_size;
5568
5569 /* Function get_vectype_for_scalar_type.
5570
5571 Returns the vector type corresponding to SCALAR_TYPE as supported
5572 by the target. */
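/* Note (illustrative): the first successful call also latches
   current_vector_size while it is still 0; e.g. a 16-byte V4SI result
   sets it to 16, so later calls for other scalar types are constrained
   to 16-byte vectors as well.  */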
5573
5574 tree
5575 get_vectype_for_scalar_type (tree scalar_type)
5576 {
5577 tree vectype;
5578 vectype = get_vectype_for_scalar_type_and_size (scalar_type,
5579 current_vector_size);
5580 if (vectype
5581 && current_vector_size == 0)
5582 current_vector_size = GET_MODE_SIZE (TYPE_MODE (vectype));
5583 return vectype;
5584 }
5585
5586 /* Function get_same_sized_vectype
5587
5588    Returns a vector type corresponding to SCALAR_TYPE with the same
5589    size as VECTOR_TYPE, if supported by the target. */
5590
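/* For example (illustrative only, assuming short is 2 bytes and the
   target supports V8HImode): with SCALAR_TYPE == short and
   VECTOR_TYPE == V4SI (16 bytes), the result would be V8HI.  */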
5591 tree
5592 get_same_sized_vectype (tree scalar_type, tree vector_type)
5593 {
5594 return get_vectype_for_scalar_type_and_size
5595 (scalar_type, GET_MODE_SIZE (TYPE_MODE (vector_type)));
5596 }
5597
5598 /* Function vect_is_simple_use.
5599
5600 Input:
5601 LOOP_VINFO - the vect info of the loop that is being vectorized.
5602 BB_VINFO - the vect info of the basic block that is being vectorized.
5603 OPERAND - operand of a stmt in the loop or bb.
5604 DEF - the defining stmt in case OPERAND is an SSA_NAME.
5605
5606 Returns whether a stmt with OPERAND can be vectorized.
5607 For loops, supportable operands are constants, loop invariants, and operands
5608 that are defined by the current iteration of the loop. Unsupportable
5609 operands are those that are defined by a previous iteration of the loop (as
5610 is the case in reduction/induction computations).
5611 For basic blocks, supportable operands are constants and bb invariants.
5612 For now, operands defined outside the basic block are not supported. */
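/* A sketch of the expected classification (illustrative, not exhaustive):
   for a loop stmt "a[i] = b[i] + x", an INTEGER_CST operand is
   vect_constant_def, the loop-invariant x defined before the loop is
   vect_external_def, and the SSA name holding b[i], defined by a stmt
   inside the loop, gets its type (here vect_internal_def) from the
   defining stmt's stmt_vec_info.  */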
5613
5614 bool
5615 vect_is_simple_use (tree operand, loop_vec_info loop_vinfo,
5616 bb_vec_info bb_vinfo, gimple *def_stmt,
5617 tree *def, enum vect_def_type *dt)
5618 {
5619 basic_block bb;
5620 stmt_vec_info stmt_vinfo;
5621 struct loop *loop = NULL;
5622
5623 if (loop_vinfo)
5624 loop = LOOP_VINFO_LOOP (loop_vinfo);
5625
5626 *def_stmt = NULL;
5627 *def = NULL_TREE;
5628
5629 if (vect_print_dump_info (REPORT_DETAILS))
5630 {
5631 fprintf (vect_dump, "vect_is_simple_use: operand ");
5632 print_generic_expr (vect_dump, operand, TDF_SLIM);
5633 }
5634
5635 if (TREE_CODE (operand) == INTEGER_CST || TREE_CODE (operand) == REAL_CST)
5636 {
5637 *dt = vect_constant_def;
5638 return true;
5639 }
5640
5641 if (is_gimple_min_invariant (operand))
5642 {
5643 *def = operand;
5644 *dt = vect_external_def;
5645 return true;
5646 }
5647
5648 if (TREE_CODE (operand) == PAREN_EXPR)
5649 {
5650 if (vect_print_dump_info (REPORT_DETAILS))
5651 fprintf (vect_dump, "non-associatable copy.");
5652 operand = TREE_OPERAND (operand, 0);
5653 }
5654
5655 if (TREE_CODE (operand) != SSA_NAME)
5656 {
5657 if (vect_print_dump_info (REPORT_DETAILS))
5658 fprintf (vect_dump, "not ssa-name.");
5659 return false;
5660 }
5661
5662 *def_stmt = SSA_NAME_DEF_STMT (operand);
5663 if (*def_stmt == NULL)
5664 {
5665 if (vect_print_dump_info (REPORT_DETAILS))
5666 fprintf (vect_dump, "no def_stmt.");
5667 return false;
5668 }
5669
5670 if (vect_print_dump_info (REPORT_DETAILS))
5671 {
5672 fprintf (vect_dump, "def_stmt: ");
5673 print_gimple_stmt (vect_dump, *def_stmt, 0, TDF_SLIM);
5674 }
5675
5676   /* An empty stmt is expected only in the case of a function argument
5677      (otherwise we expect a PHI node or a GIMPLE_ASSIGN). */
5678 if (gimple_nop_p (*def_stmt))
5679 {
5680 *def = operand;
5681 *dt = vect_external_def;
5682 return true;
5683 }
5684
5685 bb = gimple_bb (*def_stmt);
5686
5687 if ((loop && !flow_bb_inside_loop_p (loop, bb))
5688 || (!loop && bb != BB_VINFO_BB (bb_vinfo))
5689 || (!loop && gimple_code (*def_stmt) == GIMPLE_PHI))
5690 *dt = vect_external_def;
5691 else
5692 {
5693 stmt_vinfo = vinfo_for_stmt (*def_stmt);
5694 *dt = STMT_VINFO_DEF_TYPE (stmt_vinfo);
5695 }
5696
5697 if (*dt == vect_unknown_def_type)
5698 {
5699 if (vect_print_dump_info (REPORT_DETAILS))
5700 fprintf (vect_dump, "Unsupported pattern.");
5701 return false;
5702 }
5703
5704 if (vect_print_dump_info (REPORT_DETAILS))
5705 fprintf (vect_dump, "type of def: %d.",*dt);
5706
5707 switch (gimple_code (*def_stmt))
5708 {
5709 case GIMPLE_PHI:
5710 *def = gimple_phi_result (*def_stmt);
5711 break;
5712
5713 case GIMPLE_ASSIGN:
5714 *def = gimple_assign_lhs (*def_stmt);
5715 break;
5716
5717 case GIMPLE_CALL:
5718 *def = gimple_call_lhs (*def_stmt);
5719 if (*def != NULL)
5720 break;
5721 /* FALLTHRU */
5722 default:
5723 if (vect_print_dump_info (REPORT_DETAILS))
5724 fprintf (vect_dump, "unsupported defining stmt: ");
5725 return false;
5726 }
5727
5728 return true;
5729 }
5730
5731 /* Function vect_is_simple_use_1.
5732
5733    Same as vect_is_simple_use but also determines the vector operand
5734 type of OPERAND and stores it to *VECTYPE. If the definition of
5735 OPERAND is vect_uninitialized_def, vect_constant_def or
5736    vect_external_def, *VECTYPE will be set to NULL_TREE and the caller
5737    is responsible for computing the best suited vector type for the
5738 scalar operand. */
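/* For example (illustrative): for an operand defined by a vectorized
   addition inside the loop, *DT is vect_internal_def and *VECTYPE is the
   defining stmt's STMT_VINFO_VECTYPE; for an INTEGER_CST operand, *DT is
   vect_constant_def and *VECTYPE is NULL_TREE.  */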
5739
5740 bool
5741 vect_is_simple_use_1 (tree operand, loop_vec_info loop_vinfo,
5742 bb_vec_info bb_vinfo, gimple *def_stmt,
5743 tree *def, enum vect_def_type *dt, tree *vectype)
5744 {
5745 if (!vect_is_simple_use (operand, loop_vinfo, bb_vinfo, def_stmt, def, dt))
5746 return false;
5747
5748 /* Now get a vector type if the def is internal, otherwise supply
5749 NULL_TREE and leave it up to the caller to figure out a proper
5750 type for the use stmt. */
5751 if (*dt == vect_internal_def
5752 || *dt == vect_induction_def
5753 || *dt == vect_reduction_def
5754 || *dt == vect_double_reduction_def
5755 || *dt == vect_nested_cycle)
5756 {
5757 stmt_vec_info stmt_info = vinfo_for_stmt (*def_stmt);
5758
5759 if (STMT_VINFO_IN_PATTERN_P (stmt_info)
5760 && !STMT_VINFO_RELEVANT (stmt_info)
5761 && !STMT_VINFO_LIVE_P (stmt_info))
5762 stmt_info = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
5763
5764 *vectype = STMT_VINFO_VECTYPE (stmt_info);
5765 gcc_assert (*vectype != NULL_TREE);
5766 }
5767 else if (*dt == vect_uninitialized_def
5768 || *dt == vect_constant_def
5769 || *dt == vect_external_def)
5770 *vectype = NULL_TREE;
5771 else
5772 gcc_unreachable ();
5773
5774 return true;
5775 }
5776
5777
5778 /* Function supportable_widening_operation
5779
5780 Check whether an operation represented by the code CODE is a
5781 widening operation that is supported by the target platform in
5782 vector form (i.e., when operating on arguments of type VECTYPE_IN
5783 producing a result of type VECTYPE_OUT).
5784
5785    Widening operations we currently support are NOP (CONVERT), FLOAT,
5786    WIDEN_MULT and WIDEN_LSHIFT.  This function checks if these operations
5787    are supported by the target platform either directly (via vector
5788    tree-codes), or via target builtins.
5789
5790 Output:
5791 - CODE1 and CODE2 are codes of vector operations to be used when
5792 vectorizing the operation, if available.
5793 - DECL1 and DECL2 are decls of target builtin functions to be used
5794 when vectorizing the operation, if available. In this case,
5795 CODE1 and CODE2 are CALL_EXPR.
5796 - MULTI_STEP_CVT determines the number of required intermediate steps in
5797 case of multi-step conversion (like char->short->int - in that case
5798 MULTI_STEP_CVT will be 1).
5799 - INTERM_TYPES contains the intermediate type required to perform the
5800 widening operation (short in the above example). */
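/* For example (illustrative, assuming the caller initialized
   MULTI_STEP_CVT to 0): a single-step char->short conversion that the
   target supports directly sets CODE1/CODE2 to the
   VEC_UNPACK_{LO,HI}_EXPR pair (order depending on BYTES_BIG_ENDIAN) and
   leaves MULTI_STEP_CVT and INTERM_TYPES untouched; a char->int
   conversion going through short bumps MULTI_STEP_CVT to 1 and pushes
   the intermediate short-element vector type onto INTERM_TYPES.  */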
5801
5802 bool
5803 supportable_widening_operation (enum tree_code code, gimple stmt,
5804 tree vectype_out, tree vectype_in,
5805 tree *decl1, tree *decl2,
5806 enum tree_code *code1, enum tree_code *code2,
5807 int *multi_step_cvt,
5808 VEC (tree, heap) **interm_types)
5809 {
5810 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
5811 loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_info);
5812 struct loop *vect_loop = NULL;
5813 bool ordered_p;
5814 enum machine_mode vec_mode;
5815 enum insn_code icode1, icode2;
5816 optab optab1, optab2;
5817 tree vectype = vectype_in;
5818 tree wide_vectype = vectype_out;
5819 enum tree_code c1, c2;
5820
5821 if (loop_info)
5822 vect_loop = LOOP_VINFO_LOOP (loop_info);
5823
5824 /* The result of a vectorized widening operation usually requires two vectors
5825      (because the widened results do not fit into one vector). The generated
5826 vector results would normally be expected to be generated in the same
5827 order as in the original scalar computation, i.e. if 8 results are
5828 generated in each vector iteration, they are to be organized as follows:
5829 vect1: [res1,res2,res3,res4], vect2: [res5,res6,res7,res8].
5830
5831 However, in the special case that the result of the widening operation is
5832 used in a reduction computation only, the order doesn't matter (because
5833 when vectorizing a reduction we change the order of the computation).
5834 Some targets can take advantage of this and generate more efficient code.
5835 For example, targets like Altivec, that support widen_mult using a sequence
5836 of {mult_even,mult_odd} generate the following vectors:
5837 vect1: [res1,res3,res5,res7], vect2: [res2,res4,res6,res8].
5838
5839 When vectorizing outer-loops, we execute the inner-loop sequentially
5840 (each vectorized inner-loop iteration contributes to VF outer-loop
5841      iterations in parallel).  We therefore don't allow changing the order
5842 of the computation in the inner-loop during outer-loop vectorization. */
5843
5844 if (vect_loop
5845 && STMT_VINFO_RELEVANT (stmt_info) == vect_used_by_reduction
5846 && !nested_in_vect_loop_p (vect_loop, stmt))
5847 ordered_p = false;
5848 else
5849 ordered_p = true;
5850
5851 if (!ordered_p
5852 && code == WIDEN_MULT_EXPR
5853 && targetm.vectorize.builtin_mul_widen_even
5854 && targetm.vectorize.builtin_mul_widen_even (vectype)
5855 && targetm.vectorize.builtin_mul_widen_odd
5856 && targetm.vectorize.builtin_mul_widen_odd (vectype))
5857 {
5858 if (vect_print_dump_info (REPORT_DETAILS))
5859 fprintf (vect_dump, "Unordered widening operation detected.");
5860
5861 *code1 = *code2 = CALL_EXPR;
5862 *decl1 = targetm.vectorize.builtin_mul_widen_even (vectype);
5863 *decl2 = targetm.vectorize.builtin_mul_widen_odd (vectype);
5864 return true;
5865 }
5866
5867 switch (code)
5868 {
5869 case WIDEN_MULT_EXPR:
5870 if (BYTES_BIG_ENDIAN)
5871 {
5872 c1 = VEC_WIDEN_MULT_HI_EXPR;
5873 c2 = VEC_WIDEN_MULT_LO_EXPR;
5874 }
5875 else
5876 {
5877 c2 = VEC_WIDEN_MULT_HI_EXPR;
5878 c1 = VEC_WIDEN_MULT_LO_EXPR;
5879 }
5880 break;
5881
5882 case WIDEN_LSHIFT_EXPR:
5883 if (BYTES_BIG_ENDIAN)
5884 {
5885 c1 = VEC_WIDEN_LSHIFT_HI_EXPR;
5886 c2 = VEC_WIDEN_LSHIFT_LO_EXPR;
5887 }
5888 else
5889 {
5890 c2 = VEC_WIDEN_LSHIFT_HI_EXPR;
5891 c1 = VEC_WIDEN_LSHIFT_LO_EXPR;
5892 }
5893 break;
5894
5895 CASE_CONVERT:
5896 if (BYTES_BIG_ENDIAN)
5897 {
5898 c1 = VEC_UNPACK_HI_EXPR;
5899 c2 = VEC_UNPACK_LO_EXPR;
5900 }
5901 else
5902 {
5903 c2 = VEC_UNPACK_HI_EXPR;
5904 c1 = VEC_UNPACK_LO_EXPR;
5905 }
5906 break;
5907
5908 case FLOAT_EXPR:
5909 if (BYTES_BIG_ENDIAN)
5910 {
5911 c1 = VEC_UNPACK_FLOAT_HI_EXPR;
5912 c2 = VEC_UNPACK_FLOAT_LO_EXPR;
5913 }
5914 else
5915 {
5916 c2 = VEC_UNPACK_FLOAT_HI_EXPR;
5917 c1 = VEC_UNPACK_FLOAT_LO_EXPR;
5918 }
5919 break;
5920
5921 case FIX_TRUNC_EXPR:
5922 /* ??? Not yet implemented due to missing VEC_UNPACK_FIX_TRUNC_HI_EXPR/
5923 VEC_UNPACK_FIX_TRUNC_LO_EXPR tree codes and optabs used for
5924 computing the operation. */
5925 return false;
5926
5927 default:
5928 gcc_unreachable ();
5929 }
5930
5931 if (code == FIX_TRUNC_EXPR)
5932 {
5933       /* The signedness is determined from the output operand. */
5934 optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
5935 optab2 = optab_for_tree_code (c2, vectype_out, optab_default);
5936 }
5937 else
5938 {
5939 optab1 = optab_for_tree_code (c1, vectype, optab_default);
5940 optab2 = optab_for_tree_code (c2, vectype, optab_default);
5941 }
5942
5943 if (!optab1 || !optab2)
5944 return false;
5945
5946 vec_mode = TYPE_MODE (vectype);
5947 if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing
5948 || (icode2 = optab_handler (optab2, vec_mode)) == CODE_FOR_nothing)
5949 return false;
5950
5951 /* Check if it's a multi-step conversion that can be done using intermediate
5952 types. */
5953 if (insn_data[icode1].operand[0].mode != TYPE_MODE (wide_vectype)
5954 || insn_data[icode2].operand[0].mode != TYPE_MODE (wide_vectype))
5955 {
5956 int i;
5957 tree prev_type = vectype, intermediate_type;
5958 enum machine_mode intermediate_mode, prev_mode = vec_mode;
5959 optab optab3, optab4;
5960
5961 if (!CONVERT_EXPR_CODE_P (code))
5962 return false;
5963
5964 *code1 = c1;
5965 *code2 = c2;
5966
5967 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
5968          intermediate steps in the promotion sequence.  We try
5969          MAX_INTERM_CVT_STEPS to get to WIDE_VECTYPE, and fail if we do
5970 not. */
5971 *interm_types = VEC_alloc (tree, heap, MAX_INTERM_CVT_STEPS);
5972 for (i = 0; i < 3; i++)
5973 {
5974 intermediate_mode = insn_data[icode1].operand[0].mode;
5975 intermediate_type = lang_hooks.types.type_for_mode (intermediate_mode,
5976 TYPE_UNSIGNED (prev_type));
5977 optab3 = optab_for_tree_code (c1, intermediate_type, optab_default);
5978 optab4 = optab_for_tree_code (c2, intermediate_type, optab_default);
5979
5980 if (!optab3 || !optab4
5981 || ((icode1 = optab_handler (optab1, prev_mode))
5982 == CODE_FOR_nothing)
5983 || insn_data[icode1].operand[0].mode != intermediate_mode
5984 || ((icode2 = optab_handler (optab2, prev_mode))
5985 == CODE_FOR_nothing)
5986 || insn_data[icode2].operand[0].mode != intermediate_mode
5987 || ((icode1 = optab_handler (optab3, intermediate_mode))
5988 == CODE_FOR_nothing)
5989 || ((icode2 = optab_handler (optab4, intermediate_mode))
5990 == CODE_FOR_nothing))
5991 return false;
5992
5993 VEC_quick_push (tree, *interm_types, intermediate_type);
5994 (*multi_step_cvt)++;
5995
5996 if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
5997 && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
5998 return true;
5999
6000 prev_type = intermediate_type;
6001 prev_mode = intermediate_mode;
6002 }
6003
6004 return false;
6005 }
6006
6007 *code1 = c1;
6008 *code2 = c2;
6009 return true;
6010 }
6011
6012
6013 /* Function supportable_narrowing_operation
6014
6015 Check whether an operation represented by the code CODE is a
6016 narrowing operation that is supported by the target platform in
6017 vector form (i.e., when operating on arguments of type VECTYPE_IN
6018 and producing a result of type VECTYPE_OUT).
6019
6020 Narrowing operations we currently support are NOP (CONVERT) and
6021 FIX_TRUNC. This function checks if these operations are supported by
6022 the target platform directly via vector tree-codes.
6023
6024 Output:
6025 - CODE1 is the code of a vector operation to be used when
6026 vectorizing the operation, if available.
6027 - MULTI_STEP_CVT determines the number of required intermediate steps in
6028 case of multi-step conversion (like int->short->char - in that case
6029 MULTI_STEP_CVT will be 1).
6030 - INTERM_TYPES contains the intermediate type required to perform the
6031 narrowing operation (short in the above example). */
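/* For example (illustrative, assuming the caller initialized
   MULTI_STEP_CVT to 0): a direct int->short narrowing supported by the
   target yields CODE1 == VEC_PACK_TRUNC_EXPR with MULTI_STEP_CVT left at
   0 and INTERM_TYPES empty; an int->char narrowing going through short
   bumps MULTI_STEP_CVT to 1 and records the intermediate short-element
   vector type in INTERM_TYPES.  */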
6032
6033 bool
6034 supportable_narrowing_operation (enum tree_code code,
6035 tree vectype_out, tree vectype_in,
6036 enum tree_code *code1, int *multi_step_cvt,
6037 VEC (tree, heap) **interm_types)
6038 {
6039 enum machine_mode vec_mode;
6040 enum insn_code icode1;
6041 optab optab1, interm_optab;
6042 tree vectype = vectype_in;
6043 tree narrow_vectype = vectype_out;
6044 enum tree_code c1;
6045 tree intermediate_type, prev_type;
6046 int i;
6047
6048 switch (code)
6049 {
6050 CASE_CONVERT:
6051 c1 = VEC_PACK_TRUNC_EXPR;
6052 break;
6053
6054 case FIX_TRUNC_EXPR:
6055 c1 = VEC_PACK_FIX_TRUNC_EXPR;
6056 break;
6057
6058 case FLOAT_EXPR:
6059 /* ??? Not yet implemented due to missing VEC_PACK_FLOAT_EXPR
6060 tree code and optabs used for computing the operation. */
6061 return false;
6062
6063 default:
6064 gcc_unreachable ();
6065 }
6066
6067 if (code == FIX_TRUNC_EXPR)
6068     /* The signedness is determined from the output operand. */
6069 optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
6070 else
6071 optab1 = optab_for_tree_code (c1, vectype, optab_default);
6072
6073 if (!optab1)
6074 return false;
6075
6076 vec_mode = TYPE_MODE (vectype);
6077 if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing)
6078 return false;
6079
6080 /* Check if it's a multi-step conversion that can be done using intermediate
6081 types. */
6082 if (insn_data[icode1].operand[0].mode != TYPE_MODE (narrow_vectype))
6083 {
6084 enum machine_mode intermediate_mode, prev_mode = vec_mode;
6085
6086 *code1 = c1;
6087 prev_type = vectype;
6088 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
6089          intermediate steps in the narrowing sequence.  We try
6090 MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do
6091 not. */
6092 *interm_types = VEC_alloc (tree, heap, MAX_INTERM_CVT_STEPS);
6093 for (i = 0; i < 3; i++)
6094 {
6095 intermediate_mode = insn_data[icode1].operand[0].mode;
6096 intermediate_type = lang_hooks.types.type_for_mode (intermediate_mode,
6097 TYPE_UNSIGNED (prev_type));
6098 interm_optab = optab_for_tree_code (c1, intermediate_type,
6099 optab_default);
6100 if (!interm_optab
6101 || ((icode1 = optab_handler (optab1, prev_mode))
6102 == CODE_FOR_nothing)
6103 || insn_data[icode1].operand[0].mode != intermediate_mode
6104 || ((icode1 = optab_handler (interm_optab, intermediate_mode))
6105 == CODE_FOR_nothing))
6106 return false;
6107
6108 VEC_quick_push (tree, *interm_types, intermediate_type);
6109 (*multi_step_cvt)++;
6110
6111 if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
6112 return true;
6113
6114 prev_type = intermediate_type;
6115 prev_mode = intermediate_mode;
6116 }
6117
6118 return false;
6119 }
6120
6121 *code1 = c1;
6122 return true;
6123 }