gcc/tree-vect-stmts.c
1 /* Statement Analysis and Transformation for Vectorization
2 Copyright (C) 2003-2013 Free Software Foundation, Inc.
3 Contributed by Dorit Naishlos <dorit@il.ibm.com>
4 and Ira Rosen <irar@il.ibm.com>
5
6 This file is part of GCC.
7
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
11 version.
12
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 for more details.
17
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
21
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "dumpfile.h"
26 #include "tm.h"
27 #include "ggc.h"
28 #include "tree.h"
29 #include "target.h"
30 #include "basic-block.h"
31 #include "gimple-pretty-print.h"
32 #include "tree-flow.h"
33 #include "cfgloop.h"
34 #include "expr.h"
35 #include "recog.h" /* FIXME: for insn_data */
36 #include "optabs.h"
37 #include "diagnostic-core.h"
38 #include "tree-vectorizer.h"
39 #include "dumpfile.h"
40
41 /* For lang_hooks.types.type_for_mode. */
42 #include "langhooks.h"
43
44 /* Return the vectorized type for the given statement. */
45
46 tree
47 stmt_vectype (struct _stmt_vec_info *stmt_info)
48 {
49 return STMT_VINFO_VECTYPE (stmt_info);
50 }
51
52 /* Return TRUE iff the given statement is in an inner loop relative to
53 the loop being vectorized. */
54 bool
55 stmt_in_inner_loop_p (struct _stmt_vec_info *stmt_info)
56 {
57 gimple stmt = STMT_VINFO_STMT (stmt_info);
58 basic_block bb = gimple_bb (stmt);
59 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
60 struct loop* loop;
61
62 if (!loop_vinfo)
63 return false;
64
65 loop = LOOP_VINFO_LOOP (loop_vinfo);
66
67 return (bb->loop_father == loop->inner);
68 }
69
70 /* Record the cost of a statement, either by directly informing the
71 target model or by saving it in a vector for later processing.
72 Return a preliminary estimate of the statement's cost. */
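/* For example, costing COUNT copies of a vector_stmt with a non-NULL
   BODY_COST_VEC queues the statement for the target to finalize later and
   returns COUNT * builtin_vectorization_cost (vector_stmt, vectype, MISALIGN)
   as a preliminary estimate; with a NULL BODY_COST_VEC the same request is
   handed straight to the target's add_stmt_cost hook.  */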
73
74 unsigned
75 record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
76 enum vect_cost_for_stmt kind, stmt_vec_info stmt_info,
77 int misalign, enum vect_cost_model_location where)
78 {
79 if (body_cost_vec)
80 {
81 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
82 add_stmt_info_to_vec (body_cost_vec, count, kind,
83 stmt_info ? STMT_VINFO_STMT (stmt_info) : NULL,
84 misalign);
85 return (unsigned)
86 (builtin_vectorization_cost (kind, vectype, misalign) * count);
87
88 }
89 else
90 {
91 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
92 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
93 void *target_cost_data;
94
95 if (loop_vinfo)
96 target_cost_data = LOOP_VINFO_TARGET_COST_DATA (loop_vinfo);
97 else
98 target_cost_data = BB_VINFO_TARGET_COST_DATA (bb_vinfo);
99
100 return add_stmt_cost (target_cost_data, count, kind, stmt_info,
101 misalign, where);
102 }
103 }
104
105 /* Return a variable of type ELEM_TYPE[NELEMS]. */
106
107 static tree
108 create_vector_array (tree elem_type, unsigned HOST_WIDE_INT nelems)
109 {
110 return create_tmp_var (build_array_type_nelts (elem_type, nelems),
111 "vect_array");
112 }
113
114 /* ARRAY is an array of vectors created by create_vector_array.
115 Return an SSA_NAME for the vector in index N. The reference
116 is part of the vectorization of STMT and the vector is associated
117 with scalar destination SCALAR_DEST. */
118
119 static tree
120 read_vector_array (gimple stmt, gimple_stmt_iterator *gsi, tree scalar_dest,
121 tree array, unsigned HOST_WIDE_INT n)
122 {
123 tree vect_type, vect, vect_name, array_ref;
124 gimple new_stmt;
125
126 gcc_assert (TREE_CODE (TREE_TYPE (array)) == ARRAY_TYPE);
127 vect_type = TREE_TYPE (TREE_TYPE (array));
128 vect = vect_create_destination_var (scalar_dest, vect_type);
129 array_ref = build4 (ARRAY_REF, vect_type, array,
130 build_int_cst (size_type_node, n),
131 NULL_TREE, NULL_TREE);
132
133 new_stmt = gimple_build_assign (vect, array_ref);
134 vect_name = make_ssa_name (vect, new_stmt);
135 gimple_assign_set_lhs (new_stmt, vect_name);
136 vect_finish_stmt_generation (stmt, new_stmt, gsi);
137
138 return vect_name;
139 }
140
141 /* ARRAY is an array of vectors created by create_vector_array.
142 Emit code to store SSA_NAME VECT in index N of the array.
143 The store is part of the vectorization of STMT. */
144
145 static void
146 write_vector_array (gimple stmt, gimple_stmt_iterator *gsi, tree vect,
147 tree array, unsigned HOST_WIDE_INT n)
148 {
149 tree array_ref;
150 gimple new_stmt;
151
152 array_ref = build4 (ARRAY_REF, TREE_TYPE (vect), array,
153 build_int_cst (size_type_node, n),
154 NULL_TREE, NULL_TREE);
155
156 new_stmt = gimple_build_assign (array_ref, vect);
157 vect_finish_stmt_generation (stmt, new_stmt, gsi);
158 }
159
160 /* PTR is a pointer to an array of type TYPE. Return a representation
161 of *PTR. The memory reference replaces those in FIRST_DR
162 (and its group). */
163
164 static tree
165 create_array_ref (tree type, tree ptr, struct data_reference *first_dr)
166 {
167 tree mem_ref, alias_ptr_type;
168
169 alias_ptr_type = reference_alias_ptr_type (DR_REF (first_dr));
170 mem_ref = build2 (MEM_REF, type, ptr, build_int_cst (alias_ptr_type, 0));
171 /* Arrays have the same alignment as their type. */
172 set_ptr_info_alignment (get_ptr_info (ptr), TYPE_ALIGN_UNIT (type), 0);
173 return mem_ref;
174 }
175
176 /* Utility functions used by vect_mark_stmts_to_be_vectorized. */
177
178 /* Function vect_mark_relevant.
179
180 Mark STMT as "relevant for vectorization" and add it to WORKLIST. */
181
182 static void
183 vect_mark_relevant (vec<gimple> *worklist, gimple stmt,
184 enum vect_relevant relevant, bool live_p,
185 bool used_in_pattern)
186 {
187 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
188 enum vect_relevant save_relevant = STMT_VINFO_RELEVANT (stmt_info);
189 bool save_live_p = STMT_VINFO_LIVE_P (stmt_info);
190 gimple pattern_stmt;
191
192 if (dump_enabled_p ())
193 dump_printf_loc (MSG_NOTE, vect_location,
194 "mark relevant %d, live %d.", relevant, live_p);
195
196 /* If this stmt is an original stmt in a pattern, we might need to mark its
197 related pattern stmt instead of the original stmt. However, such stmts
 198 may have their own uses that are not in any pattern; in such cases the
199 stmt itself should be marked. */
200 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
201 {
202 bool found = false;
203 if (!used_in_pattern)
204 {
205 imm_use_iterator imm_iter;
206 use_operand_p use_p;
207 gimple use_stmt;
208 tree lhs;
209 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
210 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
211
212 if (is_gimple_assign (stmt))
213 lhs = gimple_assign_lhs (stmt);
214 else
215 lhs = gimple_call_lhs (stmt);
216
 217 /* This use is outside of any pattern; if LHS has other uses that are
218 pattern uses, we should mark the stmt itself, and not the pattern
219 stmt. */
220 if (TREE_CODE (lhs) == SSA_NAME)
221 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
222 {
223 if (is_gimple_debug (USE_STMT (use_p)))
224 continue;
225 use_stmt = USE_STMT (use_p);
226
227 if (!flow_bb_inside_loop_p (loop, gimple_bb (use_stmt)))
228 continue;
229
230 if (vinfo_for_stmt (use_stmt)
231 && STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (use_stmt)))
232 {
233 found = true;
234 break;
235 }
236 }
237 }
238
239 if (!found)
240 {
241 /* This is the last stmt in a sequence that was detected as a
242 pattern that can potentially be vectorized. Don't mark the stmt
243 as relevant/live because it's not going to be vectorized.
244 Instead mark the pattern-stmt that replaces it. */
245
246 pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
247
248 if (dump_enabled_p ())
249 dump_printf_loc (MSG_NOTE, vect_location,
250 "last stmt in pattern. don't mark"
251 " relevant/live.");
252 stmt_info = vinfo_for_stmt (pattern_stmt);
253 gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info) == stmt);
254 save_relevant = STMT_VINFO_RELEVANT (stmt_info);
255 save_live_p = STMT_VINFO_LIVE_P (stmt_info);
256 stmt = pattern_stmt;
257 }
258 }
259
260 STMT_VINFO_LIVE_P (stmt_info) |= live_p;
261 if (relevant > STMT_VINFO_RELEVANT (stmt_info))
262 STMT_VINFO_RELEVANT (stmt_info) = relevant;
263
264 if (STMT_VINFO_RELEVANT (stmt_info) == save_relevant
265 && STMT_VINFO_LIVE_P (stmt_info) == save_live_p)
266 {
267 if (dump_enabled_p ())
268 dump_printf_loc (MSG_NOTE, vect_location,
269 "already marked relevant/live.");
270 return;
271 }
272
273 worklist->safe_push (stmt);
274 }
275
276
277 /* Function vect_stmt_relevant_p.
278
279 Return true if STMT in loop that is represented by LOOP_VINFO is
280 "relevant for vectorization".
281
282 A stmt is considered "relevant for vectorization" if:
283 - it has uses outside the loop.
284 - it has vdefs (it alters memory).
 285 - it is a control stmt in the loop (other than the loop exit condition).
286
287 CHECKME: what other side effects would the vectorizer allow? */
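/* For example, a store "*p_1 = x_2" in the loop is relevant because it has
   a vdef, and a statement whose result is read only by a phi in the loop's
   single exit block is marked live; an ordinary computation whose uses all
   stay inside the loop is neither relevant nor live at this point.  */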
288
289 static bool
290 vect_stmt_relevant_p (gimple stmt, loop_vec_info loop_vinfo,
291 enum vect_relevant *relevant, bool *live_p)
292 {
293 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
294 ssa_op_iter op_iter;
295 imm_use_iterator imm_iter;
296 use_operand_p use_p;
297 def_operand_p def_p;
298
299 *relevant = vect_unused_in_scope;
300 *live_p = false;
301
302 /* cond stmt other than loop exit cond. */
303 if (is_ctrl_stmt (stmt)
304 && STMT_VINFO_TYPE (vinfo_for_stmt (stmt))
305 != loop_exit_ctrl_vec_info_type)
306 *relevant = vect_used_in_scope;
307
308 /* changing memory. */
309 if (gimple_code (stmt) != GIMPLE_PHI)
310 if (gimple_vdef (stmt))
311 {
312 if (dump_enabled_p ())
313 dump_printf_loc (MSG_NOTE, vect_location,
314 "vec_stmt_relevant_p: stmt has vdefs.");
315 *relevant = vect_used_in_scope;
316 }
317
318 /* uses outside the loop. */
319 FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt, op_iter, SSA_OP_DEF)
320 {
321 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, DEF_FROM_PTR (def_p))
322 {
323 basic_block bb = gimple_bb (USE_STMT (use_p));
324 if (!flow_bb_inside_loop_p (loop, bb))
325 {
326 if (dump_enabled_p ())
327 dump_printf_loc (MSG_NOTE, vect_location,
328 "vec_stmt_relevant_p: used out of loop.");
329
330 if (is_gimple_debug (USE_STMT (use_p)))
331 continue;
332
333 /* We expect all such uses to be in the loop exit phis
334 (because of loop closed form) */
335 gcc_assert (gimple_code (USE_STMT (use_p)) == GIMPLE_PHI);
336 gcc_assert (bb == single_exit (loop)->dest);
337
338 *live_p = true;
339 }
340 }
341 }
342
343 return (*live_p || *relevant);
344 }
345
346
347 /* Function exist_non_indexing_operands_for_use_p
348
349 USE is one of the uses attached to STMT. Check if USE is
350 used in STMT for anything other than indexing an array. */
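/* For example, in a store "a[i_1] = x_2" the use of x_2 is a non-indexing
   operand, so the function returns true for it, whereas i_1 only feeds the
   address computation and the function returns false for that use.  */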
351
352 static bool
353 exist_non_indexing_operands_for_use_p (tree use, gimple stmt)
354 {
355 tree operand;
356 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
357
358 /* USE corresponds to some operand in STMT. If there is no data
359 reference in STMT, then any operand that corresponds to USE
360 is not indexing an array. */
361 if (!STMT_VINFO_DATA_REF (stmt_info))
362 return true;
363
 364 /* STMT has a data_ref. FORNOW this means that it is one of
365 the following forms:
366 -1- ARRAY_REF = var
367 -2- var = ARRAY_REF
368 (This should have been verified in analyze_data_refs).
369
370 'var' in the second case corresponds to a def, not a use,
371 so USE cannot correspond to any operands that are not used
372 for array indexing.
373
374 Therefore, all we need to check is if STMT falls into the
375 first case, and whether var corresponds to USE. */
376
377 if (!gimple_assign_copy_p (stmt))
378 return false;
379 if (TREE_CODE (gimple_assign_lhs (stmt)) == SSA_NAME)
380 return false;
381 operand = gimple_assign_rhs1 (stmt);
382 if (TREE_CODE (operand) != SSA_NAME)
383 return false;
384
385 if (operand == use)
386 return true;
387
388 return false;
389 }
390
391
392 /*
393 Function process_use.
394
395 Inputs:
396 - a USE in STMT in a loop represented by LOOP_VINFO
397 - LIVE_P, RELEVANT - enum values to be set in the STMT_VINFO of the stmt
 398 that defined USE. This is done by calling vect_mark_relevant and passing it
399 the WORKLIST (to add DEF_STMT to the WORKLIST in case it is relevant).
400 - FORCE is true if exist_non_indexing_operands_for_use_p check shouldn't
401 be performed.
402
403 Outputs:
404 Generally, LIVE_P and RELEVANT are used to define the liveness and
405 relevance info of the DEF_STMT of this USE:
406 STMT_VINFO_LIVE_P (DEF_STMT_info) <-- live_p
407 STMT_VINFO_RELEVANT (DEF_STMT_info) <-- relevant
408 Exceptions:
409 - case 1: If USE is used only for address computations (e.g. array indexing),
410 which does not need to be directly vectorized, then the liveness/relevance
411 of the respective DEF_STMT is left unchanged.
412 - case 2: If STMT is a reduction phi and DEF_STMT is a reduction stmt, we
 413 skip DEF_STMT because it has already been processed.
414 - case 3: If DEF_STMT and STMT are in different nests, then "relevant" will
415 be modified accordingly.
416
417 Return true if everything is as expected. Return false otherwise. */
418
419 static bool
420 process_use (gimple stmt, tree use, loop_vec_info loop_vinfo, bool live_p,
421 enum vect_relevant relevant, vec<gimple> *worklist,
422 bool force)
423 {
424 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
425 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
426 stmt_vec_info dstmt_vinfo;
427 basic_block bb, def_bb;
428 tree def;
429 gimple def_stmt;
430 enum vect_def_type dt;
431
432 /* case 1: we are only interested in uses that need to be vectorized. Uses
433 that are used for address computation are not considered relevant. */
434 if (!force && !exist_non_indexing_operands_for_use_p (use, stmt))
435 return true;
436
437 if (!vect_is_simple_use (use, stmt, loop_vinfo, NULL, &def_stmt, &def, &dt))
438 {
439 if (dump_enabled_p ())
440 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
441 "not vectorized: unsupported use in stmt.");
442 return false;
443 }
444
445 if (!def_stmt || gimple_nop_p (def_stmt))
446 return true;
447
448 def_bb = gimple_bb (def_stmt);
449 if (!flow_bb_inside_loop_p (loop, def_bb))
450 {
451 if (dump_enabled_p ())
452 dump_printf_loc (MSG_NOTE, vect_location, "def_stmt is out of loop.");
453 return true;
454 }
455
456 /* case 2: A reduction phi (STMT) defined by a reduction stmt (DEF_STMT).
457 DEF_STMT must have already been processed, because this should be the
458 only way that STMT, which is a reduction-phi, was put in the worklist,
459 as there should be no other uses for DEF_STMT in the loop. So we just
460 check that everything is as expected, and we are done. */
461 dstmt_vinfo = vinfo_for_stmt (def_stmt);
462 bb = gimple_bb (stmt);
463 if (gimple_code (stmt) == GIMPLE_PHI
464 && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
465 && gimple_code (def_stmt) != GIMPLE_PHI
466 && STMT_VINFO_DEF_TYPE (dstmt_vinfo) == vect_reduction_def
467 && bb->loop_father == def_bb->loop_father)
468 {
469 if (dump_enabled_p ())
470 dump_printf_loc (MSG_NOTE, vect_location,
471 "reduc-stmt defining reduc-phi in the same nest.");
472 if (STMT_VINFO_IN_PATTERN_P (dstmt_vinfo))
473 dstmt_vinfo = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (dstmt_vinfo));
474 gcc_assert (STMT_VINFO_RELEVANT (dstmt_vinfo) < vect_used_by_reduction);
475 gcc_assert (STMT_VINFO_LIVE_P (dstmt_vinfo)
476 || STMT_VINFO_RELEVANT (dstmt_vinfo) > vect_unused_in_scope);
477 return true;
478 }
479
480 /* case 3a: outer-loop stmt defining an inner-loop stmt:
481 outer-loop-header-bb:
482 d = def_stmt
483 inner-loop:
484 stmt # use (d)
485 outer-loop-tail-bb:
486 ... */
487 if (flow_loop_nested_p (def_bb->loop_father, bb->loop_father))
488 {
489 if (dump_enabled_p ())
490 dump_printf_loc (MSG_NOTE, vect_location,
491 "outer-loop def-stmt defining inner-loop stmt.");
492
493 switch (relevant)
494 {
495 case vect_unused_in_scope:
496 relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_nested_cycle) ?
497 vect_used_in_scope : vect_unused_in_scope;
498 break;
499
500 case vect_used_in_outer_by_reduction:
501 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
502 relevant = vect_used_by_reduction;
503 break;
504
505 case vect_used_in_outer:
506 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
507 relevant = vect_used_in_scope;
508 break;
509
510 case vect_used_in_scope:
511 break;
512
513 default:
514 gcc_unreachable ();
515 }
516 }
517
518 /* case 3b: inner-loop stmt defining an outer-loop stmt:
519 outer-loop-header-bb:
520 ...
521 inner-loop:
522 d = def_stmt
523 outer-loop-tail-bb (or outer-loop-exit-bb in double reduction):
524 stmt # use (d) */
525 else if (flow_loop_nested_p (bb->loop_father, def_bb->loop_father))
526 {
527 if (dump_enabled_p ())
528 dump_printf_loc (MSG_NOTE, vect_location,
529 "inner-loop def-stmt defining outer-loop stmt.");
530
531 switch (relevant)
532 {
533 case vect_unused_in_scope:
534 relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
535 || STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_double_reduction_def) ?
536 vect_used_in_outer_by_reduction : vect_unused_in_scope;
537 break;
538
539 case vect_used_by_reduction:
540 relevant = vect_used_in_outer_by_reduction;
541 break;
542
543 case vect_used_in_scope:
544 relevant = vect_used_in_outer;
545 break;
546
547 default:
548 gcc_unreachable ();
549 }
550 }
551
552 vect_mark_relevant (worklist, def_stmt, relevant, live_p,
553 is_pattern_stmt_p (stmt_vinfo));
554 return true;
555 }
556
557
558 /* Function vect_mark_stmts_to_be_vectorized.
559
560 Not all stmts in the loop need to be vectorized. For example:
561
562 for i...
563 for j...
564 1. T0 = i + j
565 2. T1 = a[T0]
566
567 3. j = j + 1
568
 569 Stmts 1 and 3 do not need to be vectorized, because loop control and
570 addressing of vectorized data-refs are handled differently.
571
572 This pass detects such stmts. */
573
574 bool
575 vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo)
576 {
577 vec<gimple> worklist;
578 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
579 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
580 unsigned int nbbs = loop->num_nodes;
581 gimple_stmt_iterator si;
582 gimple stmt;
583 unsigned int i;
584 stmt_vec_info stmt_vinfo;
585 basic_block bb;
586 gimple phi;
587 bool live_p;
588 enum vect_relevant relevant, tmp_relevant;
589 enum vect_def_type def_type;
590
591 if (dump_enabled_p ())
592 dump_printf_loc (MSG_NOTE, vect_location,
593 "=== vect_mark_stmts_to_be_vectorized ===");
594
595 worklist.create (64);
596
597 /* 1. Init worklist. */
598 for (i = 0; i < nbbs; i++)
599 {
600 bb = bbs[i];
601 for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
602 {
603 phi = gsi_stmt (si);
604 if (dump_enabled_p ())
605 {
606 dump_printf_loc (MSG_NOTE, vect_location, "init: phi relevant? ");
607 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, phi, 0);
608 }
609
610 if (vect_stmt_relevant_p (phi, loop_vinfo, &relevant, &live_p))
611 vect_mark_relevant (&worklist, phi, relevant, live_p, false);
612 }
613 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
614 {
615 stmt = gsi_stmt (si);
616 if (dump_enabled_p ())
617 {
618 dump_printf_loc (MSG_NOTE, vect_location, "init: stmt relevant? ");
619 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
620 }
621
622 if (vect_stmt_relevant_p (stmt, loop_vinfo, &relevant, &live_p))
623 vect_mark_relevant (&worklist, stmt, relevant, live_p, false);
624 }
625 }
626
627 /* 2. Process_worklist */
628 while (worklist.length () > 0)
629 {
630 use_operand_p use_p;
631 ssa_op_iter iter;
632
633 stmt = worklist.pop ();
634 if (dump_enabled_p ())
635 {
636 dump_printf_loc (MSG_NOTE, vect_location, "worklist: examine stmt: ");
637 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
638 }
639
640 /* Examine the USEs of STMT. For each USE, mark the stmt that defines it
641 (DEF_STMT) as relevant/irrelevant and live/dead according to the
642 liveness and relevance properties of STMT. */
643 stmt_vinfo = vinfo_for_stmt (stmt);
644 relevant = STMT_VINFO_RELEVANT (stmt_vinfo);
645 live_p = STMT_VINFO_LIVE_P (stmt_vinfo);
646
647 /* Generally, the liveness and relevance properties of STMT are
648 propagated as is to the DEF_STMTs of its USEs:
649 live_p <-- STMT_VINFO_LIVE_P (STMT_VINFO)
650 relevant <-- STMT_VINFO_RELEVANT (STMT_VINFO)
651
652 One exception is when STMT has been identified as defining a reduction
653 variable; in this case we set the liveness/relevance as follows:
654 live_p = false
655 relevant = vect_used_by_reduction
656 This is because we distinguish between two kinds of relevant stmts -
657 those that are used by a reduction computation, and those that are
658 (also) used by a regular computation. This allows us later on to
659 identify stmts that are used solely by a reduction, and therefore the
660 order of the results that they produce does not have to be kept. */
661
662 def_type = STMT_VINFO_DEF_TYPE (stmt_vinfo);
663 tmp_relevant = relevant;
664 switch (def_type)
665 {
666 case vect_reduction_def:
667 switch (tmp_relevant)
668 {
669 case vect_unused_in_scope:
670 relevant = vect_used_by_reduction;
671 break;
672
673 case vect_used_by_reduction:
674 if (gimple_code (stmt) == GIMPLE_PHI)
675 break;
676 /* fall through */
677
678 default:
679 if (dump_enabled_p ())
680 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
681 "unsupported use of reduction.");
682 worklist.release ();
683 return false;
684 }
685
686 live_p = false;
687 break;
688
689 case vect_nested_cycle:
690 if (tmp_relevant != vect_unused_in_scope
691 && tmp_relevant != vect_used_in_outer_by_reduction
692 && tmp_relevant != vect_used_in_outer)
693 {
694 if (dump_enabled_p ())
695 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
696 "unsupported use of nested cycle.");
697
698 worklist.release ();
699 return false;
700 }
701
702 live_p = false;
703 break;
704
705 case vect_double_reduction_def:
706 if (tmp_relevant != vect_unused_in_scope
707 && tmp_relevant != vect_used_by_reduction)
708 {
709 if (dump_enabled_p ())
710 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
711 "unsupported use of double reduction.");
712
713 worklist.release ();
714 return false;
715 }
716
717 live_p = false;
718 break;
719
720 default:
721 break;
722 }
723
724 if (is_pattern_stmt_p (stmt_vinfo))
725 {
726 /* Pattern statements are not inserted into the code, so
727 FOR_EACH_PHI_OR_STMT_USE optimizes their operands out, and we
728 have to scan the RHS or function arguments instead. */
729 if (is_gimple_assign (stmt))
730 {
731 enum tree_code rhs_code = gimple_assign_rhs_code (stmt);
732 tree op = gimple_assign_rhs1 (stmt);
733
734 i = 1;
735 if (rhs_code == COND_EXPR && COMPARISON_CLASS_P (op))
736 {
737 if (!process_use (stmt, TREE_OPERAND (op, 0), loop_vinfo,
738 live_p, relevant, &worklist, false)
739 || !process_use (stmt, TREE_OPERAND (op, 1), loop_vinfo,
740 live_p, relevant, &worklist, false))
741 {
742 worklist.release ();
743 return false;
744 }
745 i = 2;
746 }
747 for (; i < gimple_num_ops (stmt); i++)
748 {
749 op = gimple_op (stmt, i);
750 if (!process_use (stmt, op, loop_vinfo, live_p, relevant,
751 &worklist, false))
752 {
753 worklist.release ();
754 return false;
755 }
756 }
757 }
758 else if (is_gimple_call (stmt))
759 {
760 for (i = 0; i < gimple_call_num_args (stmt); i++)
761 {
762 tree arg = gimple_call_arg (stmt, i);
763 if (!process_use (stmt, arg, loop_vinfo, live_p, relevant,
764 &worklist, false))
765 {
766 worklist.release ();
767 return false;
768 }
769 }
770 }
771 }
772 else
773 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
774 {
775 tree op = USE_FROM_PTR (use_p);
776 if (!process_use (stmt, op, loop_vinfo, live_p, relevant,
777 &worklist, false))
778 {
779 worklist.release ();
780 return false;
781 }
782 }
783
784 if (STMT_VINFO_GATHER_P (stmt_vinfo))
785 {
786 tree off;
787 tree decl = vect_check_gather (stmt, loop_vinfo, NULL, &off, NULL);
788 gcc_assert (decl);
789 if (!process_use (stmt, off, loop_vinfo, live_p, relevant,
790 &worklist, true))
791 {
792 worklist.release ();
793 return false;
794 }
795 }
796 } /* while worklist */
797
798 worklist.release ();
799 return true;
800 }
801
802
803 /* Function vect_model_simple_cost.
804
805 Models cost for simple operations, i.e. those that only emit ncopies of a
806 single op. Right now, this does not account for multiple insns that could
807 be generated for the single vector op. We will handle that shortly. */
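/* For example, with NCOPIES == 2 and operand definition types
   { vect_constant_def, vect_internal_def } this records one vector_stmt in
   the prologue (to materialize the constant operand) and two vector_stmt
   copies in the loop body.  */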
808
809 void
810 vect_model_simple_cost (stmt_vec_info stmt_info, int ncopies,
811 enum vect_def_type *dt,
812 stmt_vector_for_cost *prologue_cost_vec,
813 stmt_vector_for_cost *body_cost_vec)
814 {
815 int i;
816 int inside_cost = 0, prologue_cost = 0;
817
818 /* The SLP costs were already calculated during SLP tree build. */
819 if (PURE_SLP_STMT (stmt_info))
820 return;
821
 822 /* FORNOW: Assuming a maximum of 2 args per stmt. */
823 for (i = 0; i < 2; i++)
824 if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
825 prologue_cost += record_stmt_cost (prologue_cost_vec, 1, vector_stmt,
826 stmt_info, 0, vect_prologue);
827
828 /* Pass the inside-of-loop statements to the target-specific cost model. */
829 inside_cost = record_stmt_cost (body_cost_vec, ncopies, vector_stmt,
830 stmt_info, 0, vect_body);
831
832 if (dump_enabled_p ())
833 dump_printf_loc (MSG_NOTE, vect_location,
834 "vect_model_simple_cost: inside_cost = %d, "
835 "prologue_cost = %d .", inside_cost, prologue_cost);
836 }
837
838
839 /* Model cost for type demotion and promotion operations. PWR is normally
840 zero for single-step promotions and demotions. It will be one if
841 two-step promotion/demotion is required, and so on. Each additional
842 step doubles the number of instructions required. */
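/* For instance, assuming vect_pow2 (N) evaluates to 2**N, a two-step
   promotion (PWR == 1) is costed below as vect_pow2 (1) + vect_pow2 (2)
   == 2 + 4 vec_promote_demote operations, while a two-step demotion is
   costed as vect_pow2 (0) + vect_pow2 (1) == 1 + 2 such operations.  */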
843
844 static void
845 vect_model_promotion_demotion_cost (stmt_vec_info stmt_info,
846 enum vect_def_type *dt, int pwr)
847 {
848 int i, tmp;
849 int inside_cost = 0, prologue_cost = 0;
850 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
851 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
852 void *target_cost_data;
853
854 /* The SLP costs were already calculated during SLP tree build. */
855 if (PURE_SLP_STMT (stmt_info))
856 return;
857
858 if (loop_vinfo)
859 target_cost_data = LOOP_VINFO_TARGET_COST_DATA (loop_vinfo);
860 else
861 target_cost_data = BB_VINFO_TARGET_COST_DATA (bb_vinfo);
862
863 for (i = 0; i < pwr + 1; i++)
864 {
865 tmp = (STMT_VINFO_TYPE (stmt_info) == type_promotion_vec_info_type) ?
866 (i + 1) : i;
867 inside_cost += add_stmt_cost (target_cost_data, vect_pow2 (tmp),
868 vec_promote_demote, stmt_info, 0,
869 vect_body);
870 }
871
 872 /* FORNOW: Assuming a maximum of 2 args per stmt. */
873 for (i = 0; i < 2; i++)
874 if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
875 prologue_cost += add_stmt_cost (target_cost_data, 1, vector_stmt,
876 stmt_info, 0, vect_prologue);
877
878 if (dump_enabled_p ())
879 dump_printf_loc (MSG_NOTE, vect_location,
880 "vect_model_promotion_demotion_cost: inside_cost = %d, "
881 "prologue_cost = %d .", inside_cost, prologue_cost);
882 }
883
884 /* Function vect_cost_group_size
885
886 For grouped load or store, return the group_size only if it is the first
887 load or store of a group, else return 1. This ensures that group size is
888 only returned once per group. */
889
890 static int
891 vect_cost_group_size (stmt_vec_info stmt_info)
892 {
893 gimple first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
894
895 if (first_stmt == STMT_VINFO_STMT (stmt_info))
896 return GROUP_SIZE (stmt_info);
897
898 return 1;
899 }
900
901
902 /* Function vect_model_store_cost
903
904 Models cost for stores. In the case of grouped accesses, one access
905 has the overhead of the grouped access attributed to it. */
906
907 void
908 vect_model_store_cost (stmt_vec_info stmt_info, int ncopies,
909 bool store_lanes_p, enum vect_def_type dt,
910 slp_tree slp_node,
911 stmt_vector_for_cost *prologue_cost_vec,
912 stmt_vector_for_cost *body_cost_vec)
913 {
914 int group_size;
915 unsigned int inside_cost = 0, prologue_cost = 0;
916 struct data_reference *first_dr;
917 gimple first_stmt;
918
919 /* The SLP costs were already calculated during SLP tree build. */
920 if (PURE_SLP_STMT (stmt_info))
921 return;
922
923 if (dt == vect_constant_def || dt == vect_external_def)
924 prologue_cost += record_stmt_cost (prologue_cost_vec, 1, scalar_to_vec,
925 stmt_info, 0, vect_prologue);
926
927 /* Grouped access? */
928 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
929 {
930 if (slp_node)
931 {
932 first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0];
933 group_size = 1;
934 }
935 else
936 {
937 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
938 group_size = vect_cost_group_size (stmt_info);
939 }
940
941 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
942 }
943 /* Not a grouped access. */
944 else
945 {
946 group_size = 1;
947 first_dr = STMT_VINFO_DATA_REF (stmt_info);
948 }
949
950 /* We assume that the cost of a single store-lanes instruction is
951 equivalent to the cost of GROUP_SIZE separate stores. If a grouped
952 access is instead being provided by a permute-and-store operation,
953 include the cost of the permutes. */
954 if (!store_lanes_p && group_size > 1)
955 {
956 /* Uses a high and low interleave operation for each needed permute. */
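/* E.g. for GROUP_SIZE == 4 and NCOPIES == 1 this amounts to
   1 * exact_log2 (4) * 4 == 8 vec_perm statements.  */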
957
958 int nstmts = ncopies * exact_log2 (group_size) * group_size;
959 inside_cost = record_stmt_cost (body_cost_vec, nstmts, vec_perm,
960 stmt_info, 0, vect_body);
961
962 if (dump_enabled_p ())
963 dump_printf_loc (MSG_NOTE, vect_location,
964 "vect_model_store_cost: strided group_size = %d .",
965 group_size);
966 }
967
968 /* Costs of the stores. */
969 vect_get_store_cost (first_dr, ncopies, &inside_cost, body_cost_vec);
970
971 if (dump_enabled_p ())
972 dump_printf_loc (MSG_NOTE, vect_location,
973 "vect_model_store_cost: inside_cost = %d, "
974 "prologue_cost = %d .", inside_cost, prologue_cost);
975 }
976
977
978 /* Calculate cost of DR's memory access. */
979 void
980 vect_get_store_cost (struct data_reference *dr, int ncopies,
981 unsigned int *inside_cost,
982 stmt_vector_for_cost *body_cost_vec)
983 {
984 int alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
985 gimple stmt = DR_STMT (dr);
986 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
987
988 switch (alignment_support_scheme)
989 {
990 case dr_aligned:
991 {
992 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
993 vector_store, stmt_info, 0,
994 vect_body);
995
996 if (dump_enabled_p ())
997 dump_printf_loc (MSG_NOTE, vect_location,
998 "vect_model_store_cost: aligned.");
999 break;
1000 }
1001
1002 case dr_unaligned_supported:
1003 {
1004 /* Here, we assign an additional cost for the unaligned store. */
1005 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1006 unaligned_store, stmt_info,
1007 DR_MISALIGNMENT (dr), vect_body);
1008 if (dump_enabled_p ())
1009 dump_printf_loc (MSG_NOTE, vect_location,
1010 "vect_model_store_cost: unaligned supported by "
1011 "hardware.");
1012 break;
1013 }
1014
1015 case dr_unaligned_unsupported:
1016 {
1017 *inside_cost = VECT_MAX_COST;
1018
1019 if (dump_enabled_p ())
1020 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1021 "vect_model_store_cost: unsupported access.");
1022 break;
1023 }
1024
1025 default:
1026 gcc_unreachable ();
1027 }
1028 }
1029
1030
1031 /* Function vect_model_load_cost
1032
1033 Models cost for loads. In the case of grouped accesses, the last access
1034 has the overhead of the grouped access attributed to it. Since unaligned
1035 accesses are supported for loads, we also account for the costs of the
1036 access scheme chosen. */
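/* For example, a strided load of a four-element vector
   (STMT_VINFO_STRIDE_LOAD_P) is costed below as NCOPIES * 4 scalar_load
   operations plus one vec_construct per copy.  */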
1037
1038 void
1039 vect_model_load_cost (stmt_vec_info stmt_info, int ncopies,
1040 bool load_lanes_p, slp_tree slp_node,
1041 stmt_vector_for_cost *prologue_cost_vec,
1042 stmt_vector_for_cost *body_cost_vec)
1043 {
1044 int group_size;
1045 gimple first_stmt;
1046 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr;
1047 unsigned int inside_cost = 0, prologue_cost = 0;
1048
1049 /* The SLP costs were already calculated during SLP tree build. */
1050 if (PURE_SLP_STMT (stmt_info))
1051 return;
1052
1053 /* Grouped accesses? */
1054 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
1055 if (STMT_VINFO_GROUPED_ACCESS (stmt_info) && first_stmt && !slp_node)
1056 {
1057 group_size = vect_cost_group_size (stmt_info);
1058 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
1059 }
1060 /* Not a grouped access. */
1061 else
1062 {
1063 group_size = 1;
1064 first_dr = dr;
1065 }
1066
1067 /* We assume that the cost of a single load-lanes instruction is
1068 equivalent to the cost of GROUP_SIZE separate loads. If a grouped
1069 access is instead being provided by a load-and-permute operation,
1070 include the cost of the permutes. */
1071 if (!load_lanes_p && group_size > 1)
1072 {
 1073 /* Uses even and odd extract operations for each needed permute. */
1074 int nstmts = ncopies * exact_log2 (group_size) * group_size;
1075 inside_cost += record_stmt_cost (body_cost_vec, nstmts, vec_perm,
1076 stmt_info, 0, vect_body);
1077
1078 if (dump_enabled_p ())
1079 dump_printf_loc (MSG_NOTE, vect_location,
1080 "vect_model_load_cost: strided group_size = %d .",
1081 group_size);
1082 }
1083
1084 /* The loads themselves. */
1085 if (STMT_VINFO_STRIDE_LOAD_P (stmt_info))
1086 {
1087 /* N scalar loads plus gathering them into a vector. */
1088 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1089 inside_cost += record_stmt_cost (body_cost_vec,
1090 ncopies * TYPE_VECTOR_SUBPARTS (vectype),
1091 scalar_load, stmt_info, 0, vect_body);
1092 inside_cost += record_stmt_cost (body_cost_vec, ncopies, vec_construct,
1093 stmt_info, 0, vect_body);
1094 }
1095 else
1096 vect_get_load_cost (first_dr, ncopies,
1097 ((!STMT_VINFO_GROUPED_ACCESS (stmt_info))
1098 || group_size > 1 || slp_node),
1099 &inside_cost, &prologue_cost,
1100 prologue_cost_vec, body_cost_vec, true);
1101
1102 if (dump_enabled_p ())
1103 dump_printf_loc (MSG_NOTE, vect_location,
1104 "vect_model_load_cost: inside_cost = %d, "
1105 "prologue_cost = %d .", inside_cost, prologue_cost);
1106 }
1107
1108
1109 /* Calculate cost of DR's memory access. */
1110 void
1111 vect_get_load_cost (struct data_reference *dr, int ncopies,
1112 bool add_realign_cost, unsigned int *inside_cost,
1113 unsigned int *prologue_cost,
1114 stmt_vector_for_cost *prologue_cost_vec,
1115 stmt_vector_for_cost *body_cost_vec,
1116 bool record_prologue_costs)
1117 {
1118 int alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
1119 gimple stmt = DR_STMT (dr);
1120 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1121
1122 switch (alignment_support_scheme)
1123 {
1124 case dr_aligned:
1125 {
1126 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
1127 stmt_info, 0, vect_body);
1128
1129 if (dump_enabled_p ())
1130 dump_printf_loc (MSG_NOTE, vect_location,
1131 "vect_model_load_cost: aligned.");
1132
1133 break;
1134 }
1135 case dr_unaligned_supported:
1136 {
1137 /* Here, we assign an additional cost for the unaligned load. */
1138 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1139 unaligned_load, stmt_info,
1140 DR_MISALIGNMENT (dr), vect_body);
1141
1142 if (dump_enabled_p ())
1143 dump_printf_loc (MSG_NOTE, vect_location,
1144 "vect_model_load_cost: unaligned supported by "
1145 "hardware.");
1146
1147 break;
1148 }
1149 case dr_explicit_realign:
1150 {
1151 *inside_cost += record_stmt_cost (body_cost_vec, ncopies * 2,
1152 vector_load, stmt_info, 0, vect_body);
1153 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1154 vec_perm, stmt_info, 0, vect_body);
1155
1156 /* FIXME: If the misalignment remains fixed across the iterations of
1157 the containing loop, the following cost should be added to the
1158 prologue costs. */
1159 if (targetm.vectorize.builtin_mask_for_load)
1160 *inside_cost += record_stmt_cost (body_cost_vec, 1, vector_stmt,
1161 stmt_info, 0, vect_body);
1162
1163 if (dump_enabled_p ())
1164 dump_printf_loc (MSG_NOTE, vect_location,
1165 "vect_model_load_cost: explicit realign");
1166
1167 break;
1168 }
1169 case dr_explicit_realign_optimized:
1170 {
1171 if (dump_enabled_p ())
1172 dump_printf_loc (MSG_NOTE, vect_location,
1173 "vect_model_load_cost: unaligned software "
1174 "pipelined.");
1175
1176 /* Unaligned software pipeline has a load of an address, an initial
1177 load, and possibly a mask operation to "prime" the loop. However,
1178 if this is an access in a group of loads, which provide grouped
1179 access, then the above cost should only be considered for one
1180 access in the group. Inside the loop, there is a load op
1181 and a realignment op. */
1182
1183 if (add_realign_cost && record_prologue_costs)
1184 {
1185 *prologue_cost += record_stmt_cost (prologue_cost_vec, 2,
1186 vector_stmt, stmt_info,
1187 0, vect_prologue);
1188 if (targetm.vectorize.builtin_mask_for_load)
1189 *prologue_cost += record_stmt_cost (prologue_cost_vec, 1,
1190 vector_stmt, stmt_info,
1191 0, vect_prologue);
1192 }
1193
1194 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
1195 stmt_info, 0, vect_body);
1196 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vec_perm,
1197 stmt_info, 0, vect_body);
1198
1199 if (dump_enabled_p ())
1200 dump_printf_loc (MSG_NOTE, vect_location,
1201 "vect_model_load_cost: explicit realign optimized");
1202
1203 break;
1204 }
1205
1206 case dr_unaligned_unsupported:
1207 {
1208 *inside_cost = VECT_MAX_COST;
1209
1210 if (dump_enabled_p ())
1211 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1212 "vect_model_load_cost: unsupported access.");
1213 break;
1214 }
1215
1216 default:
1217 gcc_unreachable ();
1218 }
1219 }
1220
1221 /* Insert the new stmt NEW_STMT at *GSI or at the appropriate place in
1222 the loop preheader for the vectorized stmt STMT. */
1223
1224 static void
1225 vect_init_vector_1 (gimple stmt, gimple new_stmt, gimple_stmt_iterator *gsi)
1226 {
1227 if (gsi)
1228 vect_finish_stmt_generation (stmt, new_stmt, gsi);
1229 else
1230 {
1231 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
1232 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1233
1234 if (loop_vinfo)
1235 {
1236 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1237 basic_block new_bb;
1238 edge pe;
1239
1240 if (nested_in_vect_loop_p (loop, stmt))
1241 loop = loop->inner;
1242
1243 pe = loop_preheader_edge (loop);
1244 new_bb = gsi_insert_on_edge_immediate (pe, new_stmt);
1245 gcc_assert (!new_bb);
1246 }
1247 else
1248 {
1249 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
1250 basic_block bb;
1251 gimple_stmt_iterator gsi_bb_start;
1252
1253 gcc_assert (bb_vinfo);
1254 bb = BB_VINFO_BB (bb_vinfo);
1255 gsi_bb_start = gsi_after_labels (bb);
1256 gsi_insert_before (&gsi_bb_start, new_stmt, GSI_SAME_STMT);
1257 }
1258 }
1259
1260 if (dump_enabled_p ())
1261 {
1262 dump_printf_loc (MSG_NOTE, vect_location,
1263 "created new init_stmt: ");
1264 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, new_stmt, 0);
1265 }
1266 }
1267
1268 /* Function vect_init_vector.
1269
1270 Insert a new stmt (INIT_STMT) that initializes a new variable of type
1271 TYPE with the value VAL. If TYPE is a vector type and VAL does not have
 1272 a vector type, a vector with all elements equal to VAL is created first.
 1273 Place the initialization at GSI if it is not NULL. Otherwise, place the
1274 initialization at the loop preheader.
1275 Return the DEF of INIT_STMT.
1276 It will be used in the vectorization of STMT. */
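/* For example, initializing a four-element integer vector from the scalar
   constant 7 assigns the vector constant { 7, 7, 7, 7 } to a new temporary
   (inserted on the loop preheader edge when GSI is NULL) and returns the
   resulting SSA name.  */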
1277
1278 tree
1279 vect_init_vector (gimple stmt, tree val, tree type, gimple_stmt_iterator *gsi)
1280 {
1281 tree new_var;
1282 gimple init_stmt;
1283 tree vec_oprnd;
1284 tree new_temp;
1285
1286 if (TREE_CODE (type) == VECTOR_TYPE
1287 && TREE_CODE (TREE_TYPE (val)) != VECTOR_TYPE)
1288 {
1289 if (!types_compatible_p (TREE_TYPE (type), TREE_TYPE (val)))
1290 {
1291 if (CONSTANT_CLASS_P (val))
1292 val = fold_unary (VIEW_CONVERT_EXPR, TREE_TYPE (type), val);
1293 else
1294 {
1295 new_temp = make_ssa_name (TREE_TYPE (type), NULL);
1296 init_stmt = gimple_build_assign_with_ops (NOP_EXPR,
1297 new_temp, val,
1298 NULL_TREE);
1299 vect_init_vector_1 (stmt, init_stmt, gsi);
1300 val = new_temp;
1301 }
1302 }
1303 val = build_vector_from_val (type, val);
1304 }
1305
1306 new_var = vect_get_new_vect_var (type, vect_simple_var, "cst_");
1307 init_stmt = gimple_build_assign (new_var, val);
1308 new_temp = make_ssa_name (new_var, init_stmt);
1309 gimple_assign_set_lhs (init_stmt, new_temp);
1310 vect_init_vector_1 (stmt, init_stmt, gsi);
1311 vec_oprnd = gimple_assign_lhs (init_stmt);
1312 return vec_oprnd;
1313 }
1314
1315
1316 /* Function vect_get_vec_def_for_operand.
1317
1318 OP is an operand in STMT. This function returns a (vector) def that will be
1319 used in the vectorized stmt for STMT.
1320
1321 In the case that OP is an SSA_NAME which is defined in the loop, then
1322 STMT_VINFO_VEC_STMT of the defining stmt holds the relevant def.
1323
1324 In case OP is an invariant or constant, a new stmt that creates a vector def
1325 needs to be introduced. */
1326
1327 tree
1328 vect_get_vec_def_for_operand (tree op, gimple stmt, tree *scalar_def)
1329 {
1330 tree vec_oprnd;
1331 gimple vec_stmt;
1332 gimple def_stmt;
1333 stmt_vec_info def_stmt_info = NULL;
1334 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
1335 unsigned int nunits;
1336 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1337 tree def;
1338 enum vect_def_type dt;
1339 bool is_simple_use;
1340 tree vector_type;
1341
1342 if (dump_enabled_p ())
1343 {
1344 dump_printf_loc (MSG_NOTE, vect_location,
1345 "vect_get_vec_def_for_operand: ");
1346 dump_generic_expr (MSG_NOTE, TDF_SLIM, op);
1347 }
1348
1349 is_simple_use = vect_is_simple_use (op, stmt, loop_vinfo, NULL,
1350 &def_stmt, &def, &dt);
1351 gcc_assert (is_simple_use);
1352 if (dump_enabled_p ())
1353 {
1354 int loc_printed = 0;
1355 if (def)
1356 {
1357 dump_printf_loc (MSG_NOTE, vect_location, "def = ");
1358 loc_printed = 1;
1359 dump_generic_expr (MSG_NOTE, TDF_SLIM, def);
1360 }
1361 if (def_stmt)
1362 {
1363 if (loc_printed)
1364 dump_printf (MSG_NOTE, " def_stmt = ");
1365 else
1366 dump_printf_loc (MSG_NOTE, vect_location, " def_stmt = ");
1367 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, def_stmt, 0);
1368 }
1369 }
1370
1371 switch (dt)
1372 {
1373 /* Case 1: operand is a constant. */
1374 case vect_constant_def:
1375 {
1376 vector_type = get_vectype_for_scalar_type (TREE_TYPE (op));
1377 gcc_assert (vector_type);
1378 nunits = TYPE_VECTOR_SUBPARTS (vector_type);
1379
1380 if (scalar_def)
1381 *scalar_def = op;
1382
1383 /* Create 'vect_cst_ = {cst,cst,...,cst}' */
1384 if (dump_enabled_p ())
1385 dump_printf_loc (MSG_NOTE, vect_location,
1386 "Create vector_cst. nunits = %d", nunits);
1387
1388 return vect_init_vector (stmt, op, vector_type, NULL);
1389 }
1390
1391 /* Case 2: operand is defined outside the loop - loop invariant. */
1392 case vect_external_def:
1393 {
1394 vector_type = get_vectype_for_scalar_type (TREE_TYPE (def));
1395 gcc_assert (vector_type);
1396
1397 if (scalar_def)
1398 *scalar_def = def;
1399
1400 /* Create 'vec_inv = {inv,inv,..,inv}' */
1401 if (dump_enabled_p ())
1402 dump_printf_loc (MSG_NOTE, vect_location, "Create vector_inv.");
1403
1404 return vect_init_vector (stmt, def, vector_type, NULL);
1405 }
1406
1407 /* Case 3: operand is defined inside the loop. */
1408 case vect_internal_def:
1409 {
1410 if (scalar_def)
1411 *scalar_def = NULL/* FIXME tuples: def_stmt*/;
1412
1413 /* Get the def from the vectorized stmt. */
1414 def_stmt_info = vinfo_for_stmt (def_stmt);
1415
1416 vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
1417 /* Get vectorized pattern statement. */
1418 if (!vec_stmt
1419 && STMT_VINFO_IN_PATTERN_P (def_stmt_info)
1420 && !STMT_VINFO_RELEVANT (def_stmt_info))
1421 vec_stmt = STMT_VINFO_VEC_STMT (vinfo_for_stmt (
1422 STMT_VINFO_RELATED_STMT (def_stmt_info)));
1423 gcc_assert (vec_stmt);
1424 if (gimple_code (vec_stmt) == GIMPLE_PHI)
1425 vec_oprnd = PHI_RESULT (vec_stmt);
1426 else if (is_gimple_call (vec_stmt))
1427 vec_oprnd = gimple_call_lhs (vec_stmt);
1428 else
1429 vec_oprnd = gimple_assign_lhs (vec_stmt);
1430 return vec_oprnd;
1431 }
1432
1433 /* Case 4: operand is defined by a loop header phi - reduction */
1434 case vect_reduction_def:
1435 case vect_double_reduction_def:
1436 case vect_nested_cycle:
1437 {
1438 struct loop *loop;
1439
1440 gcc_assert (gimple_code (def_stmt) == GIMPLE_PHI);
1441 loop = (gimple_bb (def_stmt))->loop_father;
1442
1443 /* Get the def before the loop */
1444 op = PHI_ARG_DEF_FROM_EDGE (def_stmt, loop_preheader_edge (loop));
1445 return get_initial_def_for_reduction (stmt, op, scalar_def);
1446 }
1447
1448 /* Case 5: operand is defined by loop-header phi - induction. */
1449 case vect_induction_def:
1450 {
1451 gcc_assert (gimple_code (def_stmt) == GIMPLE_PHI);
1452
1453 /* Get the def from the vectorized stmt. */
1454 def_stmt_info = vinfo_for_stmt (def_stmt);
1455 vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
1456 if (gimple_code (vec_stmt) == GIMPLE_PHI)
1457 vec_oprnd = PHI_RESULT (vec_stmt);
1458 else
1459 vec_oprnd = gimple_get_lhs (vec_stmt);
1460 return vec_oprnd;
1461 }
1462
1463 default:
1464 gcc_unreachable ();
1465 }
1466 }
1467
1468
1469 /* Function vect_get_vec_def_for_stmt_copy
1470
1471 Return a vector-def for an operand. This function is used when the
1472 vectorized stmt to be created (by the caller to this function) is a "copy"
1473 created in case the vectorized result cannot fit in one vector, and several
1474 copies of the vector-stmt are required. In this case the vector-def is
1475 retrieved from the vector stmt recorded in the STMT_VINFO_RELATED_STMT field
1476 of the stmt that defines VEC_OPRND.
1477 DT is the type of the vector def VEC_OPRND.
1478
1479 Context:
1480 In case the vectorization factor (VF) is bigger than the number
1481 of elements that can fit in a vectype (nunits), we have to generate
1482 more than one vector stmt to vectorize the scalar stmt. This situation
1483 arises when there are multiple data-types operated upon in the loop; the
1484 smallest data-type determines the VF, and as a result, when vectorizing
1485 stmts operating on wider types we need to create 'VF/nunits' "copies" of the
1486 vector stmt (each computing a vector of 'nunits' results, and together
1487 computing 'VF' results in each iteration). This function is called when
1488 vectorizing such a stmt (e.g. vectorizing S2 in the illustration below, in
1489 which VF=16 and nunits=4, so the number of copies required is 4):
1490
1491 scalar stmt: vectorized into: STMT_VINFO_RELATED_STMT
1492
1493 S1: x = load VS1.0: vx.0 = memref0 VS1.1
1494 VS1.1: vx.1 = memref1 VS1.2
1495 VS1.2: vx.2 = memref2 VS1.3
1496 VS1.3: vx.3 = memref3
1497
1498 S2: z = x + ... VSnew.0: vz0 = vx.0 + ... VSnew.1
1499 VSnew.1: vz1 = vx.1 + ... VSnew.2
1500 VSnew.2: vz2 = vx.2 + ... VSnew.3
1501 VSnew.3: vz3 = vx.3 + ...
1502
1503 The vectorization of S1 is explained in vectorizable_load.
1504 The vectorization of S2:
1505 To create the first vector-stmt out of the 4 copies - VSnew.0 -
1506 the function 'vect_get_vec_def_for_operand' is called to
1507 get the relevant vector-def for each operand of S2. For operand x it
1508 returns the vector-def 'vx.0'.
1509
1510 To create the remaining copies of the vector-stmt (VSnew.j), this
1511 function is called to get the relevant vector-def for each operand. It is
1512 obtained from the respective VS1.j stmt, which is recorded in the
1513 STMT_VINFO_RELATED_STMT field of the stmt that defines VEC_OPRND.
1514
1515 For example, to obtain the vector-def 'vx.1' in order to create the
1516 vector stmt 'VSnew.1', this function is called with VEC_OPRND='vx.0'.
1517 Given 'vx0' we obtain the stmt that defines it ('VS1.0'); from the
1518 STMT_VINFO_RELATED_STMT field of 'VS1.0' we obtain the next copy - 'VS1.1',
1519 and return its def ('vx.1').
1520 Overall, to create the above sequence this function will be called 3 times:
1521 vx.1 = vect_get_vec_def_for_stmt_copy (dt, vx.0);
1522 vx.2 = vect_get_vec_def_for_stmt_copy (dt, vx.1);
1523 vx.3 = vect_get_vec_def_for_stmt_copy (dt, vx.2); */
1524
1525 tree
1526 vect_get_vec_def_for_stmt_copy (enum vect_def_type dt, tree vec_oprnd)
1527 {
1528 gimple vec_stmt_for_operand;
1529 stmt_vec_info def_stmt_info;
1530
1531 /* Do nothing; can reuse same def. */
1532 if (dt == vect_external_def || dt == vect_constant_def )
1533 return vec_oprnd;
1534
1535 vec_stmt_for_operand = SSA_NAME_DEF_STMT (vec_oprnd);
1536 def_stmt_info = vinfo_for_stmt (vec_stmt_for_operand);
1537 gcc_assert (def_stmt_info);
1538 vec_stmt_for_operand = STMT_VINFO_RELATED_STMT (def_stmt_info);
1539 gcc_assert (vec_stmt_for_operand);
1540 vec_oprnd = gimple_get_lhs (vec_stmt_for_operand);
1541 if (gimple_code (vec_stmt_for_operand) == GIMPLE_PHI)
1542 vec_oprnd = PHI_RESULT (vec_stmt_for_operand);
1543 else
1544 vec_oprnd = gimple_get_lhs (vec_stmt_for_operand);
1545 return vec_oprnd;
1546 }
1547
1548
1549 /* Get vectorized definitions for the operands to create a copy of an original
1550 stmt. See vect_get_vec_def_for_stmt_copy () for details. */
1551
1552 static void
1553 vect_get_vec_defs_for_stmt_copy (enum vect_def_type *dt,
1554 vec<tree> *vec_oprnds0,
1555 vec<tree> *vec_oprnds1)
1556 {
1557 tree vec_oprnd = vec_oprnds0->pop ();
1558
1559 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd);
1560 vec_oprnds0->quick_push (vec_oprnd);
1561
1562 if (vec_oprnds1 && vec_oprnds1->length ())
1563 {
1564 vec_oprnd = vec_oprnds1->pop ();
1565 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[1], vec_oprnd);
1566 vec_oprnds1->quick_push (vec_oprnd);
1567 }
1568 }
1569
1570
1571 /* Get vectorized definitions for OP0 and OP1.
1572 REDUC_INDEX is the index of reduction operand in case of reduction,
1573 and -1 otherwise. */
1574
1575 void
1576 vect_get_vec_defs (tree op0, tree op1, gimple stmt,
1577 vec<tree> *vec_oprnds0,
1578 vec<tree> *vec_oprnds1,
1579 slp_tree slp_node, int reduc_index)
1580 {
1581 if (slp_node)
1582 {
1583 int nops = (op1 == NULL_TREE) ? 1 : 2;
1584 vec<tree> ops;
1585 ops.create (nops);
1586 vec<vec<tree> > vec_defs;
1587 vec_defs.create (nops);
1588
1589 ops.quick_push (op0);
1590 if (op1)
1591 ops.quick_push (op1);
1592
1593 vect_get_slp_defs (ops, slp_node, &vec_defs, reduc_index);
1594
1595 *vec_oprnds0 = vec_defs[0];
1596 if (op1)
1597 *vec_oprnds1 = vec_defs[1];
1598
1599 ops.release ();
1600 vec_defs.release ();
1601 }
1602 else
1603 {
1604 tree vec_oprnd;
1605
1606 vec_oprnds0->create (1);
1607 vec_oprnd = vect_get_vec_def_for_operand (op0, stmt, NULL);
1608 vec_oprnds0->quick_push (vec_oprnd);
1609
1610 if (op1)
1611 {
1612 vec_oprnds1->create (1);
1613 vec_oprnd = vect_get_vec_def_for_operand (op1, stmt, NULL);
1614 vec_oprnds1->quick_push (vec_oprnd);
1615 }
1616 }
1617 }
1618
1619
1620 /* Function vect_finish_stmt_generation.
1621
1622 Insert a new stmt. */
1623
1624 void
1625 vect_finish_stmt_generation (gimple stmt, gimple vec_stmt,
1626 gimple_stmt_iterator *gsi)
1627 {
1628 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1629 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1630 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
1631
1632 gcc_assert (gimple_code (stmt) != GIMPLE_LABEL);
1633
1634 if (!gsi_end_p (*gsi)
1635 && gimple_has_mem_ops (vec_stmt))
1636 {
1637 gimple at_stmt = gsi_stmt (*gsi);
1638 tree vuse = gimple_vuse (at_stmt);
1639 if (vuse && TREE_CODE (vuse) == SSA_NAME)
1640 {
1641 tree vdef = gimple_vdef (at_stmt);
1642 gimple_set_vuse (vec_stmt, gimple_vuse (at_stmt));
1643 /* If we have an SSA vuse and insert a store, update virtual
1644 SSA form to avoid triggering the renamer. Do so only
1645 if we can easily see all uses - which is what almost always
1646 happens with the way vectorized stmts are inserted. */
1647 if ((vdef && TREE_CODE (vdef) == SSA_NAME)
1648 && ((is_gimple_assign (vec_stmt)
1649 && !is_gimple_reg (gimple_assign_lhs (vec_stmt)))
1650 || (is_gimple_call (vec_stmt)
1651 && !(gimple_call_flags (vec_stmt)
1652 & (ECF_CONST|ECF_PURE|ECF_NOVOPS)))))
1653 {
1654 tree new_vdef = copy_ssa_name (vuse, vec_stmt);
1655 gimple_set_vdef (vec_stmt, new_vdef);
1656 SET_USE (gimple_vuse_op (at_stmt), new_vdef);
1657 }
1658 }
1659 }
1660 gsi_insert_before (gsi, vec_stmt, GSI_SAME_STMT);
1661
1662 set_vinfo_for_stmt (vec_stmt, new_stmt_vec_info (vec_stmt, loop_vinfo,
1663 bb_vinfo));
1664
1665 if (dump_enabled_p ())
1666 {
1667 dump_printf_loc (MSG_NOTE, vect_location, "add new stmt: ");
1668 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, vec_stmt, 0);
1669 }
1670
1671 gimple_set_location (vec_stmt, gimple_location (stmt));
1672 }
1673
 1674 /* Check whether CALL can be vectorized using input vector type VECTYPE_IN
 1675 and output vector type VECTYPE_OUT. Return a function declaration if the
 1676 target has a vectorized version of the function, or NULL_TREE otherwise. */
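/* For example, a target that provides a packed double-precision square root
   may map a call to the sqrt builtin with a two-element double vector type
   to the corresponding target builtin here; if no suitable target builtin
   exists, NULL_TREE is returned and the call is not vectorized.  */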
1677
1678 tree
1679 vectorizable_function (gimple call, tree vectype_out, tree vectype_in)
1680 {
1681 tree fndecl = gimple_call_fndecl (call);
1682
1683 /* We only handle functions that do not read or clobber memory -- i.e.
1684 const or novops ones. */
1685 if (!(gimple_call_flags (call) & (ECF_CONST | ECF_NOVOPS)))
1686 return NULL_TREE;
1687
1688 if (!fndecl
1689 || TREE_CODE (fndecl) != FUNCTION_DECL
1690 || !DECL_BUILT_IN (fndecl))
1691 return NULL_TREE;
1692
1693 return targetm.vectorize.builtin_vectorized_function (fndecl, vectype_out,
1694 vectype_in);
1695 }
1696
1697 /* Function vectorizable_call.
1698
1699 Check if STMT performs a function call that can be vectorized.
1700 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
1701 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
1702 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
1703
1704 static bool
1705 vectorizable_call (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
1706 slp_tree slp_node)
1707 {
1708 tree vec_dest;
1709 tree scalar_dest;
1710 tree op, type;
1711 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
1712 stmt_vec_info stmt_info = vinfo_for_stmt (stmt), prev_stmt_info;
1713 tree vectype_out, vectype_in;
1714 int nunits_in;
1715 int nunits_out;
1716 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1717 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
1718 tree fndecl, new_temp, def, rhs_type;
1719 gimple def_stmt;
1720 enum vect_def_type dt[3]
1721 = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
1722 gimple new_stmt = NULL;
1723 int ncopies, j;
1724 vec<tree> vargs = vNULL;
1725 enum { NARROW, NONE, WIDEN } modifier;
1726 size_t i, nargs;
1727 tree lhs;
1728
1729 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
1730 return false;
1731
1732 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
1733 return false;
1734
1735 /* Is STMT a vectorizable call? */
1736 if (!is_gimple_call (stmt))
1737 return false;
1738
1739 if (TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
1740 return false;
1741
1742 if (stmt_can_throw_internal (stmt))
1743 return false;
1744
1745 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
1746
1747 /* Process function arguments. */
1748 rhs_type = NULL_TREE;
1749 vectype_in = NULL_TREE;
1750 nargs = gimple_call_num_args (stmt);
1751
 1752 /* Bail out if the function has more than three arguments; we do not have
 1753 interesting builtin functions to vectorize with more than two arguments
 1754 except for fma. Zero arguments is not supported either. */
1755 if (nargs == 0 || nargs > 3)
1756 return false;
1757
1758 for (i = 0; i < nargs; i++)
1759 {
1760 tree opvectype;
1761
1762 op = gimple_call_arg (stmt, i);
1763
1764 /* We can only handle calls with arguments of the same type. */
1765 if (rhs_type
1766 && !types_compatible_p (rhs_type, TREE_TYPE (op)))
1767 {
1768 if (dump_enabled_p ())
1769 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1770 "argument types differ.");
1771 return false;
1772 }
1773 if (!rhs_type)
1774 rhs_type = TREE_TYPE (op);
1775
1776 if (!vect_is_simple_use_1 (op, stmt, loop_vinfo, bb_vinfo,
1777 &def_stmt, &def, &dt[i], &opvectype))
1778 {
1779 if (dump_enabled_p ())
1780 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1781 "use not simple.");
1782 return false;
1783 }
1784
1785 if (!vectype_in)
1786 vectype_in = opvectype;
1787 else if (opvectype
1788 && opvectype != vectype_in)
1789 {
1790 if (dump_enabled_p ())
1791 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1792 "argument vector types differ.");
1793 return false;
1794 }
1795 }
1796   /* If all arguments are external or constant defs, use a vector type with
1797      the same size as the output vector type.  */
1798 if (!vectype_in)
1799 vectype_in = get_same_sized_vectype (rhs_type, vectype_out);
1800 if (vec_stmt)
1801 gcc_assert (vectype_in);
1802 if (!vectype_in)
1803 {
1804 if (dump_enabled_p ())
1805 {
1806 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1807 "no vectype for scalar type ");
1808 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, rhs_type);
1809 }
1810
1811 return false;
1812 }
1813
1814 /* FORNOW */
1815 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
1816 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
1817 if (nunits_in == nunits_out / 2)
1818 modifier = NARROW;
1819 else if (nunits_out == nunits_in)
1820 modifier = NONE;
1821 else if (nunits_out == nunits_in / 2)
1822 modifier = WIDEN;
1823 else
1824 return false;
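
  /* Illustration (vector types chosen only as an example): a call taking
     V2DF arguments and producing V4SF results has nunits_in == 2 and
     nunits_out == 4, so modifier == NARROW and two input vectors feed each
     vector call; equal unit counts give NONE, and nunits_out == nunits_in / 2
     would be the WIDEN case, which no target implements yet.  */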
1825
1826 /* For now, we only vectorize functions if a target specific builtin
1827 is available. TODO -- in some cases, it might be profitable to
1828 insert the calls for pieces of the vector, in order to be able
1829 to vectorize other operations in the loop. */
1830 fndecl = vectorizable_function (stmt, vectype_out, vectype_in);
1831 if (fndecl == NULL_TREE)
1832 {
1833 if (dump_enabled_p ())
1834 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1835 "function is not vectorizable.");
1836
1837 return false;
1838 }
1839
1840 gcc_assert (!gimple_vuse (stmt));
1841
1842 if (slp_node || PURE_SLP_STMT (stmt_info))
1843 ncopies = 1;
1844 else if (modifier == NARROW)
1845 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out;
1846 else
1847 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
1848
1849 /* Sanity check: make sure that at least one copy of the vectorized stmt
1850 needs to be generated. */
1851 gcc_assert (ncopies >= 1);
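
  /* E.g. with a vectorization factor of 8 and V4SI operands (nunits_in == 4,
     modifier == NONE), ncopies == 8 / 4 == 2: two vector calls are emitted
     for each scalar call.  The numbers are purely illustrative.  */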
1852
1853 if (!vec_stmt) /* transformation not required. */
1854 {
1855 STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
1856 if (dump_enabled_p ())
1857 dump_printf_loc (MSG_NOTE, vect_location, "=== vectorizable_call ===");
1858 vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
1859 return true;
1860 }
1861
1862 /** Transform. **/
1863
1864 if (dump_enabled_p ())
1865 dump_printf_loc (MSG_NOTE, vect_location, "transform call.");
1866
1867 /* Handle def. */
1868 scalar_dest = gimple_call_lhs (stmt);
1869 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
1870
1871 prev_stmt_info = NULL;
1872 switch (modifier)
1873 {
1874 case NONE:
1875 for (j = 0; j < ncopies; ++j)
1876 {
1877 /* Build argument list for the vectorized call. */
1878 if (j == 0)
1879 vargs.create (nargs);
1880 else
1881 vargs.truncate (0);
1882
1883 if (slp_node)
1884 {
1885 vec<vec<tree> > vec_defs;
1886 vec_defs.create (nargs);
1887 vec<tree> vec_oprnds0;
1888
1889 for (i = 0; i < nargs; i++)
1890 vargs.quick_push (gimple_call_arg (stmt, i));
1891 vect_get_slp_defs (vargs, slp_node, &vec_defs, -1);
1892 vec_oprnds0 = vec_defs[0];
1893
1894 /* Arguments are ready. Create the new vector stmt. */
1895 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_oprnd0)
1896 {
1897 size_t k;
1898 for (k = 0; k < nargs; k++)
1899 {
1900 vec<tree> vec_oprndsk = vec_defs[k];
1901 vargs[k] = vec_oprndsk[i];
1902 }
1903 new_stmt = gimple_build_call_vec (fndecl, vargs);
1904 new_temp = make_ssa_name (vec_dest, new_stmt);
1905 gimple_call_set_lhs (new_stmt, new_temp);
1906 vect_finish_stmt_generation (stmt, new_stmt, gsi);
1907 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
1908 }
1909
1910 for (i = 0; i < nargs; i++)
1911 {
1912 vec<tree> vec_oprndsi = vec_defs[i];
1913 vec_oprndsi.release ();
1914 }
1915 vec_defs.release ();
1916 continue;
1917 }
1918
1919 for (i = 0; i < nargs; i++)
1920 {
1921 op = gimple_call_arg (stmt, i);
1922 if (j == 0)
1923 vec_oprnd0
1924 = vect_get_vec_def_for_operand (op, stmt, NULL);
1925 else
1926 {
1927 vec_oprnd0 = gimple_call_arg (new_stmt, i);
1928 vec_oprnd0
1929 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
1930 }
1931
1932 vargs.quick_push (vec_oprnd0);
1933 }
1934
1935 new_stmt = gimple_build_call_vec (fndecl, vargs);
1936 new_temp = make_ssa_name (vec_dest, new_stmt);
1937 gimple_call_set_lhs (new_stmt, new_temp);
1938 vect_finish_stmt_generation (stmt, new_stmt, gsi);
1939
1940 if (j == 0)
1941 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
1942 else
1943 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
1944
1945 prev_stmt_info = vinfo_for_stmt (new_stmt);
1946 }
1947
1948 break;
1949
1950 case NARROW:
1951 for (j = 0; j < ncopies; ++j)
1952 {
1953 /* Build argument list for the vectorized call. */
1954 if (j == 0)
1955 vargs.create (nargs * 2);
1956 else
1957 vargs.truncate (0);
1958
1959 if (slp_node)
1960 {
1961 vec<vec<tree> > vec_defs;
1962 vec_defs.create (nargs);
1963 vec<tree> vec_oprnds0;
1964
1965 for (i = 0; i < nargs; i++)
1966 vargs.quick_push (gimple_call_arg (stmt, i));
1967 vect_get_slp_defs (vargs, slp_node, &vec_defs, -1);
1968 vec_oprnds0 = vec_defs[0];
1969
1970 /* Arguments are ready. Create the new vector stmt. */
1971 for (i = 0; vec_oprnds0.iterate (i, &vec_oprnd0); i += 2)
1972 {
1973 size_t k;
1974 vargs.truncate (0);
1975 for (k = 0; k < nargs; k++)
1976 {
1977 vec<tree> vec_oprndsk = vec_defs[k];
1978 vargs.quick_push (vec_oprndsk[i]);
1979 vargs.quick_push (vec_oprndsk[i + 1]);
1980 }
1981 new_stmt = gimple_build_call_vec (fndecl, vargs);
1982 new_temp = make_ssa_name (vec_dest, new_stmt);
1983 gimple_call_set_lhs (new_stmt, new_temp);
1984 vect_finish_stmt_generation (stmt, new_stmt, gsi);
1985 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
1986 }
1987
1988 for (i = 0; i < nargs; i++)
1989 {
1990 vec<tree> vec_oprndsi = vec_defs[i];
1991 vec_oprndsi.release ();
1992 }
1993 vec_defs.release ();
1994 continue;
1995 }
1996
1997 for (i = 0; i < nargs; i++)
1998 {
1999 op = gimple_call_arg (stmt, i);
2000 if (j == 0)
2001 {
2002 vec_oprnd0
2003 = vect_get_vec_def_for_operand (op, stmt, NULL);
2004 vec_oprnd1
2005 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
2006 }
2007 else
2008 {
2009 vec_oprnd1 = gimple_call_arg (new_stmt, 2*i + 1);
2010 vec_oprnd0
2011 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd1);
2012 vec_oprnd1
2013 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
2014 }
2015
2016 vargs.quick_push (vec_oprnd0);
2017 vargs.quick_push (vec_oprnd1);
2018 }
2019
2020 new_stmt = gimple_build_call_vec (fndecl, vargs);
2021 new_temp = make_ssa_name (vec_dest, new_stmt);
2022 gimple_call_set_lhs (new_stmt, new_temp);
2023 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2024
2025 if (j == 0)
2026 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
2027 else
2028 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2029
2030 prev_stmt_info = vinfo_for_stmt (new_stmt);
2031 }
2032
2033 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
2034
2035 break;
2036
2037 case WIDEN:
2038 /* No current target implements this case. */
2039 return false;
2040 }
2041
2042 vargs.release ();
2043
2044 /* Update the exception handling table with the vector stmt if necessary. */
2045 if (maybe_clean_or_replace_eh_stmt (stmt, *vec_stmt))
2046 gimple_purge_dead_eh_edges (gimple_bb (stmt));
2047
2048 /* The call in STMT might prevent it from being removed in dce.
2049 We however cannot remove it here, due to the way the ssa name
2050 it defines is mapped to the new definition. So just replace
2051      the rhs of the statement with something harmless.  */
2052
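  /* Sketch of the replacement done below (names purely illustrative):
       before:  x_1 = sqrtf (a_2);
       after:   x_1 = 0.0;
     The vector stmts computing the real results were emitted above; the
     now-dead scalar assignment can then be cleaned up by DCE.  */
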
2053 if (slp_node)
2054 return true;
2055
2056 type = TREE_TYPE (scalar_dest);
2057 if (is_pattern_stmt_p (stmt_info))
2058 lhs = gimple_call_lhs (STMT_VINFO_RELATED_STMT (stmt_info));
2059 else
2060 lhs = gimple_call_lhs (stmt);
2061 new_stmt = gimple_build_assign (lhs, build_zero_cst (type));
2062 set_vinfo_for_stmt (new_stmt, stmt_info);
2063 set_vinfo_for_stmt (stmt, NULL);
2064 STMT_VINFO_STMT (stmt_info) = new_stmt;
2065 gsi_replace (gsi, new_stmt, false);
2066 SSA_NAME_DEF_STMT (gimple_assign_lhs (new_stmt)) = new_stmt;
2067
2068 return true;
2069 }
2070
2071
2072 /* Function vect_gen_widened_results_half
2073
2074    Create a vector stmt whose code is CODE, whose result variable is VEC_DEST,
2075    and whose operands are VEC_OPRND0 and VEC_OPRND1 (OP_TYPE says whether the
2076    operation is unary or binary).  The new vector stmt is to be inserted at BSI.
2077 In the case that CODE is a CALL_EXPR, this means that a call to DECL
2078 needs to be created (DECL is a function-decl of a target-builtin).
2079 STMT is the original scalar stmt that we are vectorizing. */
2080
2081 static gimple
2082 vect_gen_widened_results_half (enum tree_code code,
2083 tree decl,
2084 tree vec_oprnd0, tree vec_oprnd1, int op_type,
2085 tree vec_dest, gimple_stmt_iterator *gsi,
2086 gimple stmt)
2087 {
2088 gimple new_stmt;
2089 tree new_temp;
2090
2091 /* Generate half of the widened result: */
2092 if (code == CALL_EXPR)
2093 {
2094 /* Target specific support */
2095 if (op_type == binary_op)
2096 new_stmt = gimple_build_call (decl, 2, vec_oprnd0, vec_oprnd1);
2097 else
2098 new_stmt = gimple_build_call (decl, 1, vec_oprnd0);
2099 new_temp = make_ssa_name (vec_dest, new_stmt);
2100 gimple_call_set_lhs (new_stmt, new_temp);
2101 }
2102 else
2103 {
2104 /* Generic support */
2105 gcc_assert (op_type == TREE_CODE_LENGTH (code));
2106 if (op_type != binary_op)
2107 vec_oprnd1 = NULL;
2108 new_stmt = gimple_build_assign_with_ops (code, vec_dest, vec_oprnd0,
2109 vec_oprnd1);
2110 new_temp = make_ssa_name (vec_dest, new_stmt);
2111 gimple_assign_set_lhs (new_stmt, new_temp);
2112 }
2113 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2114
2115 return new_stmt;
2116 }
2117
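/* For example, when widening V8HI operands to V4SI results, this function is
   called twice from vect_create_vectorized_promotion_stmts: once with the
   "lo" code/decl and once with the "hi" one; each call emits a vector stmt
   that produces half of the widened elements.  (Vector types illustrative.)  */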
2118
2119 /* Get vectorized definitions for loop-based vectorization. For the first
2120 operand we call vect_get_vec_def_for_operand() (with OPRND containing
2121 scalar operand), and for the rest we get a copy with
2122 vect_get_vec_def_for_stmt_copy() using the previous vector definition
2123 (stored in OPRND). See vect_get_vec_def_for_stmt_copy() for details.
2124 The vectors are collected into VEC_OPRNDS. */
2125
2126 static void
2127 vect_get_loop_based_defs (tree *oprnd, gimple stmt, enum vect_def_type dt,
2128 vec<tree> *vec_oprnds, int multi_step_cvt)
2129 {
2130 tree vec_oprnd;
2131
2132 /* Get first vector operand. */
2133   /* All the vector operands except the very first one (which is the scalar
2134      operand) are stmt copies.  */
2135 if (TREE_CODE (TREE_TYPE (*oprnd)) != VECTOR_TYPE)
2136 vec_oprnd = vect_get_vec_def_for_operand (*oprnd, stmt, NULL);
2137 else
2138 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, *oprnd);
2139
2140 vec_oprnds->quick_push (vec_oprnd);
2141
2142 /* Get second vector operand. */
2143 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, vec_oprnd);
2144 vec_oprnds->quick_push (vec_oprnd);
2145
2146 *oprnd = vec_oprnd;
2147
2148 /* For conversion in multiple steps, continue to get operands
2149 recursively. */
2150 if (multi_step_cvt)
2151 vect_get_loop_based_defs (oprnd, stmt, dt, vec_oprnds, multi_step_cvt - 1);
2152 }
2153
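/* Note that each invocation pushes two vector defs and then recurses
   MULTI_STEP_CVT more times, so the call in vectorizable_conversion, which
   passes vect_pow2 (multi_step_cvt) - 1, collects
   2 * vect_pow2 (multi_step_cvt) defs -- e.g. four defs when there is one
   intermediate type.  */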
2154
2155 /* Create vectorized demotion statements for vector operands from VEC_OPRNDS.
2156 For multi-step conversions store the resulting vectors and call the function
2157 recursively. */
2158
2159 static void
2160 vect_create_vectorized_demotion_stmts (vec<tree> *vec_oprnds,
2161 int multi_step_cvt, gimple stmt,
2162 vec<tree> vec_dsts,
2163 gimple_stmt_iterator *gsi,
2164 slp_tree slp_node, enum tree_code code,
2165 stmt_vec_info *prev_stmt_info)
2166 {
2167 unsigned int i;
2168 tree vop0, vop1, new_tmp, vec_dest;
2169 gimple new_stmt;
2170 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2171
2172 vec_dest = vec_dsts.pop ();
2173
2174 for (i = 0; i < vec_oprnds->length (); i += 2)
2175 {
2176 /* Create demotion operation. */
2177 vop0 = (*vec_oprnds)[i];
2178 vop1 = (*vec_oprnds)[i + 1];
2179 new_stmt = gimple_build_assign_with_ops (code, vec_dest, vop0, vop1);
2180 new_tmp = make_ssa_name (vec_dest, new_stmt);
2181 gimple_assign_set_lhs (new_stmt, new_tmp);
2182 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2183
2184 if (multi_step_cvt)
2185 /* Store the resulting vector for next recursive call. */
2186 (*vec_oprnds)[i/2] = new_tmp;
2187 else
2188 {
2189 /* This is the last step of the conversion sequence. Store the
2190 vectors in SLP_NODE or in vector info of the scalar statement
2191 (or in STMT_VINFO_RELATED_STMT chain). */
2192 if (slp_node)
2193 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
2194 else
2195 {
2196 if (!*prev_stmt_info)
2197 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
2198 else
2199 STMT_VINFO_RELATED_STMT (*prev_stmt_info) = new_stmt;
2200
2201 *prev_stmt_info = vinfo_for_stmt (new_stmt);
2202 }
2203 }
2204 }
2205
2206 /* For multi-step demotion operations we first generate demotion operations
2207 from the source type to the intermediate types, and then combine the
2208      results (stored in VEC_OPRNDS) in a demotion operation to the destination
2209 type. */
2210 if (multi_step_cvt)
2211 {
2212 /* At each level of recursion we have half of the operands we had at the
2213 previous level. */
2214 vec_oprnds->truncate ((i+1)/2);
2215 vect_create_vectorized_demotion_stmts (vec_oprnds, multi_step_cvt - 1,
2216 stmt, vec_dsts, gsi, slp_node,
2217 VEC_PACK_TRUNC_EXPR,
2218 prev_stmt_info);
2219 }
2220
2221 vec_dsts.quick_push (vec_dest);
2222 }
2223
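/* For instance, demoting int to char in two steps: the first level packs
   pairs of V4SI operands into V8HI vectors, and the recursive call (using
   VEC_PACK_TRUNC_EXPR) packs pairs of those into the final V16QI vectors.
   The vector types are only an example.  */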
2224
2225 /* Create vectorized promotion statements for vector operands from VEC_OPRNDS0
2226 and VEC_OPRNDS1 (for binary operations). For multi-step conversions store
2227 the resulting vectors and call the function recursively. */
2228
2229 static void
2230 vect_create_vectorized_promotion_stmts (vec<tree> *vec_oprnds0,
2231 vec<tree> *vec_oprnds1,
2232 gimple stmt, tree vec_dest,
2233 gimple_stmt_iterator *gsi,
2234 enum tree_code code1,
2235 enum tree_code code2, tree decl1,
2236 tree decl2, int op_type)
2237 {
2238 int i;
2239 tree vop0, vop1, new_tmp1, new_tmp2;
2240 gimple new_stmt1, new_stmt2;
2241 vec<tree> vec_tmp = vNULL;
2242
2243 vec_tmp.create (vec_oprnds0->length () * 2);
2244 FOR_EACH_VEC_ELT (*vec_oprnds0, i, vop0)
2245 {
2246 if (op_type == binary_op)
2247 vop1 = (*vec_oprnds1)[i];
2248 else
2249 vop1 = NULL_TREE;
2250
2251 /* Generate the two halves of promotion operation. */
2252 new_stmt1 = vect_gen_widened_results_half (code1, decl1, vop0, vop1,
2253 op_type, vec_dest, gsi, stmt);
2254 new_stmt2 = vect_gen_widened_results_half (code2, decl2, vop0, vop1,
2255 op_type, vec_dest, gsi, stmt);
2256 if (is_gimple_call (new_stmt1))
2257 {
2258 new_tmp1 = gimple_call_lhs (new_stmt1);
2259 new_tmp2 = gimple_call_lhs (new_stmt2);
2260 }
2261 else
2262 {
2263 new_tmp1 = gimple_assign_lhs (new_stmt1);
2264 new_tmp2 = gimple_assign_lhs (new_stmt2);
2265 }
2266
2267 /* Store the results for the next step. */
2268 vec_tmp.quick_push (new_tmp1);
2269 vec_tmp.quick_push (new_tmp2);
2270 }
2271
2272 vec_oprnds0->release ();
2273 *vec_oprnds0 = vec_tmp;
2274 }
2275
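/* For example, when promoting V8HI operands to V4SI results, each input
   vector yields two output vectors (its "lo" and "hi" halves), so the
   vector returned in VEC_OPRNDS0 is twice as long as the input one.
   (Vector types illustrative.)  */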
2276
2277 /* Check if STMT performs a conversion operation that can be vectorized.
2278 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2279 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
2280 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
2281
2282 static bool
2283 vectorizable_conversion (gimple stmt, gimple_stmt_iterator *gsi,
2284 gimple *vec_stmt, slp_tree slp_node)
2285 {
2286 tree vec_dest;
2287 tree scalar_dest;
2288 tree op0, op1 = NULL_TREE;
2289 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
2290 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2291 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2292 enum tree_code code, code1 = ERROR_MARK, code2 = ERROR_MARK;
2293 enum tree_code codecvt1 = ERROR_MARK, codecvt2 = ERROR_MARK;
2294 tree decl1 = NULL_TREE, decl2 = NULL_TREE;
2295 tree new_temp;
2296 tree def;
2297 gimple def_stmt;
2298 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
2299 gimple new_stmt = NULL;
2300 stmt_vec_info prev_stmt_info;
2301 int nunits_in;
2302 int nunits_out;
2303 tree vectype_out, vectype_in;
2304 int ncopies, i, j;
2305 tree lhs_type, rhs_type;
2306 enum { NARROW, NONE, WIDEN } modifier;
2307 vec<tree> vec_oprnds0 = vNULL;
2308 vec<tree> vec_oprnds1 = vNULL;
2309 tree vop0;
2310 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
2311 int multi_step_cvt = 0;
2312 vec<tree> vec_dsts = vNULL;
2313 vec<tree> interm_types = vNULL;
2314 tree last_oprnd, intermediate_type, cvt_type = NULL_TREE;
2315 int op_type;
2316 enum machine_mode rhs_mode;
2317 unsigned short fltsz;
2318
2319 /* Is STMT a vectorizable conversion? */
2320
2321 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
2322 return false;
2323
2324 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
2325 return false;
2326
2327 if (!is_gimple_assign (stmt))
2328 return false;
2329
2330 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
2331 return false;
2332
2333 code = gimple_assign_rhs_code (stmt);
2334 if (!CONVERT_EXPR_CODE_P (code)
2335 && code != FIX_TRUNC_EXPR
2336 && code != FLOAT_EXPR
2337 && code != WIDEN_MULT_EXPR
2338 && code != WIDEN_LSHIFT_EXPR)
2339 return false;
2340
2341 op_type = TREE_CODE_LENGTH (code);
2342
2343 /* Check types of lhs and rhs. */
2344 scalar_dest = gimple_assign_lhs (stmt);
2345 lhs_type = TREE_TYPE (scalar_dest);
2346 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
2347
2348 op0 = gimple_assign_rhs1 (stmt);
2349 rhs_type = TREE_TYPE (op0);
2350
2351 if ((code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
2352 && !((INTEGRAL_TYPE_P (lhs_type)
2353 && INTEGRAL_TYPE_P (rhs_type))
2354 || (SCALAR_FLOAT_TYPE_P (lhs_type)
2355 && SCALAR_FLOAT_TYPE_P (rhs_type))))
2356 return false;
2357
2358 if ((INTEGRAL_TYPE_P (lhs_type)
2359 && (TYPE_PRECISION (lhs_type)
2360 != GET_MODE_PRECISION (TYPE_MODE (lhs_type))))
2361 || (INTEGRAL_TYPE_P (rhs_type)
2362 && (TYPE_PRECISION (rhs_type)
2363 != GET_MODE_PRECISION (TYPE_MODE (rhs_type)))))
2364 {
2365 if (dump_enabled_p ())
2366 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2367 "type conversion to/from bit-precision unsupported.");
2368 return false;
2369 }
2370
2371 /* Check the operands of the operation. */
2372 if (!vect_is_simple_use_1 (op0, stmt, loop_vinfo, bb_vinfo,
2373 &def_stmt, &def, &dt[0], &vectype_in))
2374 {
2375 if (dump_enabled_p ())
2376 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2377 "use not simple.");
2378 return false;
2379 }
2380 if (op_type == binary_op)
2381 {
2382 bool ok;
2383
2384 op1 = gimple_assign_rhs2 (stmt);
2385 gcc_assert (code == WIDEN_MULT_EXPR || code == WIDEN_LSHIFT_EXPR);
2386 /* For WIDEN_MULT_EXPR, if OP0 is a constant, use the type of
2387 OP1. */
2388 if (CONSTANT_CLASS_P (op0))
2389 ok = vect_is_simple_use_1 (op1, stmt, loop_vinfo, bb_vinfo,
2390 &def_stmt, &def, &dt[1], &vectype_in);
2391 else
2392 ok = vect_is_simple_use (op1, stmt, loop_vinfo, bb_vinfo, &def_stmt,
2393 &def, &dt[1]);
2394
2395 if (!ok)
2396 {
2397 if (dump_enabled_p ())
2398 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2399 "use not simple.");
2400 return false;
2401 }
2402 }
2403
2404   /* If op0 is an external or constant def, use a vector type of
2405      the same size as the output vector type.  */
2406 if (!vectype_in)
2407 vectype_in = get_same_sized_vectype (rhs_type, vectype_out);
2408 if (vec_stmt)
2409 gcc_assert (vectype_in);
2410 if (!vectype_in)
2411 {
2412 if (dump_enabled_p ())
2413 {
2414 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2415 "no vectype for scalar type ");
2416 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, rhs_type);
2417 }
2418
2419 return false;
2420 }
2421
2422 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
2423 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
2424 if (nunits_in < nunits_out)
2425 modifier = NARROW;
2426 else if (nunits_out == nunits_in)
2427 modifier = NONE;
2428 else
2429 modifier = WIDEN;
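
  /* E.g. a short -> int conversion typically has vectype_in V8HI (8 units)
     and vectype_out V4SI (4 units), hence modifier == WIDEN; the reverse
     int -> short conversion is the NARROW case.  The concrete vector types
     depend on the target and are only illustrative here.  */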
2430
2431 /* Multiple types in SLP are handled by creating the appropriate number of
2432 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
2433 case of SLP. */
2434 if (slp_node || PURE_SLP_STMT (stmt_info))
2435 ncopies = 1;
2436 else if (modifier == NARROW)
2437 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out;
2438 else
2439 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
2440
2441 /* Sanity check: make sure that at least one copy of the vectorized stmt
2442 needs to be generated. */
2443 gcc_assert (ncopies >= 1);
2444
2445 /* Supportable by target? */
2446 switch (modifier)
2447 {
2448 case NONE:
2449 if (code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
2450 return false;
2451 if (supportable_convert_operation (code, vectype_out, vectype_in,
2452 &decl1, &code1))
2453 break;
2454 /* FALLTHRU */
2455 unsupported:
2456 if (dump_enabled_p ())
2457 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2458 "conversion not supported by target.");
2459 return false;
2460
2461 case WIDEN:
2462 if (supportable_widening_operation (code, stmt, vectype_out, vectype_in,
2463 &code1, &code2, &multi_step_cvt,
2464 &interm_types))
2465 {
2466 /* Binary widening operation can only be supported directly by the
2467 architecture. */
2468 gcc_assert (!(multi_step_cvt && op_type == binary_op));
2469 break;
2470 }
2471
2472 if (code != FLOAT_EXPR
2473 || (GET_MODE_SIZE (TYPE_MODE (lhs_type))
2474 <= GET_MODE_SIZE (TYPE_MODE (rhs_type))))
2475 goto unsupported;
2476
2477 rhs_mode = TYPE_MODE (rhs_type);
2478 fltsz = GET_MODE_SIZE (TYPE_MODE (lhs_type));
2479 for (rhs_mode = GET_MODE_2XWIDER_MODE (TYPE_MODE (rhs_type));
2480 rhs_mode != VOIDmode && GET_MODE_SIZE (rhs_mode) <= fltsz;
2481 rhs_mode = GET_MODE_2XWIDER_MODE (rhs_mode))
2482 {
2483 cvt_type
2484 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
2485 cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
2486 if (cvt_type == NULL_TREE)
2487 goto unsupported;
2488
2489 if (GET_MODE_SIZE (rhs_mode) == fltsz)
2490 {
2491 if (!supportable_convert_operation (code, vectype_out,
2492 cvt_type, &decl1, &codecvt1))
2493 goto unsupported;
2494 }
2495 else if (!supportable_widening_operation (code, stmt, vectype_out,
2496 cvt_type, &codecvt1,
2497 &codecvt2, &multi_step_cvt,
2498 &interm_types))
2499 continue;
2500 else
2501 gcc_assert (multi_step_cvt == 0);
2502
2503 if (supportable_widening_operation (NOP_EXPR, stmt, cvt_type,
2504 vectype_in, &code1, &code2,
2505 &multi_step_cvt, &interm_types))
2506 break;
2507 }
2508
2509 if (rhs_mode == VOIDmode || GET_MODE_SIZE (rhs_mode) > fltsz)
2510 goto unsupported;
2511
2512 if (GET_MODE_SIZE (rhs_mode) == fltsz)
2513 codecvt2 = ERROR_MARK;
2514 else
2515 {
2516 multi_step_cvt++;
2517 interm_types.safe_push (cvt_type);
2518 cvt_type = NULL_TREE;
2519 }
2520 break;
2521
2522 case NARROW:
2523 gcc_assert (op_type == unary_op);
2524 if (supportable_narrowing_operation (code, vectype_out, vectype_in,
2525 &code1, &multi_step_cvt,
2526 &interm_types))
2527 break;
2528
2529 if (code != FIX_TRUNC_EXPR
2530 || (GET_MODE_SIZE (TYPE_MODE (lhs_type))
2531 >= GET_MODE_SIZE (TYPE_MODE (rhs_type))))
2532 goto unsupported;
2533
2534 rhs_mode = TYPE_MODE (rhs_type);
2535 cvt_type
2536 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
2537 cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
2538 if (cvt_type == NULL_TREE)
2539 goto unsupported;
2540 if (!supportable_convert_operation (code, cvt_type, vectype_in,
2541 &decl1, &codecvt1))
2542 goto unsupported;
2543 if (supportable_narrowing_operation (NOP_EXPR, vectype_out, cvt_type,
2544 &code1, &multi_step_cvt,
2545 &interm_types))
2546 break;
2547 goto unsupported;
2548
2549 default:
2550 gcc_unreachable ();
2551 }
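
  /* As an illustration of the multi-step cases above: a short -> float
     conversion (WIDEN) is split into a widening short -> int step
     (CODE1/CODE2) followed by an int -> float conversion (CODECVT1), while
     a float -> short conversion (NARROW) is split into a float -> int
     conversion (CODECVT1) followed by a narrowing int -> short step (CODE1).
     The intermediate type is recorded in CVT_TYPE.  */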
2552
2553 if (!vec_stmt) /* transformation not required. */
2554 {
2555 if (dump_enabled_p ())
2556 dump_printf_loc (MSG_NOTE, vect_location,
2557 "=== vectorizable_conversion ===");
2558 if (code == FIX_TRUNC_EXPR || code == FLOAT_EXPR)
2559 {
2560 STMT_VINFO_TYPE (stmt_info) = type_conversion_vec_info_type;
2561 vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
2562 }
2563 else if (modifier == NARROW)
2564 {
2565 STMT_VINFO_TYPE (stmt_info) = type_demotion_vec_info_type;
2566 vect_model_promotion_demotion_cost (stmt_info, dt, multi_step_cvt);
2567 }
2568 else
2569 {
2570 STMT_VINFO_TYPE (stmt_info) = type_promotion_vec_info_type;
2571 vect_model_promotion_demotion_cost (stmt_info, dt, multi_step_cvt);
2572 }
2573 interm_types.release ();
2574 return true;
2575 }
2576
2577 /** Transform. **/
2578 if (dump_enabled_p ())
2579 dump_printf_loc (MSG_NOTE, vect_location,
2580 "transform conversion. ncopies = %d.", ncopies);
2581
2582 if (op_type == binary_op)
2583 {
2584 if (CONSTANT_CLASS_P (op0))
2585 op0 = fold_convert (TREE_TYPE (op1), op0);
2586 else if (CONSTANT_CLASS_P (op1))
2587 op1 = fold_convert (TREE_TYPE (op0), op1);
2588 }
2589
2590 /* In case of multi-step conversion, we first generate conversion operations
2591      to the intermediate types, and then from those types to the final one.
2592 We create vector destinations for the intermediate type (TYPES) received
2593 from supportable_*_operation, and store them in the correct order
2594 for future use in vect_create_vectorized_*_stmts (). */
2595 vec_dsts.create (multi_step_cvt + 1);
2596 vec_dest = vect_create_destination_var (scalar_dest,
2597 (cvt_type && modifier == WIDEN)
2598 ? cvt_type : vectype_out);
2599 vec_dsts.quick_push (vec_dest);
2600
2601 if (multi_step_cvt)
2602 {
2603 for (i = interm_types.length () - 1;
2604 interm_types.iterate (i, &intermediate_type); i--)
2605 {
2606 vec_dest = vect_create_destination_var (scalar_dest,
2607 intermediate_type);
2608 vec_dsts.quick_push (vec_dest);
2609 }
2610 }
2611
2612 if (cvt_type)
2613 vec_dest = vect_create_destination_var (scalar_dest,
2614 modifier == WIDEN
2615 ? vectype_out : cvt_type);
2616
2617 if (!slp_node)
2618 {
2619 if (modifier == WIDEN)
2620 {
2621 vec_oprnds0.create (multi_step_cvt ? vect_pow2(multi_step_cvt) : 1);
2622 if (op_type == binary_op)
2623 vec_oprnds1.create (1);
2624 }
2625 else if (modifier == NARROW)
2626 vec_oprnds0.create (
2627 2 * (multi_step_cvt ? vect_pow2 (multi_step_cvt) : 1));
2628 }
2629 else if (code == WIDEN_LSHIFT_EXPR)
2630 vec_oprnds1.create (slp_node->vec_stmts_size);
2631
2632 last_oprnd = op0;
2633 prev_stmt_info = NULL;
2634 switch (modifier)
2635 {
2636 case NONE:
2637 for (j = 0; j < ncopies; j++)
2638 {
2639 if (j == 0)
2640 vect_get_vec_defs (op0, NULL, stmt, &vec_oprnds0, NULL, slp_node,
2641 -1);
2642 else
2643 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, NULL);
2644
2645 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
2646 {
2647          /* Arguments are ready.  Create the new vector stmt.  */
2648 if (code1 == CALL_EXPR)
2649 {
2650 new_stmt = gimple_build_call (decl1, 1, vop0);
2651 new_temp = make_ssa_name (vec_dest, new_stmt);
2652 gimple_call_set_lhs (new_stmt, new_temp);
2653 }
2654 else
2655 {
2656 gcc_assert (TREE_CODE_LENGTH (code1) == unary_op);
2657 new_stmt = gimple_build_assign_with_ops (code1, vec_dest,
2658 vop0, NULL);
2659 new_temp = make_ssa_name (vec_dest, new_stmt);
2660 gimple_assign_set_lhs (new_stmt, new_temp);
2661 }
2662
2663 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2664 if (slp_node)
2665 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
2666 }
2667
2668 if (j == 0)
2669 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2670 else
2671 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2672 prev_stmt_info = vinfo_for_stmt (new_stmt);
2673 }
2674 break;
2675
2676 case WIDEN:
2677 /* In case the vectorization factor (VF) is bigger than the number
2678 of elements that we can fit in a vectype (nunits), we have to
2679          generate more than one vector stmt -- i.e. we need to "unroll"
2680 the vector stmt by a factor VF/nunits. */
2681 for (j = 0; j < ncopies; j++)
2682 {
2683 /* Handle uses. */
2684 if (j == 0)
2685 {
2686 if (slp_node)
2687 {
2688 if (code == WIDEN_LSHIFT_EXPR)
2689 {
2690 unsigned int k;
2691
2692 vec_oprnd1 = op1;
2693 /* Store vec_oprnd1 for every vector stmt to be created
2694 for SLP_NODE. We check during the analysis that all
2695 the shift arguments are the same. */
2696 for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
2697 vec_oprnds1.quick_push (vec_oprnd1);
2698
2699 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
2700 slp_node, -1);
2701 }
2702 else
2703 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0,
2704 &vec_oprnds1, slp_node, -1);
2705 }
2706 else
2707 {
2708 vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt, NULL);
2709 vec_oprnds0.quick_push (vec_oprnd0);
2710 if (op_type == binary_op)
2711 {
2712 if (code == WIDEN_LSHIFT_EXPR)
2713 vec_oprnd1 = op1;
2714 else
2715 vec_oprnd1 = vect_get_vec_def_for_operand (op1, stmt,
2716 NULL);
2717 vec_oprnds1.quick_push (vec_oprnd1);
2718 }
2719 }
2720 }
2721 else
2722 {
2723 vec_oprnd0 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd0);
2724 vec_oprnds0.truncate (0);
2725 vec_oprnds0.quick_push (vec_oprnd0);
2726 if (op_type == binary_op)
2727 {
2728 if (code == WIDEN_LSHIFT_EXPR)
2729 vec_oprnd1 = op1;
2730 else
2731 vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt[1],
2732 vec_oprnd1);
2733 vec_oprnds1.truncate (0);
2734 vec_oprnds1.quick_push (vec_oprnd1);
2735 }
2736 }
2737
2738 /* Arguments are ready. Create the new vector stmts. */
2739 for (i = multi_step_cvt; i >= 0; i--)
2740 {
2741 tree this_dest = vec_dsts[i];
2742 enum tree_code c1 = code1, c2 = code2;
2743 if (i == 0 && codecvt2 != ERROR_MARK)
2744 {
2745 c1 = codecvt1;
2746 c2 = codecvt2;
2747 }
2748 vect_create_vectorized_promotion_stmts (&vec_oprnds0,
2749 &vec_oprnds1,
2750 stmt, this_dest, gsi,
2751 c1, c2, decl1, decl2,
2752 op_type);
2753 }
2754
2755 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
2756 {
2757 if (cvt_type)
2758 {
2759 if (codecvt1 == CALL_EXPR)
2760 {
2761 new_stmt = gimple_build_call (decl1, 1, vop0);
2762 new_temp = make_ssa_name (vec_dest, new_stmt);
2763 gimple_call_set_lhs (new_stmt, new_temp);
2764 }
2765 else
2766 {
2767 gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
2768 new_temp = make_ssa_name (vec_dest, NULL);
2769 new_stmt = gimple_build_assign_with_ops (codecvt1,
2770 new_temp,
2771 vop0, NULL);
2772 }
2773
2774 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2775 }
2776 else
2777 new_stmt = SSA_NAME_DEF_STMT (vop0);
2778
2779 if (slp_node)
2780 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
2781 else
2782 {
2783 if (!prev_stmt_info)
2784 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
2785 else
2786 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2787 prev_stmt_info = vinfo_for_stmt (new_stmt);
2788 }
2789 }
2790 }
2791
2792 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
2793 break;
2794
2795 case NARROW:
2796 /* In case the vectorization factor (VF) is bigger than the number
2797 of elements that we can fit in a vectype (nunits), we have to
2798          generate more than one vector stmt -- i.e. we need to "unroll"
2799 the vector stmt by a factor VF/nunits. */
2800 for (j = 0; j < ncopies; j++)
2801 {
2802 /* Handle uses. */
2803 if (slp_node)
2804 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
2805 slp_node, -1);
2806 else
2807 {
2808 vec_oprnds0.truncate (0);
2809 vect_get_loop_based_defs (&last_oprnd, stmt, dt[0], &vec_oprnds0,
2810 vect_pow2 (multi_step_cvt) - 1);
2811 }
2812
2813 /* Arguments are ready. Create the new vector stmts. */
2814 if (cvt_type)
2815 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
2816 {
2817 if (codecvt1 == CALL_EXPR)
2818 {
2819 new_stmt = gimple_build_call (decl1, 1, vop0);
2820 new_temp = make_ssa_name (vec_dest, new_stmt);
2821 gimple_call_set_lhs (new_stmt, new_temp);
2822 }
2823 else
2824 {
2825 gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
2826 new_temp = make_ssa_name (vec_dest, NULL);
2827 new_stmt = gimple_build_assign_with_ops (codecvt1, new_temp,
2828 vop0, NULL);
2829 }
2830
2831 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2832 vec_oprnds0[i] = new_temp;
2833 }
2834
2835 vect_create_vectorized_demotion_stmts (&vec_oprnds0, multi_step_cvt,
2836 stmt, vec_dsts, gsi,
2837 slp_node, code1,
2838 &prev_stmt_info);
2839 }
2840
2841 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
2842 break;
2843 }
2844
2845 vec_oprnds0.release ();
2846 vec_oprnds1.release ();
2847 vec_dsts.release ();
2848 interm_types.release ();
2849
2850 return true;
2851 }
2852
2853
2854 /* Function vectorizable_assignment.
2855
2856 Check if STMT performs an assignment (copy) that can be vectorized.
2857 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2858 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
2859 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
2860
2861 static bool
2862 vectorizable_assignment (gimple stmt, gimple_stmt_iterator *gsi,
2863 gimple *vec_stmt, slp_tree slp_node)
2864 {
2865 tree vec_dest;
2866 tree scalar_dest;
2867 tree op;
2868 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2869 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2870 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2871 tree new_temp;
2872 tree def;
2873 gimple def_stmt;
2874 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
2875 unsigned int nunits = TYPE_VECTOR_SUBPARTS (vectype);
2876 int ncopies;
2877 int i, j;
2878 vec<tree> vec_oprnds = vNULL;
2879 tree vop;
2880 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
2881 gimple new_stmt = NULL;
2882 stmt_vec_info prev_stmt_info = NULL;
2883 enum tree_code code;
2884 tree vectype_in;
2885
2886 /* Multiple types in SLP are handled by creating the appropriate number of
2887 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
2888 case of SLP. */
2889 if (slp_node || PURE_SLP_STMT (stmt_info))
2890 ncopies = 1;
2891 else
2892 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
2893
2894 gcc_assert (ncopies >= 1);
2895
2896 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
2897 return false;
2898
2899 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
2900 return false;
2901
2902 /* Is vectorizable assignment? */
2903 if (!is_gimple_assign (stmt))
2904 return false;
2905
2906 scalar_dest = gimple_assign_lhs (stmt);
2907 if (TREE_CODE (scalar_dest) != SSA_NAME)
2908 return false;
2909
2910 code = gimple_assign_rhs_code (stmt);
2911 if (gimple_assign_single_p (stmt)
2912 || code == PAREN_EXPR
2913 || CONVERT_EXPR_CODE_P (code))
2914 op = gimple_assign_rhs1 (stmt);
2915 else
2916 return false;
2917
2918 if (code == VIEW_CONVERT_EXPR)
2919 op = TREE_OPERAND (op, 0);
2920
2921 if (!vect_is_simple_use_1 (op, stmt, loop_vinfo, bb_vinfo,
2922 &def_stmt, &def, &dt[0], &vectype_in))
2923 {
2924 if (dump_enabled_p ())
2925 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2926 "use not simple.");
2927 return false;
2928 }
2929
2930 /* We can handle NOP_EXPR conversions that do not change the number
2931 of elements or the vector size. */
2932 if ((CONVERT_EXPR_CODE_P (code)
2933 || code == VIEW_CONVERT_EXPR)
2934 && (!vectype_in
2935 || TYPE_VECTOR_SUBPARTS (vectype_in) != nunits
2936 || (GET_MODE_SIZE (TYPE_MODE (vectype))
2937 != GET_MODE_SIZE (TYPE_MODE (vectype_in)))))
2938 return false;
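
  /* For example, a cast between int and unsigned int (both V4SI), or a
     VIEW_CONVERT_EXPR between V4SI and V4SF, passes this check and is
     vectorized as a plain copy; an int -> short conversion changes the
     number of elements and is rejected here (vectorizable_conversion
     handles it instead).  The vector types are only an example.  */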
2939
2940 /* We do not handle bit-precision changes. */
2941 if ((CONVERT_EXPR_CODE_P (code)
2942 || code == VIEW_CONVERT_EXPR)
2943 && INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest))
2944 && ((TYPE_PRECISION (TREE_TYPE (scalar_dest))
2945 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest))))
2946 || ((TYPE_PRECISION (TREE_TYPE (op))
2947 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (op))))))
2948 /* But a conversion that does not change the bit-pattern is ok. */
2949 && !((TYPE_PRECISION (TREE_TYPE (scalar_dest))
2950 > TYPE_PRECISION (TREE_TYPE (op)))
2951 && TYPE_UNSIGNED (TREE_TYPE (op))))
2952 {
2953 if (dump_enabled_p ())
2954 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2955 "type conversion to/from bit-precision "
2956 "unsupported.");
2957 return false;
2958 }
2959
2960 if (!vec_stmt) /* transformation not required. */
2961 {
2962 STMT_VINFO_TYPE (stmt_info) = assignment_vec_info_type;
2963 if (dump_enabled_p ())
2964 dump_printf_loc (MSG_NOTE, vect_location,
2965 "=== vectorizable_assignment ===");
2966 vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
2967 return true;
2968 }
2969
2970 /** Transform. **/
2971 if (dump_enabled_p ())
2972 dump_printf_loc (MSG_NOTE, vect_location, "transform assignment.");
2973
2974 /* Handle def. */
2975 vec_dest = vect_create_destination_var (scalar_dest, vectype);
2976
2977 /* Handle use. */
2978 for (j = 0; j < ncopies; j++)
2979 {
2980 /* Handle uses. */
2981 if (j == 0)
2982 vect_get_vec_defs (op, NULL, stmt, &vec_oprnds, NULL, slp_node, -1);
2983 else
2984 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds, NULL);
2985
2986       /* Arguments are ready.  Create the new vector stmt.  */
2987 FOR_EACH_VEC_ELT (vec_oprnds, i, vop)
2988 {
2989 if (CONVERT_EXPR_CODE_P (code)
2990 || code == VIEW_CONVERT_EXPR)
2991 vop = build1 (VIEW_CONVERT_EXPR, vectype, vop);
2992 new_stmt = gimple_build_assign (vec_dest, vop);
2993 new_temp = make_ssa_name (vec_dest, new_stmt);
2994 gimple_assign_set_lhs (new_stmt, new_temp);
2995 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2996 if (slp_node)
2997 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
2998 }
2999
3000 if (slp_node)
3001 continue;
3002
3003 if (j == 0)
3004 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3005 else
3006 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3007
3008 prev_stmt_info = vinfo_for_stmt (new_stmt);
3009 }
3010
3011 vec_oprnds.release ();
3012 return true;
3013 }
3014
3015
3016 /* Return TRUE if CODE (a shift operation) is supported for SCALAR_TYPE
3017 either as shift by a scalar or by a vector. */
3018
3019 bool
3020 vect_supportable_shift (enum tree_code code, tree scalar_type)
3021 {
3022
3023 enum machine_mode vec_mode;
3024 optab optab;
3025 int icode;
3026 tree vectype;
3027
3028 vectype = get_vectype_for_scalar_type (scalar_type);
3029 if (!vectype)
3030 return false;
3031
3032 optab = optab_for_tree_code (code, vectype, optab_scalar);
3033 if (!optab
3034 || optab_handler (optab, TYPE_MODE (vectype)) == CODE_FOR_nothing)
3035 {
3036 optab = optab_for_tree_code (code, vectype, optab_vector);
3037 if (!optab
3038 || (optab_handler (optab, TYPE_MODE (vectype))
3039 == CODE_FOR_nothing))
3040 return false;
3041 }
3042
3043 vec_mode = TYPE_MODE (vectype);
3044 icode = (int) optab_handler (optab, vec_mode);
3045 if (icode == CODE_FOR_nothing)
3046 return false;
3047
3048 return true;
3049 }
3050
3051
3052 /* Function vectorizable_shift.
3053
3054 Check if STMT performs a shift operation that can be vectorized.
3055 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
3056 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
3057 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
3058
3059 static bool
3060 vectorizable_shift (gimple stmt, gimple_stmt_iterator *gsi,
3061 gimple *vec_stmt, slp_tree slp_node)
3062 {
3063 tree vec_dest;
3064 tree scalar_dest;
3065 tree op0, op1 = NULL;
3066 tree vec_oprnd1 = NULL_TREE;
3067 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3068 tree vectype;
3069 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3070 enum tree_code code;
3071 enum machine_mode vec_mode;
3072 tree new_temp;
3073 optab optab;
3074 int icode;
3075 enum machine_mode optab_op2_mode;
3076 tree def;
3077 gimple def_stmt;
3078 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
3079 gimple new_stmt = NULL;
3080 stmt_vec_info prev_stmt_info;
3081 int nunits_in;
3082 int nunits_out;
3083 tree vectype_out;
3084 tree op1_vectype;
3085 int ncopies;
3086 int j, i;
3087 vec<tree> vec_oprnds0 = vNULL;
3088 vec<tree> vec_oprnds1 = vNULL;
3089 tree vop0, vop1;
3090 unsigned int k;
3091 bool scalar_shift_arg = true;
3092 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
3093 int vf;
3094
3095 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
3096 return false;
3097
3098 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
3099 return false;
3100
3101 /* Is STMT a vectorizable binary/unary operation? */
3102 if (!is_gimple_assign (stmt))
3103 return false;
3104
3105 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
3106 return false;
3107
3108 code = gimple_assign_rhs_code (stmt);
3109
3110 if (!(code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
3111 || code == RROTATE_EXPR))
3112 return false;
3113
3114 scalar_dest = gimple_assign_lhs (stmt);
3115 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
3116 if (TYPE_PRECISION (TREE_TYPE (scalar_dest))
3117 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest))))
3118 {
3119 if (dump_enabled_p ())
3120 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3121 "bit-precision shifts not supported.");
3122 return false;
3123 }
3124
3125 op0 = gimple_assign_rhs1 (stmt);
3126 if (!vect_is_simple_use_1 (op0, stmt, loop_vinfo, bb_vinfo,
3127 &def_stmt, &def, &dt[0], &vectype))
3128 {
3129 if (dump_enabled_p ())
3130 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3131 "use not simple.");
3132 return false;
3133 }
3134   /* If op0 is an external or constant def, use a vector type with
3135      the same size as the output vector type.  */
3136 if (!vectype)
3137 vectype = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
3138 if (vec_stmt)
3139 gcc_assert (vectype);
3140 if (!vectype)
3141 {
3142 if (dump_enabled_p ())
3143 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3144 "no vectype for scalar type ");
3145 return false;
3146 }
3147
3148 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
3149 nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
3150 if (nunits_out != nunits_in)
3151 return false;
3152
3153 op1 = gimple_assign_rhs2 (stmt);
3154 if (!vect_is_simple_use_1 (op1, stmt, loop_vinfo, bb_vinfo, &def_stmt,
3155 &def, &dt[1], &op1_vectype))
3156 {
3157 if (dump_enabled_p ())
3158 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3159 "use not simple.");
3160 return false;
3161 }
3162
3163 if (loop_vinfo)
3164 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
3165 else
3166 vf = 1;
3167
3168 /* Multiple types in SLP are handled by creating the appropriate number of
3169 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
3170 case of SLP. */
3171 if (slp_node || PURE_SLP_STMT (stmt_info))
3172 ncopies = 1;
3173 else
3174 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
3175
3176 gcc_assert (ncopies >= 1);
3177
3178   /* Determine whether the shift amount is a vector or a scalar.  If the
3179 shift/rotate amount is a vector, use the vector/vector shift optabs. */
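  /* For instance, "x << 3" or a shift by a loop-invariant amount keeps a
     scalar shift argument and can use the vector/scalar optab, whereas
     "x << y" with y defined inside the loop needs the vector/vector
     optab.  */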
3180
3181 if (dt[1] == vect_internal_def && !slp_node)
3182 scalar_shift_arg = false;
3183 else if (dt[1] == vect_constant_def
3184 || dt[1] == vect_external_def
3185 || dt[1] == vect_internal_def)
3186 {
3187       /* In SLP we need to check whether the shift count is the same in all
3188          the scalar stmts; in loops, a constant or invariant shift count is
3189          always a scalar shift.  */
3190 if (slp_node)
3191 {
3192 vec<gimple> stmts = SLP_TREE_SCALAR_STMTS (slp_node);
3193 gimple slpstmt;
3194
3195 FOR_EACH_VEC_ELT (stmts, k, slpstmt)
3196 if (!operand_equal_p (gimple_assign_rhs2 (slpstmt), op1, 0))
3197 scalar_shift_arg = false;
3198 }
3199 }
3200 else
3201 {
3202 if (dump_enabled_p ())
3203 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3204 "operand mode requires invariant argument.");
3205 return false;
3206 }
3207
3208 /* Vector shifted by vector. */
3209 if (!scalar_shift_arg)
3210 {
3211 optab = optab_for_tree_code (code, vectype, optab_vector);
3212 if (dump_enabled_p ())
3213 dump_printf_loc (MSG_NOTE, vect_location,
3214 "vector/vector shift/rotate found.");
3215
3216 if (!op1_vectype)
3217 op1_vectype = get_same_sized_vectype (TREE_TYPE (op1), vectype_out);
3218 if (op1_vectype == NULL_TREE
3219 || TYPE_MODE (op1_vectype) != TYPE_MODE (vectype))
3220 {
3221 if (dump_enabled_p ())
3222 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3223 "unusable type for last operand in"
3224 " vector/vector shift/rotate.");
3225 return false;
3226 }
3227 }
3228   /* See if the machine has a vector-shift-by-scalar insn, and if not,
3229      whether it has a vector-shift-by-vector insn.  */
3230 else
3231 {
3232 optab = optab_for_tree_code (code, vectype, optab_scalar);
3233 if (optab
3234 && optab_handler (optab, TYPE_MODE (vectype)) != CODE_FOR_nothing)
3235 {
3236 if (dump_enabled_p ())
3237 dump_printf_loc (MSG_NOTE, vect_location,
3238 "vector/scalar shift/rotate found.");
3239 }
3240 else
3241 {
3242 optab = optab_for_tree_code (code, vectype, optab_vector);
3243 if (optab
3244 && (optab_handler (optab, TYPE_MODE (vectype))
3245 != CODE_FOR_nothing))
3246 {
3247 scalar_shift_arg = false;
3248
3249 if (dump_enabled_p ())
3250 dump_printf_loc (MSG_NOTE, vect_location,
3251 "vector/vector shift/rotate found.");
3252
3253 /* Unlike the other binary operators, shifts/rotates have
3254 the rhs being int, instead of the same type as the lhs,
3255 so make sure the scalar is the right type if we are
3256 dealing with vectors of long long/long/short/char. */
3257 if (dt[1] == vect_constant_def)
3258 op1 = fold_convert (TREE_TYPE (vectype), op1);
3259 else if (!useless_type_conversion_p (TREE_TYPE (vectype),
3260 TREE_TYPE (op1)))
3261 {
3262 if (slp_node
3263 && TYPE_MODE (TREE_TYPE (vectype))
3264 != TYPE_MODE (TREE_TYPE (op1)))
3265 {
3266 if (dump_enabled_p ())
3267 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3268 "unusable type for last operand in"
3269 " vector/vector shift/rotate.");
3270 return false;
3271 }
3272 if (vec_stmt && !slp_node)
3273 {
3274 op1 = fold_convert (TREE_TYPE (vectype), op1);
3275 op1 = vect_init_vector (stmt, op1,
3276 TREE_TYPE (vectype), NULL);
3277 }
3278 }
3279 }
3280 }
3281 }
3282
3283 /* Supportable by target? */
3284 if (!optab)
3285 {
3286 if (dump_enabled_p ())
3287 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3288 "no optab.");
3289 return false;
3290 }
3291 vec_mode = TYPE_MODE (vectype);
3292 icode = (int) optab_handler (optab, vec_mode);
3293 if (icode == CODE_FOR_nothing)
3294 {
3295 if (dump_enabled_p ())
3296 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3297 "op not supported by target.");
3298 /* Check only during analysis. */
3299 if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD
3300 || (vf < vect_min_worthwhile_factor (code)
3301 && !vec_stmt))
3302 return false;
3303 if (dump_enabled_p ())
3304 dump_printf_loc (MSG_NOTE, vect_location, "proceeding using word mode.");
3305 }
3306
3307 /* Worthwhile without SIMD support? Check only during analysis. */
3308 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
3309 && vf < vect_min_worthwhile_factor (code)
3310 && !vec_stmt)
3311 {
3312 if (dump_enabled_p ())
3313 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3314 "not worthwhile without SIMD support.");
3315 return false;
3316 }
3317
3318 if (!vec_stmt) /* transformation not required. */
3319 {
3320 STMT_VINFO_TYPE (stmt_info) = shift_vec_info_type;
3321 if (dump_enabled_p ())
3322 dump_printf_loc (MSG_NOTE, vect_location, "=== vectorizable_shift ===");
3323 vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
3324 return true;
3325 }
3326
3327 /** Transform. **/
3328
3329 if (dump_enabled_p ())
3330 dump_printf_loc (MSG_NOTE, vect_location,
3331 "transform binary/unary operation.");
3332
3333 /* Handle def. */
3334 vec_dest = vect_create_destination_var (scalar_dest, vectype);
3335
3336 prev_stmt_info = NULL;
3337 for (j = 0; j < ncopies; j++)
3338 {
3339 /* Handle uses. */
3340 if (j == 0)
3341 {
3342 if (scalar_shift_arg)
3343 {
3344 /* Vector shl and shr insn patterns can be defined with scalar
3345 operand 2 (shift operand). In this case, use constant or loop
3346 invariant op1 directly, without extending it to vector mode
3347 first. */
3348 optab_op2_mode = insn_data[icode].operand[2].mode;
3349 if (!VECTOR_MODE_P (optab_op2_mode))
3350 {
3351 if (dump_enabled_p ())
3352 dump_printf_loc (MSG_NOTE, vect_location,
3353 "operand 1 using scalar mode.");
3354 vec_oprnd1 = op1;
3355 vec_oprnds1.create (slp_node ? slp_node->vec_stmts_size : 1);
3356 vec_oprnds1.quick_push (vec_oprnd1);
3357 if (slp_node)
3358 {
3359 /* Store vec_oprnd1 for every vector stmt to be created
3360 for SLP_NODE. We check during the analysis that all
3361 the shift arguments are the same.
3362 TODO: Allow different constants for different vector
3363 stmts generated for an SLP instance. */
3364 for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
3365 vec_oprnds1.quick_push (vec_oprnd1);
3366 }
3367 }
3368 }
3369
3370 /* vec_oprnd1 is available if operand 1 should be of a scalar-type
3371          (a special case for certain kinds of vector shifts); otherwise,
3372 operand 1 should be of a vector type (the usual case). */
3373 if (vec_oprnd1)
3374 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
3375 slp_node, -1);
3376 else
3377 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
3378 slp_node, -1);
3379 }
3380 else
3381 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, &vec_oprnds1);
3382
3383 /* Arguments are ready. Create the new vector stmt. */
3384 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
3385 {
3386 vop1 = vec_oprnds1[i];
3387 new_stmt = gimple_build_assign_with_ops (code, vec_dest, vop0, vop1);
3388 new_temp = make_ssa_name (vec_dest, new_stmt);
3389 gimple_assign_set_lhs (new_stmt, new_temp);
3390 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3391 if (slp_node)
3392 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
3393 }
3394
3395 if (slp_node)
3396 continue;
3397
3398 if (j == 0)
3399 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3400 else
3401 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3402 prev_stmt_info = vinfo_for_stmt (new_stmt);
3403 }
3404
3405 vec_oprnds0.release ();
3406 vec_oprnds1.release ();
3407
3408 return true;
3409 }
3410
3411
3412 static tree permute_vec_elements (tree, tree, tree, gimple,
3413 gimple_stmt_iterator *);
3414
3415
3416 /* Function vectorizable_operation.
3417
3418 Check if STMT performs a binary, unary or ternary operation that can
3419 be vectorized.
3420 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
3421 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
3422 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
3423
3424 static bool
3425 vectorizable_operation (gimple stmt, gimple_stmt_iterator *gsi,
3426 gimple *vec_stmt, slp_tree slp_node)
3427 {
3428 tree vec_dest;
3429 tree scalar_dest;
3430 tree op0, op1 = NULL_TREE, op2 = NULL_TREE;
3431 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3432 tree vectype;
3433 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3434 enum tree_code code;
3435 enum machine_mode vec_mode;
3436 tree new_temp;
3437 int op_type;
3438 optab optab;
3439 int icode;
3440 tree def;
3441 gimple def_stmt;
3442 enum vect_def_type dt[3]
3443 = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
3444 gimple new_stmt = NULL;
3445 stmt_vec_info prev_stmt_info;
3446 int nunits_in;
3447 int nunits_out;
3448 tree vectype_out;
3449 int ncopies;
3450 int j, i;
3451 vec<tree> vec_oprnds0 = vNULL;
3452 vec<tree> vec_oprnds1 = vNULL;
3453 vec<tree> vec_oprnds2 = vNULL;
3454 tree vop0, vop1, vop2;
3455 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
3456 int vf;
3457
3458 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
3459 return false;
3460
3461 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
3462 return false;
3463
3464 /* Is STMT a vectorizable binary/unary operation? */
3465 if (!is_gimple_assign (stmt))
3466 return false;
3467
3468 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
3469 return false;
3470
3471 code = gimple_assign_rhs_code (stmt);
3472
3473 /* For pointer addition, we should use the normal plus for
3474 the vector addition. */
3475 if (code == POINTER_PLUS_EXPR)
3476 code = PLUS_EXPR;
3477
3478   /* Support only unary, binary and ternary operations.  */
3479 op_type = TREE_CODE_LENGTH (code);
3480 if (op_type != unary_op && op_type != binary_op && op_type != ternary_op)
3481 {
3482 if (dump_enabled_p ())
3483 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3484 "num. args = %d (not unary/binary/ternary op).",
3485 op_type);
3486 return false;
3487 }
3488
3489 scalar_dest = gimple_assign_lhs (stmt);
3490 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
3491
3492 /* Most operations cannot handle bit-precision types without extra
3493 truncations. */
3494 if ((TYPE_PRECISION (TREE_TYPE (scalar_dest))
3495 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest))))
3496       /* Exceptions are bitwise binary operations.  */
3497 && code != BIT_IOR_EXPR
3498 && code != BIT_XOR_EXPR
3499 && code != BIT_AND_EXPR)
3500 {
3501 if (dump_enabled_p ())
3502 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3503 "bit-precision arithmetic not supported.");
3504 return false;
3505 }
3506
3507 op0 = gimple_assign_rhs1 (stmt);
3508 if (!vect_is_simple_use_1 (op0, stmt, loop_vinfo, bb_vinfo,
3509 &def_stmt, &def, &dt[0], &vectype))
3510 {
3511 if (dump_enabled_p ())
3512 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3513 "use not simple.");
3514 return false;
3515 }
3516   /* If op0 is an external or constant def, use a vector type with
3517      the same size as the output vector type.  */
3518 if (!vectype)
3519 vectype = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
3520 if (vec_stmt)
3521 gcc_assert (vectype);
3522 if (!vectype)
3523 {
3524 if (dump_enabled_p ())
3525 {
3526 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3527 "no vectype for scalar type ");
3528 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
3529 TREE_TYPE (op0));
3530 }
3531
3532 return false;
3533 }
3534
3535 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
3536 nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
3537 if (nunits_out != nunits_in)
3538 return false;
3539
3540 if (op_type == binary_op || op_type == ternary_op)
3541 {
3542 op1 = gimple_assign_rhs2 (stmt);
3543 if (!vect_is_simple_use (op1, stmt, loop_vinfo, bb_vinfo, &def_stmt,
3544 &def, &dt[1]))
3545 {
3546 if (dump_enabled_p ())
3547 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3548 "use not simple.");
3549 return false;
3550 }
3551 }
3552 if (op_type == ternary_op)
3553 {
3554 op2 = gimple_assign_rhs3 (stmt);
3555 if (!vect_is_simple_use (op2, stmt, loop_vinfo, bb_vinfo, &def_stmt,
3556 &def, &dt[2]))
3557 {
3558 if (dump_enabled_p ())
3559 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3560 "use not simple.");
3561 return false;
3562 }
3563 }
3564
3565 if (loop_vinfo)
3566 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
3567 else
3568 vf = 1;
3569
3570 /* Multiple types in SLP are handled by creating the appropriate number of
3571 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
3572 case of SLP. */
3573 if (slp_node || PURE_SLP_STMT (stmt_info))
3574 ncopies = 1;
3575 else
3576 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
3577
3578 gcc_assert (ncopies >= 1);
3579
3580 /* Shifts are handled in vectorizable_shift (). */
3581 if (code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
3582 || code == RROTATE_EXPR)
3583 return false;
3584
3585 /* Supportable by target? */
3586
3587 vec_mode = TYPE_MODE (vectype);
3588 if (code == MULT_HIGHPART_EXPR)
3589 {
3590 if (can_mult_highpart_p (vec_mode, TYPE_UNSIGNED (vectype)))
3591 icode = LAST_INSN_CODE;
3592 else
3593 icode = CODE_FOR_nothing;
3594 }
3595 else
3596 {
3597 optab = optab_for_tree_code (code, vectype, optab_default);
3598 if (!optab)
3599 {
3600 if (dump_enabled_p ())
3601 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3602 "no optab.");
3603 return false;
3604 }
3605 icode = (int) optab_handler (optab, vec_mode);
3606 }
3607
3608 if (icode == CODE_FOR_nothing)
3609 {
3610 if (dump_enabled_p ())
3611 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3612 "op not supported by target.");
3613 /* Check only during analysis. */
3614 if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD
3615 || (!vec_stmt && vf < vect_min_worthwhile_factor (code)))
3616 return false;
3617 if (dump_enabled_p ())
3618 dump_printf_loc (MSG_NOTE, vect_location, "proceeding using word mode.");
3619 }
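
  /* Illustration (target-dependent): a word-sized vector -- e.g. V8QI on a
     64-bit target -- doing a bitwise AND can still proceed using a plain
     word-mode operation even though no SIMD pattern exists; vectors that
     are not exactly word-sized, or (during analysis) a vectorization factor
     below vect_min_worthwhile_factor, make us give up instead.  */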
3620
3621 /* Worthwhile without SIMD support? Check only during analysis. */
3622 if (!VECTOR_MODE_P (vec_mode)
3623 && !vec_stmt
3624 && vf < vect_min_worthwhile_factor (code))
3625 {
3626 if (dump_enabled_p ())
3627 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3628 "not worthwhile without SIMD support.");
3629 return false;
3630 }
3631
3632 if (!vec_stmt) /* transformation not required. */
3633 {
3634 STMT_VINFO_TYPE (stmt_info) = op_vec_info_type;
3635 if (dump_enabled_p ())
3636 dump_printf_loc (MSG_NOTE, vect_location,
3637 "=== vectorizable_operation ===");
3638 vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
3639 return true;
3640 }
3641
3642 /** Transform. **/
3643
3644 if (dump_enabled_p ())
3645 dump_printf_loc (MSG_NOTE, vect_location,
3646 "transform binary/unary operation.");
3647
3648 /* Handle def. */
3649 vec_dest = vect_create_destination_var (scalar_dest, vectype);
3650
3651 /* In case the vectorization factor (VF) is bigger than the number
3652 of elements that we can fit in a vectype (nunits), we have to generate
3653 more than one vector stmt - i.e - we need to "unroll" the
3654 vector stmt by a factor VF/nunits. In doing so, we record a pointer
3655 from one copy of the vector stmt to the next, in the field
3656 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
3657 stages to find the correct vector defs to be used when vectorizing
3658 stmts that use the defs of the current stmt. The example below
3659 illustrates the vectorization process when VF=16 and nunits=4 (i.e.,
3660 we need to create 4 vectorized stmts):
3661
3662 before vectorization:
3663 RELATED_STMT VEC_STMT
3664 S1: x = memref - -
3665 S2: z = x + 1 - -
3666
3667 step 1: vectorize stmt S1 (done in vectorizable_load. See more details
3668 there):
3669 RELATED_STMT VEC_STMT
3670 VS1_0: vx0 = memref0 VS1_1 -
3671 VS1_1: vx1 = memref1 VS1_2 -
3672 VS1_2: vx2 = memref2 VS1_3 -
3673 VS1_3: vx3 = memref3 - -
3674 S1: x = load - VS1_0
3675 S2: z = x + 1 - -
3676
3677 step2: vectorize stmt S2 (done here):
3678 To vectorize stmt S2 we first need to find the relevant vector
3679 def for the first operand 'x'. This is, as usual, obtained from
3680 the vector stmt recorded in the STMT_VINFO_VEC_STMT of the stmt
3681 that defines 'x' (S1). This way we find the stmt VS1_0, and the
3682 relevant vector def 'vx0'. Having found 'vx0' we can generate
3683 the vector stmt VS2_0, and as usual, record it in the
3684 STMT_VINFO_VEC_STMT of stmt S2.
3685 When creating the second copy (VS2_1), we obtain the relevant vector
3686 def from the vector stmt recorded in the STMT_VINFO_RELATED_STMT of
3687 stmt VS1_0. This way we find the stmt VS1_1 and the relevant
3688 vector def 'vx1'. Using 'vx1' we create stmt VS2_1 and record a
3689 pointer to it in the STMT_VINFO_RELATED_STMT of the vector stmt VS2_0.
3690 Similarly when creating stmts VS2_2 and VS2_3. This is the resulting
3691 chain of stmts and pointers:
3692 RELATED_STMT VEC_STMT
3693 VS1_0: vx0 = memref0 VS1_1 -
3694 VS1_1: vx1 = memref1 VS1_2 -
3695 VS1_2: vx2 = memref2 VS1_3 -
3696 VS1_3: vx3 = memref3 - -
3697 S1: x = load - VS1_0
3698 VS2_0: vz0 = vx0 + v1 VS2_1 -
3699 VS2_1: vz1 = vx1 + v1 VS2_2 -
3700 VS2_2: vz2 = vx2 + v1 VS2_3 -
3701 VS2_3: vz3 = vx3 + v1 - -
3702 S2: z = x + 1 - VS2_0 */
3703
3704 prev_stmt_info = NULL;
3705 for (j = 0; j < ncopies; j++)
3706 {
3707 /* Handle uses. */
3708 if (j == 0)
3709 {
3710 if (op_type == binary_op || op_type == ternary_op)
3711 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
3712 slp_node, -1);
3713 else
3714 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
3715 slp_node, -1);
3716 if (op_type == ternary_op)
3717 {
3718 vec_oprnds2.create (1);
3719 vec_oprnds2.quick_push (vect_get_vec_def_for_operand (op2,
3720 stmt,
3721 NULL));
3722 }
3723 }
3724 else
3725 {
3726 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, &vec_oprnds1);
3727 if (op_type == ternary_op)
3728 {
3729 tree vec_oprnd = vec_oprnds2.pop ();
3730 vec_oprnds2.quick_push (vect_get_vec_def_for_stmt_copy (dt[2],
3731 vec_oprnd));
3732 }
3733 }
3734
3735 /* Arguments are ready. Create the new vector stmt. */
3736 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
3737 {
3738 vop1 = ((op_type == binary_op || op_type == ternary_op)
3739 ? vec_oprnds1[i] : NULL_TREE);
3740 vop2 = ((op_type == ternary_op)
3741 ? vec_oprnds2[i] : NULL_TREE);
3742 new_stmt = gimple_build_assign_with_ops (code, vec_dest,
3743 vop0, vop1, vop2);
3744 new_temp = make_ssa_name (vec_dest, new_stmt);
3745 gimple_assign_set_lhs (new_stmt, new_temp);
3746 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3747 if (slp_node)
3748 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
3749 }
3750
3751 if (slp_node)
3752 continue;
3753
3754 if (j == 0)
3755 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3756 else
3757 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3758 prev_stmt_info = vinfo_for_stmt (new_stmt);
3759 }
3760
3761 vec_oprnds0.release ();
3762 vec_oprnds1.release ();
3763 vec_oprnds2.release ();
3764
3765 return true;
3766 }
3767
3768
3769 /* Function vectorizable_store.
3770
3771    Check if STMT defines a non-scalar data-ref (array/pointer/structure) that
3772 can be vectorized.
3773 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
3774 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
3775 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
3776
3777 static bool
3778 vectorizable_store (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
3779 slp_tree slp_node)
3780 {
3781 tree scalar_dest;
3782 tree data_ref;
3783 tree op;
3784 tree vec_oprnd = NULL_TREE;
3785 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3786 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr = NULL;
3787 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
3788 tree elem_type;
3789 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3790 struct loop *loop = NULL;
3791 enum machine_mode vec_mode;
3792 tree dummy;
3793 enum dr_alignment_support alignment_support_scheme;
3794 tree def;
3795 gimple def_stmt;
3796 enum vect_def_type dt;
3797 stmt_vec_info prev_stmt_info = NULL;
3798 tree dataref_ptr = NULL_TREE;
3799 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
3800 int ncopies;
3801 int j;
3802 gimple next_stmt, first_stmt = NULL;
3803 bool grouped_store = false;
3804 bool store_lanes_p = false;
3805 unsigned int group_size, i;
3806 vec<tree> dr_chain = vNULL;
3807 vec<tree> oprnds = vNULL;
3808 vec<tree> result_chain = vNULL;
3809 bool inv_p;
3810 vec<tree> vec_oprnds = vNULL;
3811 bool slp = (slp_node != NULL);
3812 unsigned int vec_num;
3813 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
3814 tree aggr_type;
3815
3816 if (loop_vinfo)
3817 loop = LOOP_VINFO_LOOP (loop_vinfo);
3818
3819 /* Multiple types in SLP are handled by creating the appropriate number of
3820 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
3821 case of SLP. */
3822 if (slp || PURE_SLP_STMT (stmt_info))
3823 ncopies = 1;
3824 else
3825 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
3826
3827 gcc_assert (ncopies >= 1);
3828
3829 /* FORNOW. This restriction should be relaxed. */
3830 if (loop && nested_in_vect_loop_p (loop, stmt) && ncopies > 1)
3831 {
3832 if (dump_enabled_p ())
3833 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3834 "multiple types in nested loop.");
3835 return false;
3836 }
3837
3838 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
3839 return false;
3840
3841 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
3842 return false;
3843
3844 /* Is vectorizable store? */
3845
3846 if (!is_gimple_assign (stmt))
3847 return false;
3848
3849 scalar_dest = gimple_assign_lhs (stmt);
3850 if (TREE_CODE (scalar_dest) == VIEW_CONVERT_EXPR
3851 && is_pattern_stmt_p (stmt_info))
3852 scalar_dest = TREE_OPERAND (scalar_dest, 0);
3853 if (TREE_CODE (scalar_dest) != ARRAY_REF
3854 && TREE_CODE (scalar_dest) != BIT_FIELD_REF
3855 && TREE_CODE (scalar_dest) != INDIRECT_REF
3856 && TREE_CODE (scalar_dest) != COMPONENT_REF
3857 && TREE_CODE (scalar_dest) != IMAGPART_EXPR
3858 && TREE_CODE (scalar_dest) != REALPART_EXPR
3859 && TREE_CODE (scalar_dest) != MEM_REF)
3860 return false;
3861
3862 gcc_assert (gimple_assign_single_p (stmt));
3863 op = gimple_assign_rhs1 (stmt);
3864 if (!vect_is_simple_use (op, stmt, loop_vinfo, bb_vinfo, &def_stmt,
3865 &def, &dt))
3866 {
3867 if (dump_enabled_p ())
3868 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3869 "use not simple.");
3870 return false;
3871 }
3872
3873 elem_type = TREE_TYPE (vectype);
3874 vec_mode = TYPE_MODE (vectype);
3875
3876 /* FORNOW. In some cases can vectorize even if data-type not supported
3877 (e.g. - array initialization with 0). */
3878 if (optab_handler (mov_optab, vec_mode) == CODE_FOR_nothing)
3879 return false;
3880
3881 if (!STMT_VINFO_DATA_REF (stmt_info))
3882 return false;
3883
3884 if (tree_int_cst_compare (loop && nested_in_vect_loop_p (loop, stmt)
3885 ? STMT_VINFO_DR_STEP (stmt_info) : DR_STEP (dr),
3886 size_zero_node) < 0)
3887 {
3888 if (dump_enabled_p ())
3889 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3890 "negative step for store.");
3891 return false;
3892 }
3893
3894 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
3895 {
3896 grouped_store = true;
3897 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
3898 if (!slp && !PURE_SLP_STMT (stmt_info))
3899 {
3900 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
3901 if (vect_store_lanes_supported (vectype, group_size))
3902 store_lanes_p = true;
3903 else if (!vect_grouped_store_supported (vectype, group_size))
3904 return false;
3905 }
3906
3907 if (first_stmt == stmt)
3908 {
3909 /* STMT is the leader of the group. Check the operands of all the
3910 stmts of the group. */
3911 next_stmt = GROUP_NEXT_ELEMENT (stmt_info);
3912 while (next_stmt)
3913 {
3914 gcc_assert (gimple_assign_single_p (next_stmt));
3915 op = gimple_assign_rhs1 (next_stmt);
3916 if (!vect_is_simple_use (op, next_stmt, loop_vinfo, bb_vinfo,
3917 &def_stmt, &def, &dt))
3918 {
3919 if (dump_enabled_p ())
3920 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3921 "use not simple.");
3922 return false;
3923 }
3924 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
3925 }
3926 }
3927 }
3928
3929 if (!vec_stmt) /* transformation not required. */
3930 {
3931 STMT_VINFO_TYPE (stmt_info) = store_vec_info_type;
3932 vect_model_store_cost (stmt_info, ncopies, store_lanes_p, dt,
3933 NULL, NULL, NULL);
3934 return true;
3935 }
3936
3937 /** Transform. **/
3938
3939 if (grouped_store)
3940 {
3941 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
3942 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
3943
3944 GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt))++;
3945
3946 /* FORNOW */
3947 gcc_assert (!loop || !nested_in_vect_loop_p (loop, stmt));
3948
3949 /* We vectorize all the stmts of the interleaving group when we
3950 reach the last stmt in the group. */
3951 if (GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt))
3952 < GROUP_SIZE (vinfo_for_stmt (first_stmt))
3953 && !slp)
3954 {
3955 *vec_stmt = NULL;
3956 return true;
3957 }
3958
3959 if (slp)
3960 {
3961 grouped_store = false;
3962 /* VEC_NUM is the number of vect stmts to be created for this
3963 group. */
3964 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
3965 first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0];
3966 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
3967 op = gimple_assign_rhs1 (first_stmt);
3968 }
3969 else
3970 /* VEC_NUM is the number of vect stmts to be created for this
3971 group. */
3972 vec_num = group_size;
3973 }
3974 else
3975 {
3976 first_stmt = stmt;
3977 first_dr = dr;
3978 group_size = vec_num = 1;
3979 }
3980
3981 if (dump_enabled_p ())
3982 dump_printf_loc (MSG_NOTE, vect_location,
3983 "transform store. ncopies = %d", ncopies);
3984
3985 dr_chain.create (group_size);
3986 oprnds.create (group_size);
3987
3988 alignment_support_scheme = vect_supportable_dr_alignment (first_dr, false);
3989 gcc_assert (alignment_support_scheme);
3990 /* Targets with store-lane instructions must not require explicit
3991 realignment. */
3992 gcc_assert (!store_lanes_p
3993 || alignment_support_scheme == dr_aligned
3994 || alignment_support_scheme == dr_unaligned_supported);
3995
3996 if (store_lanes_p)
3997 aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
3998 else
3999 aggr_type = vectype;
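  /* A hedged worked example of the store-lanes case: for an interleaved
     group of two V4SI stores (vec_num == 2, nunits == 4) AGGR_TYPE is an
     eight-element array of ints, written below by a single
     IFN_STORE_LANES call.  */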
4000
4001 /* In case the vectorization factor (VF) is bigger than the number
4002 of elements that we can fit in a vectype (nunits), we have to generate
4003 more than one vector stmt - i.e - we need to "unroll" the
4004 vector stmt by a factor VF/nunits. For more details see documentation in
4005 vect_get_vec_def_for_copy_stmt. */
4006
4007 /* In case of interleaving (non-unit grouped access):
4008
4009 S1: &base + 2 = x2
4010 S2: &base = x0
4011 S3: &base + 1 = x1
4012 S4: &base + 3 = x3
4013
4014      We create vectorized stores starting from the base address (the access of the
4015      first stmt in the chain (S2 in the above example)), when the last store stmt
4016 of the chain (S4) is reached:
4017
4018 VS1: &base = vx2
4019 VS2: &base + vec_size*1 = vx0
4020 VS3: &base + vec_size*2 = vx1
4021 VS4: &base + vec_size*3 = vx3
4022
4023 Then permutation statements are generated:
4024
4025 VS5: vx5 = VEC_PERM_EXPR < vx0, vx3, {0, 8, 1, 9, 2, 10, 3, 11} >
4026 VS6: vx6 = VEC_PERM_EXPR < vx0, vx3, {4, 12, 5, 13, 6, 14, 7, 15} >
4027 ...
4028
4029 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
4030 (the order of the data-refs in the output of vect_permute_store_chain
4031 corresponds to the order of scalar stmts in the interleaving chain - see
4032 the documentation of vect_permute_store_chain()).
4033
4034 In case of both multiple types and interleaving, above vector stores and
4035 permutation stmts are created for every copy. The result vector stmts are
4036 put in STMT_VINFO_VEC_STMT for the first copy and in the corresponding
4037 STMT_VINFO_RELATED_STMT for the next copies.
4038 */
4039
4040 prev_stmt_info = NULL;
4041 for (j = 0; j < ncopies; j++)
4042 {
4043 gimple new_stmt;
4044 gimple ptr_incr;
4045
4046 if (j == 0)
4047 {
4048 if (slp)
4049 {
4050 /* Get vectorized arguments for SLP_NODE. */
4051 vect_get_vec_defs (op, NULL_TREE, stmt, &vec_oprnds,
4052 NULL, slp_node, -1);
4053
4054 vec_oprnd = vec_oprnds[0];
4055 }
4056 else
4057 {
4058 /* For interleaved stores we collect vectorized defs for all the
4059 stores in the group in DR_CHAIN and OPRNDS. DR_CHAIN is then
4060 used as an input to vect_permute_store_chain(), and OPRNDS as
4061 an input to vect_get_vec_def_for_stmt_copy() for the next copy.
4062
4063 If the store is not grouped, GROUP_SIZE is 1, and DR_CHAIN and
4064 OPRNDS are of size 1. */
4065 next_stmt = first_stmt;
4066 for (i = 0; i < group_size; i++)
4067 {
4068 /* Since gaps are not supported for interleaved stores,
4069 GROUP_SIZE is the exact number of stmts in the chain.
4070 Therefore, NEXT_STMT can't be NULL_TREE. In case that
4071 there is no interleaving, GROUP_SIZE is 1, and only one
4072 iteration of the loop will be executed. */
4073 gcc_assert (next_stmt
4074 && gimple_assign_single_p (next_stmt));
4075 op = gimple_assign_rhs1 (next_stmt);
4076
4077 vec_oprnd = vect_get_vec_def_for_operand (op, next_stmt,
4078 NULL);
4079 dr_chain.quick_push (vec_oprnd);
4080 oprnds.quick_push (vec_oprnd);
4081 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
4082 }
4083 }
4084
4085           /* We should have caught mismatched types earlier.  */
4086 gcc_assert (useless_type_conversion_p (vectype,
4087 TREE_TYPE (vec_oprnd)));
4088 dataref_ptr = vect_create_data_ref_ptr (first_stmt, aggr_type, NULL,
4089 NULL_TREE, &dummy, gsi,
4090 &ptr_incr, false, &inv_p);
4091 gcc_assert (bb_vinfo || !inv_p);
4092 }
4093 else
4094 {
4095 /* For interleaved stores we created vectorized defs for all the
4096 defs stored in OPRNDS in the previous iteration (previous copy).
4097 DR_CHAIN is then used as an input to vect_permute_store_chain(),
4098 and OPRNDS as an input to vect_get_vec_def_for_stmt_copy() for the
4099 next copy.
4100 If the store is not grouped, GROUP_SIZE is 1, and DR_CHAIN and
4101 OPRNDS are of size 1. */
4102 for (i = 0; i < group_size; i++)
4103 {
4104 op = oprnds[i];
4105 vect_is_simple_use (op, NULL, loop_vinfo, bb_vinfo, &def_stmt,
4106 &def, &dt);
4107 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, op);
4108 dr_chain[i] = vec_oprnd;
4109 oprnds[i] = vec_oprnd;
4110 }
4111 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
4112 TYPE_SIZE_UNIT (aggr_type));
4113 }
4114
4115 if (store_lanes_p)
4116 {
4117 tree vec_array;
4118
4119 /* Combine all the vectors into an array. */
4120 vec_array = create_vector_array (vectype, vec_num);
4121 for (i = 0; i < vec_num; i++)
4122 {
4123 vec_oprnd = dr_chain[i];
4124 write_vector_array (stmt, gsi, vec_oprnd, vec_array, i);
4125 }
4126
4127 /* Emit:
4128 MEM_REF[...all elements...] = STORE_LANES (VEC_ARRAY). */
4129 data_ref = create_array_ref (aggr_type, dataref_ptr, first_dr);
4130 new_stmt = gimple_build_call_internal (IFN_STORE_LANES, 1, vec_array);
4131 gimple_call_set_lhs (new_stmt, data_ref);
4132 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4133 }
4134 else
4135 {
4136 new_stmt = NULL;
4137 if (grouped_store)
4138 {
4139 if (j == 0)
4140 result_chain.create (group_size);
4141 /* Permute. */
4142 vect_permute_store_chain (dr_chain, group_size, stmt, gsi,
4143 &result_chain);
4144 }
4145
4146 next_stmt = first_stmt;
4147 for (i = 0; i < vec_num; i++)
4148 {
4149 unsigned align, misalign;
4150
4151 if (i > 0)
4152 /* Bump the vector pointer. */
4153 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
4154 stmt, NULL_TREE);
4155
4156 if (slp)
4157 vec_oprnd = vec_oprnds[i];
4158 else if (grouped_store)
4159 /* For grouped stores vectorized defs are interleaved in
4160 vect_permute_store_chain(). */
4161 vec_oprnd = result_chain[i];
4162
4163 data_ref = build2 (MEM_REF, TREE_TYPE (vec_oprnd), dataref_ptr,
4164 build_int_cst (reference_alias_ptr_type
4165 (DR_REF (first_dr)), 0));
4166 align = TYPE_ALIGN_UNIT (vectype);
4167 if (aligned_access_p (first_dr))
4168 misalign = 0;
4169 else if (DR_MISALIGNMENT (first_dr) == -1)
4170 {
4171 TREE_TYPE (data_ref)
4172 = build_aligned_type (TREE_TYPE (data_ref),
4173 TYPE_ALIGN (elem_type));
4174 align = TYPE_ALIGN_UNIT (elem_type);
4175 misalign = 0;
4176 }
4177 else
4178 {
4179 TREE_TYPE (data_ref)
4180 = build_aligned_type (TREE_TYPE (data_ref),
4181 TYPE_ALIGN (elem_type));
4182 misalign = DR_MISALIGNMENT (first_dr);
4183 }
4184 set_ptr_info_alignment (get_ptr_info (dataref_ptr), align,
4185 misalign);
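              /* Illustration (a hedged note, not from the original source):
                 for a known misalignment of 4 bytes against a 16-byte V4SI
                 access, ALIGN stays 16 and MISALIGN is 4; an unknown
                 misalignment (DR_MISALIGNMENT == -1) instead falls back to
                 the element alignment with MISALIGN 0.  */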
4186
4187 /* Arguments are ready. Create the new vector stmt. */
4188 new_stmt = gimple_build_assign (data_ref, vec_oprnd);
4189 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4190
4191 if (slp)
4192 continue;
4193
4194 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
4195 if (!next_stmt)
4196 break;
4197 }
4198 }
4199 if (!slp)
4200 {
4201 if (j == 0)
4202 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4203 else
4204 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4205 prev_stmt_info = vinfo_for_stmt (new_stmt);
4206 }
4207 }
4208
4209 dr_chain.release ();
4210 oprnds.release ();
4211 result_chain.release ();
4212 vec_oprnds.release ();
4213
4214 return true;
4215 }
4216
4217 /* Given a vector type VECTYPE and permutation SEL returns
4218 the VECTOR_CST mask that implements the permutation of the
4219 vector elements. If that is impossible to do, returns NULL. */
4220
4221 tree
4222 vect_gen_perm_mask (tree vectype, unsigned char *sel)
4223 {
4224 tree mask_elt_type, mask_type, mask_vec, *mask_elts;
4225 int i, nunits;
4226
4227 nunits = TYPE_VECTOR_SUBPARTS (vectype);
4228
4229 if (!can_vec_perm_p (TYPE_MODE (vectype), false, sel))
4230 return NULL;
4231
4232 mask_elt_type = lang_hooks.types.type_for_mode
4233 (int_mode_for_mode (TYPE_MODE (TREE_TYPE (vectype))), 1);
4234 mask_type = get_vectype_for_scalar_type (mask_elt_type);
4235
4236 mask_elts = XALLOCAVEC (tree, nunits);
4237 for (i = nunits - 1; i >= 0; i--)
4238 mask_elts[i] = build_int_cst (mask_elt_type, sel[i]);
4239 mask_vec = build_vector (mask_type, mask_elts);
4240
4241 return mask_vec;
4242 }
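/* A minimal usage sketch of vect_gen_perm_mask (illustration only; the
   V4SI assumption and the name EVEN_MASK are hypothetical, not part of
   this file):

     unsigned char sel[4] = { 0, 2, 4, 6 };
     tree even_mask = vect_gen_perm_mask (vectype, sel);
     if (even_mask)
       ... use EVEN_MASK as the selector of a VEC_PERM_EXPR picking the
           even elements of two V4SI inputs (indices 0-3 address the
           first input, 4-7 the second) ...  */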
4243
4244 /* Given a vector type VECTYPE returns the VECTOR_CST mask that implements
4245 reversal of the vector elements. If that is impossible to do,
4246 returns NULL. */
4247
4248 static tree
4249 perm_mask_for_reverse (tree vectype)
4250 {
4251 int i, nunits;
4252 unsigned char *sel;
4253
4254 nunits = TYPE_VECTOR_SUBPARTS (vectype);
4255 sel = XALLOCAVEC (unsigned char, nunits);
4256
4257 for (i = 0; i < nunits; ++i)
4258 sel[i] = nunits - 1 - i;
4259
4260 return vect_gen_perm_mask (vectype, sel);
4261 }
4262
4263 /* Given vector variables X and Y that were generated for the scalar
4264 STMT, generate instructions to permute the vector elements of X and Y
4265 using permutation mask MASK_VEC, insert them at *GSI and return the
4266 permuted vector variable. */
4267
4268 static tree
4269 permute_vec_elements (tree x, tree y, tree mask_vec, gimple stmt,
4270 gimple_stmt_iterator *gsi)
4271 {
4272 tree vectype = TREE_TYPE (x);
4273 tree perm_dest, data_ref;
4274 gimple perm_stmt;
4275
4276 perm_dest = vect_create_destination_var (gimple_assign_lhs (stmt), vectype);
4277 data_ref = make_ssa_name (perm_dest, NULL);
4278
4279 /* Generate the permute statement. */
4280 perm_stmt = gimple_build_assign_with_ops (VEC_PERM_EXPR, data_ref,
4281 x, y, mask_vec);
4282 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
4283
4284 return data_ref;
4285 }
4286
4287 /* vectorizable_load.
4288
4289    Check if STMT reads a non-scalar data-ref (array/pointer/structure) that
4290 can be vectorized.
4291 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4292 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
4293 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
4294
4295 static bool
4296 vectorizable_load (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
4297 slp_tree slp_node, slp_instance slp_node_instance)
4298 {
4299 tree scalar_dest;
4300 tree vec_dest = NULL;
4301 tree data_ref = NULL;
4302 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4303 stmt_vec_info prev_stmt_info;
4304 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4305 struct loop *loop = NULL;
4306 struct loop *containing_loop = (gimple_bb (stmt))->loop_father;
4307 bool nested_in_vect_loop = false;
4308 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr;
4309 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
4310 tree elem_type;
4311 tree new_temp;
4312 enum machine_mode mode;
4313 gimple new_stmt = NULL;
4314 tree dummy;
4315 enum dr_alignment_support alignment_support_scheme;
4316 tree dataref_ptr = NULL_TREE;
4317 gimple ptr_incr;
4318 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
4319 int ncopies;
4320 int i, j, group_size, group_gap;
4321 tree msq = NULL_TREE, lsq;
4322 tree offset = NULL_TREE;
4323 tree realignment_token = NULL_TREE;
4324 gimple phi = NULL;
4325 vec<tree> dr_chain = vNULL;
4326 bool grouped_load = false;
4327 bool load_lanes_p = false;
4328 gimple first_stmt;
4329 bool inv_p;
4330 bool negative = false;
4331 bool compute_in_loop = false;
4332 struct loop *at_loop;
4333 int vec_num;
4334 bool slp = (slp_node != NULL);
4335 bool slp_perm = false;
4336 enum tree_code code;
4337 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
4338 int vf;
4339 tree aggr_type;
4340 tree gather_base = NULL_TREE, gather_off = NULL_TREE;
4341 tree gather_off_vectype = NULL_TREE, gather_decl = NULL_TREE;
4342 int gather_scale = 1;
4343 enum vect_def_type gather_dt = vect_unknown_def_type;
4344
4345 if (loop_vinfo)
4346 {
4347 loop = LOOP_VINFO_LOOP (loop_vinfo);
4348 nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
4349 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
4350 }
4351 else
4352 vf = 1;
4353
4354 /* Multiple types in SLP are handled by creating the appropriate number of
4355 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
4356 case of SLP. */
4357 if (slp || PURE_SLP_STMT (stmt_info))
4358 ncopies = 1;
4359 else
4360 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
4361
4362 gcc_assert (ncopies >= 1);
4363
4364 /* FORNOW. This restriction should be relaxed. */
4365 if (nested_in_vect_loop && ncopies > 1)
4366 {
4367 if (dump_enabled_p ())
4368 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4369 "multiple types in nested loop.");
4370 return false;
4371 }
4372
4373 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
4374 return false;
4375
4376 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
4377 return false;
4378
4379 /* Is vectorizable load? */
4380 if (!is_gimple_assign (stmt))
4381 return false;
4382
4383 scalar_dest = gimple_assign_lhs (stmt);
4384 if (TREE_CODE (scalar_dest) != SSA_NAME)
4385 return false;
4386
4387 code = gimple_assign_rhs_code (stmt);
4388 if (code != ARRAY_REF
4389 && code != BIT_FIELD_REF
4390 && code != INDIRECT_REF
4391 && code != COMPONENT_REF
4392 && code != IMAGPART_EXPR
4393 && code != REALPART_EXPR
4394 && code != MEM_REF
4395 && TREE_CODE_CLASS (code) != tcc_declaration)
4396 return false;
4397
4398 if (!STMT_VINFO_DATA_REF (stmt_info))
4399 return false;
4400
4401 elem_type = TREE_TYPE (vectype);
4402 mode = TYPE_MODE (vectype);
4403
4404 /* FORNOW. In some cases can vectorize even if data-type not supported
4405 (e.g. - data copies). */
4406 if (optab_handler (mov_optab, mode) == CODE_FOR_nothing)
4407 {
4408 if (dump_enabled_p ())
4409 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4410 "Aligned load, but unsupported type.");
4411 return false;
4412 }
4413
4414 /* Check if the load is a part of an interleaving chain. */
4415 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
4416 {
4417 grouped_load = true;
4418 /* FORNOW */
4419 gcc_assert (! nested_in_vect_loop && !STMT_VINFO_GATHER_P (stmt_info));
4420
4421 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
4422 if (!slp && !PURE_SLP_STMT (stmt_info))
4423 {
4424 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
4425 if (vect_load_lanes_supported (vectype, group_size))
4426 load_lanes_p = true;
4427 else if (!vect_grouped_load_supported (vectype, group_size))
4428 return false;
4429 }
4430 }
4431
4432
4433 if (STMT_VINFO_GATHER_P (stmt_info))
4434 {
4435 gimple def_stmt;
4436 tree def;
4437 gather_decl = vect_check_gather (stmt, loop_vinfo, &gather_base,
4438 &gather_off, &gather_scale);
4439 gcc_assert (gather_decl);
4440 if (!vect_is_simple_use_1 (gather_off, NULL, loop_vinfo, bb_vinfo,
4441 &def_stmt, &def, &gather_dt,
4442 &gather_off_vectype))
4443 {
4444 if (dump_enabled_p ())
4445 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4446 "gather index use not simple.");
4447 return false;
4448 }
4449 }
4450 else if (STMT_VINFO_STRIDE_LOAD_P (stmt_info))
4451 ;
4452 else
4453 {
4454 negative = tree_int_cst_compare (nested_in_vect_loop
4455 ? STMT_VINFO_DR_STEP (stmt_info)
4456 : DR_STEP (dr),
4457 size_zero_node) < 0;
4458 if (negative && ncopies > 1)
4459 {
4460 if (dump_enabled_p ())
4461 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4462 "multiple types with negative step.");
4463 return false;
4464 }
4465
4466 if (negative)
4467 {
4468 if (grouped_load)
4469 {
4470 if (dump_enabled_p ())
4471 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4472                              "negative step for group load not supported.");
4473 return false;
4474 }
4475 alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
4476 if (alignment_support_scheme != dr_aligned
4477 && alignment_support_scheme != dr_unaligned_supported)
4478 {
4479 if (dump_enabled_p ())
4480 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4481 "negative step but alignment required.");
4482 return false;
4483 }
4484 if (!perm_mask_for_reverse (vectype))
4485 {
4486 if (dump_enabled_p ())
4487 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4488 "negative step and reversing not supported.");
4489 return false;
4490 }
4491 }
4492 }
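  /* For a negative (reversing) step that passes the checks above, the
     transformation below biases the initial address by -(nunits - 1)
     elements and reverses each loaded vector with a VEC_PERM_EXPR whose
     selector comes from perm_mask_for_reverse, e.g. { 3, 2, 1, 0 } for a
     V4SI vectype (a descriptive note added for illustration).  */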
4493
4494 if (!vec_stmt) /* transformation not required. */
4495 {
4496 STMT_VINFO_TYPE (stmt_info) = load_vec_info_type;
4497 vect_model_load_cost (stmt_info, ncopies, load_lanes_p, NULL, NULL, NULL);
4498 return true;
4499 }
4500
4501 if (dump_enabled_p ())
4502 dump_printf_loc (MSG_NOTE, vect_location,
4503 "transform load. ncopies = %d", ncopies);
4504
4505 /** Transform. **/
4506
4507 if (STMT_VINFO_GATHER_P (stmt_info))
4508 {
4509 tree vec_oprnd0 = NULL_TREE, op;
4510 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gather_decl));
4511 tree rettype, srctype, ptrtype, idxtype, masktype, scaletype;
4512 tree ptr, mask, var, scale, perm_mask = NULL_TREE, prev_res = NULL_TREE;
4513 edge pe = loop_preheader_edge (loop);
4514 gimple_seq seq;
4515 basic_block new_bb;
4516 enum { NARROW, NONE, WIDEN } modifier;
4517 int gather_off_nunits = TYPE_VECTOR_SUBPARTS (gather_off_vectype);
4518
4519 if (nunits == gather_off_nunits)
4520 modifier = NONE;
4521 else if (nunits == gather_off_nunits / 2)
4522 {
4523 unsigned char *sel = XALLOCAVEC (unsigned char, gather_off_nunits);
4524 modifier = WIDEN;
4525
4526 for (i = 0; i < gather_off_nunits; ++i)
4527 sel[i] = i | nunits;
4528
4529 perm_mask = vect_gen_perm_mask (gather_off_vectype, sel);
4530 gcc_assert (perm_mask != NULL_TREE);
4531 }
4532 else if (nunits == gather_off_nunits * 2)
4533 {
4534 unsigned char *sel = XALLOCAVEC (unsigned char, nunits);
4535 modifier = NARROW;
4536
4537 for (i = 0; i < nunits; ++i)
4538 sel[i] = i < gather_off_nunits
4539 ? i : i + nunits - gather_off_nunits;
4540
4541 perm_mask = vect_gen_perm_mask (vectype, sel);
4542 gcc_assert (perm_mask != NULL_TREE);
4543 ncopies *= 2;
4544 }
4545 else
4546 gcc_unreachable ();
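      /* A hedged example of the WIDEN case above: gathering V2DF data with
         V4SI indices gives nunits == 2 and gather_off_nunits == 4, so SEL
         is { 2, 3, 2, 3 } and every odd copy below first permutes the
         upper two indices into the low half of the index vector.  */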
4547
4548 rettype = TREE_TYPE (TREE_TYPE (gather_decl));
4549 srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
4550 ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
4551 idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
4552 masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
4553 scaletype = TREE_VALUE (arglist);
4554 gcc_checking_assert (types_compatible_p (srctype, rettype)
4555 && types_compatible_p (srctype, masktype));
4556
4557 vec_dest = vect_create_destination_var (scalar_dest, vectype);
4558
4559 ptr = fold_convert (ptrtype, gather_base);
4560 if (!is_gimple_min_invariant (ptr))
4561 {
4562 ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
4563 new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
4564 gcc_assert (!new_bb);
4565 }
4566
4567 /* Currently we support only unconditional gather loads,
4568         so the mask should be all ones.  */
4569 if (TREE_CODE (TREE_TYPE (masktype)) == INTEGER_TYPE)
4570 mask = build_int_cst (TREE_TYPE (masktype), -1);
4571 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (masktype)))
4572 {
4573 REAL_VALUE_TYPE r;
4574 long tmp[6];
4575 for (j = 0; j < 6; ++j)
4576 tmp[j] = -1;
4577 real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (masktype)));
4578 mask = build_real (TREE_TYPE (masktype), r);
4579 }
4580 else
4581 gcc_unreachable ();
4582 mask = build_vector_from_val (masktype, mask);
4583 mask = vect_init_vector (stmt, mask, masktype, NULL);
4584
4585 scale = build_int_cst (scaletype, gather_scale);
4586
4587 prev_stmt_info = NULL;
4588 for (j = 0; j < ncopies; ++j)
4589 {
4590 if (modifier == WIDEN && (j & 1))
4591 op = permute_vec_elements (vec_oprnd0, vec_oprnd0,
4592 perm_mask, stmt, gsi);
4593 else if (j == 0)
4594 op = vec_oprnd0
4595 = vect_get_vec_def_for_operand (gather_off, stmt, NULL);
4596 else
4597 op = vec_oprnd0
4598 = vect_get_vec_def_for_stmt_copy (gather_dt, vec_oprnd0);
4599
4600 if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
4601 {
4602 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op))
4603 == TYPE_VECTOR_SUBPARTS (idxtype));
4604 var = vect_get_new_vect_var (idxtype, vect_simple_var, NULL);
4605 var = make_ssa_name (var, NULL);
4606 op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
4607 new_stmt
4608 = gimple_build_assign_with_ops (VIEW_CONVERT_EXPR, var,
4609 op, NULL_TREE);
4610 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4611 op = var;
4612 }
4613
4614 new_stmt
4615 = gimple_build_call (gather_decl, 5, mask, ptr, op, mask, scale);
4616
4617 if (!useless_type_conversion_p (vectype, rettype))
4618 {
4619 gcc_assert (TYPE_VECTOR_SUBPARTS (vectype)
4620 == TYPE_VECTOR_SUBPARTS (rettype));
4621 var = vect_get_new_vect_var (rettype, vect_simple_var, NULL);
4622 op = make_ssa_name (var, new_stmt);
4623 gimple_call_set_lhs (new_stmt, op);
4624 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4625 var = make_ssa_name (vec_dest, NULL);
4626 op = build1 (VIEW_CONVERT_EXPR, vectype, op);
4627 new_stmt
4628 = gimple_build_assign_with_ops (VIEW_CONVERT_EXPR, var, op,
4629 NULL_TREE);
4630 }
4631 else
4632 {
4633 var = make_ssa_name (vec_dest, new_stmt);
4634 gimple_call_set_lhs (new_stmt, var);
4635 }
4636
4637 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4638
4639 if (modifier == NARROW)
4640 {
4641 if ((j & 1) == 0)
4642 {
4643 prev_res = var;
4644 continue;
4645 }
4646 var = permute_vec_elements (prev_res, var,
4647 perm_mask, stmt, gsi);
4648 new_stmt = SSA_NAME_DEF_STMT (var);
4649 }
4650
4651 if (prev_stmt_info == NULL)
4652 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4653 else
4654 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4655 prev_stmt_info = vinfo_for_stmt (new_stmt);
4656 }
4657 return true;
4658 }
4659 else if (STMT_VINFO_STRIDE_LOAD_P (stmt_info))
4660 {
4661 gimple_stmt_iterator incr_gsi;
4662 bool insert_after;
4663 gimple incr;
4664 tree offvar;
4665 tree ivstep;
4666 tree running_off;
4667 vec<constructor_elt, va_gc> *v = NULL;
4668 gimple_seq stmts = NULL;
4669 tree stride_base, stride_step, alias_off;
4670
4671 gcc_assert (!nested_in_vect_loop);
4672
4673 stride_base
4674 = fold_build_pointer_plus
4675 (unshare_expr (DR_BASE_ADDRESS (dr)),
4676 size_binop (PLUS_EXPR,
4677 convert_to_ptrofftype (unshare_expr (DR_OFFSET (dr))),
4678                       convert_to_ptrofftype (DR_INIT (dr))));
4679 stride_step = fold_convert (sizetype, unshare_expr (DR_STEP (dr)));
4680
4681 /* For a load with loop-invariant (but other than power-of-2)
4682 stride (i.e. not a grouped access) like so:
4683
4684 for (i = 0; i < n; i += stride)
4685 ... = array[i];
4686
4687 we generate a new induction variable and new accesses to
4688 form a new vector (or vectors, depending on ncopies):
4689
4690 for (j = 0; ; j += VF*stride)
4691 tmp1 = array[j];
4692 tmp2 = array[j + stride];
4693 ...
4694 vectemp = {tmp1, tmp2, ...}
4695 */
4696
4697 ivstep = stride_step;
4698 ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (ivstep), ivstep,
4699 build_int_cst (TREE_TYPE (ivstep), vf));
4700
4701 standard_iv_increment_position (loop, &incr_gsi, &insert_after);
4702
4703 create_iv (stride_base, ivstep, NULL,
4704 loop, &incr_gsi, insert_after,
4705 &offvar, NULL);
4706 incr = gsi_stmt (incr_gsi);
4707 set_vinfo_for_stmt (incr, new_stmt_vec_info (incr, loop_vinfo, NULL));
4708
4709 stride_step = force_gimple_operand (stride_step, &stmts, true, NULL_TREE);
4710 if (stmts)
4711 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
4712
4713 prev_stmt_info = NULL;
4714 running_off = offvar;
4715 alias_off = build_int_cst (reference_alias_ptr_type (DR_REF (dr)), 0);
4716 for (j = 0; j < ncopies; j++)
4717 {
4718 tree vec_inv;
4719
4720 vec_alloc (v, nunits);
4721 for (i = 0; i < nunits; i++)
4722 {
4723 tree newref, newoff;
4724 gimple incr;
4725 newref = build2 (MEM_REF, TREE_TYPE (vectype),
4726 running_off, alias_off);
4727
4728 newref = force_gimple_operand_gsi (gsi, newref, true,
4729 NULL_TREE, true,
4730 GSI_SAME_STMT);
4731 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, newref);
4732 newoff = copy_ssa_name (running_off, NULL);
4733 incr = gimple_build_assign_with_ops (POINTER_PLUS_EXPR, newoff,
4734 running_off, stride_step);
4735 vect_finish_stmt_generation (stmt, incr, gsi);
4736
4737 running_off = newoff;
4738 }
4739
4740 vec_inv = build_constructor (vectype, v);
4741 new_temp = vect_init_vector (stmt, vec_inv, vectype, gsi);
4742 new_stmt = SSA_NAME_DEF_STMT (new_temp);
4743
4744 if (j == 0)
4745 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4746 else
4747 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4748 prev_stmt_info = vinfo_for_stmt (new_stmt);
4749 }
4750 return true;
4751 }
4752
4753 if (grouped_load)
4754 {
4755 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
4756 if (slp
4757 && !SLP_TREE_LOAD_PERMUTATION (slp_node).exists ()
4758 && first_stmt != SLP_TREE_SCALAR_STMTS (slp_node)[0])
4759 first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0];
4760
4761 /* Check if the chain of loads is already vectorized. */
4762 if (STMT_VINFO_VEC_STMT (vinfo_for_stmt (first_stmt))
4763 /* For SLP we would need to copy over SLP_TREE_VEC_STMTS.
4764 ??? But we can only do so if there is exactly one
4765 as we have no way to get at the rest. Leave the CSE
4766 opportunity alone.
4767 ??? With the group load eventually participating
4768 in multiple different permutations (having multiple
4769          slp nodes which refer to the same group) the CSE
4770          would even produce wrong code.  See PR56270.  */
4771 && !slp)
4772 {
4773 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
4774 return true;
4775 }
4776 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
4777 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
4778
4779 /* VEC_NUM is the number of vect stmts to be created for this group. */
4780 if (slp)
4781 {
4782 grouped_load = false;
4783 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
4784 if (SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
4785 slp_perm = true;
4786 group_gap = GROUP_GAP (vinfo_for_stmt (first_stmt));
4787 }
4788 else
4789 {
4790 vec_num = group_size;
4791 group_gap = 0;
4792 }
4793 }
4794 else
4795 {
4796 first_stmt = stmt;
4797 first_dr = dr;
4798 group_size = vec_num = 1;
4799 group_gap = 0;
4800 }
4801
4802 alignment_support_scheme = vect_supportable_dr_alignment (first_dr, false);
4803 gcc_assert (alignment_support_scheme);
4804 /* Targets with load-lane instructions must not require explicit
4805 realignment. */
4806 gcc_assert (!load_lanes_p
4807 || alignment_support_scheme == dr_aligned
4808 || alignment_support_scheme == dr_unaligned_supported);
4809
4810 /* In case the vectorization factor (VF) is bigger than the number
4811 of elements that we can fit in a vectype (nunits), we have to generate
4812 more than one vector stmt - i.e - we need to "unroll" the
4813 vector stmt by a factor VF/nunits. In doing so, we record a pointer
4814 from one copy of the vector stmt to the next, in the field
4815 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
4816 stages to find the correct vector defs to be used when vectorizing
4817 stmts that use the defs of the current stmt. The example below
4818 illustrates the vectorization process when VF=16 and nunits=4 (i.e., we
4819 need to create 4 vectorized stmts):
4820
4821 before vectorization:
4822 RELATED_STMT VEC_STMT
4823 S1: x = memref - -
4824 S2: z = x + 1 - -
4825
4826 step 1: vectorize stmt S1:
4827 We first create the vector stmt VS1_0, and, as usual, record a
4828 pointer to it in the STMT_VINFO_VEC_STMT of the scalar stmt S1.
4829 Next, we create the vector stmt VS1_1, and record a pointer to
4830 it in the STMT_VINFO_RELATED_STMT of the vector stmt VS1_0.
4831 Similarly, for VS1_2 and VS1_3. This is the resulting chain of
4832 stmts and pointers:
4833 RELATED_STMT VEC_STMT
4834 VS1_0: vx0 = memref0 VS1_1 -
4835 VS1_1: vx1 = memref1 VS1_2 -
4836 VS1_2: vx2 = memref2 VS1_3 -
4837 VS1_3: vx3 = memref3 - -
4838 S1: x = load - VS1_0
4839 S2: z = x + 1 - -
4840
4841 See in documentation in vect_get_vec_def_for_stmt_copy for how the
4842 information we recorded in RELATED_STMT field is used to vectorize
4843 stmt S2. */
4844
4845 /* In case of interleaving (non-unit grouped access):
4846
4847 S1: x2 = &base + 2
4848 S2: x0 = &base
4849 S3: x1 = &base + 1
4850 S4: x3 = &base + 3
4851
4852 Vectorized loads are created in the order of memory accesses
4853 starting from the access of the first stmt of the chain:
4854
4855 VS1: vx0 = &base
4856 VS2: vx1 = &base + vec_size*1
4857 VS3: vx3 = &base + vec_size*2
4858 VS4: vx4 = &base + vec_size*3
4859
4860 Then permutation statements are generated:
4861
4862 VS5: vx5 = VEC_PERM_EXPR < vx0, vx1, { 0, 2, ..., i*2 } >
4863 VS6: vx6 = VEC_PERM_EXPR < vx0, vx1, { 1, 3, ..., i*2+1 } >
4864 ...
4865
4866 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
4867 (the order of the data-refs in the output of vect_permute_load_chain
4868 corresponds to the order of scalar stmts in the interleaving chain - see
4869 the documentation of vect_permute_load_chain()).
4870 The generation of permutation stmts and recording them in
4871 STMT_VINFO_VEC_STMT is done in vect_transform_grouped_load().
4872
4873 In case of both multiple types and interleaving, the vector loads and
4874 permutation stmts above are created for every copy. The result vector
4875 stmts are put in STMT_VINFO_VEC_STMT for the first copy and in the
4876 corresponding STMT_VINFO_RELATED_STMT for the next copies. */
4877
4878 /* If the data reference is aligned (dr_aligned) or potentially unaligned
4879 on a target that supports unaligned accesses (dr_unaligned_supported)
4880 we generate the following code:
4881 p = initial_addr;
4882 indx = 0;
4883 loop {
4884 p = p + indx * vectype_size;
4885 vec_dest = *(p);
4886 indx = indx + 1;
4887 }
4888
4889 Otherwise, the data reference is potentially unaligned on a target that
4890 does not support unaligned accesses (dr_explicit_realign_optimized) -
4891 then generate the following code, in which the data in each iteration is
4892 obtained by two vector loads, one from the previous iteration, and one
4893 from the current iteration:
4894 p1 = initial_addr;
4895 msq_init = *(floor(p1))
4896 p2 = initial_addr + VS - 1;
4897 realignment_token = call target_builtin;
4898 indx = 0;
4899 loop {
4900 p2 = p2 + indx * vectype_size
4901 lsq = *(floor(p2))
4902 vec_dest = realign_load (msq, lsq, realignment_token)
4903 indx = indx + 1;
4904 msq = lsq;
4905 } */
4906
4907 /* If the misalignment remains the same throughout the execution of the
4908 loop, we can create the init_addr and permutation mask at the loop
4909      preheader.  Otherwise, they need to be created inside the loop.
4910 This can only occur when vectorizing memory accesses in the inner-loop
4911 nested within an outer-loop that is being vectorized. */
4912
4913 if (nested_in_vect_loop
4914 && (TREE_INT_CST_LOW (DR_STEP (dr))
4915 % GET_MODE_SIZE (TYPE_MODE (vectype)) != 0))
4916 {
4917 gcc_assert (alignment_support_scheme != dr_explicit_realign_optimized);
4918 compute_in_loop = true;
4919 }
4920
4921 if ((alignment_support_scheme == dr_explicit_realign_optimized
4922 || alignment_support_scheme == dr_explicit_realign)
4923 && !compute_in_loop)
4924 {
4925 msq = vect_setup_realignment (first_stmt, gsi, &realignment_token,
4926 alignment_support_scheme, NULL_TREE,
4927 &at_loop);
4928 if (alignment_support_scheme == dr_explicit_realign_optimized)
4929 {
4930 phi = SSA_NAME_DEF_STMT (msq);
4931 offset = size_int (TYPE_VECTOR_SUBPARTS (vectype) - 1);
4932 }
4933 }
4934 else
4935 at_loop = loop;
4936
4937 if (negative)
4938 offset = size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1);
4939
4940 if (load_lanes_p)
4941 aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
4942 else
4943 aggr_type = vectype;
4944
4945 prev_stmt_info = NULL;
4946 for (j = 0; j < ncopies; j++)
4947 {
4948 /* 1. Create the vector or array pointer update chain. */
4949 if (j == 0)
4950 dataref_ptr = vect_create_data_ref_ptr (first_stmt, aggr_type, at_loop,
4951 offset, &dummy, gsi,
4952 &ptr_incr, false, &inv_p);
4953 else
4954 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
4955 TYPE_SIZE_UNIT (aggr_type));
4956
4957 if (grouped_load || slp_perm)
4958 dr_chain.create (vec_num);
4959
4960 if (load_lanes_p)
4961 {
4962 tree vec_array;
4963
4964 vec_array = create_vector_array (vectype, vec_num);
4965
4966 /* Emit:
4967 VEC_ARRAY = LOAD_LANES (MEM_REF[...all elements...]). */
4968 data_ref = create_array_ref (aggr_type, dataref_ptr, first_dr);
4969 new_stmt = gimple_build_call_internal (IFN_LOAD_LANES, 1, data_ref);
4970 gimple_call_set_lhs (new_stmt, vec_array);
4971 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4972
4973 /* Extract each vector into an SSA_NAME. */
4974 for (i = 0; i < vec_num; i++)
4975 {
4976 new_temp = read_vector_array (stmt, gsi, scalar_dest,
4977 vec_array, i);
4978 dr_chain.quick_push (new_temp);
4979 }
4980
4981 /* Record the mapping between SSA_NAMEs and statements. */
4982 vect_record_grouped_load_vectors (stmt, dr_chain);
4983 }
4984 else
4985 {
4986 for (i = 0; i < vec_num; i++)
4987 {
4988 if (i > 0)
4989 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
4990 stmt, NULL_TREE);
4991
4992 /* 2. Create the vector-load in the loop. */
4993 switch (alignment_support_scheme)
4994 {
4995 case dr_aligned:
4996 case dr_unaligned_supported:
4997 {
4998 unsigned int align, misalign;
4999
5000 data_ref
5001 = build2 (MEM_REF, vectype, dataref_ptr,
5002 build_int_cst (reference_alias_ptr_type
5003 (DR_REF (first_dr)), 0));
5004 align = TYPE_ALIGN_UNIT (vectype);
5005 if (alignment_support_scheme == dr_aligned)
5006 {
5007 gcc_assert (aligned_access_p (first_dr));
5008 misalign = 0;
5009 }
5010 else if (DR_MISALIGNMENT (first_dr) == -1)
5011 {
5012 TREE_TYPE (data_ref)
5013 = build_aligned_type (TREE_TYPE (data_ref),
5014 TYPE_ALIGN (elem_type));
5015 align = TYPE_ALIGN_UNIT (elem_type);
5016 misalign = 0;
5017 }
5018 else
5019 {
5020 TREE_TYPE (data_ref)
5021 = build_aligned_type (TREE_TYPE (data_ref),
5022 TYPE_ALIGN (elem_type));
5023 misalign = DR_MISALIGNMENT (first_dr);
5024 }
5025 set_ptr_info_alignment (get_ptr_info (dataref_ptr),
5026 align, misalign);
5027 break;
5028 }
5029 case dr_explicit_realign:
5030 {
5031 tree ptr, bump;
5032 tree vs_minus_1;
5033
5034 vs_minus_1 = size_int (TYPE_VECTOR_SUBPARTS (vectype) - 1);
5035
5036 if (compute_in_loop)
5037 msq = vect_setup_realignment (first_stmt, gsi,
5038 &realignment_token,
5039 dr_explicit_realign,
5040 dataref_ptr, NULL);
5041
5042 ptr = copy_ssa_name (dataref_ptr, NULL);
5043 new_stmt = gimple_build_assign_with_ops
5044 (BIT_AND_EXPR, ptr, dataref_ptr,
5045 build_int_cst
5046 (TREE_TYPE (dataref_ptr),
5047 -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
5048 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5049 data_ref
5050 = build2 (MEM_REF, vectype, ptr,
5051 build_int_cst (reference_alias_ptr_type
5052 (DR_REF (first_dr)), 0));
5053 vec_dest = vect_create_destination_var (scalar_dest,
5054 vectype);
5055 new_stmt = gimple_build_assign (vec_dest, data_ref);
5056 new_temp = make_ssa_name (vec_dest, new_stmt);
5057 gimple_assign_set_lhs (new_stmt, new_temp);
5058 gimple_set_vdef (new_stmt, gimple_vdef (stmt));
5059 gimple_set_vuse (new_stmt, gimple_vuse (stmt));
5060 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5061 msq = new_temp;
5062
5063 bump = size_binop (MULT_EXPR, vs_minus_1,
5064 TYPE_SIZE_UNIT (elem_type));
5065 ptr = bump_vector_ptr (dataref_ptr, NULL, gsi, stmt, bump);
5066 new_stmt = gimple_build_assign_with_ops
5067 (BIT_AND_EXPR, NULL_TREE, ptr,
5068 build_int_cst
5069 (TREE_TYPE (ptr),
5070 -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
5071 ptr = copy_ssa_name (dataref_ptr, new_stmt);
5072 gimple_assign_set_lhs (new_stmt, ptr);
5073 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5074 data_ref
5075 = build2 (MEM_REF, vectype, ptr,
5076 build_int_cst (reference_alias_ptr_type
5077 (DR_REF (first_dr)), 0));
5078 break;
5079 }
5080 case dr_explicit_realign_optimized:
5081 new_temp = copy_ssa_name (dataref_ptr, NULL);
5082 new_stmt = gimple_build_assign_with_ops
5083 (BIT_AND_EXPR, new_temp, dataref_ptr,
5084 build_int_cst
5085 (TREE_TYPE (dataref_ptr),
5086 -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
5087 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5088 data_ref
5089 = build2 (MEM_REF, vectype, new_temp,
5090 build_int_cst (reference_alias_ptr_type
5091 (DR_REF (first_dr)), 0));
5092 break;
5093 default:
5094 gcc_unreachable ();
5095 }
5096 vec_dest = vect_create_destination_var (scalar_dest, vectype);
5097 new_stmt = gimple_build_assign (vec_dest, data_ref);
5098 new_temp = make_ssa_name (vec_dest, new_stmt);
5099 gimple_assign_set_lhs (new_stmt, new_temp);
5100 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5101
5102 /* 3. Handle explicit realignment if necessary/supported.
5103 Create in loop:
5104 vec_dest = realign_load (msq, lsq, realignment_token) */
5105 if (alignment_support_scheme == dr_explicit_realign_optimized
5106 || alignment_support_scheme == dr_explicit_realign)
5107 {
5108 lsq = gimple_assign_lhs (new_stmt);
5109 if (!realignment_token)
5110 realignment_token = dataref_ptr;
5111 vec_dest = vect_create_destination_var (scalar_dest, vectype);
5112 new_stmt
5113 = gimple_build_assign_with_ops (REALIGN_LOAD_EXPR,
5114 vec_dest, msq, lsq,
5115 realignment_token);
5116 new_temp = make_ssa_name (vec_dest, new_stmt);
5117 gimple_assign_set_lhs (new_stmt, new_temp);
5118 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5119
5120 if (alignment_support_scheme == dr_explicit_realign_optimized)
5121 {
5122 gcc_assert (phi);
5123 if (i == vec_num - 1 && j == ncopies - 1)
5124 add_phi_arg (phi, lsq,
5125 loop_latch_edge (containing_loop),
5126 UNKNOWN_LOCATION);
5127 msq = lsq;
5128 }
5129 }
5130
5131 /* 4. Handle invariant-load. */
5132 if (inv_p && !bb_vinfo)
5133 {
5134 gimple_stmt_iterator gsi2 = *gsi;
5135 gcc_assert (!grouped_load);
5136 gsi_next (&gsi2);
5137 new_temp = vect_init_vector (stmt, scalar_dest,
5138 vectype, &gsi2);
5139 new_stmt = SSA_NAME_DEF_STMT (new_temp);
5140 }
5141
5142 if (negative)
5143 {
5144 tree perm_mask = perm_mask_for_reverse (vectype);
5145 new_temp = permute_vec_elements (new_temp, new_temp,
5146 perm_mask, stmt, gsi);
5147 new_stmt = SSA_NAME_DEF_STMT (new_temp);
5148 }
5149
5150 /* Collect vector loads and later create their permutation in
5151 vect_transform_grouped_load (). */
5152 if (grouped_load || slp_perm)
5153 dr_chain.quick_push (new_temp);
5154
5155 /* Store vector loads in the corresponding SLP_NODE. */
5156 if (slp && !slp_perm)
5157 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
5158 }
5159 /* Bump the vector pointer to account for a gap. */
5160 if (slp && group_gap != 0)
5161 {
5162 tree bump = size_binop (MULT_EXPR,
5163 TYPE_SIZE_UNIT (elem_type),
5164 size_int (group_gap));
5165 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
5166 stmt, bump);
5167 }
5168 }
5169
5170 if (slp && !slp_perm)
5171 continue;
5172
5173 if (slp_perm)
5174 {
5175 if (!vect_transform_slp_perm_load (slp_node, dr_chain, gsi, vf,
5176 slp_node_instance, false))
5177 {
5178 dr_chain.release ();
5179 return false;
5180 }
5181 }
5182 else
5183 {
5184 if (grouped_load)
5185 {
5186 if (!load_lanes_p)
5187 vect_transform_grouped_load (stmt, dr_chain, group_size, gsi);
5188 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
5189 }
5190 else
5191 {
5192 if (j == 0)
5193 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
5194 else
5195 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
5196 prev_stmt_info = vinfo_for_stmt (new_stmt);
5197 }
5198 }
5199 dr_chain.release ();
5200 }
5201
5202 return true;
5203 }
5204
5205 /* Function vect_is_simple_cond.
5206
5207 Input:
5208 LOOP - the loop that is being vectorized.
5209 COND - Condition that is checked for simple use.
5210
5211 Output:
5212 *COMP_VECTYPE - the vector type for the comparison.
5213
5214 Returns whether a COND can be vectorized. Checks whether
5215    condition operands are supportable using vect_is_simple_use.  */
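/* For example (an illustrative sketch; the SSA names are hypothetical):
   for the scalar statement

     x_1 = a_2 < b_3 ? c_4 : d_5;

   COND is the tree 'a_2 < b_3'; it is simple if a_2 and b_3 are
   constants or have vectorizable definitions, and *COMP_VECTYPE is then
   the vector type corresponding to their scalar type.  */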
5216
5217 static bool
5218 vect_is_simple_cond (tree cond, gimple stmt, loop_vec_info loop_vinfo,
5219 bb_vec_info bb_vinfo, tree *comp_vectype)
5220 {
5221 tree lhs, rhs;
5222 tree def;
5223 enum vect_def_type dt;
5224 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
5225
5226 if (!COMPARISON_CLASS_P (cond))
5227 return false;
5228
5229 lhs = TREE_OPERAND (cond, 0);
5230 rhs = TREE_OPERAND (cond, 1);
5231
5232 if (TREE_CODE (lhs) == SSA_NAME)
5233 {
5234 gimple lhs_def_stmt = SSA_NAME_DEF_STMT (lhs);
5235 if (!vect_is_simple_use_1 (lhs, stmt, loop_vinfo, bb_vinfo,
5236 &lhs_def_stmt, &def, &dt, &vectype1))
5237 return false;
5238 }
5239 else if (TREE_CODE (lhs) != INTEGER_CST && TREE_CODE (lhs) != REAL_CST
5240 && TREE_CODE (lhs) != FIXED_CST)
5241 return false;
5242
5243 if (TREE_CODE (rhs) == SSA_NAME)
5244 {
5245 gimple rhs_def_stmt = SSA_NAME_DEF_STMT (rhs);
5246 if (!vect_is_simple_use_1 (rhs, stmt, loop_vinfo, bb_vinfo,
5247 &rhs_def_stmt, &def, &dt, &vectype2))
5248 return false;
5249 }
5250 else if (TREE_CODE (rhs) != INTEGER_CST && TREE_CODE (rhs) != REAL_CST
5251 && TREE_CODE (rhs) != FIXED_CST)
5252 return false;
5253
5254 *comp_vectype = vectype1 ? vectype1 : vectype2;
5255 return true;
5256 }
5257
5258 /* vectorizable_condition.
5259
5260 Check if STMT is conditional modify expression that can be vectorized.
5261 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
5262 stmt using VEC_COND_EXPR to replace it, put it in VEC_STMT, and insert it
5263 at GSI.
5264
5265    When STMT is vectorized as a nested cycle, REDUC_DEF is the vector variable
5266    to be used at REDUC_INDEX (in the then clause if REDUC_INDEX is 1, and in
5267    the else clause if it is 2).
5268
5269 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
5270
5271 bool
5272 vectorizable_condition (gimple stmt, gimple_stmt_iterator *gsi,
5273 gimple *vec_stmt, tree reduc_def, int reduc_index,
5274 slp_tree slp_node)
5275 {
5276 tree scalar_dest = NULL_TREE;
5277 tree vec_dest = NULL_TREE;
5278 tree cond_expr, then_clause, else_clause;
5279 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
5280 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
5281 tree comp_vectype = NULL_TREE;
5282 tree vec_cond_lhs = NULL_TREE, vec_cond_rhs = NULL_TREE;
5283 tree vec_then_clause = NULL_TREE, vec_else_clause = NULL_TREE;
5284 tree vec_compare, vec_cond_expr;
5285 tree new_temp;
5286 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
5287 tree def;
5288 enum vect_def_type dt, dts[4];
5289 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
5290 int ncopies;
5291 enum tree_code code;
5292 stmt_vec_info prev_stmt_info = NULL;
5293 int i, j;
5294 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
5295 vec<tree> vec_oprnds0 = vNULL;
5296 vec<tree> vec_oprnds1 = vNULL;
5297 vec<tree> vec_oprnds2 = vNULL;
5298 vec<tree> vec_oprnds3 = vNULL;
5299 tree vec_cmp_type;
5300
5301 if (slp_node || PURE_SLP_STMT (stmt_info))
5302 ncopies = 1;
5303 else
5304 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
5305
5306 gcc_assert (ncopies >= 1);
5307 if (reduc_index && ncopies > 1)
5308 return false; /* FORNOW */
5309
5310 if (reduc_index && STMT_SLP_TYPE (stmt_info))
5311 return false;
5312
5313 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
5314 return false;
5315
5316 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
5317 && !(STMT_VINFO_DEF_TYPE (stmt_info) == vect_nested_cycle
5318 && reduc_def))
5319 return false;
5320
5321 /* FORNOW: not yet supported. */
5322 if (STMT_VINFO_LIVE_P (stmt_info))
5323 {
5324 if (dump_enabled_p ())
5325 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5326 "value used after loop.");
5327 return false;
5328 }
5329
5330 /* Is vectorizable conditional operation? */
5331 if (!is_gimple_assign (stmt))
5332 return false;
5333
5334 code = gimple_assign_rhs_code (stmt);
5335
5336 if (code != COND_EXPR)
5337 return false;
5338
5339 cond_expr = gimple_assign_rhs1 (stmt);
5340 then_clause = gimple_assign_rhs2 (stmt);
5341 else_clause = gimple_assign_rhs3 (stmt);
5342
5343 if (!vect_is_simple_cond (cond_expr, stmt, loop_vinfo, bb_vinfo,
5344 &comp_vectype)
5345 || !comp_vectype)
5346 return false;
5347
5348 if (TREE_CODE (then_clause) == SSA_NAME)
5349 {
5350 gimple then_def_stmt = SSA_NAME_DEF_STMT (then_clause);
5351 if (!vect_is_simple_use (then_clause, stmt, loop_vinfo, bb_vinfo,
5352 &then_def_stmt, &def, &dt))
5353 return false;
5354 }
5355 else if (TREE_CODE (then_clause) != INTEGER_CST
5356 && TREE_CODE (then_clause) != REAL_CST
5357 && TREE_CODE (then_clause) != FIXED_CST)
5358 return false;
5359
5360 if (TREE_CODE (else_clause) == SSA_NAME)
5361 {
5362 gimple else_def_stmt = SSA_NAME_DEF_STMT (else_clause);
5363 if (!vect_is_simple_use (else_clause, stmt, loop_vinfo, bb_vinfo,
5364 &else_def_stmt, &def, &dt))
5365 return false;
5366 }
5367 else if (TREE_CODE (else_clause) != INTEGER_CST
5368 && TREE_CODE (else_clause) != REAL_CST
5369 && TREE_CODE (else_clause) != FIXED_CST)
5370 return false;
5371
5372 unsigned int prec = GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (vectype)));
5373   /* The result of a vector comparison should be of signed integer type.  */
5374 tree cmp_type = build_nonstandard_integer_type (prec, 0);
5375 vec_cmp_type = get_same_sized_vectype (cmp_type, vectype);
5376 if (vec_cmp_type == NULL_TREE)
5377 return false;
5378
5379 if (!vec_stmt)
5380 {
5381 STMT_VINFO_TYPE (stmt_info) = condition_vec_info_type;
5382 return expand_vec_cond_expr_p (vectype, comp_vectype);
5383 }
5384
5385 /* Transform. */
5386
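 /* Illustrative sketch only (the names below are invented for exposition):
 for a scalar statement

 a = x < y ? b : c;

 each copy generated below builds, roughly,

 vec_cmp = x_vec < y_vec; (in VEC_CMP_TYPE)
 a_vec = VEC_COND_EXPR <vec_cmp, b_vec, c_vec>; (in VECTYPE)

 i.e. a comparison in the signed comparison vector type followed by a
 VEC_COND_EXPR that selects between the vectorized THEN and ELSE values.  */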
5387 if (!slp_node)
5388 {
5389 vec_oprnds0.create (1);
5390 vec_oprnds1.create (1);
5391 vec_oprnds2.create (1);
5392 vec_oprnds3.create (1);
5393 }
5394
5395 /* Handle def. */
5396 scalar_dest = gimple_assign_lhs (stmt);
5397 vec_dest = vect_create_destination_var (scalar_dest, vectype);
5398
5399 /* Handle cond expr. */
5400 for (j = 0; j < ncopies; j++)
5401 {
5402 gimple new_stmt = NULL;
5403 if (j == 0)
5404 {
5405 if (slp_node)
5406 {
5407 vec<tree> ops;
5408 ops.create (4);
5409 vec<vec<tree> > vec_defs;
5410
5411 vec_defs.create (4);
5412 ops.safe_push (TREE_OPERAND (cond_expr, 0));
5413 ops.safe_push (TREE_OPERAND (cond_expr, 1));
5414 ops.safe_push (then_clause);
5415 ops.safe_push (else_clause);
5416 vect_get_slp_defs (ops, slp_node, &vec_defs, -1);
5417 vec_oprnds3 = vec_defs.pop ();
5418 vec_oprnds2 = vec_defs.pop ();
5419 vec_oprnds1 = vec_defs.pop ();
5420 vec_oprnds0 = vec_defs.pop ();
5421
5422 ops.release ();
5423 vec_defs.release ();
5424 }
5425 else
5426 {
5427 gimple gtemp;
5428 vec_cond_lhs =
5429 vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 0),
5430 stmt, NULL);
5431 vect_is_simple_use (TREE_OPERAND (cond_expr, 0), stmt,
5432 loop_vinfo, NULL, &gtemp, &def, &dts[0]);
5433
5434 vec_cond_rhs =
5435 vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 1),
5436 stmt, NULL);
5437 vect_is_simple_use (TREE_OPERAND (cond_expr, 1), stmt,
5438 loop_vinfo, NULL, &gtemp, &def, &dts[1]);
5439 if (reduc_index == 1)
5440 vec_then_clause = reduc_def;
5441 else
5442 {
5443 vec_then_clause = vect_get_vec_def_for_operand (then_clause,
5444 stmt, NULL);
5445 vect_is_simple_use (then_clause, stmt, loop_vinfo,
5446 NULL, &gtemp, &def, &dts[2]);
5447 }
5448 if (reduc_index == 2)
5449 vec_else_clause = reduc_def;
5450 else
5451 {
5452 vec_else_clause = vect_get_vec_def_for_operand (else_clause,
5453 stmt, NULL);
5454 vect_is_simple_use (else_clause, stmt, loop_vinfo,
5455 NULL, &gtemp, &def, &dts[3]);
5456 }
5457 }
5458 }
5459 else
5460 {
5461 vec_cond_lhs = vect_get_vec_def_for_stmt_copy (dts[0],
5462 vec_oprnds0.pop ());
5463 vec_cond_rhs = vect_get_vec_def_for_stmt_copy (dts[1],
5464 vec_oprnds1.pop ());
5465 vec_then_clause = vect_get_vec_def_for_stmt_copy (dts[2],
5466 vec_oprnds2.pop ());
5467 vec_else_clause = vect_get_vec_def_for_stmt_copy (dts[3],
5468 vec_oprnds3.pop ());
5469 }
5470
5471 if (!slp_node)
5472 {
5473 vec_oprnds0.quick_push (vec_cond_lhs);
5474 vec_oprnds1.quick_push (vec_cond_rhs);
5475 vec_oprnds2.quick_push (vec_then_clause);
5476 vec_oprnds3.quick_push (vec_else_clause);
5477 }
5478
5479 /* Arguments are ready. Create the new vector stmt. */
5480 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_cond_lhs)
5481 {
5482 vec_cond_rhs = vec_oprnds1[i];
5483 vec_then_clause = vec_oprnds2[i];
5484 vec_else_clause = vec_oprnds3[i];
5485
5486 vec_compare = build2 (TREE_CODE (cond_expr), vec_cmp_type,
5487 vec_cond_lhs, vec_cond_rhs);
5488 vec_cond_expr = build3 (VEC_COND_EXPR, vectype,
5489 vec_compare, vec_then_clause, vec_else_clause);
5490
5491 new_stmt = gimple_build_assign (vec_dest, vec_cond_expr);
5492 new_temp = make_ssa_name (vec_dest, new_stmt);
5493 gimple_assign_set_lhs (new_stmt, new_temp);
5494 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5495 if (slp_node)
5496 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
5497 }
5498
5499 if (slp_node)
5500 continue;
5501
5502 if (j == 0)
5503 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
5504 else
5505 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
5506
5507 prev_stmt_info = vinfo_for_stmt (new_stmt);
5508 }
5509
5510 vec_oprnds0.release ();
5511 vec_oprnds1.release ();
5512 vec_oprnds2.release ();
5513 vec_oprnds3.release ();
5514
5515 return true;
5516 }
5517
5518
5519 /* Make sure the statement is vectorizable. */
5520
5521 bool
5522 vect_analyze_stmt (gimple stmt, bool *need_to_vectorize, slp_tree node)
5523 {
5524 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
5525 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
5526 enum vect_relevant relevance = STMT_VINFO_RELEVANT (stmt_info);
5527 bool ok;
5528 tree scalar_type, vectype;
5529 gimple pattern_stmt;
5530 gimple_seq pattern_def_seq;
5531
5532 if (dump_enabled_p ())
5533 {
5534 dump_printf_loc (MSG_NOTE, vect_location, "==> examining statement: ");
5535 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
5536 }
5537
5538 if (gimple_has_volatile_ops (stmt))
5539 {
5540 if (dump_enabled_p ())
5541 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5542 "not vectorized: stmt has volatile operands");
5543
5544 return false;
5545 }
5546
5547 /* Skip stmts that do not need to be vectorized. In loops this is expected
5548 to include:
5549 - the COND_EXPR which is the loop exit condition
5550 - any LABEL_EXPRs in the loop
5551 - computations that are used only for array indexing or loop control.
5552 In basic blocks we only analyze statements that are a part of some SLP
5553 instance; therefore, all the statements are relevant.
5554
5555 A pattern statement needs to be analyzed instead of the original statement
5556 if the original statement is not relevant. Otherwise, we analyze both
5557 statements. In basic blocks we are called from some SLP instance
5558 traversal; there we don't analyze pattern stmts separately, since the
5559 pattern stmts will already be part of the SLP instance. */
5560
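 /* For example (illustrative only): in a loop like

 for (i = 0; i < n; i++)
 a[i] = b[i] + 1;

 the increment of I and the exit test I < N are used only for loop
 control and are handled by the code below, while the load, the
 addition and the store are analyzed further.  */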
5561 pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
5562 if (!STMT_VINFO_RELEVANT_P (stmt_info)
5563 && !STMT_VINFO_LIVE_P (stmt_info))
5564 {
5565 if (STMT_VINFO_IN_PATTERN_P (stmt_info)
5566 && pattern_stmt
5567 && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
5568 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
5569 {
5570 /* Analyze PATTERN_STMT instead of the original stmt. */
5571 stmt = pattern_stmt;
5572 stmt_info = vinfo_for_stmt (pattern_stmt);
5573 if (dump_enabled_p ())
5574 {
5575 dump_printf_loc (MSG_NOTE, vect_location,
5576 "==> examining pattern statement: ");
5577 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
5578 }
5579 }
5580 else
5581 {
5582 if (dump_enabled_p ())
5583 dump_printf_loc (MSG_NOTE, vect_location, "irrelevant.");
5584
5585 return true;
5586 }
5587 }
5588 else if (STMT_VINFO_IN_PATTERN_P (stmt_info)
5589 && node == NULL
5590 && pattern_stmt
5591 && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
5592 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
5593 {
5594 /* Analyze PATTERN_STMT too. */
5595 if (dump_enabled_p ())
5596 {
5597 dump_printf_loc (MSG_NOTE, vect_location,
5598 "==> examining pattern statement: ");
5599 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
5600 }
5601
5602 if (!vect_analyze_stmt (pattern_stmt, need_to_vectorize, node))
5603 return false;
5604 }
5605
5606 if (is_pattern_stmt_p (stmt_info)
5607 && node == NULL
5608 && (pattern_def_seq = STMT_VINFO_PATTERN_DEF_SEQ (stmt_info)))
5609 {
5610 gimple_stmt_iterator si;
5611
5612 for (si = gsi_start (pattern_def_seq); !gsi_end_p (si); gsi_next (&si))
5613 {
5614 gimple pattern_def_stmt = gsi_stmt (si);
5615 if (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_def_stmt))
5616 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_def_stmt)))
5617 {
5618 /* Analyze def stmt of STMT if it's a pattern stmt. */
5619 if (dump_enabled_p ())
5620 {
5621 dump_printf_loc (MSG_NOTE, vect_location,
5622 "==> examining pattern def statement: ");
5623 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_def_stmt, 0);
5624 }
5625
5626 if (!vect_analyze_stmt (pattern_def_stmt,
5627 need_to_vectorize, node))
5628 return false;
5629 }
5630 }
5631 }
5632
5633 switch (STMT_VINFO_DEF_TYPE (stmt_info))
5634 {
5635 case vect_internal_def:
5636 break;
5637
5638 case vect_reduction_def:
5639 case vect_nested_cycle:
5640 gcc_assert (!bb_vinfo && (relevance == vect_used_in_outer
5641 || relevance == vect_used_in_outer_by_reduction
5642 || relevance == vect_unused_in_scope));
5643 break;
5644
5645 case vect_induction_def:
5646 case vect_constant_def:
5647 case vect_external_def:
5648 case vect_unknown_def_type:
5649 default:
5650 gcc_unreachable ();
5651 }
5652
5653 if (bb_vinfo)
5654 {
5655 gcc_assert (PURE_SLP_STMT (stmt_info));
5656
5657 scalar_type = TREE_TYPE (gimple_get_lhs (stmt));
5658 if (dump_enabled_p ())
5659 {
5660 dump_printf_loc (MSG_NOTE, vect_location,
5661 "get vectype for scalar type: ");
5662 dump_generic_expr (MSG_NOTE, TDF_SLIM, scalar_type);
5663 }
5664
5665 vectype = get_vectype_for_scalar_type (scalar_type);
5666 if (!vectype)
5667 {
5668 if (dump_enabled_p ())
5669 {
5670 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5671 "not SLPed: unsupported data-type ");
5672 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
5673 scalar_type);
5674 }
5675 return false;
5676 }
5677
5678 if (dump_enabled_p ())
5679 {
5680 dump_printf_loc (MSG_NOTE, vect_location, "vectype: ");
5681 dump_generic_expr (MSG_NOTE, TDF_SLIM, vectype);
5682 }
5683
5684 STMT_VINFO_VECTYPE (stmt_info) = vectype;
5685 }
5686
5687 if (STMT_VINFO_RELEVANT_P (stmt_info))
5688 {
5689 gcc_assert (!VECTOR_MODE_P (TYPE_MODE (gimple_expr_type (stmt))));
5690 gcc_assert (STMT_VINFO_VECTYPE (stmt_info));
5691 *need_to_vectorize = true;
5692 }
5693
5694 ok = true;
5695 if (!bb_vinfo
5696 && (STMT_VINFO_RELEVANT_P (stmt_info)
5697 || STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def))
5698 ok = (vectorizable_conversion (stmt, NULL, NULL, NULL)
5699 || vectorizable_shift (stmt, NULL, NULL, NULL)
5700 || vectorizable_operation (stmt, NULL, NULL, NULL)
5701 || vectorizable_assignment (stmt, NULL, NULL, NULL)
5702 || vectorizable_load (stmt, NULL, NULL, NULL, NULL)
5703 || vectorizable_call (stmt, NULL, NULL, NULL)
5704 || vectorizable_store (stmt, NULL, NULL, NULL)
5705 || vectorizable_reduction (stmt, NULL, NULL, NULL)
5706 || vectorizable_condition (stmt, NULL, NULL, NULL, 0, NULL));
5707 else
5708 {
5709 if (bb_vinfo)
5710 ok = (vectorizable_conversion (stmt, NULL, NULL, node)
5711 || vectorizable_shift (stmt, NULL, NULL, node)
5712 || vectorizable_operation (stmt, NULL, NULL, node)
5713 || vectorizable_assignment (stmt, NULL, NULL, node)
5714 || vectorizable_load (stmt, NULL, NULL, node, NULL)
5715 || vectorizable_call (stmt, NULL, NULL, node)
5716 || vectorizable_store (stmt, NULL, NULL, node)
5717 || vectorizable_condition (stmt, NULL, NULL, NULL, 0, node));
5718 }
5719
5720 if (!ok)
5721 {
5722 if (dump_enabled_p ())
5723 {
5724 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5725 "not vectorized: relevant stmt not ");
5726 dump_printf (MSG_MISSED_OPTIMIZATION, "supported: ");
5727 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
5728 }
5729
5730 return false;
5731 }
5732
5733 if (bb_vinfo)
5734 return true;
5735
5736 /* Stmts that are (also) "live" (i.e., that are used outside of the loop)
5737 need extra handling, except for vectorizable reductions. */
5738 if (STMT_VINFO_LIVE_P (stmt_info)
5739 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type)
5740 ok = vectorizable_live_operation (stmt, NULL, NULL);
5741
5742 if (!ok)
5743 {
5744 if (dump_enabled_p ())
5745 {
5746 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5747 "not vectorized: live stmt not ");
5748 dump_printf (MSG_MISSED_OPTIMIZATION, "supported: ");
5749 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
5750 }
5751
5752 return false;
5753 }
5754
5755 return true;
5756 }
5757
5758
5759 /* Function vect_transform_stmt.
5760
5761 Create a vectorized stmt to replace STMT, and insert it at BSI. */
5762
5763 bool
5764 vect_transform_stmt (gimple stmt, gimple_stmt_iterator *gsi,
5765 bool *grouped_store, slp_tree slp_node,
5766 slp_instance slp_node_instance)
5767 {
5768 bool is_store = false;
5769 gimple vec_stmt = NULL;
5770 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
5771 bool done;
5772
5773 switch (STMT_VINFO_TYPE (stmt_info))
5774 {
5775 case type_demotion_vec_info_type:
5776 case type_promotion_vec_info_type:
5777 case type_conversion_vec_info_type:
5778 done = vectorizable_conversion (stmt, gsi, &vec_stmt, slp_node);
5779 gcc_assert (done);
5780 break;
5781
5782 case induc_vec_info_type:
5783 gcc_assert (!slp_node);
5784 done = vectorizable_induction (stmt, gsi, &vec_stmt);
5785 gcc_assert (done);
5786 break;
5787
5788 case shift_vec_info_type:
5789 done = vectorizable_shift (stmt, gsi, &vec_stmt, slp_node);
5790 gcc_assert (done);
5791 break;
5792
5793 case op_vec_info_type:
5794 done = vectorizable_operation (stmt, gsi, &vec_stmt, slp_node);
5795 gcc_assert (done);
5796 break;
5797
5798 case assignment_vec_info_type:
5799 done = vectorizable_assignment (stmt, gsi, &vec_stmt, slp_node);
5800 gcc_assert (done);
5801 break;
5802
5803 case load_vec_info_type:
5804 done = vectorizable_load (stmt, gsi, &vec_stmt, slp_node,
5805 slp_node_instance);
5806 gcc_assert (done);
5807 break;
5808
5809 case store_vec_info_type:
5810 done = vectorizable_store (stmt, gsi, &vec_stmt, slp_node);
5811 gcc_assert (done);
5812 if (STMT_VINFO_GROUPED_ACCESS (stmt_info) && !slp_node)
5813 {
5814 /* In case of interleaving, the whole chain is vectorized when the
5815 last store in the chain is reached. Store stmts before the last
5816 one are skipped, and their vec_stmt_info shouldn't be freed
5817 meanwhile. */
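 /* Illustrative example (not taken from this file): for an interleaved
 group such as

 a[2*i] = x;
 a[2*i+1] = y;

 only the second (last) store of the chain triggers generation of the
 vector stores; the first one is skipped here but must keep its
 stmt_vec_info alive until then.  */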
5818 *grouped_store = true;
5819 if (STMT_VINFO_VEC_STMT (stmt_info))
5820 is_store = true;
5821 }
5822 else
5823 is_store = true;
5824 break;
5825
5826 case condition_vec_info_type:
5827 done = vectorizable_condition (stmt, gsi, &vec_stmt, NULL, 0, slp_node);
5828 gcc_assert (done);
5829 break;
5830
5831 case call_vec_info_type:
5832 done = vectorizable_call (stmt, gsi, &vec_stmt, slp_node);
5833 stmt = gsi_stmt (*gsi);
5834 break;
5835
5836 case reduc_vec_info_type:
5837 done = vectorizable_reduction (stmt, gsi, &vec_stmt, slp_node);
5838 gcc_assert (done);
5839 break;
5840
5841 default:
5842 if (!STMT_VINFO_LIVE_P (stmt_info))
5843 {
5844 if (dump_enabled_p ())
5845 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5846 "stmt not supported.");
5847 gcc_unreachable ();
5848 }
5849 }
5850
5851 /* Handle inner-loop stmts whose DEF is used in the loop-nest that
5852 is being vectorized, but outside the immediately enclosing loop. */
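 /* Illustrative example (names invented): in a nest like

 for (i = 0; i < n; i++) (outer loop being vectorized)
 {
 for (j = 0; j < m; j++) (inner loop)
 s = ...;
 use (s); (outer-loop use of the inner def)
 }

 the vectorized definition of S is recorded in the stmt_vec_info of the
 inner-loop exit phi so that the outer-loop use can find it.  */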
5853 if (vec_stmt
5854 && STMT_VINFO_LOOP_VINFO (stmt_info)
5855 && nested_in_vect_loop_p (LOOP_VINFO_LOOP (
5856 STMT_VINFO_LOOP_VINFO (stmt_info)), stmt)
5857 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type
5858 && (STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_outer
5859 || STMT_VINFO_RELEVANT (stmt_info) ==
5860 vect_used_in_outer_by_reduction))
5861 {
5862 struct loop *innerloop = LOOP_VINFO_LOOP (
5863 STMT_VINFO_LOOP_VINFO (stmt_info))->inner;
5864 imm_use_iterator imm_iter;
5865 use_operand_p use_p;
5866 tree scalar_dest;
5867 gimple exit_phi;
5868
5869 if (dump_enabled_p ())
5870 dump_printf_loc (MSG_NOTE, vect_location,
5871 "Record the vdef for outer-loop vectorization.");
5872
5873 /* Find the relevant loop-exit phi-node, and record the vec_stmt there
5874 (to be used when vectorizing outer-loop stmts that use the DEF of
5875 STMT). */
5876 if (gimple_code (stmt) == GIMPLE_PHI)
5877 scalar_dest = PHI_RESULT (stmt);
5878 else
5879 scalar_dest = gimple_assign_lhs (stmt);
5880
5881 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, scalar_dest)
5882 {
5883 if (!flow_bb_inside_loop_p (innerloop, gimple_bb (USE_STMT (use_p))))
5884 {
5885 exit_phi = USE_STMT (use_p);
5886 STMT_VINFO_VEC_STMT (vinfo_for_stmt (exit_phi)) = vec_stmt;
5887 }
5888 }
5889 }
5890
5891 /* Handle stmts whose DEF is used outside the loop-nest that is
5892 being vectorized. */
5893 if (STMT_VINFO_LIVE_P (stmt_info)
5894 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type)
5895 {
5896 done = vectorizable_live_operation (stmt, gsi, &vec_stmt);
5897 gcc_assert (done);
5898 }
5899
5900 if (vec_stmt)
5901 STMT_VINFO_VEC_STMT (stmt_info) = vec_stmt;
5902
5903 return is_store;
5904 }
5905
5906
5907 /* Remove a group of stores (for SLP or interleaving), free their
5908 stmt_vec_info. */
5909
5910 void
5911 vect_remove_stores (gimple first_stmt)
5912 {
5913 gimple next = first_stmt;
5914 gimple tmp;
5915 gimple_stmt_iterator next_si;
5916
5917 while (next)
5918 {
5919 stmt_vec_info stmt_info = vinfo_for_stmt (next);
5920
5921 tmp = GROUP_NEXT_ELEMENT (stmt_info);
5922 if (is_pattern_stmt_p (stmt_info))
5923 next = STMT_VINFO_RELATED_STMT (stmt_info);
5924 /* Free the attached stmt_vec_info and remove the stmt. */
5925 next_si = gsi_for_stmt (next);
5926 unlink_stmt_vdef (next);
5927 gsi_remove (&next_si, true);
5928 release_defs (next);
5929 free_stmt_vec_info (next);
5930 next = tmp;
5931 }
5932 }
5933
5934
5935 /* Function new_stmt_vec_info.
5936
5937 Create and initialize a new stmt_vec_info struct for STMT. */
5938
5939 stmt_vec_info
5940 new_stmt_vec_info (gimple stmt, loop_vec_info loop_vinfo,
5941 bb_vec_info bb_vinfo)
5942 {
5943 stmt_vec_info res;
5944 res = (stmt_vec_info) xcalloc (1, sizeof (struct _stmt_vec_info));
5945
5946 STMT_VINFO_TYPE (res) = undef_vec_info_type;
5947 STMT_VINFO_STMT (res) = stmt;
5948 STMT_VINFO_LOOP_VINFO (res) = loop_vinfo;
5949 STMT_VINFO_BB_VINFO (res) = bb_vinfo;
5950 STMT_VINFO_RELEVANT (res) = vect_unused_in_scope;
5951 STMT_VINFO_LIVE_P (res) = false;
5952 STMT_VINFO_VECTYPE (res) = NULL;
5953 STMT_VINFO_VEC_STMT (res) = NULL;
5954 STMT_VINFO_VECTORIZABLE (res) = true;
5955 STMT_VINFO_IN_PATTERN_P (res) = false;
5956 STMT_VINFO_RELATED_STMT (res) = NULL;
5957 STMT_VINFO_PATTERN_DEF_SEQ (res) = NULL;
5958 STMT_VINFO_DATA_REF (res) = NULL;
5959
5960 STMT_VINFO_DR_BASE_ADDRESS (res) = NULL;
5961 STMT_VINFO_DR_OFFSET (res) = NULL;
5962 STMT_VINFO_DR_INIT (res) = NULL;
5963 STMT_VINFO_DR_STEP (res) = NULL;
5964 STMT_VINFO_DR_ALIGNED_TO (res) = NULL;
5965
5966 if (gimple_code (stmt) == GIMPLE_PHI
5967 && is_loop_header_bb_p (gimple_bb (stmt)))
5968 STMT_VINFO_DEF_TYPE (res) = vect_unknown_def_type;
5969 else
5970 STMT_VINFO_DEF_TYPE (res) = vect_internal_def;
5971
5972 STMT_VINFO_SAME_ALIGN_REFS (res).create (0);
5973 STMT_SLP_TYPE (res) = loop_vect;
5974 GROUP_FIRST_ELEMENT (res) = NULL;
5975 GROUP_NEXT_ELEMENT (res) = NULL;
5976 GROUP_SIZE (res) = 0;
5977 GROUP_STORE_COUNT (res) = 0;
5978 GROUP_GAP (res) = 0;
5979 GROUP_SAME_DR_STMT (res) = NULL;
5980
5981 return res;
5982 }
5983
5984
5985 /* Create the vector that holds stmt_vec_info structs. */
5986
5987 void
5988 init_stmt_vec_info_vec (void)
5989 {
5990 gcc_assert (!stmt_vec_info_vec.exists ());
5991 stmt_vec_info_vec.create (50);
5992 }
5993
5994
5995 /* Free the stmt_vec_info vector. */
5996
5997 void
5998 free_stmt_vec_info_vec (void)
5999 {
6000 unsigned int i;
6001 vec_void_p info;
6002 FOR_EACH_VEC_ELT (stmt_vec_info_vec, i, info)
6003 if (info != NULL)
6004 free_stmt_vec_info (STMT_VINFO_STMT ((stmt_vec_info) info));
6005 gcc_assert (stmt_vec_info_vec.exists ());
6006 stmt_vec_info_vec.release ();
6007 }
6008
6009
6010 /* Free stmt vectorization related info. */
6011
6012 void
6013 free_stmt_vec_info (gimple stmt)
6014 {
6015 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
6016
6017 if (!stmt_info)
6018 return;
6019
6020 /* Check if this statement has a related "pattern stmt"
6021 (introduced by the vectorizer during the pattern recognition
6022 pass). Free the pattern's stmt_vec_info and the def stmts' stmt_vec_infos
6023 too. */
6024 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
6025 {
6026 stmt_vec_info patt_info
6027 = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
6028 if (patt_info)
6029 {
6030 gimple_seq seq = STMT_VINFO_PATTERN_DEF_SEQ (patt_info);
6031 if (seq)
6032 {
6033 gimple_stmt_iterator si;
6034 for (si = gsi_start (seq); !gsi_end_p (si); gsi_next (&si))
6035 free_stmt_vec_info (gsi_stmt (si));
6036 }
6037 free_stmt_vec_info (STMT_VINFO_RELATED_STMT (stmt_info));
6038 }
6039 }
6040
6041 STMT_VINFO_SAME_ALIGN_REFS (stmt_info).release ();
6042 set_vinfo_for_stmt (stmt, NULL);
6043 free (stmt_info);
6044 }
6045
6046
6047 /* Function get_vectype_for_scalar_type_and_size.
6048
6049 Returns the vector type corresponding to SCALAR_TYPE and SIZE as supported
6050 by the target. */
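 /* For instance (illustrative only): on a target with 128-bit vectors,
 SCALAR_TYPE == int and SIZE == 16 would yield a V4SImode vector type,
 while SIZE == 0 lets the target pick its preferred SIMD mode.  */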
6051
6052 static tree
6053 get_vectype_for_scalar_type_and_size (tree scalar_type, unsigned size)
6054 {
6055 enum machine_mode inner_mode = TYPE_MODE (scalar_type);
6056 enum machine_mode simd_mode;
6057 unsigned int nbytes = GET_MODE_SIZE (inner_mode);
6058 int nunits;
6059 tree vectype;
6060
6061 if (nbytes == 0)
6062 return NULL_TREE;
6063
6064 if (GET_MODE_CLASS (inner_mode) != MODE_INT
6065 && GET_MODE_CLASS (inner_mode) != MODE_FLOAT)
6066 return NULL_TREE;
6067
6068 /* For vector types of elements whose mode precision doesn't
6069 match their type's precision we use an element type of mode
6070 precision. The vectorization routines will have to make sure
6071 they support the proper result truncation/extension.
6072 We also make sure to build vector types with INTEGER_TYPE
6073 component type only. */
6074 if (INTEGRAL_TYPE_P (scalar_type)
6075 && (GET_MODE_BITSIZE (inner_mode) != TYPE_PRECISION (scalar_type)
6076 || TREE_CODE (scalar_type) != INTEGER_TYPE))
6077 scalar_type = build_nonstandard_integer_type (GET_MODE_BITSIZE (inner_mode),
6078 TYPE_UNSIGNED (scalar_type));
6079
6080 /* We shouldn't end up building VECTOR_TYPEs of non-scalar components.
6081 When the component mode passes the above test simply use a type
6082 corresponding to that mode. The theory is that any use that
6083 would cause problems with this will disable vectorization anyway. */
6084 else if (!SCALAR_FLOAT_TYPE_P (scalar_type)
6085 && !INTEGRAL_TYPE_P (scalar_type)
6086 && !POINTER_TYPE_P (scalar_type))
6087 scalar_type = lang_hooks.types.type_for_mode (inner_mode, 1);
6088
6089 /* We can't build a vector type of elements with alignment bigger than
6090 their size. */
6091 else if (nbytes < TYPE_ALIGN_UNIT (scalar_type))
6092 scalar_type = lang_hooks.types.type_for_mode (inner_mode,
6093 TYPE_UNSIGNED (scalar_type));
6094
6095 /* If we fell back to using the mode, fail if there was
6096 no scalar type for it. */
6097 if (scalar_type == NULL_TREE)
6098 return NULL_TREE;
6099
6100 /* If no size was supplied, use the mode the target prefers. Otherwise
6101 lookup a vector mode of the specified size. */
6102 if (size == 0)
6103 simd_mode = targetm.vectorize.preferred_simd_mode (inner_mode);
6104 else
6105 simd_mode = mode_for_vector (inner_mode, size / nbytes);
6106 nunits = GET_MODE_SIZE (simd_mode) / nbytes;
6107 if (nunits <= 1)
6108 return NULL_TREE;
6109
6110 vectype = build_vector_type (scalar_type, nunits);
6111
6112 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
6113 && !INTEGRAL_MODE_P (TYPE_MODE (vectype)))
6114 return NULL_TREE;
6115
6116 return vectype;
6117 }
6118
6119 unsigned int current_vector_size;
6120
6121 /* Function get_vectype_for_scalar_type.
6122
6123 Returns the vector type corresponding to SCALAR_TYPE as supported
6124 by the target. */
6125
6126 tree
6127 get_vectype_for_scalar_type (tree scalar_type)
6128 {
6129 tree vectype;
6130 vectype = get_vectype_for_scalar_type_and_size (scalar_type,
6131 current_vector_size);
6132 if (vectype
6133 && current_vector_size == 0)
6134 current_vector_size = GET_MODE_SIZE (TYPE_MODE (vectype));
6135 return vectype;
6136 }
6137
6138 /* Function get_same_sized_vectype
6139
6140 Returns a vector type corresponding to SCALAR_TYPE that has the same
6141 size as VECTOR_TYPE, if supported by the target. */
6142
6143 tree
6144 get_same_sized_vectype (tree scalar_type, tree vector_type)
6145 {
6146 return get_vectype_for_scalar_type_and_size
6147 (scalar_type, GET_MODE_SIZE (TYPE_MODE (vector_type)));
6148 }
6149
6150 /* Function vect_is_simple_use.
6151
6152 Input:
6153 LOOP_VINFO - the vect info of the loop that is being vectorized.
6154 BB_VINFO - the vect info of the basic block that is being vectorized.
6155 OPERAND - operand of STMT in the loop or bb.
6156 DEF - the defining stmt in case OPERAND is an SSA_NAME.
6157
6158 Returns whether a stmt with OPERAND can be vectorized.
6159 For loops, supportable operands are constants, loop invariants, and operands
6160 that are defined by the current iteration of the loop. Unsupportable
6161 operands are those that are defined by a previous iteration of the loop (as
6162 is the case in reduction/induction computations).
6163 For basic blocks, supportable operands are constants and bb invariants.
6164 For now, operands defined outside the basic block are not supported. */
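 /* A rough example (loop case, names invented):

 x = ...; (defined before the loop)
 for (i = 0; i < n; i++)
 a[i] = x + b[i] + 5;

 here X is classified as vect_external_def, the constant 5 as
 vect_constant_def, and the load of b[i] as vect_internal_def; an operand
 defined by a previous iteration (a reduction or induction) is the
 unsupportable case described above.  */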
6165
6166 bool
6167 vect_is_simple_use (tree operand, gimple stmt, loop_vec_info loop_vinfo,
6168 bb_vec_info bb_vinfo, gimple *def_stmt,
6169 tree *def, enum vect_def_type *dt)
6170 {
6171 basic_block bb;
6172 stmt_vec_info stmt_vinfo;
6173 struct loop *loop = NULL;
6174
6175 if (loop_vinfo)
6176 loop = LOOP_VINFO_LOOP (loop_vinfo);
6177
6178 *def_stmt = NULL;
6179 *def = NULL_TREE;
6180
6181 if (dump_enabled_p ())
6182 {
6183 dump_printf_loc (MSG_NOTE, vect_location,
6184 "vect_is_simple_use: operand ");
6185 dump_generic_expr (MSG_NOTE, TDF_SLIM, operand);
6186 }
6187
6188 if (CONSTANT_CLASS_P (operand))
6189 {
6190 *dt = vect_constant_def;
6191 return true;
6192 }
6193
6194 if (is_gimple_min_invariant (operand))
6195 {
6196 *def = operand;
6197 *dt = vect_external_def;
6198 return true;
6199 }
6200
6201 if (TREE_CODE (operand) == PAREN_EXPR)
6202 {
6203 if (dump_enabled_p ())
6204 dump_printf_loc (MSG_NOTE, vect_location, "non-associatable copy.");
6205 operand = TREE_OPERAND (operand, 0);
6206 }
6207
6208 if (TREE_CODE (operand) != SSA_NAME)
6209 {
6210 if (dump_enabled_p ())
6211 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6212 "not ssa-name.");
6213 return false;
6214 }
6215
6216 *def_stmt = SSA_NAME_DEF_STMT (operand);
6217 if (*def_stmt == NULL)
6218 {
6219 if (dump_enabled_p ())
6220 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6221 "no def_stmt.");
6222 return false;
6223 }
6224
6225 if (dump_enabled_p ())
6226 {
6227 dump_printf_loc (MSG_NOTE, vect_location, "def_stmt: ");
6228 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, *def_stmt, 0);
6229 }
6230
6231 /* An empty stmt is expected only in the case of a function argument
6232 (otherwise we expect a PHI node or a GIMPLE_ASSIGN). */
6233 if (gimple_nop_p (*def_stmt))
6234 {
6235 *def = operand;
6236 *dt = vect_external_def;
6237 return true;
6238 }
6239
6240 bb = gimple_bb (*def_stmt);
6241
6242 if ((loop && !flow_bb_inside_loop_p (loop, bb))
6243 || (!loop && bb != BB_VINFO_BB (bb_vinfo))
6244 || (!loop && gimple_code (*def_stmt) == GIMPLE_PHI))
6245 *dt = vect_external_def;
6246 else
6247 {
6248 stmt_vinfo = vinfo_for_stmt (*def_stmt);
6249 *dt = STMT_VINFO_DEF_TYPE (stmt_vinfo);
6250 }
6251
6252 if (*dt == vect_unknown_def_type
6253 || (stmt
6254 && *dt == vect_double_reduction_def
6255 && gimple_code (stmt) != GIMPLE_PHI))
6256 {
6257 if (dump_enabled_p ())
6258 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6259 "Unsupported pattern.");
6260 return false;
6261 }
6262
6263 if (dump_enabled_p ())
6264 dump_printf_loc (MSG_NOTE, vect_location, "type of def: %d.", *dt);
6265
6266 switch (gimple_code (*def_stmt))
6267 {
6268 case GIMPLE_PHI:
6269 *def = gimple_phi_result (*def_stmt);
6270 break;
6271
6272 case GIMPLE_ASSIGN:
6273 *def = gimple_assign_lhs (*def_stmt);
6274 break;
6275
6276 case GIMPLE_CALL:
6277 *def = gimple_call_lhs (*def_stmt);
6278 if (*def != NULL)
6279 break;
6280 /* FALLTHRU */
6281 default:
6282 if (dump_enabled_p ())
6283 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6284 "unsupported defining stmt: ");
6285 return false;
6286 }
6287
6288 return true;
6289 }
6290
6291 /* Function vect_is_simple_use_1.
6292
6293 Same as vect_is_simple_use but also determines the vector operand
6294 type of OPERAND and stores it to *VECTYPE. If the definition of
6295 OPERAND is vect_uninitialized_def, vect_constant_def or
6296 vect_external_def *VECTYPE will be set to NULL_TREE and the caller
6297 is responsible for computing the best suited vector type for the
6298 scalar operand. */
6299
6300 bool
6301 vect_is_simple_use_1 (tree operand, gimple stmt, loop_vec_info loop_vinfo,
6302 bb_vec_info bb_vinfo, gimple *def_stmt,
6303 tree *def, enum vect_def_type *dt, tree *vectype)
6304 {
6305 if (!vect_is_simple_use (operand, stmt, loop_vinfo, bb_vinfo, def_stmt,
6306 def, dt))
6307 return false;
6308
6309 /* Now get a vector type if the def is internal, otherwise supply
6310 NULL_TREE and leave it up to the caller to figure out a proper
6311 type for the use stmt. */
6312 if (*dt == vect_internal_def
6313 || *dt == vect_induction_def
6314 || *dt == vect_reduction_def
6315 || *dt == vect_double_reduction_def
6316 || *dt == vect_nested_cycle)
6317 {
6318 stmt_vec_info stmt_info = vinfo_for_stmt (*def_stmt);
6319
6320 if (STMT_VINFO_IN_PATTERN_P (stmt_info)
6321 && !STMT_VINFO_RELEVANT (stmt_info)
6322 && !STMT_VINFO_LIVE_P (stmt_info))
6323 stmt_info = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
6324
6325 *vectype = STMT_VINFO_VECTYPE (stmt_info);
6326 gcc_assert (*vectype != NULL_TREE);
6327 }
6328 else if (*dt == vect_uninitialized_def
6329 || *dt == vect_constant_def
6330 || *dt == vect_external_def)
6331 *vectype = NULL_TREE;
6332 else
6333 gcc_unreachable ();
6334
6335 return true;
6336 }
6337
6338
6339 /* Function supportable_widening_operation
6340
6341 Check whether an operation represented by the code CODE is a
6342 widening operation that is supported by the target platform in
6343 vector form (i.e., when operating on arguments of type VECTYPE_IN
6344 producing a result of type VECTYPE_OUT).
6345
6346 Widening operations we currently support are NOP (CONVERT), FLOAT
6347 and WIDEN_MULT. This function checks if these operations are supported
6348 by the target platform either directly (via vector tree-codes), or via
6349 target builtins.
6350
6351 Output:
6352 - CODE1 and CODE2 are codes of vector operations to be used when
6353 vectorizing the operation, if available.
6354 - MULTI_STEP_CVT determines the number of required intermediate steps in
6355 case of multi-step conversion (like char->short->int - in that case
6356 MULTI_STEP_CVT will be 1).
6357 - INTERM_TYPES contains the intermediate type required to perform the
6358 widening operation (short in the above example). */
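 Worked example (illustrative): widening a char vector to int needs two
 steps, char -> short -> int. On success *CODE1/*CODE2 would be e.g.
 VEC_UNPACK_LO_EXPR/VEC_UNPACK_HI_EXPR, *MULTI_STEP_CVT would be 1, and
 *INTERM_TYPES would hold the intermediate short vector type. */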
6359
6360 bool
6361 supportable_widening_operation (enum tree_code code, gimple stmt,
6362 tree vectype_out, tree vectype_in,
6363 enum tree_code *code1, enum tree_code *code2,
6364 int *multi_step_cvt,
6365 vec<tree> *interm_types)
6366 {
6367 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
6368 loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_info);
6369 struct loop *vect_loop = NULL;
6370 enum machine_mode vec_mode;
6371 enum insn_code icode1, icode2;
6372 optab optab1, optab2;
6373 tree vectype = vectype_in;
6374 tree wide_vectype = vectype_out;
6375 enum tree_code c1, c2;
6376 int i;
6377 tree prev_type, intermediate_type;
6378 enum machine_mode intermediate_mode, prev_mode;
6379 optab optab3, optab4;
6380
6381 *multi_step_cvt = 0;
6382 if (loop_info)
6383 vect_loop = LOOP_VINFO_LOOP (loop_info);
6384
6385 switch (code)
6386 {
6387 case WIDEN_MULT_EXPR:
6388 /* The result of a vectorized widening operation usually requires
6389 two vectors (because the widened results do not fit into one vector).
6390 The generated vector results would normally be expected to be
6391 generated in the same order as in the original scalar computation,
6392 i.e. if 8 results are generated in each vector iteration, they are
6393 to be organized as follows:
6394 vect1: [res1,res2,res3,res4],
6395 vect2: [res5,res6,res7,res8].
6396
6397 However, in the special case that the result of the widening
6398 operation is used in a reduction computation only, the order doesn't
6399 matter (because when vectorizing a reduction we change the order of
6400 the computation). Some targets can take advantage of this and
6401 generate more efficient code. For example, targets like Altivec,
6402 that support widen_mult using a sequence of {mult_even,mult_odd}
6403 generate the following vectors:
6404 vect1: [res1,res3,res5,res7],
6405 vect2: [res2,res4,res6,res8].
6406
6407 When vectorizing outer-loops, we execute the inner-loop sequentially
6408 (each vectorized inner-loop iteration contributes to VF outer-loop
6409 iterations in parallel). We therefore don't allow changing the
6410 order of the computation in the inner-loop during outer-loop
6411 vectorization. */
6412 /* TODO: Another case in which order doesn't *really* matter is when we
6413 widen and then contract again, e.g. (short)((int)x * y >> 8).
6414 Normally, pack_trunc performs an even/odd permute, whereas the
6415 repack from an even/odd expansion would be an interleave, which
6416 would be significantly simpler for e.g. AVX2. */
6417 /* In any case, in order to avoid duplicating the code below, recurse
6418 on VEC_WIDEN_MULT_EVEN_EXPR. If it succeeds, all the return values
6419 are properly set up for the caller. If we fail, we'll continue with
6420 a VEC_WIDEN_MULT_LO/HI_EXPR check. */
6421 if (vect_loop
6422 && STMT_VINFO_RELEVANT (stmt_info) == vect_used_by_reduction
6423 && !nested_in_vect_loop_p (vect_loop, stmt)
6424 && supportable_widening_operation (VEC_WIDEN_MULT_EVEN_EXPR,
6425 stmt, vectype_out, vectype_in,
6426 code1, code2, multi_step_cvt,
6427 interm_types))
6428 return true;
6429 c1 = VEC_WIDEN_MULT_LO_EXPR;
6430 c2 = VEC_WIDEN_MULT_HI_EXPR;
6431 break;
6432
6433 case VEC_WIDEN_MULT_EVEN_EXPR:
6434 /* Support the recursion induced just above. */
6435 c1 = VEC_WIDEN_MULT_EVEN_EXPR;
6436 c2 = VEC_WIDEN_MULT_ODD_EXPR;
6437 break;
6438
6439 case WIDEN_LSHIFT_EXPR:
6440 c1 = VEC_WIDEN_LSHIFT_LO_EXPR;
6441 c2 = VEC_WIDEN_LSHIFT_HI_EXPR;
6442 break;
6443
6444 CASE_CONVERT:
6445 c1 = VEC_UNPACK_LO_EXPR;
6446 c2 = VEC_UNPACK_HI_EXPR;
6447 break;
6448
6449 case FLOAT_EXPR:
6450 c1 = VEC_UNPACK_FLOAT_LO_EXPR;
6451 c2 = VEC_UNPACK_FLOAT_HI_EXPR;
6452 break;
6453
6454 case FIX_TRUNC_EXPR:
6455 /* ??? Not yet implemented due to missing VEC_UNPACK_FIX_TRUNC_HI_EXPR/
6456 VEC_UNPACK_FIX_TRUNC_LO_EXPR tree codes and optabs used for
6457 computing the operation. */
6458 return false;
6459
6460 default:
6461 gcc_unreachable ();
6462 }
6463
6464 if (BYTES_BIG_ENDIAN && c1 != VEC_WIDEN_MULT_EVEN_EXPR)
6465 {
6466 enum tree_code ctmp = c1;
6467 c1 = c2;
6468 c2 = ctmp;
6469 }
6470
6471 if (code == FIX_TRUNC_EXPR)
6472 {
6473 /* The signedness is determined from the output operand. */
6474 optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
6475 optab2 = optab_for_tree_code (c2, vectype_out, optab_default);
6476 }
6477 else
6478 {
6479 optab1 = optab_for_tree_code (c1, vectype, optab_default);
6480 optab2 = optab_for_tree_code (c2, vectype, optab_default);
6481 }
6482
6483 if (!optab1 || !optab2)
6484 return false;
6485
6486 vec_mode = TYPE_MODE (vectype);
6487 if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing
6488 || (icode2 = optab_handler (optab2, vec_mode)) == CODE_FOR_nothing)
6489 return false;
6490
6491 *code1 = c1;
6492 *code2 = c2;
6493
6494 if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
6495 && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
6496 return true;
6497
6498 /* Check if it's a multi-step conversion that can be done using intermediate
6499 types. */
6500
6501 prev_type = vectype;
6502 prev_mode = vec_mode;
6503
6504 if (!CONVERT_EXPR_CODE_P (code))
6505 return false;
6506
6507 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
6508 intermediate steps in the promotion sequence. We try
6509 MAX_INTERM_CVT_STEPS to get to WIDE_VECTYPE, and fail if we do
6510 not. */
6511 interm_types->create (MAX_INTERM_CVT_STEPS);
6512 for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
6513 {
6514 intermediate_mode = insn_data[icode1].operand[0].mode;
6515 intermediate_type
6516 = lang_hooks.types.type_for_mode (intermediate_mode,
6517 TYPE_UNSIGNED (prev_type));
6518 optab3 = optab_for_tree_code (c1, intermediate_type, optab_default);
6519 optab4 = optab_for_tree_code (c2, intermediate_type, optab_default);
6520
6521 if (!optab3 || !optab4
6522 || (icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing
6523 || insn_data[icode1].operand[0].mode != intermediate_mode
6524 || (icode2 = optab_handler (optab2, prev_mode)) == CODE_FOR_nothing
6525 || insn_data[icode2].operand[0].mode != intermediate_mode
6526 || ((icode1 = optab_handler (optab3, intermediate_mode))
6527 == CODE_FOR_nothing)
6528 || ((icode2 = optab_handler (optab4, intermediate_mode))
6529 == CODE_FOR_nothing))
6530 break;
6531
6532 interm_types->quick_push (intermediate_type);
6533 (*multi_step_cvt)++;
6534
6535 if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
6536 && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
6537 return true;
6538
6539 prev_type = intermediate_type;
6540 prev_mode = intermediate_mode;
6541 }
6542
6543 interm_types->release ();
6544 return false;
6545 }
6546
6547
6548 /* Function supportable_narrowing_operation
6549
6550 Check whether an operation represented by the code CODE is a
6551 narrowing operation that is supported by the target platform in
6552 vector form (i.e., when operating on arguments of type VECTYPE_IN
6553 and producing a result of type VECTYPE_OUT).
6554
6555 Narrowing operations we currently support are NOP (CONVERT) and
6556 FIX_TRUNC. This function checks if these operations are supported by
6557 the target platform directly via vector tree-codes.
6558
6559 Output:
6560 - CODE1 is the code of a vector operation to be used when
6561 vectorizing the operation, if available.
6562 - MULTI_STEP_CVT determines the number of required intermediate steps in
6563 case of multi-step conversion (like int->short->char - in that case
6564 MULTI_STEP_CVT will be 1).
6565 - INTERM_TYPES contains the intermediate type required to perform the
6566 narrowing operation (short in the above example). */
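 Worked example (illustrative): narrowing an int vector to char goes
 int -> short -> char. On success *CODE1 would be VEC_PACK_TRUNC_EXPR,
 *MULTI_STEP_CVT would be 1, and *INTERM_TYPES would hold the
 intermediate short vector type. */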
6567
6568 bool
6569 supportable_narrowing_operation (enum tree_code code,
6570 tree vectype_out, tree vectype_in,
6571 enum tree_code *code1, int *multi_step_cvt,
6572 vec<tree> *interm_types)
6573 {
6574 enum machine_mode vec_mode;
6575 enum insn_code icode1;
6576 optab optab1, interm_optab;
6577 tree vectype = vectype_in;
6578 tree narrow_vectype = vectype_out;
6579 enum tree_code c1;
6580 tree intermediate_type;
6581 enum machine_mode intermediate_mode, prev_mode;
6582 int i;
6583 bool uns;
6584
6585 *multi_step_cvt = 0;
6586 switch (code)
6587 {
6588 CASE_CONVERT:
6589 c1 = VEC_PACK_TRUNC_EXPR;
6590 break;
6591
6592 case FIX_TRUNC_EXPR:
6593 c1 = VEC_PACK_FIX_TRUNC_EXPR;
6594 break;
6595
6596 case FLOAT_EXPR:
6597 /* ??? Not yet implemented due to missing VEC_PACK_FLOAT_EXPR
6598 tree code and optabs used for computing the operation. */
6599 return false;
6600
6601 default:
6602 gcc_unreachable ();
6603 }
6604
6605 if (code == FIX_TRUNC_EXPR)
6606 /* The signedness is determined from the output operand. */
6607 optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
6608 else
6609 optab1 = optab_for_tree_code (c1, vectype, optab_default);
6610
6611 if (!optab1)
6612 return false;
6613
6614 vec_mode = TYPE_MODE (vectype);
6615 if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing)
6616 return false;
6617
6618 *code1 = c1;
6619
6620 if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
6621 return true;
6622
6623 /* Check if it's a multi-step conversion that can be done using intermediate
6624 types. */
6625 prev_mode = vec_mode;
6626 if (code == FIX_TRUNC_EXPR)
6627 uns = TYPE_UNSIGNED (vectype_out);
6628 else
6629 uns = TYPE_UNSIGNED (vectype);
6630
6631 /* For multi-step FIX_TRUNC_EXPR prefer signed floating to integer
6632 conversion over unsigned, as unsigned FIX_TRUNC_EXPR is often more
6633 costly than signed. */
6634 if (code == FIX_TRUNC_EXPR && uns)
6635 {
6636 enum insn_code icode2;
6637
6638 intermediate_type
6639 = lang_hooks.types.type_for_mode (TYPE_MODE (vectype_out), 0);
6640 interm_optab
6641 = optab_for_tree_code (c1, intermediate_type, optab_default);
6642 if (interm_optab != unknown_optab
6643 && (icode2 = optab_handler (optab1, vec_mode)) != CODE_FOR_nothing
6644 && insn_data[icode1].operand[0].mode
6645 == insn_data[icode2].operand[0].mode)
6646 {
6647 uns = false;
6648 optab1 = interm_optab;
6649 icode1 = icode2;
6650 }
6651 }
6652
6653 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
6654 intermediate steps in the narrowing sequence. We try
6655 MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do not. */
6656 interm_types->create (MAX_INTERM_CVT_STEPS);
6657 for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
6658 {
6659 intermediate_mode = insn_data[icode1].operand[0].mode;
6660 intermediate_type
6661 = lang_hooks.types.type_for_mode (intermediate_mode, uns);
6662 interm_optab
6663 = optab_for_tree_code (VEC_PACK_TRUNC_EXPR, intermediate_type,
6664 optab_default);
6665 if (!interm_optab
6666 || ((icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing)
6667 || insn_data[icode1].operand[0].mode != intermediate_mode
6668 || ((icode1 = optab_handler (interm_optab, intermediate_mode))
6669 == CODE_FOR_nothing))
6670 break;
6671
6672 interm_types->quick_push (intermediate_type);
6673 (*multi_step_cvt)++;
6674
6675 if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
6676 return true;
6677
6678 prev_mode = intermediate_mode;
6679 optab1 = interm_optab;
6680 }
6681
6682 interm_types->release ();
6683 return false;
6684 }