/* Statement Analysis and Transformation for Vectorization
   Copyright (C) 2003-2014 Free Software Foundation, Inc.
   Contributed by Dorit Naishlos <dorit@il.ibm.com>
   and Ira Rosen <irar@il.ibm.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
#include "coretypes.h"
#include "stor-layout.h"
#include "hard-reg-set.h"
#include "dominance.h"
#include "basic-block.h"
#include "gimple-pretty-print.h"
#include "tree-ssa-alias.h"
#include "internal-fn.h"
#include "gimple-expr.h"
#include "gimple-iterator.h"
#include "gimplify-me.h"
#include "gimple-ssa.h"
#include "tree-phinodes.h"
#include "ssa-iterators.h"
#include "stringpool.h"
#include "tree-ssanames.h"
#include "tree-ssa-loop-manip.h"
#include "tree-ssa-loop.h"
#include "tree-scalar-evolution.h"
#include "recog.h"		/* FIXME: for insn_data */
#include "diagnostic-core.h"
#include "tree-vectorizer.h"

/* For lang_hooks.types.type_for_mode.  */
#include "langhooks.h"
/* Return the vectorized type for the given statement.  */

tree
stmt_vectype (struct _stmt_vec_info *stmt_info)
{
  return STMT_VINFO_VECTYPE (stmt_info);
}
/* Return TRUE iff the given statement is in an inner loop relative to
   the loop being vectorized.  */

bool
stmt_in_inner_loop_p (struct _stmt_vec_info *stmt_info)
{
  gimple stmt = STMT_VINFO_STMT (stmt_info);
  basic_block bb = gimple_bb (stmt);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  struct loop *loop;

  if (!loop_vinfo)
    return false;

  loop = LOOP_VINFO_LOOP (loop_vinfo);

  return (bb->loop_father == loop->inner);
}
/* Record the cost of a statement, either by directly informing the
   target model or by saving it in a vector for later processing.
   Return a preliminary estimate of the statement's cost.  */

unsigned
record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
		  enum vect_cost_for_stmt kind, stmt_vec_info stmt_info,
		  int misalign, enum vect_cost_model_location where)
{
  if (body_cost_vec)
    {
      tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
      add_stmt_info_to_vec (body_cost_vec, count, kind,
			    stmt_info ? STMT_VINFO_STMT (stmt_info) : NULL,
			    misalign);
      return (unsigned)
	(builtin_vectorization_cost (kind, vectype, misalign) * count);
    }
  else
    {
      loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
      bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
      void *target_cost_data;

      if (loop_vinfo)
	target_cost_data = LOOP_VINFO_TARGET_COST_DATA (loop_vinfo);
      else
	target_cost_data = BB_VINFO_TARGET_COST_DATA (bb_vinfo);

      return add_stmt_cost (target_cost_data, count, kind, stmt_info,
			    misalign, where);
    }
}
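
/* Illustrative sketch (not part of the original source): a caller costing
   two copies of a vector operation into a body cost vector might write

     stmt_vector_for_cost body_costs = vNULL;
     unsigned cost = record_stmt_cost (&body_costs, 2, vector_stmt,
				       stmt_info, 0, vect_body);

   which saves the entry for later processing and returns the preliminary
   estimate 2 * builtin_vectorization_cost (vector_stmt, vectype, 0).
   Passing a NULL cost vector instead reports the cost directly to the
   target's add_stmt_cost hook.  */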
/* Return a variable of type ELEM_TYPE[NELEMS].  */

static tree
create_vector_array (tree elem_type, unsigned HOST_WIDE_INT nelems)
{
  return create_tmp_var (build_array_type_nelts (elem_type, nelems),
			 "vect_array");
}
/* ARRAY is an array of vectors created by create_vector_array.
   Return an SSA_NAME for the vector in index N.  The reference
   is part of the vectorization of STMT and the vector is associated
   with scalar destination SCALAR_DEST.  */

static tree
read_vector_array (gimple stmt, gimple_stmt_iterator *gsi, tree scalar_dest,
		   tree array, unsigned HOST_WIDE_INT n)
{
  tree vect_type, vect, vect_name, array_ref;
  gimple new_stmt;

  gcc_assert (TREE_CODE (TREE_TYPE (array)) == ARRAY_TYPE);
  vect_type = TREE_TYPE (TREE_TYPE (array));
  vect = vect_create_destination_var (scalar_dest, vect_type);
  array_ref = build4 (ARRAY_REF, vect_type, array,
		      build_int_cst (size_type_node, n),
		      NULL_TREE, NULL_TREE);

  new_stmt = gimple_build_assign (vect, array_ref);
  vect_name = make_ssa_name (vect, new_stmt);
  gimple_assign_set_lhs (new_stmt, vect_name);
  vect_finish_stmt_generation (stmt, new_stmt, gsi);

  return vect_name;
}
/* ARRAY is an array of vectors created by create_vector_array.
   Emit code to store SSA_NAME VECT in index N of the array.
   The store is part of the vectorization of STMT.  */

static void
write_vector_array (gimple stmt, gimple_stmt_iterator *gsi, tree vect,
		    tree array, unsigned HOST_WIDE_INT n)
{
  tree array_ref;
  gimple new_stmt;

  array_ref = build4 (ARRAY_REF, TREE_TYPE (vect), array,
		      build_int_cst (size_type_node, n),
		      NULL_TREE, NULL_TREE);

  new_stmt = gimple_build_assign (array_ref, vect);
  vect_finish_stmt_generation (stmt, new_stmt, gsi);
}
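
/* Illustrative sketch (assumed usage, not from the original source):

     tree array = create_vector_array (vectype, 2);
     write_vector_array (stmt, gsi, vec0, array, 0);
     tree v0 = read_vector_array (stmt, gsi, scalar_dest, array, 0);

   creates a vectype[2] temporary, stores VEC0 into element 0 and reads
   it back as a fresh SSA name, the pattern used when vectorizing with
   load/store-lanes.  */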
/* PTR is a pointer to an array of type TYPE.  Return a representation
   of *PTR.  The memory reference replaces those in FIRST_DR
   (and its group).  */

static tree
create_array_ref (tree type, tree ptr, struct data_reference *first_dr)
{
  tree mem_ref, alias_ptr_type;

  alias_ptr_type = reference_alias_ptr_type (DR_REF (first_dr));
  mem_ref = build2 (MEM_REF, type, ptr, build_int_cst (alias_ptr_type, 0));
  /* Arrays have the same alignment as their type.  */
  set_ptr_info_alignment (get_ptr_info (ptr), TYPE_ALIGN_UNIT (type), 0);
  return mem_ref;
}
/* Utility functions used by vect_mark_stmts_to_be_vectorized.  */

/* Function vect_mark_relevant.

   Mark STMT as "relevant for vectorization" and add it to WORKLIST.  */

static void
vect_mark_relevant (vec<gimple> *worklist, gimple stmt,
		    enum vect_relevant relevant, bool live_p,
		    bool used_in_pattern)
{
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  enum vect_relevant save_relevant = STMT_VINFO_RELEVANT (stmt_info);
  bool save_live_p = STMT_VINFO_LIVE_P (stmt_info);
  gimple pattern_stmt;

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "mark relevant %d, live %d.\n", relevant, live_p);

  /* If this stmt is an original stmt in a pattern, we might need to mark its
     related pattern stmt instead of the original stmt.  However, such stmts
     may have their own uses that are not in any pattern, in such cases the
     stmt itself should be marked.  */
  if (STMT_VINFO_IN_PATTERN_P (stmt_info))
    {
      bool found = false;
      if (!used_in_pattern)
        {
          imm_use_iterator imm_iter;
          use_operand_p use_p;
          gimple use_stmt;
          tree lhs;
	  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
	  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);

          if (is_gimple_assign (stmt))
            lhs = gimple_assign_lhs (stmt);
          else
            lhs = gimple_call_lhs (stmt);

          /* This use is out of pattern use, if LHS has other uses that are
             pattern uses, we should mark the stmt itself, and not the pattern
             stmt.  */
	  if (lhs && TREE_CODE (lhs) == SSA_NAME)
	    FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
	      {
		if (is_gimple_debug (USE_STMT (use_p)))
		  continue;
		use_stmt = USE_STMT (use_p);

		if (!flow_bb_inside_loop_p (loop, gimple_bb (use_stmt)))
		  continue;

		if (vinfo_for_stmt (use_stmt)
		    && STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (use_stmt)))
		  {
		    found = true;
		    break;
		  }
	      }
        }

      if (!found)
        {
          /* This is the last stmt in a sequence that was detected as a
             pattern that can potentially be vectorized.  Don't mark the stmt
             as relevant/live because it's not going to be vectorized.
             Instead mark the pattern-stmt that replaces it.  */

          pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);

          if (dump_enabled_p ())
            dump_printf_loc (MSG_NOTE, vect_location,
                             "last stmt in pattern. don't mark"
                             " relevant/live.\n");
          stmt_info = vinfo_for_stmt (pattern_stmt);
          gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info) == stmt);
          save_relevant = STMT_VINFO_RELEVANT (stmt_info);
          save_live_p = STMT_VINFO_LIVE_P (stmt_info);
          stmt = pattern_stmt;
        }
    }

  STMT_VINFO_LIVE_P (stmt_info) |= live_p;
  if (relevant > STMT_VINFO_RELEVANT (stmt_info))
    STMT_VINFO_RELEVANT (stmt_info) = relevant;

  if (STMT_VINFO_RELEVANT (stmt_info) == save_relevant
      && STMT_VINFO_LIVE_P (stmt_info) == save_live_p)
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "already marked relevant/live.\n");
      return;
    }

  worklist->safe_push (stmt);
}
/* Function vect_stmt_relevant_p.

   Return true if STMT in loop that is represented by LOOP_VINFO is
   "relevant for vectorization".

   A stmt is considered "relevant for vectorization" if:
   - it has uses outside the loop.
   - it has vdefs (it alters memory).
   - it is a control stmt in the loop (other than the exit condition).

   CHECKME: what other side effects would the vectorizer allow?  */
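
/* For example (an illustrative sketch, not from the original source), in

     for (i = 0; i < n; i++)
       a[i] = b[i] + 1;

   the store to a[i] alters memory (has a vdef) and is therefore
   relevant, whereas the increment of i feeds only the exit condition
   and the address computation.  */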
static bool
vect_stmt_relevant_p (gimple stmt, loop_vec_info loop_vinfo,
		      enum vect_relevant *relevant, bool *live_p)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  ssa_op_iter op_iter;
  imm_use_iterator imm_iter;
  use_operand_p use_p;
  def_operand_p def_p;

  *relevant = vect_unused_in_scope;
  *live_p = false;

  /* cond stmt other than loop exit cond.  */
  if (is_ctrl_stmt (stmt)
      && STMT_VINFO_TYPE (vinfo_for_stmt (stmt))
	 != loop_exit_ctrl_vec_info_type)
    *relevant = vect_used_in_scope;

  /* changing memory.  */
  if (gimple_code (stmt) != GIMPLE_PHI)
    if (gimple_vdef (stmt))
      {
	if (dump_enabled_p ())
	  dump_printf_loc (MSG_NOTE, vect_location,
                           "vec_stmt_relevant_p: stmt has vdefs.\n");
	*relevant = vect_used_in_scope;
      }

  /* uses outside the loop.  */
  FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt, op_iter, SSA_OP_DEF)
    {
      FOR_EACH_IMM_USE_FAST (use_p, imm_iter, DEF_FROM_PTR (def_p))
	{
	  basic_block bb = gimple_bb (USE_STMT (use_p));
	  if (!flow_bb_inside_loop_p (loop, bb))
	    {
	      if (dump_enabled_p ())
		dump_printf_loc (MSG_NOTE, vect_location,
                                 "vec_stmt_relevant_p: used out of loop.\n");

	      if (is_gimple_debug (USE_STMT (use_p)))
		continue;

	      /* We expect all such uses to be in the loop exit phis
		 (because of loop closed form)   */
	      gcc_assert (gimple_code (USE_STMT (use_p)) == GIMPLE_PHI);
	      gcc_assert (bb == single_exit (loop)->dest);

              *live_p = true;
	    }
	}
    }

  return (*live_p || *relevant);
}
/* Function exist_non_indexing_operands_for_use_p

   USE is one of the uses attached to STMT.  Check if USE is
   used in STMT for anything other than indexing an array.  */

static bool
exist_non_indexing_operands_for_use_p (tree use, gimple stmt)
{
  tree operand;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);

  /* USE corresponds to some operand in STMT.  If there is no data
     reference in STMT, then any operand that corresponds to USE
     is not indexing an array.  */
  if (!STMT_VINFO_DATA_REF (stmt_info))
    return true;

  /* STMT has a data_ref.  FORNOW this means that it's of one of
     the forms:
     -1- ARRAY_REF = var
     -2- var = ARRAY_REF
     (This should have been verified in analyze_data_refs).

     'var' in the second case corresponds to a def, not a use,
     so USE cannot correspond to any operands that are not used
     for array indexing.

     Therefore, all we need to check is if STMT falls into the
     first case, and whether var corresponds to USE.  */

  if (!gimple_assign_copy_p (stmt))
    {
      if (is_gimple_call (stmt)
	  && gimple_call_internal_p (stmt))
	switch (gimple_call_internal_fn (stmt))
	  {
	  case IFN_MASK_STORE:
	    operand = gimple_call_arg (stmt, 3);
	    if (operand == use)
	      return true;
	    /* FALLTHRU */
	  case IFN_MASK_LOAD:
	    operand = gimple_call_arg (stmt, 2);
	    if (operand == use)
	      return true;
	    break;
	  default:
	    break;
	  }
      return false;
    }

  if (TREE_CODE (gimple_assign_lhs (stmt)) == SSA_NAME)
    return false;
  operand = gimple_assign_rhs1 (stmt);
  if (TREE_CODE (operand) != SSA_NAME)
    return false;

  if (operand == use)
    return true;

  return false;
}
/*
   Function process_use.

   Inputs:
   - a USE in STMT in a loop represented by LOOP_VINFO
   - LIVE_P, RELEVANT - enum values to be set in the STMT_VINFO of the stmt
     that defined USE.  This is done by calling mark_relevant and passing it
     the WORKLIST (to add DEF_STMT to the WORKLIST in case it is relevant).
   - FORCE is true if exist_non_indexing_operands_for_use_p check shouldn't
     be performed.

   Outputs:
   Generally, LIVE_P and RELEVANT are used to define the liveness and
   relevance info of the DEF_STMT of this USE:
       STMT_VINFO_LIVE_P (DEF_STMT_info) <-- live_p
       STMT_VINFO_RELEVANT (DEF_STMT_info) <-- relevant
   Exceptions:
   - case 1: If USE is used only for address computations (e.g. array indexing),
   which does not need to be directly vectorized, then the liveness/relevance
   of the respective DEF_STMT is left unchanged.
   - case 2: If STMT is a reduction phi and DEF_STMT is a reduction stmt, we
   skip DEF_STMT because it had already been processed.
   - case 3: If DEF_STMT and STMT are in different nests, then "relevant" will
   be modified accordingly.

   Return true if everything is as expected.  Return false otherwise.  */
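
/* For instance (an illustrative sketch, not from the original source), in

     S1: i_1 = i_0 + 1;
     S2: a[i_1] = x;

   the use of i_1 in S2 serves only the address computation, so case 1
   applies and the relevance of S1 is left unchanged.  */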
static bool
process_use (gimple stmt, tree use, loop_vec_info loop_vinfo, bool live_p,
	     enum vect_relevant relevant, vec<gimple> *worklist,
	     bool force)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
  stmt_vec_info dstmt_vinfo;
  basic_block bb, def_bb;
  tree def;
  gimple def_stmt;
  enum vect_def_type dt;

  /* case 1: we are only interested in uses that need to be vectorized.  Uses
     that are used for address computation are not considered relevant.  */
  if (!force && !exist_non_indexing_operands_for_use_p (use, stmt))
    return true;

  if (!vect_is_simple_use (use, stmt, loop_vinfo, NULL, &def_stmt, &def, &dt))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "not vectorized: unsupported use in stmt.\n");
      return false;
    }

  if (!def_stmt || gimple_nop_p (def_stmt))
    return true;

  def_bb = gimple_bb (def_stmt);
  if (!flow_bb_inside_loop_p (loop, def_bb))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location, "def_stmt is out of loop.\n");
      return true;
    }

  /* case 2: A reduction phi (STMT) defined by a reduction stmt (DEF_STMT).
     DEF_STMT must have already been processed, because this should be the
     only way that STMT, which is a reduction-phi, was put in the worklist,
     as there should be no other uses for DEF_STMT in the loop.  So we just
     check that everything is as expected, and we are done.  */
  dstmt_vinfo = vinfo_for_stmt (def_stmt);
  bb = gimple_bb (stmt);
  if (gimple_code (stmt) == GIMPLE_PHI
      && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
      && gimple_code (def_stmt) != GIMPLE_PHI
      && STMT_VINFO_DEF_TYPE (dstmt_vinfo) == vect_reduction_def
      && bb->loop_father == def_bb->loop_father)
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
                         "reduc-stmt defining reduc-phi in the same nest.\n");
      if (STMT_VINFO_IN_PATTERN_P (dstmt_vinfo))
	dstmt_vinfo = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (dstmt_vinfo));
      gcc_assert (STMT_VINFO_RELEVANT (dstmt_vinfo) < vect_used_by_reduction);
      gcc_assert (STMT_VINFO_LIVE_P (dstmt_vinfo)
		  || STMT_VINFO_RELEVANT (dstmt_vinfo) > vect_unused_in_scope);
      return true;
    }

  /* case 3a: outer-loop stmt defining an inner-loop stmt:
	outer-loop-header-bb:
		d = def_stmt
	inner-loop:
		stmt # use (d)
	outer-loop-tail-bb:
		...		  */
  if (flow_loop_nested_p (def_bb->loop_father, bb->loop_father))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
                         "outer-loop def-stmt defining inner-loop stmt.\n");

      switch (relevant)
	{
	case vect_unused_in_scope:
	  relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_nested_cycle) ?
		      vect_used_in_scope : vect_unused_in_scope;
	  break;

	case vect_used_in_outer_by_reduction:
          gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
	  relevant = vect_used_by_reduction;
	  break;

	case vect_used_in_outer:
          gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
	  relevant = vect_used_in_scope;
	  break;

	case vect_used_in_scope:
	  break;

	default:
	  gcc_unreachable ();
	}
    }

  /* case 3b: inner-loop stmt defining an outer-loop stmt:
	outer-loop-header-bb:
		...
	inner-loop:
		d = def_stmt
	outer-loop-tail-bb (or outer-loop-exit-bb in double reduction):
		stmt # use (d)		*/
  else if (flow_loop_nested_p (bb->loop_father, def_bb->loop_father))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
                         "inner-loop def-stmt defining outer-loop stmt.\n");

      switch (relevant)
        {
        case vect_unused_in_scope:
          relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
            || STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_double_reduction_def) ?
                      vect_used_in_outer_by_reduction : vect_unused_in_scope;
          break;

        case vect_used_by_reduction:
          relevant = vect_used_in_outer_by_reduction;
          break;

        case vect_used_in_scope:
          relevant = vect_used_in_outer;
          break;

        default:
          gcc_unreachable ();
        }
    }

  vect_mark_relevant (worklist, def_stmt, relevant, live_p,
                      is_pattern_stmt_p (stmt_vinfo));
  return true;
}
/* Function vect_mark_stmts_to_be_vectorized.

   Not all stmts in the loop need to be vectorized.  For example:

     for i...
       for j...
   1.    T0 = i + j
   2.	 T1 = a[T0]

   3.    j = j + 1

   Stmt 1 and 3 do not need to be vectorized, because loop control and
   addressing of vectorized data-refs are handled differently.

   This pass detects such stmts.  */
bool
vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
  unsigned int nbbs = loop->num_nodes;
  gimple_stmt_iterator si;
  gimple stmt;
  unsigned int i;
  stmt_vec_info stmt_vinfo;
  basic_block bb;
  gimple phi;
  bool live_p;
  enum vect_relevant relevant, tmp_relevant;
  enum vect_def_type def_type;

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "=== vect_mark_stmts_to_be_vectorized ===\n");

  auto_vec<gimple, 64> worklist;

  /* 1. Init worklist.  */
  for (i = 0; i < nbbs; i++)
    {
      bb = bbs[i];
      for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
	{
	  phi = gsi_stmt (si);
	  if (dump_enabled_p ())
	    {
	      dump_printf_loc (MSG_NOTE, vect_location, "init: phi relevant? ");
	      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, phi, 0);
	      dump_printf (MSG_NOTE, "\n");
	    }

	  if (vect_stmt_relevant_p (phi, loop_vinfo, &relevant, &live_p))
	    vect_mark_relevant (&worklist, phi, relevant, live_p, false);
	}
      for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
	{
	  stmt = gsi_stmt (si);
	  if (dump_enabled_p ())
	    {
	      dump_printf_loc (MSG_NOTE, vect_location, "init: stmt relevant? ");
	      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
	      dump_printf (MSG_NOTE, "\n");
	    }

	  if (vect_stmt_relevant_p (stmt, loop_vinfo, &relevant, &live_p))
	    vect_mark_relevant (&worklist, stmt, relevant, live_p, false);
	}
    }

  /* 2. Process_worklist */
  while (worklist.length () > 0)
    {
      use_operand_p use_p;
      ssa_op_iter iter;

      stmt = worklist.pop ();
      if (dump_enabled_p ())
	{
          dump_printf_loc (MSG_NOTE, vect_location, "worklist: examine stmt: ");
          dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
          dump_printf (MSG_NOTE, "\n");
	}

      /* Examine the USEs of STMT. For each USE, mark the stmt that defines it
	 (DEF_STMT) as relevant/irrelevant and live/dead according to the
	 liveness and relevance properties of STMT.  */
      stmt_vinfo = vinfo_for_stmt (stmt);
      relevant = STMT_VINFO_RELEVANT (stmt_vinfo);
      live_p = STMT_VINFO_LIVE_P (stmt_vinfo);

      /* Generally, the liveness and relevance properties of STMT are
	 propagated as is to the DEF_STMTs of its USEs:
	  live_p <-- STMT_VINFO_LIVE_P (STMT_VINFO)
	  relevant <-- STMT_VINFO_RELEVANT (STMT_VINFO)

	 One exception is when STMT has been identified as defining a reduction
	 variable; in this case we set the liveness/relevance as follows:
	   live_p = false
	   relevant = vect_used_by_reduction
	 This is because we distinguish between two kinds of relevant stmts -
	 those that are used by a reduction computation, and those that are
	 (also) used by a regular computation.  This allows us later on to
	 identify stmts that are used solely by a reduction, and therefore the
	 order of the results that they produce does not have to be kept.  */

      def_type = STMT_VINFO_DEF_TYPE (stmt_vinfo);
      tmp_relevant = relevant;
      switch (def_type)
        {
          case vect_reduction_def:
	    switch (tmp_relevant)
	      {
	        case vect_unused_in_scope:
	          relevant = vect_used_by_reduction;
	          break;

	        case vect_used_by_reduction:
	          if (gimple_code (stmt) == GIMPLE_PHI)
                    break;
  	          /* fall through */

	        default:
	          if (dump_enabled_p ())
	            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                     "unsupported use of reduction.\n");
	          return false;
	      }

	    live_p = false;
	    break;

          case vect_nested_cycle:
            if (tmp_relevant != vect_unused_in_scope
                && tmp_relevant != vect_used_in_outer_by_reduction
                && tmp_relevant != vect_used_in_outer)
              {
                if (dump_enabled_p ())
                  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                   "unsupported use of nested cycle.\n");

                return false;
              }

            live_p = false;
            break;

          case vect_double_reduction_def:
            if (tmp_relevant != vect_unused_in_scope
                && tmp_relevant != vect_used_by_reduction)
              {
                if (dump_enabled_p ())
                  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                   "unsupported use of double reduction.\n");

                return false;
              }

            live_p = false;
            break;

          default:
            break;
        }

      if (is_pattern_stmt_p (stmt_vinfo))
        {
          /* Pattern statements are not inserted into the code, so
             FOR_EACH_PHI_OR_STMT_USE optimizes their operands out, and we
             have to scan the RHS or function arguments instead.  */
          if (is_gimple_assign (stmt))
            {
	      enum tree_code rhs_code = gimple_assign_rhs_code (stmt);
	      tree op = gimple_assign_rhs1 (stmt);

	      i = 1;
	      if (rhs_code == COND_EXPR && COMPARISON_CLASS_P (op))
		{
		  if (!process_use (stmt, TREE_OPERAND (op, 0), loop_vinfo,
				    live_p, relevant, &worklist, false)
		      || !process_use (stmt, TREE_OPERAND (op, 1), loop_vinfo,
				       live_p, relevant, &worklist, false))
		    return false;
		  i = 2;
		}
	      for (; i < gimple_num_ops (stmt); i++)
		{
		  op = gimple_op (stmt, i);
		  if (!process_use (stmt, op, loop_vinfo, live_p, relevant,
				    &worklist, false))
		    return false;
		}
            }
          else if (is_gimple_call (stmt))
            {
              for (i = 0; i < gimple_call_num_args (stmt); i++)
                {
                  tree arg = gimple_call_arg (stmt, i);
                  if (!process_use (stmt, arg, loop_vinfo, live_p, relevant,
				    &worklist, false))
                    return false;
                }
            }
        }
      else
        FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
          {
            tree op = USE_FROM_PTR (use_p);
            if (!process_use (stmt, op, loop_vinfo, live_p, relevant,
			      &worklist, false))
              return false;
          }

      if (STMT_VINFO_GATHER_P (stmt_vinfo))
	{
	  tree off;
	  tree decl = vect_check_gather (stmt, loop_vinfo, NULL, &off, NULL);
	  gcc_assert (decl);
	  if (!process_use (stmt, off, loop_vinfo, live_p, relevant,
			    &worklist, true))
	    return false;
	}
    } /* while worklist */

  return true;
}
/* Function vect_model_simple_cost.

   Models cost for simple operations, i.e. those that only emit ncopies of a
   single op.  Right now, this does not account for multiple insns that could
   be generated for the single vector op.  We will handle that shortly.  */

static void
vect_model_simple_cost (stmt_vec_info stmt_info, int ncopies,
			enum vect_def_type *dt,
			stmt_vector_for_cost *prologue_cost_vec,
			stmt_vector_for_cost *body_cost_vec)
{
  int i;
  int inside_cost = 0, prologue_cost = 0;

  /* The SLP costs were already calculated during SLP tree build.  */
  if (PURE_SLP_STMT (stmt_info))
    return;

  /* FORNOW: Assuming maximum 2 args per stmts.  */
  for (i = 0; i < 2; i++)
    if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
      prologue_cost += record_stmt_cost (prologue_cost_vec, 1, vector_stmt,
					 stmt_info, 0, vect_prologue);

  /* Pass the inside-of-loop statements to the target-specific cost model.  */
  inside_cost = record_stmt_cost (body_cost_vec, ncopies, vector_stmt,
				  stmt_info, 0, vect_body);

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "vect_model_simple_cost: inside_cost = %d, "
                     "prologue_cost = %d .\n", inside_cost, prologue_cost);
}
/* Model cost for type demotion and promotion operations.  PWR is normally
   zero for single-step promotions and demotions.  It will be one if
   two-step promotion/demotion is required, and so on.  Each additional
   step doubles the number of instructions required.  */

static void
vect_model_promotion_demotion_cost (stmt_vec_info stmt_info,
				    enum vect_def_type *dt, int pwr)
{
  int i, tmp;
  int inside_cost = 0, prologue_cost = 0;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
  void *target_cost_data;

  /* The SLP costs were already calculated during SLP tree build.  */
  if (PURE_SLP_STMT (stmt_info))
    return;

  if (loop_vinfo)
    target_cost_data = LOOP_VINFO_TARGET_COST_DATA (loop_vinfo);
  else
    target_cost_data = BB_VINFO_TARGET_COST_DATA (bb_vinfo);

  for (i = 0; i < pwr + 1; i++)
    {
      tmp = (STMT_VINFO_TYPE (stmt_info) == type_promotion_vec_info_type) ?
	(i + 1) : i;
      inside_cost += add_stmt_cost (target_cost_data, vect_pow2 (tmp),
				    vec_promote_demote, stmt_info, 0,
				    vect_body);
    }

  /* FORNOW: Assuming maximum 2 args per stmts.  */
  for (i = 0; i < 2; i++)
    if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
      prologue_cost += add_stmt_cost (target_cost_data, 1, vector_stmt,
				      stmt_info, 0, vect_prologue);

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "vect_model_promotion_demotion_cost: inside_cost = %d, "
                     "prologue_cost = %d .\n", inside_cost, prologue_cost);
}
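
/* Worked example (illustrative only): for a two-step promotion
   (PWR = 1), the loop above accumulates vect_pow2 (1) + vect_pow2 (2)
   = 2 + 4 = 6 vec_promote_demote operations; the corresponding
   demotion costs vect_pow2 (0) + vect_pow2 (1) = 1 + 2 = 3.  */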
/* Function vect_cost_group_size

   For grouped load or store, return the group_size only if it is the first
   load or store of a group, else return 1.  This ensures that group size is
   only returned once per group.  */

static int
vect_cost_group_size (stmt_vec_info stmt_info)
{
  gimple first_stmt = GROUP_FIRST_ELEMENT (stmt_info);

  if (first_stmt == STMT_VINFO_STMT (stmt_info))
    return GROUP_SIZE (stmt_info);

  return 1;
}
/* Function vect_model_store_cost

   Models cost for stores.  In the case of grouped accesses, one access
   has the overhead of the grouped access attributed to it.  */

void
vect_model_store_cost (stmt_vec_info stmt_info, int ncopies,
		       bool store_lanes_p, enum vect_def_type dt,
		       slp_tree slp_node,
		       stmt_vector_for_cost *prologue_cost_vec,
		       stmt_vector_for_cost *body_cost_vec)
{
  int group_size;
  unsigned int inside_cost = 0, prologue_cost = 0;
  struct data_reference *first_dr;
  gimple first_stmt;

  /* The SLP costs were already calculated during SLP tree build.  */
  if (PURE_SLP_STMT (stmt_info))
    return;

  if (dt == vect_constant_def || dt == vect_external_def)
    prologue_cost += record_stmt_cost (prologue_cost_vec, 1, scalar_to_vec,
				       stmt_info, 0, vect_prologue);

  /* Grouped access?  */
  if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
    {
      if (slp_node)
        {
          first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0];
          group_size = 1;
        }
      else
        {
          first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
          group_size = vect_cost_group_size (stmt_info);
        }

      first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
    }
  /* Not a grouped access.  */
  else
    {
      group_size = 1;
      first_dr = STMT_VINFO_DATA_REF (stmt_info);
    }

  /* We assume that the cost of a single store-lanes instruction is
     equivalent to the cost of GROUP_SIZE separate stores.  If a grouped
     access is instead being provided by a permute-and-store operation,
     include the cost of the permutes.  */
  if (!store_lanes_p && group_size > 1)
    {
      /* Uses a high and low interleave or shuffle operations for each
	 needed permute.  */
      int nstmts = ncopies * ceil_log2 (group_size) * group_size;
      inside_cost = record_stmt_cost (body_cost_vec, nstmts, vec_perm,
				      stmt_info, 0, vect_body);

      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "vect_model_store_cost: strided group_size = %d .\n",
                         group_size);
    }

  /* Costs of the stores.  */
  vect_get_store_cost (first_dr, ncopies, &inside_cost, body_cost_vec);

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "vect_model_store_cost: inside_cost = %d, "
                     "prologue_cost = %d .\n", inside_cost, prologue_cost);
}
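
/* Worked example (illustrative only): a permute-and-store of a group
   with GROUP_SIZE = 4 and NCOPIES = 1 records
   1 * ceil_log2 (4) * 4 = 8 vec_perm operations on top of the cost of
   the stores themselves.  */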
/* Calculate cost of DR's memory access.  */
void
vect_get_store_cost (struct data_reference *dr, int ncopies,
		     unsigned int *inside_cost,
		     stmt_vector_for_cost *body_cost_vec)
{
  int alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
  gimple stmt = DR_STMT (dr);
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);

  switch (alignment_support_scheme)
    {
    case dr_aligned:
      {
	*inside_cost += record_stmt_cost (body_cost_vec, ncopies,
					  vector_store, stmt_info, 0,
					  vect_body);

	if (dump_enabled_p ())
	  dump_printf_loc (MSG_NOTE, vect_location,
                           "vect_model_store_cost: aligned.\n");
	break;
      }

    case dr_unaligned_supported:
      {
	/* Here, we assign an additional cost for the unaligned store.  */
	*inside_cost += record_stmt_cost (body_cost_vec, ncopies,
					  unaligned_store, stmt_info,
					  DR_MISALIGNMENT (dr), vect_body);
	if (dump_enabled_p ())
	  dump_printf_loc (MSG_NOTE, vect_location,
                           "vect_model_store_cost: unaligned supported by "
                           "hardware.\n");
	break;
      }

    case dr_unaligned_unsupported:
      {
	*inside_cost = VECT_MAX_COST;

	if (dump_enabled_p ())
	  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                           "vect_model_store_cost: unsupported access.\n");
	break;
      }

    default:
      gcc_unreachable ();
    }
}
/* Function vect_model_load_cost

   Models cost for loads.  In the case of grouped accesses, the last access
   has the overhead of the grouped access attributed to it.  Since unaligned
   accesses are supported for loads, we also account for the costs of the
   access scheme chosen.  */

void
vect_model_load_cost (stmt_vec_info stmt_info, int ncopies,
		      bool load_lanes_p, slp_tree slp_node,
		      stmt_vector_for_cost *prologue_cost_vec,
		      stmt_vector_for_cost *body_cost_vec)
{
  int group_size;
  gimple first_stmt;
  struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr;
  unsigned int inside_cost = 0, prologue_cost = 0;

  /* The SLP costs were already calculated during SLP tree build.  */
  if (PURE_SLP_STMT (stmt_info))
    return;

  /* Grouped accesses?  */
  first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
  if (STMT_VINFO_GROUPED_ACCESS (stmt_info) && first_stmt && !slp_node)
    {
      group_size = vect_cost_group_size (stmt_info);
      first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
    }
  /* Not a grouped access.  */
  else
    {
      group_size = 1;
      first_dr = dr;
    }

  /* We assume that the cost of a single load-lanes instruction is
     equivalent to the cost of GROUP_SIZE separate loads.  If a grouped
     access is instead being provided by a load-and-permute operation,
     include the cost of the permutes.  */
  if (!load_lanes_p && group_size > 1)
    {
      /* Uses an even and odd extract operations or shuffle operations
	 for each needed permute.  */
      int nstmts = ncopies * ceil_log2 (group_size) * group_size;
      inside_cost = record_stmt_cost (body_cost_vec, nstmts, vec_perm,
				      stmt_info, 0, vect_body);

      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "vect_model_load_cost: strided group_size = %d .\n",
                         group_size);
    }

  /* The loads themselves.  */
  if (STMT_VINFO_STRIDE_LOAD_P (stmt_info))
    {
      /* N scalar loads plus gathering them into a vector.  */
      tree vectype = STMT_VINFO_VECTYPE (stmt_info);
      inside_cost += record_stmt_cost (body_cost_vec,
				       ncopies * TYPE_VECTOR_SUBPARTS (vectype),
				       scalar_load, stmt_info, 0, vect_body);
      inside_cost += record_stmt_cost (body_cost_vec, ncopies, vec_construct,
				       stmt_info, 0, vect_body);
    }
  else
    vect_get_load_cost (first_dr, ncopies,
			((!STMT_VINFO_GROUPED_ACCESS (stmt_info))
			 || group_size > 1 || slp_node),
			&inside_cost, &prologue_cost,
			prologue_cost_vec, body_cost_vec, true);

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "vect_model_load_cost: inside_cost = %d, "
                     "prologue_cost = %d .\n", inside_cost, prologue_cost);
}
/* Calculate cost of DR's memory access.  */
void
vect_get_load_cost (struct data_reference *dr, int ncopies,
		    bool add_realign_cost, unsigned int *inside_cost,
		    unsigned int *prologue_cost,
		    stmt_vector_for_cost *prologue_cost_vec,
		    stmt_vector_for_cost *body_cost_vec,
		    bool record_prologue_costs)
{
  int alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
  gimple stmt = DR_STMT (dr);
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);

  switch (alignment_support_scheme)
    {
    case dr_aligned:
      {
	*inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
					  stmt_info, 0, vect_body);

	if (dump_enabled_p ())
	  dump_printf_loc (MSG_NOTE, vect_location,
                           "vect_model_load_cost: aligned.\n");

	break;
      }
    case dr_unaligned_supported:
      {
	/* Here, we assign an additional cost for the unaligned load.  */
	*inside_cost += record_stmt_cost (body_cost_vec, ncopies,
					  unaligned_load, stmt_info,
					  DR_MISALIGNMENT (dr), vect_body);

	if (dump_enabled_p ())
	  dump_printf_loc (MSG_NOTE, vect_location,
                           "vect_model_load_cost: unaligned supported by "
                           "hardware.\n");

	break;
      }
    case dr_explicit_realign:
      {
	*inside_cost += record_stmt_cost (body_cost_vec, ncopies * 2,
					  vector_load, stmt_info, 0, vect_body);
	*inside_cost += record_stmt_cost (body_cost_vec, ncopies,
					  vec_perm, stmt_info, 0, vect_body);

	/* FIXME: If the misalignment remains fixed across the iterations of
	   the containing loop, the following cost should be added to the
	   prologue costs.  */
	if (targetm.vectorize.builtin_mask_for_load)
	  *inside_cost += record_stmt_cost (body_cost_vec, 1, vector_stmt,
					    stmt_info, 0, vect_body);

	if (dump_enabled_p ())
	  dump_printf_loc (MSG_NOTE, vect_location,
                           "vect_model_load_cost: explicit realign\n");

	break;
      }
    case dr_explicit_realign_optimized:
      {
	if (dump_enabled_p ())
	  dump_printf_loc (MSG_NOTE, vect_location,
                           "vect_model_load_cost: unaligned software "
                           "pipelined.\n");

	/* Unaligned software pipeline has a load of an address, an initial
	   load, and possibly a mask operation to "prime" the loop.  However,
	   if this is an access in a group of loads, which provide grouped
	   access, then the above cost should only be considered for one
	   access in the group.  Inside the loop, there is a load op
	   and a realignment op.  */

	if (add_realign_cost && record_prologue_costs)
	  {
	    *prologue_cost += record_stmt_cost (prologue_cost_vec, 2,
						vector_stmt, stmt_info,
						0, vect_prologue);
	    if (targetm.vectorize.builtin_mask_for_load)
	      *prologue_cost += record_stmt_cost (prologue_cost_vec, 1,
						  vector_stmt, stmt_info,
						  0, vect_prologue);
	  }

	*inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
					  stmt_info, 0, vect_body);
	*inside_cost += record_stmt_cost (body_cost_vec, ncopies, vec_perm,
					  stmt_info, 0, vect_body);

	if (dump_enabled_p ())
	  dump_printf_loc (MSG_NOTE, vect_location,
                           "vect_model_load_cost: explicit realign optimized"
                           "\n");

	break;
      }

    case dr_unaligned_unsupported:
      {
	*inside_cost = VECT_MAX_COST;

	if (dump_enabled_p ())
	  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                           "vect_model_load_cost: unsupported access.\n");
	break;
      }

    default:
      gcc_unreachable ();
    }
}
/* Insert the new stmt NEW_STMT at *GSI or at the appropriate place in
   the loop preheader for the vectorized stmt STMT.  */

static void
vect_init_vector_1 (gimple stmt, gimple new_stmt, gimple_stmt_iterator *gsi)
{
  if (gsi)
    vect_finish_stmt_generation (stmt, new_stmt, gsi);
  else
    {
      stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
      loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);

      if (loop_vinfo)
	{
	  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
	  basic_block new_bb;
	  edge pe;

	  if (nested_in_vect_loop_p (loop, stmt))
	    loop = loop->inner;

	  pe = loop_preheader_edge (loop);
          new_bb = gsi_insert_on_edge_immediate (pe, new_stmt);
          gcc_assert (!new_bb);
	}
      else
	{
          bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
          basic_block bb;
          gimple_stmt_iterator gsi_bb_start;

          gcc_assert (bb_vinfo);
          bb = BB_VINFO_BB (bb_vinfo);
          gsi_bb_start = gsi_after_labels (bb);
          gsi_insert_before (&gsi_bb_start, new_stmt, GSI_SAME_STMT);
	}
    }

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location,
		       "created new init_stmt: ");
      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, new_stmt, 0);
      dump_printf (MSG_NOTE, "\n");
    }
}
/* Function vect_init_vector.

   Insert a new stmt (INIT_STMT) that initializes a new variable of type
   TYPE with the value VAL.  If TYPE is a vector type and VAL does not have
   vector type a vector with all elements equal to VAL is created first.
   Place the initialization at BSI if it is not NULL.  Otherwise, place the
   initialization at the loop preheader.
   Return the DEF of INIT_STMT.
   It will be used in the vectorization of STMT.  */

tree
vect_init_vector (gimple stmt, tree val, tree type, gimple_stmt_iterator *gsi)
{
  tree new_var;
  gimple init_stmt;
  tree vec_oprnd;
  tree new_temp;

  if (TREE_CODE (type) == VECTOR_TYPE
      && TREE_CODE (TREE_TYPE (val)) != VECTOR_TYPE)
    {
      if (!types_compatible_p (TREE_TYPE (type), TREE_TYPE (val)))
	{
	  if (CONSTANT_CLASS_P (val))
	    val = fold_unary (VIEW_CONVERT_EXPR, TREE_TYPE (type), val);
	  else
	    {
	      new_temp = make_ssa_name (TREE_TYPE (type), NULL);
	      init_stmt = gimple_build_assign_with_ops (NOP_EXPR,
							new_temp, val,
							NULL_TREE);
	      vect_init_vector_1 (stmt, init_stmt, gsi);
	      val = new_temp;
	    }
	}
      val = build_vector_from_val (type, val);
    }

  new_var = vect_get_new_vect_var (type, vect_simple_var, "cst_");
  init_stmt = gimple_build_assign (new_var, val);
  new_temp = make_ssa_name (new_var, init_stmt);
  gimple_assign_set_lhs (init_stmt, new_temp);
  vect_init_vector_1 (stmt, init_stmt, gsi);
  vec_oprnd = gimple_assign_lhs (init_stmt);
  return vec_oprnd;
}
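
/* Illustrative sketch (assumed usage, not from the original source):
   for TYPE a four-element integer vector type and VAL = 5, this emits

     cst_1 = { 5, 5, 5, 5 };

   in the loop preheader (GSI == NULL) and returns the SSA name of the
   initialized vector.  */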
/* Function vect_get_vec_def_for_operand.

   OP is an operand in STMT.  This function returns a (vector) def that will be
   used in the vectorized stmt for STMT.

   In the case that OP is an SSA_NAME which is defined in the loop, then
   STMT_VINFO_VEC_STMT of the defining stmt holds the relevant def.

   In case OP is an invariant or constant, a new stmt that creates a vector def
   needs to be introduced.  */

tree
vect_get_vec_def_for_operand (tree op, gimple stmt, tree *scalar_def)
{
  tree vec_oprnd;
  gimple vec_stmt;
  gimple def_stmt;
  stmt_vec_info def_stmt_info = NULL;
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
  unsigned int nunits;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
  tree def;
  enum vect_def_type dt;
  bool is_simple_use;
  tree vector_type;

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location,
                       "vect_get_vec_def_for_operand: ");
      dump_generic_expr (MSG_NOTE, TDF_SLIM, op);
      dump_printf (MSG_NOTE, "\n");
    }

  is_simple_use = vect_is_simple_use (op, stmt, loop_vinfo, NULL,
				      &def_stmt, &def, &dt);
  gcc_assert (is_simple_use);
  if (dump_enabled_p ())
    {
      int loc_printed = 0;
      if (def)
        {
          dump_printf_loc (MSG_NOTE, vect_location, "def = ");
          loc_printed = 1;
          dump_generic_expr (MSG_NOTE, TDF_SLIM, def);
          dump_printf (MSG_NOTE, "\n");
        }
      if (def_stmt)
        {
          if (loc_printed)
            dump_printf (MSG_NOTE, "  def_stmt = ");
          else
            dump_printf_loc (MSG_NOTE, vect_location, "  def_stmt = ");
	  dump_gimple_stmt (MSG_NOTE, TDF_SLIM, def_stmt, 0);
          dump_printf (MSG_NOTE, "\n");
        }
    }

  switch (dt)
    {
    /* Case 1: operand is a constant.  */
    case vect_constant_def:
      {
	vector_type = get_vectype_for_scalar_type (TREE_TYPE (op));
	gcc_assert (vector_type);
	nunits = TYPE_VECTOR_SUBPARTS (vector_type);

	if (scalar_def)
	  *scalar_def = op;

        /* Create 'vect_cst_ = {cst,cst,...,cst}'  */
        if (dump_enabled_p ())
          dump_printf_loc (MSG_NOTE, vect_location,
                           "Create vector_cst. nunits = %d\n", nunits);

        return vect_init_vector (stmt, op, vector_type, NULL);
      }

    /* Case 2: operand is defined outside the loop - loop invariant.  */
    case vect_external_def:
      {
	vector_type = get_vectype_for_scalar_type (TREE_TYPE (def));
	gcc_assert (vector_type);

	if (scalar_def)
	  *scalar_def = def;

        /* Create 'vec_inv = {inv,inv,..,inv}'  */
        if (dump_enabled_p ())
          dump_printf_loc (MSG_NOTE, vect_location, "Create vector_inv.\n");

        return vect_init_vector (stmt, def, vector_type, NULL);
      }

    /* Case 3: operand is defined inside the loop.  */
    case vect_internal_def:
      {
	if (scalar_def)
	  *scalar_def = NULL/* FIXME tuples: def_stmt*/;

        /* Get the def from the vectorized stmt.  */
        def_stmt_info = vinfo_for_stmt (def_stmt);

        vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
        /* Get vectorized pattern statement.  */
        if (!vec_stmt
            && STMT_VINFO_IN_PATTERN_P (def_stmt_info)
            && !STMT_VINFO_RELEVANT (def_stmt_info))
          vec_stmt = STMT_VINFO_VEC_STMT (vinfo_for_stmt (
                       STMT_VINFO_RELATED_STMT (def_stmt_info)));
        gcc_assert (vec_stmt);
	if (gimple_code (vec_stmt) == GIMPLE_PHI)
	  vec_oprnd = PHI_RESULT (vec_stmt);
	else if (is_gimple_call (vec_stmt))
	  vec_oprnd = gimple_call_lhs (vec_stmt);
	else
	  vec_oprnd = gimple_assign_lhs (vec_stmt);
        return vec_oprnd;
      }

    /* Case 4: operand is defined by a loop header phi - reduction  */
    case vect_reduction_def:
    case vect_double_reduction_def:
    case vect_nested_cycle:
      {
	struct loop *loop;

	gcc_assert (gimple_code (def_stmt) == GIMPLE_PHI);
	loop = (gimple_bb (def_stmt))->loop_father;

        /* Get the def before the loop  */
        op = PHI_ARG_DEF_FROM_EDGE (def_stmt, loop_preheader_edge (loop));
        return get_initial_def_for_reduction (stmt, op, scalar_def);
     }

    /* Case 5: operand is defined by loop-header phi - induction.  */
    case vect_induction_def:
      {
	gcc_assert (gimple_code (def_stmt) == GIMPLE_PHI);

        /* Get the def from the vectorized stmt.  */
        def_stmt_info = vinfo_for_stmt (def_stmt);
        vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
	if (gimple_code (vec_stmt) == GIMPLE_PHI)
	  vec_oprnd = PHI_RESULT (vec_stmt);
	else
	  vec_oprnd = gimple_get_lhs (vec_stmt);
        return vec_oprnd;
      }

    default:
      gcc_unreachable ();
    }
}
/* Function vect_get_vec_def_for_stmt_copy

   Return a vector-def for an operand.  This function is used when the
   vectorized stmt to be created (by the caller to this function) is a "copy"
   created in case the vectorized result cannot fit in one vector, and several
   copies of the vector-stmt are required.  In this case the vector-def is
   retrieved from the vector stmt recorded in the STMT_VINFO_RELATED_STMT field
   of the stmt that defines VEC_OPRND.
   DT is the type of the vector def VEC_OPRND.

   Context:
        In case the vectorization factor (VF) is bigger than the number
   of elements that can fit in a vectype (nunits), we have to generate
   more than one vector stmt to vectorize the scalar stmt.  This situation
   arises when there are multiple data-types operated upon in the loop; the
   smallest data-type determines the VF, and as a result, when vectorizing
   stmts operating on wider types we need to create 'VF/nunits' "copies" of the
   vector stmt (each computing a vector of 'nunits' results, and together
   computing 'VF' results in each iteration).  This function is called when
   vectorizing such a stmt (e.g. vectorizing S2 in the illustration below, in
   which VF=16 and nunits=4, so the number of copies required is 4):

   scalar stmt:         vectorized into:        STMT_VINFO_RELATED_STMT

   S1: x = load         VS1.0:  vx.0 = memref0      VS1.1
                        VS1.1:  vx.1 = memref1      VS1.2
                        VS1.2:  vx.2 = memref2      VS1.3
                        VS1.3:  vx.3 = memref3

   S2: z = x + ...      VSnew.0:  vz0 = vx.0 + ...  VSnew.1
                        VSnew.1:  vz1 = vx.1 + ...  VSnew.2
                        VSnew.2:  vz2 = vx.2 + ...  VSnew.3
                        VSnew.3:  vz3 = vx.3 + ...

   The vectorization of S1 is explained in vectorizable_load.
   The vectorization of S2:
        To create the first vector-stmt out of the 4 copies - VSnew.0 -
   the function 'vect_get_vec_def_for_operand' is called to
   get the relevant vector-def for each operand of S2.  For operand x it
   returns the vector-def 'vx.0'.

        To create the remaining copies of the vector-stmt (VSnew.j), this
   function is called to get the relevant vector-def for each operand.  It is
   obtained from the respective VS1.j stmt, which is recorded in the
   STMT_VINFO_RELATED_STMT field of the stmt that defines VEC_OPRND.

        For example, to obtain the vector-def 'vx.1' in order to create the
   vector stmt 'VSnew.1', this function is called with VEC_OPRND='vx.0'.
   Given 'vx0' we obtain the stmt that defines it ('VS1.0'); from the
   STMT_VINFO_RELATED_STMT field of 'VS1.0' we obtain the next copy - 'VS1.1',
   and return its def ('vx.1').
   Overall, to create the above sequence this function will be called 3 times:
        vx.1 = vect_get_vec_def_for_stmt_copy (dt, vx.0);
        vx.2 = vect_get_vec_def_for_stmt_copy (dt, vx.1);
        vx.3 = vect_get_vec_def_for_stmt_copy (dt, vx.2);  */
tree
vect_get_vec_def_for_stmt_copy (enum vect_def_type dt, tree vec_oprnd)
{
  gimple vec_stmt_for_operand;
  stmt_vec_info def_stmt_info;

  /* Do nothing; can reuse same def.  */
  if (dt == vect_external_def || dt == vect_constant_def)
    return vec_oprnd;

  vec_stmt_for_operand = SSA_NAME_DEF_STMT (vec_oprnd);
  def_stmt_info = vinfo_for_stmt (vec_stmt_for_operand);
  gcc_assert (def_stmt_info);
  vec_stmt_for_operand = STMT_VINFO_RELATED_STMT (def_stmt_info);
  gcc_assert (vec_stmt_for_operand);
  vec_oprnd = gimple_get_lhs (vec_stmt_for_operand);
  if (gimple_code (vec_stmt_for_operand) == GIMPLE_PHI)
    vec_oprnd = PHI_RESULT (vec_stmt_for_operand);
  else
    vec_oprnd = gimple_get_lhs (vec_stmt_for_operand);
  return vec_oprnd;
}
/* Get vectorized definitions for the operands to create a copy of an original
   stmt.  See vect_get_vec_def_for_stmt_copy () for details.  */

static void
vect_get_vec_defs_for_stmt_copy (enum vect_def_type *dt,
				 vec<tree> *vec_oprnds0,
				 vec<tree> *vec_oprnds1)
{
  tree vec_oprnd = vec_oprnds0->pop ();

  vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd);
  vec_oprnds0->quick_push (vec_oprnd);

  if (vec_oprnds1 && vec_oprnds1->length ())
    {
      vec_oprnd = vec_oprnds1->pop ();
      vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[1], vec_oprnd);
      vec_oprnds1->quick_push (vec_oprnd);
    }
}
/* Get vectorized definitions for OP0 and OP1.
   REDUC_INDEX is the index of reduction operand in case of reduction,
   and -1 otherwise.  */

void
vect_get_vec_defs (tree op0, tree op1, gimple stmt,
		   vec<tree> *vec_oprnds0,
		   vec<tree> *vec_oprnds1,
		   slp_tree slp_node, int reduc_index)
{
  if (slp_node)
    {
      int nops = (op1 == NULL_TREE) ? 1 : 2;
      auto_vec<tree> ops (nops);
      auto_vec<vec<tree> > vec_defs (nops);

      ops.quick_push (op0);
      if (op1)
        ops.quick_push (op1);

      vect_get_slp_defs (ops, slp_node, &vec_defs, reduc_index);

      *vec_oprnds0 = vec_defs[0];
      if (op1)
	*vec_oprnds1 = vec_defs[1];
    }
  else
    {
      tree vec_oprnd;

      vec_oprnds0->create (1);
      vec_oprnd = vect_get_vec_def_for_operand (op0, stmt, NULL);
      vec_oprnds0->quick_push (vec_oprnd);

      if (op1)
	{
	  vec_oprnds1->create (1);
	  vec_oprnd = vect_get_vec_def_for_operand (op1, stmt, NULL);
	  vec_oprnds1->quick_push (vec_oprnd);
	}
    }
}
/* Function vect_finish_stmt_generation.

   Insert a new stmt.  */

void
vect_finish_stmt_generation (gimple stmt, gimple vec_stmt,
			     gimple_stmt_iterator *gsi)
{
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);

  gcc_assert (gimple_code (stmt) != GIMPLE_LABEL);

  if (!gsi_end_p (*gsi)
      && gimple_has_mem_ops (vec_stmt))
    {
      gimple at_stmt = gsi_stmt (*gsi);
      tree vuse = gimple_vuse (at_stmt);
      if (vuse && TREE_CODE (vuse) == SSA_NAME)
	{
	  tree vdef = gimple_vdef (at_stmt);
	  gimple_set_vuse (vec_stmt, gimple_vuse (at_stmt));
	  /* If we have an SSA vuse and insert a store, update virtual
	     SSA form to avoid triggering the renamer.  Do so only
	     if we can easily see all uses - which is what almost always
	     happens with the way vectorized stmts are inserted.  */
	  if ((vdef && TREE_CODE (vdef) == SSA_NAME)
	      && ((is_gimple_assign (vec_stmt)
		   && !is_gimple_reg (gimple_assign_lhs (vec_stmt)))
		  || (is_gimple_call (vec_stmt)
		      && !(gimple_call_flags (vec_stmt)
			   & (ECF_CONST|ECF_PURE|ECF_NOVOPS)))))
	    {
	      tree new_vdef = copy_ssa_name (vuse, vec_stmt);
	      gimple_set_vdef (vec_stmt, new_vdef);
	      SET_USE (gimple_vuse_op (at_stmt), new_vdef);
	    }
	}
    }
  gsi_insert_before (gsi, vec_stmt, GSI_SAME_STMT);

  set_vinfo_for_stmt (vec_stmt, new_stmt_vec_info (vec_stmt, loop_vinfo,
                                                   bb_vinfo));

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location, "add new stmt: ");
      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, vec_stmt, 0);
      dump_printf (MSG_NOTE, "\n");
    }

  gimple_set_location (vec_stmt, gimple_location (stmt));

  /* While EH edges will generally prevent vectorization, stmt might
     e.g. be in a must-not-throw region.  Ensure newly created stmts
     that could throw are part of the same region.  */
  int lp_nr = lookup_stmt_eh_lp (stmt);
  if (lp_nr != 0 && stmt_could_throw_p (vec_stmt))
    add_stmt_to_eh_lp (vec_stmt, lp_nr);
}
/* Checks if CALL can be vectorized in type VECTYPE.  Returns
   a function declaration if the target has a vectorized version
   of the function, or NULL_TREE if the function cannot be vectorized.  */

tree
vectorizable_function (gimple call, tree vectype_out, tree vectype_in)
{
  tree fndecl = gimple_call_fndecl (call);

  /* We only handle functions that do not read or clobber memory -- i.e.
     const or novops ones.  */
  if (!(gimple_call_flags (call) & (ECF_CONST | ECF_NOVOPS)))
    return NULL_TREE;

  if (!fndecl
      || TREE_CODE (fndecl) != FUNCTION_DECL
      || !DECL_BUILT_IN (fndecl))
    return NULL_TREE;

  return targetm.vectorize.builtin_vectorized_function (fndecl, vectype_out,
							vectype_in);
}


static tree permute_vec_elements (tree, tree, tree, gimple,
				  gimple_stmt_iterator *);
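
/* For example (illustrative only): for a const math builtin such as
   sqrt, the target hook may return the decl of a vector sqrt variant
   matching VECTYPE_OUT/VECTYPE_IN; calls that read or clobber memory
   are rejected above before the hook is consulted.  */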
/* Function vectorizable_mask_load_store.

   Check if STMT performs a conditional load or store that can be vectorized.
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at GSI.
   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */
static bool
vectorizable_mask_load_store (gimple stmt, gimple_stmt_iterator *gsi,
			      gimple *vec_stmt, slp_tree slp_node)
{
  tree vec_dest = NULL;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  stmt_vec_info prev_stmt_info;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  bool nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
  struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  tree elem_type;
  gimple new_stmt;
  tree dummy;
  tree dataref_ptr = NULL_TREE;
  gimple ptr_incr;
  int nunits = TYPE_VECTOR_SUBPARTS (vectype);
  int ncopies;
  int i, j;
  bool inv_p;
  tree gather_base = NULL_TREE, gather_off = NULL_TREE;
  tree gather_off_vectype = NULL_TREE, gather_decl = NULL_TREE;
  int gather_scale = 1;
  enum vect_def_type gather_dt = vect_unknown_def_type;
  bool is_store;
  tree mask;
  gimple def_stmt;
  tree def;
  enum vect_def_type dt;

  if (slp_node != NULL)
    return false;

  ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
  gcc_assert (ncopies >= 1);

  is_store = gimple_call_internal_fn (stmt) == IFN_MASK_STORE;
  mask = gimple_call_arg (stmt, 2);
  if (TYPE_PRECISION (TREE_TYPE (mask))
      != GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (vectype))))
    return false;

  /* FORNOW. This restriction should be relaxed.  */
  if (nested_in_vect_loop && ncopies > 1)
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "multiple types in nested loop.");
      return false;
    }

  if (!STMT_VINFO_RELEVANT_P (stmt_info))
    return false;

  if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
    return false;

  if (!STMT_VINFO_DATA_REF (stmt_info))
    return false;

  elem_type = TREE_TYPE (vectype);

  if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
    return false;

  if (STMT_VINFO_STRIDE_LOAD_P (stmt_info))
    return false;

  if (STMT_VINFO_GATHER_P (stmt_info))
    {
      gimple def_stmt;
      tree def;
      gather_decl = vect_check_gather (stmt, loop_vinfo, &gather_base,
				       &gather_off, &gather_scale);
      gcc_assert (gather_decl);
      if (!vect_is_simple_use_1 (gather_off, NULL, loop_vinfo, NULL,
				 &def_stmt, &def, &gather_dt,
				 &gather_off_vectype))
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			     "gather index use not simple.");
	  return false;
	}

      tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gather_decl));
      tree masktype
	= TREE_VALUE (TREE_CHAIN (TREE_CHAIN (TREE_CHAIN (arglist))));
      if (TREE_CODE (masktype) == INTEGER_TYPE)
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			     "masked gather with integer mask not supported.");
	  return false;
	}
    }
  else if (tree_int_cst_compare (nested_in_vect_loop
				 ? STMT_VINFO_DR_STEP (stmt_info)
				 : DR_STEP (dr), size_zero_node) <= 0)
    return false;
  else if (!VECTOR_MODE_P (TYPE_MODE (vectype))
	   || !can_vec_mask_load_store_p (TYPE_MODE (vectype), !is_store))
    return false;

  if (TREE_CODE (mask) != SSA_NAME)
    return false;

  if (!vect_is_simple_use (mask, stmt, loop_vinfo, NULL,
			   &def_stmt, &def, &dt))
    return false;

  if (is_store)
    {
      tree rhs = gimple_call_arg (stmt, 3);
      if (!vect_is_simple_use (rhs, stmt, loop_vinfo, NULL,
			       &def_stmt, &def, &dt))
	return false;
    }

  if (!vec_stmt) /* transformation not required.  */
    {
      STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
      if (is_store)
	vect_model_store_cost (stmt_info, ncopies, false, dt,
			       NULL, NULL, NULL);
      else
	vect_model_load_cost (stmt_info, ncopies, false, NULL, NULL, NULL);
      return true;
    }

  /** Transform.  **/
  if (STMT_VINFO_GATHER_P (stmt_info))
    {
      tree vec_oprnd0 = NULL_TREE, op;
      tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gather_decl));
      tree rettype, srctype, ptrtype, idxtype, masktype, scaletype;
      tree ptr, vec_mask = NULL_TREE, mask_op = NULL_TREE, var, scale;
      tree perm_mask = NULL_TREE, prev_res = NULL_TREE;
      tree mask_perm_mask = NULL_TREE;
      edge pe = loop_preheader_edge (loop);
      gimple_seq seq;
      basic_block new_bb;
      enum { NARROW, NONE, WIDEN } modifier;
      int gather_off_nunits = TYPE_VECTOR_SUBPARTS (gather_off_vectype);

      rettype = TREE_TYPE (TREE_TYPE (gather_decl));
      srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
      ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
      idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
      masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
      scaletype = TREE_VALUE (arglist);
      gcc_checking_assert (types_compatible_p (srctype, rettype)
			   && types_compatible_p (srctype, masktype));

      if (nunits == gather_off_nunits)
	modifier = NONE;
      else if (nunits == gather_off_nunits / 2)
	{
	  unsigned char *sel = XALLOCAVEC (unsigned char, gather_off_nunits);
	  modifier = WIDEN;

	  for (i = 0; i < gather_off_nunits; ++i)
	    sel[i] = i | nunits;

	  perm_mask = vect_gen_perm_mask (gather_off_vectype, sel);
	  gcc_assert (perm_mask != NULL_TREE);
	}
      else if (nunits == gather_off_nunits * 2)
	{
	  unsigned char *sel = XALLOCAVEC (unsigned char, nunits);
	  modifier = NARROW;

	  for (i = 0; i < nunits; ++i)
	    sel[i] = i < gather_off_nunits
		     ? i : i + nunits - gather_off_nunits;

	  perm_mask = vect_gen_perm_mask (vectype, sel);
	  gcc_assert (perm_mask != NULL_TREE);
	  ncopies *= 2;
	  for (i = 0; i < nunits; ++i)
	    sel[i] = i | gather_off_nunits;
	  mask_perm_mask = vect_gen_perm_mask (masktype, sel);
	  gcc_assert (mask_perm_mask != NULL_TREE);
	}
      else
	gcc_unreachable ();

      vec_dest = vect_create_destination_var (gimple_call_lhs (stmt), vectype);

      ptr = fold_convert (ptrtype, gather_base);
      if (!is_gimple_min_invariant (ptr))
	{
	  ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
	  new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
	  gcc_assert (!new_bb);
	}

      scale = build_int_cst (scaletype, gather_scale);

      prev_stmt_info = NULL;
      for (j = 0; j < ncopies; ++j)
	{
	  if (modifier == WIDEN && (j & 1))
	    op = permute_vec_elements (vec_oprnd0, vec_oprnd0,
				       perm_mask, stmt, gsi);
	  else if (j == 0)
	    op = vec_oprnd0
	      = vect_get_vec_def_for_operand (gather_off, stmt, NULL);
	  else
	    op = vec_oprnd0
	      = vect_get_vec_def_for_stmt_copy (gather_dt, vec_oprnd0);

	  if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
	    {
	      gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op))
			  == TYPE_VECTOR_SUBPARTS (idxtype));
	      var = vect_get_new_vect_var (idxtype, vect_simple_var, NULL);
	      var = make_ssa_name (var, NULL);
	      op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
	      new_stmt
		= gimple_build_assign_with_ops (VIEW_CONVERT_EXPR, var,
						op, NULL_TREE);
	      vect_finish_stmt_generation (stmt, new_stmt, gsi);
	      op = var;
	    }

	  if (mask_perm_mask && (j & 1))
	    mask_op = permute_vec_elements (mask_op, mask_op,
					    mask_perm_mask, stmt, gsi);
	  else
	    {
	      if (j == 0)
		vec_mask = vect_get_vec_def_for_operand (mask, stmt, NULL);
	      else
		{
		  vect_is_simple_use (vec_mask, NULL, loop_vinfo, NULL,
				      &def_stmt, &def, &dt);
		  vec_mask = vect_get_vec_def_for_stmt_copy (dt, vec_mask);
		}

	      mask_op = vec_mask;
	      if (!useless_type_conversion_p (masktype, TREE_TYPE (vec_mask)))
		{
		  gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (mask_op))
			      == TYPE_VECTOR_SUBPARTS (masktype));
		  var = vect_get_new_vect_var (masktype, vect_simple_var,
					       NULL);
		  var = make_ssa_name (var, NULL);
		  mask_op = build1 (VIEW_CONVERT_EXPR, masktype, mask_op);
		  new_stmt
		    = gimple_build_assign_with_ops (VIEW_CONVERT_EXPR, var,
						    mask_op, NULL_TREE);
		  vect_finish_stmt_generation (stmt, new_stmt, gsi);
		  mask_op = var;
		}
	    }

	  new_stmt
	    = gimple_build_call (gather_decl, 5, mask_op, ptr, op, mask_op,
				 scale);

	  if (!useless_type_conversion_p (vectype, rettype))
	    {
	      gcc_assert (TYPE_VECTOR_SUBPARTS (vectype)
			  == TYPE_VECTOR_SUBPARTS (rettype));
	      var = vect_get_new_vect_var (rettype, vect_simple_var, NULL);
	      op = make_ssa_name (var, new_stmt);
	      gimple_call_set_lhs (new_stmt, op);
	      vect_finish_stmt_generation (stmt, new_stmt, gsi);
	      var = make_ssa_name (vec_dest, NULL);
	      op = build1 (VIEW_CONVERT_EXPR, vectype, op);
	      new_stmt
		= gimple_build_assign_with_ops (VIEW_CONVERT_EXPR, var, op,
						NULL_TREE);
	    }
	  else
	    {
	      var = make_ssa_name (vec_dest, new_stmt);
	      gimple_call_set_lhs (new_stmt, var);
	    }

	  vect_finish_stmt_generation (stmt, new_stmt, gsi);

	  if (modifier == NARROW)
	    {
	      if ((j & 1) == 0)
		{
		  prev_res = var;
		  continue;
		}
	      var = permute_vec_elements (prev_res, var,
					  perm_mask, stmt, gsi);
	      new_stmt = SSA_NAME_DEF_STMT (var);
	    }

	  if (prev_stmt_info == NULL)
	    STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
	  else
	    STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
	  prev_stmt_info = vinfo_for_stmt (new_stmt);
	}

      /* Ensure that even with -fno-tree-dce the scalar MASK_LOAD is removed
	 from the IL.  */
      tree lhs = gimple_call_lhs (stmt);
      new_stmt = gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs)));
      set_vinfo_for_stmt (new_stmt, stmt_info);
      set_vinfo_for_stmt (stmt, NULL);
      STMT_VINFO_STMT (stmt_info) = new_stmt;
      gsi_replace (gsi, new_stmt, true);
      return true;
    }
  else if (is_store)
    {
  else if (is_store)
    {
      tree vec_rhs = NULL_TREE, vec_mask = NULL_TREE;
      prev_stmt_info = NULL;
      for (i = 0; i < ncopies; i++)
	{
	  unsigned align, misalign;

	  if (i == 0)
	    {
	      tree rhs = gimple_call_arg (stmt, 3);
	      vec_rhs = vect_get_vec_def_for_operand (rhs, stmt, NULL);
	      vec_mask = vect_get_vec_def_for_operand (mask, stmt, NULL);
	      /* We should have caught mismatched types earlier.  */
	      gcc_assert (useless_type_conversion_p (vectype,
						     TREE_TYPE (vec_rhs)));
	      dataref_ptr = vect_create_data_ref_ptr (stmt, vectype, NULL,
						      NULL_TREE, &dummy, gsi,
						      &ptr_incr, false,
						      &inv_p);
	      gcc_assert (!inv_p);
	    }
	  else
	    {
	      vect_is_simple_use (vec_rhs, NULL, loop_vinfo, NULL, &def_stmt,
				  &def, &dt);
	      vec_rhs = vect_get_vec_def_for_stmt_copy (dt, vec_rhs);
	      vect_is_simple_use (vec_mask, NULL, loop_vinfo, NULL, &def_stmt,
				  &def, &dt);
	      vec_mask = vect_get_vec_def_for_stmt_copy (dt, vec_mask);
	      dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
					     TYPE_SIZE_UNIT (vectype));
	    }

	  align = TYPE_ALIGN_UNIT (vectype);
	  if (aligned_access_p (dr))
	    misalign = 0;
	  else if (DR_MISALIGNMENT (dr) == -1)
	    {
	      align = TYPE_ALIGN_UNIT (elem_type);
	      misalign = 0;
	    }
	  else
	    misalign = DR_MISALIGNMENT (dr);
	  set_ptr_info_alignment (get_ptr_info (dataref_ptr), align,
				  misalign);
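	  /* Illustrative note (not in the original sources): the call built
	     below has the shape
	       MASK_STORE (dataref_ptr, align_arg, vec_mask, vec_rhs);
	     where align_arg is the alignment argument carried over from the
	     scalar MASK_STORE; lanes of vec_rhs are written to memory only
	     where the corresponding lane of vec_mask is true.  */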
	  new_stmt
	    = gimple_build_call_internal (IFN_MASK_STORE, 4, dataref_ptr,
					  gimple_call_arg (stmt, 1),
					  vec_mask, vec_rhs);
	  vect_finish_stmt_generation (stmt, new_stmt, gsi);
	  if (i == 0)
	    STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
	  else
	    STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
	  prev_stmt_info = vinfo_for_stmt (new_stmt);
	}
    }
  else
    {
      tree vec_mask = NULL_TREE;
      prev_stmt_info = NULL;
      vec_dest = vect_create_destination_var (gimple_call_lhs (stmt), vectype);
      for (i = 0; i < ncopies; i++)
	{
	  unsigned align, misalign;

	  if (i == 0)
	    {
	      vec_mask = vect_get_vec_def_for_operand (mask, stmt, NULL);
	      dataref_ptr = vect_create_data_ref_ptr (stmt, vectype, NULL,
						      NULL_TREE, &dummy, gsi,
						      &ptr_incr, false,
						      &inv_p);
	      gcc_assert (!inv_p);
	    }
	  else
	    {
	      vect_is_simple_use (vec_mask, NULL, loop_vinfo, NULL, &def_stmt,
				  &def, &dt);
	      vec_mask = vect_get_vec_def_for_stmt_copy (dt, vec_mask);
	      dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
					     TYPE_SIZE_UNIT (vectype));
	    }

	  align = TYPE_ALIGN_UNIT (vectype);
	  if (aligned_access_p (dr))
	    misalign = 0;
	  else if (DR_MISALIGNMENT (dr) == -1)
	    {
	      align = TYPE_ALIGN_UNIT (elem_type);
	      misalign = 0;
	    }
	  else
	    misalign = DR_MISALIGNMENT (dr);
	  set_ptr_info_alignment (get_ptr_info (dataref_ptr), align,
				  misalign);
	  new_stmt
	    = gimple_build_call_internal (IFN_MASK_LOAD, 3, dataref_ptr,
					  gimple_call_arg (stmt, 1),
					  vec_mask);
	  gimple_call_set_lhs (new_stmt, make_ssa_name (vec_dest, NULL));
	  vect_finish_stmt_generation (stmt, new_stmt, gsi);
	  if (i == 0)
	    STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
	  else
	    STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
	  prev_stmt_info = vinfo_for_stmt (new_stmt);
	}
    }

  if (!is_store)
    {
      /* Ensure that even with -fno-tree-dce the scalar MASK_LOAD is removed
	 from the IL.  */
      tree lhs = gimple_call_lhs (stmt);
      new_stmt = gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs)));
      set_vinfo_for_stmt (new_stmt, stmt_info);
      set_vinfo_for_stmt (stmt, NULL);
      STMT_VINFO_STMT (stmt_info) = new_stmt;
      gsi_replace (gsi, new_stmt, true);
    }

  return true;
}
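/* Illustrative note (not part of the original sources): IFN_MASK_LOAD and
   IFN_MASK_STORE are what if-conversion produces for conditional accesses
   such as

     for (i = 0; i < n; i++)
       if (c[i])
	 a[i] = b[i];

   where both the load of b[i] and the store to a[i] are guarded by c[i];
   the function above turns those internal calls into their vector masked
   forms.  */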
/* Function vectorizable_call.

   Check if STMT performs a function call that can be vectorized.
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at BSI.
   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */

static bool
vectorizable_call (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
		   slp_tree slp_node)
{
  tree vec_dest;
  tree scalar_dest;
  tree op, type;
  tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt), prev_stmt_info;
  tree vectype_out, vectype_in;
  int nunits_in;
  int nunits_out;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
  tree fndecl, new_temp, def, rhs_type;
  gimple def_stmt;
  enum vect_def_type dt[3]
    = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
  gimple new_stmt = NULL;
  int ncopies, j;
  vec<tree> vargs = vNULL;
  enum { NARROW, NONE, WIDEN } modifier;
  size_t i, nargs;
  tree lhs;

  if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
    return false;

  if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
    return false;

  /* Is STMT a vectorizable call?   */
  if (!is_gimple_call (stmt))
    return false;

  if (gimple_call_internal_p (stmt)
      && (gimple_call_internal_fn (stmt) == IFN_MASK_LOAD
	  || gimple_call_internal_fn (stmt) == IFN_MASK_STORE))
    return vectorizable_mask_load_store (stmt, gsi, vec_stmt,
					 slp_node);

  if (gimple_call_lhs (stmt) == NULL_TREE
      || TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
    return false;

  gcc_checking_assert (!stmt_can_throw_internal (stmt));

  vectype_out = STMT_VINFO_VECTYPE (stmt_info);

  /* Process function arguments.  */
  rhs_type = NULL_TREE;
  vectype_in = NULL_TREE;
  nargs = gimple_call_num_args (stmt);

  /* Bail out if the function has more than three arguments, we do not have
     interesting builtin functions to vectorize with more than two arguments
     except for fma.  No arguments is also not good.  */
  if (nargs == 0 || nargs > 3)
    return false;

  /* Ignore the argument of IFN_GOMP_SIMD_LANE, it is magic.  */
  if (gimple_call_internal_p (stmt)
      && gimple_call_internal_fn (stmt) == IFN_GOMP_SIMD_LANE)
    {
      nargs = 0;
      rhs_type = unsigned_type_node;
    }

  for (i = 0; i < nargs; i++)
    {
      tree opvectype;

      op = gimple_call_arg (stmt, i);

      /* We can only handle calls with arguments of the same type.  */
      if (rhs_type
	  && !types_compatible_p (rhs_type, TREE_TYPE (op)))
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			     "argument types differ.\n");
	  return false;
	}
      if (!rhs_type)
	rhs_type = TREE_TYPE (op);

      if (!vect_is_simple_use_1 (op, stmt, loop_vinfo, bb_vinfo,
				 &def_stmt, &def, &dt[i], &opvectype))
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			     "use not simple.\n");
	  return false;
	}

      if (!vectype_in)
	vectype_in = opvectype;
      else if (opvectype
	       && opvectype != vectype_in)
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			     "argument vector types differ.\n");
	  return false;
	}
    }
  /* If all arguments are external or constant defs use a vector type with
     the same size as the output vector type.  */
  if (!vectype_in)
    vectype_in = get_same_sized_vectype (rhs_type, vectype_out);
  if (vec_stmt)
    gcc_assert (vectype_in);
  if (!vectype_in)
    {
      if (dump_enabled_p ())
	{
	  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			   "no vectype for scalar type ");
	  dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, rhs_type);
	  dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
	}
      return false;
    }

  /* FORNOW */
  nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
  nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
  if (nunits_in == nunits_out / 2)
    modifier = NARROW;
  else if (nunits_out == nunits_in)
    modifier = NONE;
  else if (nunits_out == nunits_in / 2)
    modifier = WIDEN;
  else
    return false;
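  /* Illustrative note (not in the original sources): a V4SF -> V4SF call
     is NONE; a call consuming V2DF arguments but producing a V4SF result
     (twice as many lanes) is NARROW, so two argument vectors feed each
     result vector; the inverse shape is WIDEN, which no current target
     implements (see below).  */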
  /* For now, we only vectorize functions if a target specific builtin
     is available.  TODO -- in some cases, it might be profitable to
     insert the calls for pieces of the vector, in order to be able
     to vectorize other operations in the loop.  */
  fndecl = vectorizable_function (stmt, vectype_out, vectype_in);
  if (fndecl == NULL_TREE)
    {
      if (gimple_call_internal_p (stmt)
	  && gimple_call_internal_fn (stmt) == IFN_GOMP_SIMD_LANE
	  && !slp_node
	  && loop_vinfo
	  && LOOP_VINFO_LOOP (loop_vinfo)->simduid
	  && TREE_CODE (gimple_call_arg (stmt, 0)) == SSA_NAME
	  && LOOP_VINFO_LOOP (loop_vinfo)->simduid
	     == SSA_NAME_VAR (gimple_call_arg (stmt, 0)))
	{
	  /* We can handle IFN_GOMP_SIMD_LANE by returning a
	     { 0, 1, 2, ... vf - 1 } vector.  */
	  gcc_assert (nargs == 0);
	}
      else
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			     "function is not vectorizable.\n");
	  return false;
	}
    }

  gcc_assert (!gimple_vuse (stmt));

  if (slp_node || PURE_SLP_STMT (stmt_info))
    ncopies = 1;
  else if (modifier == NARROW)
    ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out;
  else
    ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;

  /* Sanity check: make sure that at least one copy of the vectorized stmt
     needs to be generated.  */
  gcc_assert (ncopies >= 1);

  if (!vec_stmt) /* transformation not required.  */
    {
      STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location, "=== vectorizable_call ==="
			 "\n");
      vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
      return true;
    }

  /** Transform.  **/

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location, "transform call.\n");

  /* Handle def.  */
  scalar_dest = gimple_call_lhs (stmt);
  vec_dest = vect_create_destination_var (scalar_dest, vectype_out);

  prev_stmt_info = NULL;
  switch (modifier)
    {
    case NONE:
      for (j = 0; j < ncopies; ++j)
	{
	  /* Build argument list for the vectorized call.  */
	  if (j == 0)
	    vargs.create (nargs);
	  else
	    vargs.truncate (0);

	  if (slp_node)
	    {
	      auto_vec<vec<tree> > vec_defs (nargs);
	      vec<tree> vec_oprnds0;

	      for (i = 0; i < nargs; i++)
		vargs.quick_push (gimple_call_arg (stmt, i));
	      vect_get_slp_defs (vargs, slp_node, &vec_defs, -1);
	      vec_oprnds0 = vec_defs[0];

	      /* Arguments are ready.  Create the new vector stmt.  */
	      FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_oprnd0)
		{
		  size_t k;
		  for (k = 0; k < nargs; k++)
		    {
		      vec<tree> vec_oprndsk = vec_defs[k];
		      vargs[k] = vec_oprndsk[i];
		    }
		  new_stmt = gimple_build_call_vec (fndecl, vargs);
		  new_temp = make_ssa_name (vec_dest, new_stmt);
		  gimple_call_set_lhs (new_stmt, new_temp);
		  vect_finish_stmt_generation (stmt, new_stmt, gsi);
		  SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
		}

	      for (i = 0; i < nargs; i++)
		{
		  vec<tree> vec_oprndsi = vec_defs[i];
		  vec_oprndsi.release ();
		}
	      continue;
	    }

	  for (i = 0; i < nargs; i++)
	    {
	      op = gimple_call_arg (stmt, i);
	      if (j == 0)
		vec_oprnd0
		  = vect_get_vec_def_for_operand (op, stmt, NULL);
	      else
		{
		  vec_oprnd0 = gimple_call_arg (new_stmt, i);
		  vec_oprnd0
		    = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
		}

	      vargs.quick_push (vec_oprnd0);
	    }

	  if (gimple_call_internal_p (stmt)
	      && gimple_call_internal_fn (stmt) == IFN_GOMP_SIMD_LANE)
	    {
	      tree *v = XALLOCAVEC (tree, nunits_out);
	      int k;
	      for (k = 0; k < nunits_out; ++k)
		v[k] = build_int_cst (unsigned_type_node, j * nunits_out + k);
	      tree cst = build_vector (vectype_out, v);
	      tree new_var
		= vect_get_new_vect_var (vectype_out, vect_simple_var, "cst_");
	      gimple init_stmt = gimple_build_assign (new_var, cst);
	      new_temp = make_ssa_name (new_var, init_stmt);
	      gimple_assign_set_lhs (init_stmt, new_temp);
	      vect_init_vector_1 (stmt, init_stmt, NULL);
	      new_temp = make_ssa_name (vec_dest, NULL);
	      new_stmt = gimple_build_assign (new_temp,
					      gimple_assign_lhs (init_stmt));
	    }
	  else
	    {
	      new_stmt = gimple_build_call_vec (fndecl, vargs);
	      new_temp = make_ssa_name (vec_dest, new_stmt);
	      gimple_call_set_lhs (new_stmt, new_temp);
	      vect_finish_stmt_generation (stmt, new_stmt, gsi);
	    }

	  if (j == 0)
	    STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
	  else
	    STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;

	  prev_stmt_info = vinfo_for_stmt (new_stmt);
	}

      break;

    case NARROW:
      for (j = 0; j < ncopies; ++j)
	{
	  /* Build argument list for the vectorized call.  */
	  if (j == 0)
	    vargs.create (nargs * 2);
	  else
	    vargs.truncate (0);

	  if (slp_node)
	    {
	      auto_vec<vec<tree> > vec_defs (nargs);
	      vec<tree> vec_oprnds0;

	      for (i = 0; i < nargs; i++)
		vargs.quick_push (gimple_call_arg (stmt, i));
	      vect_get_slp_defs (vargs, slp_node, &vec_defs, -1);
	      vec_oprnds0 = vec_defs[0];

	      /* Arguments are ready.  Create the new vector stmt.  */
	      for (i = 0; vec_oprnds0.iterate (i, &vec_oprnd0); i += 2)
		{
		  size_t k;
		  vargs.truncate (0);
		  for (k = 0; k < nargs; k++)
		    {
		      vec<tree> vec_oprndsk = vec_defs[k];
		      vargs.quick_push (vec_oprndsk[i]);
		      vargs.quick_push (vec_oprndsk[i + 1]);
		    }
		  new_stmt = gimple_build_call_vec (fndecl, vargs);
		  new_temp = make_ssa_name (vec_dest, new_stmt);
		  gimple_call_set_lhs (new_stmt, new_temp);
		  vect_finish_stmt_generation (stmt, new_stmt, gsi);
		  SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
		}

	      for (i = 0; i < nargs; i++)
		{
		  vec<tree> vec_oprndsi = vec_defs[i];
		  vec_oprndsi.release ();
		}
	      continue;
	    }

	  for (i = 0; i < nargs; i++)
	    {
	      op = gimple_call_arg (stmt, i);
	      if (j == 0)
		{
		  vec_oprnd0
		    = vect_get_vec_def_for_operand (op, stmt, NULL);
		  vec_oprnd1
		    = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
		}
	      else
		{
		  vec_oprnd1 = gimple_call_arg (new_stmt, 2*i + 1);
		  vec_oprnd0
		    = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd1);
		  vec_oprnd1
		    = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
		}

	      vargs.quick_push (vec_oprnd0);
	      vargs.quick_push (vec_oprnd1);
	    }

	  new_stmt = gimple_build_call_vec (fndecl, vargs);
	  new_temp = make_ssa_name (vec_dest, new_stmt);
	  gimple_call_set_lhs (new_stmt, new_temp);
	  vect_finish_stmt_generation (stmt, new_stmt, gsi);

	  if (j == 0)
	    STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
	  else
	    STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;

	  prev_stmt_info = vinfo_for_stmt (new_stmt);
	}

      *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);

      break;

    case WIDEN:
      /* No current target implements this case.  */
      return false;
    }

  vargs.release ();

  /* The call in STMT might prevent it from being removed in dce.
     We however cannot remove it here, due to the way the ssa name
     it defines is mapped to the new definition.  So just replace
     rhs of the statement with something harmless.  */

  if (slp_node)
    return true;

  type = TREE_TYPE (scalar_dest);
  if (is_pattern_stmt_p (stmt_info))
    lhs = gimple_call_lhs (STMT_VINFO_RELATED_STMT (stmt_info));
  else
    lhs = gimple_call_lhs (stmt);
  new_stmt = gimple_build_assign (lhs, build_zero_cst (type));
  set_vinfo_for_stmt (new_stmt, stmt_info);
  set_vinfo_for_stmt (stmt, NULL);
  STMT_VINFO_STMT (stmt_info) = new_stmt;
  gsi_replace (gsi, new_stmt, false);

  return true;
}
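/* Illustrative example (not from the original sources): with VF == 4 and a
   target that provides a V4SF variant of cosf, the scalar call
     x_1 = cosf (a_2);
   becomes one vector call per copy,
     vx_3 = vec_cosf (va_4);
   (vec_cosf standing for whatever fndecl vectorizable_function returned),
   and the original scalar call is rewritten to x_1 = 0.0 and left for
   DCE.  */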
struct simd_call_arg_info
{
  tree vectype;
  tree op;
  enum vect_def_type dt;
  HOST_WIDE_INT linear_step;
  unsigned int align;
};
/* Function vectorizable_simd_clone_call.

   Check if STMT performs a function call that can be vectorized
   by calling a simd clone of the function.
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at BSI.
   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */

static bool
vectorizable_simd_clone_call (gimple stmt, gimple_stmt_iterator *gsi,
			      gimple *vec_stmt, slp_tree slp_node)
{
  tree vec_dest;
  tree scalar_dest;
  tree op, type;
  tree vec_oprnd0 = NULL_TREE;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt), prev_stmt_info;
  tree vectype;
  unsigned int nunits;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
  struct loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL;
  tree fndecl, new_temp, def;
  gimple def_stmt;
  gimple new_stmt = NULL;
  int ncopies, j;
  vec<simd_call_arg_info> arginfo = vNULL;
  vec<tree> vargs = vNULL;
  size_t i, nargs;
  tree lhs, rtype, ratype;
  vec<constructor_elt, va_gc> *ret_ctor_elts;

  /* Is STMT a vectorizable call?   */
  if (!is_gimple_call (stmt))
    return false;

  fndecl = gimple_call_fndecl (stmt);
  if (fndecl == NULL_TREE)
    return false;

  struct cgraph_node *node = cgraph_node::get (fndecl);
  if (node == NULL || node->simd_clones == NULL)
    return false;

  if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
    return false;

  if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
    return false;

  if (gimple_call_lhs (stmt)
      && TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
    return false;

  gcc_checking_assert (!stmt_can_throw_internal (stmt));

  vectype = STMT_VINFO_VECTYPE (stmt_info);

  if (loop_vinfo && nested_in_vect_loop_p (loop, stmt))
    return false;

  /* FORNOW */
  if (slp_node || PURE_SLP_STMT (stmt_info))
    return false;

  /* Process function arguments.  */
  nargs = gimple_call_num_args (stmt);

  /* Bail out if the function has zero arguments.  */
  if (nargs == 0)
    return false;

  arginfo.create (nargs);

  for (i = 0; i < nargs; i++)
    {
      simd_call_arg_info thisarginfo;
      affine_iv iv;

      thisarginfo.linear_step = 0;
      thisarginfo.align = 0;
      thisarginfo.op = NULL_TREE;

      op = gimple_call_arg (stmt, i);
      if (!vect_is_simple_use_1 (op, stmt, loop_vinfo, bb_vinfo,
				 &def_stmt, &def, &thisarginfo.dt,
				 &thisarginfo.vectype)
	  || thisarginfo.dt == vect_uninitialized_def)
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			     "use not simple.\n");
	  arginfo.release ();
	  return false;
	}

      if (thisarginfo.dt == vect_constant_def
	  || thisarginfo.dt == vect_external_def)
	gcc_assert (thisarginfo.vectype == NULL_TREE);
      else
	gcc_assert (thisarginfo.vectype != NULL_TREE);

      if (thisarginfo.dt != vect_constant_def
	  && thisarginfo.dt != vect_external_def
	  && loop_vinfo
	  && TREE_CODE (op) == SSA_NAME
	  && simple_iv (loop, loop_containing_stmt (stmt), op, &iv, false)
	  && tree_fits_shwi_p (iv.step))
	{
	  thisarginfo.linear_step = tree_to_shwi (iv.step);
	  thisarginfo.op = iv.base;
	}
      else if ((thisarginfo.dt == vect_constant_def
		|| thisarginfo.dt == vect_external_def)
	       && POINTER_TYPE_P (TREE_TYPE (op)))
	thisarginfo.align = get_pointer_alignment (op) / BITS_PER_UNIT;

      arginfo.quick_push (thisarginfo);
    }

  unsigned int badness = 0;
  struct cgraph_node *bestn = NULL;
  if (STMT_VINFO_SIMD_CLONE_FNDECL (stmt_info))
    bestn = cgraph_node::get (STMT_VINFO_SIMD_CLONE_FNDECL (stmt_info));
  else
    for (struct cgraph_node *n = node->simd_clones; n != NULL;
	 n = n->simdclone->next_clone)
      {
	unsigned int this_badness = 0;
	if (n->simdclone->simdlen
	    > (unsigned) LOOP_VINFO_VECT_FACTOR (loop_vinfo)
	    || n->simdclone->nargs != nargs)
	  continue;
	if (n->simdclone->simdlen
	    < (unsigned) LOOP_VINFO_VECT_FACTOR (loop_vinfo))
	  this_badness += (exact_log2 (LOOP_VINFO_VECT_FACTOR (loop_vinfo))
			   - exact_log2 (n->simdclone->simdlen)) * 1024;
	if (n->simdclone->inbranch)
	  this_badness += 2048;
	int target_badness = targetm.simd_clone.usable (n);
	if (target_badness < 0)
	  continue;
	this_badness += target_badness * 512;
	/* FORNOW: Have to add code to add the mask argument.  */
	if (n->simdclone->inbranch)
	  continue;
	for (i = 0; i < nargs; i++)
	  {
	    switch (n->simdclone->args[i].arg_type)
	      {
	      case SIMD_CLONE_ARG_TYPE_VECTOR:
		if (!useless_type_conversion_p
			(n->simdclone->args[i].orig_type,
			 TREE_TYPE (gimple_call_arg (stmt, i))))
		  i = -1;
		else if (arginfo[i].dt == vect_constant_def
			 || arginfo[i].dt == vect_external_def
			 || arginfo[i].linear_step)
		  this_badness += 64;
		break;
	      case SIMD_CLONE_ARG_TYPE_UNIFORM:
		if (arginfo[i].dt != vect_constant_def
		    && arginfo[i].dt != vect_external_def)
		  i = -1;
		break;
	      case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
		if (arginfo[i].dt == vect_constant_def
		    || arginfo[i].dt == vect_external_def
		    || (arginfo[i].linear_step
			!= n->simdclone->args[i].linear_step))
		  i = -1;
		break;
	      case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP:
		/* FORNOW */
		i = -1;
		break;
	      case SIMD_CLONE_ARG_TYPE_MASK:
		gcc_unreachable ();
	      }
	    if (i == (size_t) -1)
	      break;
	    if (n->simdclone->args[i].alignment > arginfo[i].align)
	      {
		i = -1;
		break;
	      }
	    if (arginfo[i].align)
	      this_badness += (exact_log2 (arginfo[i].align)
			       - exact_log2 (n->simdclone->args[i].alignment));
	  }
	if (i == (size_t) -1)
	  continue;
	if (bestn == NULL || this_badness < badness)
	  {
	    bestn = n;
	    badness = this_badness;
	  }
      }
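  /* Illustrative note (not in the original sources): with VF == 8 and
     clones of simdlen 4 and 8 available, the simdlen-4 clone collects
     (exact_log2 (8) - exact_log2 (4)) * 1024 == 1024 badness, so the
     simdlen-8 clone wins when both are otherwise usable.  */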
  if (bestn == NULL)
    {
      arginfo.release ();
      return false;
    }

  for (i = 0; i < nargs; i++)
    if ((arginfo[i].dt == vect_constant_def
	 || arginfo[i].dt == vect_external_def)
	&& bestn->simdclone->args[i].arg_type == SIMD_CLONE_ARG_TYPE_VECTOR)
      {
	arginfo[i].vectype
	  = get_vectype_for_scalar_type (TREE_TYPE (gimple_call_arg (stmt,
								     i)));
	if (arginfo[i].vectype == NULL
	    || (TYPE_VECTOR_SUBPARTS (arginfo[i].vectype)
		> bestn->simdclone->simdlen))
	  {
	    arginfo.release ();
	    return false;
	  }
      }

  fndecl = bestn->decl;
  nunits = bestn->simdclone->simdlen;
  ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;

  /* If the function isn't const, only allow it in simd loops where user
     has asserted that at least nunits consecutive iterations can be
     performed using SIMD instructions.  */
  if ((loop == NULL || (unsigned) loop->safelen < nunits)
      && gimple_vuse (stmt))
    {
      arginfo.release ();
      return false;
    }

  /* Sanity check: make sure that at least one copy of the vectorized stmt
     needs to be generated.  */
  gcc_assert (ncopies >= 1);

  if (!vec_stmt) /* transformation not required.  */
    {
      STMT_VINFO_SIMD_CLONE_FNDECL (stmt_info) = bestn->decl;
      STMT_VINFO_TYPE (stmt_info) = call_simd_clone_vec_info_type;
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "=== vectorizable_simd_clone_call ===\n");
/*      vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL); */
      arginfo.release ();
      return true;
    }

  /** Transform.  **/

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location, "transform call.\n");

  /* Handle def.  */
  scalar_dest = gimple_call_lhs (stmt);
  vec_dest = NULL_TREE;
  rtype = NULL_TREE;
  ratype = NULL_TREE;
  if (scalar_dest)
    {
      vec_dest = vect_create_destination_var (scalar_dest, vectype);
      rtype = TREE_TYPE (TREE_TYPE (fndecl));
      if (TREE_CODE (rtype) == ARRAY_TYPE)
	{
	  ratype = rtype;
	  rtype = TREE_TYPE (ratype);
	}
    }

  prev_stmt_info = NULL;
  for (j = 0; j < ncopies; ++j)
    {
      /* Build argument list for the vectorized call.  */
      if (j == 0)
	vargs.create (nargs);
      else
	vargs.truncate (0);

      for (i = 0; i < nargs; i++)
	{
	  unsigned int k, l, m, o;
	  tree atype;
	  op = gimple_call_arg (stmt, i);
	  switch (bestn->simdclone->args[i].arg_type)
	    {
	    case SIMD_CLONE_ARG_TYPE_VECTOR:
	      atype = bestn->simdclone->args[i].vector_type;
	      o = nunits / TYPE_VECTOR_SUBPARTS (atype);
	      for (m = j * o; m < (j + 1) * o; m++)
		{
		  if (TYPE_VECTOR_SUBPARTS (atype)
		      < TYPE_VECTOR_SUBPARTS (arginfo[i].vectype))
		    {
		      unsigned int prec
			= GET_MODE_BITSIZE (TYPE_MODE (atype));
		      k = (TYPE_VECTOR_SUBPARTS (arginfo[i].vectype)
			   / TYPE_VECTOR_SUBPARTS (atype));
		      gcc_assert ((k & (k - 1)) == 0);
		      if (m == 0)
			vec_oprnd0
			  = vect_get_vec_def_for_operand (op, stmt, NULL);
		      else
			{
			  vec_oprnd0 = arginfo[i].op;
			  if ((m & (k - 1)) == 0)
			    vec_oprnd0
			      = vect_get_vec_def_for_stmt_copy (arginfo[i].dt,
								vec_oprnd0);
			}
		      arginfo[i].op = vec_oprnd0;
		      vec_oprnd0
			= build3 (BIT_FIELD_REF, atype, vec_oprnd0,
				  size_int (prec),
				  bitsize_int ((m & (k - 1)) * prec));
		      new_stmt
			= gimple_build_assign (make_ssa_name (atype, NULL),
					       vec_oprnd0);
		      vect_finish_stmt_generation (stmt, new_stmt, gsi);
		      vargs.safe_push (gimple_assign_lhs (new_stmt));
		    }
		  else
		    {
		      k = (TYPE_VECTOR_SUBPARTS (atype)
			   / TYPE_VECTOR_SUBPARTS (arginfo[i].vectype));
		      gcc_assert ((k & (k - 1)) == 0);
		      vec<constructor_elt, va_gc> *ctor_elts;
		      if (k != 1)
			vec_alloc (ctor_elts, k);
		      else
			ctor_elts = NULL;
		      for (l = 0; l < k; l++)
			{
			  if (m == 0 && l == 0)
			    vec_oprnd0
			      = vect_get_vec_def_for_operand (op, stmt, NULL);
			  else
			    vec_oprnd0
			      = vect_get_vec_def_for_stmt_copy (arginfo[i].dt,
								arginfo[i].op);
			  arginfo[i].op = vec_oprnd0;
			  if (k == 1)
			    break;
			  CONSTRUCTOR_APPEND_ELT (ctor_elts, NULL_TREE,
						  vec_oprnd0);
			}
		      if (k == 1)
			vargs.safe_push (vec_oprnd0);
		      else
			{
			  vec_oprnd0 = build_constructor (atype, ctor_elts);
			  new_stmt
			    = gimple_build_assign (make_ssa_name (atype, NULL),
						   vec_oprnd0);
			  vect_finish_stmt_generation (stmt, new_stmt, gsi);
			  vargs.safe_push (gimple_assign_lhs (new_stmt));
			}
		    }
		}
	      break;
	    case SIMD_CLONE_ARG_TYPE_UNIFORM:
	      vargs.safe_push (op);
	      break;
	    case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
	      if (j == 0)
		{
		  gimple_seq stmts;
		  arginfo[i].op
		    = force_gimple_operand (arginfo[i].op, &stmts, true,
					    NULL_TREE);
		  if (stmts != NULL)
		    {
		      basic_block new_bb;
		      edge pe = loop_preheader_edge (loop);
		      new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
		      gcc_assert (!new_bb);
		    }
		  tree phi_res = copy_ssa_name (op, NULL);
		  gimple new_phi = create_phi_node (phi_res, loop->header);
		  set_vinfo_for_stmt (new_phi,
				      new_stmt_vec_info (new_phi, loop_vinfo,
							 NULL));
		  add_phi_arg (new_phi, arginfo[i].op,
			       loop_preheader_edge (loop), UNKNOWN_LOCATION);
		  enum tree_code code
		    = POINTER_TYPE_P (TREE_TYPE (op))
		      ? POINTER_PLUS_EXPR : PLUS_EXPR;
		  tree type = POINTER_TYPE_P (TREE_TYPE (op))
			      ? sizetype : TREE_TYPE (op);
		  widest_int cst
		    = wi::mul (bestn->simdclone->args[i].linear_step,
			       ncopies * nunits);
		  tree tcst = wide_int_to_tree (type, cst);
		  tree phi_arg = copy_ssa_name (op, NULL);
		  new_stmt = gimple_build_assign_with_ops (code, phi_arg,
							   phi_res, tcst);
		  gimple_stmt_iterator si = gsi_after_labels (loop->header);
		  gsi_insert_after (&si, new_stmt, GSI_NEW_STMT);
		  set_vinfo_for_stmt (new_stmt,
				      new_stmt_vec_info (new_stmt, loop_vinfo,
							 NULL));
		  add_phi_arg (new_phi, phi_arg, loop_latch_edge (loop),
			       UNKNOWN_LOCATION);
		  arginfo[i].op = phi_res;
		  vargs.safe_push (phi_res);
		}
	      else
		{
		  enum tree_code code
		    = POINTER_TYPE_P (TREE_TYPE (op))
		      ? POINTER_PLUS_EXPR : PLUS_EXPR;
		  tree type = POINTER_TYPE_P (TREE_TYPE (op))
			      ? sizetype : TREE_TYPE (op);
		  widest_int cst
		    = wi::mul (bestn->simdclone->args[i].linear_step,
			       j * nunits);
		  tree tcst = wide_int_to_tree (type, cst);
		  new_temp = make_ssa_name (TREE_TYPE (op), NULL);
		  new_stmt
		    = gimple_build_assign_with_ops (code, new_temp,
						    arginfo[i].op, tcst);
		  vect_finish_stmt_generation (stmt, new_stmt, gsi);
		  vargs.safe_push (new_temp);
		}
	      break;
	    case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP:
	    default:
	      gcc_unreachable ();
	    }
	}

      new_stmt = gimple_build_call_vec (fndecl, vargs);
      if (vec_dest)
	{
	  gcc_assert (ratype || TYPE_VECTOR_SUBPARTS (rtype) == nunits);
	  if (ratype)
	    new_temp = create_tmp_var (ratype, NULL);
	  else if (TYPE_VECTOR_SUBPARTS (vectype)
		   == TYPE_VECTOR_SUBPARTS (rtype))
	    new_temp = make_ssa_name (vec_dest, new_stmt);
	  else
	    new_temp = make_ssa_name (rtype, new_stmt);
	  gimple_call_set_lhs (new_stmt, new_temp);
	}
      vect_finish_stmt_generation (stmt, new_stmt, gsi);

      if (vec_dest)
	{
	  if (TYPE_VECTOR_SUBPARTS (vectype) < nunits)
	    {
	      unsigned int k, l;
	      unsigned int prec = GET_MODE_BITSIZE (TYPE_MODE (vectype));
	      k = nunits / TYPE_VECTOR_SUBPARTS (vectype);
	      gcc_assert ((k & (k - 1)) == 0);
	      for (l = 0; l < k; l++)
		{
		  tree t;
		  if (ratype)
		    {
		      t = build_fold_addr_expr (new_temp);
		      t = build2 (MEM_REF, vectype, t,
				  build_int_cst (TREE_TYPE (t),
						 l * prec / BITS_PER_UNIT));
		    }
		  else
		    t = build3 (BIT_FIELD_REF, vectype, new_temp,
				size_int (prec), bitsize_int (l * prec));
		  new_stmt
		    = gimple_build_assign (make_ssa_name (vectype, NULL), t);
		  vect_finish_stmt_generation (stmt, new_stmt, gsi);
		  if (j == 0 && l == 0)
		    STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
		  else
		    STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;

		  prev_stmt_info = vinfo_for_stmt (new_stmt);
		}

	      if (ratype)
		{
		  tree clobber = build_constructor (ratype, NULL);
		  TREE_THIS_VOLATILE (clobber) = 1;
		  new_stmt = gimple_build_assign (new_temp, clobber);
		  vect_finish_stmt_generation (stmt, new_stmt, gsi);
		}
	      continue;
	    }
	  else if (TYPE_VECTOR_SUBPARTS (vectype) > nunits)
	    {
	      unsigned int k = (TYPE_VECTOR_SUBPARTS (vectype)
				/ TYPE_VECTOR_SUBPARTS (rtype));
	      gcc_assert ((k & (k - 1)) == 0);
	      if ((j & (k - 1)) == 0)
		vec_alloc (ret_ctor_elts, k);
	      if (ratype)
		{
		  unsigned int m, o = nunits / TYPE_VECTOR_SUBPARTS (rtype);
		  for (m = 0; m < o; m++)
		    {
		      tree tem = build4 (ARRAY_REF, rtype, new_temp,
					 size_int (m), NULL_TREE, NULL_TREE);
		      new_stmt
			= gimple_build_assign (make_ssa_name (rtype, NULL),
					       tem);
		      vect_finish_stmt_generation (stmt, new_stmt, gsi);
		      CONSTRUCTOR_APPEND_ELT (ret_ctor_elts, NULL_TREE,
					      gimple_assign_lhs (new_stmt));
		    }
		  tree clobber = build_constructor (ratype, NULL);
		  TREE_THIS_VOLATILE (clobber) = 1;
		  new_stmt = gimple_build_assign (new_temp, clobber);
		  vect_finish_stmt_generation (stmt, new_stmt, gsi);
		}
	      else
		CONSTRUCTOR_APPEND_ELT (ret_ctor_elts, NULL_TREE, new_temp);
	      if ((j & (k - 1)) != k - 1)
		continue;
	      vec_oprnd0 = build_constructor (vectype, ret_ctor_elts);
	      new_stmt
		= gimple_build_assign (make_ssa_name (vec_dest, NULL),
				       vec_oprnd0);
	      vect_finish_stmt_generation (stmt, new_stmt, gsi);

	      if ((unsigned) j == k - 1)
		STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
	      else
		STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;

	      prev_stmt_info = vinfo_for_stmt (new_stmt);
	      continue;
	    }
	  else if (ratype)
	    {
	      tree t = build_fold_addr_expr (new_temp);
	      t = build2 (MEM_REF, vectype, t,
			  build_int_cst (TREE_TYPE (t), 0));
	      new_stmt
		= gimple_build_assign (make_ssa_name (vec_dest, NULL), t);
	      vect_finish_stmt_generation (stmt, new_stmt, gsi);
	      tree clobber = build_constructor (ratype, NULL);
	      TREE_THIS_VOLATILE (clobber) = 1;
	      vect_finish_stmt_generation (stmt,
					   gimple_build_assign (new_temp,
								clobber),
					   gsi);
	    }
	}

      if (j == 0)
	STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
      else
	STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;

      prev_stmt_info = vinfo_for_stmt (new_stmt);
    }

  vargs.release ();

  /* The call in STMT might prevent it from being removed in dce.
     We however cannot remove it here, due to the way the ssa name
     it defines is mapped to the new definition.  So just replace
     rhs of the statement with something harmless.  */

  if (slp_node)
    return true;

  if (scalar_dest)
    {
      type = TREE_TYPE (scalar_dest);
      if (is_pattern_stmt_p (stmt_info))
	lhs = gimple_call_lhs (STMT_VINFO_RELATED_STMT (stmt_info));
      else
	lhs = gimple_call_lhs (stmt);
      new_stmt = gimple_build_assign (lhs, build_zero_cst (type));
    }
  else
    new_stmt = gimple_build_nop ();
  set_vinfo_for_stmt (new_stmt, stmt_info);
  set_vinfo_for_stmt (stmt, NULL);
  STMT_VINFO_STMT (stmt_info) = new_stmt;
  gsi_replace (gsi, new_stmt, false);
  unlink_stmt_vdef (stmt);

  arginfo.release ();
  return true;
}
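/* Illustrative example (not from the original sources): given

     #pragma omp declare simd uniform (y)
     int foo (int x, int y);

   the front end emits simd clones of foo, e.g. a simdlen-4 variant taking
   a V4SI x and a scalar y; the function above picks the best such clone
   and replaces four scalar calls per copy with one call to it.  */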
/* Function vect_gen_widened_results_half

   Create a vector stmt whose code, type, number of arguments, and result
   variable are CODE, OP_TYPE, and VEC_DEST, and its arguments are
   VEC_OPRND0 and VEC_OPRND1.  The new vector stmt is to be inserted at BSI.
   In the case that CODE is a CALL_EXPR, this means that a call to DECL
   needs to be created (DECL is a function-decl of a target-builtin).
   STMT is the original scalar stmt that we are vectorizing.  */

static gimple
vect_gen_widened_results_half (enum tree_code code,
			       tree decl,
			       tree vec_oprnd0, tree vec_oprnd1, int op_type,
			       tree vec_dest, gimple_stmt_iterator *gsi,
			       gimple stmt)
{
  gimple new_stmt;
  tree new_temp;

  /* Generate half of the widened result:  */
  if (code == CALL_EXPR)
    {
      /* Target specific support  */
      if (op_type == binary_op)
	new_stmt = gimple_build_call (decl, 2, vec_oprnd0, vec_oprnd1);
      else
	new_stmt = gimple_build_call (decl, 1, vec_oprnd0);
      new_temp = make_ssa_name (vec_dest, new_stmt);
      gimple_call_set_lhs (new_stmt, new_temp);
    }
  else
    {
      /* Generic support */
      gcc_assert (op_type == TREE_CODE_LENGTH (code));
      if (op_type != binary_op)
	vec_oprnd1 = NULL;
      new_stmt = gimple_build_assign_with_ops (code, vec_dest, vec_oprnd0,
					       vec_oprnd1);
      new_temp = make_ssa_name (vec_dest, new_stmt);
      gimple_assign_set_lhs (new_stmt, new_temp);
    }
  vect_finish_stmt_generation (stmt, new_stmt, gsi);

  return new_stmt;
}
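/* Illustrative note (not from the original sources): widening a V8HI
   operand into V4SI results takes two such halves, e.g.
     vlo = VEC_UNPACK_LO_EXPR <vop>;
     vhi = VEC_UNPACK_HI_EXPR <vop>;
   each produced by one call to the function above with CODE1/CODE2 as
   returned by supportable_widening_operation.  */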
/* Get vectorized definitions for loop-based vectorization.  For the first
   operand we call vect_get_vec_def_for_operand() (with OPRND containing
   scalar operand), and for the rest we get a copy with
   vect_get_vec_def_for_stmt_copy() using the previous vector definition
   (stored in OPRND).  See vect_get_vec_def_for_stmt_copy() for details.
   The vectors are collected into VEC_OPRNDS.  */

static void
vect_get_loop_based_defs (tree *oprnd, gimple stmt, enum vect_def_type dt,
			  vec<tree> *vec_oprnds, int multi_step_cvt)
{
  tree vec_oprnd;

  /* Get first vector operand.  */
  /* All the vector operands except the very first one (that is scalar oprnd)
     are stmt copies.  */
  if (TREE_CODE (TREE_TYPE (*oprnd)) != VECTOR_TYPE)
    vec_oprnd = vect_get_vec_def_for_operand (*oprnd, stmt, NULL);
  else
    vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, *oprnd);

  vec_oprnds->quick_push (vec_oprnd);

  /* Get second vector operand.  */
  vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, vec_oprnd);
  vec_oprnds->quick_push (vec_oprnd);

  *oprnd = vec_oprnd;

  /* For conversion in multiple steps, continue to get operands
     recursively.  */
  if (multi_step_cvt)
    vect_get_loop_based_defs (oprnd, stmt, dt, vec_oprnds, multi_step_cvt - 1);
}
/* Create vectorized demotion statements for vector operands from VEC_OPRNDS.
   For multi-step conversions store the resulting vectors and call the
   function recursively.  */

static void
vect_create_vectorized_demotion_stmts (vec<tree> *vec_oprnds,
				       int multi_step_cvt, gimple stmt,
				       vec<tree> vec_dsts,
				       gimple_stmt_iterator *gsi,
				       slp_tree slp_node, enum tree_code code,
				       stmt_vec_info *prev_stmt_info)
{
  unsigned int i;
  tree vop0, vop1, new_tmp, vec_dest;
  gimple new_stmt;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);

  vec_dest = vec_dsts.pop ();

  for (i = 0; i < vec_oprnds->length (); i += 2)
    {
      /* Create demotion operation.  */
      vop0 = (*vec_oprnds)[i];
      vop1 = (*vec_oprnds)[i + 1];
      new_stmt = gimple_build_assign_with_ops (code, vec_dest, vop0, vop1);
      new_tmp = make_ssa_name (vec_dest, new_stmt);
      gimple_assign_set_lhs (new_stmt, new_tmp);
      vect_finish_stmt_generation (stmt, new_stmt, gsi);

      if (multi_step_cvt)
	/* Store the resulting vector for next recursive call.  */
	(*vec_oprnds)[i/2] = new_tmp;
      else
	{
	  /* This is the last step of the conversion sequence.  Store the
	     vectors in SLP_NODE or in vector info of the scalar statement
	     (or in STMT_VINFO_RELATED_STMT chain).  */
	  if (slp_node)
	    SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
	  else
	    {
	      if (!*prev_stmt_info)
		STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
	      else
		STMT_VINFO_RELATED_STMT (*prev_stmt_info) = new_stmt;

	      *prev_stmt_info = vinfo_for_stmt (new_stmt);
	    }
	}
    }

  /* For multi-step demotion operations we first generate demotion operations
     from the source type to the intermediate types, and then combine the
     results (stored in VEC_OPRNDS) in demotion operation to the destination
     type.  */
  if (multi_step_cvt)
    {
      /* At each level of recursion we have half of the operands we had at the
	 previous level.  */
      vec_oprnds->truncate ((i+1)/2);
      vect_create_vectorized_demotion_stmts (vec_oprnds, multi_step_cvt - 1,
					     stmt, vec_dsts, gsi, slp_node,
					     VEC_PACK_TRUNC_EXPR,
					     prev_stmt_info);
    }

  vec_dsts.quick_push (vec_dest);
}
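/* Illustrative note (not from the original sources): demoting V4SI to
   V16QI goes through V8HI, e.g.
     vh0 = VEC_PACK_TRUNC_EXPR <vs0, vs1>;
     vh1 = VEC_PACK_TRUNC_EXPR <vs2, vs3>;
     vq  = VEC_PACK_TRUNC_EXPR <vh0, vh1>;
   which corresponds to one level of recursion in the function above.  */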
/* Create vectorized promotion statements for vector operands from VEC_OPRNDS0
   and VEC_OPRNDS1 (for binary operations).  For multi-step conversions store
   the resulting vectors and call the function recursively.  */

static void
vect_create_vectorized_promotion_stmts (vec<tree> *vec_oprnds0,
					vec<tree> *vec_oprnds1,
					gimple stmt, tree vec_dest,
					gimple_stmt_iterator *gsi,
					enum tree_code code1,
					enum tree_code code2, tree decl1,
					tree decl2, int op_type)
{
  int i;
  tree vop0, vop1, new_tmp1, new_tmp2;
  gimple new_stmt1, new_stmt2;
  vec<tree> vec_tmp = vNULL;

  vec_tmp.create (vec_oprnds0->length () * 2);
  FOR_EACH_VEC_ELT (*vec_oprnds0, i, vop0)
    {
      if (op_type == binary_op)
	vop1 = (*vec_oprnds1)[i];
      else
	vop1 = NULL_TREE;

      /* Generate the two halves of promotion operation.  */
      new_stmt1 = vect_gen_widened_results_half (code1, decl1, vop0, vop1,
						 op_type, vec_dest, gsi, stmt);
      new_stmt2 = vect_gen_widened_results_half (code2, decl2, vop0, vop1,
						 op_type, vec_dest, gsi, stmt);
      if (is_gimple_call (new_stmt1))
	{
	  new_tmp1 = gimple_call_lhs (new_stmt1);
	  new_tmp2 = gimple_call_lhs (new_stmt2);
	}
      else
	{
	  new_tmp1 = gimple_assign_lhs (new_stmt1);
	  new_tmp2 = gimple_assign_lhs (new_stmt2);
	}

      /* Store the results for the next step.  */
      vec_tmp.quick_push (new_tmp1);
      vec_tmp.quick_push (new_tmp2);
    }

  vec_oprnds0->release ();
  *vec_oprnds0 = vec_tmp;
}
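/* Illustrative note (not from the original sources): each call to the
   function above doubles the number of vectors in VEC_OPRNDS0, so a
   two-level promotion such as V16QI -> 2x V8HI -> 4x V4SI invokes it once
   per level, which is why the result vector is created with twice the
   input length.  */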
/* Check if STMT performs a conversion operation, that can be vectorized.
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at GSI.
   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */

static bool
vectorizable_conversion (gimple stmt, gimple_stmt_iterator *gsi,
			 gimple *vec_stmt, slp_tree slp_node)
{
  tree vec_dest;
  tree scalar_dest;
  tree op0, op1 = NULL_TREE;
  tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  enum tree_code code, code1 = ERROR_MARK, code2 = ERROR_MARK;
  enum tree_code codecvt1 = ERROR_MARK, codecvt2 = ERROR_MARK;
  tree decl1 = NULL_TREE, decl2 = NULL_TREE;
  tree new_temp;
  tree def;
  gimple def_stmt;
  enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
  gimple new_stmt = NULL;
  stmt_vec_info prev_stmt_info;
  int nunits_in;
  int nunits_out;
  tree vectype_out, vectype_in;
  int ncopies, i, j;
  tree lhs_type, rhs_type;
  enum { NARROW, NONE, WIDEN } modifier;
  vec<tree> vec_oprnds0 = vNULL;
  vec<tree> vec_oprnds1 = vNULL;
  tree vop0;
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
  int multi_step_cvt = 0;
  vec<tree> vec_dsts = vNULL;
  vec<tree> interm_types = vNULL;
  tree last_oprnd, intermediate_type, cvt_type = NULL_TREE;
  int op_type;
  enum machine_mode rhs_mode;
  unsigned short fltsz;

  /* Is STMT a vectorizable conversion?   */

  if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
    return false;

  if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
    return false;

  if (!is_gimple_assign (stmt))
    return false;

  if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
    return false;

  code = gimple_assign_rhs_code (stmt);
  if (!CONVERT_EXPR_CODE_P (code)
      && code != FIX_TRUNC_EXPR
      && code != FLOAT_EXPR
      && code != WIDEN_MULT_EXPR
      && code != WIDEN_LSHIFT_EXPR)
    return false;

  op_type = TREE_CODE_LENGTH (code);

  /* Check types of lhs and rhs.  */
  scalar_dest = gimple_assign_lhs (stmt);
  lhs_type = TREE_TYPE (scalar_dest);
  vectype_out = STMT_VINFO_VECTYPE (stmt_info);

  op0 = gimple_assign_rhs1 (stmt);
  rhs_type = TREE_TYPE (op0);

  if ((code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
      && !((INTEGRAL_TYPE_P (lhs_type)
	    && INTEGRAL_TYPE_P (rhs_type))
	   || (SCALAR_FLOAT_TYPE_P (lhs_type)
	       && SCALAR_FLOAT_TYPE_P (rhs_type))))
    return false;

  if ((INTEGRAL_TYPE_P (lhs_type)
       && (TYPE_PRECISION (lhs_type)
	   != GET_MODE_PRECISION (TYPE_MODE (lhs_type))))
      || (INTEGRAL_TYPE_P (rhs_type)
	  && (TYPE_PRECISION (rhs_type)
	      != GET_MODE_PRECISION (TYPE_MODE (rhs_type)))))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "type conversion to/from bit-precision unsupported."
			 "\n");
      return false;
    }

  /* Check the operands of the operation.  */
  if (!vect_is_simple_use_1 (op0, stmt, loop_vinfo, bb_vinfo,
			     &def_stmt, &def, &dt[0], &vectype_in))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "use not simple.\n");
      return false;
    }
  if (op_type == binary_op)
    {
      bool ok;

      op1 = gimple_assign_rhs2 (stmt);
      gcc_assert (code == WIDEN_MULT_EXPR || code == WIDEN_LSHIFT_EXPR);
      /* For WIDEN_MULT_EXPR, if OP0 is a constant, use the type of
	 OP1.  */
      if (CONSTANT_CLASS_P (op0))
	ok = vect_is_simple_use_1 (op1, stmt, loop_vinfo, bb_vinfo,
				   &def_stmt, &def, &dt[1], &vectype_in);
      else
	ok = vect_is_simple_use (op1, stmt, loop_vinfo, bb_vinfo, &def_stmt,
				 &def, &dt[1]);

      if (!ok)
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			     "use not simple.\n");
	  return false;
	}
    }

  /* If op0 is an external or constant defs use a vector type of
     the same size as the output vector type.  */
  if (!vectype_in)
    vectype_in = get_same_sized_vectype (rhs_type, vectype_out);
  if (vec_stmt)
    gcc_assert (vectype_in);
  if (!vectype_in)
    {
      if (dump_enabled_p ())
	{
	  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			   "no vectype for scalar type ");
	  dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, rhs_type);
	  dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
	}
      return false;
    }

  nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
  nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
  if (nunits_in < nunits_out)
    modifier = NARROW;
  else if (nunits_out == nunits_in)
    modifier = NONE;
  else
    modifier = WIDEN;

  /* Multiple types in SLP are handled by creating the appropriate number of
     vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
     case of SLP.  */
  if (slp_node || PURE_SLP_STMT (stmt_info))
    ncopies = 1;
  else if (modifier == NARROW)
    ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out;
  else
    ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;

  /* Sanity check: make sure that at least one copy of the vectorized stmt
     needs to be generated.  */
  gcc_assert (ncopies >= 1);

  /* Supportable by target?  */
  switch (modifier)
    {
    case NONE:
      if (code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
	return false;
      if (supportable_convert_operation (code, vectype_out, vectype_in,
					 &decl1, &code1))
	break;
      /* FALLTHRU */
    unsupported:
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "conversion not supported by target.\n");
      return false;

    case WIDEN:
      if (supportable_widening_operation (code, stmt, vectype_out, vectype_in,
					  &code1, &code2, &multi_step_cvt,
					  &interm_types))
	{
	  /* Binary widening operation can only be supported directly by the
	     architecture.  */
	  gcc_assert (!(multi_step_cvt && op_type == binary_op));
	  break;
	}

      if (code != FLOAT_EXPR
	  || (GET_MODE_SIZE (TYPE_MODE (lhs_type))
	      <= GET_MODE_SIZE (TYPE_MODE (rhs_type))))
	goto unsupported;

      rhs_mode = TYPE_MODE (rhs_type);
      fltsz = GET_MODE_SIZE (TYPE_MODE (lhs_type));
      for (rhs_mode = GET_MODE_2XWIDER_MODE (TYPE_MODE (rhs_type));
	   rhs_mode != VOIDmode && GET_MODE_SIZE (rhs_mode) <= fltsz;
	   rhs_mode = GET_MODE_2XWIDER_MODE (rhs_mode))
	{
	  cvt_type
	    = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
	  cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
	  if (cvt_type == NULL_TREE)
	    goto unsupported;

	  if (GET_MODE_SIZE (rhs_mode) == fltsz)
	    {
	      if (!supportable_convert_operation (code, vectype_out,
						  cvt_type, &decl1,
						  &codecvt1))
		goto unsupported;
	    }
	  else if (!supportable_widening_operation (code, stmt, vectype_out,
						    cvt_type, &codecvt1,
						    &codecvt2, &multi_step_cvt,
						    &interm_types))
	    continue;
	  else
	    gcc_assert (multi_step_cvt == 0);

	  if (supportable_widening_operation (NOP_EXPR, stmt, cvt_type,
					      vectype_in, &code1, &code2,
					      &multi_step_cvt, &interm_types))
	    break;
	}

      if (rhs_mode == VOIDmode || GET_MODE_SIZE (rhs_mode) > fltsz)
	goto unsupported;

      if (GET_MODE_SIZE (rhs_mode) == fltsz)
	codecvt2 = ERROR_MARK;
      else
	{
	  multi_step_cvt++;
	  interm_types.safe_push (cvt_type);
	  cvt_type = NULL_TREE;
	}
      break;

    case NARROW:
      gcc_assert (op_type == unary_op);
      if (supportable_narrowing_operation (code, vectype_out, vectype_in,
					   &code1, &multi_step_cvt,
					   &interm_types))
	break;

      if (code != FIX_TRUNC_EXPR
	  || (GET_MODE_SIZE (TYPE_MODE (lhs_type))
	      >= GET_MODE_SIZE (TYPE_MODE (rhs_type))))
	goto unsupported;

      rhs_mode = TYPE_MODE (rhs_type);
      cvt_type
	= build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
      cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
      if (cvt_type == NULL_TREE)
	goto unsupported;
      if (!supportable_convert_operation (code, cvt_type, vectype_in,
					  &decl1, &codecvt1))
	goto unsupported;
      if (supportable_narrowing_operation (NOP_EXPR, vectype_out, cvt_type,
					   &code1, &multi_step_cvt,
					   &interm_types))
	break;
      goto unsupported;

    default:
      gcc_unreachable ();
    }

  if (!vec_stmt)		/* transformation not required.  */
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "=== vectorizable_conversion ===\n");
      if (code == FIX_TRUNC_EXPR || code == FLOAT_EXPR)
	{
	  STMT_VINFO_TYPE (stmt_info) = type_conversion_vec_info_type;
	  vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
	}
      else if (modifier == NARROW)
	{
	  STMT_VINFO_TYPE (stmt_info) = type_demotion_vec_info_type;
	  vect_model_promotion_demotion_cost (stmt_info, dt, multi_step_cvt);
	}
      else
	{
	  STMT_VINFO_TYPE (stmt_info) = type_promotion_vec_info_type;
	  vect_model_promotion_demotion_cost (stmt_info, dt, multi_step_cvt);
	}
      interm_types.release ();
      return true;
    }

  /** Transform.  **/
  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "transform conversion. ncopies = %d.\n", ncopies);

  if (op_type == binary_op)
    {
      if (CONSTANT_CLASS_P (op0))
	op0 = fold_convert (TREE_TYPE (op1), op0);
      else if (CONSTANT_CLASS_P (op1))
	op1 = fold_convert (TREE_TYPE (op0), op1);
    }

  /* In case of multi-step conversion, we first generate conversion operations
     to the intermediate types, and then from that types to the final one.
     We create vector destinations for the intermediate type (TYPES) received
     from supportable_*_operation, and store them in the correct order
     for future use in vect_create_vectorized_*_stmts ().  */
  vec_dsts.create (multi_step_cvt + 1);
  vec_dest = vect_create_destination_var (scalar_dest,
					  (cvt_type && modifier == WIDEN)
					  ? cvt_type : vectype_out);
  vec_dsts.quick_push (vec_dest);

  if (multi_step_cvt)
    {
      for (i = interm_types.length () - 1;
	   interm_types.iterate (i, &intermediate_type); i--)
	{
	  vec_dest = vect_create_destination_var (scalar_dest,
						  intermediate_type);
	  vec_dsts.quick_push (vec_dest);
	}
    }

  if (cvt_type)
    vec_dest = vect_create_destination_var (scalar_dest,
					    modifier == WIDEN
					    ? vectype_out : cvt_type);

  if (!slp_node)
    {
      if (modifier == WIDEN)
	{
	  vec_oprnds0.create (multi_step_cvt ? vect_pow2 (multi_step_cvt) : 1);
	  if (op_type == binary_op)
	    vec_oprnds1.create (1);
	}
      else if (modifier == NARROW)
	vec_oprnds0.create (
		   2 * (multi_step_cvt ? vect_pow2 (multi_step_cvt) : 1));
    }
  else if (code == WIDEN_LSHIFT_EXPR)
    vec_oprnds1.create (slp_node->vec_stmts_size);

  last_oprnd = op0;
  prev_stmt_info = NULL;
  switch (modifier)
    {
    case NONE:
      for (j = 0; j < ncopies; j++)
	{
	  if (j == 0)
	    vect_get_vec_defs (op0, NULL, stmt, &vec_oprnds0, NULL, slp_node,
			       -1);
	  else
	    vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, NULL);

	  FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
	    {
	      /* Arguments are ready, create the new vector stmt.  */
	      if (code1 == CALL_EXPR)
		{
		  new_stmt = gimple_build_call (decl1, 1, vop0);
		  new_temp = make_ssa_name (vec_dest, new_stmt);
		  gimple_call_set_lhs (new_stmt, new_temp);
		}
	      else
		{
		  gcc_assert (TREE_CODE_LENGTH (code1) == unary_op);
		  new_stmt = gimple_build_assign_with_ops (code1, vec_dest,
							   vop0, NULL);
		  new_temp = make_ssa_name (vec_dest, new_stmt);
		  gimple_assign_set_lhs (new_stmt, new_temp);
		}

	      vect_finish_stmt_generation (stmt, new_stmt, gsi);
	      if (slp_node)
		SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
	    }

	  if (j == 0)
	    STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
	  else
	    STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
	  prev_stmt_info = vinfo_for_stmt (new_stmt);
	}
      break;

    case WIDEN:
      /* In case the vectorization factor (VF) is bigger than the number
	 of elements that we can fit in a vectype (nunits), we have to
	 generate more than one vector stmt - i.e - we need to "unroll"
	 the vector stmt by a factor VF/nunits.  */
      for (j = 0; j < ncopies; j++)
	{
	  /* Handle uses.  */
	  if (j == 0)
	    {
	      if (slp_node)
		{
		  if (code == WIDEN_LSHIFT_EXPR)
		    {
		      unsigned int k;

		      vec_oprnd1 = op1;
		      /* Store vec_oprnd1 for every vector stmt to be created
			 for SLP_NODE.  We check during the analysis that all
			 the shift arguments are the same.  */
		      for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
			vec_oprnds1.quick_push (vec_oprnd1);

		      vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0,
					 NULL, slp_node, -1);
		    }
		  else
		    vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0,
				       &vec_oprnds1, slp_node, -1);
		}
	      else
		{
		  vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt, NULL);
		  vec_oprnds0.quick_push (vec_oprnd0);
		  if (op_type == binary_op)
		    {
		      if (code == WIDEN_LSHIFT_EXPR)
			vec_oprnd1 = op1;
		      else
			vec_oprnd1 = vect_get_vec_def_for_operand (op1, stmt,
								   NULL);
		      vec_oprnds1.quick_push (vec_oprnd1);
		    }
		}
	    }
	  else
	    {
	      vec_oprnd0 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd0);
	      vec_oprnds0.truncate (0);
	      vec_oprnds0.quick_push (vec_oprnd0);
	      if (op_type == binary_op)
		{
		  if (code == WIDEN_LSHIFT_EXPR)
		    vec_oprnd1 = op1;
		  else
		    vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt[1],
								 vec_oprnd1);
		  vec_oprnds1.truncate (0);
		  vec_oprnds1.quick_push (vec_oprnd1);
		}
	    }

	  /* Arguments are ready.  Create the new vector stmts.  */
	  for (i = multi_step_cvt; i >= 0; i--)
	    {
	      tree this_dest = vec_dsts[i];
	      enum tree_code c1 = code1, c2 = code2;
	      if (i == 0 && codecvt2 != ERROR_MARK)
		{
		  c1 = codecvt1;
		  c2 = codecvt2;
		}
	      vect_create_vectorized_promotion_stmts (&vec_oprnds0,
						      &vec_oprnds1,
						      stmt, this_dest, gsi,
						      c1, c2, decl1, decl2,
						      op_type);
	    }

	  FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
	    {
	      if (cvt_type)
		{
		  if (codecvt1 == CALL_EXPR)
		    {
		      new_stmt = gimple_build_call (decl1, 1, vop0);
		      new_temp = make_ssa_name (vec_dest, new_stmt);
		      gimple_call_set_lhs (new_stmt, new_temp);
		    }
		  else
		    {
		      gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
		      new_temp = make_ssa_name (vec_dest, NULL);
		      new_stmt = gimple_build_assign_with_ops (codecvt1,
							       new_temp,
							       vop0, NULL);
		    }

		  vect_finish_stmt_generation (stmt, new_stmt, gsi);
		}
	      else
		new_stmt = SSA_NAME_DEF_STMT (vop0);

	      if (slp_node)
		SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
	      else
		{
		  if (!prev_stmt_info)
		    STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
		  else
		    STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
		  prev_stmt_info = vinfo_for_stmt (new_stmt);
		}
	    }
	}

      *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
      break;

    case NARROW:
      /* In case the vectorization factor (VF) is bigger than the number
	 of elements that we can fit in a vectype (nunits), we have to
	 generate more than one vector stmt - i.e - we need to "unroll"
	 the vector stmt by a factor VF/nunits.  */
      for (j = 0; j < ncopies; j++)
	{
	  /* Handle uses.  */
	  if (slp_node)
	    vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
			       slp_node, -1);
	  else
	    {
	      vec_oprnds0.truncate (0);
	      vect_get_loop_based_defs (&last_oprnd, stmt, dt[0], &vec_oprnds0,
					vect_pow2 (multi_step_cvt) - 1);
	    }

	  /* Arguments are ready.  Create the new vector stmts.  */
	  if (cvt_type)
	    FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
	      {
		if (codecvt1 == CALL_EXPR)
		  {
		    new_stmt = gimple_build_call (decl1, 1, vop0);
		    new_temp = make_ssa_name (vec_dest, new_stmt);
		    gimple_call_set_lhs (new_stmt, new_temp);
		  }
		else
		  {
		    gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
		    new_temp = make_ssa_name (vec_dest, NULL);
		    new_stmt = gimple_build_assign_with_ops (codecvt1,
							     new_temp,
							     vop0, NULL);
		  }

		vect_finish_stmt_generation (stmt, new_stmt, gsi);
		vec_oprnds0[i] = new_temp;
	      }

	  vect_create_vectorized_demotion_stmts (&vec_oprnds0, multi_step_cvt,
						 stmt, vec_dsts, gsi,
						 slp_node, code1,
						 &prev_stmt_info);
	}

      *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
      break;
    }

  vec_oprnds0.release ();
  vec_oprnds1.release ();
  vec_dsts.release ();
  interm_types.release ();

  return true;
}
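/* Illustrative example (not from the original sources): vectorizing
     f = (float) s;		(short -> float, a WIDEN FLOAT_EXPR)
   may need an intermediate cvt_type, e.g. widening short to int with
   VEC_UNPACK_*_EXPRs and then a directly supportable int -> float
   conversion, which is exactly the multi-step machinery above.  */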
/* Function vectorizable_assignment.

   Check if STMT performs an assignment (copy) that can be vectorized.
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at BSI.
   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */

static bool
vectorizable_assignment (gimple stmt, gimple_stmt_iterator *gsi,
			 gimple *vec_stmt, slp_tree slp_node)
{
  tree vec_dest;
  tree scalar_dest;
  tree op;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  tree new_temp;
  tree def;
  gimple def_stmt;
  enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
  unsigned int nunits = TYPE_VECTOR_SUBPARTS (vectype);
  int ncopies;
  int i, j;
  vec<tree> vec_oprnds = vNULL;
  tree vop;
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
  gimple new_stmt = NULL;
  stmt_vec_info prev_stmt_info = NULL;
  enum tree_code code;
  tree vectype_in;

  /* Multiple types in SLP are handled by creating the appropriate number of
     vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
     case of SLP.  */
  if (slp_node || PURE_SLP_STMT (stmt_info))
    ncopies = 1;
  else
    ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;

  gcc_assert (ncopies >= 1);

  if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
    return false;

  if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
    return false;

  /* Is vectorizable assignment?  */
  if (!is_gimple_assign (stmt))
    return false;

  scalar_dest = gimple_assign_lhs (stmt);
  if (TREE_CODE (scalar_dest) != SSA_NAME)
    return false;

  code = gimple_assign_rhs_code (stmt);
  if (gimple_assign_single_p (stmt)
      || code == PAREN_EXPR
      || CONVERT_EXPR_CODE_P (code))
    op = gimple_assign_rhs1 (stmt);
  else
    return false;

  if (code == VIEW_CONVERT_EXPR)
    op = TREE_OPERAND (op, 0);

  if (!vect_is_simple_use_1 (op, stmt, loop_vinfo, bb_vinfo,
			     &def_stmt, &def, &dt[0], &vectype_in))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "use not simple.\n");
      return false;
    }

  /* We can handle NOP_EXPR conversions that do not change the number
     of elements or the vector size.  */
  if ((CONVERT_EXPR_CODE_P (code)
       || code == VIEW_CONVERT_EXPR)
      && (!vectype_in
	  || TYPE_VECTOR_SUBPARTS (vectype_in) != nunits
	  || (GET_MODE_SIZE (TYPE_MODE (vectype))
	      != GET_MODE_SIZE (TYPE_MODE (vectype_in)))))
    return false;

  /* We do not handle bit-precision changes.  */
  if ((CONVERT_EXPR_CODE_P (code)
       || code == VIEW_CONVERT_EXPR)
      && INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest))
      && ((TYPE_PRECISION (TREE_TYPE (scalar_dest))
	   != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest))))
	  || ((TYPE_PRECISION (TREE_TYPE (op))
	       != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (op))))))
      /* But a conversion that does not change the bit-pattern is ok.  */
      && !((TYPE_PRECISION (TREE_TYPE (scalar_dest))
	    > TYPE_PRECISION (TREE_TYPE (op)))
	   && TYPE_UNSIGNED (TREE_TYPE (op))))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "type conversion to/from bit-precision "
			 "unsupported.\n");
      return false;
    }

  if (!vec_stmt) /* transformation not required.  */
    {
      STMT_VINFO_TYPE (stmt_info) = assignment_vec_info_type;
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "=== vectorizable_assignment ===\n");
      vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
      return true;
    }

  /** Transform.  **/
  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location, "transform assignment.\n");

  /* Handle def.  */
  vec_dest = vect_create_destination_var (scalar_dest, vectype);

  /* Handle use.  */
  for (j = 0; j < ncopies; j++)
    {
      /* Handle uses.  */
      if (j == 0)
	vect_get_vec_defs (op, NULL, stmt, &vec_oprnds, NULL, slp_node, -1);
      else
	vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds, NULL);

      /* Arguments are ready. create the new vector stmt.  */
      FOR_EACH_VEC_ELT (vec_oprnds, i, vop)
	{
	  if (CONVERT_EXPR_CODE_P (code)
	      || code == VIEW_CONVERT_EXPR)
	    vop = build1 (VIEW_CONVERT_EXPR, vectype, vop);
	  new_stmt = gimple_build_assign (vec_dest, vop);
	  new_temp = make_ssa_name (vec_dest, new_stmt);
	  gimple_assign_set_lhs (new_stmt, new_temp);
	  vect_finish_stmt_generation (stmt, new_stmt, gsi);
	  if (slp_node)
	    SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
	}

      if (slp_node)
	continue;

      if (j == 0)
	STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
      else
	STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;

      prev_stmt_info = vinfo_for_stmt (new_stmt);
    }

  vec_oprnds.release ();
  return true;
}
/* Return TRUE if CODE (a shift operation) is supported for SCALAR_TYPE
   either as shift by a scalar or by a vector.  */

bool
vect_supportable_shift (enum tree_code code, tree scalar_type)
{

  enum machine_mode vec_mode;
  optab optab;
  int icode;
  tree vectype;

  vectype = get_vectype_for_scalar_type (scalar_type);
  if (!vectype)
    return false;

  optab = optab_for_tree_code (code, vectype, optab_scalar);
  if (!optab
      || optab_handler (optab, TYPE_MODE (vectype)) == CODE_FOR_nothing)
    {
      optab = optab_for_tree_code (code, vectype, optab_vector);
      if (!optab
	  || (optab_handler (optab, TYPE_MODE (vectype))
	      == CODE_FOR_nothing))
	return false;
    }

  vec_mode = TYPE_MODE (vectype);
  icode = (int) optab_handler (optab, vec_mode);
  if (icode == CODE_FOR_nothing)
    return false;

  return true;
}
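/* Illustrative usage (not from the original sources): callers such as the
   pattern recognizer can ask, e.g.,
     vect_supportable_shift (RSHIFT_EXPR, short_integer_type_node)
   to check that the target can shift vectors of shorts before committing
   to a pattern that introduces such a shift.  */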
/* Function vectorizable_shift.

   Check if STMT performs a shift operation that can be vectorized.
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at BSI.
   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */

static bool
vectorizable_shift (gimple stmt, gimple_stmt_iterator *gsi,
                    gimple *vec_stmt, slp_tree slp_node)
{
  tree vec_dest;
  tree scalar_dest;
  tree op0, op1 = NULL;
  tree vec_oprnd1 = NULL_TREE;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  tree vectype;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  enum tree_code code;
  enum machine_mode vec_mode;
  tree new_temp;
  optab optab;
  int icode;
  enum machine_mode optab_op2_mode;
  tree def;
  gimple def_stmt;
  enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
  gimple new_stmt = NULL;
  stmt_vec_info prev_stmt_info;
  int nunits_in;
  int nunits_out;
  tree vectype_out;
  tree op1_vectype;
  int ncopies;
  int j, i;
  vec<tree> vec_oprnds0 = vNULL;
  vec<tree> vec_oprnds1 = vNULL;
  tree vop0, vop1;
  unsigned int k;
  bool scalar_shift_arg = true;
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
  int vf;

  if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
    return false;

  if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
    return false;

  /* Is STMT a vectorizable binary/unary operation?  */
  if (!is_gimple_assign (stmt))
    return false;

  if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
    return false;

  code = gimple_assign_rhs_code (stmt);

  if (!(code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
        || code == RROTATE_EXPR))
    return false;

  scalar_dest = gimple_assign_lhs (stmt);
  vectype_out = STMT_VINFO_VECTYPE (stmt_info);
  if (TYPE_PRECISION (TREE_TYPE (scalar_dest))
      != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest))))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "bit-precision shifts not supported.\n");
      return false;
    }

  op0 = gimple_assign_rhs1 (stmt);
  if (!vect_is_simple_use_1 (op0, stmt, loop_vinfo, bb_vinfo,
                             &def_stmt, &def, &dt[0], &vectype))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "use not simple.\n");
      return false;
    }
  /* If op0 is an external or constant def use a vector type with
     the same size as the output vector type.  */
  if (!vectype)
    vectype = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
  if (vec_stmt)
    gcc_assert (vectype);
  if (!vectype)
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "no vectype for scalar type\n");
      return false;
    }

  nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
  nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
  if (nunits_out != nunits_in)
    return false;

  op1 = gimple_assign_rhs2 (stmt);
  if (!vect_is_simple_use_1 (op1, stmt, loop_vinfo, bb_vinfo, &def_stmt,
                             &def, &dt[1], &op1_vectype))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "use not simple.\n");
      return false;
    }

  if (loop_vinfo)
    vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
  else
    vf = 1;

  /* Multiple types in SLP are handled by creating the appropriate number of
     vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
     case of SLP.  */
  if (slp_node || PURE_SLP_STMT (stmt_info))
    ncopies = 1;
  else
    ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;

  gcc_assert (ncopies >= 1);
  /* Determine whether the shift amount is a vector, or scalar.  If the
     shift/rotate amount is a vector, use the vector/vector shift optabs.  */

  if (dt[1] == vect_internal_def && !slp_node)
    scalar_shift_arg = false;
  else if (dt[1] == vect_constant_def
           || dt[1] == vect_external_def
           || dt[1] == vect_internal_def)
    {
      /* In SLP, need to check whether the shift count is the same,
         in loops if it is a constant or invariant, it is always
         a scalar shift.  */
      if (slp_node)
        {
          vec<gimple> stmts = SLP_TREE_SCALAR_STMTS (slp_node);
          gimple slpstmt;

          FOR_EACH_VEC_ELT (stmts, k, slpstmt)
            if (!operand_equal_p (gimple_assign_rhs2 (slpstmt), op1, 0))
              scalar_shift_arg = false;
        }
    }
  else
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "operand mode requires invariant argument.\n");
      return false;
    }

  /* Vector shifted by vector.  */
  if (!scalar_shift_arg)
    {
      optab = optab_for_tree_code (code, vectype, optab_vector);
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "vector/vector shift/rotate found.\n");

      if (!op1_vectype)
        op1_vectype = get_same_sized_vectype (TREE_TYPE (op1), vectype_out);
      if (op1_vectype == NULL_TREE
          || TYPE_MODE (op1_vectype) != TYPE_MODE (vectype))
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "unusable type for last operand in"
                             " vector/vector shift/rotate.\n");
          return false;
        }
    }
  /* See if the machine has a vector shifted by scalar insn and if not
     then see if it has a vector shifted by vector insn.  */
  else
    {
      optab = optab_for_tree_code (code, vectype, optab_scalar);
      if (optab
          && optab_handler (optab, TYPE_MODE (vectype)) != CODE_FOR_nothing)
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_NOTE, vect_location,
                             "vector/scalar shift/rotate found.\n");
        }
      else
        {
          optab = optab_for_tree_code (code, vectype, optab_vector);
          if (optab
              && (optab_handler (optab, TYPE_MODE (vectype))
                  != CODE_FOR_nothing))
            {
              scalar_shift_arg = false;

              if (dump_enabled_p ())
                dump_printf_loc (MSG_NOTE, vect_location,
                                 "vector/vector shift/rotate found.\n");

              /* Unlike the other binary operators, shifts/rotates have
                 the rhs being int, instead of the same type as the lhs,
                 so make sure the scalar is the right type if we are
                 dealing with vectors of long long/long/short/char.  */
              if (dt[1] == vect_constant_def)
                op1 = fold_convert (TREE_TYPE (vectype), op1);
              else if (!useless_type_conversion_p (TREE_TYPE (vectype),
                                                   TREE_TYPE (op1)))
                {
                  if (slp_node
                      && TYPE_MODE (TREE_TYPE (vectype))
                         != TYPE_MODE (TREE_TYPE (op1)))
                    {
                      if (dump_enabled_p ())
                        dump_printf_loc (MSG_MISSED_OPTIMIZATION,
                                         vect_location,
                                         "unusable type for last operand in"
                                         " vector/vector shift/rotate.\n");
                      return false;
                    }
                  if (vec_stmt && !slp_node)
                    {
                      op1 = fold_convert (TREE_TYPE (vectype), op1);
                      op1 = vect_init_vector (stmt, op1,
                                              TREE_TYPE (vectype), NULL);
                    }
                }
            }
        }
    }
  /* Supportable by target?  */
  if (!optab)
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "no optab.\n");
      return false;
    }
  vec_mode = TYPE_MODE (vectype);
  icode = (int) optab_handler (optab, vec_mode);
  if (icode == CODE_FOR_nothing)
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "op not supported by target.\n");
      /* Check only during analysis.  */
      if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD
          || (vf < vect_min_worthwhile_factor (code)
              && !vec_stmt))
        return false;
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "proceeding using word mode.\n");
    }

  /* Worthwhile without SIMD support?  Check only during analysis.  */
  if (!VECTOR_MODE_P (TYPE_MODE (vectype))
      && vf < vect_min_worthwhile_factor (code)
      && !vec_stmt)
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "not worthwhile without SIMD support.\n");
      return false;
    }

  if (!vec_stmt) /* transformation not required.  */
    {
      STMT_VINFO_TYPE (stmt_info) = shift_vec_info_type;
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "=== vectorizable_shift ===\n");
      vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
      return true;
    }
  /** Transform.  **/

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "transform binary/unary operation.\n");

  /* Handle def.  */
  vec_dest = vect_create_destination_var (scalar_dest, vectype);

  prev_stmt_info = NULL;
  for (j = 0; j < ncopies; j++)
    {
      /* Handle uses.  */
      if (j == 0)
        {
          if (scalar_shift_arg)
            {
              /* Vector shl and shr insn patterns can be defined with scalar
                 operand 2 (shift operand).  In this case, use constant or loop
                 invariant op1 directly, without extending it to vector mode
                 first.  */
              optab_op2_mode = insn_data[icode].operand[2].mode;
              if (!VECTOR_MODE_P (optab_op2_mode))
                {
                  if (dump_enabled_p ())
                    dump_printf_loc (MSG_NOTE, vect_location,
                                     "operand 1 using scalar mode.\n");
                  vec_oprnd1 = op1;
                  vec_oprnds1.create (slp_node ? slp_node->vec_stmts_size : 1);
                  vec_oprnds1.quick_push (vec_oprnd1);
                  if (slp_node)
                    {
                      /* Store vec_oprnd1 for every vector stmt to be created
                         for SLP_NODE.  We check during the analysis that all
                         the shift arguments are the same.
                         TODO: Allow different constants for different vector
                         stmts generated for an SLP instance.  */
                      for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
                        vec_oprnds1.quick_push (vec_oprnd1);
                    }
                }
            }

          /* vec_oprnd1 is available if operand 1 should be of a scalar-type
             (a special case for certain kind of vector shifts); otherwise,
             operand 1 should be of a vector type (the usual case).  */
          if (vec_oprnd1)
            vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
                               slp_node, -1);
          else
            vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
                               slp_node, -1);
        }
      else
        vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, &vec_oprnds1);

      /* Arguments are ready.  Create the new vector stmt.  */
      FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
        {
          vop1 = vec_oprnds1[i];
          new_stmt = gimple_build_assign_with_ops (code, vec_dest, vop0, vop1);
          new_temp = make_ssa_name (vec_dest, new_stmt);
          gimple_assign_set_lhs (new_stmt, new_temp);
          vect_finish_stmt_generation (stmt, new_stmt, gsi);
          if (slp_node)
            SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
        }

      if (slp_node)
        continue;

      if (j == 0)
        STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
      else
        STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
      prev_stmt_info = vinfo_for_stmt (new_stmt);
    }

  vec_oprnds0.release ();
  vec_oprnds1.release ();

  return true;
}
/* Function vectorizable_operation.

   Check if STMT performs a binary, unary or ternary operation that can
   be vectorized.
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at BSI.
   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */

static bool
vectorizable_operation (gimple stmt, gimple_stmt_iterator *gsi,
                        gimple *vec_stmt, slp_tree slp_node)
{
  tree vec_dest;
  tree scalar_dest;
  tree op0, op1 = NULL_TREE, op2 = NULL_TREE;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  tree vectype;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  enum tree_code code;
  enum machine_mode vec_mode;
  tree new_temp;
  int op_type;
  optab optab;
  int icode;
  tree def;
  gimple def_stmt;
  enum vect_def_type dt[3]
    = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
  gimple new_stmt = NULL;
  stmt_vec_info prev_stmt_info;
  int nunits_in;
  int nunits_out;
  tree vectype_out;
  int ncopies;
  int j, i;
  vec<tree> vec_oprnds0 = vNULL;
  vec<tree> vec_oprnds1 = vNULL;
  vec<tree> vec_oprnds2 = vNULL;
  tree vop0, vop1, vop2;
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
  int vf;

  if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
    return false;

  if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
    return false;

  /* Is STMT a vectorizable binary/unary operation?  */
  if (!is_gimple_assign (stmt))
    return false;

  if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
    return false;

  code = gimple_assign_rhs_code (stmt);

  /* For pointer addition, we should use the normal plus for
     the vector addition.  */
  if (code == POINTER_PLUS_EXPR)
    code = PLUS_EXPR;

  /* Support only unary, binary or ternary operations.  */
  op_type = TREE_CODE_LENGTH (code);
  if (op_type != unary_op && op_type != binary_op && op_type != ternary_op)
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "num. args = %d (not unary/binary/ternary op).\n",
                         op_type);
      return false;
    }

  scalar_dest = gimple_assign_lhs (stmt);
  vectype_out = STMT_VINFO_VECTYPE (stmt_info);

  /* Most operations cannot handle bit-precision types without extra
     truncations.  */
  if ((TYPE_PRECISION (TREE_TYPE (scalar_dest))
       != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest))))
      /* Exception are bitwise binary operations.  */
      && code != BIT_IOR_EXPR
      && code != BIT_XOR_EXPR
      && code != BIT_AND_EXPR)
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "bit-precision arithmetic not supported.\n");
      return false;
    }

  op0 = gimple_assign_rhs1 (stmt);
  if (!vect_is_simple_use_1 (op0, stmt, loop_vinfo, bb_vinfo,
                             &def_stmt, &def, &dt[0], &vectype))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "use not simple.\n");
      return false;
    }
  /* If op0 is an external or constant def use a vector type with
     the same size as the output vector type.  */
  if (!vectype)
    vectype = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
  if (vec_stmt)
    gcc_assert (vectype);
  if (!vectype)
    {
      if (dump_enabled_p ())
        {
          dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                           "no vectype for scalar type ");
          dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
                             TREE_TYPE (op0));
          dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
        }

      return false;
    }

  nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
  nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
  if (nunits_out != nunits_in)
    return false;

  if (op_type == binary_op || op_type == ternary_op)
    {
      op1 = gimple_assign_rhs2 (stmt);
      if (!vect_is_simple_use (op1, stmt, loop_vinfo, bb_vinfo, &def_stmt,
                               &def, &dt[1]))
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "use not simple.\n");
          return false;
        }
    }
  if (op_type == ternary_op)
    {
      op2 = gimple_assign_rhs3 (stmt);
      if (!vect_is_simple_use (op2, stmt, loop_vinfo, bb_vinfo, &def_stmt,
                               &def, &dt[2]))
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "use not simple.\n");
          return false;
        }
    }

  if (loop_vinfo)
    vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
  else
    vf = 1;

  /* Multiple types in SLP are handled by creating the appropriate number of
     vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
     case of SLP.  */
  if (slp_node || PURE_SLP_STMT (stmt_info))
    ncopies = 1;
  else
    ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;

  gcc_assert (ncopies >= 1);
  /* Shifts are handled in vectorizable_shift ().  */
  if (code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
      || code == RROTATE_EXPR)
    return false;

  /* Supportable by target?  */

  vec_mode = TYPE_MODE (vectype);
  if (code == MULT_HIGHPART_EXPR)
    {
      if (can_mult_highpart_p (vec_mode, TYPE_UNSIGNED (vectype)))
        icode = LAST_INSN_CODE;
      else
        icode = CODE_FOR_nothing;
    }
  else
    {
      optab = optab_for_tree_code (code, vectype, optab_default);
      if (!optab)
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "no optab.\n");
          return false;
        }
      icode = (int) optab_handler (optab, vec_mode);
    }

  if (icode == CODE_FOR_nothing)
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "op not supported by target.\n");
      /* Check only during analysis.  */
      if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD
          || (!vec_stmt && vf < vect_min_worthwhile_factor (code)))
        return false;
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "proceeding using word mode.\n");
    }

  /* Worthwhile without SIMD support?  Check only during analysis.  */
  if (!VECTOR_MODE_P (vec_mode)
      && !vec_stmt
      && vf < vect_min_worthwhile_factor (code))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "not worthwhile without SIMD support.\n");
      return false;
    }

  if (!vec_stmt) /* transformation not required.  */
    {
      STMT_VINFO_TYPE (stmt_info) = op_vec_info_type;
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "=== vectorizable_operation ===\n");
      vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
      return true;
    }
  /** Transform.  **/

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "transform binary/unary operation.\n");

  /* Handle def.  */
  vec_dest = vect_create_destination_var (scalar_dest, vectype);

  /* In case the vectorization factor (VF) is bigger than the number
     of elements that we can fit in a vectype (nunits), we have to generate
     more than one vector stmt - i.e - we need to "unroll" the
     vector stmt by a factor VF/nunits.  In doing so, we record a pointer
     from one copy of the vector stmt to the next, in the field
     STMT_VINFO_RELATED_STMT.  This is necessary in order to allow following
     stages to find the correct vector defs to be used when vectorizing
     stmts that use the defs of the current stmt.  The example below
     illustrates the vectorization process when VF=16 and nunits=4 (i.e.,
     we need to create 4 vectorized stmts):

     before vectorization:
                                RELATED_STMT    VEC_STMT
        S1:     x = memref      -               -
        S2:     z = x + 1       -               -

     step 1: vectorize stmt S1 (done in vectorizable_load. See more details
             there):
                                RELATED_STMT    VEC_STMT
        VS1_0:  vx0 = memref0   VS1_1           -
        VS1_1:  vx1 = memref1   VS1_2           -
        VS1_2:  vx2 = memref2   VS1_3           -
        VS1_3:  vx3 = memref3   -               -
        S1:     x = load        -               VS1_0
        S2:     z = x + 1       -               -

     step2: vectorize stmt S2 (done here):
        To vectorize stmt S2 we first need to find the relevant vector
        def for the first operand 'x'.  This is, as usual, obtained from
        the vector stmt recorded in the STMT_VINFO_VEC_STMT of the stmt
        that defines 'x' (S1).  This way we find the stmt VS1_0, and the
        relevant vector def 'vx0'.  Having found 'vx0' we can generate
        the vector stmt VS2_0, and as usual, record it in the
        STMT_VINFO_VEC_STMT of stmt S2.
        When creating the second copy (VS2_1), we obtain the relevant vector
        def from the vector stmt recorded in the STMT_VINFO_RELATED_STMT of
        stmt VS1_0.  This way we find the stmt VS1_1 and the relevant
        vector def 'vx1'.  Using 'vx1' we create stmt VS2_1 and record a
        pointer to it in the STMT_VINFO_RELATED_STMT of the vector stmt VS2_0.
        Similarly when creating stmts VS2_2 and VS2_3.  This is the resulting
        chain of stmts and pointers:
                                RELATED_STMT    VEC_STMT
        VS1_0:  vx0 = memref0   VS1_1           -
        VS1_1:  vx1 = memref1   VS1_2           -
        VS1_2:  vx2 = memref2   VS1_3           -
        VS1_3:  vx3 = memref3   -               -
        S1:     x = load        -               VS1_0
        VS2_0:  vz0 = vx0 + v1  VS2_1           -
        VS2_1:  vz1 = vx1 + v1  VS2_2           -
        VS2_2:  vz2 = vx2 + v1  VS2_3           -
        VS2_3:  vz3 = vx3 + v1  -               -
        S2:     z = x + 1       -               VS2_0  */
  prev_stmt_info = NULL;
  for (j = 0; j < ncopies; j++)
    {
      /* Handle uses.  */
      if (j == 0)
        {
          if (op_type == binary_op || op_type == ternary_op)
            vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
                               slp_node, -1);
          else
            vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
                               slp_node, -1);
          if (op_type == ternary_op)
            {
              vec_oprnds2.create (1);
              vec_oprnds2.quick_push (vect_get_vec_def_for_operand (op2,
                                                                    stmt,
                                                                    NULL));
            }
        }
      else
        {
          vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, &vec_oprnds1);
          if (op_type == ternary_op)
            {
              tree vec_oprnd = vec_oprnds2.pop ();
              vec_oprnds2.quick_push (vect_get_vec_def_for_stmt_copy (dt[2],
                                                                      vec_oprnd));
            }
        }

      /* Arguments are ready.  Create the new vector stmt.  */
      FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
        {
          vop1 = ((op_type == binary_op || op_type == ternary_op)
                  ? vec_oprnds1[i] : NULL_TREE);
          vop2 = ((op_type == ternary_op)
                  ? vec_oprnds2[i] : NULL_TREE);
          new_stmt = gimple_build_assign_with_ops (code, vec_dest,
                                                   vop0, vop1, vop2);
          new_temp = make_ssa_name (vec_dest, new_stmt);
          gimple_assign_set_lhs (new_stmt, new_temp);
          vect_finish_stmt_generation (stmt, new_stmt, gsi);
          if (slp_node)
            SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
        }

      if (slp_node)
        continue;

      if (j == 0)
        STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
      else
        STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
      prev_stmt_info = vinfo_for_stmt (new_stmt);
    }

  vec_oprnds0.release ();
  vec_oprnds1.release ();
  vec_oprnds2.release ();

  return true;
}
/* A helper function to ensure data reference DR's base alignment
   for STMT_INFO.  */

static void
ensure_base_align (stmt_vec_info stmt_info, struct data_reference *dr)
{
  if (!dr->aux)
    return;

  if (((dataref_aux *)dr->aux)->base_misaligned)
    {
      tree vectype = STMT_VINFO_VECTYPE (stmt_info);
      tree base_decl = ((dataref_aux *)dr->aux)->base_decl;

      DECL_ALIGN (base_decl) = TYPE_ALIGN (vectype);
      DECL_USER_ALIGN (base_decl) = 1;
      ((dataref_aux *)dr->aux)->base_misaligned = false;
    }
}
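
/* For instance (an assumed scenario): a file-scope array laid out with the
   ABI's default alignment can be promoted here to TYPE_ALIGN of the vector
   type, e.g. to 16 bytes for V4SF, so that the loads and stores generated
   later may use the dr_aligned scheme instead of realignment code.  */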
/* Given a vector type VECTYPE returns the VECTOR_CST mask that implements
   reversal of the vector elements.  If that is impossible to do,
   returns NULL.  */

static tree
perm_mask_for_reverse (tree vectype)
{
  int i, nunits;
  unsigned char *sel;

  nunits = TYPE_VECTOR_SUBPARTS (vectype);
  sel = XALLOCAVEC (unsigned char, nunits);

  for (i = 0; i < nunits; ++i)
    sel[i] = nunits - 1 - i;

  return vect_gen_perm_mask (vectype, sel);
}
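
/* Concrete instance: for a 4-element vector type the selector built above
   is {3, 2, 1, 0}, so a register can be reversed with

     vx' = VEC_PERM_EXPR <vx, vx, {3, 2, 1, 0}>

   vect_gen_perm_mask returns NULL if the target cannot permute in this
   mode, which the negative-step callers treat as "reversing not
   supported".  */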
/* Function vectorizable_store.

   Check if STMT defines a non scalar data-ref (array/pointer/structure) that
   can be vectorized.
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at BSI.
   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */

static bool
vectorizable_store (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
                    slp_tree slp_node)
{
  tree scalar_dest;
  tree data_ref;
  tree op;
  tree vec_oprnd = NULL_TREE;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr = NULL;
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  tree elem_type;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  struct loop *loop = NULL;
  enum machine_mode vec_mode;
  tree dummy;
  enum dr_alignment_support alignment_support_scheme;
  tree def;
  gimple def_stmt;
  enum vect_def_type dt;
  stmt_vec_info prev_stmt_info = NULL;
  tree dataref_ptr = NULL_TREE;
  tree dataref_offset = NULL_TREE;
  gimple ptr_incr = NULL;
  int nunits = TYPE_VECTOR_SUBPARTS (vectype);
  int ncopies;
  int j;
  gimple next_stmt, first_stmt = NULL;
  bool grouped_store = false;
  bool store_lanes_p = false;
  unsigned int group_size, i;
  vec<tree> dr_chain = vNULL;
  vec<tree> oprnds = vNULL;
  vec<tree> result_chain = vNULL;
  bool inv_p;
  bool negative = false;
  tree offset = NULL_TREE;
  vec<tree> vec_oprnds = vNULL;
  bool slp = (slp_node != NULL);
  unsigned int vec_num;
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
  tree aggr_type;

  if (loop_vinfo)
    loop = LOOP_VINFO_LOOP (loop_vinfo);

  /* Multiple types in SLP are handled by creating the appropriate number of
     vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
     case of SLP.  */
  if (slp || PURE_SLP_STMT (stmt_info))
    ncopies = 1;
  else
    ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;

  gcc_assert (ncopies >= 1);
  /* FORNOW. This restriction should be relaxed.  */
  if (loop && nested_in_vect_loop_p (loop, stmt) && ncopies > 1)
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "multiple types in nested loop.\n");
      return false;
    }

  if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
    return false;

  if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
    return false;

  /* Is vectorizable store?  */

  if (!is_gimple_assign (stmt))
    return false;

  scalar_dest = gimple_assign_lhs (stmt);
  if (TREE_CODE (scalar_dest) == VIEW_CONVERT_EXPR
      && is_pattern_stmt_p (stmt_info))
    scalar_dest = TREE_OPERAND (scalar_dest, 0);
  if (TREE_CODE (scalar_dest) != ARRAY_REF
      && TREE_CODE (scalar_dest) != BIT_FIELD_REF
      && TREE_CODE (scalar_dest) != INDIRECT_REF
      && TREE_CODE (scalar_dest) != COMPONENT_REF
      && TREE_CODE (scalar_dest) != IMAGPART_EXPR
      && TREE_CODE (scalar_dest) != REALPART_EXPR
      && TREE_CODE (scalar_dest) != MEM_REF)
    return false;

  gcc_assert (gimple_assign_single_p (stmt));
  op = gimple_assign_rhs1 (stmt);
  if (!vect_is_simple_use (op, stmt, loop_vinfo, bb_vinfo, &def_stmt,
                           &def, &dt))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "use not simple.\n");
      return false;
    }

  elem_type = TREE_TYPE (vectype);
  vec_mode = TYPE_MODE (vectype);

  /* FORNOW. In some cases can vectorize even if data-type not supported
     (e.g. - array initialization with 0).  */
  if (optab_handler (mov_optab, vec_mode) == CODE_FOR_nothing)
    return false;
  if (!STMT_VINFO_DATA_REF (stmt_info))
    return false;

  negative
    = tree_int_cst_compare (loop && nested_in_vect_loop_p (loop, stmt)
                            ? STMT_VINFO_DR_STEP (stmt_info) : DR_STEP (dr),
                            size_zero_node) < 0;
  if (negative && ncopies > 1)
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "multiple types with negative step.\n");
      return false;
    }

  if (negative)
    {
      gcc_assert (!grouped_store);
      alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
      if (alignment_support_scheme != dr_aligned
          && alignment_support_scheme != dr_unaligned_supported)
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "negative step but alignment required.\n");
          return false;
        }
      if (dt != vect_constant_def
          && dt != vect_external_def
          && !perm_mask_for_reverse (vectype))
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "negative step and reversing not supported.\n");
          return false;
        }
    }
  if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
    {
      grouped_store = true;
      first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
      if (!slp && !PURE_SLP_STMT (stmt_info))
        {
          group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
          if (vect_store_lanes_supported (vectype, group_size))
            store_lanes_p = true;
          else if (!vect_grouped_store_supported (vectype, group_size))
            return false;
        }

      if (first_stmt == stmt)
        {
          /* STMT is the leader of the group.  Check the operands of all the
             stmts of the group.  */
          next_stmt = GROUP_NEXT_ELEMENT (stmt_info);
          while (next_stmt)
            {
              gcc_assert (gimple_assign_single_p (next_stmt));
              op = gimple_assign_rhs1 (next_stmt);
              if (!vect_is_simple_use (op, next_stmt, loop_vinfo, bb_vinfo,
                                       &def_stmt, &def, &dt))
                {
                  if (dump_enabled_p ())
                    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                     "use not simple.\n");
                  return false;
                }
              next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
            }
        }
    }

  if (!vec_stmt) /* transformation not required.  */
    {
      STMT_VINFO_TYPE (stmt_info) = store_vec_info_type;
      vect_model_store_cost (stmt_info, ncopies, store_lanes_p, dt,
                             NULL, NULL, NULL);
      return true;
    }

  /** Transform.  **/

  ensure_base_align (stmt_info, dr);
  if (grouped_store)
    {
      first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
      group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));

      GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt))++;

      /* FORNOW */
      gcc_assert (!loop || !nested_in_vect_loop_p (loop, stmt));

      /* We vectorize all the stmts of the interleaving group when we
         reach the last stmt in the group.  */
      if (GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt))
          < GROUP_SIZE (vinfo_for_stmt (first_stmt))
          && !slp)
        {
          *vec_stmt = NULL;
          return true;
        }

      if (slp)
        {
          grouped_store = false;
          /* VEC_NUM is the number of vect stmts to be created for this
             group.  */
          vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
          first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0];
          first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
          op = gimple_assign_rhs1 (first_stmt);
        }
      else
        /* VEC_NUM is the number of vect stmts to be created for this
           group.  */
        vec_num = group_size;
    }
  else
    {
      first_stmt = stmt;
      first_dr = dr;
      group_size = vec_num = 1;
    }

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "transform store. ncopies = %d\n", ncopies);

  dr_chain.create (group_size);
  oprnds.create (group_size);

  alignment_support_scheme = vect_supportable_dr_alignment (first_dr, false);
  gcc_assert (alignment_support_scheme);
  /* Targets with store-lane instructions must not require explicit
     realignment.  */
  gcc_assert (!store_lanes_p
              || alignment_support_scheme == dr_aligned
              || alignment_support_scheme == dr_unaligned_supported);

  if (negative)
    offset = size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1);

  if (store_lanes_p)
    aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
  else
    aggr_type = vectype;
  /* In case the vectorization factor (VF) is bigger than the number
     of elements that we can fit in a vectype (nunits), we have to generate
     more than one vector stmt - i.e - we need to "unroll" the
     vector stmt by a factor VF/nunits.  For more details see documentation in
     vect_get_vec_def_for_copy_stmt.  */

  /* In case of interleaving (non-unit grouped access):

        S1:  &base + 2 = x2
        S2:  &base = x0
        S3:  &base + 1 = x1
        S4:  &base + 3 = x3

     We create vectorized stores starting from base address (the access of the
     first stmt in the chain (S2 in the above example), when the last store stmt
     of the chain (S4) is reached:

        VS1: &base = vx2
        VS2: &base + vec_size*1 = vx0
        VS3: &base + vec_size*2 = vx1
        VS4: &base + vec_size*3 = vx3

     Then permutation statements are generated:

        VS5: vx5 = VEC_PERM_EXPR < vx0, vx3, {0, 8, 1, 9, 2, 10, 3, 11} >
        VS6: vx6 = VEC_PERM_EXPR < vx0, vx3, {4, 12, 5, 13, 6, 14, 7, 15} >
        ...

     And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
     (the order of the data-refs in the output of vect_permute_store_chain
     corresponds to the order of scalar stmts in the interleaving chain - see
     the documentation of vect_permute_store_chain()).

     In case of both multiple types and interleaving, above vector stores and
     permutation stmts are created for every copy.  The result vector stmts are
     put in STMT_VINFO_VEC_STMT for the first copy and in the corresponding
     STMT_VINFO_RELATED_STMT for the next copies.
  */
  prev_stmt_info = NULL;
  for (j = 0; j < ncopies; j++)
    {
      gimple new_stmt;

      if (j == 0)
        {
          if (slp)
            {
              /* Get vectorized arguments for SLP_NODE.  */
              vect_get_vec_defs (op, NULL_TREE, stmt, &vec_oprnds,
                                 NULL, slp_node, -1);

              vec_oprnd = vec_oprnds[0];
            }
          else
            {
              /* For interleaved stores we collect vectorized defs for all the
                 stores in the group in DR_CHAIN and OPRNDS.  DR_CHAIN is then
                 used as an input to vect_permute_store_chain(), and OPRNDS as
                 an input to vect_get_vec_def_for_stmt_copy() for the next
                 copy.

                 If the store is not grouped, GROUP_SIZE is 1, and DR_CHAIN
                 and OPRNDS are of size 1.  */
              next_stmt = first_stmt;
              for (i = 0; i < group_size; i++)
                {
                  /* Since gaps are not supported for interleaved stores,
                     GROUP_SIZE is the exact number of stmts in the chain.
                     Therefore, NEXT_STMT can't be NULL_TREE.  In case that
                     there is no interleaving, GROUP_SIZE is 1, and only one
                     iteration of the loop will be executed.  */
                  gcc_assert (next_stmt
                              && gimple_assign_single_p (next_stmt));
                  op = gimple_assign_rhs1 (next_stmt);

                  vec_oprnd = vect_get_vec_def_for_operand (op, next_stmt,
                                                            NULL);
                  dr_chain.quick_push (vec_oprnd);
                  oprnds.quick_push (vec_oprnd);
                  next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
                }
            }

          /* We should have caught mismatched types earlier.  */
          gcc_assert (useless_type_conversion_p (vectype,
                                                 TREE_TYPE (vec_oprnd)));
          bool simd_lane_access_p
            = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info);
          if (simd_lane_access_p
              && TREE_CODE (DR_BASE_ADDRESS (first_dr)) == ADDR_EXPR
              && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr), 0))
              && integer_zerop (DR_OFFSET (first_dr))
              && integer_zerop (DR_INIT (first_dr))
              && alias_sets_conflict_p (get_alias_set (aggr_type),
                                        get_alias_set (DR_REF (first_dr))))
            {
              dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr));
              dataref_offset = build_int_cst (reference_alias_ptr_type
                                              (DR_REF (first_dr)), 0);
              inv_p = false;
            }
          else
            dataref_ptr
              = vect_create_data_ref_ptr (first_stmt, aggr_type,
                                          simd_lane_access_p ? loop : NULL,
                                          offset, &dummy, gsi, &ptr_incr,
                                          simd_lane_access_p, &inv_p);
          gcc_assert (bb_vinfo || !inv_p);
        }
      else
        {
          /* For interleaved stores we created vectorized defs for all the
             defs stored in OPRNDS in the previous iteration (previous copy).
             DR_CHAIN is then used as an input to vect_permute_store_chain(),
             and OPRNDS as an input to vect_get_vec_def_for_stmt_copy() for
             the next copy.
             If the store is not grouped, GROUP_SIZE is 1, and DR_CHAIN and
             OPRNDS are of size 1.  */
          for (i = 0; i < group_size; i++)
            {
              op = oprnds[i];
              vect_is_simple_use (op, NULL, loop_vinfo, bb_vinfo, &def_stmt,
                                  &def, &dt);
              vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, op);
              dr_chain[i] = vec_oprnd;
              oprnds[i] = vec_oprnd;
            }
          if (dataref_offset)
            dataref_offset
              = int_const_binop (PLUS_EXPR, dataref_offset,
                                 TYPE_SIZE_UNIT (aggr_type));
          else
            dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
                                           TYPE_SIZE_UNIT (aggr_type));
        }
      if (store_lanes_p)
        {
          tree vec_array;

          /* Combine all the vectors into an array.  */
          vec_array = create_vector_array (vectype, vec_num);
          for (i = 0; i < vec_num; i++)
            {
              vec_oprnd = dr_chain[i];
              write_vector_array (stmt, gsi, vec_oprnd, vec_array, i);
            }

          /* Emit:
               MEM_REF[...all elements...] = STORE_LANES (VEC_ARRAY).  */
          data_ref = create_array_ref (aggr_type, dataref_ptr, first_dr);
          new_stmt = gimple_build_call_internal (IFN_STORE_LANES, 1,
                                                 vec_array);
          gimple_call_set_lhs (new_stmt, data_ref);
          vect_finish_stmt_generation (stmt, new_stmt, gsi);
        }
      else
        {
          new_stmt = NULL;
          if (grouped_store)
            {
              if (j == 0)
                result_chain.create (group_size);
              /* Permute.  */
              vect_permute_store_chain (dr_chain, group_size, stmt, gsi,
                                        &result_chain);
            }

          next_stmt = first_stmt;
          for (i = 0; i < vec_num; i++)
            {
              unsigned align, misalign;

              if (i > 0)
                /* Bump the vector pointer.  */
                dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
                                               stmt, NULL_TREE);

              if (slp)
                vec_oprnd = vec_oprnds[i];
              else if (grouped_store)
                /* For grouped stores vectorized defs are interleaved in
                   vect_permute_store_chain().  */
                vec_oprnd = result_chain[i];

              data_ref = build2 (MEM_REF, TREE_TYPE (vec_oprnd), dataref_ptr,
                                 dataref_offset
                                 ? dataref_offset
                                 : build_int_cst (reference_alias_ptr_type
                                                  (DR_REF (first_dr)), 0));
              align = TYPE_ALIGN_UNIT (vectype);
              if (aligned_access_p (first_dr))
                misalign = 0;
              else if (DR_MISALIGNMENT (first_dr) == -1)
                {
                  TREE_TYPE (data_ref)
                    = build_aligned_type (TREE_TYPE (data_ref),
                                          TYPE_ALIGN (elem_type));
                  align = TYPE_ALIGN_UNIT (elem_type);
                  misalign = 0;
                }
              else
                {
                  TREE_TYPE (data_ref)
                    = build_aligned_type (TREE_TYPE (data_ref),
                                          TYPE_ALIGN (elem_type));
                  misalign = DR_MISALIGNMENT (first_dr);
                }
              if (dataref_offset == NULL_TREE)
                set_ptr_info_alignment (get_ptr_info (dataref_ptr), align,
                                        misalign);

              if (negative
                  && dt != vect_constant_def
                  && dt != vect_external_def)
                {
                  tree perm_mask = perm_mask_for_reverse (vectype);
                  tree perm_dest
                    = vect_create_destination_var (gimple_assign_rhs1 (stmt),
                                                   vectype);
                  tree new_temp = make_ssa_name (perm_dest, NULL);

                  /* Generate the permute statement.  */
                  gimple perm_stmt
                    = gimple_build_assign_with_ops (VEC_PERM_EXPR, new_temp,
                                                    vec_oprnd, vec_oprnd,
                                                    perm_mask);
                  vect_finish_stmt_generation (stmt, perm_stmt, gsi);

                  perm_stmt = SSA_NAME_DEF_STMT (new_temp);
                  vec_oprnd = new_temp;
                }

              /* Arguments are ready.  Create the new vector stmt.  */
              new_stmt = gimple_build_assign (data_ref, vec_oprnd);
              vect_finish_stmt_generation (stmt, new_stmt, gsi);

              if (slp)
                continue;

              next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
              if (!next_stmt)
                break;
            }
        }

      if (!slp)
        {
          if (j == 0)
            STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
          else
            STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
          prev_stmt_info = vinfo_for_stmt (new_stmt);
        }
    }

  dr_chain.release ();
  oprnds.release ();
  result_chain.release ();
  vec_oprnds.release ();

  return true;
}
/* Given a vector type VECTYPE and permutation SEL returns
   the VECTOR_CST mask that implements the permutation of the
   vector elements.  If that is impossible to do, returns NULL.  */

tree
vect_gen_perm_mask (tree vectype, unsigned char *sel)
{
  tree mask_elt_type, mask_type, mask_vec, *mask_elts;
  int i, nunits;

  nunits = TYPE_VECTOR_SUBPARTS (vectype);

  if (!can_vec_perm_p (TYPE_MODE (vectype), false, sel))
    return NULL;

  mask_elt_type = lang_hooks.types.type_for_mode
                    (int_mode_for_mode (TYPE_MODE (TREE_TYPE (vectype))), 1);
  mask_type = get_vectype_for_scalar_type (mask_elt_type);

  mask_elts = XALLOCAVEC (tree, nunits);
  for (i = nunits - 1; i >= 0; i--)
    mask_elts[i] = build_int_cst (mask_elt_type, sel[i]);
  mask_vec = build_vector (mask_type, mask_elts);

  return mask_vec;
}
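
/* Usage sketch (hypothetical caller, mirroring perm_mask_for_reverse):
   selecting the even-indexed elements of the concatenation x|y of two
   8-element vectors:

     unsigned char sel[8];
     for (i = 0; i < 8; ++i)
       sel[i] = 2 * i;                          // {0, 2, 4, ..., 14}
     tree mask = vect_gen_perm_mask (vectype, sel);
     if (mask == NULL_TREE)
       ;  // target cannot do this permutation in this mode

   The returned VECTOR_CST is what VEC_PERM_EXPR expects as its third
   operand.  */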
/* Given a vector variable X and Y, that was generated for the scalar
   STMT, generate instructions to permute the vector elements of X and Y
   using permutation mask MASK_VEC, insert them at *GSI and return the
   permuted vector variable.  */

static tree
permute_vec_elements (tree x, tree y, tree mask_vec, gimple stmt,
                      gimple_stmt_iterator *gsi)
{
  tree vectype = TREE_TYPE (x);
  tree perm_dest, data_ref;
  gimple perm_stmt;

  perm_dest = vect_create_destination_var (gimple_get_lhs (stmt), vectype);
  data_ref = make_ssa_name (perm_dest, NULL);

  /* Generate the permute statement.  */
  perm_stmt = gimple_build_assign_with_ops (VEC_PERM_EXPR, data_ref,
                                            x, y, mask_vec);
  vect_finish_stmt_generation (stmt, perm_stmt, gsi);

  return data_ref;
}
/* Hoist the definitions of all SSA uses on STMT out of the loop LOOP,
   inserting them on the loops preheader edge.  Returns true if we
   were successful in doing so (and thus STMT can be moved then),
   otherwise returns false.  */

static bool
hoist_defs_of_uses (gimple stmt, struct loop *loop)
{
  ssa_op_iter i;
  tree op;
  bool any = false;

  FOR_EACH_SSA_TREE_OPERAND (op, stmt, i, SSA_OP_USE)
    {
      gimple def_stmt = SSA_NAME_DEF_STMT (op);
      if (!gimple_nop_p (def_stmt)
          && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
        {
          /* Make sure we don't need to recurse.  While we could do
             so in simple cases when there are more complex use webs
             we don't have an easy way to preserve stmt order to fulfil
             dependencies within them.  */
          tree op2;
          ssa_op_iter i2;
          if (gimple_code (def_stmt) == GIMPLE_PHI)
            return false;
          FOR_EACH_SSA_TREE_OPERAND (op2, def_stmt, i2, SSA_OP_USE)
            {
              gimple def_stmt2 = SSA_NAME_DEF_STMT (op2);
              if (!gimple_nop_p (def_stmt2)
                  && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt2)))
                return false;
            }
          any = true;
        }
    }

  if (!any)
    return true;

  FOR_EACH_SSA_TREE_OPERAND (op, stmt, i, SSA_OP_USE)
    {
      gimple def_stmt = SSA_NAME_DEF_STMT (op);
      if (!gimple_nop_p (def_stmt)
          && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
        {
          gimple_stmt_iterator gsi = gsi_for_stmt (def_stmt);
          gsi_remove (&gsi, false);
          gsi_insert_on_edge_immediate (loop_preheader_edge (loop), def_stmt);
        }
    }

  return true;
}
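
/* Assumed example of what this enables: in

     for (i = 0; i < n; i++)
       ... = *p;                // address of *p loop-invariant

   the invariant load can only move to the preheader if the statements
   computing its address move first; hoist_defs_of_uses performs exactly
   that single-level hoisting and refuses anything that would require
   recursing through PHIs or deeper use webs.  */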
/* vectorizable_load.

   Check if STMT reads a non scalar data-ref (array/pointer/structure) that
   can be vectorized.
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at BSI.
   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */

static bool
vectorizable_load (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
                   slp_tree slp_node, slp_instance slp_node_instance)
{
  tree scalar_dest;
  tree vec_dest = NULL;
  tree data_ref = NULL;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  stmt_vec_info prev_stmt_info;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  struct loop *loop = NULL;
  struct loop *containing_loop = (gimple_bb (stmt))->loop_father;
  bool nested_in_vect_loop = false;
  struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr = NULL;
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  tree elem_type;
  tree new_temp;
  enum machine_mode mode;
  gimple new_stmt = NULL;
  tree dummy;
  enum dr_alignment_support alignment_support_scheme;
  tree dataref_ptr = NULL_TREE;
  tree dataref_offset = NULL_TREE;
  gimple ptr_incr = NULL;
  int nunits = TYPE_VECTOR_SUBPARTS (vectype);
  int ncopies;
  int i, j, group_size, group_gap;
  tree msq = NULL_TREE, lsq;
  tree offset = NULL_TREE;
  tree byte_offset = NULL_TREE;
  tree realignment_token = NULL_TREE;
  gimple phi = NULL;
  vec<tree> dr_chain = vNULL;
  bool grouped_load = false;
  bool load_lanes_p = false;
  gimple first_stmt;
  bool inv_p;
  bool negative = false;
  bool compute_in_loop = false;
  struct loop *at_loop;
  int vec_num;
  bool slp = (slp_node != NULL);
  bool slp_perm = false;
  enum tree_code code;
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
  int vf;
  tree aggr_type;
  tree gather_base = NULL_TREE, gather_off = NULL_TREE;
  tree gather_off_vectype = NULL_TREE, gather_decl = NULL_TREE;
  int gather_scale = 1;
  enum vect_def_type gather_dt = vect_unknown_def_type;
  tree def;
  gimple def_stmt;

  if (loop_vinfo)
    {
      loop = LOOP_VINFO_LOOP (loop_vinfo);
      nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
      vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
    }
  else
    vf = 1;

  /* Multiple types in SLP are handled by creating the appropriate number of
     vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
     case of SLP.  */
  if (slp || PURE_SLP_STMT (stmt_info))
    ncopies = 1;
  else
    ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;

  gcc_assert (ncopies >= 1);
  /* FORNOW. This restriction should be relaxed.  */
  if (nested_in_vect_loop && ncopies > 1)
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "multiple types in nested loop.\n");
      return false;
    }

  /* Invalidate assumptions made by dependence analysis when vectorization
     on the unrolled body effectively re-orders stmts.  */
  if (ncopies > 1
      && STMT_VINFO_MIN_NEG_DIST (stmt_info) != 0
      && ((unsigned)LOOP_VINFO_VECT_FACTOR (loop_vinfo)
          > STMT_VINFO_MIN_NEG_DIST (stmt_info)))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "cannot perform implicit CSE when unrolling "
                         "with negative dependence distance\n");
      return false;
    }

  if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
    return false;

  if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
    return false;

  /* Is vectorizable load?  */
  if (!is_gimple_assign (stmt))
    return false;

  scalar_dest = gimple_assign_lhs (stmt);
  if (TREE_CODE (scalar_dest) != SSA_NAME)
    return false;

  code = gimple_assign_rhs_code (stmt);
  if (code != ARRAY_REF
      && code != BIT_FIELD_REF
      && code != INDIRECT_REF
      && code != COMPONENT_REF
      && code != IMAGPART_EXPR
      && code != REALPART_EXPR
      && code != MEM_REF
      && TREE_CODE_CLASS (code) != tcc_declaration)
    return false;

  if (!STMT_VINFO_DATA_REF (stmt_info))
    return false;

  elem_type = TREE_TYPE (vectype);
  mode = TYPE_MODE (vectype);

  /* FORNOW. In some cases can vectorize even if data-type not supported
     (e.g. - data copies).  */
  if (optab_handler (mov_optab, mode) == CODE_FOR_nothing)
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "Aligned load, but unsupported type.\n");
      return false;
    }

  /* Check if the load is a part of an interleaving chain.  */
  if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
    {
      grouped_load = true;
      /* FORNOW */
      gcc_assert (!nested_in_vect_loop && !STMT_VINFO_GATHER_P (stmt_info));

      first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
      if (!slp && !PURE_SLP_STMT (stmt_info))
        {
          group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
          if (vect_load_lanes_supported (vectype, group_size))
            load_lanes_p = true;
          else if (!vect_grouped_load_supported (vectype, group_size))
            return false;
        }

      /* Invalidate assumptions made by dependence analysis when vectorization
         on the unrolled body effectively re-orders stmts.  */
      if (!PURE_SLP_STMT (stmt_info)
          && STMT_VINFO_MIN_NEG_DIST (stmt_info) != 0
          && ((unsigned)LOOP_VINFO_VECT_FACTOR (loop_vinfo)
              > STMT_VINFO_MIN_NEG_DIST (stmt_info)))
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "cannot perform implicit CSE when performing "
                             "group loads with negative dependence distance\n");
          return false;
        }
    }
  if (STMT_VINFO_GATHER_P (stmt_info))
    {
      gather_decl = vect_check_gather (stmt, loop_vinfo, &gather_base,
                                       &gather_off, &gather_scale);
      gcc_assert (gather_decl);
      if (!vect_is_simple_use_1 (gather_off, NULL, loop_vinfo, bb_vinfo,
                                 &def_stmt, &def, &gather_dt,
                                 &gather_off_vectype))
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "gather index use not simple.\n");
          return false;
        }
    }
  else if (STMT_VINFO_STRIDE_LOAD_P (stmt_info))
    ;
  else
    {
      negative = tree_int_cst_compare (nested_in_vect_loop
                                       ? STMT_VINFO_DR_STEP (stmt_info)
                                       : DR_STEP (dr),
                                       size_zero_node) < 0;
      if (negative && ncopies > 1)
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "multiple types with negative step.\n");
          return false;
        }

      if (negative)
        {
          if (grouped_load)
            {
              if (dump_enabled_p ())
                dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                 "negative step for group load not supported"
                                 "\n");
              return false;
            }
          alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
          if (alignment_support_scheme != dr_aligned
              && alignment_support_scheme != dr_unaligned_supported)
            {
              if (dump_enabled_p ())
                dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                 "negative step but alignment required.\n");
              return false;
            }
          if (!perm_mask_for_reverse (vectype))
            {
              if (dump_enabled_p ())
                dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                 "negative step and reversing not supported."
                                 "\n");
              return false;
            }
        }
    }

  if (!vec_stmt) /* transformation not required.  */
    {
      STMT_VINFO_TYPE (stmt_info) = load_vec_info_type;
      vect_model_load_cost (stmt_info, ncopies, load_lanes_p, NULL, NULL, NULL);
      return true;
    }

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "transform load. ncopies = %d\n", ncopies);

  /** Transform.  **/
  ensure_base_align (stmt_info, dr);

  if (STMT_VINFO_GATHER_P (stmt_info))
    {
      tree vec_oprnd0 = NULL_TREE, op;
      tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gather_decl));
      tree rettype, srctype, ptrtype, idxtype, masktype, scaletype;
      tree ptr, mask, var, scale, merge, perm_mask = NULL_TREE;
      tree prev_res = NULL_TREE;
      edge pe = loop_preheader_edge (loop);
      gimple_seq seq;
      basic_block new_bb;
      enum { NARROW, NONE, WIDEN } modifier;
      int gather_off_nunits = TYPE_VECTOR_SUBPARTS (gather_off_vectype);

      if (nunits == gather_off_nunits)
        modifier = NONE;
      else if (nunits == gather_off_nunits / 2)
        {
          unsigned char *sel = XALLOCAVEC (unsigned char, gather_off_nunits);
          modifier = WIDEN;

          for (i = 0; i < gather_off_nunits; ++i)
            sel[i] = i | nunits;

          perm_mask = vect_gen_perm_mask (gather_off_vectype, sel);
          gcc_assert (perm_mask != NULL_TREE);
        }
      else if (nunits == gather_off_nunits * 2)
        {
          unsigned char *sel = XALLOCAVEC (unsigned char, nunits);
          modifier = NARROW;

          for (i = 0; i < nunits; ++i)
            sel[i] = i < gather_off_nunits
                     ? i : i + nunits - gather_off_nunits;

          perm_mask = vect_gen_perm_mask (vectype, sel);
          gcc_assert (perm_mask != NULL_TREE);
          ncopies *= 2;
        }
      else
        gcc_unreachable ();
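
      /* Concrete instances (assumed vector types, for illustration): with
         V4DF data (nunits == 4) and V8SI offsets (gather_off_nunits == 8)
         we take the WIDEN case: each copy consumes half of the offset
         vector, and for odd copies the mask built from sel[i] = i | nunits
         moves the high offset half into position.  With V8SF data and V4DI
         offsets we take the NARROW case: each gather produces half a result
         vector, so ncopies is doubled and the mask concatenates two half
         results.  */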
      rettype = TREE_TYPE (TREE_TYPE (gather_decl));
      srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
      ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
      idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
      masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
      scaletype = TREE_VALUE (arglist);
      gcc_checking_assert (types_compatible_p (srctype, rettype));

      vec_dest = vect_create_destination_var (scalar_dest, vectype);

      ptr = fold_convert (ptrtype, gather_base);
      if (!is_gimple_min_invariant (ptr))
        {
          ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
          new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
          gcc_assert (!new_bb);
        }

      /* Currently we support only unconditional gather loads,
         so mask should be all ones.  */
      if (TREE_CODE (masktype) == INTEGER_TYPE)
        mask = build_int_cst (masktype, -1);
      else if (TREE_CODE (TREE_TYPE (masktype)) == INTEGER_TYPE)
        {
          mask = build_int_cst (TREE_TYPE (masktype), -1);
          mask = build_vector_from_val (masktype, mask);
          mask = vect_init_vector (stmt, mask, masktype, NULL);
        }
      else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (masktype)))
        {
          REAL_VALUE_TYPE r;
          long tmp[6];
          for (j = 0; j < 6; ++j)
            tmp[j] = -1;
          real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (masktype)));
          mask = build_real (TREE_TYPE (masktype), r);
          mask = build_vector_from_val (masktype, mask);
          mask = vect_init_vector (stmt, mask, masktype, NULL);
        }
      else
        gcc_unreachable ();

      scale = build_int_cst (scaletype, gather_scale);

      if (TREE_CODE (TREE_TYPE (rettype)) == INTEGER_TYPE)
        merge = build_int_cst (TREE_TYPE (rettype), 0);
      else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (rettype)))
        {
          REAL_VALUE_TYPE r;
          long tmp[6];
          for (j = 0; j < 6; ++j)
            tmp[j] = 0;
          real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (rettype)));
          merge = build_real (TREE_TYPE (rettype), r);
        }
      else
        gcc_unreachable ();
      merge = build_vector_from_val (rettype, merge);
      merge = vect_init_vector (stmt, merge, rettype, NULL);
      prev_stmt_info = NULL;
      for (j = 0; j < ncopies; ++j)
        {
          if (modifier == WIDEN && (j & 1))
            op = permute_vec_elements (vec_oprnd0, vec_oprnd0,
                                       perm_mask, stmt, gsi);
          else if (j == 0)
            op = vec_oprnd0
              = vect_get_vec_def_for_operand (gather_off, stmt, NULL);
          else
            op = vec_oprnd0
              = vect_get_vec_def_for_stmt_copy (gather_dt, vec_oprnd0);

          if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
            {
              gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op))
                          == TYPE_VECTOR_SUBPARTS (idxtype));
              var = vect_get_new_vect_var (idxtype, vect_simple_var, NULL);
              var = make_ssa_name (var, NULL);
              op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
              new_stmt
                = gimple_build_assign_with_ops (VIEW_CONVERT_EXPR, var,
                                                op, NULL_TREE);
              vect_finish_stmt_generation (stmt, new_stmt, gsi);
              op = var;
            }

          new_stmt
            = gimple_build_call (gather_decl, 5, merge, ptr, op, mask, scale);

          if (!useless_type_conversion_p (vectype, rettype))
            {
              gcc_assert (TYPE_VECTOR_SUBPARTS (vectype)
                          == TYPE_VECTOR_SUBPARTS (rettype));
              var = vect_get_new_vect_var (rettype, vect_simple_var, NULL);
              op = make_ssa_name (var, new_stmt);
              gimple_call_set_lhs (new_stmt, op);
              vect_finish_stmt_generation (stmt, new_stmt, gsi);
              var = make_ssa_name (vec_dest, NULL);
              op = build1 (VIEW_CONVERT_EXPR, vectype, op);
              new_stmt
                = gimple_build_assign_with_ops (VIEW_CONVERT_EXPR, var, op,
                                                NULL_TREE);
            }
          else
            {
              var = make_ssa_name (vec_dest, new_stmt);
              gimple_call_set_lhs (new_stmt, var);
            }

          vect_finish_stmt_generation (stmt, new_stmt, gsi);

          if (modifier == NARROW)
            {
              if ((j & 1) == 0)
                {
                  prev_res = var;
                  continue;
                }
              var = permute_vec_elements (prev_res, var,
                                          perm_mask, stmt, gsi);
              new_stmt = SSA_NAME_DEF_STMT (var);
            }

          if (prev_stmt_info == NULL)
            STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
          else
            STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
          prev_stmt_info = vinfo_for_stmt (new_stmt);
        }
      return true;
    }
  else if (STMT_VINFO_STRIDE_LOAD_P (stmt_info))
    {
      gimple_stmt_iterator incr_gsi;
      bool insert_after;
      gimple incr;
      tree offvar;
      tree ivstep;
      tree running_off;
      vec<constructor_elt, va_gc> *v = NULL;
      gimple_seq stmts = NULL;
      tree stride_base, stride_step, alias_off;

      gcc_assert (!nested_in_vect_loop);

      stride_base
        = fold_build_pointer_plus
            (unshare_expr (DR_BASE_ADDRESS (dr)),
             size_binop (PLUS_EXPR,
                         convert_to_ptrofftype (unshare_expr (DR_OFFSET (dr))),
                         convert_to_ptrofftype (DR_INIT (dr))));
      stride_step = fold_convert (sizetype, unshare_expr (DR_STEP (dr)));

      /* For a load with loop-invariant (but other than power-of-2)
         stride (i.e. not a grouped access) like so:

           for (i = 0; i < n; i += stride)
             ... = array[i];

         we generate a new induction variable and new accesses to
         form a new vector (or vectors, depending on ncopies):

           for (j = 0; ; j += VF*stride)
             tmp1 = array[j];
             tmp2 = array[j + stride];
             ...
             vectemp = {tmp1, tmp2, ...}
             ...   */

      ivstep = stride_step;
      ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (ivstep), ivstep,
                            build_int_cst (TREE_TYPE (ivstep), vf));

      standard_iv_increment_position (loop, &incr_gsi, &insert_after);

      create_iv (stride_base, ivstep, NULL,
                 loop, &incr_gsi, insert_after,
                 &offvar, NULL);
      incr = gsi_stmt (incr_gsi);
      set_vinfo_for_stmt (incr, new_stmt_vec_info (incr, loop_vinfo, NULL));

      stride_step = force_gimple_operand (stride_step, &stmts, true, NULL_TREE);
      if (stmts)
        gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);

      prev_stmt_info = NULL;
      running_off = offvar;
      alias_off = build_int_cst (reference_alias_ptr_type (DR_REF (dr)), 0);
      for (j = 0; j < ncopies; j++)
        {
          tree vec_inv;

          vec_alloc (v, nunits);
          for (i = 0; i < nunits; i++)
            {
              tree newref, newoff;

              newref = build2 (MEM_REF, TREE_TYPE (vectype),
                               running_off, alias_off);

              newref = force_gimple_operand_gsi (gsi, newref, true,
                                                 NULL_TREE, true,
                                                 GSI_SAME_STMT);
              CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, newref);
              newoff = copy_ssa_name (running_off, NULL);
              incr = gimple_build_assign_with_ops (POINTER_PLUS_EXPR, newoff,
                                                   running_off, stride_step);
              vect_finish_stmt_generation (stmt, incr, gsi);

              running_off = newoff;
            }

          vec_inv = build_constructor (vectype, v);
          new_temp = vect_init_vector (stmt, vec_inv, vectype, gsi);
          new_stmt = SSA_NAME_DEF_STMT (new_temp);

          if (j == 0)
            STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
          else
            STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
          prev_stmt_info = vinfo_for_stmt (new_stmt);
        }
      return true;
    }
  if (grouped_load)
    {
      first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
      if (slp
          && !SLP_TREE_LOAD_PERMUTATION (slp_node).exists ()
          && first_stmt != SLP_TREE_SCALAR_STMTS (slp_node)[0])
        first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0];

      /* Check if the chain of loads is already vectorized.  */
      if (STMT_VINFO_VEC_STMT (vinfo_for_stmt (first_stmt))
          /* For SLP we would need to copy over SLP_TREE_VEC_STMTS.
             ??? But we can only do so if there is exactly one
             as we have no way to get at the rest.  Leave the CSE
             opportunity alone.
             ??? With the group load eventually participating
             in multiple different permutations (having multiple
             slp nodes which refer to the same group) the CSE
             is even wrong code.  See PR56270.  */
          && !slp)
        {
          *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
          return true;
        }
      first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
      group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));

      /* VEC_NUM is the number of vect stmts to be created for this group.  */
      if (slp)
        {
          grouped_load = false;
          vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
          if (SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
            slp_perm = true;
          group_gap = GROUP_GAP (vinfo_for_stmt (first_stmt));
        }
      else
        {
          vec_num = group_size;
          group_gap = 0;
        }
    }
  else
    {
      first_stmt = stmt;
      first_dr = dr;
      group_size = vec_num = 1;
      group_gap = 0;
    }

  alignment_support_scheme = vect_supportable_dr_alignment (first_dr, false);
  gcc_assert (alignment_support_scheme);
  /* Targets with load-lane instructions must not require explicit
     realignment.  */
  gcc_assert (!load_lanes_p
              || alignment_support_scheme == dr_aligned
              || alignment_support_scheme == dr_unaligned_supported);
  /* In case the vectorization factor (VF) is bigger than the number
     of elements that we can fit in a vectype (nunits), we have to generate
     more than one vector stmt - i.e - we need to "unroll" the
     vector stmt by a factor VF/nunits.  In doing so, we record a pointer
     from one copy of the vector stmt to the next, in the field
     STMT_VINFO_RELATED_STMT.  This is necessary in order to allow following
     stages to find the correct vector defs to be used when vectorizing
     stmts that use the defs of the current stmt.  The example below
     illustrates the vectorization process when VF=16 and nunits=4 (i.e., we
     need to create 4 vectorized stmts):

     before vectorization:
                                RELATED_STMT    VEC_STMT
        S1:     x = memref      -               -
        S2:     z = x + 1       -               -

     step 1: vectorize stmt S1:
        We first create the vector stmt VS1_0, and, as usual, record a
        pointer to it in the STMT_VINFO_VEC_STMT of the scalar stmt S1.
        Next, we create the vector stmt VS1_1, and record a pointer to
        it in the STMT_VINFO_RELATED_STMT of the vector stmt VS1_0.
        Similarly, for VS1_2 and VS1_3.  This is the resulting chain of
        stmts:
                                RELATED_STMT    VEC_STMT
        VS1_0:  vx0 = memref0   VS1_1           -
        VS1_1:  vx1 = memref1   VS1_2           -
        VS1_2:  vx2 = memref2   VS1_3           -
        VS1_3:  vx3 = memref3   -               -
        S1:     x = load        -               VS1_0
        S2:     z = x + 1       -               -

     See in documentation in vect_get_vec_def_for_stmt_copy for how the
     information we recorded in RELATED_STMT field is used to vectorize
     stmt S2.  */

  /* In case of interleaving (non-unit grouped access):

     S1:  x2 = &base + 2
     S2:  x0 = &base
     S3:  x1 = &base + 1
     S4:  x3 = &base + 3

     Vectorized loads are created in the order of memory accesses
     starting from the access of the first stmt of the chain:

     VS1: vx0 = &base
     VS2: vx1 = &base + vec_size*1
     VS3: vx3 = &base + vec_size*2
     VS4: vx4 = &base + vec_size*3

     Then permutation statements are generated:

     VS5: vx5 = VEC_PERM_EXPR < vx0, vx1, { 0, 2, ..., i*2 } >
     VS6: vx6 = VEC_PERM_EXPR < vx0, vx1, { 1, 3, ..., i*2+1 } >
       ...

     And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
     (the order of the data-refs in the output of vect_permute_load_chain
     corresponds to the order of scalar stmts in the interleaving chain - see
     the documentation of vect_permute_load_chain()).
     The generation of permutation stmts and recording them in
     STMT_VINFO_VEC_STMT is done in vect_transform_grouped_load().

     In case of both multiple types and interleaving, the vector loads and
     permutation stmts above are created for every copy.  The result vector
     stmts are put in STMT_VINFO_VEC_STMT for the first copy and in the
     corresponding STMT_VINFO_RELATED_STMT for the next copies.  */
  /* If the data reference is aligned (dr_aligned) or potentially unaligned
     on a target that supports unaligned accesses (dr_unaligned_supported)
     we generate the following code:
         p = initial_addr;
         indx = 0;
         loop {
           p = p + indx * vectype_size;
           vec_dest = *(p);
           indx = indx + 1;
         }

     Otherwise, the data reference is potentially unaligned on a target that
     does not support unaligned accesses (dr_explicit_realign_optimized) -
     then generate the following code, in which the data in each iteration is
     obtained by two vector loads, one from the previous iteration, and one
     from the current iteration:
         p1 = initial_addr;
         msq_init = *(floor(p1))
         p2 = initial_addr + VS - 1;
         realignment_token = call target_builtin;
         indx = 0;
         loop {
           p2 = p2 + indx * vectype_size
           lsq = *(floor(p2))
           vec_dest = realign_load (msq, lsq, realignment_token)
           indx = indx + 1;
           msq = lsq;
         }   */

  /* If the misalignment remains the same throughout the execution of the
     loop, we can create the init_addr and permutation mask at the loop
     preheader.  Otherwise, it needs to be created inside the loop.
     This can only occur when vectorizing memory accesses in the inner-loop
     nested within an outer-loop that is being vectorized.  */

  if (nested_in_vect_loop
      && (TREE_INT_CST_LOW (DR_STEP (dr))
          % GET_MODE_SIZE (TYPE_MODE (vectype)) != 0))
    {
      gcc_assert (alignment_support_scheme != dr_explicit_realign_optimized);
      compute_in_loop = true;
    }

  if ((alignment_support_scheme == dr_explicit_realign_optimized
       || alignment_support_scheme == dr_explicit_realign)
      && !compute_in_loop)
    {
      msq = vect_setup_realignment (first_stmt, gsi, &realignment_token,
                                    alignment_support_scheme, NULL_TREE,
                                    &at_loop);
      if (alignment_support_scheme == dr_explicit_realign_optimized)
        {
          phi = SSA_NAME_DEF_STMT (msq);
          byte_offset = size_binop (MINUS_EXPR, TYPE_SIZE_UNIT (vectype),
                                    size_one_node);
        }
    }
  else
    at_loop = loop;

  if (negative)
    offset = size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1);

  if (load_lanes_p)
    aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
  else
    aggr_type = vectype;
6290 prev_stmt_info
= NULL
;
6291 for (j
= 0; j
< ncopies
; j
++)
      /* 1. Create the vector or array pointer update chain.  */
      if (j == 0)
	{
	  bool simd_lane_access_p
	    = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info);
	  if (simd_lane_access_p
	      && TREE_CODE (DR_BASE_ADDRESS (first_dr)) == ADDR_EXPR
	      && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr), 0))
	      && integer_zerop (DR_OFFSET (first_dr))
	      && integer_zerop (DR_INIT (first_dr))
	      && alias_sets_conflict_p (get_alias_set (aggr_type),
					get_alias_set (DR_REF (first_dr)))
	      && (alignment_support_scheme == dr_aligned
		  || alignment_support_scheme == dr_unaligned_supported))
	    {
	      dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr));
	      dataref_offset = build_int_cst (reference_alias_ptr_type
					      (DR_REF (first_dr)), 0);
	      inv_p = false;
	    }
	  else
	    dataref_ptr
	      = vect_create_data_ref_ptr (first_stmt, aggr_type, at_loop,
					  offset, &dummy, gsi, &ptr_incr,
					  simd_lane_access_p, &inv_p,
					  byte_offset);
	}
      else if (dataref_offset)
	dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset,
					  TYPE_SIZE_UNIT (aggr_type));
      else
	dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
				       TYPE_SIZE_UNIT (aggr_type));

      if (grouped_load || slp_perm)
	dr_chain.create (vec_num);

      if (load_lanes_p)
	{
	  tree vec_array;

	  vec_array = create_vector_array (vectype, vec_num);
	  /* Emit:
	       VEC_ARRAY = LOAD_LANES (MEM_REF[...all elements...]).  */
	  data_ref = create_array_ref (aggr_type, dataref_ptr, first_dr);
	  new_stmt = gimple_build_call_internal (IFN_LOAD_LANES, 1, data_ref);
	  gimple_call_set_lhs (new_stmt, vec_array);
	  vect_finish_stmt_generation (stmt, new_stmt, gsi);

	  /* Extract each vector into an SSA_NAME.  */
	  for (i = 0; i < vec_num; i++)
	    {
	      new_temp = read_vector_array (stmt, gsi, scalar_dest,
					    vec_array, i);
	      dr_chain.quick_push (new_temp);
	    }

	  /* Record the mapping between SSA_NAMEs and statements.  */
	  vect_record_grouped_load_vectors (stmt, dr_chain);
	}
      else
	{
	  for (i = 0; i < vec_num; i++)
	    {
	      if (i > 0)
		dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
					       stmt, NULL_TREE);
	      /* 2. Create the vector-load in the loop.  */
	      switch (alignment_support_scheme)
		{
		case dr_aligned:
		case dr_unaligned_supported:
		  {
		    unsigned int align, misalign;

		    data_ref
		      = build2 (MEM_REF, vectype, dataref_ptr,
				dataref_offset
				? dataref_offset
				: build_int_cst (reference_alias_ptr_type
						 (DR_REF (first_dr)), 0));
		    align = TYPE_ALIGN_UNIT (vectype);
		    if (alignment_support_scheme == dr_aligned)
		      {
			gcc_assert (aligned_access_p (first_dr));
			misalign = 0;
		      }
		    else if (DR_MISALIGNMENT (first_dr) == -1)
		      {
			TREE_TYPE (data_ref)
			  = build_aligned_type (TREE_TYPE (data_ref),
						TYPE_ALIGN (elem_type));
			align = TYPE_ALIGN_UNIT (elem_type);
			misalign = 0;
		      }
		    else
		      {
			TREE_TYPE (data_ref)
			  = build_aligned_type (TREE_TYPE (data_ref),
						TYPE_ALIGN (elem_type));
			misalign = DR_MISALIGNMENT (first_dr);
		      }
		    if (dataref_offset == NULL_TREE)
		      set_ptr_info_alignment (get_ptr_info (dataref_ptr),
					      align, misalign);
		    break;
		  }
		case dr_explicit_realign:
		  {
		    tree ptr, bump;
		    tree vs_minus_1;

		    vs_minus_1 = size_int (TYPE_VECTOR_SUBPARTS (vectype) - 1);

		    if (compute_in_loop)
		      msq = vect_setup_realignment (first_stmt, gsi,
						    &realignment_token,
						    dr_explicit_realign,
						    dataref_ptr, NULL);

		    ptr = copy_ssa_name (dataref_ptr, NULL);
		    new_stmt = gimple_build_assign_with_ops
				 (BIT_AND_EXPR, ptr, dataref_ptr,
				  build_int_cst
				  (TREE_TYPE (dataref_ptr),
				   -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
		    vect_finish_stmt_generation (stmt, new_stmt, gsi);
		    data_ref
		      = build2 (MEM_REF, vectype, ptr,
				build_int_cst (reference_alias_ptr_type
					       (DR_REF (first_dr)), 0));
		    vec_dest = vect_create_destination_var (scalar_dest,
							    vectype);
		    new_stmt = gimple_build_assign (vec_dest, data_ref);
		    new_temp = make_ssa_name (vec_dest, new_stmt);
		    gimple_assign_set_lhs (new_stmt, new_temp);
		    gimple_set_vdef (new_stmt, gimple_vdef (stmt));
		    gimple_set_vuse (new_stmt, gimple_vuse (stmt));
		    vect_finish_stmt_generation (stmt, new_stmt, gsi);
		    msq = new_temp;

		    bump = size_binop (MULT_EXPR, vs_minus_1,
				       TYPE_SIZE_UNIT (elem_type));
		    ptr = bump_vector_ptr (dataref_ptr, NULL, gsi, stmt, bump);
		    new_stmt = gimple_build_assign_with_ops
				 (BIT_AND_EXPR, NULL_TREE, ptr,
				  build_int_cst
				  (TREE_TYPE (ptr),
				   -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
		    ptr = copy_ssa_name (dataref_ptr, new_stmt);
		    gimple_assign_set_lhs (new_stmt, ptr);
		    vect_finish_stmt_generation (stmt, new_stmt, gsi);
		    data_ref
		      = build2 (MEM_REF, vectype, ptr,
				build_int_cst (reference_alias_ptr_type
					       (DR_REF (first_dr)), 0));
		    break;
		  }
		case dr_explicit_realign_optimized:
		  new_temp = copy_ssa_name (dataref_ptr, NULL);
		  new_stmt = gimple_build_assign_with_ops
			       (BIT_AND_EXPR, new_temp, dataref_ptr,
				build_int_cst
				(TREE_TYPE (dataref_ptr),
				 -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
		  vect_finish_stmt_generation (stmt, new_stmt, gsi);
		  data_ref
		    = build2 (MEM_REF, vectype, new_temp,
			      build_int_cst (reference_alias_ptr_type
					     (DR_REF (first_dr)), 0));
		  break;
		default:
		  gcc_unreachable ();
		}
	      vec_dest = vect_create_destination_var (scalar_dest, vectype);
	      new_stmt = gimple_build_assign (vec_dest, data_ref);
	      new_temp = make_ssa_name (vec_dest, new_stmt);
	      gimple_assign_set_lhs (new_stmt, new_temp);
	      vect_finish_stmt_generation (stmt, new_stmt, gsi);
	      /* 3. Handle explicit realignment if necessary/supported.
		 Create in loop:
		   vec_dest = realign_load (msq, lsq, realignment_token)  */
	      if (alignment_support_scheme == dr_explicit_realign_optimized
		  || alignment_support_scheme == dr_explicit_realign)
		{
		  lsq = gimple_assign_lhs (new_stmt);
		  if (!realignment_token)
		    realignment_token = dataref_ptr;
		  vec_dest = vect_create_destination_var (scalar_dest, vectype);
		  new_stmt
		    = gimple_build_assign_with_ops (REALIGN_LOAD_EXPR,
						    vec_dest, msq, lsq,
						    realignment_token);
		  new_temp = make_ssa_name (vec_dest, new_stmt);
		  gimple_assign_set_lhs (new_stmt, new_temp);
		  vect_finish_stmt_generation (stmt, new_stmt, gsi);

		  if (alignment_support_scheme == dr_explicit_realign_optimized)
		    {
		      gcc_assert (phi);
		      if (i == vec_num - 1 && j == ncopies - 1)
			add_phi_arg (phi, lsq,
				     loop_latch_edge (containing_loop),
				     UNKNOWN_LOCATION);
		      msq = lsq;
		    }
		}
	      /* 4. Handle invariant-load.  */
	      if (inv_p && !bb_vinfo)
		{
		  gcc_assert (!grouped_load);
		  /* If we have versioned for aliasing or the loop doesn't
		     have any data dependencies that would preclude this,
		     then we are sure this is a loop invariant load and
		     thus we can insert it on the preheader edge.  */
		  if (LOOP_VINFO_NO_DATA_DEPENDENCIES (loop_vinfo)
		      && !nested_in_vect_loop
		      && hoist_defs_of_uses (stmt, loop))
		    {
		      if (dump_enabled_p ())
			{
			  dump_printf_loc (MSG_NOTE, vect_location,
					   "hoisting out of the vectorized "
					   "loop: ");
			  dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
			  dump_printf (MSG_NOTE, "\n");
			}
		      tree tem = copy_ssa_name (scalar_dest, NULL);
		      gsi_insert_on_edge_immediate
			(loop_preheader_edge (loop),
			 gimple_build_assign (tem,
					      unshare_expr
						(gimple_assign_rhs1 (stmt))));
		      new_temp = vect_init_vector (stmt, tem, vectype, NULL);
		    }
		  else
		    {
		      gimple_stmt_iterator gsi2 = *gsi;
		      gsi_next (&gsi2);
		      new_temp = vect_init_vector (stmt, scalar_dest,
						   vectype, &gsi2);
		    }
		  new_stmt = SSA_NAME_DEF_STMT (new_temp);
		  set_vinfo_for_stmt (new_stmt,
				      new_stmt_vec_info (new_stmt, loop_vinfo,
							 bb_vinfo));
		}

	      if (negative)
		{
		  tree perm_mask = perm_mask_for_reverse (vectype);
		  new_temp = permute_vec_elements (new_temp, new_temp,
						   perm_mask, stmt, gsi);
		  new_stmt = SSA_NAME_DEF_STMT (new_temp);
		}
	      /* Collect vector loads and later create their permutation in
		 vect_transform_grouped_load ().  */
	      if (grouped_load || slp_perm)
		dr_chain.quick_push (new_temp);

	      /* Store vector loads in the corresponding SLP_NODE.  */
	      if (slp && !slp_perm)
		SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
	    }
	  /* Bump the vector pointer to account for a gap.  */
	  if (slp && group_gap != 0)
	    {
	      tree bump = size_binop (MULT_EXPR,
				      TYPE_SIZE_UNIT (elem_type),
				      size_int (group_gap));
	      dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
					     stmt, bump);
	    }
	}

      if (slp && !slp_perm)
	continue;

      if (slp_perm)
	{
	  if (!vect_transform_slp_perm_load (slp_node, dr_chain, gsi, vf,
					     slp_node_instance, false))
	    {
	      dr_chain.release ();
	      return false;
	    }
	}
      else
	{
	  if (grouped_load)
	    {
	      if (!load_lanes_p)
		vect_transform_grouped_load (stmt, dr_chain, group_size, gsi);
	      *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
	    }
	  else
	    {
	      if (j == 0)
		STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
	      else
		STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
	      prev_stmt_info = vinfo_for_stmt (new_stmt);
	    }
	}
      dr_chain.release ();
    }

  return true;
}
/* Function vect_is_simple_cond.

   Input:
   LOOP - the loop that is being vectorized.
   COND - Condition that is checked for simple use.

   Output:
   *COMP_VECTYPE - the vector type for the comparison.

   Returns whether a COND can be vectorized.  Checks whether
   condition operands are supportable using vect_is_simple_use.  */
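/* Example (illustration only): for "x = a < b ? c : d", the condition
   "a < b" is simple if both "a" and "b" are SSA names with supportable
   defs (or invariants/constants); *COMP_VECTYPE is then taken from the
   vector type of whichever operand has one.  */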
static bool
vect_is_simple_cond (tree cond, gimple stmt, loop_vec_info loop_vinfo,
		     bb_vec_info bb_vinfo, tree *comp_vectype)
{
  tree lhs, rhs;
  tree def;
  enum vect_def_type dt;
  tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;

  if (!COMPARISON_CLASS_P (cond))
    return false;

  lhs = TREE_OPERAND (cond, 0);
  rhs = TREE_OPERAND (cond, 1);

  if (TREE_CODE (lhs) == SSA_NAME)
    {
      gimple lhs_def_stmt = SSA_NAME_DEF_STMT (lhs);
      if (!vect_is_simple_use_1 (lhs, stmt, loop_vinfo, bb_vinfo,
				 &lhs_def_stmt, &def, &dt, &vectype1))
	return false;
    }
  else if (TREE_CODE (lhs) != INTEGER_CST && TREE_CODE (lhs) != REAL_CST
	   && TREE_CODE (lhs) != FIXED_CST)
    return false;

  if (TREE_CODE (rhs) == SSA_NAME)
    {
      gimple rhs_def_stmt = SSA_NAME_DEF_STMT (rhs);
      if (!vect_is_simple_use_1 (rhs, stmt, loop_vinfo, bb_vinfo,
				 &rhs_def_stmt, &def, &dt, &vectype2))
	return false;
    }
  else if (TREE_CODE (rhs) != INTEGER_CST && TREE_CODE (rhs) != REAL_CST
	   && TREE_CODE (rhs) != FIXED_CST)
    return false;

  *comp_vectype = vectype1 ? vectype1 : vectype2;
  return true;
}
/* vectorizable_condition.

   Check if STMT is a conditional modify expression that can be vectorized.
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
   stmt using VEC_COND_EXPR to replace it, put it in VEC_STMT, and insert it
   at GSI.

   When STMT is vectorized as a nested cycle, REDUC_DEF is the vector variable
   to be used at REDUC_INDEX (in the then clause if REDUC_INDEX is 1, and in
   the else clause if it is 2).

   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */
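/* Example (illustration only): with V4SI vectors, the scalar statement

     x = a < b ? c : d;

   becomes

     vec_compare = va < vb;                        (element-wise mask)
     vx = VEC_COND_EXPR <vec_compare, vc, vd>;

   selecting each element from vc or vd according to the mask.  */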
bool
vectorizable_condition (gimple stmt, gimple_stmt_iterator *gsi,
			gimple *vec_stmt, tree reduc_def, int reduc_index,
			slp_tree slp_node)
{
  tree scalar_dest = NULL_TREE;
  tree vec_dest = NULL_TREE;
  tree cond_expr, then_clause, else_clause;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  tree comp_vectype = NULL_TREE;
  tree vec_cond_lhs = NULL_TREE, vec_cond_rhs = NULL_TREE;
  tree vec_then_clause = NULL_TREE, vec_else_clause = NULL_TREE;
  tree vec_compare, vec_cond_expr;
  tree new_temp;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  tree def;
  enum vect_def_type dt, dts[4];
  int nunits = TYPE_VECTOR_SUBPARTS (vectype);
  int ncopies;
  enum tree_code code;
  stmt_vec_info prev_stmt_info = NULL;
  int i, j;
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
  vec<tree> vec_oprnds0 = vNULL;
  vec<tree> vec_oprnds1 = vNULL;
  vec<tree> vec_oprnds2 = vNULL;
  vec<tree> vec_oprnds3 = vNULL;
  tree vec_cmp_type;

  if (slp_node || PURE_SLP_STMT (stmt_info))
    ncopies = 1;
  else
    ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;

  gcc_assert (ncopies >= 1);
  if (reduc_index && ncopies > 1)
    return false; /* FORNOW */

  if (reduc_index && STMT_SLP_TYPE (stmt_info))
    return false;

  if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
    return false;

  if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
      && !(STMT_VINFO_DEF_TYPE (stmt_info) == vect_nested_cycle
	   && reduc_def))
    return false;

  /* FORNOW: not yet supported.  */
  if (STMT_VINFO_LIVE_P (stmt_info))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "value used after loop.\n");
      return false;
    }

  /* Is vectorizable conditional operation?  */
  if (!is_gimple_assign (stmt))
    return false;

  code = gimple_assign_rhs_code (stmt);

  if (code != COND_EXPR)
    return false;

  cond_expr = gimple_assign_rhs1 (stmt);
  then_clause = gimple_assign_rhs2 (stmt);
  else_clause = gimple_assign_rhs3 (stmt);

  if (!vect_is_simple_cond (cond_expr, stmt, loop_vinfo, bb_vinfo,
			    &comp_vectype)
      || !comp_vectype)
    return false;

  if (TREE_CODE (then_clause) == SSA_NAME)
    {
      gimple then_def_stmt = SSA_NAME_DEF_STMT (then_clause);
      if (!vect_is_simple_use (then_clause, stmt, loop_vinfo, bb_vinfo,
			       &then_def_stmt, &def, &dt))
	return false;
    }
  else if (TREE_CODE (then_clause) != INTEGER_CST
	   && TREE_CODE (then_clause) != REAL_CST
	   && TREE_CODE (then_clause) != FIXED_CST)
    return false;

  if (TREE_CODE (else_clause) == SSA_NAME)
    {
      gimple else_def_stmt = SSA_NAME_DEF_STMT (else_clause);
      if (!vect_is_simple_use (else_clause, stmt, loop_vinfo, bb_vinfo,
			       &else_def_stmt, &def, &dt))
	return false;
    }
  else if (TREE_CODE (else_clause) != INTEGER_CST
	   && TREE_CODE (else_clause) != REAL_CST
	   && TREE_CODE (else_clause) != FIXED_CST)
    return false;

  unsigned int prec = GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (vectype)));
  /* The result of a vector comparison should be signed type.  */
  tree cmp_type = build_nonstandard_integer_type (prec, 0);
  vec_cmp_type = get_same_sized_vectype (cmp_type, vectype);
  if (vec_cmp_type == NULL_TREE)
    return false;

  if (!vec_stmt)
    {
      STMT_VINFO_TYPE (stmt_info) = condition_vec_info_type;
      return expand_vec_cond_expr_p (vectype, comp_vectype);
    }

  /* Transform.  */

  if (!slp_node)
    {
      vec_oprnds0.create (1);
      vec_oprnds1.create (1);
      vec_oprnds2.create (1);
      vec_oprnds3.create (1);
    }

  /* Handle def.  */
  scalar_dest = gimple_assign_lhs (stmt);
  vec_dest = vect_create_destination_var (scalar_dest, vectype);

  /* Handle cond expr.  */
  for (j = 0; j < ncopies; j++)
    {
      gimple new_stmt = NULL;
      if (j == 0)
	{
	  if (slp_node)
	    {
	      auto_vec<tree, 4> ops;
	      auto_vec<vec<tree>, 4> vec_defs;

	      ops.safe_push (TREE_OPERAND (cond_expr, 0));
	      ops.safe_push (TREE_OPERAND (cond_expr, 1));
	      ops.safe_push (then_clause);
	      ops.safe_push (else_clause);
	      vect_get_slp_defs (ops, slp_node, &vec_defs, -1);
	      vec_oprnds3 = vec_defs.pop ();
	      vec_oprnds2 = vec_defs.pop ();
	      vec_oprnds1 = vec_defs.pop ();
	      vec_oprnds0 = vec_defs.pop ();

	      ops.release ();
	      vec_defs.release ();
	    }
	  else
	    {
	      gimple gtemp;
	      vec_cond_lhs =
		vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 0),
					      stmt, NULL);
	      vect_is_simple_use (TREE_OPERAND (cond_expr, 0), stmt,
				  loop_vinfo, NULL, &gtemp, &def, &dts[0]);

	      vec_cond_rhs =
		vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 1),
					      stmt, NULL);
	      vect_is_simple_use (TREE_OPERAND (cond_expr, 1), stmt,
				  loop_vinfo, NULL, &gtemp, &def, &dts[1]);
	      if (reduc_index == 1)
		vec_then_clause = reduc_def;
	      else
		{
		  vec_then_clause = vect_get_vec_def_for_operand (then_clause,
								  stmt, NULL);
		  vect_is_simple_use (then_clause, stmt, loop_vinfo,
				      NULL, &gtemp, &def, &dts[2]);
		}
	      if (reduc_index == 2)
		vec_else_clause = reduc_def;
	      else
		{
		  vec_else_clause = vect_get_vec_def_for_operand (else_clause,
								  stmt, NULL);
		  vect_is_simple_use (else_clause, stmt, loop_vinfo,
				      NULL, &gtemp, &def, &dts[3]);
		}
	    }
	}
      else
	{
	  vec_cond_lhs = vect_get_vec_def_for_stmt_copy (dts[0],
							 vec_oprnds0.pop ());
	  vec_cond_rhs = vect_get_vec_def_for_stmt_copy (dts[1],
							 vec_oprnds1.pop ());
	  vec_then_clause = vect_get_vec_def_for_stmt_copy (dts[2],
							    vec_oprnds2.pop ());
	  vec_else_clause = vect_get_vec_def_for_stmt_copy (dts[3],
							    vec_oprnds3.pop ());
	}

      if (!slp_node)
	{
	  vec_oprnds0.quick_push (vec_cond_lhs);
	  vec_oprnds1.quick_push (vec_cond_rhs);
	  vec_oprnds2.quick_push (vec_then_clause);
	  vec_oprnds3.quick_push (vec_else_clause);
	}

      /* Arguments are ready.  Create the new vector stmt.  */
      FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_cond_lhs)
	{
	  vec_cond_rhs = vec_oprnds1[i];
	  vec_then_clause = vec_oprnds2[i];
	  vec_else_clause = vec_oprnds3[i];

	  vec_compare = build2 (TREE_CODE (cond_expr), vec_cmp_type,
				vec_cond_lhs, vec_cond_rhs);
	  vec_cond_expr = build3 (VEC_COND_EXPR, vectype,
				  vec_compare, vec_then_clause,
				  vec_else_clause);

	  new_stmt = gimple_build_assign (vec_dest, vec_cond_expr);
	  new_temp = make_ssa_name (vec_dest, new_stmt);
	  gimple_assign_set_lhs (new_stmt, new_temp);
	  vect_finish_stmt_generation (stmt, new_stmt, gsi);
	  if (slp_node)
	    SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
	}

      if (slp_node)
	continue;

      if (j == 0)
	STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
      else
	STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;

      prev_stmt_info = vinfo_for_stmt (new_stmt);
    }

  vec_oprnds0.release ();
  vec_oprnds1.release ();
  vec_oprnds2.release ();
  vec_oprnds3.release ();

  return true;
}
/* Make sure the statement is vectorizable.  */

bool
vect_analyze_stmt (gimple stmt, bool *need_to_vectorize, slp_tree node)
{
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
  enum vect_relevant relevance = STMT_VINFO_RELEVANT (stmt_info);
  bool ok;
  tree scalar_type, vectype;
  gimple pattern_stmt;
  gimple_seq pattern_def_seq;

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location, "==> examining statement: ");
      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
      dump_printf (MSG_NOTE, "\n");
    }

  if (gimple_has_volatile_ops (stmt))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "not vectorized: stmt has volatile operands\n");

      return false;
    }
  /* Skip stmts that do not need to be vectorized.  In loops this is expected
     to include:
     - the COND_EXPR which is the loop exit condition
     - any LABEL_EXPRs in the loop
     - computations that are used only for array indexing or loop control.
     In basic blocks we only analyze statements that are a part of some SLP
     instance, therefore, all the statements are relevant.

     Pattern statement needs to be analyzed instead of the original statement
     if the original statement is not relevant.  Otherwise, we analyze both
     statements.  In basic blocks we are called from some SLP instance
     traversal; don't analyze pattern stmts instead, as the pattern stmts
     will already be part of the SLP instance.  */
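  /* Example (illustration only): for a widening multiply such as

       s = (int) a_short * (int) b_short;

     pattern recognition may have recorded a WIDEN_MULT_EXPR pattern
     statement; when the original statement is not relevant on its own,
     the pattern statement is the one analyzed below.  */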
  pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
  if (!STMT_VINFO_RELEVANT_P (stmt_info)
      && !STMT_VINFO_LIVE_P (stmt_info))
    {
      if (STMT_VINFO_IN_PATTERN_P (stmt_info)
	  && pattern_stmt
	  && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
	      || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
	{
	  /* Analyze PATTERN_STMT instead of the original stmt.  */
	  stmt = pattern_stmt;
	  stmt_info = vinfo_for_stmt (pattern_stmt);
	  if (dump_enabled_p ())
	    {
	      dump_printf_loc (MSG_NOTE, vect_location,
			       "==> examining pattern statement: ");
	      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
	      dump_printf (MSG_NOTE, "\n");
	    }
	}
      else
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_NOTE, vect_location, "irrelevant.\n");

	  return true;
	}
    }
  else if (STMT_VINFO_IN_PATTERN_P (stmt_info)
	   && node == NULL
	   && pattern_stmt
	   && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
	       || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
    {
      /* Analyze PATTERN_STMT too.  */
      if (dump_enabled_p ())
	{
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "==> examining pattern statement: ");
	  dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
	  dump_printf (MSG_NOTE, "\n");
	}

      if (!vect_analyze_stmt (pattern_stmt, need_to_vectorize, node))
	return false;
    }

  if (is_pattern_stmt_p (stmt_info)
      && node == NULL
      && (pattern_def_seq = STMT_VINFO_PATTERN_DEF_SEQ (stmt_info)))
    {
      gimple_stmt_iterator si;

      for (si = gsi_start (pattern_def_seq); !gsi_end_p (si); gsi_next (&si))
	{
	  gimple pattern_def_stmt = gsi_stmt (si);
	  if (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_def_stmt))
	      || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_def_stmt)))
	    {
	      /* Analyze def stmt of STMT if it's a pattern stmt.  */
	      if (dump_enabled_p ())
		{
		  dump_printf_loc (MSG_NOTE, vect_location,
				   "==> examining pattern def statement: ");
		  dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_def_stmt, 0);
		  dump_printf (MSG_NOTE, "\n");
		}

	      if (!vect_analyze_stmt (pattern_def_stmt,
				      need_to_vectorize, node))
		return false;
	    }
	}
    }

  switch (STMT_VINFO_DEF_TYPE (stmt_info))
    {
    case vect_internal_def:
      break;

    case vect_reduction_def:
    case vect_nested_cycle:
      gcc_assert (!bb_vinfo && (relevance == vect_used_in_outer
				|| relevance == vect_used_in_outer_by_reduction
				|| relevance == vect_unused_in_scope));
      break;

    case vect_induction_def:
    case vect_constant_def:
    case vect_external_def:
    case vect_unknown_def_type:
    default:
      gcc_unreachable ();
    }

  if (bb_vinfo)
    {
      gcc_assert (PURE_SLP_STMT (stmt_info));

      scalar_type = TREE_TYPE (gimple_get_lhs (stmt));
      if (dump_enabled_p ())
	{
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "get vectype for scalar type: ");
	  dump_generic_expr (MSG_NOTE, TDF_SLIM, scalar_type);
	  dump_printf (MSG_NOTE, "\n");
	}

      vectype = get_vectype_for_scalar_type (scalar_type);
      if (!vectype)
	{
	  if (dump_enabled_p ())
	    {
	      dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			       "not SLPed: unsupported data-type ");
	      dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
				 scalar_type);
	      dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
	    }
	  return false;
	}

      if (dump_enabled_p ())
	{
	  dump_printf_loc (MSG_NOTE, vect_location, "vectype: ");
	  dump_generic_expr (MSG_NOTE, TDF_SLIM, vectype);
	  dump_printf (MSG_NOTE, "\n");
	}

      STMT_VINFO_VECTYPE (stmt_info) = vectype;
    }

  if (STMT_VINFO_RELEVANT_P (stmt_info))
    {
      gcc_assert (!VECTOR_MODE_P (TYPE_MODE (gimple_expr_type (stmt))));
      gcc_assert (STMT_VINFO_VECTYPE (stmt_info)
		  || (is_gimple_call (stmt)
		      && gimple_call_lhs (stmt) == NULL_TREE));
      *need_to_vectorize = true;
    }

  ok = true;
  if (!bb_vinfo
      && (STMT_VINFO_RELEVANT_P (stmt_info)
	  || STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def))
    ok = (vectorizable_simd_clone_call (stmt, NULL, NULL, NULL)
	  || vectorizable_conversion (stmt, NULL, NULL, NULL)
	  || vectorizable_shift (stmt, NULL, NULL, NULL)
	  || vectorizable_operation (stmt, NULL, NULL, NULL)
	  || vectorizable_assignment (stmt, NULL, NULL, NULL)
	  || vectorizable_load (stmt, NULL, NULL, NULL, NULL)
	  || vectorizable_call (stmt, NULL, NULL, NULL)
	  || vectorizable_store (stmt, NULL, NULL, NULL)
	  || vectorizable_reduction (stmt, NULL, NULL, NULL)
	  || vectorizable_condition (stmt, NULL, NULL, NULL, 0, NULL));
  else
    {
      if (bb_vinfo)
	ok = (vectorizable_simd_clone_call (stmt, NULL, NULL, node)
	      || vectorizable_conversion (stmt, NULL, NULL, node)
	      || vectorizable_shift (stmt, NULL, NULL, node)
	      || vectorizable_operation (stmt, NULL, NULL, node)
	      || vectorizable_assignment (stmt, NULL, NULL, node)
	      || vectorizable_load (stmt, NULL, NULL, node, NULL)
	      || vectorizable_call (stmt, NULL, NULL, node)
	      || vectorizable_store (stmt, NULL, NULL, node)
	      || vectorizable_condition (stmt, NULL, NULL, NULL, 0, node));
    }

  if (!ok)
    {
      if (dump_enabled_p ())
	{
	  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			   "not vectorized: relevant stmt not ");
	  dump_printf (MSG_MISSED_OPTIMIZATION, "supported: ");
	  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
	  dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
	}

      return false;
    }

  if (bb_vinfo)
    return true;

  /* Stmts that are (also) "live" (i.e. - that are used out of the loop)
     need extra handling, except for vectorizable reductions.  */
  if (STMT_VINFO_LIVE_P (stmt_info)
      && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type)
    ok = vectorizable_live_operation (stmt, NULL, NULL);

  if (!ok)
    {
      if (dump_enabled_p ())
	{
	  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			   "not vectorized: live stmt not ");
	  dump_printf (MSG_MISSED_OPTIMIZATION, "supported: ");
	  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
	  dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
	}

      return false;
    }

  return true;
}
/* Function vect_transform_stmt.

   Create a vectorized stmt to replace STMT, and insert it at BSI.  */

bool
vect_transform_stmt (gimple stmt, gimple_stmt_iterator *gsi,
		     bool *grouped_store, slp_tree slp_node,
		     slp_instance slp_node_instance)
{
  bool is_store = false;
  gimple vec_stmt = NULL;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  bool done;

  switch (STMT_VINFO_TYPE (stmt_info))
    {
    case type_demotion_vec_info_type:
    case type_promotion_vec_info_type:
    case type_conversion_vec_info_type:
      done = vectorizable_conversion (stmt, gsi, &vec_stmt, slp_node);
      gcc_assert (done);
      break;

    case induc_vec_info_type:
      gcc_assert (!slp_node);
      done = vectorizable_induction (stmt, gsi, &vec_stmt);
      gcc_assert (done);
      break;

    case shift_vec_info_type:
      done = vectorizable_shift (stmt, gsi, &vec_stmt, slp_node);
      gcc_assert (done);
      break;

    case op_vec_info_type:
      done = vectorizable_operation (stmt, gsi, &vec_stmt, slp_node);
      gcc_assert (done);
      break;

    case assignment_vec_info_type:
      done = vectorizable_assignment (stmt, gsi, &vec_stmt, slp_node);
      gcc_assert (done);
      break;

    case load_vec_info_type:
      done = vectorizable_load (stmt, gsi, &vec_stmt, slp_node,
				slp_node_instance);
      gcc_assert (done);
      break;

    case store_vec_info_type:
      done = vectorizable_store (stmt, gsi, &vec_stmt, slp_node);
      gcc_assert (done);
      if (STMT_VINFO_GROUPED_ACCESS (stmt_info) && !slp_node)
	{
	  /* In case of interleaving, the whole chain is vectorized when the
	     last store in the chain is reached.  Store stmts before the last
	     one are skipped, and their vec_stmt_info shouldn't be freed
	     meanwhile.  */
	  *grouped_store = true;
	  if (STMT_VINFO_VEC_STMT (stmt_info))
	    is_store = true;
	}
      else
	is_store = true;
      break;

    case condition_vec_info_type:
      done = vectorizable_condition (stmt, gsi, &vec_stmt, NULL, 0, slp_node);
      gcc_assert (done);
      break;

    case call_vec_info_type:
      done = vectorizable_call (stmt, gsi, &vec_stmt, slp_node);
      stmt = gsi_stmt (*gsi);
      if (is_gimple_call (stmt)
	  && gimple_call_internal_p (stmt)
	  && gimple_call_internal_fn (stmt) == IFN_MASK_STORE)
	is_store = true;
      break;

    case call_simd_clone_vec_info_type:
      done = vectorizable_simd_clone_call (stmt, gsi, &vec_stmt, slp_node);
      stmt = gsi_stmt (*gsi);
      break;

    case reduc_vec_info_type:
      done = vectorizable_reduction (stmt, gsi, &vec_stmt, slp_node);
      gcc_assert (done);
      break;

    default:
      if (!STMT_VINFO_LIVE_P (stmt_info))
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			     "stmt not supported.\n");
	  gcc_unreachable ();
	}
    }

  /* Handle inner-loop stmts whose DEF is used in the loop-nest that
     is being vectorized, but outside the immediately enclosing loop.  */
  if (vec_stmt
      && STMT_VINFO_LOOP_VINFO (stmt_info)
      && nested_in_vect_loop_p (LOOP_VINFO_LOOP (
				  STMT_VINFO_LOOP_VINFO (stmt_info)), stmt)
      && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type
      && (STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_outer
	  || STMT_VINFO_RELEVANT (stmt_info) ==
					   vect_used_in_outer_by_reduction))
    {
      struct loop *innerloop = LOOP_VINFO_LOOP (
				 STMT_VINFO_LOOP_VINFO (stmt_info))->inner;
      imm_use_iterator imm_iter;
      use_operand_p use_p;
      tree scalar_dest;
      gimple exit_phi;

      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "Record the vdef for outer-loop vectorization.\n");

      /* Find the relevant loop-exit phi-node, and record the vec_stmt there
	 (to be used when vectorizing outer-loop stmts that use the DEF of
	 STMT).  */
      if (gimple_code (stmt) == GIMPLE_PHI)
	scalar_dest = PHI_RESULT (stmt);
      else
	scalar_dest = gimple_assign_lhs (stmt);

      FOR_EACH_IMM_USE_FAST (use_p, imm_iter, scalar_dest)
	{
	  if (!flow_bb_inside_loop_p (innerloop, gimple_bb (USE_STMT (use_p))))
	    {
	      exit_phi = USE_STMT (use_p);
	      STMT_VINFO_VEC_STMT (vinfo_for_stmt (exit_phi)) = vec_stmt;
	    }
	}
    }

  /* Handle stmts whose DEF is used outside the loop-nest that is
     being vectorized.  */
  if (STMT_VINFO_LIVE_P (stmt_info)
      && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type)
    {
      done = vectorizable_live_operation (stmt, gsi, &vec_stmt);
      gcc_assert (done);
    }

  if (vec_stmt)
    STMT_VINFO_VEC_STMT (stmt_info) = vec_stmt;

  return is_store;
}
/* Remove a group of stores (for SLP or interleaving), free their
   stmt_vec_info.  */

void
vect_remove_stores (gimple first_stmt)
{
  gimple next = first_stmt;
  gimple tmp;
  gimple_stmt_iterator next_si;

  while (next)
    {
      stmt_vec_info stmt_info = vinfo_for_stmt (next);

      tmp = GROUP_NEXT_ELEMENT (stmt_info);
      if (is_pattern_stmt_p (stmt_info))
	next = STMT_VINFO_RELATED_STMT (stmt_info);
      /* Free the attached stmt_vec_info and remove the stmt.  */
      next_si = gsi_for_stmt (next);
      unlink_stmt_vdef (next);
      gsi_remove (&next_si, true);
      release_defs (next);
      free_stmt_vec_info (next);
      next = tmp;
    }
}
/* Function new_stmt_vec_info.

   Create and initialize a new stmt_vec_info struct for STMT.  */

stmt_vec_info
new_stmt_vec_info (gimple stmt, loop_vec_info loop_vinfo,
		   bb_vec_info bb_vinfo)
{
  stmt_vec_info res;
  res = (stmt_vec_info) xcalloc (1, sizeof (struct _stmt_vec_info));

  STMT_VINFO_TYPE (res) = undef_vec_info_type;
  STMT_VINFO_STMT (res) = stmt;
  STMT_VINFO_LOOP_VINFO (res) = loop_vinfo;
  STMT_VINFO_BB_VINFO (res) = bb_vinfo;
  STMT_VINFO_RELEVANT (res) = vect_unused_in_scope;
  STMT_VINFO_LIVE_P (res) = false;
  STMT_VINFO_VECTYPE (res) = NULL;
  STMT_VINFO_VEC_STMT (res) = NULL;
  STMT_VINFO_VECTORIZABLE (res) = true;
  STMT_VINFO_IN_PATTERN_P (res) = false;
  STMT_VINFO_RELATED_STMT (res) = NULL;
  STMT_VINFO_PATTERN_DEF_SEQ (res) = NULL;
  STMT_VINFO_DATA_REF (res) = NULL;

  STMT_VINFO_DR_BASE_ADDRESS (res) = NULL;
  STMT_VINFO_DR_OFFSET (res) = NULL;
  STMT_VINFO_DR_INIT (res) = NULL;
  STMT_VINFO_DR_STEP (res) = NULL;
  STMT_VINFO_DR_ALIGNED_TO (res) = NULL;

  if (gimple_code (stmt) == GIMPLE_PHI
      && is_loop_header_bb_p (gimple_bb (stmt)))
    STMT_VINFO_DEF_TYPE (res) = vect_unknown_def_type;
  else
    STMT_VINFO_DEF_TYPE (res) = vect_internal_def;

  STMT_VINFO_SAME_ALIGN_REFS (res).create (0);
  STMT_SLP_TYPE (res) = loop_vect;
  GROUP_FIRST_ELEMENT (res) = NULL;
  GROUP_NEXT_ELEMENT (res) = NULL;
  GROUP_SIZE (res) = 0;
  GROUP_STORE_COUNT (res) = 0;
  GROUP_GAP (res) = 0;
  GROUP_SAME_DR_STMT (res) = NULL;

  return res;
}
/* Create a hash table for stmt_vec_info.  */

void
init_stmt_vec_info_vec (void)
{
  gcc_assert (!stmt_vec_info_vec.exists ());
  stmt_vec_info_vec.create (50);
}


/* Free hash table for stmt_vec_info.  */

void
free_stmt_vec_info_vec (void)
{
  unsigned int i;
  vec_void_p info;
  FOR_EACH_VEC_ELT (stmt_vec_info_vec, i, info)
    if (info != NULL)
      free_stmt_vec_info (STMT_VINFO_STMT ((stmt_vec_info) info));
  gcc_assert (stmt_vec_info_vec.exists ());
  stmt_vec_info_vec.release ();
}
/* Free stmt vectorization related info.  */

void
free_stmt_vec_info (gimple stmt)
{
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);

  if (!stmt_info)
    return;

  /* Check if this statement has a related "pattern stmt"
     (introduced by the vectorizer during the pattern recognition
     pass).  Free pattern's stmt_vec_info and def stmt's stmt_vec_info
     too.  */
  if (STMT_VINFO_IN_PATTERN_P (stmt_info))
    {
      stmt_vec_info patt_info
	= vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
      if (patt_info)
	{
	  gimple_seq seq = STMT_VINFO_PATTERN_DEF_SEQ (patt_info);
	  gimple patt_stmt = STMT_VINFO_STMT (patt_info);
	  gimple_set_bb (patt_stmt, NULL);
	  tree lhs = gimple_get_lhs (patt_stmt);
	  if (TREE_CODE (lhs) == SSA_NAME)
	    release_ssa_name (lhs);
	  if (seq)
	    {
	      gimple_stmt_iterator si;
	      for (si = gsi_start (seq); !gsi_end_p (si); gsi_next (&si))
		{
		  gimple seq_stmt = gsi_stmt (si);
		  gimple_set_bb (seq_stmt, NULL);
		  lhs = gimple_get_lhs (seq_stmt);
		  if (TREE_CODE (lhs) == SSA_NAME)
		    release_ssa_name (lhs);
		  free_stmt_vec_info (seq_stmt);
		}
	    }
	  free_stmt_vec_info (patt_stmt);
	}
    }

  STMT_VINFO_SAME_ALIGN_REFS (stmt_info).release ();
  set_vinfo_for_stmt (stmt, NULL);
  free (stmt_info);
}
/* Function get_vectype_for_scalar_type_and_size.

   Returns the vector type corresponding to SCALAR_TYPE and SIZE as supported
   by the target.  */

static tree
get_vectype_for_scalar_type_and_size (tree scalar_type, unsigned size)
{
  enum machine_mode inner_mode = TYPE_MODE (scalar_type);
  enum machine_mode simd_mode;
  unsigned int nbytes = GET_MODE_SIZE (inner_mode);
  int nunits;
  tree vectype;

  if (nbytes == 0)
    return NULL_TREE;

  if (GET_MODE_CLASS (inner_mode) != MODE_INT
      && GET_MODE_CLASS (inner_mode) != MODE_FLOAT)
    return NULL_TREE;

  /* For vector types of elements whose mode precision doesn't
     match their type's precision we use an element type of mode
     precision.  The vectorization routines will have to make sure
     they support the proper result truncation/extension.
     We also make sure to build vector types with INTEGER_TYPE
     component type only.  */
  if (INTEGRAL_TYPE_P (scalar_type)
      && (GET_MODE_BITSIZE (inner_mode) != TYPE_PRECISION (scalar_type)
	  || TREE_CODE (scalar_type) != INTEGER_TYPE))
    scalar_type = build_nonstandard_integer_type (GET_MODE_BITSIZE (inner_mode),
						  TYPE_UNSIGNED (scalar_type));
  /* We shouldn't end up building VECTOR_TYPEs of non-scalar components.
     When the component mode passes the above test simply use a type
     corresponding to that mode.  The theory is that any use that
     would cause problems with this will disable vectorization anyway.  */
  else if (!SCALAR_FLOAT_TYPE_P (scalar_type)
	   && !INTEGRAL_TYPE_P (scalar_type))
    scalar_type = lang_hooks.types.type_for_mode (inner_mode, 1);

  /* We can't build a vector type of elements with alignment bigger than
     their size.  */
  else if (nbytes < TYPE_ALIGN_UNIT (scalar_type))
    scalar_type = lang_hooks.types.type_for_mode (inner_mode,
						  TYPE_UNSIGNED (scalar_type));

  /* If we fell back to using the mode, fail if there was
     no scalar type for it.  */
  if (scalar_type == NULL_TREE)
    return NULL_TREE;

  /* If no size was supplied use the mode the target prefers.  Otherwise
     lookup a vector mode of the specified size.  */
  if (size == 0)
    simd_mode = targetm.vectorize.preferred_simd_mode (inner_mode);
  else
    simd_mode = mode_for_vector (inner_mode, size / nbytes);
  nunits = GET_MODE_SIZE (simd_mode) / nbytes;
  if (nunits <= 1)
    return NULL_TREE;

  vectype = build_vector_type (scalar_type, nunits);

  if (!VECTOR_MODE_P (TYPE_MODE (vectype))
      && !INTEGRAL_MODE_P (TYPE_MODE (vectype)))
    return NULL_TREE;

  return vectype;
}
unsigned int current_vector_size;

/* Function get_vectype_for_scalar_type.

   Returns the vector type corresponding to SCALAR_TYPE as supported
   by the target.  */

tree
get_vectype_for_scalar_type (tree scalar_type)
{
  tree vectype;
  vectype = get_vectype_for_scalar_type_and_size (scalar_type,
						  current_vector_size);
  if (vectype
      && current_vector_size == 0)
    current_vector_size = GET_MODE_SIZE (TYPE_MODE (vectype));
  return vectype;
}
/* Function get_same_sized_vectype

   Returns a vector type corresponding to SCALAR_TYPE of size
   VECTOR_TYPE if supported by the target.  */

tree
get_same_sized_vectype (tree scalar_type, tree vector_type)
{
  return get_vectype_for_scalar_type_and_size
	   (scalar_type, GET_MODE_SIZE (TYPE_MODE (vector_type)));
}
/* Function vect_is_simple_use.

   Input:
   LOOP_VINFO - the vect info of the loop that is being vectorized.
   BB_VINFO - the vect info of the basic block that is being vectorized.
   OPERAND - operand of STMT in the loop or bb.
   DEF - the defining stmt in case OPERAND is an SSA_NAME.

   Returns whether a stmt with OPERAND can be vectorized.
   For loops, supportable operands are constants, loop invariants, and operands
   that are defined by the current iteration of the loop.  Unsupportable
   operands are those that are defined by a previous iteration of the loop (as
   is the case in reduction/induction computations).
   For basic blocks, supportable operands are constants and bb invariants.
   For now, operands defined outside the basic block are not supported.  */
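/* Example (illustration only): in

     for (i = 0; i < n; i++)
       a[i] = x * b[i] + c;

   the constant "c" classifies as vect_constant_def, the loop invariant
   "x" as vect_external_def, and the value loaded from "b[i]" as
   vect_internal_def; all three are simple uses.  */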
bool
vect_is_simple_use (tree operand, gimple stmt, loop_vec_info loop_vinfo,
		    bb_vec_info bb_vinfo, gimple *def_stmt,
		    tree *def, enum vect_def_type *dt)
{
  basic_block bb;
  stmt_vec_info stmt_vinfo;
  struct loop *loop = NULL;

  if (loop_vinfo)
    loop = LOOP_VINFO_LOOP (loop_vinfo);

  *def_stmt = NULL;
  *def = NULL_TREE;

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location,
		       "vect_is_simple_use: operand ");
      dump_generic_expr (MSG_NOTE, TDF_SLIM, operand);
      dump_printf (MSG_NOTE, "\n");
    }

  if (CONSTANT_CLASS_P (operand))
    {
      *dt = vect_constant_def;
      return true;
    }

  if (is_gimple_min_invariant (operand))
    {
      *def = operand;
      *dt = vect_external_def;
      return true;
    }

  if (TREE_CODE (operand) == PAREN_EXPR)
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location, "non-associatable copy.\n");
      operand = TREE_OPERAND (operand, 0);
    }

  if (TREE_CODE (operand) != SSA_NAME)
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "not ssa-name.\n");
      return false;
    }

  *def_stmt = SSA_NAME_DEF_STMT (operand);
  if (*def_stmt == NULL)
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "no def_stmt.\n");
      return false;
    }

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location, "def_stmt: ");
      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, *def_stmt, 0);
      dump_printf (MSG_NOTE, "\n");
    }

  /* Empty stmt is expected only in case of a function argument.
     (Otherwise - we expect a phi_node or a GIMPLE_ASSIGN).  */
  if (gimple_nop_p (*def_stmt))
    {
      *def = operand;
      *dt = vect_external_def;
      return true;
    }

  bb = gimple_bb (*def_stmt);

  if ((loop && !flow_bb_inside_loop_p (loop, bb))
      || (!loop && bb != BB_VINFO_BB (bb_vinfo))
      || (!loop && gimple_code (*def_stmt) == GIMPLE_PHI))
    *dt = vect_external_def;
  else
    {
      stmt_vinfo = vinfo_for_stmt (*def_stmt);
      *dt = STMT_VINFO_DEF_TYPE (stmt_vinfo);
    }

  if (*dt == vect_unknown_def_type
      || (stmt
	  && *dt == vect_double_reduction_def
	  && gimple_code (stmt) != GIMPLE_PHI))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "Unsupported pattern.\n");
      return false;
    }

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location, "type of def: %d.\n", *dt);

  switch (gimple_code (*def_stmt))
    {
    case GIMPLE_PHI:
      *def = gimple_phi_result (*def_stmt);
      break;

    case GIMPLE_ASSIGN:
      *def = gimple_assign_lhs (*def_stmt);
      break;

    case GIMPLE_CALL:
      *def = gimple_call_lhs (*def_stmt);
      if (*def != NULL)
	break;
      /* FALLTHRU */
    default:
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "unsupported defining stmt:\n");
      return false;
    }

  return true;
}
/* Function vect_is_simple_use_1.

   Same as vect_is_simple_use but also determines the vector operand
   type of OPERAND and stores it to *VECTYPE.  If the definition of
   OPERAND is vect_uninitialized_def, vect_constant_def or
   vect_external_def *VECTYPE will be set to NULL_TREE and the caller
   is responsible to compute the best suited vector type for the
   scalar operand.  */

bool
vect_is_simple_use_1 (tree operand, gimple stmt, loop_vec_info loop_vinfo,
		      bb_vec_info bb_vinfo, gimple *def_stmt,
		      tree *def, enum vect_def_type *dt, tree *vectype)
{
  if (!vect_is_simple_use (operand, stmt, loop_vinfo, bb_vinfo, def_stmt,
			   def, dt))
    return false;

  /* Now get a vector type if the def is internal, otherwise supply
     NULL_TREE and leave it up to the caller to figure out a proper
     type for the use stmt.  */
  if (*dt == vect_internal_def
      || *dt == vect_induction_def
      || *dt == vect_reduction_def
      || *dt == vect_double_reduction_def
      || *dt == vect_nested_cycle)
    {
      stmt_vec_info stmt_info = vinfo_for_stmt (*def_stmt);

      if (STMT_VINFO_IN_PATTERN_P (stmt_info)
	  && !STMT_VINFO_RELEVANT (stmt_info)
	  && !STMT_VINFO_LIVE_P (stmt_info))
	stmt_info = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));

      *vectype = STMT_VINFO_VECTYPE (stmt_info);
      gcc_assert (*vectype != NULL_TREE);
    }
  else if (*dt == vect_uninitialized_def
	   || *dt == vect_constant_def
	   || *dt == vect_external_def)
    *vectype = NULL_TREE;
  else
    gcc_unreachable ();

  return true;
}
/* Function supportable_widening_operation

   Check whether an operation represented by the code CODE is a
   widening operation that is supported by the target platform in
   vector form (i.e., when operating on arguments of type VECTYPE_IN
   producing a result of type VECTYPE_OUT).

   Widening operations we currently support are NOP (CONVERT), FLOAT
   and WIDEN_MULT.  This function checks if these operations are supported
   by the target platform either directly (via vector tree-codes), or via
   target builtins.

   Output:
   - CODE1 and CODE2 are codes of vector operations to be used when
     vectorizing the operation, if available.
   - MULTI_STEP_CVT determines the number of required intermediate steps in
     case of multi-step conversion (like char->short->int - in that case
     MULTI_STEP_CVT will be 1).
   - INTERM_TYPES contains the intermediate type required to perform the
     widening operation (short in the above example).  */
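/* Example (illustration only): widening a V16QI vector of chars to ints
   with MULTI_STEP_CVT == 1 and INTERM_TYPES == { short } first unpacks
   to two V8HI vectors and then unpacks each of those to two V4SI
   vectors:

     vs_lo = VEC_UNPACK_LO_EXPR <vqi>        (V16QI -> V8HI)
     vs_hi = VEC_UNPACK_HI_EXPR <vqi>
     vi_0  = VEC_UNPACK_LO_EXPR <vs_lo>      (V8HI -> V4SI)
     vi_1  = VEC_UNPACK_HI_EXPR <vs_lo>
     vi_2  = VEC_UNPACK_LO_EXPR <vs_hi>
     vi_3  = VEC_UNPACK_HI_EXPR <vs_hi>  */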
bool
supportable_widening_operation (enum tree_code code, gimple stmt,
				tree vectype_out, tree vectype_in,
				enum tree_code *code1, enum tree_code *code2,
				int *multi_step_cvt,
				vec<tree> *interm_types)
{
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_info);
  struct loop *vect_loop = NULL;
  enum machine_mode vec_mode;
  enum insn_code icode1, icode2;
  optab optab1, optab2;
  tree vectype = vectype_in;
  tree wide_vectype = vectype_out;
  enum tree_code c1, c2;
  int i;
  tree prev_type, intermediate_type;
  enum machine_mode intermediate_mode, prev_mode;
  optab optab3, optab4;

  *multi_step_cvt = 0;
  if (loop_info)
    vect_loop = LOOP_VINFO_LOOP (loop_info);

  switch (code)
    {
    case WIDEN_MULT_EXPR:
      /* The result of a vectorized widening operation usually requires
	 two vectors (because the widened results do not fit into one vector).
	 The generated vector results would normally be expected to be
	 generated in the same order as in the original scalar computation,
	 i.e. if 8 results are generated in each vector iteration, they are
	 to be organized as follows:
		vect1: [res1,res2,res3,res4],
		vect2: [res5,res6,res7,res8].

	 However, in the special case that the result of the widening
	 operation is used in a reduction computation only, the order doesn't
	 matter (because when vectorizing a reduction we change the order of
	 the computation).  Some targets can take advantage of this and
	 generate more efficient code.  For example, targets like Altivec,
	 that support widen_mult using a sequence of {mult_even,mult_odd}
	 generate the following vectors:
		vect1: [res1,res3,res5,res7],
		vect2: [res2,res4,res6,res8].

	 When vectorizing outer-loops, we execute the inner-loop sequentially
	 (each vectorized inner-loop iteration contributes to VF outer-loop
	 iterations in parallel).  We therefore don't allow to change the
	 order of the computation in the inner-loop during outer-loop
	 vectorization.  */
      /* TODO: Another case in which order doesn't *really* matter is when we
	 widen and then contract again, e.g. (short)((int)x * y >> 8).
	 Normally, pack_trunc performs an even/odd permute, whereas the
	 repack from an even/odd expansion would be an interleave, which
	 would be significantly simpler for e.g. AVX2.  */
      /* In any case, in order to avoid duplicating the code below, recurse
	 on VEC_WIDEN_MULT_EVEN_EXPR.  If it succeeds, all the return values
	 are properly set up for the caller.  If we fail, we'll continue with
	 a VEC_WIDEN_MULT_LO/HI_EXPR check.  */
      if (vect_loop
	  && STMT_VINFO_RELEVANT (stmt_info) == vect_used_by_reduction
	  && !nested_in_vect_loop_p (vect_loop, stmt)
	  && supportable_widening_operation (VEC_WIDEN_MULT_EVEN_EXPR,
					     stmt, vectype_out, vectype_in,
					     code1, code2, multi_step_cvt,
					     interm_types))
	{
	  /* Elements in a vector with vect_used_by_reduction property cannot
	     be reordered if the use chain with this property does not have the
	     same operation.  One such an example is s += a * b, where elements
	     in a and b cannot be reordered.  Here we check if the vector defined
	     by STMT is only directly used in the reduction statement.  */
	  tree lhs = gimple_assign_lhs (stmt);
	  use_operand_p dummy;
	  gimple use_stmt;
	  stmt_vec_info use_stmt_info = NULL;
	  if (single_imm_use (lhs, &dummy, &use_stmt)
	      && (use_stmt_info = vinfo_for_stmt (use_stmt))
	      && STMT_VINFO_DEF_TYPE (use_stmt_info) == vect_reduction_def)
	    return true;
	}
      c1 = VEC_WIDEN_MULT_LO_EXPR;
      c2 = VEC_WIDEN_MULT_HI_EXPR;
      break;

    case VEC_WIDEN_MULT_EVEN_EXPR:
      /* Support the recursion induced just above.  */
      c1 = VEC_WIDEN_MULT_EVEN_EXPR;
      c2 = VEC_WIDEN_MULT_ODD_EXPR;
      break;

    case WIDEN_LSHIFT_EXPR:
      c1 = VEC_WIDEN_LSHIFT_LO_EXPR;
      c2 = VEC_WIDEN_LSHIFT_HI_EXPR;
      break;

    CASE_CONVERT:
      c1 = VEC_UNPACK_LO_EXPR;
      c2 = VEC_UNPACK_HI_EXPR;
      break;

    case FLOAT_EXPR:
      c1 = VEC_UNPACK_FLOAT_LO_EXPR;
      c2 = VEC_UNPACK_FLOAT_HI_EXPR;
      break;

    case FIX_TRUNC_EXPR:
      /* ??? Not yet implemented due to missing VEC_UNPACK_FIX_TRUNC_HI_EXPR/
	 VEC_UNPACK_FIX_TRUNC_LO_EXPR tree codes and optabs used for
	 computing the operation.  */
      return false;

    default:
      gcc_unreachable ();
    }

  if (BYTES_BIG_ENDIAN && c1 != VEC_WIDEN_MULT_EVEN_EXPR)
    {
      enum tree_code ctmp = c1;
      c1 = c2;
      c2 = ctmp;
    }

  if (code == FIX_TRUNC_EXPR)
    {
      /* The signedness is determined from output operand.  */
      optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
      optab2 = optab_for_tree_code (c2, vectype_out, optab_default);
    }
  else
    {
      optab1 = optab_for_tree_code (c1, vectype, optab_default);
      optab2 = optab_for_tree_code (c2, vectype, optab_default);
    }

  if (!optab1 || !optab2)
    return false;

  vec_mode = TYPE_MODE (vectype);
  if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing
       || (icode2 = optab_handler (optab2, vec_mode)) == CODE_FOR_nothing)
    return false;

  *code1 = c1;
  *code2 = c2;

  if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
      && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
    return true;

  /* Check if it's a multi-step conversion that can be done using intermediate
     types.  */

  prev_type = vectype;
  prev_mode = vec_mode;

  if (!CONVERT_EXPR_CODE_P (code))
    return false;

  /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
     intermediate steps in promotion sequence.  We try
     MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do
     not.  */
  interm_types->create (MAX_INTERM_CVT_STEPS);
  for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
    {
      intermediate_mode = insn_data[icode1].operand[0].mode;
      intermediate_type
	= lang_hooks.types.type_for_mode (intermediate_mode,
					  TYPE_UNSIGNED (prev_type));
      optab3 = optab_for_tree_code (c1, intermediate_type, optab_default);
      optab4 = optab_for_tree_code (c2, intermediate_type, optab_default);

      if (!optab3 || !optab4
	  || (icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing
	  || insn_data[icode1].operand[0].mode != intermediate_mode
	  || (icode2 = optab_handler (optab2, prev_mode)) == CODE_FOR_nothing
	  || insn_data[icode2].operand[0].mode != intermediate_mode
	  || ((icode1 = optab_handler (optab3, intermediate_mode))
	      == CODE_FOR_nothing)
	  || ((icode2 = optab_handler (optab4, intermediate_mode))
	      == CODE_FOR_nothing))
	break;

      interm_types->quick_push (intermediate_type);
      (*multi_step_cvt)++;

      if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
	  && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
	return true;

      prev_type = intermediate_type;
      prev_mode = intermediate_mode;
    }

  interm_types->release ();
  return false;
}
/* Function supportable_narrowing_operation

   Check whether an operation represented by the code CODE is a
   narrowing operation that is supported by the target platform in
   vector form (i.e., when operating on arguments of type VECTYPE_IN
   and producing a result of type VECTYPE_OUT).

   Narrowing operations we currently support are NOP (CONVERT) and
   FIX_TRUNC.  This function checks if these operations are supported by
   the target platform directly via vector tree-codes.

   Output:
   - CODE1 is the code of a vector operation to be used when
     vectorizing the operation, if available.
   - MULTI_STEP_CVT determines the number of required intermediate steps in
     case of multi-step conversion (like int->short->char - in that case
     MULTI_STEP_CVT will be 1).
   - INTERM_TYPES contains the intermediate type required to perform the
     narrowing operation (short in the above example).   */
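/* Example (illustration only): narrowing four V4SI vectors of ints to
   one V16QI vector of chars with MULTI_STEP_CVT == 1 and
   INTERM_TYPES == { short } packs pairwise:

     vs_0 = VEC_PACK_TRUNC_EXPR <vi_0, vi_1>   (2 x V4SI -> V8HI)
     vs_1 = VEC_PACK_TRUNC_EXPR <vi_2, vi_3>
     vqi  = VEC_PACK_TRUNC_EXPR <vs_0, vs_1>   (2 x V8HI -> V16QI)  */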
bool
supportable_narrowing_operation (enum tree_code code,
				 tree vectype_out, tree vectype_in,
				 enum tree_code *code1, int *multi_step_cvt,
				 vec<tree> *interm_types)
{
  enum machine_mode vec_mode;
  enum insn_code icode1;
  optab optab1, interm_optab;
  tree vectype = vectype_in;
  tree narrow_vectype = vectype_out;
  enum tree_code c1;
  tree intermediate_type;
  enum machine_mode intermediate_mode, prev_mode;
  int i;
  bool uns;

  *multi_step_cvt = 0;
  switch (code)
    {
    CASE_CONVERT:
      c1 = VEC_PACK_TRUNC_EXPR;
      break;

    case FIX_TRUNC_EXPR:
      c1 = VEC_PACK_FIX_TRUNC_EXPR;
      break;

    case FLOAT_EXPR:
      /* ??? Not yet implemented due to missing VEC_PACK_FLOAT_EXPR
	 tree code and optabs used for computing the operation.  */
      return false;

    default:
      gcc_unreachable ();
    }

  if (code == FIX_TRUNC_EXPR)
    /* The signedness is determined from output operand.  */
    optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
  else
    optab1 = optab_for_tree_code (c1, vectype, optab_default);

  if (!optab1)
    return false;

  vec_mode = TYPE_MODE (vectype);
  if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing)
    return false;

  *code1 = c1;

  if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
    return true;

  /* Check if it's a multi-step conversion that can be done using intermediate
     types.  */
  prev_mode = vec_mode;
  if (code == FIX_TRUNC_EXPR)
    uns = TYPE_UNSIGNED (vectype_out);
  else
    uns = TYPE_UNSIGNED (vectype);

  /* For multi-step FIX_TRUNC_EXPR prefer signed floating to integer
     conversion over unsigned, as unsigned FIX_TRUNC_EXPR is often more
     costly than signed.  */
  if (code == FIX_TRUNC_EXPR && uns)
    {
      enum insn_code icode2;

      intermediate_type
	= lang_hooks.types.type_for_mode (TYPE_MODE (vectype_out), 0);
      interm_optab
	= optab_for_tree_code (c1, intermediate_type, optab_default);
      if (interm_optab != unknown_optab
	  && (icode2 = optab_handler (optab1, vec_mode)) != CODE_FOR_nothing
	  && insn_data[icode1].operand[0].mode
	     == insn_data[icode2].operand[0].mode)
	{
	  uns = false;
	  optab1 = interm_optab;
	  icode1 = icode2;
	}
    }

  /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
     intermediate steps in promotion sequence.  We try
     MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do not.  */
  interm_types->create (MAX_INTERM_CVT_STEPS);
  for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
    {
      intermediate_mode = insn_data[icode1].operand[0].mode;
      intermediate_type
	= lang_hooks.types.type_for_mode (intermediate_mode, uns);
      interm_optab
	= optab_for_tree_code (VEC_PACK_TRUNC_EXPR, intermediate_type,
			       optab_default);
      if (!interm_optab
	  || ((icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing)
	  || insn_data[icode1].operand[0].mode != intermediate_mode
	  || ((icode1 = optab_handler (interm_optab, intermediate_mode))
	      == CODE_FOR_nothing))
	break;

      interm_types->quick_push (intermediate_type);
      (*multi_step_cvt)++;

      if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
	return true;

      prev_mode = intermediate_mode;
      optab1 = interm_optab;
    }

  interm_types->release ();
  return false;
}