1 /* Statement Analysis and Transformation for Vectorization
2 Copyright (C) 2003-2019 Free Software Foundation, Inc.
3 Contributed by Dorit Naishlos <dorit@il.ibm.com>
4 and Ira Rosen <irar@il.ibm.com>
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
24 #include "coretypes.h"
31 #include "optabs-tree.h"
32 #include "insn-config.h"
33 #include "recog.h" /* FIXME: for insn_data */
37 #include "fold-const.h"
38 #include "stor-layout.h"
41 #include "gimple-iterator.h"
42 #include "gimplify-me.h"
44 #include "tree-ssa-loop-manip.h"
47 #include "tree-ssa-loop.h"
48 #include "tree-scalar-evolution.h"
49 #include "tree-vectorizer.h"
51 #include "internal-fn.h"
52 #include "tree-vector-builder.h"
53 #include "vec-perm-indices.h"
54 #include "tree-ssa-loop-niter.h"
55 #include "gimple-fold.h"
59 /* For lang_hooks.types.type_for_mode. */
60 #include "langhooks.h"
62 /* Return the vectorized type for the given statement. */
65 stmt_vectype (class _stmt_vec_info
*stmt_info
)
67 return STMT_VINFO_VECTYPE (stmt_info
);
70 /* Return TRUE iff the given statement is in an inner loop relative to
71 the loop being vectorized. */
73 stmt_in_inner_loop_p (class _stmt_vec_info
*stmt_info
)
75 gimple
*stmt
= STMT_VINFO_STMT (stmt_info
);
76 basic_block bb
= gimple_bb (stmt
);
77 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
83 loop
= LOOP_VINFO_LOOP (loop_vinfo
);
85 return (bb
->loop_father
== loop
->inner
);
88 /* Record the cost of a statement, either by directly informing the
89 target model or by saving it in a vector for later processing.
90 Return a preliminary estimate of the statement's cost. */
93 record_stmt_cost (stmt_vector_for_cost
*body_cost_vec
, int count
,
94 enum vect_cost_for_stmt kind
, stmt_vec_info stmt_info
,
95 int misalign
, enum vect_cost_model_location where
)
97 if ((kind
== vector_load
|| kind
== unaligned_load
)
98 && STMT_VINFO_GATHER_SCATTER_P (stmt_info
))
99 kind
= vector_gather_load
;
100 if ((kind
== vector_store
|| kind
== unaligned_store
)
101 && STMT_VINFO_GATHER_SCATTER_P (stmt_info
))
102 kind
= vector_scatter_store
;
104 stmt_info_for_cost si
= { count
, kind
, where
, stmt_info
, misalign
};
105 body_cost_vec
->safe_push (si
);
107 tree vectype
= stmt_info
? stmt_vectype (stmt_info
) : NULL_TREE
;
109 (builtin_vectorization_cost (kind
, vectype
, misalign
) * count
);
112 /* Return a variable of type ELEM_TYPE[NELEMS]. */
115 create_vector_array (tree elem_type
, unsigned HOST_WIDE_INT nelems
)
117 return create_tmp_var (build_array_type_nelts (elem_type
, nelems
),
121 /* ARRAY is an array of vectors created by create_vector_array.
122 Return an SSA_NAME for the vector in index N. The reference
123 is part of the vectorization of STMT_INFO and the vector is associated
124 with scalar destination SCALAR_DEST. */
127 read_vector_array (stmt_vec_info stmt_info
, gimple_stmt_iterator
*gsi
,
128 tree scalar_dest
, tree array
, unsigned HOST_WIDE_INT n
)
130 tree vect_type
, vect
, vect_name
, array_ref
;
133 gcc_assert (TREE_CODE (TREE_TYPE (array
)) == ARRAY_TYPE
);
134 vect_type
= TREE_TYPE (TREE_TYPE (array
));
135 vect
= vect_create_destination_var (scalar_dest
, vect_type
);
136 array_ref
= build4 (ARRAY_REF
, vect_type
, array
,
137 build_int_cst (size_type_node
, n
),
138 NULL_TREE
, NULL_TREE
);
140 new_stmt
= gimple_build_assign (vect
, array_ref
);
141 vect_name
= make_ssa_name (vect
, new_stmt
);
142 gimple_assign_set_lhs (new_stmt
, vect_name
);
143 vect_finish_stmt_generation (stmt_info
, new_stmt
, gsi
);
148 /* ARRAY is an array of vectors created by create_vector_array.
149 Emit code to store SSA_NAME VECT in index N of the array.
150 The store is part of the vectorization of STMT_INFO. */
153 write_vector_array (stmt_vec_info stmt_info
, gimple_stmt_iterator
*gsi
,
154 tree vect
, tree array
, unsigned HOST_WIDE_INT n
)
159 array_ref
= build4 (ARRAY_REF
, TREE_TYPE (vect
), array
,
160 build_int_cst (size_type_node
, n
),
161 NULL_TREE
, NULL_TREE
);
163 new_stmt
= gimple_build_assign (array_ref
, vect
);
164 vect_finish_stmt_generation (stmt_info
, new_stmt
, gsi
);
167 /* PTR is a pointer to an array of type TYPE. Return a representation
168 of *PTR. The memory reference replaces those in FIRST_DR
172 create_array_ref (tree type
, tree ptr
, tree alias_ptr_type
)
176 mem_ref
= build2 (MEM_REF
, type
, ptr
, build_int_cst (alias_ptr_type
, 0));
177 /* Arrays have the same alignment as their type. */
178 set_ptr_info_alignment (get_ptr_info (ptr
), TYPE_ALIGN_UNIT (type
), 0);
182 /* Add a clobber of variable VAR to the vectorization of STMT_INFO.
183 Emit the clobber before *GSI. */
186 vect_clobber_variable (stmt_vec_info stmt_info
, gimple_stmt_iterator
*gsi
,
189 tree clobber
= build_clobber (TREE_TYPE (var
));
190 gimple
*new_stmt
= gimple_build_assign (var
, clobber
);
191 vect_finish_stmt_generation (stmt_info
, new_stmt
, gsi
);
194 /* Utility functions used by vect_mark_stmts_to_be_vectorized. */
196 /* Function vect_mark_relevant.
198 Mark STMT_INFO as "relevant for vectorization" and add it to WORKLIST. */
201 vect_mark_relevant (vec
<stmt_vec_info
> *worklist
, stmt_vec_info stmt_info
,
202 enum vect_relevant relevant
, bool live_p
)
204 enum vect_relevant save_relevant
= STMT_VINFO_RELEVANT (stmt_info
);
205 bool save_live_p
= STMT_VINFO_LIVE_P (stmt_info
);
207 if (dump_enabled_p ())
208 dump_printf_loc (MSG_NOTE
, vect_location
,
209 "mark relevant %d, live %d: %G", relevant
, live_p
,
212 /* If this stmt is an original stmt in a pattern, we might need to mark its
213 related pattern stmt instead of the original stmt. However, such stmts
214 may have their own uses that are not in any pattern, in such cases the
215 stmt itself should be marked. */
216 if (STMT_VINFO_IN_PATTERN_P (stmt_info
))
218 /* This is the last stmt in a sequence that was detected as a
219 pattern that can potentially be vectorized. Don't mark the stmt
220 as relevant/live because it's not going to be vectorized.
221 Instead mark the pattern-stmt that replaces it. */
223 if (dump_enabled_p ())
224 dump_printf_loc (MSG_NOTE
, vect_location
,
225 "last stmt in pattern. don't mark"
226 " relevant/live.\n");
227 stmt_vec_info old_stmt_info
= stmt_info
;
228 stmt_info
= STMT_VINFO_RELATED_STMT (stmt_info
);
229 gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info
) == old_stmt_info
);
230 save_relevant
= STMT_VINFO_RELEVANT (stmt_info
);
231 save_live_p
= STMT_VINFO_LIVE_P (stmt_info
);
234 STMT_VINFO_LIVE_P (stmt_info
) |= live_p
;
235 if (relevant
> STMT_VINFO_RELEVANT (stmt_info
))
236 STMT_VINFO_RELEVANT (stmt_info
) = relevant
;
238 if (STMT_VINFO_RELEVANT (stmt_info
) == save_relevant
239 && STMT_VINFO_LIVE_P (stmt_info
) == save_live_p
)
241 if (dump_enabled_p ())
242 dump_printf_loc (MSG_NOTE
, vect_location
,
243 "already marked relevant/live.\n");
247 worklist
->safe_push (stmt_info
);
251 /* Function is_simple_and_all_uses_invariant
253 Return true if STMT_INFO is simple and all uses of it are invariant. */
256 is_simple_and_all_uses_invariant (stmt_vec_info stmt_info
,
257 loop_vec_info loop_vinfo
)
262 gassign
*stmt
= dyn_cast
<gassign
*> (stmt_info
->stmt
);
266 FOR_EACH_SSA_TREE_OPERAND (op
, stmt
, iter
, SSA_OP_USE
)
268 enum vect_def_type dt
= vect_uninitialized_def
;
270 if (!vect_is_simple_use (op
, loop_vinfo
, &dt
))
272 if (dump_enabled_p ())
273 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
274 "use not simple.\n");
278 if (dt
!= vect_external_def
&& dt
!= vect_constant_def
)
284 /* Function vect_stmt_relevant_p.
286 Return true if STMT_INFO, in the loop that is represented by LOOP_VINFO,
287 is "relevant for vectorization".
289 A stmt is considered "relevant for vectorization" if:
290 - it has uses outside the loop.
291 - it has vdefs (it alters memory).
292 - control stmts in the loop (except for the exit condition).
294 CHECKME: what other side effects would the vectorizer allow? */
297 vect_stmt_relevant_p (stmt_vec_info stmt_info
, loop_vec_info loop_vinfo
,
298 enum vect_relevant
*relevant
, bool *live_p
)
300 class loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
302 imm_use_iterator imm_iter
;
306 *relevant
= vect_unused_in_scope
;
309 /* cond stmt other than loop exit cond. */
310 if (is_ctrl_stmt (stmt_info
->stmt
)
311 && STMT_VINFO_TYPE (stmt_info
) != loop_exit_ctrl_vec_info_type
)
312 *relevant
= vect_used_in_scope
;
314 /* changing memory. */
315 if (gimple_code (stmt_info
->stmt
) != GIMPLE_PHI
)
316 if (gimple_vdef (stmt_info
->stmt
)
317 && !gimple_clobber_p (stmt_info
->stmt
))
319 if (dump_enabled_p ())
320 dump_printf_loc (MSG_NOTE
, vect_location
,
321 "vec_stmt_relevant_p: stmt has vdefs.\n");
322 *relevant
= vect_used_in_scope
;
325 /* uses outside the loop. */
326 FOR_EACH_PHI_OR_STMT_DEF (def_p
, stmt_info
->stmt
, op_iter
, SSA_OP_DEF
)
328 FOR_EACH_IMM_USE_FAST (use_p
, imm_iter
, DEF_FROM_PTR (def_p
))
330 basic_block bb
= gimple_bb (USE_STMT (use_p
));
331 if (!flow_bb_inside_loop_p (loop
, bb
))
333 if (is_gimple_debug (USE_STMT (use_p
)))
336 if (dump_enabled_p ())
337 dump_printf_loc (MSG_NOTE
, vect_location
,
338 "vec_stmt_relevant_p: used out of loop.\n");
340 /* We expect all such uses to be in the loop exit phis
341 (because of loop closed form) */
342 gcc_assert (gimple_code (USE_STMT (use_p
)) == GIMPLE_PHI
);
343 gcc_assert (bb
== single_exit (loop
)->dest
);
350 if (*live_p
&& *relevant
== vect_unused_in_scope
351 && !is_simple_and_all_uses_invariant (stmt_info
, loop_vinfo
))
353 if (dump_enabled_p ())
354 dump_printf_loc (MSG_NOTE
, vect_location
,
355 "vec_stmt_relevant_p: stmt live but not relevant.\n");
356 *relevant
= vect_used_only_live
;
359 return (*live_p
|| *relevant
);
363 /* Function exist_non_indexing_operands_for_use_p
365 USE is one of the uses attached to STMT_INFO. Check if USE is
366 used in STMT_INFO for anything other than indexing an array. */
369 exist_non_indexing_operands_for_use_p (tree use
, stmt_vec_info stmt_info
)
373 /* USE corresponds to some operand in STMT. If there is no data
374 reference in STMT, then any operand that corresponds to USE
375 is not indexing an array. */
376 if (!STMT_VINFO_DATA_REF (stmt_info
))
379 /* STMT has a data_ref. FORNOW this means that its of one of
383 (This should have been verified in analyze_data_refs).
385 'var' in the second case corresponds to a def, not a use,
386 so USE cannot correspond to any operands that are not used
389 Therefore, all we need to check is if STMT falls into the
390 first case, and whether var corresponds to USE. */
392 gassign
*assign
= dyn_cast
<gassign
*> (stmt_info
->stmt
);
393 if (!assign
|| !gimple_assign_copy_p (assign
))
395 gcall
*call
= dyn_cast
<gcall
*> (stmt_info
->stmt
);
396 if (call
&& gimple_call_internal_p (call
))
398 internal_fn ifn
= gimple_call_internal_fn (call
);
399 int mask_index
= internal_fn_mask_index (ifn
);
401 && use
== gimple_call_arg (call
, mask_index
))
403 int stored_value_index
= internal_fn_stored_value_index (ifn
);
404 if (stored_value_index
>= 0
405 && use
== gimple_call_arg (call
, stored_value_index
))
407 if (internal_gather_scatter_fn_p (ifn
)
408 && use
== gimple_call_arg (call
, 1))
414 if (TREE_CODE (gimple_assign_lhs (assign
)) == SSA_NAME
)
416 operand
= gimple_assign_rhs1 (assign
);
417 if (TREE_CODE (operand
) != SSA_NAME
)
428 Function process_use.
431 - a USE in STMT_VINFO in a loop represented by LOOP_VINFO
432 - RELEVANT - enum value to be set in the STMT_VINFO of the stmt
433 that defined USE. This is done by calling mark_relevant and passing it
434 the WORKLIST (to add DEF_STMT to the WORKLIST in case it is relevant).
435 - FORCE is true if exist_non_indexing_operands_for_use_p check shouldn't
439 Generally, LIVE_P and RELEVANT are used to define the liveness and
440 relevance info of the DEF_STMT of this USE:
441 STMT_VINFO_LIVE_P (DEF_stmt_vinfo) <-- live_p
442 STMT_VINFO_RELEVANT (DEF_stmt_vinfo) <-- relevant
444 - case 1: If USE is used only for address computations (e.g. array indexing),
445 which does not need to be directly vectorized, then the liveness/relevance
446 of the respective DEF_STMT is left unchanged.
447 - case 2: If STMT_VINFO is a reduction phi and DEF_STMT is a reduction stmt,
448 we skip DEF_STMT cause it had already been processed.
449 - case 3: If DEF_STMT and STMT_VINFO are in different nests, then
450 "relevant" will be modified accordingly.
452 Return true if everything is as expected. Return false otherwise. */
455 process_use (stmt_vec_info stmt_vinfo
, tree use
, loop_vec_info loop_vinfo
,
456 enum vect_relevant relevant
, vec
<stmt_vec_info
> *worklist
,
459 stmt_vec_info dstmt_vinfo
;
460 enum vect_def_type dt
;
462 /* case 1: we are only interested in uses that need to be vectorized. Uses
463 that are used for address computation are not considered relevant. */
464 if (!force
&& !exist_non_indexing_operands_for_use_p (use
, stmt_vinfo
))
465 return opt_result::success ();
467 if (!vect_is_simple_use (use
, loop_vinfo
, &dt
, &dstmt_vinfo
))
468 return opt_result::failure_at (stmt_vinfo
->stmt
,
470 " unsupported use in stmt.\n");
473 return opt_result::success ();
475 basic_block def_bb
= gimple_bb (dstmt_vinfo
->stmt
);
476 basic_block bb
= gimple_bb (stmt_vinfo
->stmt
);
478 /* case 2: A reduction phi (STMT) defined by a reduction stmt (DSTMT_VINFO).
479 We have to force the stmt live since the epilogue loop needs it to
480 continue computing the reduction. */
481 if (gimple_code (stmt_vinfo
->stmt
) == GIMPLE_PHI
482 && STMT_VINFO_DEF_TYPE (stmt_vinfo
) == vect_reduction_def
483 && gimple_code (dstmt_vinfo
->stmt
) != GIMPLE_PHI
484 && STMT_VINFO_DEF_TYPE (dstmt_vinfo
) == vect_reduction_def
485 && bb
->loop_father
== def_bb
->loop_father
)
487 if (dump_enabled_p ())
488 dump_printf_loc (MSG_NOTE
, vect_location
,
489 "reduc-stmt defining reduc-phi in the same nest.\n");
490 vect_mark_relevant (worklist
, dstmt_vinfo
, relevant
, true);
491 return opt_result::success ();
494 /* case 3a: outer-loop stmt defining an inner-loop stmt:
495 outer-loop-header-bb:
501 if (flow_loop_nested_p (def_bb
->loop_father
, bb
->loop_father
))
503 if (dump_enabled_p ())
504 dump_printf_loc (MSG_NOTE
, vect_location
,
505 "outer-loop def-stmt defining inner-loop stmt.\n");
509 case vect_unused_in_scope
:
510 relevant
= (STMT_VINFO_DEF_TYPE (stmt_vinfo
) == vect_nested_cycle
) ?
511 vect_used_in_scope
: vect_unused_in_scope
;
514 case vect_used_in_outer_by_reduction
:
515 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo
) != vect_reduction_def
);
516 relevant
= vect_used_by_reduction
;
519 case vect_used_in_outer
:
520 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo
) != vect_reduction_def
);
521 relevant
= vect_used_in_scope
;
524 case vect_used_in_scope
:
532 /* case 3b: inner-loop stmt defining an outer-loop stmt:
533 outer-loop-header-bb:
537 outer-loop-tail-bb (or outer-loop-exit-bb in double reduction):
539 else if (flow_loop_nested_p (bb
->loop_father
, def_bb
->loop_father
))
541 if (dump_enabled_p ())
542 dump_printf_loc (MSG_NOTE
, vect_location
,
543 "inner-loop def-stmt defining outer-loop stmt.\n");
547 case vect_unused_in_scope
:
548 relevant
= (STMT_VINFO_DEF_TYPE (stmt_vinfo
) == vect_reduction_def
549 || STMT_VINFO_DEF_TYPE (stmt_vinfo
) == vect_double_reduction_def
) ?
550 vect_used_in_outer_by_reduction
: vect_unused_in_scope
;
553 case vect_used_by_reduction
:
554 case vect_used_only_live
:
555 relevant
= vect_used_in_outer_by_reduction
;
558 case vect_used_in_scope
:
559 relevant
= vect_used_in_outer
;
566 /* We are also not interested in uses on loop PHI backedges that are
567 inductions. Otherwise we'll needlessly vectorize the IV increment
568 and cause hybrid SLP for SLP inductions. Unless the PHI is live
570 else if (gimple_code (stmt_vinfo
->stmt
) == GIMPLE_PHI
571 && STMT_VINFO_DEF_TYPE (stmt_vinfo
) == vect_induction_def
572 && ! STMT_VINFO_LIVE_P (stmt_vinfo
)
573 && (PHI_ARG_DEF_FROM_EDGE (stmt_vinfo
->stmt
,
574 loop_latch_edge (bb
->loop_father
))
577 if (dump_enabled_p ())
578 dump_printf_loc (MSG_NOTE
, vect_location
,
579 "induction value on backedge.\n");
580 return opt_result::success ();
584 vect_mark_relevant (worklist
, dstmt_vinfo
, relevant
, false);
585 return opt_result::success ();
589 /* Function vect_mark_stmts_to_be_vectorized.
591 Not all stmts in the loop need to be vectorized. For example:
600 Stmt 1 and 3 do not need to be vectorized, because loop control and
601 addressing of vectorized data-refs are handled differently.
603 This pass detects such stmts. */
606 vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo
, bool *fatal
)
608 class loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
609 basic_block
*bbs
= LOOP_VINFO_BBS (loop_vinfo
);
610 unsigned int nbbs
= loop
->num_nodes
;
611 gimple_stmt_iterator si
;
615 enum vect_relevant relevant
;
617 DUMP_VECT_SCOPE ("vect_mark_stmts_to_be_vectorized");
619 auto_vec
<stmt_vec_info
, 64> worklist
;
621 /* 1. Init worklist. */
622 for (i
= 0; i
< nbbs
; i
++)
625 for (si
= gsi_start_phis (bb
); !gsi_end_p (si
); gsi_next (&si
))
627 stmt_vec_info phi_info
= loop_vinfo
->lookup_stmt (gsi_stmt (si
));
628 if (dump_enabled_p ())
629 dump_printf_loc (MSG_NOTE
, vect_location
, "init: phi relevant? %G",
632 if (vect_stmt_relevant_p (phi_info
, loop_vinfo
, &relevant
, &live_p
))
633 vect_mark_relevant (&worklist
, phi_info
, relevant
, live_p
);
635 for (si
= gsi_start_bb (bb
); !gsi_end_p (si
); gsi_next (&si
))
637 stmt_vec_info stmt_info
= loop_vinfo
->lookup_stmt (gsi_stmt (si
));
638 if (dump_enabled_p ())
639 dump_printf_loc (MSG_NOTE
, vect_location
,
640 "init: stmt relevant? %G", stmt_info
->stmt
);
642 if (vect_stmt_relevant_p (stmt_info
, loop_vinfo
, &relevant
, &live_p
))
643 vect_mark_relevant (&worklist
, stmt_info
, relevant
, live_p
);
647 /* 2. Process_worklist */
648 while (worklist
.length () > 0)
653 stmt_vec_info stmt_vinfo
= worklist
.pop ();
654 if (dump_enabled_p ())
655 dump_printf_loc (MSG_NOTE
, vect_location
,
656 "worklist: examine stmt: %G", stmt_vinfo
->stmt
);
658 /* Examine the USEs of STMT. For each USE, mark the stmt that defines it
659 (DEF_STMT) as relevant/irrelevant according to the relevance property
661 relevant
= STMT_VINFO_RELEVANT (stmt_vinfo
);
663 /* Generally, the relevance property of STMT (in STMT_VINFO_RELEVANT) is
664 propagated as is to the DEF_STMTs of its USEs.
666 One exception is when STMT has been identified as defining a reduction
667 variable; in this case we set the relevance to vect_used_by_reduction.
668 This is because we distinguish between two kinds of relevant stmts -
669 those that are used by a reduction computation, and those that are
670 (also) used by a regular computation. This allows us later on to
671 identify stmts that are used solely by a reduction, and therefore the
672 order of the results that they produce does not have to be kept. */
674 switch (STMT_VINFO_DEF_TYPE (stmt_vinfo
))
676 case vect_reduction_def
:
677 gcc_assert (relevant
!= vect_unused_in_scope
);
678 if (relevant
!= vect_unused_in_scope
679 && relevant
!= vect_used_in_scope
680 && relevant
!= vect_used_by_reduction
681 && relevant
!= vect_used_only_live
)
682 return opt_result::failure_at
683 (stmt_vinfo
->stmt
, "unsupported use of reduction.\n");
686 case vect_nested_cycle
:
687 if (relevant
!= vect_unused_in_scope
688 && relevant
!= vect_used_in_outer_by_reduction
689 && relevant
!= vect_used_in_outer
)
690 return opt_result::failure_at
691 (stmt_vinfo
->stmt
, "unsupported use of nested cycle.\n");
694 case vect_double_reduction_def
:
695 if (relevant
!= vect_unused_in_scope
696 && relevant
!= vect_used_by_reduction
697 && relevant
!= vect_used_only_live
)
698 return opt_result::failure_at
699 (stmt_vinfo
->stmt
, "unsupported use of double reduction.\n");
706 if (is_pattern_stmt_p (stmt_vinfo
))
708 /* Pattern statements are not inserted into the code, so
709 FOR_EACH_PHI_OR_STMT_USE optimizes their operands out, and we
710 have to scan the RHS or function arguments instead. */
711 if (gassign
*assign
= dyn_cast
<gassign
*> (stmt_vinfo
->stmt
))
713 enum tree_code rhs_code
= gimple_assign_rhs_code (assign
);
714 tree op
= gimple_assign_rhs1 (assign
);
717 if (rhs_code
== COND_EXPR
&& COMPARISON_CLASS_P (op
))
720 = process_use (stmt_vinfo
, TREE_OPERAND (op
, 0),
721 loop_vinfo
, relevant
, &worklist
, false);
724 res
= process_use (stmt_vinfo
, TREE_OPERAND (op
, 1),
725 loop_vinfo
, relevant
, &worklist
, false);
730 for (; i
< gimple_num_ops (assign
); i
++)
732 op
= gimple_op (assign
, i
);
733 if (TREE_CODE (op
) == SSA_NAME
)
736 = process_use (stmt_vinfo
, op
, loop_vinfo
, relevant
,
743 else if (gcall
*call
= dyn_cast
<gcall
*> (stmt_vinfo
->stmt
))
745 for (i
= 0; i
< gimple_call_num_args (call
); i
++)
747 tree arg
= gimple_call_arg (call
, i
);
749 = process_use (stmt_vinfo
, arg
, loop_vinfo
, relevant
,
757 FOR_EACH_PHI_OR_STMT_USE (use_p
, stmt_vinfo
->stmt
, iter
, SSA_OP_USE
)
759 tree op
= USE_FROM_PTR (use_p
);
761 = process_use (stmt_vinfo
, op
, loop_vinfo
, relevant
,
767 if (STMT_VINFO_GATHER_SCATTER_P (stmt_vinfo
))
769 gather_scatter_info gs_info
;
770 if (!vect_check_gather_scatter (stmt_vinfo
, loop_vinfo
, &gs_info
))
773 = process_use (stmt_vinfo
, gs_info
.offset
, loop_vinfo
, relevant
,
782 } /* while worklist */
784 return opt_result::success ();
787 /* Compute the prologue cost for invariant or constant operands. */
790 vect_prologue_cost_for_slp_op (slp_tree node
, stmt_vec_info stmt_info
,
791 unsigned opno
, enum vect_def_type dt
,
792 stmt_vector_for_cost
*cost_vec
)
794 vec_info
*vinfo
= stmt_info
->vinfo
;
795 gimple
*stmt
= SLP_TREE_SCALAR_STMTS (node
)[0]->stmt
;
796 tree op
= gimple_op (stmt
, opno
);
797 unsigned prologue_cost
= 0;
799 /* Without looking at the actual initializer a vector of
800 constants can be implemented as load from the constant pool.
801 When all elements are the same we can use a splat. */
802 tree vectype
= get_vectype_for_scalar_type (vinfo
, TREE_TYPE (op
));
803 unsigned group_size
= SLP_TREE_SCALAR_STMTS (node
).length ();
804 unsigned num_vects_to_check
;
805 unsigned HOST_WIDE_INT const_nunits
;
807 if (TYPE_VECTOR_SUBPARTS (vectype
).is_constant (&const_nunits
)
808 && ! multiple_p (const_nunits
, group_size
))
810 num_vects_to_check
= SLP_TREE_NUMBER_OF_VEC_STMTS (node
);
811 nelt_limit
= const_nunits
;
815 /* If either the vector has variable length or the vectors
816 are composed of repeated whole groups we only need to
817 cost construction once. All vectors will be the same. */
818 num_vects_to_check
= 1;
819 nelt_limit
= group_size
;
821 tree elt
= NULL_TREE
;
823 for (unsigned j
= 0; j
< num_vects_to_check
* nelt_limit
; ++j
)
825 unsigned si
= j
% group_size
;
827 elt
= gimple_op (SLP_TREE_SCALAR_STMTS (node
)[si
]->stmt
, opno
);
828 /* ??? We're just tracking whether all operands of a single
829 vector initializer are the same, ideally we'd check if
830 we emitted the same one already. */
831 else if (elt
!= gimple_op (SLP_TREE_SCALAR_STMTS (node
)[si
]->stmt
,
835 if (nelt
== nelt_limit
)
837 /* ??? We need to pass down stmt_info for a vector type
838 even if it points to the wrong stmt. */
839 prologue_cost
+= record_stmt_cost
841 dt
== vect_external_def
842 ? (elt
? scalar_to_vec
: vec_construct
)
844 stmt_info
, 0, vect_prologue
);
849 return prologue_cost
;
852 /* Function vect_model_simple_cost.
854 Models cost for simple operations, i.e. those that only emit ncopies of a
855 single op. Right now, this does not account for multiple insns that could
856 be generated for the single vector op. We will handle that shortly. */
859 vect_model_simple_cost (stmt_vec_info stmt_info
, int ncopies
,
860 enum vect_def_type
*dt
,
863 stmt_vector_for_cost
*cost_vec
)
865 int inside_cost
= 0, prologue_cost
= 0;
867 gcc_assert (cost_vec
!= NULL
);
869 /* ??? Somehow we need to fix this at the callers. */
871 ncopies
= SLP_TREE_NUMBER_OF_VEC_STMTS (node
);
875 /* Scan operands and account for prologue cost of constants/externals.
876 ??? This over-estimates cost for multiple uses and should be
878 gimple
*stmt
= SLP_TREE_SCALAR_STMTS (node
)[0]->stmt
;
879 tree lhs
= gimple_get_lhs (stmt
);
880 for (unsigned i
= 0; i
< gimple_num_ops (stmt
); ++i
)
882 tree op
= gimple_op (stmt
, i
);
883 enum vect_def_type dt
;
884 if (!op
|| op
== lhs
)
886 if (vect_is_simple_use (op
, stmt_info
->vinfo
, &dt
)
887 && (dt
== vect_constant_def
|| dt
== vect_external_def
))
888 prologue_cost
+= vect_prologue_cost_for_slp_op (node
, stmt_info
,
893 /* Cost the "broadcast" of a scalar operand in to a vector operand.
894 Use scalar_to_vec to cost the broadcast, as elsewhere in the vector
896 for (int i
= 0; i
< ndts
; i
++)
897 if (dt
[i
] == vect_constant_def
|| dt
[i
] == vect_external_def
)
898 prologue_cost
+= record_stmt_cost (cost_vec
, 1, scalar_to_vec
,
899 stmt_info
, 0, vect_prologue
);
901 /* Adjust for two-operator SLP nodes. */
902 if (node
&& SLP_TREE_TWO_OPERATORS (node
))
905 inside_cost
+= record_stmt_cost (cost_vec
, ncopies
, vec_perm
,
906 stmt_info
, 0, vect_body
);
909 /* Pass the inside-of-loop statements to the target-specific cost model. */
910 inside_cost
+= record_stmt_cost (cost_vec
, ncopies
, vector_stmt
,
911 stmt_info
, 0, vect_body
);
913 if (dump_enabled_p ())
914 dump_printf_loc (MSG_NOTE
, vect_location
,
915 "vect_model_simple_cost: inside_cost = %d, "
916 "prologue_cost = %d .\n", inside_cost
, prologue_cost
);
920 /* Model cost for type demotion and promotion operations. PWR is
921 normally zero for single-step promotions and demotions. It will be
922 one if two-step promotion/demotion is required, and so on. NCOPIES
923 is the number of vector results (and thus number of instructions)
924 for the narrowest end of the operation chain. Each additional
925 step doubles the number of instructions required. */
928 vect_model_promotion_demotion_cost (stmt_vec_info stmt_info
,
929 enum vect_def_type
*dt
,
930 unsigned int ncopies
, int pwr
,
931 stmt_vector_for_cost
*cost_vec
)
934 int inside_cost
= 0, prologue_cost
= 0;
936 for (i
= 0; i
< pwr
+ 1; i
++)
938 inside_cost
+= record_stmt_cost (cost_vec
, ncopies
, vec_promote_demote
,
939 stmt_info
, 0, vect_body
);
943 /* FORNOW: Assuming maximum 2 args per stmts. */
944 for (i
= 0; i
< 2; i
++)
945 if (dt
[i
] == vect_constant_def
|| dt
[i
] == vect_external_def
)
946 prologue_cost
+= record_stmt_cost (cost_vec
, 1, vector_stmt
,
947 stmt_info
, 0, vect_prologue
);
949 if (dump_enabled_p ())
950 dump_printf_loc (MSG_NOTE
, vect_location
,
951 "vect_model_promotion_demotion_cost: inside_cost = %d, "
952 "prologue_cost = %d .\n", inside_cost
, prologue_cost
);
955 /* Returns true if the current function returns DECL. */
958 cfun_returns (tree decl
)
962 FOR_EACH_EDGE (e
, ei
, EXIT_BLOCK_PTR_FOR_FN (cfun
)->preds
)
964 greturn
*ret
= safe_dyn_cast
<greturn
*> (last_stmt (e
->src
));
967 if (gimple_return_retval (ret
) == decl
)
969 /* We often end up with an aggregate copy to the result decl,
970 handle that case as well. First skip intermediate clobbers
975 def
= SSA_NAME_DEF_STMT (gimple_vuse (def
));
977 while (gimple_clobber_p (def
));
978 if (is_a
<gassign
*> (def
)
979 && gimple_assign_lhs (def
) == gimple_return_retval (ret
)
980 && gimple_assign_rhs1 (def
) == decl
)
986 /* Function vect_model_store_cost
988 Models cost for stores. In the case of grouped accesses, one access
989 has the overhead of the grouped access attributed to it. */
992 vect_model_store_cost (stmt_vec_info stmt_info
, int ncopies
,
993 enum vect_def_type dt
,
994 vect_memory_access_type memory_access_type
,
995 vec_load_store_type vls_type
, slp_tree slp_node
,
996 stmt_vector_for_cost
*cost_vec
)
998 unsigned int inside_cost
= 0, prologue_cost
= 0;
999 stmt_vec_info first_stmt_info
= stmt_info
;
1000 bool grouped_access_p
= STMT_VINFO_GROUPED_ACCESS (stmt_info
);
1002 /* ??? Somehow we need to fix this at the callers. */
1004 ncopies
= SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
);
1006 if (vls_type
== VLS_STORE_INVARIANT
)
1009 prologue_cost
+= vect_prologue_cost_for_slp_op (slp_node
, stmt_info
,
1012 prologue_cost
+= record_stmt_cost (cost_vec
, 1, scalar_to_vec
,
1013 stmt_info
, 0, vect_prologue
);
1016 /* Grouped stores update all elements in the group at once,
1017 so we want the DR for the first statement. */
1018 if (!slp_node
&& grouped_access_p
)
1019 first_stmt_info
= DR_GROUP_FIRST_ELEMENT (stmt_info
);
1021 /* True if we should include any once-per-group costs as well as
1022 the cost of the statement itself. For SLP we only get called
1023 once per group anyhow. */
1024 bool first_stmt_p
= (first_stmt_info
== stmt_info
);
1026 /* We assume that the cost of a single store-lanes instruction is
1027 equivalent to the cost of DR_GROUP_SIZE separate stores. If a grouped
1028 access is instead being provided by a permute-and-store operation,
1029 include the cost of the permutes. */
1031 && memory_access_type
== VMAT_CONTIGUOUS_PERMUTE
)
1033 /* Uses a high and low interleave or shuffle operations for each
1035 int group_size
= DR_GROUP_SIZE (first_stmt_info
);
1036 int nstmts
= ncopies
* ceil_log2 (group_size
) * group_size
;
1037 inside_cost
= record_stmt_cost (cost_vec
, nstmts
, vec_perm
,
1038 stmt_info
, 0, vect_body
);
1040 if (dump_enabled_p ())
1041 dump_printf_loc (MSG_NOTE
, vect_location
,
1042 "vect_model_store_cost: strided group_size = %d .\n",
1046 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
1047 /* Costs of the stores. */
1048 if (memory_access_type
== VMAT_ELEMENTWISE
1049 || memory_access_type
== VMAT_GATHER_SCATTER
)
1051 /* N scalar stores plus extracting the elements. */
1052 unsigned int assumed_nunits
= vect_nunits_for_cost (vectype
);
1053 inside_cost
+= record_stmt_cost (cost_vec
,
1054 ncopies
* assumed_nunits
,
1055 scalar_store
, stmt_info
, 0, vect_body
);
1058 vect_get_store_cost (stmt_info
, ncopies
, &inside_cost
, cost_vec
);
1060 if (memory_access_type
== VMAT_ELEMENTWISE
1061 || memory_access_type
== VMAT_STRIDED_SLP
)
1063 /* N scalar stores plus extracting the elements. */
1064 unsigned int assumed_nunits
= vect_nunits_for_cost (vectype
);
1065 inside_cost
+= record_stmt_cost (cost_vec
,
1066 ncopies
* assumed_nunits
,
1067 vec_to_scalar
, stmt_info
, 0, vect_body
);
1070 /* When vectorizing a store into the function result assign
1071 a penalty if the function returns in a multi-register location.
1072 In this case we assume we'll end up with having to spill the
1073 vector result and do piecewise loads as a conservative estimate. */
1074 tree base
= get_base_address (STMT_VINFO_DATA_REF (stmt_info
)->ref
);
1076 && (TREE_CODE (base
) == RESULT_DECL
1077 || (DECL_P (base
) && cfun_returns (base
)))
1078 && !aggregate_value_p (base
, cfun
->decl
))
1080 rtx reg
= hard_function_value (TREE_TYPE (base
), cfun
->decl
, 0, 1);
1081 /* ??? Handle PARALLEL in some way. */
1084 int nregs
= hard_regno_nregs (REGNO (reg
), GET_MODE (reg
));
1085 /* Assume that a single reg-reg move is possible and cheap,
1086 do not account for vector to gp register move cost. */
1090 prologue_cost
+= record_stmt_cost (cost_vec
, ncopies
,
1092 stmt_info
, 0, vect_epilogue
);
1094 prologue_cost
+= record_stmt_cost (cost_vec
, ncopies
* nregs
,
1096 stmt_info
, 0, vect_epilogue
);
1101 if (dump_enabled_p ())
1102 dump_printf_loc (MSG_NOTE
, vect_location
,
1103 "vect_model_store_cost: inside_cost = %d, "
1104 "prologue_cost = %d .\n", inside_cost
, prologue_cost
);
1108 /* Calculate cost of DR's memory access. */
/* vect_get_store_cost: add the per-copy cost of the actual vector stores
   for STMT_INFO's data reference into *INSIDE_COST, recording each cost
   entry in BODY_COST_VEC.  The cost charged depends on the alignment
   support scheme reported by vect_supportable_dr_alignment.
   NOTE(review): this text is a lossy extraction -- the embedded upstream
   line numbers jump (1118 -> 1122, 1128 -> 1132, ...), so brace lines,
   the dr_aligned case label and break statements were dropped.  Verify
   against upstream tree-vect-stmts.c before relying on this block.  */
1110 vect_get_store_cost (stmt_vec_info stmt_info
, int ncopies
,
1111 unsigned int *inside_cost
,
1112 stmt_vector_for_cost
*body_cost_vec
)
1114 dr_vec_info
*dr_info
= STMT_VINFO_DR_INFO (stmt_info
);
1115 int alignment_support_scheme
1116 = vect_supportable_dr_alignment (dr_info
, false);
/* Dispatch on how well the target supports this access's alignment.  */
1118 switch (alignment_support_scheme
)
/* Aligned case (label dropped by extraction): one vector_store per copy.  */
1122 *inside_cost
+= record_stmt_cost (body_cost_vec
, ncopies
,
1123 vector_store
, stmt_info
, 0,
1126 if (dump_enabled_p ())
1127 dump_printf_loc (MSG_NOTE
, vect_location
,
1128 "vect_model_store_cost: aligned.\n");
1132 case dr_unaligned_supported
:
1134 /* Here, we assign an additional cost for the unaligned store. */
/* The misalignment amount is passed so the target cost hook can scale
   the penalty.  */
1135 *inside_cost
+= record_stmt_cost (body_cost_vec
, ncopies
,
1136 unaligned_store
, stmt_info
,
1137 DR_MISALIGNMENT (dr_info
),
1139 if (dump_enabled_p ())
1140 dump_printf_loc (MSG_NOTE
, vect_location
,
1141 "vect_model_store_cost: unaligned supported by "
1146 case dr_unaligned_unsupported
:
/* Unsupported access: force the cost model to reject vectorization by
   charging the maximum cost.  */
1148 *inside_cost
= VECT_MAX_COST
;
1150 if (dump_enabled_p ())
1151 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1152 "vect_model_store_cost: unsupported access.\n");
1162 /* Function vect_model_load_cost
1164 Models cost for loads. In the case of grouped accesses, one access has
1165 the overhead of the grouped access attributed to it. Since unaligned
1166 accesses are supported for loads, we also account for the costs of the
1167 access scheme chosen. */
/* NOTE(review): lossy extraction -- the embedded upstream line numbers
   jump (e.g. 1206 -> 1208, 1211 -> 1217), so braces, else branches and
   some statements are missing here.  Verify against upstream
   tree-vect-stmts.c before relying on this block.  */
1170 vect_model_load_cost (stmt_vec_info stmt_info
, unsigned ncopies
,
1171 vect_memory_access_type memory_access_type
,
1172 slp_instance instance
,
1174 stmt_vector_for_cost
*cost_vec
)
1176 unsigned int inside_cost
= 0, prologue_cost
= 0;
1177 bool grouped_access_p
= STMT_VINFO_GROUPED_ACCESS (stmt_info
);
1179 gcc_assert (cost_vec
);
1181 /* ??? Somehow we need to fix this at the callers. */
/* For SLP the number of copies comes from the SLP node, overriding the
   NCOPIES argument.  */
1183 ncopies
= SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
);
1185 if (slp_node
&& SLP_TREE_LOAD_PERMUTATION (slp_node
).exists ())
1187 /* If the load is permuted then the alignment is determined by
1188 the first group element not by the first scalar stmt DR. */
1189 stmt_vec_info first_stmt_info
= DR_GROUP_FIRST_ELEMENT (stmt_info
);
1190 /* Record the cost for the permutation. */
1192 unsigned assumed_nunits
1193 = vect_nunits_for_cost (STMT_VINFO_VECTYPE (first_stmt_info
));
1194 unsigned slp_vf
= (ncopies
* assumed_nunits
) / instance
->group_size
;
/* Analysis-only call (last arg true): counts the permutes needed
   without emitting code; the count lands in n_perms.  */
1195 vect_transform_slp_perm_load (slp_node
, vNULL
, NULL
,
1196 slp_vf
, instance
, true,
1198 inside_cost
+= record_stmt_cost (cost_vec
, n_perms
, vec_perm
,
1199 first_stmt_info
, 0, vect_body
);
1200 /* And adjust the number of loads performed. This handles
1201 redundancies as well as loads that are later dead. */
1202 auto_sbitmap
perm (DR_GROUP_SIZE (first_stmt_info
));
1203 bitmap_clear (perm
);
/* Mark every group element the permutation actually reads.  */
1204 for (unsigned i
= 0;
1205 i
< SLP_TREE_LOAD_PERMUTATION (slp_node
).length (); ++i
)
1206 bitmap_set_bit (perm
, SLP_TREE_LOAD_PERMUTATION (slp_node
)[i
]);
1208 bool load_seen
= false;
/* Walk the group in vector-sized chunks; a chunk whose elements are all
   unused needs no load (the counting statements between these lines were
   dropped by the extraction).  */
1209 for (unsigned i
= 0; i
< DR_GROUP_SIZE (first_stmt_info
); ++i
)
1211 if (i
% assumed_nunits
== 0)
1217 if (bitmap_bit_p (perm
, i
))
/* Sanity bound: number of loads cannot exceed the vectors covering the
   group minus its gap.  */
1223 <= (DR_GROUP_SIZE (first_stmt_info
)
1224 - DR_GROUP_GAP (first_stmt_info
)
1225 + assumed_nunits
- 1) / assumed_nunits
);
1228 /* Grouped loads read all elements in the group at once,
1229 so we want the DR for the first statement. */
1230 stmt_vec_info first_stmt_info
= stmt_info
;
1231 if (!slp_node
&& grouped_access_p
)
1232 first_stmt_info
= DR_GROUP_FIRST_ELEMENT (stmt_info
);
1234 /* True if we should include any once-per-group costs as well as
1235 the cost of the statement itself. For SLP we only get called
1236 once per group anyhow. */
1237 bool first_stmt_p
= (first_stmt_info
== stmt_info
);
1239 /* We assume that the cost of a single load-lanes instruction is
1240 equivalent to the cost of DR_GROUP_SIZE separate loads. If a grouped
1241 access is instead being provided by a load-and-permute operation,
1242 include the cost of the permutes. */
1244 && memory_access_type
== VMAT_CONTIGUOUS_PERMUTE
)
1246 /* Uses an even and odd extract operations or shuffle operations
1247 for each needed permute. */
1248 int group_size
= DR_GROUP_SIZE (first_stmt_info
);
/* log2(group_size) permute stages, each touching group_size vectors.  */
1249 int nstmts
= ncopies
* ceil_log2 (group_size
) * group_size
;
1250 inside_cost
+= record_stmt_cost (cost_vec
, nstmts
, vec_perm
,
1251 stmt_info
, 0, vect_body
);
1253 if (dump_enabled_p ())
1254 dump_printf_loc (MSG_NOTE
, vect_location
,
1255 "vect_model_load_cost: strided group_size = %d .\n",
1259 /* The loads themselves. */
1260 if (memory_access_type
== VMAT_ELEMENTWISE
1261 || memory_access_type
== VMAT_GATHER_SCATTER
)
1263 /* N scalar loads plus gathering them into a vector. */
1264 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
1265 unsigned int assumed_nunits
= vect_nunits_for_cost (vectype
);
1266 inside_cost
+= record_stmt_cost (cost_vec
,
1267 ncopies
* assumed_nunits
,
1268 scalar_load
, stmt_info
, 0, vect_body
);
/* Otherwise (branch structure dropped by extraction): cost of the
   vector loads themselves, alignment-scheme dependent.  */
1271 vect_get_load_cost (stmt_info
, ncopies
, first_stmt_p
,
1272 &inside_cost
, &prologue_cost
,
1273 cost_vec
, cost_vec
, true);
/* Elementwise/strided-SLP additionally pay for assembling scalars into
   a vector.  */
1274 if (memory_access_type
== VMAT_ELEMENTWISE
1275 || memory_access_type
== VMAT_STRIDED_SLP
)
1276 inside_cost
+= record_stmt_cost (cost_vec
, ncopies
, vec_construct
,
1277 stmt_info
, 0, vect_body
);
1279 if (dump_enabled_p ())
1280 dump_printf_loc (MSG_NOTE
, vect_location
,
1281 "vect_model_load_cost: inside_cost = %d, "
1282 "prologue_cost = %d .\n", inside_cost
, prologue_cost
);
1286 /* Calculate cost of DR's memory access. */
/* vect_get_load_cost: add the per-copy cost of the actual vector loads
   for STMT_INFO's data reference into *INSIDE_COST (and, for the
   software-pipelined realignment scheme, one-time costs into
   *PROLOGUE_COST).  ADD_REALIGN_COST says whether this statement should
   carry the once-per-group realignment setup; RECORD_PROLOGUE_COSTS
   gates recording into PROLOGUE_COST_VEC.
   NOTE(review): lossy extraction -- case labels (dr_aligned), braces and
   break statements are missing (upstream numbering jumps 1299 -> 1303,
   1308 -> 1312, ...).  Verify against upstream tree-vect-stmts.c.  */
1288 vect_get_load_cost (stmt_vec_info stmt_info
, int ncopies
,
1289 bool add_realign_cost
, unsigned int *inside_cost
,
1290 unsigned int *prologue_cost
,
1291 stmt_vector_for_cost
*prologue_cost_vec
,
1292 stmt_vector_for_cost
*body_cost_vec
,
1293 bool record_prologue_costs
)
1295 dr_vec_info
*dr_info
= STMT_VINFO_DR_INFO (stmt_info
);
1296 int alignment_support_scheme
1297 = vect_supportable_dr_alignment (dr_info
, false);
1299 switch (alignment_support_scheme
)
/* Aligned case (label dropped by extraction): one vector_load per copy.  */
1303 *inside_cost
+= record_stmt_cost (body_cost_vec
, ncopies
, vector_load
,
1304 stmt_info
, 0, vect_body
);
1306 if (dump_enabled_p ())
1307 dump_printf_loc (MSG_NOTE
, vect_location
,
1308 "vect_model_load_cost: aligned.\n");
1312 case dr_unaligned_supported
:
1314 /* Here, we assign an additional cost for the unaligned load. */
1315 *inside_cost
+= record_stmt_cost (body_cost_vec
, ncopies
,
1316 unaligned_load
, stmt_info
,
1317 DR_MISALIGNMENT (dr_info
),
1320 if (dump_enabled_p ())
1321 dump_printf_loc (MSG_NOTE
, vect_location
,
1322 "vect_model_load_cost: unaligned supported by "
1327 case dr_explicit_realign
:
/* Explicit realignment: two loads (below and above the boundary) plus a
   permute to combine them, per copy.  */
1329 *inside_cost
+= record_stmt_cost (body_cost_vec
, ncopies
* 2,
1330 vector_load
, stmt_info
, 0, vect_body
);
1331 *inside_cost
+= record_stmt_cost (body_cost_vec
, ncopies
,
1332 vec_perm
, stmt_info
, 0, vect_body
);
1334 /* FIXME: If the misalignment remains fixed across the iterations of
1335 the containing loop, the following cost should be added to the
1337 if (targetm
.vectorize
.builtin_mask_for_load
)
1338 *inside_cost
+= record_stmt_cost (body_cost_vec
, 1, vector_stmt
,
1339 stmt_info
, 0, vect_body
);
1341 if (dump_enabled_p ())
1342 dump_printf_loc (MSG_NOTE
, vect_location
,
1343 "vect_model_load_cost: explicit realign\n");
1347 case dr_explicit_realign_optimized
:
1349 if (dump_enabled_p ())
1350 dump_printf_loc (MSG_NOTE
, vect_location
,
1351 "vect_model_load_cost: unaligned software "
1354 /* Unaligned software pipeline has a load of an address, an initial
1355 load, and possibly a mask operation to "prime" the loop. However,
1356 if this is an access in a group of loads, which provide grouped
1357 access, then the above cost should only be considered for one
1358 access in the group. Inside the loop, there is a load op
1359 and a realignment op. */
1361 if (add_realign_cost
&& record_prologue_costs
)
1363 *prologue_cost
+= record_stmt_cost (prologue_cost_vec
, 2,
1364 vector_stmt
, stmt_info
,
1366 if (targetm
.vectorize
.builtin_mask_for_load
)
1367 *prologue_cost
+= record_stmt_cost (prologue_cost_vec
, 1,
1368 vector_stmt
, stmt_info
,
/* Steady-state body cost: one load plus one realigning permute per copy.  */
1372 *inside_cost
+= record_stmt_cost (body_cost_vec
, ncopies
, vector_load
,
1373 stmt_info
, 0, vect_body
);
1374 *inside_cost
+= record_stmt_cost (body_cost_vec
, ncopies
, vec_perm
,
1375 stmt_info
, 0, vect_body
);
1377 if (dump_enabled_p ())
1378 dump_printf_loc (MSG_NOTE
, vect_location
,
1379 "vect_model_load_cost: explicit realign optimized"
1385 case dr_unaligned_unsupported
:
/* Unsupported access: charge the maximum cost so the cost model
   rejects vectorization.  */
1387 *inside_cost
= VECT_MAX_COST
;
1389 if (dump_enabled_p ())
1390 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1391 "vect_model_load_cost: unsupported access.\n");
1400 /* Insert the new stmt NEW_STMT at *GSI or at the appropriate place in
1401 the loop preheader for the vectorized stmt STMT_VINFO. */
/* NOTE(review): lossy extraction -- the guarding conditionals around the
   GSI/loop/basic-block branches were dropped (upstream numbering jumps
   1408 -> 1411 -> 1415).  Verify against upstream tree-vect-stmts.c.  */
1404 vect_init_vector_1 (stmt_vec_info stmt_vinfo
, gimple
*new_stmt
,
1405 gimple_stmt_iterator
*gsi
)
/* If a GSI was supplied, insert right there.  */
1408 vect_finish_stmt_generation (stmt_vinfo
, new_stmt
, gsi
);
/* Otherwise pick the insertion point from the vectorization context:
   loop preheader for loop vectorization ...  */
1411 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_vinfo
);
1415 class loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
1419 if (nested_in_vect_loop_p (loop
, stmt_vinfo
))
1422 pe
= loop_preheader_edge (loop
);
1423 new_bb
= gsi_insert_on_edge_immediate (pe
, new_stmt
);
/* The preheader edge must not need splitting.  */
1424 gcc_assert (!new_bb
);
/* ... or the start of the block for basic-block (SLP) vectorization.  */
1428 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_vinfo
);
1430 gimple_stmt_iterator gsi_bb_start
;
1432 gcc_assert (bb_vinfo
);
1433 bb
= BB_VINFO_BB (bb_vinfo
);
1434 gsi_bb_start
= gsi_after_labels (bb
);
1435 gsi_insert_before (&gsi_bb_start
, new_stmt
, GSI_SAME_STMT
);
1439 if (dump_enabled_p ())
1440 dump_printf_loc (MSG_NOTE
, vect_location
,
1441 "created new init_stmt: %G", new_stmt
);
1444 /* Function vect_init_vector.
1446 Insert a new stmt (INIT_STMT) that initializes a new variable of type
1447 TYPE with the value VAL. If TYPE is a vector type and VAL does not have
1448 vector type a vector with all elements equal to VAL is created first.
1449 Place the initialization at GSI if it is not NULL. Otherwise, place the
1450 initialization at the loop preheader.
1451 Return the DEF of INIT_STMT.
1452 It will be used in the vectorization of STMT_INFO. */
/* NOTE(review): lossy extraction -- local declarations (new_temp,
   init_stmt), braces and the final return were dropped (upstream
   numbering jumps 1456 -> 1461, 1481 -> 1487, ends at 1509).  Verify
   against upstream tree-vect-stmts.c.  */
1455 vect_init_vector (stmt_vec_info stmt_info
, tree val
, tree type
,
1456 gimple_stmt_iterator
*gsi
)
1461 /* We abuse this function to push sth to a SSA name with initial 'val'. */
1462 if (! useless_type_conversion_p (type
, TREE_TYPE (val
)))
/* VAL is scalar but TYPE is a vector: splat VAL into a vector.  */
1464 gcc_assert (TREE_CODE (type
) == VECTOR_TYPE
);
1465 if (! types_compatible_p (TREE_TYPE (type
), TREE_TYPE (val
)))
1467 /* Scalar boolean value should be transformed into
1468 all zeros or all ones value before building a vector. */
1469 if (VECTOR_BOOLEAN_TYPE_P (type
))
1471 tree true_val
= build_all_ones_cst (TREE_TYPE (type
));
1472 tree false_val
= build_zero_cst (TREE_TYPE (type
));
/* Constant booleans fold directly; non-constants get a COND_EXPR
   below to canonicalize them.  */
1474 if (CONSTANT_CLASS_P (val
))
1475 val
= integer_zerop (val
) ? false_val
: true_val
;
1478 new_temp
= make_ssa_name (TREE_TYPE (type
));
1479 init_stmt
= gimple_build_assign (new_temp
, COND_EXPR
,
1480 val
, true_val
, false_val
);
1481 vect_init_vector_1 (stmt_info
, init_stmt
, gsi
);
/* Non-boolean element mismatch: view-convert non-integral values,
   then convert to the vector's element type.  */
1487 gimple_seq stmts
= NULL
;
1488 if (! INTEGRAL_TYPE_P (TREE_TYPE (val
)))
1489 val
= gimple_build (&stmts
, VIEW_CONVERT_EXPR
,
1490 TREE_TYPE (type
), val
);
1492 /* ??? Condition vectorization expects us to do
1493 promotion of invariant/external defs. */
1494 val
= gimple_convert (&stmts
, TREE_TYPE (type
), val
);
/* Emit each conversion statement at the chosen insertion point,
   detaching it from the temporary sequence first.  */
1495 for (gimple_stmt_iterator gsi2
= gsi_start (stmts
);
1496 !gsi_end_p (gsi2
); )
1498 init_stmt
= gsi_stmt (gsi2
);
1499 gsi_remove (&gsi2
, false);
1500 vect_init_vector_1 (stmt_info
, init_stmt
, gsi
);
/* Now build the splat vector from the (converted) scalar.  */
1504 val
= build_vector_from_val (type
, val
);
/* Materialize VAL into a fresh SSA name and insert its init stmt.  */
1507 new_temp
= vect_get_new_ssa_name (type
, vect_simple_var
, "cst_");
1508 init_stmt
= gimple_build_assign (new_temp
, val
);
1509 vect_init_vector_1 (stmt_info
, init_stmt
, gsi
);
1513 /* Function vect_get_vec_def_for_operand_1.
1515 For a defining stmt DEF_STMT_INFO of a scalar stmt, return a vector def
1516 with type DT that will be used in the vectorized stmt. */
/* NOTE(review): lossy extraction -- the switch head, the vec_oprnd
   declaration, gcc_unreachable calls and the final return were dropped
   (upstream numbering jumps 1523 -> 1527, ends at 1558).  Verify against
   upstream tree-vect-stmts.c.  */
1519 vect_get_vec_def_for_operand_1 (stmt_vec_info def_stmt_info
,
1520 enum vect_def_type dt
)
1523 stmt_vec_info vec_stmt_info
;
/* Dispatch on the definition kind DT.  */
1527 /* operand is a constant or a loop invariant. */
1528 case vect_constant_def
:
1529 case vect_external_def
:
1530 /* Code should use vect_get_vec_def_for_operand. */
1533 /* Operand is defined by a loop header phi. In case of nested
1534 cycles we also may have uses of the backedge def. */
1535 case vect_reduction_def
:
1536 case vect_double_reduction_def
:
1537 case vect_nested_cycle
:
1538 case vect_induction_def
:
1539 gcc_assert (gimple_code (def_stmt_info
->stmt
) == GIMPLE_PHI
1540 || dt
== vect_nested_cycle
);
1543 /* operand is defined inside the loop. */
1544 case vect_internal_def
:
1546 /* Get the def from the vectorized stmt. */
1547 vec_stmt_info
= STMT_VINFO_VEC_STMT (def_stmt_info
);
1548 /* Get vectorized pattern statement. */
/* If the scalar def was replaced by a pattern stmt and is itself not
   relevant, the vector def lives on the related pattern stmt.  */
1550 && STMT_VINFO_IN_PATTERN_P (def_stmt_info
)
1551 && !STMT_VINFO_RELEVANT (def_stmt_info
))
1552 vec_stmt_info
= (STMT_VINFO_VEC_STMT
1553 (STMT_VINFO_RELATED_STMT (def_stmt_info
)));
1554 gcc_assert (vec_stmt_info
);
/* PHIs expose their def via PHI_RESULT; other stmts via their LHS.  */
1555 if (gphi
*phi
= dyn_cast
<gphi
*> (vec_stmt_info
->stmt
))
1556 vec_oprnd
= PHI_RESULT (phi
);
1558 vec_oprnd
= gimple_get_lhs (vec_stmt_info
->stmt
);
1568 /* Function vect_get_vec_def_for_operand.
1570 OP is an operand in STMT_VINFO. This function returns a (vector) def
1571 that will be used in the vectorized stmt for STMT_VINFO.
1573 In the case that OP is an SSA_NAME which is defined in the loop, then
1574 STMT_VINFO_VEC_STMT of the defining stmt holds the relevant def.
1576 In case OP is an invariant or constant, a new stmt that creates a vector def
1577 needs to be introduced. VECTYPE may be used to specify a required type for
1578 vector invariant. */
/* NOTE(review): lossy extraction -- declarations of def_stmt /
   is_simple_use / vector_type and the 'if (vectype)' guard were dropped
   (upstream numbering jumps 1601 -> 1605).  Verify against upstream
   tree-vect-stmts.c.  */
1581 vect_get_vec_def_for_operand (tree op
, stmt_vec_info stmt_vinfo
, tree vectype
)
1584 enum vect_def_type dt
;
1586 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_vinfo
);
1588 if (dump_enabled_p ())
1589 dump_printf_loc (MSG_NOTE
, vect_location
,
1590 "vect_get_vec_def_for_operand: %T\n", op
);
1592 stmt_vec_info def_stmt_info
;
/* Classify OP; vectorizable stmts are guaranteed simple uses here.  */
1593 is_simple_use
= vect_is_simple_use (op
, loop_vinfo
, &dt
,
1594 &def_stmt_info
, &def_stmt
);
1595 gcc_assert (is_simple_use
);
1596 if (def_stmt
&& dump_enabled_p ())
1597 dump_printf_loc (MSG_NOTE
, vect_location
, " def_stmt = %G", def_stmt
);
/* Constants/invariants: build a new vector def via vect_init_vector.  */
1599 if (dt
== vect_constant_def
|| dt
== vect_external_def
)
1601 tree stmt_vectype
= STMT_VINFO_VECTYPE (stmt_vinfo
);
/* Choose the vector type: caller-specified VECTYPE wins ...  */
1605 vector_type
= vectype
;
/* ... scalar booleans feeding a boolean vector use its truth type ...  */
1606 else if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op
))
1607 && VECTOR_BOOLEAN_TYPE_P (stmt_vectype
))
1608 vector_type
= truth_type_for (stmt_vectype
);
/* ... otherwise derive it from OP's scalar type.  */
1610 vector_type
= get_vectype_for_scalar_type (loop_vinfo
, TREE_TYPE (op
));
1612 gcc_assert (vector_type
);
1613 return vect_init_vector (stmt_vinfo
, op
, vector_type
, NULL
);
/* Internal defs: fetch the def from the already-vectorized stmt.  */
1616 return vect_get_vec_def_for_operand_1 (def_stmt_info
, dt
);
1620 /* Function vect_get_vec_def_for_stmt_copy
1622 Return a vector-def for an operand. This function is used when the
1623 vectorized stmt to be created (by the caller to this function) is a "copy"
1624 created in case the vectorized result cannot fit in one vector, and several
1625 copies of the vector-stmt are required. In this case the vector-def is
1626 retrieved from the vector stmt recorded in the STMT_VINFO_RELATED_STMT field
1627 of the stmt that defines VEC_OPRND. VINFO describes the vectorization.
1630 In case the vectorization factor (VF) is bigger than the number
1631 of elements that can fit in a vectype (nunits), we have to generate
1632 more than one vector stmt to vectorize the scalar stmt. This situation
1633 arises when there are multiple data-types operated upon in the loop; the
1634 smallest data-type determines the VF, and as a result, when vectorizing
1635 stmts operating on wider types we need to create 'VF/nunits' "copies" of the
1636 vector stmt (each computing a vector of 'nunits' results, and together
1637 computing 'VF' results in each iteration). This function is called when
1638 vectorizing such a stmt (e.g. vectorizing S2 in the illustration below, in
1639 which VF=16 and nunits=4, so the number of copies required is 4):
1641 scalar stmt: vectorized into: STMT_VINFO_RELATED_STMT
1643 S1: x = load VS1.0: vx.0 = memref0 VS1.1
1644 VS1.1: vx.1 = memref1 VS1.2
1645 VS1.2: vx.2 = memref2 VS1.3
1646 VS1.3: vx.3 = memref3
1648 S2: z = x + ... VSnew.0: vz0 = vx.0 + ... VSnew.1
1649 VSnew.1: vz1 = vx.1 + ... VSnew.2
1650 VSnew.2: vz2 = vx.2 + ... VSnew.3
1651 VSnew.3: vz3 = vx.3 + ...
1653 The vectorization of S1 is explained in vectorizable_load.
1654 The vectorization of S2:
1655 To create the first vector-stmt out of the 4 copies - VSnew.0 -
1656 the function 'vect_get_vec_def_for_operand' is called to
1657 get the relevant vector-def for each operand of S2. For operand x it
1658 returns the vector-def 'vx.0'.
1660 To create the remaining copies of the vector-stmt (VSnew.j), this
1661 function is called to get the relevant vector-def for each operand. It is
1662 obtained from the respective VS1.j stmt, which is recorded in the
1663 STMT_VINFO_RELATED_STMT field of the stmt that defines VEC_OPRND.
1665 For example, to obtain the vector-def 'vx.1' in order to create the
1666 vector stmt 'VSnew.1', this function is called with VEC_OPRND='vx.0'.
1667 Given 'vx0' we obtain the stmt that defines it ('VS1.0'); from the
1668 STMT_VINFO_RELATED_STMT field of 'VS1.0' we obtain the next copy - 'VS1.1',
1669 and return its def ('vx.1').
1670 Overall, to create the above sequence this function will be called 3 times:
1671 vx.1 = vect_get_vec_def_for_stmt_copy (vinfo, vx.0);
1672 vx.2 = vect_get_vec_def_for_stmt_copy (vinfo, vx.1);
1673 vx.3 = vect_get_vec_def_for_stmt_copy (vinfo, vx.2); */
/* NOTE(review): lossy extraction -- the early-return for a null
   def_stmt_info and the final return were dropped (upstream numbering
   jumps 1678 -> 1680 -> 1683, ends at 1688).  Verify against upstream
   tree-vect-stmts.c.  */
1676 vect_get_vec_def_for_stmt_copy (vec_info
*vinfo
, tree vec_oprnd
)
1678 stmt_vec_info def_stmt_info
= vinfo
->lookup_def (vec_oprnd
);
/* Defs from outside the vectorized region have no next-copy chain.  */
1680 /* Do nothing; can reuse same def. */
/* Step to the next copy in the RELATED_STMT chain.  */
1683 def_stmt_info
= STMT_VINFO_RELATED_STMT (def_stmt_info
);
1684 gcc_assert (def_stmt_info
);
1685 if (gphi
*phi
= dyn_cast
<gphi
*> (def_stmt_info
->stmt
))
1686 vec_oprnd
= PHI_RESULT (phi
);
1688 vec_oprnd
= gimple_get_lhs (def_stmt_info
->stmt
);
1693 /* Get vectorized definitions for the operands to create a copy of an original
1694 stmt. See vect_get_vec_def_for_stmt_copy () for details. */
/* Pops the previous copy's def off each operand vector, advances it to
   the next copy and pushes it back, so the vectors again hold exactly
   one (the current) def per operand.  VEC_OPRNDS1 may be null/empty for
   single-operand stmts.  */
1697 vect_get_vec_defs_for_stmt_copy (vec_info
*vinfo
,
1698 vec
<tree
> *vec_oprnds0
,
1699 vec
<tree
> *vec_oprnds1
)
1701 tree vec_oprnd
= vec_oprnds0
->pop ();
1703 vec_oprnd
= vect_get_vec_def_for_stmt_copy (vinfo
, vec_oprnd
);
1704 vec_oprnds0
->quick_push (vec_oprnd
);
/* Second operand is optional.  */
1706 if (vec_oprnds1
&& vec_oprnds1
->length ())
1708 vec_oprnd
= vec_oprnds1
->pop ();
1709 vec_oprnd
= vect_get_vec_def_for_stmt_copy (vinfo
, vec_oprnd
);
1710 vec_oprnds1
->quick_push (vec_oprnd
);
1715 /* Get vectorized definitions for OP0 and OP1. */
/* NOTE(review): lossy extraction -- the 'if (slp_node)' / 'else'
   structure, the vec_oprnd declaration and the guards around op1 were
   dropped (upstream numbering jumps 1720 -> 1725, 1727 -> 1729).
   Verify against upstream tree-vect-stmts.c.  */
1718 vect_get_vec_defs (tree op0
, tree op1
, stmt_vec_info stmt_info
,
1719 vec
<tree
> *vec_oprnds0
,
1720 vec
<tree
> *vec_oprnds1
,
/* SLP path: collect the defs of all children at once.  */
1725 auto_vec
<vec
<tree
> > vec_defs (SLP_TREE_CHILDREN (slp_node
).length ());
/* Request defs for one or two operands depending on whether OP1 exists.  */
1726 vect_get_slp_defs (slp_node
, &vec_defs
, op1
? 2 : 1);
1727 *vec_oprnds0
= vec_defs
[0];
1729 *vec_oprnds1
= vec_defs
[1];
/* Non-SLP path: one def per operand, fetched individually.  */
1735 vec_oprnds0
->create (1);
1736 vec_oprnd
= vect_get_vec_def_for_operand (op0
, stmt_info
);
1737 vec_oprnds0
->quick_push (vec_oprnd
);
1741 vec_oprnds1
->create (1);
1742 vec_oprnd
= vect_get_vec_def_for_operand (op1
, stmt_info
);
1743 vec_oprnds1
->quick_push (vec_oprnd
);
1748 /* Helper function called by vect_finish_replace_stmt and
1749 vect_finish_stmt_generation. Set the location of the new
1750 statement and create and return a stmt_vec_info for it. */
1752 static stmt_vec_info
1753 vect_finish_stmt_generation_1 (stmt_vec_info stmt_info
, gimple
*vec_stmt
)
1755 vec_info
*vinfo
= stmt_info
->vinfo
;
/* Register the new stmt with the vectorization info.  */
1757 stmt_vec_info vec_stmt_info
= vinfo
->add_stmt (vec_stmt
);
1759 if (dump_enabled_p ())
1760 dump_printf_loc (MSG_NOTE
, vect_location
, "add new stmt: %G", vec_stmt
);
/* Inherit the scalar stmt's source location for diagnostics/debug.  */
1762 gimple_set_location (vec_stmt
, gimple_location (stmt_info
->stmt
));
1764 /* While EH edges will generally prevent vectorization, stmt might
1765 e.g. be in a must-not-throw region. Ensure newly created stmts
1766 that could throw are part of the same region. */
1767 int lp_nr
= lookup_stmt_eh_lp (stmt_info
->stmt
);
1768 if (lp_nr
!= 0 && stmt_could_throw_p (cfun
, vec_stmt
))
1769 add_stmt_to_eh_lp (vec_stmt
, lp_nr
);
1771 return vec_stmt_info
;
1774 /* Replace the scalar statement STMT_INFO with a new vector statement VEC_STMT,
1775 which sets the same scalar result as STMT_INFO did. Create and return a
1776 stmt_vec_info for VEC_STMT. */
1779 vect_finish_replace_stmt (stmt_vec_info stmt_info
, gimple
*vec_stmt
)
/* The replacement must define the exact same LHS as the scalar stmt.  */
1781 gcc_assert (gimple_get_lhs (stmt_info
->stmt
) == gimple_get_lhs (vec_stmt
));
/* Swap the stmt in place (third arg true: update EH/virtual info).  */
1783 gimple_stmt_iterator gsi
= gsi_for_stmt (stmt_info
->stmt
);
1784 gsi_replace (&gsi
, vec_stmt
, true);
1786 return vect_finish_stmt_generation_1 (stmt_info
, vec_stmt
);
1789 /* Add VEC_STMT to the vectorized implementation of STMT_INFO and insert it
1790 before *GSI. Create and return a stmt_vec_info for VEC_STMT. */
/* NOTE(review): lossy extraction -- braces around the nested
   conditionals were dropped (upstream numbering jumps 1799 -> 1801,
   1803 -> 1805).  Verify against upstream tree-vect-stmts.c.  */
1793 vect_finish_stmt_generation (stmt_vec_info stmt_info
, gimple
*vec_stmt
,
1794 gimple_stmt_iterator
*gsi
)
1796 gcc_assert (gimple_code (stmt_info
->stmt
) != GIMPLE_LABEL
);
/* If inserting before an existing stmt and VEC_STMT touches memory,
   wire up the virtual operands manually to keep virtual SSA valid.  */
1798 if (!gsi_end_p (*gsi
)
1799 && gimple_has_mem_ops (vec_stmt
))
1801 gimple
*at_stmt
= gsi_stmt (*gsi
);
1802 tree vuse
= gimple_vuse (at_stmt
);
1803 if (vuse
&& TREE_CODE (vuse
) == SSA_NAME
)
1805 tree vdef
= gimple_vdef (at_stmt
);
/* The new stmt observes the same memory state as the stmt we insert
   before.  */
1806 gimple_set_vuse (vec_stmt
, gimple_vuse (at_stmt
));
1807 /* If we have an SSA vuse and insert a store, update virtual
1808 SSA form to avoid triggering the renamer. Do so only
1809 if we can easily see all uses - which is what almost always
1810 happens with the way vectorized stmts are inserted. */
/* "Is a store": an assign to a non-register LHS, or a call that is not
   const/pure/no-vops.  */
1811 if ((vdef
&& TREE_CODE (vdef
) == SSA_NAME
)
1812 && ((is_gimple_assign (vec_stmt
)
1813 && !is_gimple_reg (gimple_assign_lhs (vec_stmt
)))
1814 || (is_gimple_call (vec_stmt
)
1815 && !(gimple_call_flags (vec_stmt
)
1816 & (ECF_CONST
|ECF_PURE
|ECF_NOVOPS
)))))
/* Give the new store its own vdef and make the following stmt consume
   it, preserving the virtual use-def chain.  */
1818 tree new_vdef
= copy_ssa_name (vuse
, vec_stmt
);
1819 gimple_set_vdef (vec_stmt
, new_vdef
);
1820 SET_USE (gimple_vuse_op (at_stmt
), new_vdef
);
1824 gsi_insert_before (gsi
, vec_stmt
, GSI_SAME_STMT
);
1825 return vect_finish_stmt_generation_1 (stmt_info
, vec_stmt
);
1828 /* We want to vectorize a call to combined function CFN with function
1829 decl FNDECL, using VECTYPE_OUT as the type of the output and VECTYPE_IN
1830 as the types of all inputs. Check whether this is possible using
1831 an internal function, returning its code if so or IFN_LAST if not. */
/* NOTE(review): lossy extraction -- the ifn declaration, the else
   keyword, the 'return ifn;' and the final 'return IFN_LAST;' were
   dropped (upstream numbering jumps 1835 -> 1838, ends at 1850).
   Verify against upstream tree-vect-stmts.c.  */
1834 vectorizable_internal_function (combined_fn cfn
, tree fndecl
,
1835 tree vectype_out
, tree vectype_in
)
/* Map CFN (or, failing that, FNDECL) to an internal function code.  */
1838 if (internal_fn_p (cfn
))
1839 ifn
= as_internal_fn (cfn
);
1841 ifn
= associated_internal_fn (fndecl
);
1842 if (ifn
!= IFN_LAST
&& direct_internal_fn_p (ifn
))
1844 const direct_internal_fn_info
&info
= direct_internal_fn (ifn
);
1845 if (info
.vectorizable
)
/* A negative type index means "same as the output type".  */
1847 tree type0
= (info
.type0
< 0 ? vectype_out
: vectype_in
);
1848 tree type1
= (info
.type1
< 0 ? vectype_out
: vectype_in
);
1849 if (direct_internal_fn_supported_p (ifn
, tree_pair (type0
, type1
),
1850 OPTIMIZE_FOR_SPEED
))
1858 static tree
permute_vec_elements (tree
, tree
, tree
, stmt_vec_info
,
1859 gimple_stmt_iterator
*);
1861 /* Check whether a load or store statement in the loop described by
1862 LOOP_VINFO is possible in a fully-masked loop. This is testing
1863 whether the vectorizer pass has the appropriate support, as well as
1864 whether the target does.
1866 VLS_TYPE says whether the statement is a load or store and VECTYPE
1867 is the type of the vector being loaded or stored. MEMORY_ACCESS_TYPE
1868 says how the load or store is going to be implemented and GROUP_SIZE
1869 is the number of load or store statements in the containing group.
1870 If the access is a gather load or scatter store, GS_INFO describes
1871 its arguments. If the load or store is conditional, SCALAR_MASK is the
1872 condition under which it occurs.
1874 Clear LOOP_VINFO_CAN_FULLY_MASK_P if a fully-masked loop is not
1875 supported, otherwise record the required mask types. */
/* NOTE(review): lossy extraction -- braces and early 'return'
   statements after each failure/success path were dropped (upstream
   numbering jumps 1901 -> 1904, 1905 -> 1909, ...).  Verify against
   upstream tree-vect-stmts.c.  */
1878 check_load_store_masking (loop_vec_info loop_vinfo
, tree vectype
,
1879 vec_load_store_type vls_type
, int group_size
,
1880 vect_memory_access_type memory_access_type
,
1881 gather_scatter_info
*gs_info
, tree scalar_mask
)
1883 /* Invariant loads need no special support. */
1884 if (memory_access_type
== VMAT_INVARIANT
)
1887 vec_loop_masks
*masks
= &LOOP_VINFO_MASKS (loop_vinfo
);
1888 machine_mode vecmode
= TYPE_MODE (vectype
);
1889 bool is_load
= (vls_type
== VLS_LOAD
);
/* Case 1: load/store-lanes -- require masked lanes support.  */
1890 if (memory_access_type
== VMAT_LOAD_STORE_LANES
)
1893 ? !vect_load_lanes_supported (vectype
, group_size
, true)
1894 : !vect_store_lanes_supported (vectype
, group_size
, true))
1896 if (dump_enabled_p ())
1897 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1898 "can't use a fully-masked loop because the"
1899 " target doesn't have an appropriate masked"
1900 " load/store-lanes instruction.\n");
1901 LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo
) = false;
/* Supported: record the mask requirement and we are done.  */
1904 unsigned int ncopies
= vect_get_num_copies (loop_vinfo
, vectype
);
1905 vect_record_loop_mask (loop_vinfo
, masks
, ncopies
, vectype
, scalar_mask
);
/* Case 2: gather/scatter -- require the masked internal function.  */
1909 if (memory_access_type
== VMAT_GATHER_SCATTER
)
1911 internal_fn ifn
= (is_load
1912 ? IFN_MASK_GATHER_LOAD
1913 : IFN_MASK_SCATTER_STORE
);
1914 if (!internal_gather_scatter_fn_supported_p (ifn
, vectype
,
1915 gs_info
->memory_type
,
1916 gs_info
->offset_vectype
,
1919 if (dump_enabled_p ())
1920 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1921 "can't use a fully-masked loop because the"
1922 " target doesn't have an appropriate masked"
1923 " gather load or scatter store instruction.\n");
1924 LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo
) = false;
1927 unsigned int ncopies
= vect_get_num_copies (loop_vinfo
, vectype
);
1928 vect_record_loop_mask (loop_vinfo
, masks
, ncopies
, vectype
, scalar_mask
);
/* Case 3: anything other than plain contiguous accesses is unsupported
   in a fully-masked loop.  */
1932 if (memory_access_type
!= VMAT_CONTIGUOUS
1933 && memory_access_type
!= VMAT_CONTIGUOUS_PERMUTE
)
1935 /* Element X of the data must come from iteration i * VF + X of the
1936 scalar loop. We need more work to support other mappings. */
1937 if (dump_enabled_p ())
1938 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1939 "can't use a fully-masked loop because an access"
1940 " isn't contiguous.\n");
1941 LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo
) = false;
/* Case 4: contiguous access -- the target must provide a mask mode and
   masked load/store for this vector mode.  */
1945 machine_mode mask_mode
;
1946 if (!VECTOR_MODE_P (vecmode
)
1947 || !targetm
.vectorize
.get_mask_mode (vecmode
).exists (&mask_mode
)
1948 || !can_vec_mask_load_store_p (vecmode
, mask_mode
, is_load
))
1950 if (dump_enabled_p ())
1951 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1952 "can't use a fully-masked loop because the target"
1953 " doesn't have the appropriate masked load or"
1955 LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo
) = false;
1958 /* We might load more scalars than we need for permuting SLP loads.
1959 We checked in get_group_load_store_type that the extra elements
1960 don't leak into a new vector. */
1961 poly_uint64 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
1962 poly_uint64 vf
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
);
1963 unsigned int nvectors
;
1964 if (can_div_away_from_zero_p (group_size
* vf
, nunits
, &nvectors
))
1965 vect_record_loop_mask (loop_vinfo
, masks
, nvectors
, vectype
, scalar_mask
);
1970 /* Return the mask input to a masked load or store. VEC_MASK is the vectorized
1971 form of the scalar mask condition and LOOP_MASK, if nonnull, is the mask
1972 that needs to be applied to all loads and stores in a vectorized loop.
1973 Return VEC_MASK if LOOP_MASK is null, otherwise return VEC_MASK & LOOP_MASK.
1975 MASK_TYPE is the type of both masks. If new statements are needed,
1976 insert them before GSI. */
/* NOTE(review): lossy extraction -- the 'if (loop_mask == NULL_TREE)
   return vec_mask;' shortcut and the final 'return and_res;' were
   dropped (upstream numbering jumps 1982 -> 1986, ends at 1990).
   Verify against upstream tree-vect-stmts.c.  */
1979 prepare_load_store_mask (tree mask_type
, tree loop_mask
, tree vec_mask
,
1980 gimple_stmt_iterator
*gsi
)
1982 gcc_assert (useless_type_conversion_p (mask_type
, TREE_TYPE (vec_mask
)));
1986 gcc_assert (TREE_TYPE (loop_mask
) == mask_type
);
/* Emit and_res = vec_mask & loop_mask just before GSI.  */
1987 tree and_res
= make_temp_ssa_name (mask_type
, NULL
, "vec_mask_and");
1988 gimple
*and_stmt
= gimple_build_assign (and_res
, BIT_AND_EXPR
,
1989 vec_mask
, loop_mask
);
1990 gsi_insert_before (gsi
, and_stmt
, GSI_SAME_STMT
);
1994 /* Determine whether we can use a gather load or scatter store to vectorize
1995 strided load or store STMT_INFO by truncating the current offset to a
1996 smaller width. We need to be able to construct an offset vector:
1998 { 0, X, X*2, X*3, ... }
2000 without loss of precision, where X is STMT_INFO's DR_STEP.
2002 Return true if this is possible, describing the gather load or scatter
2003 store in GS_INFO. MASKED_P is true if the load or store is conditional.  */
/* NOTE(review): lossy extraction -- 'return false/true' statements,
   braces, overflow checks and 'continue's inside the scale loop were
   dropped (upstream numbering jumps 2013 -> 2015, 2045 -> 2048, ...).
   Verify against upstream tree-vect-stmts.c.  */
2006 vect_truncate_gather_scatter_offset (stmt_vec_info stmt_info
,
2007 loop_vec_info loop_vinfo
, bool masked_p
,
2008 gather_scatter_info
*gs_info
)
2010 dr_vec_info
*dr_info
= STMT_VINFO_DR_INFO (stmt_info
);
2011 data_reference
*dr
= dr_info
->dr
;
2012 tree step
= DR_STEP (dr
);
/* Only constant steps can be safely truncated.  */
2013 if (TREE_CODE (step
) != INTEGER_CST
)
2015 /* ??? Perhaps we could use range information here? */
2016 if (dump_enabled_p ())
2017 dump_printf_loc (MSG_NOTE
, vect_location
,
2018 "cannot truncate variable step.\n");
2022 /* Get the number of bits in an element. */
2023 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
2024 scalar_mode element_mode
= SCALAR_TYPE_MODE (TREE_TYPE (vectype
));
2025 unsigned int element_bits
= GET_MODE_BITSIZE (element_mode
);
2027 /* Set COUNT to the upper limit on the number of elements - 1.
2028 Start with the maximum vectorization factor. */
2029 unsigned HOST_WIDE_INT count
= vect_max_vf (loop_vinfo
) - 1;
2031 /* Try lowering COUNT to the number of scalar latch iterations. */
2032 class loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
2033 widest_int max_iters
;
2034 if (max_loop_iterations (loop
, &max_iters
)
2035 && max_iters
< count
)
2036 count
= max_iters
.to_shwi ();
2038 /* Try scales of 1 and the element size. */
2039 int scales
[] = { 1, vect_get_scalar_dr_size (dr_info
) };
2040 wi::overflow_type overflow
= wi::OVF_NONE
;
2041 for (int i
= 0; i
< 2; ++i
)
2043 int scale
= scales
[i
];
/* The step must divide evenly by this scale.  */
2045 if (!wi::multiple_of_p (wi::to_widest (step
), scale
, SIGNED
, &factor
))
2048 /* Determine the minimum precision of (COUNT - 1) * STEP / SCALE. */
2049 widest_int range
= wi::mul (count
, factor
, SIGNED
, &overflow
);
/* Negative ranges need a signed offset type.  */
2052 signop sign
= range
>= 0 ? UNSIGNED
: SIGNED
;
2053 unsigned int min_offset_bits
= wi::min_precision (range
, sign
);
2055 /* Find the narrowest viable offset type. */
2056 unsigned int offset_bits
= 1U << ceil_log2 (min_offset_bits
);
2057 tree offset_type
= build_nonstandard_integer_type (offset_bits
,
2060 /* See whether the target supports the operation with an offset
2061 no narrower than OFFSET_TYPE. */
2062 tree memory_type
= TREE_TYPE (DR_REF (dr
));
2063 if (!vect_gather_scatter_fn_p (loop_vinfo
, DR_IS_READ (dr
), masked_p
,
2064 vectype
, memory_type
, offset_type
, scale
,
2065 &gs_info
->ifn
, &gs_info
->offset_vectype
))
/* Success: fill in the gather/scatter description.  */
2068 gs_info
->decl
= NULL_TREE
;
2069 /* Logically the sum of DR_BASE_ADDRESS, DR_INIT and DR_OFFSET,
2070 but we don't need to store that here. */
2071 gs_info
->base
= NULL_TREE
;
2072 gs_info
->element_type
= TREE_TYPE (vectype
);
2073 gs_info
->offset
= fold_convert (offset_type
, step
);
2074 gs_info
->offset_dt
= vect_constant_def
;
2075 gs_info
->scale
= scale
;
2076 gs_info
->memory_type
= memory_type
;
/* All scales failed; explain if range computation overflowed.  */
2080 if (overflow
&& dump_enabled_p ())
2081 dump_printf_loc (MSG_NOTE
, vect_location
,
2082 "truncating gather/scatter offset to %d bits"
2083 " might change its value.\n", element_bits
);
2088 /* Return true if we can use gather/scatter internal functions to
2089 vectorize STMT_INFO, which is a grouped or strided load or store.
2090 MASKED_P is true if load or store is conditional. When returning
2091 true, fill in GS_INFO with the information required to perform the
2095 vect_use_strided_gather_scatters_p (stmt_vec_info stmt_info
,
2096 loop_vec_info loop_vinfo
, bool masked_p
,
2097 gather_scatter_info
*gs_info
)
2099 if (!vect_check_gather_scatter (stmt_info
, loop_vinfo
, gs_info
)
2101 return vect_truncate_gather_scatter_offset (stmt_info
, loop_vinfo
,
2104 tree old_offset_type
= TREE_TYPE (gs_info
->offset
);
2105 tree new_offset_type
= TREE_TYPE (gs_info
->offset_vectype
);
2107 gcc_assert (TYPE_PRECISION (new_offset_type
)
2108 >= TYPE_PRECISION (old_offset_type
));
2109 gs_info
->offset
= fold_convert (new_offset_type
, gs_info
->offset
);
2111 if (dump_enabled_p ())
2112 dump_printf_loc (MSG_NOTE
, vect_location
,
2113 "using gather/scatter for strided/grouped access,"
2114 " scale = %d\n", gs_info
->scale
);
2119 /* STMT_INFO is a non-strided load or store, meaning that it accesses
2120 elements with a known constant step. Return -1 if that step
2121 is negative, 0 if it is zero, and 1 if it is greater than zero. */
2124 compare_step_with_zero (stmt_vec_info stmt_info
)
2126 dr_vec_info
*dr_info
= STMT_VINFO_DR_INFO (stmt_info
);
2127 return tree_int_cst_compare (vect_dr_behavior (dr_info
)->step
,
2131 /* If the target supports a permute mask that reverses the elements in
2132 a vector of type VECTYPE, return that mask, otherwise return null. */
2135 perm_mask_for_reverse (tree vectype
)
2137 poly_uint64 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
2139 /* The encoding has a single stepped pattern. */
2140 vec_perm_builder
sel (nunits
, 1, 3);
2141 for (int i
= 0; i
< 3; ++i
)
2142 sel
.quick_push (nunits
- 1 - i
);
2144 vec_perm_indices
indices (sel
, 1, nunits
);
2145 if (!can_vec_perm_const_p (TYPE_MODE (vectype
), indices
))
2147 return vect_gen_perm_mask_checked (vectype
, indices
);
2150 /* A subroutine of get_load_store_type, with a subset of the same
2151 arguments. Handle the case where STMT_INFO is a load or store that
2152 accesses consecutive elements with a negative step. */
2154 static vect_memory_access_type
2155 get_negative_load_store_type (stmt_vec_info stmt_info
, tree vectype
,
2156 vec_load_store_type vls_type
,
2157 unsigned int ncopies
)
2159 dr_vec_info
*dr_info
= STMT_VINFO_DR_INFO (stmt_info
);
2160 dr_alignment_support alignment_support_scheme
;
2164 if (dump_enabled_p ())
2165 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2166 "multiple types with negative step.\n");
2167 return VMAT_ELEMENTWISE
;
2170 alignment_support_scheme
= vect_supportable_dr_alignment (dr_info
, false);
2171 if (alignment_support_scheme
!= dr_aligned
2172 && alignment_support_scheme
!= dr_unaligned_supported
)
2174 if (dump_enabled_p ())
2175 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2176 "negative step but alignment required.\n");
2177 return VMAT_ELEMENTWISE
;
2180 if (vls_type
== VLS_STORE_INVARIANT
)
2182 if (dump_enabled_p ())
2183 dump_printf_loc (MSG_NOTE
, vect_location
,
2184 "negative step with invariant source;"
2185 " no permute needed.\n");
2186 return VMAT_CONTIGUOUS_DOWN
;
2189 if (!perm_mask_for_reverse (vectype
))
2191 if (dump_enabled_p ())
2192 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2193 "negative step and reversing not supported.\n");
2194 return VMAT_ELEMENTWISE
;
2197 return VMAT_CONTIGUOUS_REVERSE
;
2200 /* STMT_INFO is either a masked or unconditional store. Return the value
2204 vect_get_store_rhs (stmt_vec_info stmt_info
)
2206 if (gassign
*assign
= dyn_cast
<gassign
*> (stmt_info
->stmt
))
2208 gcc_assert (gimple_assign_single_p (assign
));
2209 return gimple_assign_rhs1 (assign
);
2211 if (gcall
*call
= dyn_cast
<gcall
*> (stmt_info
->stmt
))
2213 internal_fn ifn
= gimple_call_internal_fn (call
);
2214 int index
= internal_fn_stored_value_index (ifn
);
2215 gcc_assert (index
>= 0);
2216 return gimple_call_arg (call
, index
);
2221 /* A subroutine of get_load_store_type, with a subset of the same
2222 arguments. Handle the case where STMT_INFO is part of a grouped load
2225 For stores, the statements in the group are all consecutive
2226 and there is no gap at the end. For loads, the statements in the
2227 group might not be consecutive; there can be gaps between statements
2228 as well as at the end. */
2231 get_group_load_store_type (stmt_vec_info stmt_info
, tree vectype
, bool slp
,
2232 bool masked_p
, vec_load_store_type vls_type
,
2233 vect_memory_access_type
*memory_access_type
,
2234 gather_scatter_info
*gs_info
)
2236 vec_info
*vinfo
= stmt_info
->vinfo
;
2237 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
2238 class loop
*loop
= loop_vinfo
? LOOP_VINFO_LOOP (loop_vinfo
) : NULL
;
2239 stmt_vec_info first_stmt_info
= DR_GROUP_FIRST_ELEMENT (stmt_info
);
2240 dr_vec_info
*first_dr_info
= STMT_VINFO_DR_INFO (first_stmt_info
);
2241 unsigned int group_size
= DR_GROUP_SIZE (first_stmt_info
);
2242 bool single_element_p
= (stmt_info
== first_stmt_info
2243 && !DR_GROUP_NEXT_ELEMENT (stmt_info
));
2244 unsigned HOST_WIDE_INT gap
= DR_GROUP_GAP (first_stmt_info
);
2245 poly_uint64 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
2247 /* True if the vectorized statements would access beyond the last
2248 statement in the group. */
2249 bool overrun_p
= false;
2251 /* True if we can cope with such overrun by peeling for gaps, so that
2252 there is at least one final scalar iteration after the vector loop. */
2253 bool can_overrun_p
= (!masked_p
2254 && vls_type
== VLS_LOAD
2258 /* There can only be a gap at the end of the group if the stride is
2259 known at compile time. */
2260 gcc_assert (!STMT_VINFO_STRIDED_P (first_stmt_info
) || gap
== 0);
2262 /* Stores can't yet have gaps. */
2263 gcc_assert (slp
|| vls_type
== VLS_LOAD
|| gap
== 0);
2267 if (STMT_VINFO_STRIDED_P (first_stmt_info
))
2269 /* Try to use consecutive accesses of DR_GROUP_SIZE elements,
2270 separated by the stride, until we have a complete vector.
2271 Fall back to scalar accesses if that isn't possible. */
2272 if (multiple_p (nunits
, group_size
))
2273 *memory_access_type
= VMAT_STRIDED_SLP
;
2275 *memory_access_type
= VMAT_ELEMENTWISE
;
2279 overrun_p
= loop_vinfo
&& gap
!= 0;
2280 if (overrun_p
&& vls_type
!= VLS_LOAD
)
2282 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2283 "Grouped store with gaps requires"
2284 " non-consecutive accesses\n");
2287 /* An overrun is fine if the trailing elements are smaller
2288 than the alignment boundary B. Every vector access will
2289 be a multiple of B and so we are guaranteed to access a
2290 non-gap element in the same B-sized block. */
2292 && gap
< (vect_known_alignment_in_bytes (first_dr_info
)
2293 / vect_get_scalar_dr_size (first_dr_info
)))
2296 /* If the gap splits the vector in half and the target
2297 can do half-vector operations avoid the epilogue peeling
2298 by simply loading half of the vector only. Usually
2299 the construction with an upper zero half will be elided. */
2300 dr_alignment_support alignment_support_scheme
;
2301 scalar_mode elmode
= SCALAR_TYPE_MODE (TREE_TYPE (vectype
));
2305 && (((alignment_support_scheme
2306 = vect_supportable_dr_alignment (first_dr_info
, false)))
2308 || alignment_support_scheme
== dr_unaligned_supported
)
2309 && known_eq (nunits
, (group_size
- gap
) * 2)
2310 && known_eq (nunits
, group_size
)
2311 && related_vector_mode (TYPE_MODE (vectype
), elmode
,
2312 group_size
- gap
).exists (&vmode
)
2313 && (convert_optab_handler (vec_init_optab
,
2314 TYPE_MODE (vectype
), vmode
)
2315 != CODE_FOR_nothing
))
2318 if (overrun_p
&& !can_overrun_p
)
2320 if (dump_enabled_p ())
2321 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2322 "Peeling for outer loop is not supported\n");
2325 int cmp
= compare_step_with_zero (stmt_info
);
2327 *memory_access_type
= get_negative_load_store_type
2328 (stmt_info
, vectype
, vls_type
, 1);
2331 gcc_assert (!loop_vinfo
|| cmp
> 0);
2332 *memory_access_type
= VMAT_CONTIGUOUS
;
2338 /* We can always handle this case using elementwise accesses,
2339 but see if something more efficient is available. */
2340 *memory_access_type
= VMAT_ELEMENTWISE
;
2342 /* If there is a gap at the end of the group then these optimizations
2343 would access excess elements in the last iteration. */
2344 bool would_overrun_p
= (gap
!= 0);
2345 /* An overrun is fine if the trailing elements are smaller than the
2346 alignment boundary B. Every vector access will be a multiple of B
2347 and so we are guaranteed to access a non-gap element in the
2348 same B-sized block. */
2351 && gap
< (vect_known_alignment_in_bytes (first_dr_info
)
2352 / vect_get_scalar_dr_size (first_dr_info
)))
2353 would_overrun_p
= false;
2355 if (!STMT_VINFO_STRIDED_P (first_stmt_info
)
2356 && (can_overrun_p
|| !would_overrun_p
)
2357 && compare_step_with_zero (stmt_info
) > 0)
2359 /* First cope with the degenerate case of a single-element
2361 if (known_eq (TYPE_VECTOR_SUBPARTS (vectype
), 1U))
2362 *memory_access_type
= VMAT_CONTIGUOUS
;
2364 /* Otherwise try using LOAD/STORE_LANES. */
2365 if (*memory_access_type
== VMAT_ELEMENTWISE
2366 && (vls_type
== VLS_LOAD
2367 ? vect_load_lanes_supported (vectype
, group_size
, masked_p
)
2368 : vect_store_lanes_supported (vectype
, group_size
,
2371 *memory_access_type
= VMAT_LOAD_STORE_LANES
;
2372 overrun_p
= would_overrun_p
;
2375 /* If that fails, try using permuting loads. */
2376 if (*memory_access_type
== VMAT_ELEMENTWISE
2377 && (vls_type
== VLS_LOAD
2378 ? vect_grouped_load_supported (vectype
, single_element_p
,
2380 : vect_grouped_store_supported (vectype
, group_size
)))
2382 *memory_access_type
= VMAT_CONTIGUOUS_PERMUTE
;
2383 overrun_p
= would_overrun_p
;
2387 /* As a last resort, trying using a gather load or scatter store.
2389 ??? Although the code can handle all group sizes correctly,
2390 it probably isn't a win to use separate strided accesses based
2391 on nearby locations. Or, even if it's a win over scalar code,
2392 it might not be a win over vectorizing at a lower VF, if that
2393 allows us to use contiguous accesses. */
2394 if (*memory_access_type
== VMAT_ELEMENTWISE
2397 && vect_use_strided_gather_scatters_p (stmt_info
, loop_vinfo
,
2399 *memory_access_type
= VMAT_GATHER_SCATTER
;
2402 if (vls_type
!= VLS_LOAD
&& first_stmt_info
== stmt_info
)
2404 /* STMT is the leader of the group. Check the operands of all the
2405 stmts of the group. */
2406 stmt_vec_info next_stmt_info
= DR_GROUP_NEXT_ELEMENT (stmt_info
);
2407 while (next_stmt_info
)
2409 tree op
= vect_get_store_rhs (next_stmt_info
);
2410 enum vect_def_type dt
;
2411 if (!vect_is_simple_use (op
, vinfo
, &dt
))
2413 if (dump_enabled_p ())
2414 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2415 "use not simple.\n");
2418 next_stmt_info
= DR_GROUP_NEXT_ELEMENT (next_stmt_info
);
2424 gcc_assert (can_overrun_p
);
2425 if (dump_enabled_p ())
2426 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2427 "Data access with gaps requires scalar "
2429 LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo
) = true;
2435 /* Analyze load or store statement STMT_INFO of type VLS_TYPE. Return true
2436 if there is a memory access type that the vectorized form can use,
2437 storing it in *MEMORY_ACCESS_TYPE if so. If we decide to use gathers
2438 or scatters, fill in GS_INFO accordingly.
2440 SLP says whether we're performing SLP rather than loop vectorization.
2441 MASKED_P is true if the statement is conditional on a vectorized mask.
2442 VECTYPE is the vector type that the vectorized statements will use.
2443 NCOPIES is the number of vector statements that will be needed. */
2446 get_load_store_type (stmt_vec_info stmt_info
, tree vectype
, bool slp
,
2447 bool masked_p
, vec_load_store_type vls_type
,
2448 unsigned int ncopies
,
2449 vect_memory_access_type
*memory_access_type
,
2450 gather_scatter_info
*gs_info
)
2452 vec_info
*vinfo
= stmt_info
->vinfo
;
2453 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
2454 poly_uint64 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
2455 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info
))
2457 *memory_access_type
= VMAT_GATHER_SCATTER
;
2458 if (!vect_check_gather_scatter (stmt_info
, loop_vinfo
, gs_info
))
2460 else if (!vect_is_simple_use (gs_info
->offset
, vinfo
,
2461 &gs_info
->offset_dt
,
2462 &gs_info
->offset_vectype
))
2464 if (dump_enabled_p ())
2465 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2466 "%s index use not simple.\n",
2467 vls_type
== VLS_LOAD
? "gather" : "scatter");
2471 else if (STMT_VINFO_GROUPED_ACCESS (stmt_info
))
2473 if (!get_group_load_store_type (stmt_info
, vectype
, slp
, masked_p
,
2474 vls_type
, memory_access_type
, gs_info
))
2477 else if (STMT_VINFO_STRIDED_P (stmt_info
))
2481 && vect_use_strided_gather_scatters_p (stmt_info
, loop_vinfo
,
2483 *memory_access_type
= VMAT_GATHER_SCATTER
;
2485 *memory_access_type
= VMAT_ELEMENTWISE
;
2489 int cmp
= compare_step_with_zero (stmt_info
);
2491 *memory_access_type
= get_negative_load_store_type
2492 (stmt_info
, vectype
, vls_type
, ncopies
);
2495 gcc_assert (vls_type
== VLS_LOAD
);
2496 *memory_access_type
= VMAT_INVARIANT
;
2499 *memory_access_type
= VMAT_CONTIGUOUS
;
2502 if ((*memory_access_type
== VMAT_ELEMENTWISE
2503 || *memory_access_type
== VMAT_STRIDED_SLP
)
2504 && !nunits
.is_constant ())
2506 if (dump_enabled_p ())
2507 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2508 "Not using elementwise accesses due to variable "
2509 "vectorization factor.\n");
2513 /* FIXME: At the moment the cost model seems to underestimate the
2514 cost of using elementwise accesses. This check preserves the
2515 traditional behavior until that can be fixed. */
2516 stmt_vec_info first_stmt_info
= DR_GROUP_FIRST_ELEMENT (stmt_info
);
2517 if (!first_stmt_info
)
2518 first_stmt_info
= stmt_info
;
2519 if (*memory_access_type
== VMAT_ELEMENTWISE
2520 && !STMT_VINFO_STRIDED_P (first_stmt_info
)
2521 && !(stmt_info
== DR_GROUP_FIRST_ELEMENT (stmt_info
)
2522 && !DR_GROUP_NEXT_ELEMENT (stmt_info
)
2523 && !pow2p_hwi (DR_GROUP_SIZE (stmt_info
))))
2525 if (dump_enabled_p ())
2526 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2527 "not falling back to elementwise accesses\n");
2533 /* Return true if boolean argument MASK is suitable for vectorizing
2534 conditional load or store STMT_INFO. When returning true, store the type
2535 of the definition in *MASK_DT_OUT and the type of the vectorized mask
2536 in *MASK_VECTYPE_OUT. */
2539 vect_check_load_store_mask (stmt_vec_info stmt_info
, tree mask
,
2540 vect_def_type
*mask_dt_out
,
2541 tree
*mask_vectype_out
)
2543 vec_info
*vinfo
= stmt_info
->vinfo
;
2544 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (mask
)))
2546 if (dump_enabled_p ())
2547 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2548 "mask argument is not a boolean.\n");
2552 if (TREE_CODE (mask
) != SSA_NAME
)
2554 if (dump_enabled_p ())
2555 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2556 "mask argument is not an SSA name.\n");
2560 enum vect_def_type mask_dt
;
2562 if (!vect_is_simple_use (mask
, stmt_info
->vinfo
, &mask_dt
, &mask_vectype
))
2564 if (dump_enabled_p ())
2565 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2566 "mask use not simple.\n");
2570 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
2572 mask_vectype
= get_mask_type_for_scalar_type (vinfo
, TREE_TYPE (vectype
));
2574 if (!mask_vectype
|| !VECTOR_BOOLEAN_TYPE_P (mask_vectype
))
2576 if (dump_enabled_p ())
2577 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2578 "could not find an appropriate vector mask type.\n");
2582 if (maybe_ne (TYPE_VECTOR_SUBPARTS (mask_vectype
),
2583 TYPE_VECTOR_SUBPARTS (vectype
)))
2585 if (dump_enabled_p ())
2586 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2587 "vector mask type %T"
2588 " does not match vector data type %T.\n",
2589 mask_vectype
, vectype
);
2594 *mask_dt_out
= mask_dt
;
2595 *mask_vectype_out
= mask_vectype
;
2599 /* Return true if stored value RHS is suitable for vectorizing store
2600 statement STMT_INFO. When returning true, store the type of the
2601 definition in *RHS_DT_OUT, the type of the vectorized store value in
2602 *RHS_VECTYPE_OUT and the type of the store in *VLS_TYPE_OUT. */
2605 vect_check_store_rhs (stmt_vec_info stmt_info
, tree rhs
,
2606 vect_def_type
*rhs_dt_out
, tree
*rhs_vectype_out
,
2607 vec_load_store_type
*vls_type_out
)
2609 /* In the case this is a store from a constant make sure
2610 native_encode_expr can handle it. */
2611 if (CONSTANT_CLASS_P (rhs
) && native_encode_expr (rhs
, NULL
, 64) == 0)
2613 if (dump_enabled_p ())
2614 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2615 "cannot encode constant as a byte sequence.\n");
2619 enum vect_def_type rhs_dt
;
2621 if (!vect_is_simple_use (rhs
, stmt_info
->vinfo
, &rhs_dt
, &rhs_vectype
))
2623 if (dump_enabled_p ())
2624 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2625 "use not simple.\n");
2629 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
2630 if (rhs_vectype
&& !useless_type_conversion_p (vectype
, rhs_vectype
))
2632 if (dump_enabled_p ())
2633 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2634 "incompatible vector types.\n");
2638 *rhs_dt_out
= rhs_dt
;
2639 *rhs_vectype_out
= rhs_vectype
;
2640 if (rhs_dt
== vect_constant_def
|| rhs_dt
== vect_external_def
)
2641 *vls_type_out
= VLS_STORE_INVARIANT
;
2643 *vls_type_out
= VLS_STORE
;
2647 /* Build an all-ones vector mask of type MASKTYPE while vectorizing STMT_INFO.
2648 Note that we support masks with floating-point type, in which case the
2649 floats are interpreted as a bitmask. */
2652 vect_build_all_ones_mask (stmt_vec_info stmt_info
, tree masktype
)
2654 if (TREE_CODE (masktype
) == INTEGER_TYPE
)
2655 return build_int_cst (masktype
, -1);
2656 else if (TREE_CODE (TREE_TYPE (masktype
)) == INTEGER_TYPE
)
2658 tree mask
= build_int_cst (TREE_TYPE (masktype
), -1);
2659 mask
= build_vector_from_val (masktype
, mask
);
2660 return vect_init_vector (stmt_info
, mask
, masktype
, NULL
);
2662 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (masktype
)))
2666 for (int j
= 0; j
< 6; ++j
)
2668 real_from_target (&r
, tmp
, TYPE_MODE (TREE_TYPE (masktype
)));
2669 tree mask
= build_real (TREE_TYPE (masktype
), r
);
2670 mask
= build_vector_from_val (masktype
, mask
);
2671 return vect_init_vector (stmt_info
, mask
, masktype
, NULL
);
2676 /* Build an all-zero merge value of type VECTYPE while vectorizing
2677 STMT_INFO as a gather load. */
2680 vect_build_zero_merge_argument (stmt_vec_info stmt_info
, tree vectype
)
2683 if (TREE_CODE (TREE_TYPE (vectype
)) == INTEGER_TYPE
)
2684 merge
= build_int_cst (TREE_TYPE (vectype
), 0);
2685 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (vectype
)))
2689 for (int j
= 0; j
< 6; ++j
)
2691 real_from_target (&r
, tmp
, TYPE_MODE (TREE_TYPE (vectype
)));
2692 merge
= build_real (TREE_TYPE (vectype
), r
);
2696 merge
= build_vector_from_val (vectype
, merge
);
2697 return vect_init_vector (stmt_info
, merge
, vectype
, NULL
);
2700 /* Build a gather load call while vectorizing STMT_INFO. Insert new
2701 instructions before GSI and add them to VEC_STMT. GS_INFO describes
2702 the gather load operation. If the load is conditional, MASK is the
2703 unvectorized condition and MASK_DT is its definition type, otherwise
2707 vect_build_gather_load_calls (stmt_vec_info stmt_info
,
2708 gimple_stmt_iterator
*gsi
,
2709 stmt_vec_info
*vec_stmt
,
2710 gather_scatter_info
*gs_info
,
2713 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
2714 class loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
2715 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
2716 poly_uint64 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
2717 int ncopies
= vect_get_num_copies (loop_vinfo
, vectype
);
2718 edge pe
= loop_preheader_edge (loop
);
2719 enum { NARROW
, NONE
, WIDEN
} modifier
;
2720 poly_uint64 gather_off_nunits
2721 = TYPE_VECTOR_SUBPARTS (gs_info
->offset_vectype
);
2723 tree arglist
= TYPE_ARG_TYPES (TREE_TYPE (gs_info
->decl
));
2724 tree rettype
= TREE_TYPE (TREE_TYPE (gs_info
->decl
));
2725 tree srctype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
2726 tree ptrtype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
2727 tree idxtype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
2728 tree masktype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
2729 tree scaletype
= TREE_VALUE (arglist
);
2730 tree real_masktype
= masktype
;
2731 gcc_checking_assert (types_compatible_p (srctype
, rettype
)
2733 || TREE_CODE (masktype
) == INTEGER_TYPE
2734 || types_compatible_p (srctype
, masktype
)));
2735 if (mask
&& TREE_CODE (masktype
) == INTEGER_TYPE
)
2736 masktype
= truth_type_for (srctype
);
2738 tree mask_halftype
= masktype
;
2739 tree perm_mask
= NULL_TREE
;
2740 tree mask_perm_mask
= NULL_TREE
;
2741 if (known_eq (nunits
, gather_off_nunits
))
2743 else if (known_eq (nunits
* 2, gather_off_nunits
))
2747 /* Currently widening gathers and scatters are only supported for
2748 fixed-length vectors. */
2749 int count
= gather_off_nunits
.to_constant ();
2750 vec_perm_builder
sel (count
, count
, 1);
2751 for (int i
= 0; i
< count
; ++i
)
2752 sel
.quick_push (i
| (count
/ 2));
2754 vec_perm_indices
indices (sel
, 1, count
);
2755 perm_mask
= vect_gen_perm_mask_checked (gs_info
->offset_vectype
,
2758 else if (known_eq (nunits
, gather_off_nunits
* 2))
2762 /* Currently narrowing gathers and scatters are only supported for
2763 fixed-length vectors. */
2764 int count
= nunits
.to_constant ();
2765 vec_perm_builder
sel (count
, count
, 1);
2766 sel
.quick_grow (count
);
2767 for (int i
= 0; i
< count
; ++i
)
2768 sel
[i
] = i
< count
/ 2 ? i
: i
+ count
/ 2;
2769 vec_perm_indices
indices (sel
, 2, count
);
2770 perm_mask
= vect_gen_perm_mask_checked (vectype
, indices
);
2774 if (mask
&& masktype
== real_masktype
)
2776 for (int i
= 0; i
< count
; ++i
)
2777 sel
[i
] = i
| (count
/ 2);
2778 indices
.new_vector (sel
, 2, count
);
2779 mask_perm_mask
= vect_gen_perm_mask_checked (masktype
, indices
);
2782 mask_halftype
= truth_type_for (gs_info
->offset_vectype
);
2787 tree scalar_dest
= gimple_get_lhs (stmt_info
->stmt
);
2788 tree vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
2790 tree ptr
= fold_convert (ptrtype
, gs_info
->base
);
2791 if (!is_gimple_min_invariant (ptr
))
2794 ptr
= force_gimple_operand (ptr
, &seq
, true, NULL_TREE
);
2795 basic_block new_bb
= gsi_insert_seq_on_edge_immediate (pe
, seq
);
2796 gcc_assert (!new_bb
);
2799 tree scale
= build_int_cst (scaletype
, gs_info
->scale
);
2801 tree vec_oprnd0
= NULL_TREE
;
2802 tree vec_mask
= NULL_TREE
;
2803 tree src_op
= NULL_TREE
;
2804 tree mask_op
= NULL_TREE
;
2805 tree prev_res
= NULL_TREE
;
2806 stmt_vec_info prev_stmt_info
= NULL
;
2810 src_op
= vect_build_zero_merge_argument (stmt_info
, rettype
);
2811 mask_op
= vect_build_all_ones_mask (stmt_info
, masktype
);
2814 for (int j
= 0; j
< ncopies
; ++j
)
2817 if (modifier
== WIDEN
&& (j
& 1))
2818 op
= permute_vec_elements (vec_oprnd0
, vec_oprnd0
,
2819 perm_mask
, stmt_info
, gsi
);
2822 = vect_get_vec_def_for_operand (gs_info
->offset
, stmt_info
);
2824 op
= vec_oprnd0
= vect_get_vec_def_for_stmt_copy (loop_vinfo
,
2827 if (!useless_type_conversion_p (idxtype
, TREE_TYPE (op
)))
2829 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op
)),
2830 TYPE_VECTOR_SUBPARTS (idxtype
)));
2831 var
= vect_get_new_ssa_name (idxtype
, vect_simple_var
);
2832 op
= build1 (VIEW_CONVERT_EXPR
, idxtype
, op
);
2833 gassign
*new_stmt
= gimple_build_assign (var
, VIEW_CONVERT_EXPR
, op
);
2834 vect_finish_stmt_generation (stmt_info
, new_stmt
, gsi
);
2840 if (mask_perm_mask
&& (j
& 1))
2841 mask_op
= permute_vec_elements (mask_op
, mask_op
,
2842 mask_perm_mask
, stmt_info
, gsi
);
2846 vec_mask
= vect_get_vec_def_for_operand (mask
, stmt_info
);
2847 else if (modifier
!= NARROW
|| (j
& 1) == 0)
2848 vec_mask
= vect_get_vec_def_for_stmt_copy (loop_vinfo
,
2852 if (!useless_type_conversion_p (masktype
, TREE_TYPE (vec_mask
)))
2854 poly_uint64 sub1
= TYPE_VECTOR_SUBPARTS (TREE_TYPE (mask_op
));
2855 poly_uint64 sub2
= TYPE_VECTOR_SUBPARTS (masktype
);
2856 gcc_assert (known_eq (sub1
, sub2
));
2857 var
= vect_get_new_ssa_name (masktype
, vect_simple_var
);
2858 mask_op
= build1 (VIEW_CONVERT_EXPR
, masktype
, mask_op
);
2860 = gimple_build_assign (var
, VIEW_CONVERT_EXPR
, mask_op
);
2861 vect_finish_stmt_generation (stmt_info
, new_stmt
, gsi
);
2865 if (modifier
== NARROW
&& masktype
!= real_masktype
)
2867 var
= vect_get_new_ssa_name (mask_halftype
, vect_simple_var
);
2869 = gimple_build_assign (var
, (j
& 1) ? VEC_UNPACK_HI_EXPR
2870 : VEC_UNPACK_LO_EXPR
,
2872 vect_finish_stmt_generation (stmt_info
, new_stmt
, gsi
);
2878 tree mask_arg
= mask_op
;
2879 if (masktype
!= real_masktype
)
2881 tree utype
, optype
= TREE_TYPE (mask_op
);
2882 if (TYPE_MODE (real_masktype
) == TYPE_MODE (optype
))
2883 utype
= real_masktype
;
2885 utype
= lang_hooks
.types
.type_for_mode (TYPE_MODE (optype
), 1);
2886 var
= vect_get_new_ssa_name (utype
, vect_scalar_var
);
2887 mask_arg
= build1 (VIEW_CONVERT_EXPR
, utype
, mask_op
);
2889 = gimple_build_assign (var
, VIEW_CONVERT_EXPR
, mask_arg
);
2890 vect_finish_stmt_generation (stmt_info
, new_stmt
, gsi
);
2892 if (!useless_type_conversion_p (real_masktype
, utype
))
2894 gcc_assert (TYPE_PRECISION (utype
)
2895 <= TYPE_PRECISION (real_masktype
));
2896 var
= vect_get_new_ssa_name (real_masktype
, vect_scalar_var
);
2897 new_stmt
= gimple_build_assign (var
, NOP_EXPR
, mask_arg
);
2898 vect_finish_stmt_generation (stmt_info
, new_stmt
, gsi
);
2901 src_op
= build_zero_cst (srctype
);
2903 gcall
*new_call
= gimple_build_call (gs_info
->decl
, 5, src_op
, ptr
, op
,
2906 stmt_vec_info new_stmt_info
;
2907 if (!useless_type_conversion_p (vectype
, rettype
))
2909 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (vectype
),
2910 TYPE_VECTOR_SUBPARTS (rettype
)));
2911 op
= vect_get_new_ssa_name (rettype
, vect_simple_var
);
2912 gimple_call_set_lhs (new_call
, op
);
2913 vect_finish_stmt_generation (stmt_info
, new_call
, gsi
);
2914 var
= make_ssa_name (vec_dest
);
2915 op
= build1 (VIEW_CONVERT_EXPR
, vectype
, op
);
2916 gassign
*new_stmt
= gimple_build_assign (var
, VIEW_CONVERT_EXPR
, op
);
2918 = vect_finish_stmt_generation (stmt_info
, new_stmt
, gsi
);
2922 var
= make_ssa_name (vec_dest
, new_call
);
2923 gimple_call_set_lhs (new_call
, var
);
2925 = vect_finish_stmt_generation (stmt_info
, new_call
, gsi
);
2928 if (modifier
== NARROW
)
2935 var
= permute_vec_elements (prev_res
, var
, perm_mask
,
2937 new_stmt_info
= loop_vinfo
->lookup_def (var
);
2940 if (prev_stmt_info
== NULL
)
2941 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt_info
;
2943 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt_info
;
2944 prev_stmt_info
= new_stmt_info
;
2948 /* Prepare the base and offset in GS_INFO for vectorization.
2949 Set *DATAREF_PTR to the loop-invariant base address and *VEC_OFFSET
2950 to the vectorized offset argument for the first copy of STMT_INFO.
2951 STMT_INFO is the statement described by GS_INFO and LOOP is the
2955 vect_get_gather_scatter_ops (class loop
*loop
, stmt_vec_info stmt_info
,
2956 gather_scatter_info
*gs_info
,
2957 tree
*dataref_ptr
, tree
*vec_offset
)
2959 gimple_seq stmts
= NULL
;
2960 *dataref_ptr
= force_gimple_operand (gs_info
->base
, &stmts
, true, NULL_TREE
);
2964 edge pe
= loop_preheader_edge (loop
);
2965 new_bb
= gsi_insert_seq_on_edge_immediate (pe
, stmts
);
2966 gcc_assert (!new_bb
);
2968 *vec_offset
= vect_get_vec_def_for_operand (gs_info
->offset
, stmt_info
,
2969 gs_info
->offset_vectype
);
2972 /* Prepare to implement a grouped or strided load or store using
2973 the gather load or scatter store operation described by GS_INFO.
2974 STMT_INFO is the load or store statement.
2976 Set *DATAREF_BUMP to the amount that should be added to the base
2977 address after each copy of the vectorized statement. Set *VEC_OFFSET
2978 to an invariant offset vector in which element I has the value
2979 I * DR_STEP / SCALE. */
2982 vect_get_strided_load_store_ops (stmt_vec_info stmt_info
,
2983 loop_vec_info loop_vinfo
,
2984 gather_scatter_info
*gs_info
,
2985 tree
*dataref_bump
, tree
*vec_offset
)
2987 struct data_reference
*dr
= STMT_VINFO_DATA_REF (stmt_info
);
2988 class loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
2989 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
2992 tree bump
= size_binop (MULT_EXPR
,
2993 fold_convert (sizetype
, DR_STEP (dr
)),
2994 size_int (TYPE_VECTOR_SUBPARTS (vectype
)));
2995 *dataref_bump
= force_gimple_operand (bump
, &stmts
, true, NULL_TREE
);
2997 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop
), stmts
);
2999 /* The offset given in GS_INFO can have pointer type, so use the element
3000 type of the vector instead. */
3001 tree offset_type
= TREE_TYPE (gs_info
->offset
);
3002 offset_type
= TREE_TYPE (gs_info
->offset_vectype
);
3004 /* Calculate X = DR_STEP / SCALE and convert it to the appropriate type. */
3005 tree step
= size_binop (EXACT_DIV_EXPR
, DR_STEP (dr
),
3006 ssize_int (gs_info
->scale
));
3007 step
= fold_convert (offset_type
, step
);
3008 step
= force_gimple_operand (step
, &stmts
, true, NULL_TREE
);
3010 /* Create {0, X, X*2, X*3, ...}. */
3011 *vec_offset
= gimple_build (&stmts
, VEC_SERIES_EXPR
, gs_info
->offset_vectype
,
3012 build_zero_cst (offset_type
), step
);
3014 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop
), stmts
);
3017 /* Return the amount that should be added to a vector pointer to move
3018 to the next or previous copy of AGGR_TYPE. DR_INFO is the data reference
3019 being vectorized and MEMORY_ACCESS_TYPE describes the type of
3023 vect_get_data_ptr_increment (dr_vec_info
*dr_info
, tree aggr_type
,
3024 vect_memory_access_type memory_access_type
)
3026 if (memory_access_type
== VMAT_INVARIANT
)
3027 return size_zero_node
;
3029 tree iv_step
= TYPE_SIZE_UNIT (aggr_type
);
3030 tree step
= vect_dr_behavior (dr_info
)->step
;
3031 if (tree_int_cst_sgn (step
) == -1)
3032 iv_step
= fold_build1 (NEGATE_EXPR
, TREE_TYPE (iv_step
), iv_step
);
3036 /* Check and perform vectorization of BUILT_IN_BSWAP{16,32,64}. */
3039 vectorizable_bswap (stmt_vec_info stmt_info
, gimple_stmt_iterator
*gsi
,
3040 stmt_vec_info
*vec_stmt
, slp_tree slp_node
,
3041 tree vectype_in
, stmt_vector_for_cost
*cost_vec
)
3044 gcall
*stmt
= as_a
<gcall
*> (stmt_info
->stmt
);
3045 vec_info
*vinfo
= stmt_info
->vinfo
;
3046 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
3049 op
= gimple_call_arg (stmt
, 0);
3050 vectype
= STMT_VINFO_VECTYPE (stmt_info
);
3051 poly_uint64 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
3053 /* Multiple types in SLP are handled by creating the appropriate number of
3054 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
3059 ncopies
= vect_get_num_copies (loop_vinfo
, vectype
);
3061 gcc_assert (ncopies
>= 1);
3063 tree char_vectype
= get_same_sized_vectype (char_type_node
, vectype_in
);
3067 poly_uint64 num_bytes
= TYPE_VECTOR_SUBPARTS (char_vectype
);
3068 unsigned word_bytes
;
3069 if (!constant_multiple_p (num_bytes
, nunits
, &word_bytes
))
3072 /* The encoding uses one stepped pattern for each byte in the word. */
3073 vec_perm_builder
elts (num_bytes
, word_bytes
, 3);
3074 for (unsigned i
= 0; i
< 3; ++i
)
3075 for (unsigned j
= 0; j
< word_bytes
; ++j
)
3076 elts
.quick_push ((i
+ 1) * word_bytes
- j
- 1);
3078 vec_perm_indices
indices (elts
, 1, num_bytes
);
3079 if (!can_vec_perm_const_p (TYPE_MODE (char_vectype
), indices
))
3084 STMT_VINFO_TYPE (stmt_info
) = call_vec_info_type
;
3085 DUMP_VECT_SCOPE ("vectorizable_bswap");
3088 record_stmt_cost (cost_vec
,
3089 1, vector_stmt
, stmt_info
, 0, vect_prologue
);
3090 record_stmt_cost (cost_vec
,
3091 ncopies
, vec_perm
, stmt_info
, 0, vect_body
);
3096 tree bswap_vconst
= vec_perm_indices_to_tree (char_vectype
, indices
);
3099 vec
<tree
> vec_oprnds
= vNULL
;
3100 stmt_vec_info new_stmt_info
= NULL
;
3101 stmt_vec_info prev_stmt_info
= NULL
;
3102 for (unsigned j
= 0; j
< ncopies
; j
++)
3106 vect_get_vec_defs (op
, NULL
, stmt_info
, &vec_oprnds
, NULL
, slp_node
);
3108 vect_get_vec_defs_for_stmt_copy (vinfo
, &vec_oprnds
, NULL
);
3110 /* Arguments are ready. create the new vector stmt. */
3113 FOR_EACH_VEC_ELT (vec_oprnds
, i
, vop
)
3116 tree tem
= make_ssa_name (char_vectype
);
3117 new_stmt
= gimple_build_assign (tem
, build1 (VIEW_CONVERT_EXPR
,
3118 char_vectype
, vop
));
3119 vect_finish_stmt_generation (stmt_info
, new_stmt
, gsi
);
3120 tree tem2
= make_ssa_name (char_vectype
);
3121 new_stmt
= gimple_build_assign (tem2
, VEC_PERM_EXPR
,
3122 tem
, tem
, bswap_vconst
);
3123 vect_finish_stmt_generation (stmt_info
, new_stmt
, gsi
);
3124 tem
= make_ssa_name (vectype
);
3125 new_stmt
= gimple_build_assign (tem
, build1 (VIEW_CONVERT_EXPR
,
3128 = vect_finish_stmt_generation (stmt_info
, new_stmt
, gsi
);
3130 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt_info
);
3137 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt_info
;
3139 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt_info
;
3141 prev_stmt_info
= new_stmt_info
;
3144 vec_oprnds
.release ();
3148 /* Return true if vector types VECTYPE_IN and VECTYPE_OUT have
3149 integer elements and if we can narrow VECTYPE_IN to VECTYPE_OUT
3150 in a single step. On success, store the binary pack code in
3154 simple_integer_narrowing (tree vectype_out
, tree vectype_in
,
3155 tree_code
*convert_code
)
3157 if (!INTEGRAL_TYPE_P (TREE_TYPE (vectype_out
))
3158 || !INTEGRAL_TYPE_P (TREE_TYPE (vectype_in
)))
3162 int multi_step_cvt
= 0;
3163 auto_vec
<tree
, 8> interm_types
;
3164 if (!supportable_narrowing_operation (NOP_EXPR
, vectype_out
, vectype_in
,
3165 &code
, &multi_step_cvt
, &interm_types
)
3169 *convert_code
= code
;
3173 /* Function vectorizable_call.
3175 Check if STMT_INFO performs a function call that can be vectorized.
3176 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
3177 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
3178 Return true if STMT_INFO is vectorizable in this way. */
3181 vectorizable_call (stmt_vec_info stmt_info
, gimple_stmt_iterator
*gsi
,
3182 stmt_vec_info
*vec_stmt
, slp_tree slp_node
,
3183 stmt_vector_for_cost
*cost_vec
)
3189 tree vec_oprnd0
= NULL_TREE
, vec_oprnd1
= NULL_TREE
;
3190 stmt_vec_info prev_stmt_info
;
3191 tree vectype_out
, vectype_in
;
3192 poly_uint64 nunits_in
;
3193 poly_uint64 nunits_out
;
3194 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
3195 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
3196 vec_info
*vinfo
= stmt_info
->vinfo
;
3197 tree fndecl
, new_temp
, rhs_type
;
3198 enum vect_def_type dt
[4]
3199 = { vect_unknown_def_type
, vect_unknown_def_type
, vect_unknown_def_type
,
3200 vect_unknown_def_type
};
3201 tree vectypes
[ARRAY_SIZE (dt
)] = {};
3202 int ndts
= ARRAY_SIZE (dt
);
3204 auto_vec
<tree
, 8> vargs
;
3205 auto_vec
<tree
, 8> orig_vargs
;
3206 enum { NARROW
, NONE
, WIDEN
} modifier
;
3210 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
3213 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
3217 /* Is STMT_INFO a vectorizable call? */
3218 stmt
= dyn_cast
<gcall
*> (stmt_info
->stmt
);
3222 if (gimple_call_internal_p (stmt
)
3223 && (internal_load_fn_p (gimple_call_internal_fn (stmt
))
3224 || internal_store_fn_p (gimple_call_internal_fn (stmt
))))
3225 /* Handled by vectorizable_load and vectorizable_store. */
3228 if (gimple_call_lhs (stmt
) == NULL_TREE
3229 || TREE_CODE (gimple_call_lhs (stmt
)) != SSA_NAME
)
3232 gcc_checking_assert (!stmt_can_throw_internal (cfun
, stmt
));
3234 vectype_out
= STMT_VINFO_VECTYPE (stmt_info
);
3236 /* Process function arguments. */
3237 rhs_type
= NULL_TREE
;
3238 vectype_in
= NULL_TREE
;
3239 nargs
= gimple_call_num_args (stmt
);
3241 /* Bail out if the function has more than three arguments, we do not have
3242 interesting builtin functions to vectorize with more than two arguments
3243 except for fma. No arguments is also not good. */
3244 if (nargs
== 0 || nargs
> 4)
3247 /* Ignore the arguments of IFN_GOMP_SIMD_LANE, they are magic. */
3248 combined_fn cfn
= gimple_call_combined_fn (stmt
);
3249 if (cfn
== CFN_GOMP_SIMD_LANE
)
3252 rhs_type
= unsigned_type_node
;
3256 if (internal_fn_p (cfn
))
3257 mask_opno
= internal_fn_mask_index (as_internal_fn (cfn
));
3259 for (i
= 0; i
< nargs
; i
++)
3261 op
= gimple_call_arg (stmt
, i
);
3262 if (!vect_is_simple_use (op
, vinfo
, &dt
[i
], &vectypes
[i
]))
3264 if (dump_enabled_p ())
3265 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3266 "use not simple.\n");
3270 /* Skip the mask argument to an internal function. This operand
3271 has been converted via a pattern if necessary. */
3272 if ((int) i
== mask_opno
)
3275 /* We can only handle calls with arguments of the same type. */
3277 && !types_compatible_p (rhs_type
, TREE_TYPE (op
)))
3279 if (dump_enabled_p ())
3280 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3281 "argument types differ.\n");
3285 rhs_type
= TREE_TYPE (op
);
3288 vectype_in
= vectypes
[i
];
3289 else if (vectypes
[i
]
3290 && !types_compatible_p (vectypes
[i
], vectype_in
))
3292 if (dump_enabled_p ())
3293 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3294 "argument vector types differ.\n");
3298 /* If all arguments are external or constant defs, infer the vector type
3299 from the scalar type. */
3301 vectype_in
= get_vectype_for_scalar_type (vinfo
, rhs_type
);
3303 gcc_assert (vectype_in
);
3306 if (dump_enabled_p ())
3307 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3308 "no vectype for scalar type %T\n", rhs_type
);
3312 /* FORNOW: we don't yet support mixtures of vector sizes for calls,
3313 just mixtures of nunits. E.g. DI->SI versions of __builtin_ctz*
3314 are traditionally vectorized as two VnDI->VnDI IFN_CTZs followed
3315 by a pack of the two vectors into an SI vector. We would need
3316 separate code to handle direct VnDI->VnSI IFN_CTZs. */
3317 if (TYPE_SIZE (vectype_in
) != TYPE_SIZE (vectype_out
))
3319 if (dump_enabled_p ())
3320 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3321 "mismatched vector sizes %T and %T\n",
3322 vectype_in
, vectype_out
);
3327 nunits_in
= TYPE_VECTOR_SUBPARTS (vectype_in
);
3328 nunits_out
= TYPE_VECTOR_SUBPARTS (vectype_out
);
3329 if (known_eq (nunits_in
* 2, nunits_out
))
3331 else if (known_eq (nunits_out
, nunits_in
))
3333 else if (known_eq (nunits_out
* 2, nunits_in
))
3338 /* We only handle functions that do not read or clobber memory. */
3339 if (gimple_vuse (stmt
))
3341 if (dump_enabled_p ())
3342 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3343 "function reads from or writes to memory.\n");
3347 /* For now, we only vectorize functions if a target specific builtin
3348 is available. TODO -- in some cases, it might be profitable to
3349 insert the calls for pieces of the vector, in order to be able
3350 to vectorize other operations in the loop. */
3352 internal_fn ifn
= IFN_LAST
;
3353 tree callee
= gimple_call_fndecl (stmt
);
3355 /* First try using an internal function. */
3356 tree_code convert_code
= ERROR_MARK
;
3358 && (modifier
== NONE
3359 || (modifier
== NARROW
3360 && simple_integer_narrowing (vectype_out
, vectype_in
,
3362 ifn
= vectorizable_internal_function (cfn
, callee
, vectype_out
,
3365 /* If that fails, try asking for a target-specific built-in function. */
3366 if (ifn
== IFN_LAST
)
3368 if (cfn
!= CFN_LAST
)
3369 fndecl
= targetm
.vectorize
.builtin_vectorized_function
3370 (cfn
, vectype_out
, vectype_in
);
3371 else if (callee
&& fndecl_built_in_p (callee
, BUILT_IN_MD
))
3372 fndecl
= targetm
.vectorize
.builtin_md_vectorized_function
3373 (callee
, vectype_out
, vectype_in
);
3376 if (ifn
== IFN_LAST
&& !fndecl
)
3378 if (cfn
== CFN_GOMP_SIMD_LANE
3381 && LOOP_VINFO_LOOP (loop_vinfo
)->simduid
3382 && TREE_CODE (gimple_call_arg (stmt
, 0)) == SSA_NAME
3383 && LOOP_VINFO_LOOP (loop_vinfo
)->simduid
3384 == SSA_NAME_VAR (gimple_call_arg (stmt
, 0)))
3386 /* We can handle IFN_GOMP_SIMD_LANE by returning a
3387 { 0, 1, 2, ... vf - 1 } vector. */
3388 gcc_assert (nargs
== 0);
3390 else if (modifier
== NONE
3391 && (gimple_call_builtin_p (stmt
, BUILT_IN_BSWAP16
)
3392 || gimple_call_builtin_p (stmt
, BUILT_IN_BSWAP32
)
3393 || gimple_call_builtin_p (stmt
, BUILT_IN_BSWAP64
)))
3394 return vectorizable_bswap (stmt_info
, gsi
, vec_stmt
, slp_node
,
3395 vectype_in
, cost_vec
);
3398 if (dump_enabled_p ())
3399 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3400 "function is not vectorizable.\n");
3407 else if (modifier
== NARROW
&& ifn
== IFN_LAST
)
3408 ncopies
= vect_get_num_copies (loop_vinfo
, vectype_out
);
3410 ncopies
= vect_get_num_copies (loop_vinfo
, vectype_in
);
3412 /* Sanity check: make sure that at least one copy of the vectorized stmt
3413 needs to be generated. */
3414 gcc_assert (ncopies
>= 1);
3416 vec_loop_masks
*masks
= (loop_vinfo
? &LOOP_VINFO_MASKS (loop_vinfo
) : NULL
);
3417 if (!vec_stmt
) /* transformation not required. */
3419 STMT_VINFO_TYPE (stmt_info
) = call_vec_info_type
;
3420 DUMP_VECT_SCOPE ("vectorizable_call");
3421 vect_model_simple_cost (stmt_info
, ncopies
, dt
, ndts
, slp_node
, cost_vec
);
3422 if (ifn
!= IFN_LAST
&& modifier
== NARROW
&& !slp_node
)
3423 record_stmt_cost (cost_vec
, ncopies
/ 2,
3424 vec_promote_demote
, stmt_info
, 0, vect_body
);
3426 if (loop_vinfo
&& mask_opno
>= 0)
3428 unsigned int nvectors
= (slp_node
3429 ? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
)
3431 tree scalar_mask
= gimple_call_arg (stmt_info
->stmt
, mask_opno
);
3432 vect_record_loop_mask (loop_vinfo
, masks
, nvectors
,
3433 vectype_out
, scalar_mask
);
3440 if (dump_enabled_p ())
3441 dump_printf_loc (MSG_NOTE
, vect_location
, "transform call.\n");
3444 scalar_dest
= gimple_call_lhs (stmt
);
3445 vec_dest
= vect_create_destination_var (scalar_dest
, vectype_out
);
3447 bool masked_loop_p
= loop_vinfo
&& LOOP_VINFO_FULLY_MASKED_P (loop_vinfo
);
3449 stmt_vec_info new_stmt_info
= NULL
;
3450 prev_stmt_info
= NULL
;
3451 if (modifier
== NONE
|| ifn
!= IFN_LAST
)
3453 tree prev_res
= NULL_TREE
;
3454 vargs
.safe_grow (nargs
);
3455 orig_vargs
.safe_grow (nargs
);
3456 for (j
= 0; j
< ncopies
; ++j
)
3458 /* Build argument list for the vectorized call. */
3461 auto_vec
<vec
<tree
> > vec_defs (nargs
);
3462 vec
<tree
> vec_oprnds0
;
3464 vect_get_slp_defs (slp_node
, &vec_defs
);
3465 vec_oprnds0
= vec_defs
[0];
3467 /* Arguments are ready. Create the new vector stmt. */
3468 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vec_oprnd0
)
3471 for (k
= 0; k
< nargs
; k
++)
3473 vec
<tree
> vec_oprndsk
= vec_defs
[k
];
3474 vargs
[k
] = vec_oprndsk
[i
];
3476 if (modifier
== NARROW
)
3478 /* We don't define any narrowing conditional functions
3480 gcc_assert (mask_opno
< 0);
3481 tree half_res
= make_ssa_name (vectype_in
);
3483 = gimple_build_call_internal_vec (ifn
, vargs
);
3484 gimple_call_set_lhs (call
, half_res
);
3485 gimple_call_set_nothrow (call
, true);
3486 vect_finish_stmt_generation (stmt_info
, call
, gsi
);
3489 prev_res
= half_res
;
3492 new_temp
= make_ssa_name (vec_dest
);
3494 = gimple_build_assign (new_temp
, convert_code
,
3495 prev_res
, half_res
);
3497 = vect_finish_stmt_generation (stmt_info
, new_stmt
,
3502 if (mask_opno
>= 0 && masked_loop_p
)
3504 unsigned int vec_num
= vec_oprnds0
.length ();
3505 /* Always true for SLP. */
3506 gcc_assert (ncopies
== 1);
3507 tree mask
= vect_get_loop_mask (gsi
, masks
, vec_num
,
3509 vargs
[mask_opno
] = prepare_load_store_mask
3510 (TREE_TYPE (mask
), mask
, vargs
[mask_opno
], gsi
);
3514 if (ifn
!= IFN_LAST
)
3515 call
= gimple_build_call_internal_vec (ifn
, vargs
);
3517 call
= gimple_build_call_vec (fndecl
, vargs
);
3518 new_temp
= make_ssa_name (vec_dest
, call
);
3519 gimple_call_set_lhs (call
, new_temp
);
3520 gimple_call_set_nothrow (call
, true);
3522 = vect_finish_stmt_generation (stmt_info
, call
, gsi
);
3524 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt_info
);
3527 for (i
= 0; i
< nargs
; i
++)
3529 vec
<tree
> vec_oprndsi
= vec_defs
[i
];
3530 vec_oprndsi
.release ();
3535 if (mask_opno
>= 0 && !vectypes
[mask_opno
])
3537 gcc_assert (modifier
!= WIDEN
);
3538 vectypes
[mask_opno
] = truth_type_for (vectype_in
);
3541 for (i
= 0; i
< nargs
; i
++)
3543 op
= gimple_call_arg (stmt
, i
);
3546 = vect_get_vec_def_for_operand (op
, stmt_info
, vectypes
[i
]);
3549 = vect_get_vec_def_for_stmt_copy (vinfo
, orig_vargs
[i
]);
3551 orig_vargs
[i
] = vargs
[i
] = vec_oprnd0
;
3554 if (mask_opno
>= 0 && masked_loop_p
)
3556 tree mask
= vect_get_loop_mask (gsi
, masks
, ncopies
,
3559 = prepare_load_store_mask (TREE_TYPE (mask
), mask
,
3560 vargs
[mask_opno
], gsi
);
3563 if (cfn
== CFN_GOMP_SIMD_LANE
)
3565 tree cst
= build_index_vector (vectype_out
, j
* nunits_out
, 1);
3567 = vect_get_new_ssa_name (vectype_out
, vect_simple_var
, "cst_");
3568 gimple
*init_stmt
= gimple_build_assign (new_var
, cst
);
3569 vect_init_vector_1 (stmt_info
, init_stmt
, NULL
);
3570 new_temp
= make_ssa_name (vec_dest
);
3571 gimple
*new_stmt
= gimple_build_assign (new_temp
, new_var
);
3573 = vect_finish_stmt_generation (stmt_info
, new_stmt
, gsi
);
3575 else if (modifier
== NARROW
)
3577 /* We don't define any narrowing conditional functions at
3579 gcc_assert (mask_opno
< 0);
3580 tree half_res
= make_ssa_name (vectype_in
);
3581 gcall
*call
= gimple_build_call_internal_vec (ifn
, vargs
);
3582 gimple_call_set_lhs (call
, half_res
);
3583 gimple_call_set_nothrow (call
, true);
3584 vect_finish_stmt_generation (stmt_info
, call
, gsi
);
3587 prev_res
= half_res
;
3590 new_temp
= make_ssa_name (vec_dest
);
3591 gassign
*new_stmt
= gimple_build_assign (new_temp
, convert_code
,
3592 prev_res
, half_res
);
3594 = vect_finish_stmt_generation (stmt_info
, new_stmt
, gsi
);
3599 if (ifn
!= IFN_LAST
)
3600 call
= gimple_build_call_internal_vec (ifn
, vargs
);
3602 call
= gimple_build_call_vec (fndecl
, vargs
);
3603 new_temp
= make_ssa_name (vec_dest
, call
);
3604 gimple_call_set_lhs (call
, new_temp
);
3605 gimple_call_set_nothrow (call
, true);
3607 = vect_finish_stmt_generation (stmt_info
, call
, gsi
);
3610 if (j
== (modifier
== NARROW
? 1 : 0))
3611 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt_info
;
3613 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt_info
;
3615 prev_stmt_info
= new_stmt_info
;
3618 else if (modifier
== NARROW
)
3620 /* We don't define any narrowing conditional functions at present. */
3621 gcc_assert (mask_opno
< 0);
3622 for (j
= 0; j
< ncopies
; ++j
)
3624 /* Build argument list for the vectorized call. */
3626 vargs
.create (nargs
* 2);
3632 auto_vec
<vec
<tree
> > vec_defs (nargs
);
3633 vec
<tree
> vec_oprnds0
;
3635 vect_get_slp_defs (slp_node
, &vec_defs
);
3636 vec_oprnds0
= vec_defs
[0];
3638 /* Arguments are ready. Create the new vector stmt. */
3639 for (i
= 0; vec_oprnds0
.iterate (i
, &vec_oprnd0
); i
+= 2)
3643 for (k
= 0; k
< nargs
; k
++)
3645 vec
<tree
> vec_oprndsk
= vec_defs
[k
];
3646 vargs
.quick_push (vec_oprndsk
[i
]);
3647 vargs
.quick_push (vec_oprndsk
[i
+ 1]);
3650 if (ifn
!= IFN_LAST
)
3651 call
= gimple_build_call_internal_vec (ifn
, vargs
);
3653 call
= gimple_build_call_vec (fndecl
, vargs
);
3654 new_temp
= make_ssa_name (vec_dest
, call
);
3655 gimple_call_set_lhs (call
, new_temp
);
3656 gimple_call_set_nothrow (call
, true);
3658 = vect_finish_stmt_generation (stmt_info
, call
, gsi
);
3659 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt_info
);
3662 for (i
= 0; i
< nargs
; i
++)
3664 vec
<tree
> vec_oprndsi
= vec_defs
[i
];
3665 vec_oprndsi
.release ();
3670 for (i
= 0; i
< nargs
; i
++)
3672 op
= gimple_call_arg (stmt
, i
);
3676 = vect_get_vec_def_for_operand (op
, stmt_info
,
3679 = vect_get_vec_def_for_stmt_copy (vinfo
, vec_oprnd0
);
3683 vec_oprnd1
= gimple_call_arg (new_stmt_info
->stmt
,
3686 = vect_get_vec_def_for_stmt_copy (vinfo
, vec_oprnd1
);
3688 = vect_get_vec_def_for_stmt_copy (vinfo
, vec_oprnd0
);
3691 vargs
.quick_push (vec_oprnd0
);
3692 vargs
.quick_push (vec_oprnd1
);
3695 gcall
*new_stmt
= gimple_build_call_vec (fndecl
, vargs
);
3696 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
3697 gimple_call_set_lhs (new_stmt
, new_temp
);
3699 = vect_finish_stmt_generation (stmt_info
, new_stmt
, gsi
);
3702 STMT_VINFO_VEC_STMT (stmt_info
) = new_stmt_info
;
3704 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt_info
;
3706 prev_stmt_info
= new_stmt_info
;
3709 *vec_stmt
= STMT_VINFO_VEC_STMT (stmt_info
);
3712 /* No current target implements this case. */
3717 /* The call in STMT might prevent it from being removed in dce.
3718 We however cannot remove it here, due to the way the ssa name
3719 it defines is mapped to the new definition. So just replace
3720 rhs of the statement with something harmless. */
3725 stmt_info
= vect_orig_stmt (stmt_info
);
3726 lhs
= gimple_get_lhs (stmt_info
->stmt
);
3729 = gimple_build_assign (lhs
, build_zero_cst (TREE_TYPE (lhs
)));
3730 vinfo
->replace_stmt (gsi
, stmt_info
, new_stmt
);
3736 struct simd_call_arg_info
3740 HOST_WIDE_INT linear_step
;
3741 enum vect_def_type dt
;
3743 bool simd_lane_linear
;
3746 /* Helper function of vectorizable_simd_clone_call. If OP, an SSA_NAME,
3747 is linear within simd lane (but not within whole loop), note it in
3751 vect_simd_lane_linear (tree op
, class loop
*loop
,
3752 struct simd_call_arg_info
*arginfo
)
3754 gimple
*def_stmt
= SSA_NAME_DEF_STMT (op
);
3756 if (!is_gimple_assign (def_stmt
)
3757 || gimple_assign_rhs_code (def_stmt
) != POINTER_PLUS_EXPR
3758 || !is_gimple_min_invariant (gimple_assign_rhs1 (def_stmt
)))
3761 tree base
= gimple_assign_rhs1 (def_stmt
);
3762 HOST_WIDE_INT linear_step
= 0;
3763 tree v
= gimple_assign_rhs2 (def_stmt
);
3764 while (TREE_CODE (v
) == SSA_NAME
)
3767 def_stmt
= SSA_NAME_DEF_STMT (v
);
3768 if (is_gimple_assign (def_stmt
))
3769 switch (gimple_assign_rhs_code (def_stmt
))
3772 t
= gimple_assign_rhs2 (def_stmt
);
3773 if (linear_step
|| TREE_CODE (t
) != INTEGER_CST
)
3775 base
= fold_build2 (POINTER_PLUS_EXPR
, TREE_TYPE (base
), base
, t
);
3776 v
= gimple_assign_rhs1 (def_stmt
);
3779 t
= gimple_assign_rhs2 (def_stmt
);
3780 if (linear_step
|| !tree_fits_shwi_p (t
) || integer_zerop (t
))
3782 linear_step
= tree_to_shwi (t
);
3783 v
= gimple_assign_rhs1 (def_stmt
);
3786 t
= gimple_assign_rhs1 (def_stmt
);
3787 if (TREE_CODE (TREE_TYPE (t
)) != INTEGER_TYPE
3788 || (TYPE_PRECISION (TREE_TYPE (v
))
3789 < TYPE_PRECISION (TREE_TYPE (t
))))
3798 else if (gimple_call_internal_p (def_stmt
, IFN_GOMP_SIMD_LANE
)
3800 && TREE_CODE (gimple_call_arg (def_stmt
, 0)) == SSA_NAME
3801 && (SSA_NAME_VAR (gimple_call_arg (def_stmt
, 0))
3806 arginfo
->linear_step
= linear_step
;
3808 arginfo
->simd_lane_linear
= true;
3814 /* Return the number of elements in vector type VECTYPE, which is associated
3815 with a SIMD clone. At present these vectors always have a constant
3818 static unsigned HOST_WIDE_INT
3819 simd_clone_subparts (tree vectype
)
3821 return TYPE_VECTOR_SUBPARTS (vectype
).to_constant ();
3824 /* Function vectorizable_simd_clone_call.
3826 Check if STMT_INFO performs a function call that can be vectorized
3827 by calling a simd clone of the function.
3828 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
3829 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
3830 Return true if STMT_INFO is vectorizable in this way. */
3833 vectorizable_simd_clone_call (stmt_vec_info stmt_info
,
3834 gimple_stmt_iterator
*gsi
,
3835 stmt_vec_info
*vec_stmt
, slp_tree slp_node
,
3836 stmt_vector_for_cost
*)
3841 tree vec_oprnd0
= NULL_TREE
;
3842 stmt_vec_info prev_stmt_info
;
3844 unsigned int nunits
;
3845 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
3846 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
3847 vec_info
*vinfo
= stmt_info
->vinfo
;
3848 class loop
*loop
= loop_vinfo
? LOOP_VINFO_LOOP (loop_vinfo
) : NULL
;
3849 tree fndecl
, new_temp
;
3851 auto_vec
<simd_call_arg_info
> arginfo
;
3852 vec
<tree
> vargs
= vNULL
;
3854 tree lhs
, rtype
, ratype
;
3855 vec
<constructor_elt
, va_gc
> *ret_ctor_elts
= NULL
;
3857 /* Is STMT a vectorizable call? */
3858 gcall
*stmt
= dyn_cast
<gcall
*> (stmt_info
->stmt
);
3862 fndecl
= gimple_call_fndecl (stmt
);
3863 if (fndecl
== NULL_TREE
)
3866 struct cgraph_node
*node
= cgraph_node::get (fndecl
);
3867 if (node
== NULL
|| node
->simd_clones
== NULL
)
3870 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
3873 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
3877 if (gimple_call_lhs (stmt
)
3878 && TREE_CODE (gimple_call_lhs (stmt
)) != SSA_NAME
)
3881 gcc_checking_assert (!stmt_can_throw_internal (cfun
, stmt
));
3883 vectype
= STMT_VINFO_VECTYPE (stmt_info
);
3885 if (loop_vinfo
&& nested_in_vect_loop_p (loop
, stmt_info
))
3892 /* Process function arguments. */
3893 nargs
= gimple_call_num_args (stmt
);
3895 /* Bail out if the function has zero arguments. */
3899 arginfo
.reserve (nargs
, true);
3901 for (i
= 0; i
< nargs
; i
++)
3903 simd_call_arg_info thisarginfo
;
3906 thisarginfo
.linear_step
= 0;
3907 thisarginfo
.align
= 0;
3908 thisarginfo
.op
= NULL_TREE
;
3909 thisarginfo
.simd_lane_linear
= false;
3911 op
= gimple_call_arg (stmt
, i
);
3912 if (!vect_is_simple_use (op
, vinfo
, &thisarginfo
.dt
,
3913 &thisarginfo
.vectype
)
3914 || thisarginfo
.dt
== vect_uninitialized_def
)
3916 if (dump_enabled_p ())
3917 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3918 "use not simple.\n");
3922 if (thisarginfo
.dt
== vect_constant_def
3923 || thisarginfo
.dt
== vect_external_def
)
3924 gcc_assert (thisarginfo
.vectype
== NULL_TREE
);
3926 gcc_assert (thisarginfo
.vectype
!= NULL_TREE
);
3928 /* For linear arguments, the analyze phase should have saved
3929 the base and step in STMT_VINFO_SIMD_CLONE_INFO. */
3930 if (i
* 3 + 4 <= STMT_VINFO_SIMD_CLONE_INFO (stmt_info
).length ()
3931 && STMT_VINFO_SIMD_CLONE_INFO (stmt_info
)[i
* 3 + 2])
3933 gcc_assert (vec_stmt
);
3934 thisarginfo
.linear_step
3935 = tree_to_shwi (STMT_VINFO_SIMD_CLONE_INFO (stmt_info
)[i
* 3 + 2]);
3937 = STMT_VINFO_SIMD_CLONE_INFO (stmt_info
)[i
* 3 + 1];
3938 thisarginfo
.simd_lane_linear
3939 = (STMT_VINFO_SIMD_CLONE_INFO (stmt_info
)[i
* 3 + 3]
3940 == boolean_true_node
);
3941 /* If loop has been peeled for alignment, we need to adjust it. */
3942 tree n1
= LOOP_VINFO_NITERS_UNCHANGED (loop_vinfo
);
3943 tree n2
= LOOP_VINFO_NITERS (loop_vinfo
);
3944 if (n1
!= n2
&& !thisarginfo
.simd_lane_linear
)
3946 tree bias
= fold_build2 (MINUS_EXPR
, TREE_TYPE (n1
), n1
, n2
);
3947 tree step
= STMT_VINFO_SIMD_CLONE_INFO (stmt_info
)[i
* 3 + 2];
3948 tree opt
= TREE_TYPE (thisarginfo
.op
);
3949 bias
= fold_convert (TREE_TYPE (step
), bias
);
3950 bias
= fold_build2 (MULT_EXPR
, TREE_TYPE (step
), bias
, step
);
3952 = fold_build2 (POINTER_TYPE_P (opt
)
3953 ? POINTER_PLUS_EXPR
: PLUS_EXPR
, opt
,
3954 thisarginfo
.op
, bias
);
3958 && thisarginfo
.dt
!= vect_constant_def
3959 && thisarginfo
.dt
!= vect_external_def
3961 && TREE_CODE (op
) == SSA_NAME
3962 && simple_iv (loop
, loop_containing_stmt (stmt
), op
,
3964 && tree_fits_shwi_p (iv
.step
))
3966 thisarginfo
.linear_step
= tree_to_shwi (iv
.step
);
3967 thisarginfo
.op
= iv
.base
;
3969 else if ((thisarginfo
.dt
== vect_constant_def
3970 || thisarginfo
.dt
== vect_external_def
)
3971 && POINTER_TYPE_P (TREE_TYPE (op
)))
3972 thisarginfo
.align
= get_pointer_alignment (op
) / BITS_PER_UNIT
;
3973 /* Addresses of array elements indexed by GOMP_SIMD_LANE are
3975 if (POINTER_TYPE_P (TREE_TYPE (op
))
3976 && !thisarginfo
.linear_step
3978 && thisarginfo
.dt
!= vect_constant_def
3979 && thisarginfo
.dt
!= vect_external_def
3982 && TREE_CODE (op
) == SSA_NAME
)
3983 vect_simd_lane_linear (op
, loop
, &thisarginfo
);
3985 arginfo
.quick_push (thisarginfo
);
3988 unsigned HOST_WIDE_INT vf
;
3989 if (!LOOP_VINFO_VECT_FACTOR (loop_vinfo
).is_constant (&vf
))
3991 if (dump_enabled_p ())
3992 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3993 "not considering SIMD clones; not yet supported"
3994 " for variable-width vectors.\n");
3998 unsigned int badness
= 0;
3999 struct cgraph_node
*bestn
= NULL
;
4000 if (STMT_VINFO_SIMD_CLONE_INFO (stmt_info
).exists ())
4001 bestn
= cgraph_node::get (STMT_VINFO_SIMD_CLONE_INFO (stmt_info
)[0]);
4003 for (struct cgraph_node
*n
= node
->simd_clones
; n
!= NULL
;
4004 n
= n
->simdclone
->next_clone
)
4006 unsigned int this_badness
= 0;
4007 if (n
->simdclone
->simdlen
> vf
4008 || n
->simdclone
->nargs
!= nargs
)
4010 if (n
->simdclone
->simdlen
< vf
)
4011 this_badness
+= (exact_log2 (vf
)
4012 - exact_log2 (n
->simdclone
->simdlen
)) * 1024;
4013 if (n
->simdclone
->inbranch
)
4014 this_badness
+= 2048;
4015 int target_badness
= targetm
.simd_clone
.usable (n
);
4016 if (target_badness
< 0)
4018 this_badness
+= target_badness
* 512;
4019 /* FORNOW: Have to add code to add the mask argument. */
4020 if (n
->simdclone
->inbranch
)
4022 for (i
= 0; i
< nargs
; i
++)
4024 switch (n
->simdclone
->args
[i
].arg_type
)
4026 case SIMD_CLONE_ARG_TYPE_VECTOR
:
4027 if (!useless_type_conversion_p
4028 (n
->simdclone
->args
[i
].orig_type
,
4029 TREE_TYPE (gimple_call_arg (stmt
, i
))))
4031 else if (arginfo
[i
].dt
== vect_constant_def
4032 || arginfo
[i
].dt
== vect_external_def
4033 || arginfo
[i
].linear_step
)
4036 case SIMD_CLONE_ARG_TYPE_UNIFORM
:
4037 if (arginfo
[i
].dt
!= vect_constant_def
4038 && arginfo
[i
].dt
!= vect_external_def
)
4041 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP
:
4042 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP
:
4043 if (arginfo
[i
].dt
== vect_constant_def
4044 || arginfo
[i
].dt
== vect_external_def
4045 || (arginfo
[i
].linear_step
4046 != n
->simdclone
->args
[i
].linear_step
))
4049 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP
:
4050 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP
:
4051 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP
:
4052 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP
:
4053 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP
:
4054 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP
:
4058 case SIMD_CLONE_ARG_TYPE_MASK
:
4061 if (i
== (size_t) -1)
4063 if (n
->simdclone
->args
[i
].alignment
> arginfo
[i
].align
)
4068 if (arginfo
[i
].align
)
4069 this_badness
+= (exact_log2 (arginfo
[i
].align
)
4070 - exact_log2 (n
->simdclone
->args
[i
].alignment
));
4072 if (i
== (size_t) -1)
4074 if (bestn
== NULL
|| this_badness
< badness
)
4077 badness
= this_badness
;
4084 for (i
= 0; i
< nargs
; i
++)
4085 if ((arginfo
[i
].dt
== vect_constant_def
4086 || arginfo
[i
].dt
== vect_external_def
)
4087 && bestn
->simdclone
->args
[i
].arg_type
== SIMD_CLONE_ARG_TYPE_VECTOR
)
4089 tree arg_type
= TREE_TYPE (gimple_call_arg (stmt
, i
));
4090 arginfo
[i
].vectype
= get_vectype_for_scalar_type (vinfo
, arg_type
);
4091 if (arginfo
[i
].vectype
== NULL
4092 || (simd_clone_subparts (arginfo
[i
].vectype
)
4093 > bestn
->simdclone
->simdlen
))
4097 fndecl
= bestn
->decl
;
4098 nunits
= bestn
->simdclone
->simdlen
;
4099 ncopies
= vf
/ nunits
;
4101 /* If the function isn't const, only allow it in simd loops where user
4102 has asserted that at least nunits consecutive iterations can be
4103 performed using SIMD instructions. */
4104 if ((loop
== NULL
|| (unsigned) loop
->safelen
< nunits
)
4105 && gimple_vuse (stmt
))
4108 /* Sanity check: make sure that at least one copy of the vectorized stmt
4109 needs to be generated. */
4110 gcc_assert (ncopies
>= 1);
4112 if (!vec_stmt
) /* transformation not required. */
4114 STMT_VINFO_SIMD_CLONE_INFO (stmt_info
).safe_push (bestn
->decl
);
4115 for (i
= 0; i
< nargs
; i
++)
4116 if ((bestn
->simdclone
->args
[i
].arg_type
4117 == SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP
)
4118 || (bestn
->simdclone
->args
[i
].arg_type
4119 == SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP
))
4121 STMT_VINFO_SIMD_CLONE_INFO (stmt_info
).safe_grow_cleared (i
* 3
4123 STMT_VINFO_SIMD_CLONE_INFO (stmt_info
).safe_push (arginfo
[i
].op
);
4124 tree lst
= POINTER_TYPE_P (TREE_TYPE (arginfo
[i
].op
))
4125 ? size_type_node
: TREE_TYPE (arginfo
[i
].op
);
4126 tree ls
= build_int_cst (lst
, arginfo
[i
].linear_step
);
4127 STMT_VINFO_SIMD_CLONE_INFO (stmt_info
).safe_push (ls
);
4128 tree sll
= arginfo
[i
].simd_lane_linear
4129 ? boolean_true_node
: boolean_false_node
;
4130 STMT_VINFO_SIMD_CLONE_INFO (stmt_info
).safe_push (sll
);
4132 STMT_VINFO_TYPE (stmt_info
) = call_simd_clone_vec_info_type
;
4133 DUMP_VECT_SCOPE ("vectorizable_simd_clone_call");
4134 /* vect_model_simple_cost (stmt_info, ncopies, dt, slp_node, cost_vec); */
4140 if (dump_enabled_p ())
4141 dump_printf_loc (MSG_NOTE
, vect_location
, "transform call.\n");
4144 scalar_dest
= gimple_call_lhs (stmt
);
4145 vec_dest
= NULL_TREE
;
4150 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
4151 rtype
= TREE_TYPE (TREE_TYPE (fndecl
));
4152 if (TREE_CODE (rtype
) == ARRAY_TYPE
)
4155 rtype
= TREE_TYPE (ratype
);
4159 prev_stmt_info
= NULL
;
4160 for (j
= 0; j
< ncopies
; ++j
)
4162 /* Build argument list for the vectorized call. */
4164 vargs
.create (nargs
);
4168 for (i
= 0; i
< nargs
; i
++)
4170 unsigned int k
, l
, m
, o
;
4172 op
= gimple_call_arg (stmt
, i
);
4173 switch (bestn
->simdclone
->args
[i
].arg_type
)
4175 case SIMD_CLONE_ARG_TYPE_VECTOR
:
4176 atype
= bestn
->simdclone
->args
[i
].vector_type
;
4177 o
= nunits
/ simd_clone_subparts (atype
);
4178 for (m
= j
* o
; m
< (j
+ 1) * o
; m
++)
4180 if (simd_clone_subparts (atype
)
4181 < simd_clone_subparts (arginfo
[i
].vectype
))
4183 poly_uint64 prec
= GET_MODE_BITSIZE (TYPE_MODE (atype
));
4184 k
= (simd_clone_subparts (arginfo
[i
].vectype
)
4185 / simd_clone_subparts (atype
));
4186 gcc_assert ((k
& (k
- 1)) == 0);
4189 = vect_get_vec_def_for_operand (op
, stmt_info
);
4192 vec_oprnd0
= arginfo
[i
].op
;
4193 if ((m
& (k
- 1)) == 0)
4195 = vect_get_vec_def_for_stmt_copy (vinfo
,
4198 arginfo
[i
].op
= vec_oprnd0
;
4200 = build3 (BIT_FIELD_REF
, atype
, vec_oprnd0
,
4202 bitsize_int ((m
& (k
- 1)) * prec
));
4204 = gimple_build_assign (make_ssa_name (atype
),
4206 vect_finish_stmt_generation (stmt_info
, new_stmt
, gsi
);
4207 vargs
.safe_push (gimple_assign_lhs (new_stmt
));
4211 k
= (simd_clone_subparts (atype
)
4212 / simd_clone_subparts (arginfo
[i
].vectype
));
4213 gcc_assert ((k
& (k
- 1)) == 0);
4214 vec
<constructor_elt
, va_gc
> *ctor_elts
;
4216 vec_alloc (ctor_elts
, k
);
4219 for (l
= 0; l
< k
; l
++)
4221 if (m
== 0 && l
== 0)
4223 = vect_get_vec_def_for_operand (op
, stmt_info
);
4226 = vect_get_vec_def_for_stmt_copy (vinfo
,
4228 arginfo
[i
].op
= vec_oprnd0
;
4231 CONSTRUCTOR_APPEND_ELT (ctor_elts
, NULL_TREE
,
4235 vargs
.safe_push (vec_oprnd0
);
4238 vec_oprnd0
= build_constructor (atype
, ctor_elts
);
4240 = gimple_build_assign (make_ssa_name (atype
),
4242 vect_finish_stmt_generation (stmt_info
, new_stmt
,
4244 vargs
.safe_push (gimple_assign_lhs (new_stmt
));
4249 case SIMD_CLONE_ARG_TYPE_UNIFORM
:
4250 vargs
.safe_push (op
);
4252 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP
:
4253 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP
:
4258 = force_gimple_operand (unshare_expr (arginfo
[i
].op
),
4259 &stmts
, true, NULL_TREE
);
4263 edge pe
= loop_preheader_edge (loop
);
4264 new_bb
= gsi_insert_seq_on_edge_immediate (pe
, stmts
);
4265 gcc_assert (!new_bb
);
4267 if (arginfo
[i
].simd_lane_linear
)
4269 vargs
.safe_push (arginfo
[i
].op
);
4272 tree phi_res
= copy_ssa_name (op
);
4273 gphi
*new_phi
= create_phi_node (phi_res
, loop
->header
);
4274 loop_vinfo
->add_stmt (new_phi
);
4275 add_phi_arg (new_phi
, arginfo
[i
].op
,
4276 loop_preheader_edge (loop
), UNKNOWN_LOCATION
);
4278 = POINTER_TYPE_P (TREE_TYPE (op
))
4279 ? POINTER_PLUS_EXPR
: PLUS_EXPR
;
4280 tree type
= POINTER_TYPE_P (TREE_TYPE (op
))
4281 ? sizetype
: TREE_TYPE (op
);
4283 = wi::mul (bestn
->simdclone
->args
[i
].linear_step
,
4285 tree tcst
= wide_int_to_tree (type
, cst
);
4286 tree phi_arg
= copy_ssa_name (op
);
4288 = gimple_build_assign (phi_arg
, code
, phi_res
, tcst
);
4289 gimple_stmt_iterator si
= gsi_after_labels (loop
->header
);
4290 gsi_insert_after (&si
, new_stmt
, GSI_NEW_STMT
);
4291 loop_vinfo
->add_stmt (new_stmt
);
4292 add_phi_arg (new_phi
, phi_arg
, loop_latch_edge (loop
),
4294 arginfo
[i
].op
= phi_res
;
4295 vargs
.safe_push (phi_res
);
4300 = POINTER_TYPE_P (TREE_TYPE (op
))
4301 ? POINTER_PLUS_EXPR
: PLUS_EXPR
;
4302 tree type
= POINTER_TYPE_P (TREE_TYPE (op
))
4303 ? sizetype
: TREE_TYPE (op
);
4305 = wi::mul (bestn
->simdclone
->args
[i
].linear_step
,
4307 tree tcst
= wide_int_to_tree (type
, cst
);
4308 new_temp
= make_ssa_name (TREE_TYPE (op
));
4310 = gimple_build_assign (new_temp
, code
,
4311 arginfo
[i
].op
, tcst
);
4312 vect_finish_stmt_generation (stmt_info
, new_stmt
, gsi
);
4313 vargs
.safe_push (new_temp
);
4316 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP
:
4317 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP
:
4318 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP
:
4319 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP
:
4320 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP
:
4321 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP
:
4327 gcall
*new_call
= gimple_build_call_vec (fndecl
, vargs
);
4330 gcc_assert (ratype
|| simd_clone_subparts (rtype
) == nunits
);
4332 new_temp
= create_tmp_var (ratype
);
4333 else if (simd_clone_subparts (vectype
)
4334 == simd_clone_subparts (rtype
))
4335 new_temp
= make_ssa_name (vec_dest
, new_call
);
4337 new_temp
= make_ssa_name (rtype
, new_call
);
4338 gimple_call_set_lhs (new_call
, new_temp
);
4340 stmt_vec_info new_stmt_info
4341 = vect_finish_stmt_generation (stmt_info
, new_call
, gsi
);
4345 if (simd_clone_subparts (vectype
) < nunits
)
4348 poly_uint64 prec
= GET_MODE_BITSIZE (TYPE_MODE (vectype
));
4349 poly_uint64 bytes
= GET_MODE_SIZE (TYPE_MODE (vectype
));
4350 k
= nunits
/ simd_clone_subparts (vectype
);
4351 gcc_assert ((k
& (k
- 1)) == 0);
4352 for (l
= 0; l
< k
; l
++)
4357 t
= build_fold_addr_expr (new_temp
);
4358 t
= build2 (MEM_REF
, vectype
, t
,
4359 build_int_cst (TREE_TYPE (t
), l
* bytes
));
4362 t
= build3 (BIT_FIELD_REF
, vectype
, new_temp
,
4363 bitsize_int (prec
), bitsize_int (l
* prec
));
4365 = gimple_build_assign (make_ssa_name (vectype
), t
);
4367 = vect_finish_stmt_generation (stmt_info
, new_stmt
, gsi
);
4369 if (j
== 0 && l
== 0)
4370 STMT_VINFO_VEC_STMT (stmt_info
)
4371 = *vec_stmt
= new_stmt_info
;
4373 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt_info
;
4375 prev_stmt_info
= new_stmt_info
;
4379 vect_clobber_variable (stmt_info
, gsi
, new_temp
);
4382 else if (simd_clone_subparts (vectype
) > nunits
)
4384 unsigned int k
= (simd_clone_subparts (vectype
)
4385 / simd_clone_subparts (rtype
));
4386 gcc_assert ((k
& (k
- 1)) == 0);
4387 if ((j
& (k
- 1)) == 0)
4388 vec_alloc (ret_ctor_elts
, k
);
4391 unsigned int m
, o
= nunits
/ simd_clone_subparts (rtype
);
4392 for (m
= 0; m
< o
; m
++)
4394 tree tem
= build4 (ARRAY_REF
, rtype
, new_temp
,
4395 size_int (m
), NULL_TREE
, NULL_TREE
);
4397 = gimple_build_assign (make_ssa_name (rtype
), tem
);
4399 = vect_finish_stmt_generation (stmt_info
, new_stmt
,
4401 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts
, NULL_TREE
,
4402 gimple_assign_lhs (new_stmt
));
4404 vect_clobber_variable (stmt_info
, gsi
, new_temp
);
4407 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts
, NULL_TREE
, new_temp
);
4408 if ((j
& (k
- 1)) != k
- 1)
4410 vec_oprnd0
= build_constructor (vectype
, ret_ctor_elts
);
4412 = gimple_build_assign (make_ssa_name (vec_dest
), vec_oprnd0
);
4414 = vect_finish_stmt_generation (stmt_info
, new_stmt
, gsi
);
4416 if ((unsigned) j
== k
- 1)
4417 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt_info
;
4419 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt_info
;
4421 prev_stmt_info
= new_stmt_info
;
4426 tree t
= build_fold_addr_expr (new_temp
);
4427 t
= build2 (MEM_REF
, vectype
, t
,
4428 build_int_cst (TREE_TYPE (t
), 0));
4430 = gimple_build_assign (make_ssa_name (vec_dest
), t
);
4432 = vect_finish_stmt_generation (stmt_info
, new_stmt
, gsi
);
4433 vect_clobber_variable (stmt_info
, gsi
, new_temp
);
4438 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt_info
;
4440 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt_info
;
4442 prev_stmt_info
= new_stmt_info
;
4447 /* The call in STMT might prevent it from being removed in dce.
4448 We however cannot remove it here, due to the way the ssa name
4449 it defines is mapped to the new definition. So just replace
4450 rhs of the statement with something harmless. */
4458 type
= TREE_TYPE (scalar_dest
);
4459 lhs
= gimple_call_lhs (vect_orig_stmt (stmt_info
)->stmt
);
4460 new_stmt
= gimple_build_assign (lhs
, build_zero_cst (type
));
4463 new_stmt
= gimple_build_nop ();
4464 vinfo
->replace_stmt (gsi
, vect_orig_stmt (stmt_info
), new_stmt
);
4465 unlink_stmt_vdef (stmt
);
4471 /* Function vect_gen_widened_results_half
4473 Create a vector stmt whose code, type, number of arguments, and result
4474 variable are CODE, OP_TYPE, and VEC_DEST, and its arguments are
4475 VEC_OPRND0 and VEC_OPRND1. The new vector stmt is to be inserted at GSI.
4476 In the case that CODE is a CALL_EXPR, this means that a call to DECL
4477 needs to be created (DECL is a function-decl of a target-builtin).
4478 STMT_INFO is the original scalar stmt that we are vectorizing. */
4481 vect_gen_widened_results_half (enum tree_code code
,
4483 tree vec_oprnd0
, tree vec_oprnd1
, int op_type
,
4484 tree vec_dest
, gimple_stmt_iterator
*gsi
,
4485 stmt_vec_info stmt_info
)
4490 /* Generate half of the widened result: */
4491 if (code
== CALL_EXPR
)
4493 /* Target specific support */
4494 if (op_type
== binary_op
)
4495 new_stmt
= gimple_build_call (decl
, 2, vec_oprnd0
, vec_oprnd1
);
4497 new_stmt
= gimple_build_call (decl
, 1, vec_oprnd0
);
4498 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
4499 gimple_call_set_lhs (new_stmt
, new_temp
);
4503 /* Generic support */
4504 gcc_assert (op_type
== TREE_CODE_LENGTH (code
));
4505 if (op_type
!= binary_op
)
4507 new_stmt
= gimple_build_assign (vec_dest
, code
, vec_oprnd0
, vec_oprnd1
);
4508 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
4509 gimple_assign_set_lhs (new_stmt
, new_temp
);
4511 vect_finish_stmt_generation (stmt_info
, new_stmt
, gsi
);
4517 /* Get vectorized definitions for loop-based vectorization of STMT_INFO.
4518 For the first operand we call vect_get_vec_def_for_operand (with OPRND
4519 containing scalar operand), and for the rest we get a copy with
4520 vect_get_vec_def_for_stmt_copy() using the previous vector definition
4521 (stored in OPRND). See vect_get_vec_def_for_stmt_copy() for details.
4522 The vectors are collected into VEC_OPRNDS. */
4525 vect_get_loop_based_defs (tree
*oprnd
, stmt_vec_info stmt_info
,
4526 vec
<tree
> *vec_oprnds
, int multi_step_cvt
)
4528 vec_info
*vinfo
= stmt_info
->vinfo
;
4531 /* Get first vector operand. */
4532 /* All the vector operands except the very first one (that is scalar oprnd)
4534 if (TREE_CODE (TREE_TYPE (*oprnd
)) != VECTOR_TYPE
)
4535 vec_oprnd
= vect_get_vec_def_for_operand (*oprnd
, stmt_info
);
4537 vec_oprnd
= vect_get_vec_def_for_stmt_copy (vinfo
, *oprnd
);
4539 vec_oprnds
->quick_push (vec_oprnd
);
4541 /* Get second vector operand. */
4542 vec_oprnd
= vect_get_vec_def_for_stmt_copy (vinfo
, vec_oprnd
);
4543 vec_oprnds
->quick_push (vec_oprnd
);
4547 /* For conversion in multiple steps, continue to get operands
4550 vect_get_loop_based_defs (oprnd
, stmt_info
, vec_oprnds
,
4551 multi_step_cvt
- 1);
4555 /* Create vectorized demotion statements for vector operands from VEC_OPRNDS.
4556 For multi-step conversions store the resulting vectors and call the function
4560 vect_create_vectorized_demotion_stmts (vec
<tree
> *vec_oprnds
,
4562 stmt_vec_info stmt_info
,
4564 gimple_stmt_iterator
*gsi
,
4565 slp_tree slp_node
, enum tree_code code
,
4566 stmt_vec_info
*prev_stmt_info
)
4569 tree vop0
, vop1
, new_tmp
, vec_dest
;
4571 vec_dest
= vec_dsts
.pop ();
4573 for (i
= 0; i
< vec_oprnds
->length (); i
+= 2)
4575 /* Create demotion operation. */
4576 vop0
= (*vec_oprnds
)[i
];
4577 vop1
= (*vec_oprnds
)[i
+ 1];
4578 gassign
*new_stmt
= gimple_build_assign (vec_dest
, code
, vop0
, vop1
);
4579 new_tmp
= make_ssa_name (vec_dest
, new_stmt
);
4580 gimple_assign_set_lhs (new_stmt
, new_tmp
);
4581 stmt_vec_info new_stmt_info
4582 = vect_finish_stmt_generation (stmt_info
, new_stmt
, gsi
);
4585 /* Store the resulting vector for next recursive call. */
4586 (*vec_oprnds
)[i
/2] = new_tmp
;
4589 /* This is the last step of the conversion sequence. Store the
4590 vectors in SLP_NODE or in vector info of the scalar statement
4591 (or in STMT_VINFO_RELATED_STMT chain). */
4593 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt_info
);
4596 if (!*prev_stmt_info
)
4597 STMT_VINFO_VEC_STMT (stmt_info
) = new_stmt_info
;
4599 STMT_VINFO_RELATED_STMT (*prev_stmt_info
) = new_stmt_info
;
4601 *prev_stmt_info
= new_stmt_info
;
4606 /* For multi-step demotion operations we first generate demotion operations
4607 from the source type to the intermediate types, and then combine the
4608 results (stored in VEC_OPRNDS) in demotion operation to the destination
4612 /* At each level of recursion we have half of the operands we had at the
4614 vec_oprnds
->truncate ((i
+1)/2);
4615 vect_create_vectorized_demotion_stmts (vec_oprnds
, multi_step_cvt
- 1,
4616 stmt_info
, vec_dsts
, gsi
,
4617 slp_node
, VEC_PACK_TRUNC_EXPR
,
4621 vec_dsts
.quick_push (vec_dest
);
4625 /* Create vectorized promotion statements for vector operands from VEC_OPRNDS0
4626 and VEC_OPRNDS1, for a binary operation associated with scalar statement
4627 STMT_INFO. For multi-step conversions store the resulting vectors and
4628 call the function recursively. */
4631 vect_create_vectorized_promotion_stmts (vec
<tree
> *vec_oprnds0
,
4632 vec
<tree
> *vec_oprnds1
,
4633 stmt_vec_info stmt_info
, tree vec_dest
,
4634 gimple_stmt_iterator
*gsi
,
4635 enum tree_code code1
,
4636 enum tree_code code2
, tree decl1
,
4637 tree decl2
, int op_type
)
4640 tree vop0
, vop1
, new_tmp1
, new_tmp2
;
4641 gimple
*new_stmt1
, *new_stmt2
;
4642 vec
<tree
> vec_tmp
= vNULL
;
4644 vec_tmp
.create (vec_oprnds0
->length () * 2);
4645 FOR_EACH_VEC_ELT (*vec_oprnds0
, i
, vop0
)
4647 if (op_type
== binary_op
)
4648 vop1
= (*vec_oprnds1
)[i
];
4652 /* Generate the two halves of promotion operation. */
4653 new_stmt1
= vect_gen_widened_results_half (code1
, decl1
, vop0
, vop1
,
4654 op_type
, vec_dest
, gsi
,
4656 new_stmt2
= vect_gen_widened_results_half (code2
, decl2
, vop0
, vop1
,
4657 op_type
, vec_dest
, gsi
,
4659 if (is_gimple_call (new_stmt1
))
4661 new_tmp1
= gimple_call_lhs (new_stmt1
);
4662 new_tmp2
= gimple_call_lhs (new_stmt2
);
4666 new_tmp1
= gimple_assign_lhs (new_stmt1
);
4667 new_tmp2
= gimple_assign_lhs (new_stmt2
);
4670 /* Store the results for the next step. */
4671 vec_tmp
.quick_push (new_tmp1
);
4672 vec_tmp
.quick_push (new_tmp2
);
4675 vec_oprnds0
->release ();
4676 *vec_oprnds0
= vec_tmp
;
4680 /* Check if STMT_INFO performs a conversion operation that can be vectorized.
4681 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
4682 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
4683 Return true if STMT_INFO is vectorizable in this way. */
4686 vectorizable_conversion (stmt_vec_info stmt_info
, gimple_stmt_iterator
*gsi
,
4687 stmt_vec_info
*vec_stmt
, slp_tree slp_node
,
4688 stmt_vector_for_cost
*cost_vec
)
4692 tree op0
, op1
= NULL_TREE
;
4693 tree vec_oprnd0
= NULL_TREE
, vec_oprnd1
= NULL_TREE
;
4694 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
4695 enum tree_code code
, code1
= ERROR_MARK
, code2
= ERROR_MARK
;
4696 enum tree_code codecvt1
= ERROR_MARK
, codecvt2
= ERROR_MARK
;
4697 tree decl1
= NULL_TREE
, decl2
= NULL_TREE
;
4699 enum vect_def_type dt
[2] = {vect_unknown_def_type
, vect_unknown_def_type
};
4701 stmt_vec_info prev_stmt_info
;
4702 poly_uint64 nunits_in
;
4703 poly_uint64 nunits_out
;
4704 tree vectype_out
, vectype_in
;
4706 tree lhs_type
, rhs_type
;
4707 enum { NARROW
, NONE
, WIDEN
} modifier
;
4708 vec
<tree
> vec_oprnds0
= vNULL
;
4709 vec
<tree
> vec_oprnds1
= vNULL
;
4711 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
4712 vec_info
*vinfo
= stmt_info
->vinfo
;
4713 int multi_step_cvt
= 0;
4714 vec
<tree
> interm_types
= vNULL
;
4715 tree last_oprnd
, intermediate_type
, cvt_type
= NULL_TREE
;
4717 unsigned short fltsz
;
4719 /* Is STMT a vectorizable conversion? */
4721 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
4724 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
4728 gassign
*stmt
= dyn_cast
<gassign
*> (stmt_info
->stmt
);
4732 if (TREE_CODE (gimple_assign_lhs (stmt
)) != SSA_NAME
)
4735 code
= gimple_assign_rhs_code (stmt
);
4736 if (!CONVERT_EXPR_CODE_P (code
)
4737 && code
!= FIX_TRUNC_EXPR
4738 && code
!= FLOAT_EXPR
4739 && code
!= WIDEN_MULT_EXPR
4740 && code
!= WIDEN_LSHIFT_EXPR
)
4743 op_type
= TREE_CODE_LENGTH (code
);
4745 /* Check types of lhs and rhs. */
4746 scalar_dest
= gimple_assign_lhs (stmt
);
4747 lhs_type
= TREE_TYPE (scalar_dest
);
4748 vectype_out
= STMT_VINFO_VECTYPE (stmt_info
);
4750 op0
= gimple_assign_rhs1 (stmt
);
4751 rhs_type
= TREE_TYPE (op0
);
4753 if ((code
!= FIX_TRUNC_EXPR
&& code
!= FLOAT_EXPR
)
4754 && !((INTEGRAL_TYPE_P (lhs_type
)
4755 && INTEGRAL_TYPE_P (rhs_type
))
4756 || (SCALAR_FLOAT_TYPE_P (lhs_type
)
4757 && SCALAR_FLOAT_TYPE_P (rhs_type
))))
4760 if (!VECTOR_BOOLEAN_TYPE_P (vectype_out
)
4761 && ((INTEGRAL_TYPE_P (lhs_type
)
4762 && !type_has_mode_precision_p (lhs_type
))
4763 || (INTEGRAL_TYPE_P (rhs_type
)
4764 && !type_has_mode_precision_p (rhs_type
))))
4766 if (dump_enabled_p ())
4767 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4768 "type conversion to/from bit-precision unsupported."
4773 /* Check the operands of the operation. */
4774 if (!vect_is_simple_use (op0
, vinfo
, &dt
[0], &vectype_in
))
4776 if (dump_enabled_p ())
4777 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4778 "use not simple.\n");
4781 if (op_type
== binary_op
)
4785 op1
= gimple_assign_rhs2 (stmt
);
4786 gcc_assert (code
== WIDEN_MULT_EXPR
|| code
== WIDEN_LSHIFT_EXPR
);
4787 /* For WIDEN_MULT_EXPR, if OP0 is a constant, use the type of
4789 if (CONSTANT_CLASS_P (op0
))
4790 ok
= vect_is_simple_use (op1
, vinfo
, &dt
[1], &vectype_in
);
4792 ok
= vect_is_simple_use (op1
, vinfo
, &dt
[1]);
4796 if (dump_enabled_p ())
4797 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4798 "use not simple.\n");
4803 /* If op0 is an external or constant def, infer the vector type
4804 from the scalar type. */
4806 vectype_in
= get_vectype_for_scalar_type (vinfo
, rhs_type
);
4808 gcc_assert (vectype_in
);
4811 if (dump_enabled_p ())
4812 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4813 "no vectype for scalar type %T\n", rhs_type
);
4818 if (VECTOR_BOOLEAN_TYPE_P (vectype_out
)
4819 && !VECTOR_BOOLEAN_TYPE_P (vectype_in
))
4821 if (dump_enabled_p ())
4822 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4823 "can't convert between boolean and non "
4824 "boolean vectors %T\n", rhs_type
);
4829 nunits_in
= TYPE_VECTOR_SUBPARTS (vectype_in
);
4830 nunits_out
= TYPE_VECTOR_SUBPARTS (vectype_out
);
4831 if (known_eq (nunits_out
, nunits_in
))
4833 else if (multiple_p (nunits_out
, nunits_in
))
4837 gcc_checking_assert (multiple_p (nunits_in
, nunits_out
));
4841 /* Multiple types in SLP are handled by creating the appropriate number of
4842 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
4846 else if (modifier
== NARROW
)
4847 ncopies
= vect_get_num_copies (loop_vinfo
, vectype_out
);
4849 ncopies
= vect_get_num_copies (loop_vinfo
, vectype_in
);
4851 /* Sanity check: make sure that at least one copy of the vectorized stmt
4852 needs to be generated. */
4853 gcc_assert (ncopies
>= 1);
4855 bool found_mode
= false;
4856 scalar_mode lhs_mode
= SCALAR_TYPE_MODE (lhs_type
);
4857 scalar_mode rhs_mode
= SCALAR_TYPE_MODE (rhs_type
);
4858 opt_scalar_mode rhs_mode_iter
;
4860 /* Supportable by target? */
4864 if (code
!= FIX_TRUNC_EXPR
&& code
!= FLOAT_EXPR
)
4866 if (supportable_convert_operation (code
, vectype_out
, vectype_in
,
4871 if (dump_enabled_p ())
4872 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4873 "conversion not supported by target.\n");
4877 if (supportable_widening_operation (code
, stmt_info
, vectype_out
,
4878 vectype_in
, &code1
, &code2
,
4879 &multi_step_cvt
, &interm_types
))
4881 /* Binary widening operation can only be supported directly by the
4883 gcc_assert (!(multi_step_cvt
&& op_type
== binary_op
));
4887 if (code
!= FLOAT_EXPR
4888 || GET_MODE_SIZE (lhs_mode
) <= GET_MODE_SIZE (rhs_mode
))
4891 fltsz
= GET_MODE_SIZE (lhs_mode
);
4892 FOR_EACH_2XWIDER_MODE (rhs_mode_iter
, rhs_mode
)
4894 rhs_mode
= rhs_mode_iter
.require ();
4895 if (GET_MODE_SIZE (rhs_mode
) > fltsz
)
4899 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode
), 0);
4900 cvt_type
= get_same_sized_vectype (cvt_type
, vectype_in
);
4901 if (cvt_type
== NULL_TREE
)
4904 if (GET_MODE_SIZE (rhs_mode
) == fltsz
)
4906 if (!supportable_convert_operation (code
, vectype_out
,
4907 cvt_type
, &decl1
, &codecvt1
))
4910 else if (!supportable_widening_operation (code
, stmt_info
,
4911 vectype_out
, cvt_type
,
4912 &codecvt1
, &codecvt2
,
4917 gcc_assert (multi_step_cvt
== 0);
4919 if (supportable_widening_operation (NOP_EXPR
, stmt_info
, cvt_type
,
4920 vectype_in
, &code1
, &code2
,
4921 &multi_step_cvt
, &interm_types
))
4931 if (GET_MODE_SIZE (rhs_mode
) == fltsz
)
4932 codecvt2
= ERROR_MARK
;
4936 interm_types
.safe_push (cvt_type
);
4937 cvt_type
= NULL_TREE
;
4942 gcc_assert (op_type
== unary_op
);
4943 if (supportable_narrowing_operation (code
, vectype_out
, vectype_in
,
4944 &code1
, &multi_step_cvt
,
4948 if (code
!= FIX_TRUNC_EXPR
4949 || GET_MODE_SIZE (lhs_mode
) >= GET_MODE_SIZE (rhs_mode
))
4953 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode
), 0);
4954 cvt_type
= get_same_sized_vectype (cvt_type
, vectype_in
);
4955 if (cvt_type
== NULL_TREE
)
4957 if (!supportable_convert_operation (code
, cvt_type
, vectype_in
,
4960 if (supportable_narrowing_operation (NOP_EXPR
, vectype_out
, cvt_type
,
4961 &code1
, &multi_step_cvt
,
4970 if (!vec_stmt
) /* transformation not required. */
4972 DUMP_VECT_SCOPE ("vectorizable_conversion");
4973 if (modifier
== NONE
)
4975 STMT_VINFO_TYPE (stmt_info
) = type_conversion_vec_info_type
;
4976 vect_model_simple_cost (stmt_info
, ncopies
, dt
, ndts
, slp_node
,
4979 else if (modifier
== NARROW
)
4981 STMT_VINFO_TYPE (stmt_info
) = type_demotion_vec_info_type
;
4982 /* The final packing step produces one vector result per copy. */
4983 unsigned int nvectors
4984 = (slp_node
? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
) : ncopies
);
4985 vect_model_promotion_demotion_cost (stmt_info
, dt
, nvectors
,
4986 multi_step_cvt
, cost_vec
);
4990 STMT_VINFO_TYPE (stmt_info
) = type_promotion_vec_info_type
;
4991 /* The initial unpacking step produces two vector results
4992 per copy. MULTI_STEP_CVT is 0 for a single conversion,
4993 so >> MULTI_STEP_CVT divides by 2^(number of steps - 1). */
4994 unsigned int nvectors
4996 ? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
) >> multi_step_cvt
4998 vect_model_promotion_demotion_cost (stmt_info
, dt
, nvectors
,
4999 multi_step_cvt
, cost_vec
);
5001 interm_types
.release ();
5006 if (dump_enabled_p ())
5007 dump_printf_loc (MSG_NOTE
, vect_location
,
5008 "transform conversion. ncopies = %d.\n", ncopies
);
5010 if (op_type
== binary_op
)
5012 if (CONSTANT_CLASS_P (op0
))
5013 op0
= fold_convert (TREE_TYPE (op1
), op0
);
5014 else if (CONSTANT_CLASS_P (op1
))
5015 op1
= fold_convert (TREE_TYPE (op0
), op1
);
5018 /* In case of multi-step conversion, we first generate conversion operations
5019 to the intermediate types, and then from that types to the final one.
5020 We create vector destinations for the intermediate type (TYPES) received
5021 from supportable_*_operation, and store them in the correct order
5022 for future use in vect_create_vectorized_*_stmts (). */
5023 auto_vec
<tree
> vec_dsts (multi_step_cvt
+ 1);
5024 vec_dest
= vect_create_destination_var (scalar_dest
,
5025 (cvt_type
&& modifier
== WIDEN
)
5026 ? cvt_type
: vectype_out
);
5027 vec_dsts
.quick_push (vec_dest
);
5031 for (i
= interm_types
.length () - 1;
5032 interm_types
.iterate (i
, &intermediate_type
); i
--)
5034 vec_dest
= vect_create_destination_var (scalar_dest
,
5036 vec_dsts
.quick_push (vec_dest
);
5041 vec_dest
= vect_create_destination_var (scalar_dest
,
5043 ? vectype_out
: cvt_type
);
5047 if (modifier
== WIDEN
)
5049 vec_oprnds0
.create (multi_step_cvt
? vect_pow2 (multi_step_cvt
) : 1);
5050 if (op_type
== binary_op
)
5051 vec_oprnds1
.create (1);
5053 else if (modifier
== NARROW
)
5054 vec_oprnds0
.create (
5055 2 * (multi_step_cvt
? vect_pow2 (multi_step_cvt
) : 1));
5057 else if (code
== WIDEN_LSHIFT_EXPR
)
5058 vec_oprnds1
.create (slp_node
->vec_stmts_size
);
5061 prev_stmt_info
= NULL
;
5065 for (j
= 0; j
< ncopies
; j
++)
5068 vect_get_vec_defs (op0
, NULL
, stmt_info
, &vec_oprnds0
,
5071 vect_get_vec_defs_for_stmt_copy (vinfo
, &vec_oprnds0
, NULL
);
5073 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vop0
)
5075 stmt_vec_info new_stmt_info
;
5076 /* Arguments are ready, create the new vector stmt. */
5077 if (code1
== CALL_EXPR
)
5079 gcall
*new_stmt
= gimple_build_call (decl1
, 1, vop0
);
5080 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
5081 gimple_call_set_lhs (new_stmt
, new_temp
);
5083 = vect_finish_stmt_generation (stmt_info
, new_stmt
, gsi
);
5087 gcc_assert (TREE_CODE_LENGTH (code1
) == unary_op
);
5089 = gimple_build_assign (vec_dest
, code1
, vop0
);
5090 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
5091 gimple_assign_set_lhs (new_stmt
, new_temp
);
5093 = vect_finish_stmt_generation (stmt_info
, new_stmt
, gsi
);
5097 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt_info
);
5100 if (!prev_stmt_info
)
5101 STMT_VINFO_VEC_STMT (stmt_info
)
5102 = *vec_stmt
= new_stmt_info
;
5104 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt_info
;
5105 prev_stmt_info
= new_stmt_info
;
5112 /* In case the vectorization factor (VF) is bigger than the number
5113 of elements that we can fit in a vectype (nunits), we have to
5114 generate more than one vector stmt - i.e - we need to "unroll"
5115 the vector stmt by a factor VF/nunits. */
5116 for (j
= 0; j
< ncopies
; j
++)
5123 if (code
== WIDEN_LSHIFT_EXPR
)
5128 /* Store vec_oprnd1 for every vector stmt to be created
5129 for SLP_NODE. We check during the analysis that all
5130 the shift arguments are the same. */
5131 for (k
= 0; k
< slp_node
->vec_stmts_size
- 1; k
++)
5132 vec_oprnds1
.quick_push (vec_oprnd1
);
5134 vect_get_vec_defs (op0
, NULL_TREE
, stmt_info
,
5135 &vec_oprnds0
, NULL
, slp_node
);
5138 vect_get_vec_defs (op0
, op1
, stmt_info
, &vec_oprnds0
,
5139 &vec_oprnds1
, slp_node
);
5143 vec_oprnd0
= vect_get_vec_def_for_operand (op0
, stmt_info
);
5144 vec_oprnds0
.quick_push (vec_oprnd0
);
5145 if (op_type
== binary_op
)
5147 if (code
== WIDEN_LSHIFT_EXPR
)
5151 = vect_get_vec_def_for_operand (op1
, stmt_info
);
5152 vec_oprnds1
.quick_push (vec_oprnd1
);
5158 vec_oprnd0
= vect_get_vec_def_for_stmt_copy (vinfo
, vec_oprnd0
);
5159 vec_oprnds0
.truncate (0);
5160 vec_oprnds0
.quick_push (vec_oprnd0
);
5161 if (op_type
== binary_op
)
5163 if (code
== WIDEN_LSHIFT_EXPR
)
5166 vec_oprnd1
= vect_get_vec_def_for_stmt_copy (vinfo
,
5168 vec_oprnds1
.truncate (0);
5169 vec_oprnds1
.quick_push (vec_oprnd1
);
5173 /* Arguments are ready. Create the new vector stmts. */
5174 for (i
= multi_step_cvt
; i
>= 0; i
--)
5176 tree this_dest
= vec_dsts
[i
];
5177 enum tree_code c1
= code1
, c2
= code2
;
5178 if (i
== 0 && codecvt2
!= ERROR_MARK
)
5183 vect_create_vectorized_promotion_stmts (&vec_oprnds0
,
5184 &vec_oprnds1
, stmt_info
,
5186 c1
, c2
, decl1
, decl2
,
5190 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vop0
)
5192 stmt_vec_info new_stmt_info
;
5195 if (codecvt1
== CALL_EXPR
)
5197 gcall
*new_stmt
= gimple_build_call (decl1
, 1, vop0
);
5198 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
5199 gimple_call_set_lhs (new_stmt
, new_temp
);
5201 = vect_finish_stmt_generation (stmt_info
, new_stmt
,
5206 gcc_assert (TREE_CODE_LENGTH (codecvt1
) == unary_op
);
5207 new_temp
= make_ssa_name (vec_dest
);
5209 = gimple_build_assign (new_temp
, codecvt1
, vop0
);
5211 = vect_finish_stmt_generation (stmt_info
, new_stmt
,
5216 new_stmt_info
= vinfo
->lookup_def (vop0
);
5219 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt_info
);
5222 if (!prev_stmt_info
)
5223 STMT_VINFO_VEC_STMT (stmt_info
) = new_stmt_info
;
5225 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt_info
;
5226 prev_stmt_info
= new_stmt_info
;
5231 *vec_stmt
= STMT_VINFO_VEC_STMT (stmt_info
);
5235 /* In case the vectorization factor (VF) is bigger than the number
5236 of elements that we can fit in a vectype (nunits), we have to
5237 generate more than one vector stmt - i.e - we need to "unroll"
5238 the vector stmt by a factor VF/nunits. */
5239 for (j
= 0; j
< ncopies
; j
++)
5243 vect_get_vec_defs (op0
, NULL_TREE
, stmt_info
, &vec_oprnds0
, NULL
,
5247 vec_oprnds0
.truncate (0);
5248 vect_get_loop_based_defs (&last_oprnd
, stmt_info
, &vec_oprnds0
,
5249 vect_pow2 (multi_step_cvt
) - 1);
5252 /* Arguments are ready. Create the new vector stmts. */
5254 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vop0
)
5256 if (codecvt1
== CALL_EXPR
)
5258 gcall
*new_stmt
= gimple_build_call (decl1
, 1, vop0
);
5259 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
5260 gimple_call_set_lhs (new_stmt
, new_temp
);
5261 vect_finish_stmt_generation (stmt_info
, new_stmt
, gsi
);
5265 gcc_assert (TREE_CODE_LENGTH (codecvt1
) == unary_op
);
5266 new_temp
= make_ssa_name (vec_dest
);
5268 = gimple_build_assign (new_temp
, codecvt1
, vop0
);
5269 vect_finish_stmt_generation (stmt_info
, new_stmt
, gsi
);
5272 vec_oprnds0
[i
] = new_temp
;
5275 vect_create_vectorized_demotion_stmts (&vec_oprnds0
, multi_step_cvt
,
5276 stmt_info
, vec_dsts
, gsi
,
5281 *vec_stmt
= STMT_VINFO_VEC_STMT (stmt_info
);
5285 vec_oprnds0
.release ();
5286 vec_oprnds1
.release ();
5287 interm_types
.release ();
5292 /* Return true if we can assume from the scalar form of STMT_INFO that
5293 neither the scalar nor the vector forms will generate code. STMT_INFO
5294 is known not to involve a data reference. */
5297 vect_nop_conversion_p (stmt_vec_info stmt_info
)
5299 gassign
*stmt
= dyn_cast
<gassign
*> (stmt_info
->stmt
);
5303 tree lhs
= gimple_assign_lhs (stmt
);
5304 tree_code code
= gimple_assign_rhs_code (stmt
);
5305 tree rhs
= gimple_assign_rhs1 (stmt
);
5307 if (code
== SSA_NAME
|| code
== VIEW_CONVERT_EXPR
)
5310 if (CONVERT_EXPR_CODE_P (code
))
5311 return tree_nop_conversion_p (TREE_TYPE (lhs
), TREE_TYPE (rhs
));
5316 /* Function vectorizable_assignment.
5318 Check if STMT_INFO performs an assignment (copy) that can be vectorized.
5319 If VEC_STMT is also passed, vectorize the STMT_INFO: create a vectorized
5320 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
5321 Return true if STMT_INFO is vectorizable in this way. */
5324 vectorizable_assignment (stmt_vec_info stmt_info
, gimple_stmt_iterator
*gsi
,
5325 stmt_vec_info
*vec_stmt
, slp_tree slp_node
,
5326 stmt_vector_for_cost
*cost_vec
)
5331 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
5333 enum vect_def_type dt
[1] = {vect_unknown_def_type
};
5337 vec
<tree
> vec_oprnds
= vNULL
;
5339 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
5340 vec_info
*vinfo
= stmt_info
->vinfo
;
5341 stmt_vec_info prev_stmt_info
= NULL
;
5342 enum tree_code code
;
5345 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
5348 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
5352 /* Is vectorizable assignment? */
5353 gassign
*stmt
= dyn_cast
<gassign
*> (stmt_info
->stmt
);
5357 scalar_dest
= gimple_assign_lhs (stmt
);
5358 if (TREE_CODE (scalar_dest
) != SSA_NAME
)
5361 code
= gimple_assign_rhs_code (stmt
);
5362 if (gimple_assign_single_p (stmt
)
5363 || code
== PAREN_EXPR
5364 || CONVERT_EXPR_CODE_P (code
))
5365 op
= gimple_assign_rhs1 (stmt
);
5369 if (code
== VIEW_CONVERT_EXPR
)
5370 op
= TREE_OPERAND (op
, 0);
5372 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
5373 poly_uint64 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
5375 /* Multiple types in SLP are handled by creating the appropriate number of
5376 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
5381 ncopies
= vect_get_num_copies (loop_vinfo
, vectype
);
5383 gcc_assert (ncopies
>= 1);
5385 if (!vect_is_simple_use (op
, vinfo
, &dt
[0], &vectype_in
))
5387 if (dump_enabled_p ())
5388 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5389 "use not simple.\n");
5393 /* We can handle NOP_EXPR conversions that do not change the number
5394 of elements or the vector size. */
5395 if ((CONVERT_EXPR_CODE_P (code
)
5396 || code
== VIEW_CONVERT_EXPR
)
5398 || maybe_ne (TYPE_VECTOR_SUBPARTS (vectype_in
), nunits
)
5399 || maybe_ne (GET_MODE_SIZE (TYPE_MODE (vectype
)),
5400 GET_MODE_SIZE (TYPE_MODE (vectype_in
)))))
5403 /* We do not handle bit-precision changes. */
5404 if ((CONVERT_EXPR_CODE_P (code
)
5405 || code
== VIEW_CONVERT_EXPR
)
5406 && INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest
))
5407 && (!type_has_mode_precision_p (TREE_TYPE (scalar_dest
))
5408 || !type_has_mode_precision_p (TREE_TYPE (op
)))
5409 /* But a conversion that does not change the bit-pattern is ok. */
5410 && !((TYPE_PRECISION (TREE_TYPE (scalar_dest
))
5411 > TYPE_PRECISION (TREE_TYPE (op
)))
5412 && TYPE_UNSIGNED (TREE_TYPE (op
)))
5413 /* Conversion between boolean types of different sizes is
5414 a simple assignment in case their vectypes are same
5416 && (!VECTOR_BOOLEAN_TYPE_P (vectype
)
5417 || !VECTOR_BOOLEAN_TYPE_P (vectype_in
)))
5419 if (dump_enabled_p ())
5420 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5421 "type conversion to/from bit-precision "
5426 if (!vec_stmt
) /* transformation not required. */
5428 STMT_VINFO_TYPE (stmt_info
) = assignment_vec_info_type
;
5429 DUMP_VECT_SCOPE ("vectorizable_assignment");
5430 if (!vect_nop_conversion_p (stmt_info
))
5431 vect_model_simple_cost (stmt_info
, ncopies
, dt
, ndts
, slp_node
,
5437 if (dump_enabled_p ())
5438 dump_printf_loc (MSG_NOTE
, vect_location
, "transform assignment.\n");
5441 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
5444 for (j
= 0; j
< ncopies
; j
++)
5448 vect_get_vec_defs (op
, NULL
, stmt_info
, &vec_oprnds
, NULL
, slp_node
);
5450 vect_get_vec_defs_for_stmt_copy (vinfo
, &vec_oprnds
, NULL
);
5452 /* Arguments are ready. create the new vector stmt. */
5453 stmt_vec_info new_stmt_info
= NULL
;
5454 FOR_EACH_VEC_ELT (vec_oprnds
, i
, vop
)
5456 if (CONVERT_EXPR_CODE_P (code
)
5457 || code
== VIEW_CONVERT_EXPR
)
5458 vop
= build1 (VIEW_CONVERT_EXPR
, vectype
, vop
);
5459 gassign
*new_stmt
= gimple_build_assign (vec_dest
, vop
);
5460 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
5461 gimple_assign_set_lhs (new_stmt
, new_temp
);
5463 = vect_finish_stmt_generation (stmt_info
, new_stmt
, gsi
);
5465 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt_info
);
5472 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt_info
;
5474 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt_info
;
5476 prev_stmt_info
= new_stmt_info
;
5479 vec_oprnds
.release ();
5484 /* Return TRUE if CODE (a shift operation) is supported for SCALAR_TYPE
5485 either as shift by a scalar or by a vector. */
5488 vect_supportable_shift (vec_info
*vinfo
, enum tree_code code
, tree scalar_type
)
5491 machine_mode vec_mode
;
5496 vectype
= get_vectype_for_scalar_type (vinfo
, scalar_type
);
5500 optab
= optab_for_tree_code (code
, vectype
, optab_scalar
);
5502 || optab_handler (optab
, TYPE_MODE (vectype
)) == CODE_FOR_nothing
)
5504 optab
= optab_for_tree_code (code
, vectype
, optab_vector
);
5506 || (optab_handler (optab
, TYPE_MODE (vectype
))
5507 == CODE_FOR_nothing
))
5511 vec_mode
= TYPE_MODE (vectype
);
5512 icode
= (int) optab_handler (optab
, vec_mode
);
5513 if (icode
== CODE_FOR_nothing
)
5520 /* Function vectorizable_shift.
5522 Check if STMT_INFO performs a shift operation that can be vectorized.
5523 If VEC_STMT is also passed, vectorize the STMT_INFO: create a vectorized
5524 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
5525 Return true if STMT_INFO is vectorizable in this way. */
5528 vectorizable_shift (stmt_vec_info stmt_info
, gimple_stmt_iterator
*gsi
,
5529 stmt_vec_info
*vec_stmt
, slp_tree slp_node
,
5530 stmt_vector_for_cost
*cost_vec
)
5534 tree op0
, op1
= NULL
;
5535 tree vec_oprnd1
= NULL_TREE
;
5537 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
5538 enum tree_code code
;
5539 machine_mode vec_mode
;
5543 machine_mode optab_op2_mode
;
5544 enum vect_def_type dt
[2] = {vect_unknown_def_type
, vect_unknown_def_type
};
5546 stmt_vec_info prev_stmt_info
;
5547 poly_uint64 nunits_in
;
5548 poly_uint64 nunits_out
;
5553 vec
<tree
> vec_oprnds0
= vNULL
;
5554 vec
<tree
> vec_oprnds1
= vNULL
;
5557 bool scalar_shift_arg
= true;
5558 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
5559 vec_info
*vinfo
= stmt_info
->vinfo
;
5560 bool incompatible_op1_vectype_p
= false;
5562 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
5565 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
5566 && STMT_VINFO_DEF_TYPE (stmt_info
) != vect_nested_cycle
5570 /* Is STMT a vectorizable binary/unary operation? */
5571 gassign
*stmt
= dyn_cast
<gassign
*> (stmt_info
->stmt
);
5575 if (TREE_CODE (gimple_assign_lhs (stmt
)) != SSA_NAME
)
5578 code
= gimple_assign_rhs_code (stmt
);
5580 if (!(code
== LSHIFT_EXPR
|| code
== RSHIFT_EXPR
|| code
== LROTATE_EXPR
5581 || code
== RROTATE_EXPR
))
5584 scalar_dest
= gimple_assign_lhs (stmt
);
5585 vectype_out
= STMT_VINFO_VECTYPE (stmt_info
);
5586 if (!type_has_mode_precision_p (TREE_TYPE (scalar_dest
)))
5588 if (dump_enabled_p ())
5589 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5590 "bit-precision shifts not supported.\n");
5594 op0
= gimple_assign_rhs1 (stmt
);
5595 if (!vect_is_simple_use (op0
, vinfo
, &dt
[0], &vectype
))
5597 if (dump_enabled_p ())
5598 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5599 "use not simple.\n");
5602 /* If op0 is an external or constant def, infer the vector type
5603 from the scalar type. */
5605 vectype
= get_vectype_for_scalar_type (vinfo
, TREE_TYPE (op0
));
5607 gcc_assert (vectype
);
5610 if (dump_enabled_p ())
5611 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5612 "no vectype for scalar type\n");
5616 nunits_out
= TYPE_VECTOR_SUBPARTS (vectype_out
);
5617 nunits_in
= TYPE_VECTOR_SUBPARTS (vectype
);
5618 if (maybe_ne (nunits_out
, nunits_in
))
5621 op1
= gimple_assign_rhs2 (stmt
);
5622 stmt_vec_info op1_def_stmt_info
;
5623 if (!vect_is_simple_use (op1
, vinfo
, &dt
[1], &op1_vectype
,
5624 &op1_def_stmt_info
))
5626 if (dump_enabled_p ())
5627 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5628 "use not simple.\n");
5632 /* Multiple types in SLP are handled by creating the appropriate number of
5633 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
5638 ncopies
= vect_get_num_copies (loop_vinfo
, vectype
);
5640 gcc_assert (ncopies
>= 1);
5642 /* Determine whether the shift amount is a vector, or scalar. If the
5643 shift/rotate amount is a vector, use the vector/vector shift optabs. */
5645 if ((dt
[1] == vect_internal_def
5646 || dt
[1] == vect_induction_def
5647 || dt
[1] == vect_nested_cycle
)
5649 scalar_shift_arg
= false;
5650 else if (dt
[1] == vect_constant_def
5651 || dt
[1] == vect_external_def
5652 || dt
[1] == vect_internal_def
)
5654 /* In SLP, need to check whether the shift count is the same,
5655 in loops if it is a constant or invariant, it is always
5659 vec
<stmt_vec_info
> stmts
= SLP_TREE_SCALAR_STMTS (slp_node
);
5660 stmt_vec_info slpstmt_info
;
5662 FOR_EACH_VEC_ELT (stmts
, k
, slpstmt_info
)
5664 gassign
*slpstmt
= as_a
<gassign
*> (slpstmt_info
->stmt
);
5665 if (!operand_equal_p (gimple_assign_rhs2 (slpstmt
), op1
, 0))
5666 scalar_shift_arg
= false;
5669 /* For internal SLP defs we have to make sure we see scalar stmts
5670 for all vector elements.
5671 ??? For different vectors we could resort to a different
5672 scalar shift operand but code-generation below simply always
5674 if (dt
[1] == vect_internal_def
5675 && maybe_ne (nunits_out
* SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
),
5677 scalar_shift_arg
= false;
5680 /* If the shift amount is computed by a pattern stmt we cannot
5681 use the scalar amount directly thus give up and use a vector
5683 if (op1_def_stmt_info
&& is_pattern_stmt_p (op1_def_stmt_info
))
5684 scalar_shift_arg
= false;
5688 if (dump_enabled_p ())
5689 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5690 "operand mode requires invariant argument.\n");
5694 /* Vector shifted by vector. */
5695 bool was_scalar_shift_arg
= scalar_shift_arg
;
5696 if (!scalar_shift_arg
)
5698 optab
= optab_for_tree_code (code
, vectype
, optab_vector
);
5699 if (dump_enabled_p ())
5700 dump_printf_loc (MSG_NOTE
, vect_location
,
5701 "vector/vector shift/rotate found.\n");
5704 op1_vectype
= get_vectype_for_scalar_type (vinfo
, TREE_TYPE (op1
));
5705 incompatible_op1_vectype_p
5706 = (op1_vectype
== NULL_TREE
5707 || maybe_ne (TYPE_VECTOR_SUBPARTS (op1_vectype
),
5708 TYPE_VECTOR_SUBPARTS (vectype
))
5709 || TYPE_MODE (op1_vectype
) != TYPE_MODE (vectype
));
5710 if (incompatible_op1_vectype_p
5712 || SLP_TREE_DEF_TYPE
5713 (SLP_TREE_CHILDREN (slp_node
)[1]) != vect_constant_def
))
5715 if (dump_enabled_p ())
5716 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5717 "unusable type for last operand in"
5718 " vector/vector shift/rotate.\n");
5722 /* See if the machine has a vector shifted by scalar insn and if not
5723 then see if it has a vector shifted by vector insn. */
5726 optab
= optab_for_tree_code (code
, vectype
, optab_scalar
);
5728 && optab_handler (optab
, TYPE_MODE (vectype
)) != CODE_FOR_nothing
)
5730 if (dump_enabled_p ())
5731 dump_printf_loc (MSG_NOTE
, vect_location
,
5732 "vector/scalar shift/rotate found.\n");
5736 optab
= optab_for_tree_code (code
, vectype
, optab_vector
);
5738 && (optab_handler (optab
, TYPE_MODE (vectype
))
5739 != CODE_FOR_nothing
))
5741 scalar_shift_arg
= false;
5743 if (dump_enabled_p ())
5744 dump_printf_loc (MSG_NOTE
, vect_location
,
5745 "vector/vector shift/rotate found.\n");
5747 /* Unlike the other binary operators, shifts/rotates have
5748 the rhs being int, instead of the same type as the lhs,
5749 so make sure the scalar is the right type if we are
5750 dealing with vectors of long long/long/short/char. */
5751 if (dt
[1] == vect_constant_def
)
5754 op1
= fold_convert (TREE_TYPE (vectype
), op1
);
5756 else if (!useless_type_conversion_p (TREE_TYPE (vectype
),
5759 if (vec_stmt
&& !slp_node
)
5761 op1
= fold_convert (TREE_TYPE (vectype
), op1
);
5762 op1
= vect_init_vector (stmt_info
, op1
,
5763 TREE_TYPE (vectype
), NULL
);
5770 /* Supportable by target? */
5773 if (dump_enabled_p ())
5774 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5778 vec_mode
= TYPE_MODE (vectype
);
5779 icode
= (int) optab_handler (optab
, vec_mode
);
5780 if (icode
== CODE_FOR_nothing
)
5782 if (dump_enabled_p ())
5783 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5784 "op not supported by target.\n");
5785 /* Check only during analysis. */
5786 if (maybe_ne (GET_MODE_SIZE (vec_mode
), UNITS_PER_WORD
)
5788 && !vect_worthwhile_without_simd_p (vinfo
, code
)))
5790 if (dump_enabled_p ())
5791 dump_printf_loc (MSG_NOTE
, vect_location
,
5792 "proceeding using word mode.\n");
5795 /* Worthwhile without SIMD support? Check only during analysis. */
5797 && !VECTOR_MODE_P (TYPE_MODE (vectype
))
5798 && !vect_worthwhile_without_simd_p (vinfo
, code
))
5800 if (dump_enabled_p ())
5801 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5802 "not worthwhile without SIMD support.\n");
5806 if (!vec_stmt
) /* transformation not required. */
5808 STMT_VINFO_TYPE (stmt_info
) = shift_vec_info_type
;
5809 DUMP_VECT_SCOPE ("vectorizable_shift");
5810 vect_model_simple_cost (stmt_info
, ncopies
, dt
, ndts
, slp_node
, cost_vec
);
5816 if (dump_enabled_p ())
5817 dump_printf_loc (MSG_NOTE
, vect_location
,
5818 "transform binary/unary operation.\n");
5821 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
5823 prev_stmt_info
= NULL
;
5824 for (j
= 0; j
< ncopies
; j
++)
5829 if (scalar_shift_arg
)
5831 /* Vector shl and shr insn patterns can be defined with scalar
5832 operand 2 (shift operand). In this case, use constant or loop
5833 invariant op1 directly, without extending it to vector mode
5835 optab_op2_mode
= insn_data
[icode
].operand
[2].mode
;
5836 if (!VECTOR_MODE_P (optab_op2_mode
))
5838 if (dump_enabled_p ())
5839 dump_printf_loc (MSG_NOTE
, vect_location
,
5840 "operand 1 using scalar mode.\n");
5842 vec_oprnds1
.create (slp_node
? slp_node
->vec_stmts_size
: 1);
5843 vec_oprnds1
.quick_push (vec_oprnd1
);
5846 /* Store vec_oprnd1 for every vector stmt to be created
5847 for SLP_NODE. We check during the analysis that all
5848 the shift arguments are the same.
5849 TODO: Allow different constants for different vector
5850 stmts generated for an SLP instance. */
5851 for (k
= 0; k
< slp_node
->vec_stmts_size
- 1; k
++)
5852 vec_oprnds1
.quick_push (vec_oprnd1
);
5856 else if (slp_node
&& incompatible_op1_vectype_p
)
5858 if (was_scalar_shift_arg
)
5860 /* If the argument was the same in all lanes create
5861 the correctly typed vector shift amount directly. */
5862 op1
= fold_convert (TREE_TYPE (vectype
), op1
);
5863 op1
= vect_init_vector (stmt_info
, op1
, TREE_TYPE (vectype
),
5864 !loop_vinfo
? gsi
: NULL
);
5865 vec_oprnd1
= vect_init_vector (stmt_info
, op1
, vectype
,
5866 !loop_vinfo
? gsi
: NULL
);
5867 vec_oprnds1
.create (slp_node
->vec_stmts_size
);
5868 for (k
= 0; k
< slp_node
->vec_stmts_size
; k
++)
5869 vec_oprnds1
.quick_push (vec_oprnd1
);
5871 else if (dt
[1] == vect_constant_def
)
5873 /* Convert the scalar constant shift amounts in-place. */
5874 slp_tree shift
= SLP_TREE_CHILDREN (slp_node
)[1];
5875 gcc_assert (SLP_TREE_DEF_TYPE (shift
) == vect_constant_def
);
5876 for (unsigned i
= 0;
5877 i
< SLP_TREE_SCALAR_OPS (shift
).length (); ++i
)
5879 SLP_TREE_SCALAR_OPS (shift
)[i
]
5880 = fold_convert (TREE_TYPE (vectype
),
5881 SLP_TREE_SCALAR_OPS (shift
)[i
]);
5882 gcc_assert ((TREE_CODE (SLP_TREE_SCALAR_OPS (shift
)[i
])
5887 gcc_assert (TYPE_MODE (op1_vectype
) == TYPE_MODE (vectype
));
5890 /* vec_oprnd1 is available if operand 1 should be of a scalar-type
5891 (a special case for certain kind of vector shifts); otherwise,
5892 operand 1 should be of a vector type (the usual case). */
5894 vect_get_vec_defs (op0
, NULL_TREE
, stmt_info
, &vec_oprnds0
, NULL
,
5897 vect_get_vec_defs (op0
, op1
, stmt_info
, &vec_oprnds0
, &vec_oprnds1
,
5901 vect_get_vec_defs_for_stmt_copy (vinfo
, &vec_oprnds0
, &vec_oprnds1
);
5903 /* Arguments are ready. Create the new vector stmt. */
5904 stmt_vec_info new_stmt_info
= NULL
;
5905 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vop0
)
5907 vop1
= vec_oprnds1
[i
];
5908 gassign
*new_stmt
= gimple_build_assign (vec_dest
, code
, vop0
, vop1
);
5909 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
5910 gimple_assign_set_lhs (new_stmt
, new_temp
);
5912 = vect_finish_stmt_generation (stmt_info
, new_stmt
, gsi
);
5914 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt_info
);
5921 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt_info
;
5923 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt_info
;
5924 prev_stmt_info
= new_stmt_info
;
5927 vec_oprnds0
.release ();
5928 vec_oprnds1
.release ();
5934 /* Function vectorizable_operation.
5936 Check if STMT_INFO performs a binary, unary or ternary operation that can
5938 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
5939 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
5940 Return true if STMT_INFO is vectorizable in this way. */
5943 vectorizable_operation (stmt_vec_info stmt_info
, gimple_stmt_iterator
*gsi
,
5944 stmt_vec_info
*vec_stmt
, slp_tree slp_node
,
5945 stmt_vector_for_cost
*cost_vec
)
5949 tree op0
, op1
= NULL_TREE
, op2
= NULL_TREE
;
5951 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
5952 enum tree_code code
, orig_code
;
5953 machine_mode vec_mode
;
5957 bool target_support_p
;
5958 enum vect_def_type dt
[3]
5959 = {vect_unknown_def_type
, vect_unknown_def_type
, vect_unknown_def_type
};
5961 stmt_vec_info prev_stmt_info
;
5962 poly_uint64 nunits_in
;
5963 poly_uint64 nunits_out
;
5965 int ncopies
, vec_num
;
5967 vec
<tree
> vec_oprnds0
= vNULL
;
5968 vec
<tree
> vec_oprnds1
= vNULL
;
5969 vec
<tree
> vec_oprnds2
= vNULL
;
5970 tree vop0
, vop1
, vop2
;
5971 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
5972 vec_info
*vinfo
= stmt_info
->vinfo
;
5974 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
5977 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
5981 /* Is STMT a vectorizable binary/unary operation? */
5982 gassign
*stmt
= dyn_cast
<gassign
*> (stmt_info
->stmt
);
5986 if (TREE_CODE (gimple_assign_lhs (stmt
)) != SSA_NAME
)
5989 orig_code
= code
= gimple_assign_rhs_code (stmt
);
5991 /* For pointer addition and subtraction, we should use the normal
5992 plus and minus for the vector operation. */
5993 if (code
== POINTER_PLUS_EXPR
)
5995 if (code
== POINTER_DIFF_EXPR
)
5998 /* Support only unary or binary operations. */
5999 op_type
= TREE_CODE_LENGTH (code
);
6000 if (op_type
!= unary_op
&& op_type
!= binary_op
&& op_type
!= ternary_op
)
6002 if (dump_enabled_p ())
6003 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6004 "num. args = %d (not unary/binary/ternary op).\n",
6009 scalar_dest
= gimple_assign_lhs (stmt
);
6010 vectype_out
= STMT_VINFO_VECTYPE (stmt_info
);
6012 /* Most operations cannot handle bit-precision types without extra
6014 if (!VECTOR_BOOLEAN_TYPE_P (vectype_out
)
6015 && !type_has_mode_precision_p (TREE_TYPE (scalar_dest
))
6016 /* Exception are bitwise binary operations. */
6017 && code
!= BIT_IOR_EXPR
6018 && code
!= BIT_XOR_EXPR
6019 && code
!= BIT_AND_EXPR
)
6021 if (dump_enabled_p ())
6022 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6023 "bit-precision arithmetic not supported.\n");
6027 op0
= gimple_assign_rhs1 (stmt
);
6028 if (!vect_is_simple_use (op0
, vinfo
, &dt
[0], &vectype
))
6030 if (dump_enabled_p ())
6031 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6032 "use not simple.\n");
6035 /* If op0 is an external or constant def, infer the vector type
6036 from the scalar type. */
6039 /* For boolean type we cannot determine vectype by
6040 invariant value (don't know whether it is a vector
6041 of booleans or vector of integers). We use output
6042 vectype because operations on boolean don't change
6044 if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op0
)))
6046 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (scalar_dest
)))
6048 if (dump_enabled_p ())
6049 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6050 "not supported operation on bool value.\n");
6053 vectype
= vectype_out
;
6056 vectype
= get_vectype_for_scalar_type (vinfo
, TREE_TYPE (op0
));
6059 gcc_assert (vectype
);
6062 if (dump_enabled_p ())
6063 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6064 "no vectype for scalar type %T\n",
6070 nunits_out
= TYPE_VECTOR_SUBPARTS (vectype_out
);
6071 nunits_in
= TYPE_VECTOR_SUBPARTS (vectype
);
6072 if (maybe_ne (nunits_out
, nunits_in
))
6075 if (op_type
== binary_op
|| op_type
== ternary_op
)
6077 op1
= gimple_assign_rhs2 (stmt
);
6078 if (!vect_is_simple_use (op1
, vinfo
, &dt
[1]))
6080 if (dump_enabled_p ())
6081 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6082 "use not simple.\n");
6086 if (op_type
== ternary_op
)
6088 op2
= gimple_assign_rhs3 (stmt
);
6089 if (!vect_is_simple_use (op2
, vinfo
, &dt
[2]))
6091 if (dump_enabled_p ())
6092 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6093 "use not simple.\n");
6098 /* Multiple types in SLP are handled by creating the appropriate number of
6099 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
6104 vec_num
= SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
);
6108 ncopies
= vect_get_num_copies (loop_vinfo
, vectype
);
6112 gcc_assert (ncopies
>= 1);
6114 /* Shifts are handled in vectorizable_shift (). */
6115 if (code
== LSHIFT_EXPR
|| code
== RSHIFT_EXPR
|| code
== LROTATE_EXPR
6116 || code
== RROTATE_EXPR
)
6119 /* Supportable by target? */
6121 vec_mode
= TYPE_MODE (vectype
);
6122 if (code
== MULT_HIGHPART_EXPR
)
6123 target_support_p
= can_mult_highpart_p (vec_mode
, TYPE_UNSIGNED (vectype
));
6126 optab
= optab_for_tree_code (code
, vectype
, optab_default
);
6129 if (dump_enabled_p ())
6130 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6134 target_support_p
= (optab_handler (optab
, vec_mode
)
6135 != CODE_FOR_nothing
);
6138 if (!target_support_p
)
6140 if (dump_enabled_p ())
6141 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6142 "op not supported by target.\n");
6143 /* Check only during analysis. */
6144 if (maybe_ne (GET_MODE_SIZE (vec_mode
), UNITS_PER_WORD
)
6145 || (!vec_stmt
&& !vect_worthwhile_without_simd_p (vinfo
, code
)))
6147 if (dump_enabled_p ())
6148 dump_printf_loc (MSG_NOTE
, vect_location
,
6149 "proceeding using word mode.\n");
6152 /* Worthwhile without SIMD support? Check only during analysis. */
6153 if (!VECTOR_MODE_P (vec_mode
)
6155 && !vect_worthwhile_without_simd_p (vinfo
, code
))
6157 if (dump_enabled_p ())
6158 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6159 "not worthwhile without SIMD support.\n");
6163 int reduc_idx
= STMT_VINFO_REDUC_IDX (stmt_info
);
6164 vec_loop_masks
*masks
= (loop_vinfo
? &LOOP_VINFO_MASKS (loop_vinfo
) : NULL
);
6165 internal_fn cond_fn
= get_conditional_internal_fn (code
);
6167 if (!vec_stmt
) /* transformation not required. */
6169 /* If this operation is part of a reduction, a fully-masked loop
6170 should only change the active lanes of the reduction chain,
6171 keeping the inactive lanes as-is. */
6173 && LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo
)
6176 if (cond_fn
== IFN_LAST
6177 || !direct_internal_fn_supported_p (cond_fn
, vectype
,
6178 OPTIMIZE_FOR_SPEED
))
6180 if (dump_enabled_p ())
6181 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6182 "can't use a fully-masked loop because no"
6183 " conditional operation is available.\n");
6184 LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo
) = false;
6187 vect_record_loop_mask (loop_vinfo
, masks
, ncopies
* vec_num
,
6191 STMT_VINFO_TYPE (stmt_info
) = op_vec_info_type
;
6192 DUMP_VECT_SCOPE ("vectorizable_operation");
6193 vect_model_simple_cost (stmt_info
, ncopies
, dt
, ndts
, slp_node
, cost_vec
);
6199 if (dump_enabled_p ())
6200 dump_printf_loc (MSG_NOTE
, vect_location
,
6201 "transform binary/unary operation.\n");
6203 bool masked_loop_p
= loop_vinfo
&& LOOP_VINFO_FULLY_MASKED_P (loop_vinfo
);
6205 /* POINTER_DIFF_EXPR has pointer arguments which are vectorized as
6206 vectors with unsigned elements, but the result is signed. So, we
6207 need to compute the MINUS_EXPR into vectype temporary and
6208 VIEW_CONVERT_EXPR it into the final vectype_out result. */
6209 tree vec_cvt_dest
= NULL_TREE
;
6210 if (orig_code
== POINTER_DIFF_EXPR
)
6212 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
6213 vec_cvt_dest
= vect_create_destination_var (scalar_dest
, vectype_out
);
6217 vec_dest
= vect_create_destination_var (scalar_dest
, vectype_out
);
6219 /* In case the vectorization factor (VF) is bigger than the number
6220 of elements that we can fit in a vectype (nunits), we have to generate
6221 more than one vector stmt - i.e - we need to "unroll" the
6222 vector stmt by a factor VF/nunits. In doing so, we record a pointer
6223 from one copy of the vector stmt to the next, in the field
6224 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
6225 stages to find the correct vector defs to be used when vectorizing
6226 stmts that use the defs of the current stmt. The example below
6227 illustrates the vectorization process when VF=16 and nunits=4 (i.e.,
6228 we need to create 4 vectorized stmts):
6230 before vectorization:
6231 RELATED_STMT VEC_STMT
6235 step 1: vectorize stmt S1 (done in vectorizable_load. See more details
6237 RELATED_STMT VEC_STMT
6238 VS1_0: vx0 = memref0 VS1_1 -
6239 VS1_1: vx1 = memref1 VS1_2 -
6240 VS1_2: vx2 = memref2 VS1_3 -
6241 VS1_3: vx3 = memref3 - -
6242 S1: x = load - VS1_0
6245 step2: vectorize stmt S2 (done here):
6246 To vectorize stmt S2 we first need to find the relevant vector
6247 def for the first operand 'x'. This is, as usual, obtained from
6248 the vector stmt recorded in the STMT_VINFO_VEC_STMT of the stmt
6249 that defines 'x' (S1). This way we find the stmt VS1_0, and the
6250 relevant vector def 'vx0'. Having found 'vx0' we can generate
6251 the vector stmt VS2_0, and as usual, record it in the
6252 STMT_VINFO_VEC_STMT of stmt S2.
6253 When creating the second copy (VS2_1), we obtain the relevant vector
6254 def from the vector stmt recorded in the STMT_VINFO_RELATED_STMT of
6255 stmt VS1_0. This way we find the stmt VS1_1 and the relevant
6256 vector def 'vx1'. Using 'vx1' we create stmt VS2_1 and record a
6257 pointer to it in the STMT_VINFO_RELATED_STMT of the vector stmt VS2_0.
6258 Similarly when creating stmts VS2_2 and VS2_3. This is the resulting
6259 chain of stmts and pointers:
6260 RELATED_STMT VEC_STMT
6261 VS1_0: vx0 = memref0 VS1_1 -
6262 VS1_1: vx1 = memref1 VS1_2 -
6263 VS1_2: vx2 = memref2 VS1_3 -
6264 VS1_3: vx3 = memref3 - -
6265 S1: x = load - VS1_0
6266 VS2_0: vz0 = vx0 + v1 VS2_1 -
6267 VS2_1: vz1 = vx1 + v1 VS2_2 -
6268 VS2_2: vz2 = vx2 + v1 VS2_3 -
6269 VS2_3: vz3 = vx3 + v1 - -
6270 S2: z = x + 1 - VS2_0 */
6272 prev_stmt_info
= NULL
;
6273 for (j
= 0; j
< ncopies
; j
++)
6278 if (op_type
== binary_op
)
6279 vect_get_vec_defs (op0
, op1
, stmt_info
, &vec_oprnds0
, &vec_oprnds1
,
6281 else if (op_type
== ternary_op
)
6285 auto_vec
<vec
<tree
> > vec_defs(3);
6286 vect_get_slp_defs (slp_node
, &vec_defs
);
6287 vec_oprnds0
= vec_defs
[0];
6288 vec_oprnds1
= vec_defs
[1];
6289 vec_oprnds2
= vec_defs
[2];
6293 vect_get_vec_defs (op0
, op1
, stmt_info
, &vec_oprnds0
,
6294 &vec_oprnds1
, NULL
);
6295 vect_get_vec_defs (op2
, NULL_TREE
, stmt_info
, &vec_oprnds2
,
6300 vect_get_vec_defs (op0
, NULL_TREE
, stmt_info
, &vec_oprnds0
, NULL
,
6305 vect_get_vec_defs_for_stmt_copy (vinfo
, &vec_oprnds0
, &vec_oprnds1
);
6306 if (op_type
== ternary_op
)
6308 tree vec_oprnd
= vec_oprnds2
.pop ();
6309 vec_oprnds2
.quick_push (vect_get_vec_def_for_stmt_copy (vinfo
,
6314 /* Arguments are ready. Create the new vector stmt. */
6315 stmt_vec_info new_stmt_info
= NULL
;
6316 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vop0
)
6318 vop1
= ((op_type
== binary_op
|| op_type
== ternary_op
)
6319 ? vec_oprnds1
[i
] : NULL_TREE
);
6320 vop2
= ((op_type
== ternary_op
)
6321 ? vec_oprnds2
[i
] : NULL_TREE
);
6322 if (masked_loop_p
&& reduc_idx
>= 0)
6324 /* Perform the operation on active elements only and take
6325 inactive elements from the reduction chain input. */
6327 vop2
= reduc_idx
== 1 ? vop1
: vop0
;
6328 tree mask
= vect_get_loop_mask (gsi
, masks
, vec_num
* ncopies
,
6329 vectype
, i
* ncopies
+ j
);
6330 gcall
*call
= gimple_build_call_internal (cond_fn
, 4, mask
,
6332 new_temp
= make_ssa_name (vec_dest
, call
);
6333 gimple_call_set_lhs (call
, new_temp
);
6334 gimple_call_set_nothrow (call
, true);
6336 = vect_finish_stmt_generation (stmt_info
, call
, gsi
);
6340 gassign
*new_stmt
= gimple_build_assign (vec_dest
, code
,
6342 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
6343 gimple_assign_set_lhs (new_stmt
, new_temp
);
6345 = vect_finish_stmt_generation (stmt_info
, new_stmt
, gsi
);
6348 new_temp
= build1 (VIEW_CONVERT_EXPR
, vectype_out
, new_temp
);
6350 = gimple_build_assign (vec_cvt_dest
, VIEW_CONVERT_EXPR
,
6352 new_temp
= make_ssa_name (vec_cvt_dest
, new_stmt
);
6353 gimple_assign_set_lhs (new_stmt
, new_temp
);
6355 = vect_finish_stmt_generation (stmt_info
, new_stmt
, gsi
);
6359 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt_info
);
6366 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt_info
;
6368 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt_info
;
6369 prev_stmt_info
= new_stmt_info
;
6372 vec_oprnds0
.release ();
6373 vec_oprnds1
.release ();
6374 vec_oprnds2
.release ();
6379 /* A helper function to ensure data reference DR_INFO's base alignment. */
6382 ensure_base_align (dr_vec_info
*dr_info
)
6384 if (dr_info
->misalignment
== DR_MISALIGNMENT_UNINITIALIZED
)
6387 if (dr_info
->base_misaligned
)
6389 tree base_decl
= dr_info
->base_decl
;
6391 // We should only be able to increase the alignment of a base object if
6392 // we know what its new alignment should be at compile time.
6393 unsigned HOST_WIDE_INT align_base_to
=
6394 DR_TARGET_ALIGNMENT (dr_info
).to_constant () * BITS_PER_UNIT
;
6396 if (decl_in_symtab_p (base_decl
))
6397 symtab_node::get (base_decl
)->increase_alignment (align_base_to
);
6398 else if (DECL_ALIGN (base_decl
) < align_base_to
)
6400 SET_DECL_ALIGN (base_decl
, align_base_to
);
6401 DECL_USER_ALIGN (base_decl
) = 1;
6403 dr_info
->base_misaligned
= false;
6408 /* Function get_group_alias_ptr_type.
6410 Return the alias type for the group starting at FIRST_STMT_INFO. */
6413 get_group_alias_ptr_type (stmt_vec_info first_stmt_info
)
6415 struct data_reference
*first_dr
, *next_dr
;
6417 first_dr
= STMT_VINFO_DATA_REF (first_stmt_info
);
6418 stmt_vec_info next_stmt_info
= DR_GROUP_NEXT_ELEMENT (first_stmt_info
);
6419 while (next_stmt_info
)
6421 next_dr
= STMT_VINFO_DATA_REF (next_stmt_info
);
6422 if (get_alias_set (DR_REF (first_dr
))
6423 != get_alias_set (DR_REF (next_dr
)))
6425 if (dump_enabled_p ())
6426 dump_printf_loc (MSG_NOTE
, vect_location
,
6427 "conflicting alias set types.\n");
6428 return ptr_type_node
;
6430 next_stmt_info
= DR_GROUP_NEXT_ELEMENT (next_stmt_info
);
6432 return reference_alias_ptr_type (DR_REF (first_dr
));
6436 /* Function scan_operand_equal_p.
6438 Helper function for check_scan_store. Compare two references
6439 with .GOMP_SIMD_LANE bases. */
6442 scan_operand_equal_p (tree ref1
, tree ref2
)
6444 tree ref
[2] = { ref1
, ref2
};
6445 poly_int64 bitsize
[2], bitpos
[2];
6446 tree offset
[2], base
[2];
6447 for (int i
= 0; i
< 2; ++i
)
6450 int unsignedp
, reversep
, volatilep
= 0;
6451 base
[i
] = get_inner_reference (ref
[i
], &bitsize
[i
], &bitpos
[i
],
6452 &offset
[i
], &mode
, &unsignedp
,
6453 &reversep
, &volatilep
);
6454 if (reversep
|| volatilep
|| maybe_ne (bitpos
[i
], 0))
6456 if (TREE_CODE (base
[i
]) == MEM_REF
6457 && offset
[i
] == NULL_TREE
6458 && TREE_CODE (TREE_OPERAND (base
[i
], 0)) == SSA_NAME
)
6460 gimple
*def_stmt
= SSA_NAME_DEF_STMT (TREE_OPERAND (base
[i
], 0));
6461 if (is_gimple_assign (def_stmt
)
6462 && gimple_assign_rhs_code (def_stmt
) == POINTER_PLUS_EXPR
6463 && TREE_CODE (gimple_assign_rhs1 (def_stmt
)) == ADDR_EXPR
6464 && TREE_CODE (gimple_assign_rhs2 (def_stmt
)) == SSA_NAME
)
6466 if (maybe_ne (mem_ref_offset (base
[i
]), 0))
6468 base
[i
] = TREE_OPERAND (gimple_assign_rhs1 (def_stmt
), 0);
6469 offset
[i
] = gimple_assign_rhs2 (def_stmt
);
6474 if (!operand_equal_p (base
[0], base
[1], 0))
6476 if (maybe_ne (bitsize
[0], bitsize
[1]))
6478 if (offset
[0] != offset
[1])
6480 if (!offset
[0] || !offset
[1])
6482 if (!operand_equal_p (offset
[0], offset
[1], 0))
6485 for (int i
= 0; i
< 2; ++i
)
6487 step
[i
] = integer_one_node
;
6488 if (TREE_CODE (offset
[i
]) == SSA_NAME
)
6490 gimple
*def_stmt
= SSA_NAME_DEF_STMT (offset
[i
]);
6491 if (is_gimple_assign (def_stmt
)
6492 && gimple_assign_rhs_code (def_stmt
) == MULT_EXPR
6493 && (TREE_CODE (gimple_assign_rhs2 (def_stmt
))
6496 step
[i
] = gimple_assign_rhs2 (def_stmt
);
6497 offset
[i
] = gimple_assign_rhs1 (def_stmt
);
6500 else if (TREE_CODE (offset
[i
]) == MULT_EXPR
)
6502 step
[i
] = TREE_OPERAND (offset
[i
], 1);
6503 offset
[i
] = TREE_OPERAND (offset
[i
], 0);
6505 tree rhs1
= NULL_TREE
;
6506 if (TREE_CODE (offset
[i
]) == SSA_NAME
)
6508 gimple
*def_stmt
= SSA_NAME_DEF_STMT (offset
[i
]);
6509 if (gimple_assign_cast_p (def_stmt
))
6510 rhs1
= gimple_assign_rhs1 (def_stmt
);
6512 else if (CONVERT_EXPR_P (offset
[i
]))
6513 rhs1
= TREE_OPERAND (offset
[i
], 0);
6515 && INTEGRAL_TYPE_P (TREE_TYPE (rhs1
))
6516 && INTEGRAL_TYPE_P (TREE_TYPE (offset
[i
]))
6517 && (TYPE_PRECISION (TREE_TYPE (offset
[i
]))
6518 >= TYPE_PRECISION (TREE_TYPE (rhs1
))))
6521 if (!operand_equal_p (offset
[0], offset
[1], 0)
6522 || !operand_equal_p (step
[0], step
[1], 0))
6530 enum scan_store_kind
{
6531 /* Normal permutation. */
6532 scan_store_kind_perm
,
6534 /* Whole vector left shift permutation with zero init. */
6535 scan_store_kind_lshift_zero
,
6537 /* Whole vector left shift permutation and VEC_COND_EXPR. */
6538 scan_store_kind_lshift_cond
6541 /* Function check_scan_store.
6543 Verify if we can perform the needed permutations or whole vector shifts.
6544 Return -1 on failure, otherwise exact log2 of vectype's nunits.
6545 USE_WHOLE_VECTOR is a vector of enum scan_store_kind which operation
6546 to do at each step. */
6549 scan_store_can_perm_p (tree vectype
, tree init
,
6550 vec
<enum scan_store_kind
> *use_whole_vector
= NULL
)
6552 enum machine_mode vec_mode
= TYPE_MODE (vectype
);
6553 unsigned HOST_WIDE_INT nunits
;
6554 if (!TYPE_VECTOR_SUBPARTS (vectype
).is_constant (&nunits
))
6556 int units_log2
= exact_log2 (nunits
);
6557 if (units_log2
<= 0)
6561 enum scan_store_kind whole_vector_shift_kind
= scan_store_kind_perm
;
6562 for (i
= 0; i
<= units_log2
; ++i
)
6564 unsigned HOST_WIDE_INT j
, k
;
6565 enum scan_store_kind kind
= scan_store_kind_perm
;
6566 vec_perm_builder
sel (nunits
, nunits
, 1);
6567 sel
.quick_grow (nunits
);
6568 if (i
== units_log2
)
6570 for (j
= 0; j
< nunits
; ++j
)
6571 sel
[j
] = nunits
- 1;
6575 for (j
= 0; j
< (HOST_WIDE_INT_1U
<< i
); ++j
)
6577 for (k
= 0; j
< nunits
; ++j
, ++k
)
6578 sel
[j
] = nunits
+ k
;
6580 vec_perm_indices
indices (sel
, i
== units_log2
? 1 : 2, nunits
);
6581 if (!can_vec_perm_const_p (vec_mode
, indices
))
6583 if (i
== units_log2
)
6586 if (whole_vector_shift_kind
== scan_store_kind_perm
)
6588 if (optab_handler (vec_shl_optab
, vec_mode
) == CODE_FOR_nothing
)
6590 whole_vector_shift_kind
= scan_store_kind_lshift_zero
;
6591 /* Whole vector shifts shift in zeros, so if init is all zero
6592 constant, there is no need to do anything further. */
6593 if ((TREE_CODE (init
) != INTEGER_CST
6594 && TREE_CODE (init
) != REAL_CST
)
6595 || !initializer_zerop (init
))
6597 tree masktype
= truth_type_for (vectype
);
6598 if (!expand_vec_cond_expr_p (vectype
, masktype
, VECTOR_CST
))
6600 whole_vector_shift_kind
= scan_store_kind_lshift_cond
;
6603 kind
= whole_vector_shift_kind
;
6605 if (use_whole_vector
)
6607 if (kind
!= scan_store_kind_perm
&& use_whole_vector
->is_empty ())
6608 use_whole_vector
->safe_grow_cleared (i
);
6609 if (kind
!= scan_store_kind_perm
|| !use_whole_vector
->is_empty ())
6610 use_whole_vector
->safe_push (kind
);
6618 /* Function check_scan_store.
6620 Check magic stores for #pragma omp scan {in,ex}clusive reductions. */
6623 check_scan_store (stmt_vec_info stmt_info
, tree vectype
,
6624 enum vect_def_type rhs_dt
, bool slp
, tree mask
,
6625 vect_memory_access_type memory_access_type
)
6627 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
6628 dr_vec_info
*dr_info
= STMT_VINFO_DR_INFO (stmt_info
);
6631 gcc_assert (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) > 1);
6634 || memory_access_type
!= VMAT_CONTIGUOUS
6635 || TREE_CODE (DR_BASE_ADDRESS (dr_info
->dr
)) != ADDR_EXPR
6636 || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (dr_info
->dr
), 0))
6637 || loop_vinfo
== NULL
6638 || LOOP_VINFO_FULLY_MASKED_P (loop_vinfo
)
6639 || STMT_VINFO_GROUPED_ACCESS (stmt_info
)
6640 || !integer_zerop (DR_OFFSET (dr_info
->dr
))
6641 || !integer_zerop (DR_INIT (dr_info
->dr
))
6642 || !(ref_type
= reference_alias_ptr_type (DR_REF (dr_info
->dr
)))
6643 || !alias_sets_conflict_p (get_alias_set (vectype
),
6644 get_alias_set (TREE_TYPE (ref_type
))))
6646 if (dump_enabled_p ())
6647 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6648 "unsupported OpenMP scan store.\n");
6652 /* We need to pattern match code built by OpenMP lowering and simplified
6653 by following optimizations into something we can handle.
6654 #pragma omp simd reduction(inscan,+:r)
6658 #pragma omp scan inclusive (r)
6661 shall have body with:
6662 // Initialization for input phase, store the reduction initializer:
6663 _20 = .GOMP_SIMD_LANE (simduid.3_14(D), 0);
6664 _21 = .GOMP_SIMD_LANE (simduid.3_14(D), 1);
6666 // Actual input phase:
6668 r.0_5 = D.2042[_20];
6671 // Initialization for scan phase:
6672 _25 = .GOMP_SIMD_LANE (simduid.3_14(D), 2);
6678 // Actual scan phase:
6680 r.1_8 = D.2042[_20];
6682 The "omp simd array" variable D.2042 holds the privatized copy used
6683 inside of the loop and D.2043 is another one that holds copies of
6684 the current original list item. The separate GOMP_SIMD_LANE ifn
6685 kinds are there in order to allow optimizing the initializer store
6686 and combiner sequence, e.g. if it is originally some C++ish user
6687 defined reduction, but allow the vectorizer to pattern recognize it
6688 and turn into the appropriate vectorized scan.
6690 For exclusive scan, this is slightly different:
6691 #pragma omp simd reduction(inscan,+:r)
6695 #pragma omp scan exclusive (r)
6698 shall have body with:
6699 // Initialization for input phase, store the reduction initializer:
6700 _20 = .GOMP_SIMD_LANE (simduid.3_14(D), 0);
6701 _21 = .GOMP_SIMD_LANE (simduid.3_14(D), 1);
6703 // Actual input phase:
6705 r.0_5 = D.2042[_20];
6708 // Initialization for scan phase:
6709 _25 = .GOMP_SIMD_LANE (simduid.3_14(D), 3);
6715 // Actual scan phase:
6717 r.1_8 = D.2044[_20];
6720 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) == 2)
6722 /* Match the D.2042[_21] = 0; store above. Just require that
6723 it is a constant or external definition store. */
6724 if (rhs_dt
!= vect_constant_def
&& rhs_dt
!= vect_external_def
)
6727 if (dump_enabled_p ())
6728 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6729 "unsupported OpenMP scan initializer store.\n");
6733 if (! loop_vinfo
->scan_map
)
6734 loop_vinfo
->scan_map
= new hash_map
<tree
, tree
>;
6735 tree var
= TREE_OPERAND (DR_BASE_ADDRESS (dr_info
->dr
), 0);
6736 tree
&cached
= loop_vinfo
->scan_map
->get_or_insert (var
);
6739 cached
= gimple_assign_rhs1 (STMT_VINFO_STMT (stmt_info
));
6741 /* These stores can be vectorized normally. */
6745 if (rhs_dt
!= vect_internal_def
)
6748 if (dump_enabled_p ())
6749 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6750 "unsupported OpenMP scan combiner pattern.\n");
6754 gimple
*stmt
= STMT_VINFO_STMT (stmt_info
);
6755 tree rhs
= gimple_assign_rhs1 (stmt
);
6756 if (TREE_CODE (rhs
) != SSA_NAME
)
6759 gimple
*other_store_stmt
= NULL
;
6760 tree var
= TREE_OPERAND (DR_BASE_ADDRESS (dr_info
->dr
), 0);
6761 bool inscan_var_store
6762 = lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var
)) != NULL
;
6764 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) == 4)
6766 if (!inscan_var_store
)
6768 use_operand_p use_p
;
6769 imm_use_iterator iter
;
6770 FOR_EACH_IMM_USE_FAST (use_p
, iter
, rhs
)
6772 gimple
*use_stmt
= USE_STMT (use_p
);
6773 if (use_stmt
== stmt
|| is_gimple_debug (use_stmt
))
6775 if (gimple_bb (use_stmt
) != gimple_bb (stmt
)
6776 || !is_gimple_assign (use_stmt
)
6777 || gimple_assign_rhs_class (use_stmt
) != GIMPLE_BINARY_RHS
6779 || TREE_CODE (gimple_assign_lhs (use_stmt
)) != SSA_NAME
)
6781 other_store_stmt
= use_stmt
;
6783 if (other_store_stmt
== NULL
)
6785 rhs
= gimple_assign_lhs (other_store_stmt
);
6786 if (!single_imm_use (rhs
, &use_p
, &other_store_stmt
))
6790 else if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) == 3)
6792 use_operand_p use_p
;
6793 imm_use_iterator iter
;
6794 FOR_EACH_IMM_USE_FAST (use_p
, iter
, rhs
)
6796 gimple
*use_stmt
= USE_STMT (use_p
);
6797 if (use_stmt
== stmt
|| is_gimple_debug (use_stmt
))
6799 if (other_store_stmt
)
6801 other_store_stmt
= use_stmt
;
6807 gimple
*def_stmt
= SSA_NAME_DEF_STMT (rhs
);
6808 if (gimple_bb (def_stmt
) != gimple_bb (stmt
)
6809 || !is_gimple_assign (def_stmt
)
6810 || gimple_assign_rhs_class (def_stmt
) != GIMPLE_BINARY_RHS
)
6813 enum tree_code code
= gimple_assign_rhs_code (def_stmt
);
6814 /* For pointer addition, we should use the normal plus for the vector
6818 case POINTER_PLUS_EXPR
:
6821 case MULT_HIGHPART_EXPR
:
6826 if (TREE_CODE_LENGTH (code
) != binary_op
|| !commutative_tree_code (code
))
6829 tree rhs1
= gimple_assign_rhs1 (def_stmt
);
6830 tree rhs2
= gimple_assign_rhs2 (def_stmt
);
6831 if (TREE_CODE (rhs1
) != SSA_NAME
|| TREE_CODE (rhs2
) != SSA_NAME
)
6834 gimple
*load1_stmt
= SSA_NAME_DEF_STMT (rhs1
);
6835 gimple
*load2_stmt
= SSA_NAME_DEF_STMT (rhs2
);
6836 if (gimple_bb (load1_stmt
) != gimple_bb (stmt
)
6837 || !gimple_assign_load_p (load1_stmt
)
6838 || gimple_bb (load2_stmt
) != gimple_bb (stmt
)
6839 || !gimple_assign_load_p (load2_stmt
))
6842 stmt_vec_info load1_stmt_info
= loop_vinfo
->lookup_stmt (load1_stmt
);
6843 stmt_vec_info load2_stmt_info
= loop_vinfo
->lookup_stmt (load2_stmt
);
6844 if (load1_stmt_info
== NULL
6845 || load2_stmt_info
== NULL
6846 || (STMT_VINFO_SIMD_LANE_ACCESS_P (load1_stmt_info
)
6847 != STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
))
6848 || (STMT_VINFO_SIMD_LANE_ACCESS_P (load2_stmt_info
)
6849 != STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
)))
6852 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) == 4 && inscan_var_store
)
6854 dr_vec_info
*load1_dr_info
= STMT_VINFO_DR_INFO (load1_stmt_info
);
6855 if (TREE_CODE (DR_BASE_ADDRESS (load1_dr_info
->dr
)) != ADDR_EXPR
6856 || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (load1_dr_info
->dr
), 0)))
6858 tree var1
= TREE_OPERAND (DR_BASE_ADDRESS (load1_dr_info
->dr
), 0);
6860 if (lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1
)))
6864 use_operand_p use_p
;
6865 imm_use_iterator iter
;
6866 FOR_EACH_IMM_USE_FAST (use_p
, iter
, lrhs
)
6868 gimple
*use_stmt
= USE_STMT (use_p
);
6869 if (use_stmt
== def_stmt
|| is_gimple_debug (use_stmt
))
6871 if (other_store_stmt
)
6873 other_store_stmt
= use_stmt
;
6877 if (other_store_stmt
== NULL
)
6879 if (gimple_bb (other_store_stmt
) != gimple_bb (stmt
)
6880 || !gimple_store_p (other_store_stmt
))
6883 stmt_vec_info other_store_stmt_info
6884 = loop_vinfo
->lookup_stmt (other_store_stmt
);
6885 if (other_store_stmt_info
== NULL
6886 || (STMT_VINFO_SIMD_LANE_ACCESS_P (other_store_stmt_info
)
6887 != STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
)))
6890 gimple
*stmt1
= stmt
;
6891 gimple
*stmt2
= other_store_stmt
;
6892 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) == 4 && !inscan_var_store
)
6893 std::swap (stmt1
, stmt2
);
6894 if (scan_operand_equal_p (gimple_assign_lhs (stmt1
),
6895 gimple_assign_rhs1 (load2_stmt
)))
6897 std::swap (rhs1
, rhs2
);
6898 std::swap (load1_stmt
, load2_stmt
);
6899 std::swap (load1_stmt_info
, load2_stmt_info
);
6901 if (!scan_operand_equal_p (gimple_assign_lhs (stmt1
),
6902 gimple_assign_rhs1 (load1_stmt
)))
6905 tree var3
= NULL_TREE
;
6906 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) == 3
6907 && !scan_operand_equal_p (gimple_assign_lhs (stmt2
),
6908 gimple_assign_rhs1 (load2_stmt
)))
6910 else if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) == 4)
6912 dr_vec_info
*load2_dr_info
= STMT_VINFO_DR_INFO (load2_stmt_info
);
6913 if (TREE_CODE (DR_BASE_ADDRESS (load2_dr_info
->dr
)) != ADDR_EXPR
6914 || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (load2_dr_info
->dr
), 0)))
6916 var3
= TREE_OPERAND (DR_BASE_ADDRESS (load2_dr_info
->dr
), 0);
6917 if (!lookup_attribute ("omp simd array", DECL_ATTRIBUTES (var3
))
6918 || lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var3
))
6919 || lookup_attribute ("omp simd inscan exclusive",
6920 DECL_ATTRIBUTES (var3
)))
6924 dr_vec_info
*other_dr_info
= STMT_VINFO_DR_INFO (other_store_stmt_info
);
6925 if (TREE_CODE (DR_BASE_ADDRESS (other_dr_info
->dr
)) != ADDR_EXPR
6926 || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (other_dr_info
->dr
), 0)))
6929 tree var1
= TREE_OPERAND (DR_BASE_ADDRESS (dr_info
->dr
), 0);
6930 tree var2
= TREE_OPERAND (DR_BASE_ADDRESS (other_dr_info
->dr
), 0);
6931 if (!lookup_attribute ("omp simd array", DECL_ATTRIBUTES (var1
))
6932 || !lookup_attribute ("omp simd array", DECL_ATTRIBUTES (var2
))
6933 || (!lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1
)))
6934 == (!lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var2
))))
6937 if (lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1
)))
6938 std::swap (var1
, var2
);
6940 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) == 4)
6942 if (!lookup_attribute ("omp simd inscan exclusive",
6943 DECL_ATTRIBUTES (var1
)))
6948 if (loop_vinfo
->scan_map
== NULL
)
6950 tree
*init
= loop_vinfo
->scan_map
->get (var1
);
6954 /* The IL is as expected, now check if we can actually vectorize it.
6961 should be vectorized as (where _40 is the vectorized rhs
6962 from the D.2042[_21] = 0; store):
6963 _30 = MEM <vector(8) int> [(int *)&D.2043];
6964 _31 = MEM <vector(8) int> [(int *)&D.2042];
6965 _32 = VEC_PERM_EXPR <_40, _31, { 0, 8, 9, 10, 11, 12, 13, 14 }>;
6967 // _33 = { _31[0], _31[0]+_31[1], _31[1]+_31[2], ..., _31[6]+_31[7] };
6968 _34 = VEC_PERM_EXPR <_40, _33, { 0, 1, 8, 9, 10, 11, 12, 13 }>;
6970 // _35 = { _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
6971 // _31[1]+.._31[4], ... _31[4]+.._31[7] };
6972 _36 = VEC_PERM_EXPR <_40, _35, { 0, 1, 2, 3, 8, 9, 10, 11 }>;
6974 // _37 = { _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
6975 // _31[0]+.._31[4], ... _31[0]+.._31[7] };
6977 _39 = VEC_PERM_EXPR <_38, _38, { 7, 7, 7, 7, 7, 7, 7, 7 }>;
6978 MEM <vector(8) int> [(int *)&D.2043] = _39;
6979 MEM <vector(8) int> [(int *)&D.2042] = _38;
6986 should be vectorized as (where _40 is the vectorized rhs
6987 from the D.2042[_21] = 0; store):
6988 _30 = MEM <vector(8) int> [(int *)&D.2043];
6989 _31 = MEM <vector(8) int> [(int *)&D.2042];
6990 _32 = VEC_PERM_EXPR <_40, _31, { 0, 8, 9, 10, 11, 12, 13, 14 }>;
6991 _33 = VEC_PERM_EXPR <_40, _32, { 0, 8, 9, 10, 11, 12, 13, 14 }>;
6993 // _34 = { 0, _31[0], _31[0]+_31[1], _31[1]+_31[2], _31[2]+_31[3],
6994 // _31[3]+_31[4], ... _31[5]+.._31[6] };
6995 _35 = VEC_PERM_EXPR <_40, _34, { 0, 1, 8, 9, 10, 11, 12, 13 }>;
6997 // _36 = { 0, _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
6998 // _31[1]+.._31[4], ... _31[3]+.._31[6] };
6999 _37 = VEC_PERM_EXPR <_40, _36, { 0, 1, 2, 3, 8, 9, 10, 11 }>;
7001 // _38 = { 0, _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
7002 // _31[0]+.._31[4], ... _31[0]+.._31[6] };
7005 _51 = VEC_PERM_EXPR <_50, _50, { 7, 7, 7, 7, 7, 7, 7, 7 }>;
7006 MEM <vector(8) int> [(int *)&D.2044] = _39;
7007 MEM <vector(8) int> [(int *)&D.2042] = _51; */
7008 enum machine_mode vec_mode
= TYPE_MODE (vectype
);
7009 optab optab
= optab_for_tree_code (code
, vectype
, optab_default
);
7010 if (!optab
|| optab_handler (optab
, vec_mode
) == CODE_FOR_nothing
)
7013 int units_log2
= scan_store_can_perm_p (vectype
, *init
);
7014 if (units_log2
== -1)
7021 /* Function vectorizable_scan_store.
7023 Helper of vectorizable_score, arguments like on vectorizable_store.
7024 Handle only the transformation, checking is done in check_scan_store. */
7027 vectorizable_scan_store (stmt_vec_info stmt_info
, gimple_stmt_iterator
*gsi
,
7028 stmt_vec_info
*vec_stmt
, int ncopies
)
7030 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
7031 dr_vec_info
*dr_info
= STMT_VINFO_DR_INFO (stmt_info
);
7032 tree ref_type
= reference_alias_ptr_type (DR_REF (dr_info
->dr
));
7033 vec_info
*vinfo
= stmt_info
->vinfo
;
7034 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
7036 if (dump_enabled_p ())
7037 dump_printf_loc (MSG_NOTE
, vect_location
,
7038 "transform scan store. ncopies = %d\n", ncopies
);
7040 gimple
*stmt
= STMT_VINFO_STMT (stmt_info
);
7041 tree rhs
= gimple_assign_rhs1 (stmt
);
7042 gcc_assert (TREE_CODE (rhs
) == SSA_NAME
);
7044 tree var
= TREE_OPERAND (DR_BASE_ADDRESS (dr_info
->dr
), 0);
7045 bool inscan_var_store
7046 = lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var
)) != NULL
;
7048 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) == 4 && !inscan_var_store
)
7050 use_operand_p use_p
;
7051 imm_use_iterator iter
;
7052 FOR_EACH_IMM_USE_FAST (use_p
, iter
, rhs
)
7054 gimple
*use_stmt
= USE_STMT (use_p
);
7055 if (use_stmt
== stmt
|| is_gimple_debug (use_stmt
))
7057 rhs
= gimple_assign_lhs (use_stmt
);
7062 gimple
*def_stmt
= SSA_NAME_DEF_STMT (rhs
);
7063 enum tree_code code
= gimple_assign_rhs_code (def_stmt
);
7064 if (code
== POINTER_PLUS_EXPR
)
7066 gcc_assert (TREE_CODE_LENGTH (code
) == binary_op
7067 && commutative_tree_code (code
));
7068 tree rhs1
= gimple_assign_rhs1 (def_stmt
);
7069 tree rhs2
= gimple_assign_rhs2 (def_stmt
);
7070 gcc_assert (TREE_CODE (rhs1
) == SSA_NAME
&& TREE_CODE (rhs2
) == SSA_NAME
);
7071 gimple
*load1_stmt
= SSA_NAME_DEF_STMT (rhs1
);
7072 gimple
*load2_stmt
= SSA_NAME_DEF_STMT (rhs2
);
7073 stmt_vec_info load1_stmt_info
= loop_vinfo
->lookup_stmt (load1_stmt
);
7074 stmt_vec_info load2_stmt_info
= loop_vinfo
->lookup_stmt (load2_stmt
);
7075 dr_vec_info
*load1_dr_info
= STMT_VINFO_DR_INFO (load1_stmt_info
);
7076 dr_vec_info
*load2_dr_info
= STMT_VINFO_DR_INFO (load2_stmt_info
);
7077 tree var1
= TREE_OPERAND (DR_BASE_ADDRESS (load1_dr_info
->dr
), 0);
7078 tree var2
= TREE_OPERAND (DR_BASE_ADDRESS (load2_dr_info
->dr
), 0);
7080 if (lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1
)))
7082 std::swap (rhs1
, rhs2
);
7083 std::swap (var1
, var2
);
7084 std::swap (load1_dr_info
, load2_dr_info
);
7087 tree
*init
= loop_vinfo
->scan_map
->get (var1
);
7090 unsigned HOST_WIDE_INT nunits
;
7091 if (!TYPE_VECTOR_SUBPARTS (vectype
).is_constant (&nunits
))
7093 auto_vec
<enum scan_store_kind
, 16> use_whole_vector
;
7094 int units_log2
= scan_store_can_perm_p (vectype
, *init
, &use_whole_vector
);
7095 gcc_assert (units_log2
> 0);
7096 auto_vec
<tree
, 16> perms
;
7097 perms
.quick_grow (units_log2
+ 1);
7098 tree zero_vec
= NULL_TREE
, masktype
= NULL_TREE
;
7099 for (int i
= 0; i
<= units_log2
; ++i
)
7101 unsigned HOST_WIDE_INT j
, k
;
7102 vec_perm_builder
sel (nunits
, nunits
, 1);
7103 sel
.quick_grow (nunits
);
7104 if (i
== units_log2
)
7105 for (j
= 0; j
< nunits
; ++j
)
7106 sel
[j
] = nunits
- 1;
7109 for (j
= 0; j
< (HOST_WIDE_INT_1U
<< i
); ++j
)
7111 for (k
= 0; j
< nunits
; ++j
, ++k
)
7112 sel
[j
] = nunits
+ k
;
7114 vec_perm_indices
indices (sel
, i
== units_log2
? 1 : 2, nunits
);
7115 if (!use_whole_vector
.is_empty ()
7116 && use_whole_vector
[i
] != scan_store_kind_perm
)
7118 if (zero_vec
== NULL_TREE
)
7119 zero_vec
= build_zero_cst (vectype
);
7120 if (masktype
== NULL_TREE
7121 && use_whole_vector
[i
] == scan_store_kind_lshift_cond
)
7122 masktype
= truth_type_for (vectype
);
7123 perms
[i
] = vect_gen_perm_mask_any (vectype
, indices
);
7126 perms
[i
] = vect_gen_perm_mask_checked (vectype
, indices
);
7129 stmt_vec_info prev_stmt_info
= NULL
;
7130 tree vec_oprnd1
= NULL_TREE
;
7131 tree vec_oprnd2
= NULL_TREE
;
7132 tree vec_oprnd3
= NULL_TREE
;
7133 tree dataref_ptr
= DR_BASE_ADDRESS (dr_info
->dr
);
7134 tree dataref_offset
= build_int_cst (ref_type
, 0);
7135 tree bump
= vect_get_data_ptr_increment (dr_info
, vectype
, VMAT_CONTIGUOUS
);
7136 tree ldataref_ptr
= NULL_TREE
;
7137 tree orig
= NULL_TREE
;
7138 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) == 4 && !inscan_var_store
)
7139 ldataref_ptr
= DR_BASE_ADDRESS (load1_dr_info
->dr
);
7140 for (int j
= 0; j
< ncopies
; j
++)
7142 stmt_vec_info new_stmt_info
;
7145 vec_oprnd1
= vect_get_vec_def_for_operand (*init
, stmt_info
);
7146 if (ldataref_ptr
== NULL
)
7147 vec_oprnd2
= vect_get_vec_def_for_operand (rhs1
, stmt_info
);
7148 vec_oprnd3
= vect_get_vec_def_for_operand (rhs2
, stmt_info
);
7153 vec_oprnd1
= vect_get_vec_def_for_stmt_copy (vinfo
, vec_oprnd1
);
7154 if (ldataref_ptr
== NULL
)
7155 vec_oprnd2
= vect_get_vec_def_for_stmt_copy (vinfo
, vec_oprnd2
);
7156 vec_oprnd3
= vect_get_vec_def_for_stmt_copy (vinfo
, vec_oprnd3
);
7157 if (!inscan_var_store
)
7158 dataref_offset
= int_const_binop (PLUS_EXPR
, dataref_offset
, bump
);
7163 vec_oprnd2
= make_ssa_name (vectype
);
7164 tree data_ref
= fold_build2 (MEM_REF
, vectype
,
7165 unshare_expr (ldataref_ptr
),
7167 vect_copy_ref_info (data_ref
, DR_REF (load1_dr_info
->dr
));
7168 gimple
*g
= gimple_build_assign (vec_oprnd2
, data_ref
);
7169 new_stmt_info
= vect_finish_stmt_generation (stmt_info
, g
, gsi
);
7170 if (prev_stmt_info
== NULL
)
7171 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt_info
;
7173 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt_info
;
7174 prev_stmt_info
= new_stmt_info
;
7177 tree v
= vec_oprnd2
;
7178 for (int i
= 0; i
< units_log2
; ++i
)
7180 tree new_temp
= make_ssa_name (vectype
);
7181 gimple
*g
= gimple_build_assign (new_temp
, VEC_PERM_EXPR
,
7183 && (use_whole_vector
[i
]
7184 != scan_store_kind_perm
))
7185 ? zero_vec
: vec_oprnd1
, v
,
7187 new_stmt_info
= vect_finish_stmt_generation (stmt_info
, g
, gsi
);
7188 if (prev_stmt_info
== NULL
)
7189 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt_info
;
7191 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt_info
;
7192 prev_stmt_info
= new_stmt_info
;
7194 if (zero_vec
&& use_whole_vector
[i
] == scan_store_kind_lshift_cond
)
7196 /* Whole vector shift shifted in zero bits, but if *init
7197 is not initializer_zerop, we need to replace those elements
7198 with elements from vec_oprnd1. */
7199 tree_vector_builder
vb (masktype
, nunits
, 1);
7200 for (unsigned HOST_WIDE_INT k
= 0; k
< nunits
; ++k
)
7201 vb
.quick_push (k
< (HOST_WIDE_INT_1U
<< i
)
7202 ? boolean_false_node
: boolean_true_node
);
7204 tree new_temp2
= make_ssa_name (vectype
);
7205 g
= gimple_build_assign (new_temp2
, VEC_COND_EXPR
, vb
.build (),
7206 new_temp
, vec_oprnd1
);
7207 new_stmt_info
= vect_finish_stmt_generation (stmt_info
, g
, gsi
);
7208 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt_info
;
7209 prev_stmt_info
= new_stmt_info
;
7210 new_temp
= new_temp2
;
7213 /* For exclusive scan, perform the perms[i] permutation once
7216 && STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) == 4
7224 tree new_temp2
= make_ssa_name (vectype
);
7225 g
= gimple_build_assign (new_temp2
, code
, v
, new_temp
);
7226 new_stmt_info
= vect_finish_stmt_generation (stmt_info
, g
, gsi
);
7227 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt_info
;
7228 prev_stmt_info
= new_stmt_info
;
7233 tree new_temp
= make_ssa_name (vectype
);
7234 gimple
*g
= gimple_build_assign (new_temp
, code
, orig
, v
);
7235 new_stmt_info
= vect_finish_stmt_generation (stmt_info
, g
, gsi
);
7236 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt_info
;
7237 prev_stmt_info
= new_stmt_info
;
7239 tree last_perm_arg
= new_temp
;
7240 /* For exclusive scan, new_temp computed above is the exclusive scan
7241 prefix sum. Turn it into inclusive prefix sum for the broadcast
7242 of the last element into orig. */
7243 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) == 4)
7245 last_perm_arg
= make_ssa_name (vectype
);
7246 g
= gimple_build_assign (last_perm_arg
, code
, new_temp
, vec_oprnd2
);
7247 new_stmt_info
= vect_finish_stmt_generation (stmt_info
, g
, gsi
);
7248 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt_info
;
7249 prev_stmt_info
= new_stmt_info
;
7252 orig
= make_ssa_name (vectype
);
7253 g
= gimple_build_assign (orig
, VEC_PERM_EXPR
, last_perm_arg
,
7254 last_perm_arg
, perms
[units_log2
]);
7255 new_stmt_info
= vect_finish_stmt_generation (stmt_info
, g
, gsi
);
7256 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt_info
;
7257 prev_stmt_info
= new_stmt_info
;
7259 if (!inscan_var_store
)
7261 tree data_ref
= fold_build2 (MEM_REF
, vectype
,
7262 unshare_expr (dataref_ptr
),
7264 vect_copy_ref_info (data_ref
, DR_REF (dr_info
->dr
));
7265 g
= gimple_build_assign (data_ref
, new_temp
);
7266 new_stmt_info
= vect_finish_stmt_generation (stmt_info
, g
, gsi
);
7267 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt_info
;
7268 prev_stmt_info
= new_stmt_info
;
7272 if (inscan_var_store
)
7273 for (int j
= 0; j
< ncopies
; j
++)
7276 dataref_offset
= int_const_binop (PLUS_EXPR
, dataref_offset
, bump
);
7278 tree data_ref
= fold_build2 (MEM_REF
, vectype
,
7279 unshare_expr (dataref_ptr
),
7281 vect_copy_ref_info (data_ref
, DR_REF (dr_info
->dr
));
7282 gimple
*g
= gimple_build_assign (data_ref
, orig
);
7283 stmt_vec_info new_stmt_info
7284 = vect_finish_stmt_generation (stmt_info
, g
, gsi
);
7285 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt_info
;
7286 prev_stmt_info
= new_stmt_info
;
7292 /* Function vectorizable_store.
7294 Check if STMT_INFO defines a non scalar data-ref (array/pointer/structure)
7295 that can be vectorized.
7296 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
7297 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
7298 Return true if STMT_INFO is vectorizable in this way. */
7301 vectorizable_store (stmt_vec_info stmt_info
, gimple_stmt_iterator
*gsi
,
7302 stmt_vec_info
*vec_stmt
, slp_tree slp_node
,
7303 stmt_vector_for_cost
*cost_vec
)
7307 tree vec_oprnd
= NULL_TREE
;
7309 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
7310 class loop
*loop
= NULL
;
7311 machine_mode vec_mode
;
7313 enum dr_alignment_support alignment_support_scheme
;
7314 enum vect_def_type rhs_dt
= vect_unknown_def_type
;
7315 enum vect_def_type mask_dt
= vect_unknown_def_type
;
7316 stmt_vec_info prev_stmt_info
= NULL
;
7317 tree dataref_ptr
= NULL_TREE
;
7318 tree dataref_offset
= NULL_TREE
;
7319 gimple
*ptr_incr
= NULL
;
7322 stmt_vec_info first_stmt_info
;
7324 unsigned int group_size
, i
;
7325 vec
<tree
> oprnds
= vNULL
;
7326 vec
<tree
> result_chain
= vNULL
;
7327 tree offset
= NULL_TREE
;
7328 vec
<tree
> vec_oprnds
= vNULL
;
7329 bool slp
= (slp_node
!= NULL
);
7330 unsigned int vec_num
;
7331 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
7332 vec_info
*vinfo
= stmt_info
->vinfo
;
7334 gather_scatter_info gs_info
;
7336 vec_load_store_type vls_type
;
7339 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
7342 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
7346 /* Is vectorizable store? */
7348 tree mask
= NULL_TREE
, mask_vectype
= NULL_TREE
;
7349 if (gassign
*assign
= dyn_cast
<gassign
*> (stmt_info
->stmt
))
7351 tree scalar_dest
= gimple_assign_lhs (assign
);
7352 if (TREE_CODE (scalar_dest
) == VIEW_CONVERT_EXPR
7353 && is_pattern_stmt_p (stmt_info
))
7354 scalar_dest
= TREE_OPERAND (scalar_dest
, 0);
7355 if (TREE_CODE (scalar_dest
) != ARRAY_REF
7356 && TREE_CODE (scalar_dest
) != BIT_FIELD_REF
7357 && TREE_CODE (scalar_dest
) != INDIRECT_REF
7358 && TREE_CODE (scalar_dest
) != COMPONENT_REF
7359 && TREE_CODE (scalar_dest
) != IMAGPART_EXPR
7360 && TREE_CODE (scalar_dest
) != REALPART_EXPR
7361 && TREE_CODE (scalar_dest
) != MEM_REF
)
7366 gcall
*call
= dyn_cast
<gcall
*> (stmt_info
->stmt
);
7367 if (!call
|| !gimple_call_internal_p (call
))
7370 internal_fn ifn
= gimple_call_internal_fn (call
);
7371 if (!internal_store_fn_p (ifn
))
7374 if (slp_node
!= NULL
)
7376 if (dump_enabled_p ())
7377 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
7378 "SLP of masked stores not supported.\n");
7382 int mask_index
= internal_fn_mask_index (ifn
);
7383 if (mask_index
>= 0)
7385 mask
= gimple_call_arg (call
, mask_index
);
7386 if (!vect_check_load_store_mask (stmt_info
, mask
, &mask_dt
,
7392 op
= vect_get_store_rhs (stmt_info
);
7394 /* Cannot have hybrid store SLP -- that would mean storing to the
7395 same location twice. */
7396 gcc_assert (slp
== PURE_SLP_STMT (stmt_info
));
7398 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
), rhs_vectype
= NULL_TREE
;
7399 poly_uint64 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
7403 loop
= LOOP_VINFO_LOOP (loop_vinfo
);
7404 vf
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
);
7409 /* Multiple types in SLP are handled by creating the appropriate number of
7410 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
7415 ncopies
= vect_get_num_copies (loop_vinfo
, vectype
);
7417 gcc_assert (ncopies
>= 1);
7419 /* FORNOW. This restriction should be relaxed. */
7420 if (loop
&& nested_in_vect_loop_p (loop
, stmt_info
) && ncopies
> 1)
7422 if (dump_enabled_p ())
7423 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
7424 "multiple types in nested loop.\n");
7428 if (!vect_check_store_rhs (stmt_info
, op
, &rhs_dt
, &rhs_vectype
, &vls_type
))
7431 elem_type
= TREE_TYPE (vectype
);
7432 vec_mode
= TYPE_MODE (vectype
);
7434 if (!STMT_VINFO_DATA_REF (stmt_info
))
7437 vect_memory_access_type memory_access_type
;
7438 if (!get_load_store_type (stmt_info
, vectype
, slp
, mask
, vls_type
, ncopies
,
7439 &memory_access_type
, &gs_info
))
7444 if (memory_access_type
== VMAT_CONTIGUOUS
)
7446 if (!VECTOR_MODE_P (vec_mode
)
7447 || !can_vec_mask_load_store_p (vec_mode
,
7448 TYPE_MODE (mask_vectype
), false))
7451 else if (memory_access_type
!= VMAT_LOAD_STORE_LANES
7452 && (memory_access_type
!= VMAT_GATHER_SCATTER
7453 || (gs_info
.decl
&& !VECTOR_BOOLEAN_TYPE_P (mask_vectype
))))
7455 if (dump_enabled_p ())
7456 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
7457 "unsupported access type for masked store.\n");
7463 /* FORNOW. In some cases can vectorize even if data-type not supported
7464 (e.g. - array initialization with 0). */
7465 if (optab_handler (mov_optab
, vec_mode
) == CODE_FOR_nothing
)
7469 dr_vec_info
*dr_info
= STMT_VINFO_DR_INFO (stmt_info
), *first_dr_info
= NULL
;
7470 grouped_store
= (STMT_VINFO_GROUPED_ACCESS (stmt_info
)
7471 && memory_access_type
!= VMAT_GATHER_SCATTER
7472 && (slp
|| memory_access_type
!= VMAT_CONTIGUOUS
));
7475 first_stmt_info
= DR_GROUP_FIRST_ELEMENT (stmt_info
);
7476 first_dr_info
= STMT_VINFO_DR_INFO (first_stmt_info
);
7477 group_size
= DR_GROUP_SIZE (first_stmt_info
);
7481 first_stmt_info
= stmt_info
;
7482 first_dr_info
= dr_info
;
7483 group_size
= vec_num
= 1;
7486 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) > 1 && !vec_stmt
)
7488 if (!check_scan_store (stmt_info
, vectype
, rhs_dt
, slp
, mask
,
7489 memory_access_type
))
7493 if (!vec_stmt
) /* transformation not required. */
7495 STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info
) = memory_access_type
;
7498 && LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo
))
7499 check_load_store_masking (loop_vinfo
, vectype
, vls_type
, group_size
,
7500 memory_access_type
, &gs_info
, mask
);
7502 STMT_VINFO_TYPE (stmt_info
) = store_vec_info_type
;
7503 vect_model_store_cost (stmt_info
, ncopies
, rhs_dt
, memory_access_type
,
7504 vls_type
, slp_node
, cost_vec
);
7507 gcc_assert (memory_access_type
== STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info
));
7511 ensure_base_align (dr_info
);
7513 if (memory_access_type
== VMAT_GATHER_SCATTER
&& gs_info
.decl
)
7515 tree vec_oprnd0
= NULL_TREE
, vec_oprnd1
= NULL_TREE
, src
;
7516 tree arglist
= TYPE_ARG_TYPES (TREE_TYPE (gs_info
.decl
));
7517 tree rettype
, srctype
, ptrtype
, idxtype
, masktype
, scaletype
;
7518 tree ptr
, var
, scale
, vec_mask
;
7519 tree mask_arg
= NULL_TREE
, mask_op
= NULL_TREE
, perm_mask
= NULL_TREE
;
7520 tree mask_halfvectype
= mask_vectype
;
7521 edge pe
= loop_preheader_edge (loop
);
7524 enum { NARROW
, NONE
, WIDEN
} modifier
;
7525 poly_uint64 scatter_off_nunits
7526 = TYPE_VECTOR_SUBPARTS (gs_info
.offset_vectype
);
7528 if (known_eq (nunits
, scatter_off_nunits
))
7530 else if (known_eq (nunits
* 2, scatter_off_nunits
))
7534 /* Currently gathers and scatters are only supported for
7535 fixed-length vectors. */
7536 unsigned int count
= scatter_off_nunits
.to_constant ();
7537 vec_perm_builder
sel (count
, count
, 1);
7538 for (i
= 0; i
< (unsigned int) count
; ++i
)
7539 sel
.quick_push (i
| (count
/ 2));
7541 vec_perm_indices
indices (sel
, 1, count
);
7542 perm_mask
= vect_gen_perm_mask_checked (gs_info
.offset_vectype
,
7544 gcc_assert (perm_mask
!= NULL_TREE
);
7546 else if (known_eq (nunits
, scatter_off_nunits
* 2))
7550 /* Currently gathers and scatters are only supported for
7551 fixed-length vectors. */
7552 unsigned int count
= nunits
.to_constant ();
7553 vec_perm_builder
sel (count
, count
, 1);
7554 for (i
= 0; i
< (unsigned int) count
; ++i
)
7555 sel
.quick_push (i
| (count
/ 2));
7557 vec_perm_indices
indices (sel
, 2, count
);
7558 perm_mask
= vect_gen_perm_mask_checked (vectype
, indices
);
7559 gcc_assert (perm_mask
!= NULL_TREE
);
7563 mask_halfvectype
= truth_type_for (gs_info
.offset_vectype
);
7568 rettype
= TREE_TYPE (TREE_TYPE (gs_info
.decl
));
7569 ptrtype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
7570 masktype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
7571 idxtype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
7572 srctype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
7573 scaletype
= TREE_VALUE (arglist
);
7575 gcc_checking_assert (TREE_CODE (masktype
) == INTEGER_TYPE
7576 && TREE_CODE (rettype
) == VOID_TYPE
);
7578 ptr
= fold_convert (ptrtype
, gs_info
.base
);
7579 if (!is_gimple_min_invariant (ptr
))
7581 ptr
= force_gimple_operand (ptr
, &seq
, true, NULL_TREE
);
7582 new_bb
= gsi_insert_seq_on_edge_immediate (pe
, seq
);
7583 gcc_assert (!new_bb
);
7586 if (mask
== NULL_TREE
)
7588 mask_arg
= build_int_cst (masktype
, -1);
7589 mask_arg
= vect_init_vector (stmt_info
, mask_arg
, masktype
, NULL
);
7592 scale
= build_int_cst (scaletype
, gs_info
.scale
);
7594 prev_stmt_info
= NULL
;
7595 for (j
= 0; j
< ncopies
; ++j
)
7599 src
= vec_oprnd1
= vect_get_vec_def_for_operand (op
, stmt_info
);
7600 op
= vec_oprnd0
= vect_get_vec_def_for_operand (gs_info
.offset
,
7603 mask_op
= vec_mask
= vect_get_vec_def_for_operand (mask
,
7606 else if (modifier
!= NONE
&& (j
& 1))
7608 if (modifier
== WIDEN
)
7611 = vec_oprnd1
= vect_get_vec_def_for_stmt_copy (vinfo
,
7613 op
= permute_vec_elements (vec_oprnd0
, vec_oprnd0
, perm_mask
,
7617 = vec_mask
= vect_get_vec_def_for_stmt_copy (vinfo
,
7620 else if (modifier
== NARROW
)
7622 src
= permute_vec_elements (vec_oprnd1
, vec_oprnd1
, perm_mask
,
7624 op
= vec_oprnd0
= vect_get_vec_def_for_stmt_copy (vinfo
,
7632 src
= vec_oprnd1
= vect_get_vec_def_for_stmt_copy (vinfo
,
7634 op
= vec_oprnd0
= vect_get_vec_def_for_stmt_copy (vinfo
,
7637 mask_op
= vec_mask
= vect_get_vec_def_for_stmt_copy (vinfo
,
7641 if (!useless_type_conversion_p (srctype
, TREE_TYPE (src
)))
7643 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (src
)),
7644 TYPE_VECTOR_SUBPARTS (srctype
)));
7645 var
= vect_get_new_ssa_name (srctype
, vect_simple_var
);
7646 src
= build1 (VIEW_CONVERT_EXPR
, srctype
, src
);
7648 = gimple_build_assign (var
, VIEW_CONVERT_EXPR
, src
);
7649 vect_finish_stmt_generation (stmt_info
, new_stmt
, gsi
);
7653 if (!useless_type_conversion_p (idxtype
, TREE_TYPE (op
)))
7655 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op
)),
7656 TYPE_VECTOR_SUBPARTS (idxtype
)));
7657 var
= vect_get_new_ssa_name (idxtype
, vect_simple_var
);
7658 op
= build1 (VIEW_CONVERT_EXPR
, idxtype
, op
);
7660 = gimple_build_assign (var
, VIEW_CONVERT_EXPR
, op
);
7661 vect_finish_stmt_generation (stmt_info
, new_stmt
, gsi
);
7669 if (modifier
== NARROW
)
7671 var
= vect_get_new_ssa_name (mask_halfvectype
,
7674 = gimple_build_assign (var
, (j
& 1) ? VEC_UNPACK_HI_EXPR
7675 : VEC_UNPACK_LO_EXPR
,
7677 vect_finish_stmt_generation (stmt_info
, new_stmt
, gsi
);
7680 tree optype
= TREE_TYPE (mask_arg
);
7681 if (TYPE_MODE (masktype
) == TYPE_MODE (optype
))
7684 utype
= lang_hooks
.types
.type_for_mode (TYPE_MODE (optype
), 1);
7685 var
= vect_get_new_ssa_name (utype
, vect_scalar_var
);
7686 mask_arg
= build1 (VIEW_CONVERT_EXPR
, utype
, mask_arg
);
7688 = gimple_build_assign (var
, VIEW_CONVERT_EXPR
, mask_arg
);
7689 vect_finish_stmt_generation (stmt_info
, new_stmt
, gsi
);
7691 if (!useless_type_conversion_p (masktype
, utype
))
7693 gcc_assert (TYPE_PRECISION (utype
)
7694 <= TYPE_PRECISION (masktype
));
7695 var
= vect_get_new_ssa_name (masktype
, vect_scalar_var
);
7696 new_stmt
= gimple_build_assign (var
, NOP_EXPR
, mask_arg
);
7697 vect_finish_stmt_generation (stmt_info
, new_stmt
, gsi
);
7703 = gimple_build_call (gs_info
.decl
, 5, ptr
, mask_arg
, op
, src
, scale
);
7704 stmt_vec_info new_stmt_info
7705 = vect_finish_stmt_generation (stmt_info
, new_stmt
, gsi
);
7707 if (prev_stmt_info
== NULL
)
7708 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt_info
;
7710 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt_info
;
7711 prev_stmt_info
= new_stmt_info
;
7715 else if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) >= 3)
7716 return vectorizable_scan_store (stmt_info
, gsi
, vec_stmt
, ncopies
);
7718 if (STMT_VINFO_GROUPED_ACCESS (stmt_info
))
7719 DR_GROUP_STORE_COUNT (DR_GROUP_FIRST_ELEMENT (stmt_info
))++;
7724 gcc_assert (!loop
|| !nested_in_vect_loop_p (loop
, stmt_info
));
7726 /* We vectorize all the stmts of the interleaving group when we
7727 reach the last stmt in the group. */
7728 if (DR_GROUP_STORE_COUNT (first_stmt_info
)
7729 < DR_GROUP_SIZE (first_stmt_info
)
7738 grouped_store
= false;
7739 /* VEC_NUM is the number of vect stmts to be created for this
7741 vec_num
= SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
);
7742 first_stmt_info
= SLP_TREE_SCALAR_STMTS (slp_node
)[0];
7743 gcc_assert (DR_GROUP_FIRST_ELEMENT (first_stmt_info
)
7744 == first_stmt_info
);
7745 first_dr_info
= STMT_VINFO_DR_INFO (first_stmt_info
);
7746 op
= vect_get_store_rhs (first_stmt_info
);
7749 /* VEC_NUM is the number of vect stmts to be created for this
7751 vec_num
= group_size
;
7753 ref_type
= get_group_alias_ptr_type (first_stmt_info
);
7756 ref_type
= reference_alias_ptr_type (DR_REF (first_dr_info
->dr
));
7758 if (dump_enabled_p ())
7759 dump_printf_loc (MSG_NOTE
, vect_location
,
7760 "transform store. ncopies = %d\n", ncopies
);
7762 if (memory_access_type
== VMAT_ELEMENTWISE
7763 || memory_access_type
== VMAT_STRIDED_SLP
)
7765 gimple_stmt_iterator incr_gsi
;
7771 tree stride_base
, stride_step
, alias_off
;
7774 /* Checked by get_load_store_type. */
7775 unsigned int const_nunits
= nunits
.to_constant ();
7777 gcc_assert (!LOOP_VINFO_FULLY_MASKED_P (loop_vinfo
));
7778 gcc_assert (!nested_in_vect_loop_p (loop
, stmt_info
));
7781 = fold_build_pointer_plus
7782 (DR_BASE_ADDRESS (first_dr_info
->dr
),
7783 size_binop (PLUS_EXPR
,
7784 convert_to_ptrofftype (DR_OFFSET (first_dr_info
->dr
)),
7785 convert_to_ptrofftype (DR_INIT (first_dr_info
->dr
))));
7786 stride_step
= fold_convert (sizetype
, DR_STEP (first_dr_info
->dr
));
7788 /* For a store with loop-invariant (but other than power-of-2)
7789 stride (i.e. not a grouped access) like so:
7791 for (i = 0; i < n; i += stride)
7794 we generate a new induction variable and new stores from
7795 the components of the (vectorized) rhs:
7797 for (j = 0; ; j += VF*stride)
7802 array[j + stride] = tmp2;
7806 unsigned nstores
= const_nunits
;
7808 tree ltype
= elem_type
;
7809 tree lvectype
= vectype
;
7812 if (group_size
< const_nunits
7813 && const_nunits
% group_size
== 0)
7815 nstores
= const_nunits
/ group_size
;
7817 ltype
= build_vector_type (elem_type
, group_size
);
7820 /* First check if vec_extract optab doesn't support extraction
7821 of vector elts directly. */
7822 scalar_mode elmode
= SCALAR_TYPE_MODE (elem_type
);
7824 if (!related_vector_mode (TYPE_MODE (vectype
), elmode
,
7825 group_size
).exists (&vmode
)
7826 || (convert_optab_handler (vec_extract_optab
,
7827 TYPE_MODE (vectype
), vmode
)
7828 == CODE_FOR_nothing
))
7830 /* Try to avoid emitting an extract of vector elements
7831 by performing the extracts using an integer type of the
7832 same size, extracting from a vector of those and then
7833 re-interpreting it as the original vector type if
7836 = group_size
* GET_MODE_BITSIZE (elmode
);
7837 unsigned int lnunits
= const_nunits
/ group_size
;
7838 /* If we can't construct such a vector fall back to
7839 element extracts from the original vector type and
7840 element size stores. */
7841 if (int_mode_for_size (lsize
, 0).exists (&elmode
)
7842 && related_vector_mode (TYPE_MODE (vectype
), elmode
,
7843 lnunits
).exists (&vmode
)
7844 && (convert_optab_handler (vec_extract_optab
,
7846 != CODE_FOR_nothing
))
7850 ltype
= build_nonstandard_integer_type (lsize
, 1);
7851 lvectype
= build_vector_type (ltype
, nstores
);
7853 /* Else fall back to vector extraction anyway.
7854 Fewer stores are more important than avoiding spilling
7855 of the vector we extract from. Compared to the
7856 construction case in vectorizable_load no store-forwarding
7857 issue exists here for reasonable archs. */
7860 else if (group_size
>= const_nunits
7861 && group_size
% const_nunits
== 0)
7864 lnel
= const_nunits
;
7868 ltype
= build_aligned_type (ltype
, TYPE_ALIGN (elem_type
));
7869 ncopies
= SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
);
7872 ivstep
= stride_step
;
7873 ivstep
= fold_build2 (MULT_EXPR
, TREE_TYPE (ivstep
), ivstep
,
7874 build_int_cst (TREE_TYPE (ivstep
), vf
));
7876 standard_iv_increment_position (loop
, &incr_gsi
, &insert_after
);
7878 stride_base
= cse_and_gimplify_to_preheader (loop_vinfo
, stride_base
);
7879 ivstep
= cse_and_gimplify_to_preheader (loop_vinfo
, ivstep
);
7880 create_iv (stride_base
, ivstep
, NULL
,
7881 loop
, &incr_gsi
, insert_after
,
7883 incr
= gsi_stmt (incr_gsi
);
7884 loop_vinfo
->add_stmt (incr
);
7886 stride_step
= cse_and_gimplify_to_preheader (loop_vinfo
, stride_step
);
7888 prev_stmt_info
= NULL
;
7889 alias_off
= build_int_cst (ref_type
, 0);
7890 stmt_vec_info next_stmt_info
= first_stmt_info
;
7891 for (g
= 0; g
< group_size
; g
++)
7893 running_off
= offvar
;
7896 tree size
= TYPE_SIZE_UNIT (ltype
);
7897 tree pos
= fold_build2 (MULT_EXPR
, sizetype
, size_int (g
),
7899 tree newoff
= copy_ssa_name (running_off
, NULL
);
7900 incr
= gimple_build_assign (newoff
, POINTER_PLUS_EXPR
,
7902 vect_finish_stmt_generation (stmt_info
, incr
, gsi
);
7903 running_off
= newoff
;
7905 unsigned int group_el
= 0;
7906 unsigned HOST_WIDE_INT
7907 elsz
= tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype
)));
7908 for (j
= 0; j
< ncopies
; j
++)
7910 /* We've set op and dt above, from vect_get_store_rhs,
7911 and first_stmt_info == stmt_info. */
7916 vect_get_vec_defs (op
, NULL_TREE
, stmt_info
,
7917 &vec_oprnds
, NULL
, slp_node
);
7918 vec_oprnd
= vec_oprnds
[0];
7922 op
= vect_get_store_rhs (next_stmt_info
);
7923 vec_oprnd
= vect_get_vec_def_for_operand
7924 (op
, next_stmt_info
);
7930 vec_oprnd
= vec_oprnds
[j
];
7932 vec_oprnd
= vect_get_vec_def_for_stmt_copy (vinfo
,
7935 /* Pun the vector to extract from if necessary. */
7936 if (lvectype
!= vectype
)
7938 tree tem
= make_ssa_name (lvectype
);
7940 = gimple_build_assign (tem
, build1 (VIEW_CONVERT_EXPR
,
7941 lvectype
, vec_oprnd
));
7942 vect_finish_stmt_generation (stmt_info
, pun
, gsi
);
7945 for (i
= 0; i
< nstores
; i
++)
7947 tree newref
, newoff
;
7948 gimple
*incr
, *assign
;
7949 tree size
= TYPE_SIZE (ltype
);
7950 /* Extract the i'th component. */
7951 tree pos
= fold_build2 (MULT_EXPR
, bitsizetype
,
7952 bitsize_int (i
), size
);
7953 tree elem
= fold_build3 (BIT_FIELD_REF
, ltype
, vec_oprnd
,
7956 elem
= force_gimple_operand_gsi (gsi
, elem
, true,
7960 tree this_off
= build_int_cst (TREE_TYPE (alias_off
),
7962 newref
= build2 (MEM_REF
, ltype
,
7963 running_off
, this_off
);
7964 vect_copy_ref_info (newref
, DR_REF (first_dr_info
->dr
));
7966 /* And store it to *running_off. */
7967 assign
= gimple_build_assign (newref
, elem
);
7968 stmt_vec_info assign_info
7969 = vect_finish_stmt_generation (stmt_info
, assign
, gsi
);
7973 || group_el
== group_size
)
7975 newoff
= copy_ssa_name (running_off
, NULL
);
7976 incr
= gimple_build_assign (newoff
, POINTER_PLUS_EXPR
,
7977 running_off
, stride_step
);
7978 vect_finish_stmt_generation (stmt_info
, incr
, gsi
);
7980 running_off
= newoff
;
7983 if (g
== group_size
- 1
7986 if (j
== 0 && i
== 0)
7987 STMT_VINFO_VEC_STMT (stmt_info
)
7988 = *vec_stmt
= assign_info
;
7990 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = assign_info
;
7991 prev_stmt_info
= assign_info
;
7995 next_stmt_info
= DR_GROUP_NEXT_ELEMENT (next_stmt_info
);
8000 vec_oprnds
.release ();
8004 auto_vec
<tree
> dr_chain (group_size
);
8005 oprnds
.create (group_size
);
8007 alignment_support_scheme
8008 = vect_supportable_dr_alignment (first_dr_info
, false);
8009 gcc_assert (alignment_support_scheme
);
8010 vec_loop_masks
*loop_masks
8011 = (loop_vinfo
&& LOOP_VINFO_FULLY_MASKED_P (loop_vinfo
)
8012 ? &LOOP_VINFO_MASKS (loop_vinfo
)
8014 /* Targets with store-lane instructions must not require explicit
8015 realignment. vect_supportable_dr_alignment always returns either
8016 dr_aligned or dr_unaligned_supported for masked operations. */
8017 gcc_assert ((memory_access_type
!= VMAT_LOAD_STORE_LANES
8020 || alignment_support_scheme
== dr_aligned
8021 || alignment_support_scheme
== dr_unaligned_supported
);
8023 if (memory_access_type
== VMAT_CONTIGUOUS_DOWN
8024 || memory_access_type
== VMAT_CONTIGUOUS_REVERSE
)
8025 offset
= size_int (-TYPE_VECTOR_SUBPARTS (vectype
) + 1);
8028 tree vec_offset
= NULL_TREE
;
8029 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info
))
8031 aggr_type
= NULL_TREE
;
8034 else if (memory_access_type
== VMAT_GATHER_SCATTER
)
8036 aggr_type
= elem_type
;
8037 vect_get_strided_load_store_ops (stmt_info
, loop_vinfo
, &gs_info
,
8038 &bump
, &vec_offset
);
8042 if (memory_access_type
== VMAT_LOAD_STORE_LANES
)
8043 aggr_type
= build_array_type_nelts (elem_type
, vec_num
* nunits
);
8045 aggr_type
= vectype
;
8046 bump
= vect_get_data_ptr_increment (dr_info
, aggr_type
,
8047 memory_access_type
);
8051 LOOP_VINFO_HAS_MASK_STORE (loop_vinfo
) = true;
8053 /* In case the vectorization factor (VF) is bigger than the number
8054 of elements that we can fit in a vectype (nunits), we have to generate
8055 more than one vector stmt - i.e - we need to "unroll" the
8056 vector stmt by a factor VF/nunits. For more details see documentation in
8057 vect_get_vec_def_for_copy_stmt. */
8059 /* In case of interleaving (non-unit grouped access):
8066 We create vectorized stores starting from base address (the access of the
8067 first stmt in the chain (S2 in the above example), when the last store stmt
8068 of the chain (S4) is reached:
8071 VS2: &base + vec_size*1 = vx0
8072 VS3: &base + vec_size*2 = vx1
8073 VS4: &base + vec_size*3 = vx3
8075 Then permutation statements are generated:
8077 VS5: vx5 = VEC_PERM_EXPR < vx0, vx3, {0, 8, 1, 9, 2, 10, 3, 11} >
8078 VS6: vx6 = VEC_PERM_EXPR < vx0, vx3, {4, 12, 5, 13, 6, 14, 7, 15} >
8081 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
8082 (the order of the data-refs in the output of vect_permute_store_chain
8083 corresponds to the order of scalar stmts in the interleaving chain - see
8084 the documentation of vect_permute_store_chain()).
8086 In case of both multiple types and interleaving, above vector stores and
8087 permutation stmts are created for every copy. The result vector stmts are
8088 put in STMT_VINFO_VEC_STMT for the first copy and in the corresponding
8089 STMT_VINFO_RELATED_STMT for the next copies.
8092 prev_stmt_info
= NULL
;
8093 tree vec_mask
= NULL_TREE
;
8094 for (j
= 0; j
< ncopies
; j
++)
8096 stmt_vec_info new_stmt_info
;
8101 /* Get vectorized arguments for SLP_NODE. */
8102 vect_get_vec_defs (op
, NULL_TREE
, stmt_info
, &vec_oprnds
,
8105 vec_oprnd
= vec_oprnds
[0];
8109 /* For interleaved stores we collect vectorized defs for all the
8110 stores in the group in DR_CHAIN and OPRNDS. DR_CHAIN is then
8111 used as an input to vect_permute_store_chain(), and OPRNDS as
8112 an input to vect_get_vec_def_for_stmt_copy() for the next copy.
8114 If the store is not grouped, DR_GROUP_SIZE is 1, and DR_CHAIN and
8115 OPRNDS are of size 1. */
8116 stmt_vec_info next_stmt_info
= first_stmt_info
;
8117 for (i
= 0; i
< group_size
; i
++)
8119 /* Since gaps are not supported for interleaved stores,
8120 DR_GROUP_SIZE is the exact number of stmts in the chain.
8121 Therefore, NEXT_STMT_INFO can't be NULL_TREE. In case
8122 that there is no interleaving, DR_GROUP_SIZE is 1,
8123 and only one iteration of the loop will be executed. */
8124 op
= vect_get_store_rhs (next_stmt_info
);
8125 vec_oprnd
= vect_get_vec_def_for_operand
8126 (op
, next_stmt_info
);
8127 dr_chain
.quick_push (vec_oprnd
);
8128 oprnds
.quick_push (vec_oprnd
);
8129 next_stmt_info
= DR_GROUP_NEXT_ELEMENT (next_stmt_info
);
8132 vec_mask
= vect_get_vec_def_for_operand (mask
, stmt_info
,
8136 /* We should have catched mismatched types earlier. */
8137 gcc_assert (useless_type_conversion_p (vectype
,
8138 TREE_TYPE (vec_oprnd
)));
8139 bool simd_lane_access_p
8140 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) != 0;
8141 if (simd_lane_access_p
8143 && TREE_CODE (DR_BASE_ADDRESS (first_dr_info
->dr
)) == ADDR_EXPR
8144 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr_info
->dr
), 0))
8145 && integer_zerop (DR_OFFSET (first_dr_info
->dr
))
8146 && integer_zerop (DR_INIT (first_dr_info
->dr
))
8147 && alias_sets_conflict_p (get_alias_set (aggr_type
),
8148 get_alias_set (TREE_TYPE (ref_type
))))
8150 dataref_ptr
= unshare_expr (DR_BASE_ADDRESS (first_dr_info
->dr
));
8151 dataref_offset
= build_int_cst (ref_type
, 0);
8153 else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info
))
8154 vect_get_gather_scatter_ops (loop
, stmt_info
, &gs_info
,
8155 &dataref_ptr
, &vec_offset
);
8158 = vect_create_data_ref_ptr (first_stmt_info
, aggr_type
,
8159 simd_lane_access_p
? loop
: NULL
,
8160 offset
, &dummy
, gsi
, &ptr_incr
,
8161 simd_lane_access_p
, NULL_TREE
, bump
);
8165 /* For interleaved stores we created vectorized defs for all the
8166 defs stored in OPRNDS in the previous iteration (previous copy).
8167 DR_CHAIN is then used as an input to vect_permute_store_chain(),
8168 and OPRNDS as an input to vect_get_vec_def_for_stmt_copy() for the
8170 If the store is not grouped, DR_GROUP_SIZE is 1, and DR_CHAIN and
8171 OPRNDS are of size 1. */
8172 for (i
= 0; i
< group_size
; i
++)
8175 vec_oprnd
= vect_get_vec_def_for_stmt_copy (vinfo
, op
);
8176 dr_chain
[i
] = vec_oprnd
;
8177 oprnds
[i
] = vec_oprnd
;
8180 vec_mask
= vect_get_vec_def_for_stmt_copy (vinfo
, vec_mask
);
8183 = int_const_binop (PLUS_EXPR
, dataref_offset
, bump
);
8184 else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info
))
8185 vec_offset
= vect_get_vec_def_for_stmt_copy (vinfo
, vec_offset
);
8187 dataref_ptr
= bump_vector_ptr (dataref_ptr
, ptr_incr
, gsi
,
8191 if (memory_access_type
== VMAT_LOAD_STORE_LANES
)
8195 /* Get an array into which we can store the individual vectors. */
8196 vec_array
= create_vector_array (vectype
, vec_num
);
8198 /* Invalidate the current contents of VEC_ARRAY. This should
8199 become an RTL clobber too, which prevents the vector registers
8200 from being upward-exposed. */
8201 vect_clobber_variable (stmt_info
, gsi
, vec_array
);
8203 /* Store the individual vectors into the array. */
8204 for (i
= 0; i
< vec_num
; i
++)
8206 vec_oprnd
= dr_chain
[i
];
8207 write_vector_array (stmt_info
, gsi
, vec_oprnd
, vec_array
, i
);
8210 tree final_mask
= NULL
;
8212 final_mask
= vect_get_loop_mask (gsi
, loop_masks
, ncopies
,
8215 final_mask
= prepare_load_store_mask (mask_vectype
, final_mask
,
8222 MASK_STORE_LANES (DATAREF_PTR, ALIAS_PTR, VEC_MASK,
8224 unsigned int align
= TYPE_ALIGN_UNIT (TREE_TYPE (vectype
));
8225 tree alias_ptr
= build_int_cst (ref_type
, align
);
8226 call
= gimple_build_call_internal (IFN_MASK_STORE_LANES
, 4,
8227 dataref_ptr
, alias_ptr
,
8228 final_mask
, vec_array
);
8233 MEM_REF[...all elements...] = STORE_LANES (VEC_ARRAY). */
8234 data_ref
= create_array_ref (aggr_type
, dataref_ptr
, ref_type
);
8235 call
= gimple_build_call_internal (IFN_STORE_LANES
, 1,
8237 gimple_call_set_lhs (call
, data_ref
);
8239 gimple_call_set_nothrow (call
, true);
8240 new_stmt_info
= vect_finish_stmt_generation (stmt_info
, call
, gsi
);
8242 /* Record that VEC_ARRAY is now dead. */
8243 vect_clobber_variable (stmt_info
, gsi
, vec_array
);
8247 new_stmt_info
= NULL
;
8251 result_chain
.create (group_size
);
8253 vect_permute_store_chain (dr_chain
, group_size
, stmt_info
, gsi
,
8257 stmt_vec_info next_stmt_info
= first_stmt_info
;
8258 for (i
= 0; i
< vec_num
; i
++)
8261 unsigned HOST_WIDE_INT align
;
8263 tree final_mask
= NULL_TREE
;
8265 final_mask
= vect_get_loop_mask (gsi
, loop_masks
,
8267 vectype
, vec_num
* j
+ i
);
8269 final_mask
= prepare_load_store_mask (mask_vectype
, final_mask
,
8272 if (memory_access_type
== VMAT_GATHER_SCATTER
)
8274 tree scale
= size_int (gs_info
.scale
);
8277 call
= gimple_build_call_internal
8278 (IFN_MASK_SCATTER_STORE
, 5, dataref_ptr
, vec_offset
,
8279 scale
, vec_oprnd
, final_mask
);
8281 call
= gimple_build_call_internal
8282 (IFN_SCATTER_STORE
, 4, dataref_ptr
, vec_offset
,
8284 gimple_call_set_nothrow (call
, true);
8286 = vect_finish_stmt_generation (stmt_info
, call
, gsi
);
8291 /* Bump the vector pointer. */
8292 dataref_ptr
= bump_vector_ptr (dataref_ptr
, ptr_incr
, gsi
,
8296 vec_oprnd
= vec_oprnds
[i
];
8297 else if (grouped_store
)
8298 /* For grouped stores vectorized defs are interleaved in
8299 vect_permute_store_chain(). */
8300 vec_oprnd
= result_chain
[i
];
8302 align
= known_alignment (DR_TARGET_ALIGNMENT (first_dr_info
));
8303 if (aligned_access_p (first_dr_info
))
8305 else if (DR_MISALIGNMENT (first_dr_info
) == -1)
8307 align
= dr_alignment (vect_dr_behavior (first_dr_info
));
8311 misalign
= DR_MISALIGNMENT (first_dr_info
);
8312 if (dataref_offset
== NULL_TREE
8313 && TREE_CODE (dataref_ptr
) == SSA_NAME
)
8314 set_ptr_info_alignment (get_ptr_info (dataref_ptr
), align
,
8317 if (memory_access_type
== VMAT_CONTIGUOUS_REVERSE
)
8319 tree perm_mask
= perm_mask_for_reverse (vectype
);
8320 tree perm_dest
= vect_create_destination_var
8321 (vect_get_store_rhs (stmt_info
), vectype
);
8322 tree new_temp
= make_ssa_name (perm_dest
);
8324 /* Generate the permute statement. */
8326 = gimple_build_assign (new_temp
, VEC_PERM_EXPR
, vec_oprnd
,
8327 vec_oprnd
, perm_mask
);
8328 vect_finish_stmt_generation (stmt_info
, perm_stmt
, gsi
);
8330 perm_stmt
= SSA_NAME_DEF_STMT (new_temp
);
8331 vec_oprnd
= new_temp
;
8334 /* Arguments are ready. Create the new vector stmt. */
8337 align
= least_bit_hwi (misalign
| align
);
8338 tree ptr
= build_int_cst (ref_type
, align
);
8340 = gimple_build_call_internal (IFN_MASK_STORE
, 4,
8342 final_mask
, vec_oprnd
);
8343 gimple_call_set_nothrow (call
, true);
8345 = vect_finish_stmt_generation (stmt_info
, call
, gsi
);
8349 data_ref
= fold_build2 (MEM_REF
, vectype
,
8353 : build_int_cst (ref_type
, 0));
8354 if (aligned_access_p (first_dr_info
))
8356 else if (DR_MISALIGNMENT (first_dr_info
) == -1)
8357 TREE_TYPE (data_ref
)
8358 = build_aligned_type (TREE_TYPE (data_ref
),
8359 align
* BITS_PER_UNIT
);
8361 TREE_TYPE (data_ref
)
8362 = build_aligned_type (TREE_TYPE (data_ref
),
8363 TYPE_ALIGN (elem_type
));
8364 vect_copy_ref_info (data_ref
, DR_REF (first_dr_info
->dr
));
8366 = gimple_build_assign (data_ref
, vec_oprnd
);
8368 = vect_finish_stmt_generation (stmt_info
, new_stmt
, gsi
);
8374 next_stmt_info
= DR_GROUP_NEXT_ELEMENT (next_stmt_info
);
8375 if (!next_stmt_info
)
8382 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt_info
;
8384 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt_info
;
8385 prev_stmt_info
= new_stmt_info
;
8390 result_chain
.release ();
8391 vec_oprnds
.release ();
8396 /* Given a vector type VECTYPE, turns permutation SEL into the equivalent
8397 VECTOR_CST mask. No checks are made that the target platform supports the
8398 mask, so callers may wish to test can_vec_perm_const_p separately, or use
8399 vect_gen_perm_mask_checked. */
8402 vect_gen_perm_mask_any (tree vectype
, const vec_perm_indices
&sel
)
8406 poly_uint64 nunits
= sel
.length ();
8407 gcc_assert (known_eq (nunits
, TYPE_VECTOR_SUBPARTS (vectype
)));
8409 mask_type
= build_vector_type (ssizetype
, nunits
);
8410 return vec_perm_indices_to_tree (mask_type
, sel
);
8413 /* Checked version of vect_gen_perm_mask_any. Asserts can_vec_perm_const_p,
8414 i.e. that the target supports the pattern _for arbitrary input vectors_. */
8417 vect_gen_perm_mask_checked (tree vectype
, const vec_perm_indices
&sel
)
8419 gcc_assert (can_vec_perm_const_p (TYPE_MODE (vectype
), sel
));
8420 return vect_gen_perm_mask_any (vectype
, sel
);
8423 /* Given a vector variable X and Y, that was generated for the scalar
8424 STMT_INFO, generate instructions to permute the vector elements of X and Y
8425 using permutation mask MASK_VEC, insert them at *GSI and return the
8426 permuted vector variable. */
8429 permute_vec_elements (tree x
, tree y
, tree mask_vec
, stmt_vec_info stmt_info
,
8430 gimple_stmt_iterator
*gsi
)
8432 tree vectype
= TREE_TYPE (x
);
8433 tree perm_dest
, data_ref
;
8436 tree scalar_dest
= gimple_get_lhs (stmt_info
->stmt
);
8437 if (scalar_dest
&& TREE_CODE (scalar_dest
) == SSA_NAME
)
8438 perm_dest
= vect_create_destination_var (scalar_dest
, vectype
);
8440 perm_dest
= vect_get_new_vect_var (vectype
, vect_simple_var
, NULL
);
8441 data_ref
= make_ssa_name (perm_dest
);
8443 /* Generate the permute statement. */
8444 perm_stmt
= gimple_build_assign (data_ref
, VEC_PERM_EXPR
, x
, y
, mask_vec
);
8445 vect_finish_stmt_generation (stmt_info
, perm_stmt
, gsi
);
8450 /* Hoist the definitions of all SSA uses on STMT_INFO out of the loop LOOP,
8451 inserting them on the loops preheader edge. Returns true if we
8452 were successful in doing so (and thus STMT_INFO can be moved then),
8453 otherwise returns false. */
8456 hoist_defs_of_uses (stmt_vec_info stmt_info
, class loop
*loop
)
8462 FOR_EACH_SSA_TREE_OPERAND (op
, stmt_info
->stmt
, i
, SSA_OP_USE
)
8464 gimple
*def_stmt
= SSA_NAME_DEF_STMT (op
);
8465 if (!gimple_nop_p (def_stmt
)
8466 && flow_bb_inside_loop_p (loop
, gimple_bb (def_stmt
)))
8468 /* Make sure we don't need to recurse. While we could do
8469 so in simple cases when there are more complex use webs
8470 we don't have an easy way to preserve stmt order to fulfil
8471 dependencies within them. */
8474 if (gimple_code (def_stmt
) == GIMPLE_PHI
)
8476 FOR_EACH_SSA_TREE_OPERAND (op2
, def_stmt
, i2
, SSA_OP_USE
)
8478 gimple
*def_stmt2
= SSA_NAME_DEF_STMT (op2
);
8479 if (!gimple_nop_p (def_stmt2
)
8480 && flow_bb_inside_loop_p (loop
, gimple_bb (def_stmt2
)))
8490 FOR_EACH_SSA_TREE_OPERAND (op
, stmt_info
->stmt
, i
, SSA_OP_USE
)
8492 gimple
*def_stmt
= SSA_NAME_DEF_STMT (op
);
8493 if (!gimple_nop_p (def_stmt
)
8494 && flow_bb_inside_loop_p (loop
, gimple_bb (def_stmt
)))
8496 gimple_stmt_iterator gsi
= gsi_for_stmt (def_stmt
);
8497 gsi_remove (&gsi
, false);
8498 gsi_insert_on_edge_immediate (loop_preheader_edge (loop
), def_stmt
);
8505 /* vectorizable_load.
8507 Check if STMT_INFO reads a non scalar data-ref (array/pointer/structure)
8508 that can be vectorized.
8509 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
8510 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
8511 Return true if STMT_INFO is vectorizable in this way. */
8514 vectorizable_load (stmt_vec_info stmt_info
, gimple_stmt_iterator
*gsi
,
8515 stmt_vec_info
*vec_stmt
, slp_tree slp_node
,
8516 slp_instance slp_node_instance
,
8517 stmt_vector_for_cost
*cost_vec
)
8520 tree vec_dest
= NULL
;
8521 tree data_ref
= NULL
;
8522 stmt_vec_info prev_stmt_info
;
8523 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
8524 class loop
*loop
= NULL
;
8525 class loop
*containing_loop
= gimple_bb (stmt_info
->stmt
)->loop_father
;
8526 bool nested_in_vect_loop
= false;
8531 enum dr_alignment_support alignment_support_scheme
;
8532 tree dataref_ptr
= NULL_TREE
;
8533 tree dataref_offset
= NULL_TREE
;
8534 gimple
*ptr_incr
= NULL
;
8537 unsigned int group_size
;
8538 poly_uint64 group_gap_adj
;
8539 tree msq
= NULL_TREE
, lsq
;
8540 tree offset
= NULL_TREE
;
8541 tree byte_offset
= NULL_TREE
;
8542 tree realignment_token
= NULL_TREE
;
8544 vec
<tree
> dr_chain
= vNULL
;
8545 bool grouped_load
= false;
8546 stmt_vec_info first_stmt_info
;
8547 stmt_vec_info first_stmt_info_for_drptr
= NULL
;
8548 bool compute_in_loop
= false;
8549 class loop
*at_loop
;
8551 bool slp
= (slp_node
!= NULL
);
8552 bool slp_perm
= false;
8553 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
8556 gather_scatter_info gs_info
;
8557 vec_info
*vinfo
= stmt_info
->vinfo
;
8559 enum vect_def_type mask_dt
= vect_unknown_def_type
;
8561 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
8564 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
8568 tree mask
= NULL_TREE
, mask_vectype
= NULL_TREE
;
8569 if (gassign
*assign
= dyn_cast
<gassign
*> (stmt_info
->stmt
))
8571 scalar_dest
= gimple_assign_lhs (assign
);
8572 if (TREE_CODE (scalar_dest
) != SSA_NAME
)
8575 tree_code code
= gimple_assign_rhs_code (assign
);
8576 if (code
!= ARRAY_REF
8577 && code
!= BIT_FIELD_REF
8578 && code
!= INDIRECT_REF
8579 && code
!= COMPONENT_REF
8580 && code
!= IMAGPART_EXPR
8581 && code
!= REALPART_EXPR
8583 && TREE_CODE_CLASS (code
) != tcc_declaration
)
8588 gcall
*call
= dyn_cast
<gcall
*> (stmt_info
->stmt
);
8589 if (!call
|| !gimple_call_internal_p (call
))
8592 internal_fn ifn
= gimple_call_internal_fn (call
);
8593 if (!internal_load_fn_p (ifn
))
8596 scalar_dest
= gimple_call_lhs (call
);
8600 int mask_index
= internal_fn_mask_index (ifn
);
8601 if (mask_index
>= 0)
8603 mask
= gimple_call_arg (call
, mask_index
);
8604 if (!vect_check_load_store_mask (stmt_info
, mask
, &mask_dt
,
8610 if (!STMT_VINFO_DATA_REF (stmt_info
))
8613 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
8614 poly_uint64 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
8618 loop
= LOOP_VINFO_LOOP (loop_vinfo
);
8619 nested_in_vect_loop
= nested_in_vect_loop_p (loop
, stmt_info
);
8620 vf
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
);
8625 /* Multiple types in SLP are handled by creating the appropriate number of
8626 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
8631 ncopies
= vect_get_num_copies (loop_vinfo
, vectype
);
8633 gcc_assert (ncopies
>= 1);
8635 /* FORNOW. This restriction should be relaxed. */
8636 if (nested_in_vect_loop
&& ncopies
> 1)
8638 if (dump_enabled_p ())
8639 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
8640 "multiple types in nested loop.\n");
8644 /* Invalidate assumptions made by dependence analysis when vectorization
8645 on the unrolled body effectively re-orders stmts. */
8647 && STMT_VINFO_MIN_NEG_DIST (stmt_info
) != 0
8648 && maybe_gt (LOOP_VINFO_VECT_FACTOR (loop_vinfo
),
8649 STMT_VINFO_MIN_NEG_DIST (stmt_info
)))
8651 if (dump_enabled_p ())
8652 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
8653 "cannot perform implicit CSE when unrolling "
8654 "with negative dependence distance\n");
8658 elem_type
= TREE_TYPE (vectype
);
8659 mode
= TYPE_MODE (vectype
);
8661 /* FORNOW. In some cases can vectorize even if data-type not supported
8662 (e.g. - data copies). */
8663 if (optab_handler (mov_optab
, mode
) == CODE_FOR_nothing
)
8665 if (dump_enabled_p ())
8666 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
8667 "Aligned load, but unsupported type.\n");
8671 /* Check if the load is a part of an interleaving chain. */
8672 if (STMT_VINFO_GROUPED_ACCESS (stmt_info
))
8674 grouped_load
= true;
8676 gcc_assert (!nested_in_vect_loop
);
8677 gcc_assert (!STMT_VINFO_GATHER_SCATTER_P (stmt_info
));
8679 first_stmt_info
= DR_GROUP_FIRST_ELEMENT (stmt_info
);
8680 group_size
= DR_GROUP_SIZE (first_stmt_info
);
8682 /* Refuse non-SLP vectorization of SLP-only groups. */
8683 if (!slp
&& STMT_VINFO_SLP_VECT_ONLY (first_stmt_info
))
8685 if (dump_enabled_p ())
8686 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
8687 "cannot vectorize load in non-SLP mode.\n");
8691 if (slp
&& SLP_TREE_LOAD_PERMUTATION (slp_node
).exists ())
8694 /* Invalidate assumptions made by dependence analysis when vectorization
8695 on the unrolled body effectively re-orders stmts. */
8696 if (!PURE_SLP_STMT (stmt_info
)
8697 && STMT_VINFO_MIN_NEG_DIST (stmt_info
) != 0
8698 && maybe_gt (LOOP_VINFO_VECT_FACTOR (loop_vinfo
),
8699 STMT_VINFO_MIN_NEG_DIST (stmt_info
)))
8701 if (dump_enabled_p ())
8702 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
8703 "cannot perform implicit CSE when performing "
8704 "group loads with negative dependence distance\n");
8711 vect_memory_access_type memory_access_type
;
8712 if (!get_load_store_type (stmt_info
, vectype
, slp
, mask
, VLS_LOAD
, ncopies
,
8713 &memory_access_type
, &gs_info
))
8718 if (memory_access_type
== VMAT_CONTIGUOUS
)
8720 machine_mode vec_mode
= TYPE_MODE (vectype
);
8721 if (!VECTOR_MODE_P (vec_mode
)
8722 || !can_vec_mask_load_store_p (vec_mode
,
8723 TYPE_MODE (mask_vectype
), true))
8726 else if (memory_access_type
!= VMAT_LOAD_STORE_LANES
8727 && memory_access_type
!= VMAT_GATHER_SCATTER
)
8729 if (dump_enabled_p ())
8730 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
8731 "unsupported access type for masked load.\n");
8736 if (!vec_stmt
) /* transformation not required. */
8739 STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info
) = memory_access_type
;
8742 && LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo
))
8743 check_load_store_masking (loop_vinfo
, vectype
, VLS_LOAD
, group_size
,
8744 memory_access_type
, &gs_info
, mask
);
8746 STMT_VINFO_TYPE (stmt_info
) = load_vec_info_type
;
8747 vect_model_load_cost (stmt_info
, ncopies
, memory_access_type
,
8748 slp_node_instance
, slp_node
, cost_vec
);
8753 gcc_assert (memory_access_type
8754 == STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info
));
8756 if (dump_enabled_p ())
8757 dump_printf_loc (MSG_NOTE
, vect_location
,
8758 "transform load. ncopies = %d\n", ncopies
);
8762 dr_vec_info
*dr_info
= STMT_VINFO_DR_INFO (stmt_info
), *first_dr_info
= NULL
;
8763 ensure_base_align (dr_info
);
8765 if (memory_access_type
== VMAT_GATHER_SCATTER
&& gs_info
.decl
)
8767 vect_build_gather_load_calls (stmt_info
, gsi
, vec_stmt
, &gs_info
, mask
);
8771 if (memory_access_type
== VMAT_INVARIANT
)
8773 gcc_assert (!grouped_load
&& !mask
&& !bb_vinfo
);
8774 /* If we have versioned for aliasing or the loop doesn't
8775 have any data dependencies that would preclude this,
8776 then we are sure this is a loop invariant load and
8777 thus we can insert it on the preheader edge. */
8778 bool hoist_p
= (LOOP_VINFO_NO_DATA_DEPENDENCIES (loop_vinfo
)
8779 && !nested_in_vect_loop
8780 && hoist_defs_of_uses (stmt_info
, loop
));
8783 gassign
*stmt
= as_a
<gassign
*> (stmt_info
->stmt
);
8784 if (dump_enabled_p ())
8785 dump_printf_loc (MSG_NOTE
, vect_location
,
8786 "hoisting out of the vectorized loop: %G", stmt
);
8787 scalar_dest
= copy_ssa_name (scalar_dest
);
8788 tree rhs
= unshare_expr (gimple_assign_rhs1 (stmt
));
8789 gsi_insert_on_edge_immediate
8790 (loop_preheader_edge (loop
),
8791 gimple_build_assign (scalar_dest
, rhs
));
8793 /* These copies are all equivalent, but currently the representation
8794 requires a separate STMT_VINFO_VEC_STMT for each one. */
8795 prev_stmt_info
= NULL
;
8796 gimple_stmt_iterator gsi2
= *gsi
;
8798 for (j
= 0; j
< ncopies
; j
++)
8800 stmt_vec_info new_stmt_info
;
8803 new_temp
= vect_init_vector (stmt_info
, scalar_dest
,
8805 gimple
*new_stmt
= SSA_NAME_DEF_STMT (new_temp
);
8806 new_stmt_info
= vinfo
->add_stmt (new_stmt
);
8810 new_temp
= vect_init_vector (stmt_info
, scalar_dest
,
8812 new_stmt_info
= vinfo
->lookup_def (new_temp
);
8815 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt_info
);
8817 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt_info
;
8819 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt_info
;
8820 prev_stmt_info
= new_stmt_info
;
8825 if (memory_access_type
== VMAT_ELEMENTWISE
8826 || memory_access_type
== VMAT_STRIDED_SLP
)
8828 gimple_stmt_iterator incr_gsi
;
8834 vec
<constructor_elt
, va_gc
> *v
= NULL
;
8835 tree stride_base
, stride_step
, alias_off
;
8836 /* Checked by get_load_store_type. */
8837 unsigned int const_nunits
= nunits
.to_constant ();
8838 unsigned HOST_WIDE_INT cst_offset
= 0;
8840 gcc_assert (!LOOP_VINFO_FULLY_MASKED_P (loop_vinfo
));
8841 gcc_assert (!nested_in_vect_loop
);
8845 first_stmt_info
= DR_GROUP_FIRST_ELEMENT (stmt_info
);
8846 first_dr_info
= STMT_VINFO_DR_INFO (first_stmt_info
);
8850 first_stmt_info
= stmt_info
;
8851 first_dr_info
= dr_info
;
8853 if (slp
&& grouped_load
)
8855 group_size
= DR_GROUP_SIZE (first_stmt_info
);
8856 ref_type
= get_group_alias_ptr_type (first_stmt_info
);
8862 = (tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype
)))
8863 * vect_get_place_in_interleaving_chain (stmt_info
,
8866 ref_type
= reference_alias_ptr_type (DR_REF (dr_info
->dr
));
8870 = fold_build_pointer_plus
8871 (DR_BASE_ADDRESS (first_dr_info
->dr
),
8872 size_binop (PLUS_EXPR
,
8873 convert_to_ptrofftype (DR_OFFSET (first_dr_info
->dr
)),
8874 convert_to_ptrofftype (DR_INIT (first_dr_info
->dr
))));
8875 stride_step
= fold_convert (sizetype
, DR_STEP (first_dr_info
->dr
));
8877 /* For a load with loop-invariant (but other than power-of-2)
8878 stride (i.e. not a grouped access) like so:
8880 for (i = 0; i < n; i += stride)
8883 we generate a new induction variable and new accesses to
8884 form a new vector (or vectors, depending on ncopies):
8886 for (j = 0; ; j += VF*stride)
8888 tmp2 = array[j + stride];
8890 vectemp = {tmp1, tmp2, ...}
8893 ivstep
= fold_build2 (MULT_EXPR
, TREE_TYPE (stride_step
), stride_step
,
8894 build_int_cst (TREE_TYPE (stride_step
), vf
));
8896 standard_iv_increment_position (loop
, &incr_gsi
, &insert_after
);
8898 stride_base
= cse_and_gimplify_to_preheader (loop_vinfo
, stride_base
);
8899 ivstep
= cse_and_gimplify_to_preheader (loop_vinfo
, ivstep
);
8900 create_iv (stride_base
, ivstep
, NULL
,
8901 loop
, &incr_gsi
, insert_after
,
8903 incr
= gsi_stmt (incr_gsi
);
8904 loop_vinfo
->add_stmt (incr
);
8906 stride_step
= cse_and_gimplify_to_preheader (loop_vinfo
, stride_step
);
8908 prev_stmt_info
= NULL
;
8909 running_off
= offvar
;
8910 alias_off
= build_int_cst (ref_type
, 0);
8911 int nloads
= const_nunits
;
8913 tree ltype
= TREE_TYPE (vectype
);
8914 tree lvectype
= vectype
;
8915 auto_vec
<tree
> dr_chain
;
8916 if (memory_access_type
== VMAT_STRIDED_SLP
)
8918 if (group_size
< const_nunits
)
8920 /* First check if vec_init optab supports construction from
8921 vector elts directly. */
8922 scalar_mode elmode
= SCALAR_TYPE_MODE (TREE_TYPE (vectype
));
8924 if (related_vector_mode (TYPE_MODE (vectype
), elmode
,
8925 group_size
).exists (&vmode
)
8926 && (convert_optab_handler (vec_init_optab
,
8927 TYPE_MODE (vectype
), vmode
)
8928 != CODE_FOR_nothing
))
8930 nloads
= const_nunits
/ group_size
;
8932 ltype
= build_vector_type (TREE_TYPE (vectype
), group_size
);
8936 /* Otherwise avoid emitting a constructor of vector elements
8937 by performing the loads using an integer type of the same
8938 size, constructing a vector of those and then
8939 re-interpreting it as the original vector type.
8940 This avoids a huge runtime penalty due to the general
8941 inability to perform store forwarding from smaller stores
8942 to a larger load. */
8944 = group_size
* TYPE_PRECISION (TREE_TYPE (vectype
));
8945 unsigned int lnunits
= const_nunits
/ group_size
;
8946 /* If we can't construct such a vector fall back to
8947 element loads of the original vector type. */
8948 if (int_mode_for_size (lsize
, 0).exists (&elmode
)
8949 && related_vector_mode (TYPE_MODE (vectype
), elmode
,
8950 lnunits
).exists (&vmode
)
8951 && (convert_optab_handler (vec_init_optab
, vmode
, elmode
)
8952 != CODE_FOR_nothing
))
8956 ltype
= build_nonstandard_integer_type (lsize
, 1);
8957 lvectype
= build_vector_type (ltype
, nloads
);
8964 lnel
= const_nunits
;
8967 ltype
= build_aligned_type (ltype
, TYPE_ALIGN (TREE_TYPE (vectype
)));
8969 /* Load vector(1) scalar_type if it's 1 element-wise vectype. */
8970 else if (nloads
== 1)
8975 /* For SLP permutation support we need to load the whole group,
8976 not only the number of vector stmts the permutation result
8980 /* We don't yet generate SLP_TREE_LOAD_PERMUTATIONs for
8982 unsigned int const_vf
= vf
.to_constant ();
8983 ncopies
= CEIL (group_size
* const_vf
, const_nunits
);
8984 dr_chain
.create (ncopies
);
8987 ncopies
= SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
);
8989 unsigned int group_el
= 0;
8990 unsigned HOST_WIDE_INT
8991 elsz
= tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype
)));
8992 for (j
= 0; j
< ncopies
; j
++)
8995 vec_alloc (v
, nloads
);
8996 stmt_vec_info new_stmt_info
= NULL
;
8997 for (i
= 0; i
< nloads
; i
++)
8999 tree this_off
= build_int_cst (TREE_TYPE (alias_off
),
9000 group_el
* elsz
+ cst_offset
);
9001 tree data_ref
= build2 (MEM_REF
, ltype
, running_off
, this_off
);
9002 vect_copy_ref_info (data_ref
, DR_REF (first_dr_info
->dr
));
9004 = gimple_build_assign (make_ssa_name (ltype
), data_ref
);
9006 = vect_finish_stmt_generation (stmt_info
, new_stmt
, gsi
);
9008 CONSTRUCTOR_APPEND_ELT (v
, NULL_TREE
,
9009 gimple_assign_lhs (new_stmt
));
9013 || group_el
== group_size
)
9015 tree newoff
= copy_ssa_name (running_off
);
9016 gimple
*incr
= gimple_build_assign (newoff
, POINTER_PLUS_EXPR
,
9017 running_off
, stride_step
);
9018 vect_finish_stmt_generation (stmt_info
, incr
, gsi
);
9020 running_off
= newoff
;
9026 tree vec_inv
= build_constructor (lvectype
, v
);
9027 new_temp
= vect_init_vector (stmt_info
, vec_inv
, lvectype
, gsi
);
9028 new_stmt_info
= vinfo
->lookup_def (new_temp
);
9029 if (lvectype
!= vectype
)
9032 = gimple_build_assign (make_ssa_name (vectype
),
9034 build1 (VIEW_CONVERT_EXPR
,
9035 vectype
, new_temp
));
9037 = vect_finish_stmt_generation (stmt_info
, new_stmt
, gsi
);
9044 dr_chain
.quick_push (gimple_assign_lhs (new_stmt_info
->stmt
));
9046 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt_info
);
9051 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt_info
;
9053 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt_info
;
9054 prev_stmt_info
= new_stmt_info
;
9060 vect_transform_slp_perm_load (slp_node
, dr_chain
, gsi
, vf
,
9061 slp_node_instance
, false, &n_perms
);
9066 if (memory_access_type
== VMAT_GATHER_SCATTER
9067 || (!slp
&& memory_access_type
== VMAT_CONTIGUOUS
))
9068 grouped_load
= false;
9072 first_stmt_info
= DR_GROUP_FIRST_ELEMENT (stmt_info
);
9073 group_size
= DR_GROUP_SIZE (first_stmt_info
);
9074 /* For SLP vectorization we directly vectorize a subchain
9075 without permutation. */
9076 if (slp
&& ! SLP_TREE_LOAD_PERMUTATION (slp_node
).exists ())
9077 first_stmt_info
= SLP_TREE_SCALAR_STMTS (slp_node
)[0];
9078 /* For BB vectorization always use the first stmt to base
9079 the data ref pointer on. */
9081 first_stmt_info_for_drptr
= SLP_TREE_SCALAR_STMTS (slp_node
)[0];
9083 /* Check if the chain of loads is already vectorized. */
9084 if (STMT_VINFO_VEC_STMT (first_stmt_info
)
9085 /* For SLP we would need to copy over SLP_TREE_VEC_STMTS.
9086 ??? But we can only do so if there is exactly one
9087 as we have no way to get at the rest. Leave the CSE
9089 ??? With the group load eventually participating
9090 in multiple different permutations (having multiple
9091 slp nodes which refer to the same group) the CSE
9092 is even wrong code. See PR56270. */
9095 *vec_stmt
= STMT_VINFO_VEC_STMT (stmt_info
);
9098 first_dr_info
= STMT_VINFO_DR_INFO (first_stmt_info
);
9101 /* VEC_NUM is the number of vect stmts to be created for this group. */
9104 grouped_load
= false;
9105 /* If an SLP permutation is from N elements to N elements,
9106 and if one vector holds a whole number of N, we can load
9107 the inputs to the permutation in the same way as an
9108 unpermuted sequence. In other cases we need to load the
9109 whole group, not only the number of vector stmts the
9110 permutation result fits in. */
9112 && (group_size
!= SLP_INSTANCE_GROUP_SIZE (slp_node_instance
)
9113 || !multiple_p (nunits
, group_size
)))
9115 /* We don't yet generate such SLP_TREE_LOAD_PERMUTATIONs for
9116 variable VF; see vect_transform_slp_perm_load. */
9117 unsigned int const_vf
= vf
.to_constant ();
9118 unsigned int const_nunits
= nunits
.to_constant ();
9119 vec_num
= CEIL (group_size
* const_vf
, const_nunits
);
9120 group_gap_adj
= vf
* group_size
- nunits
* vec_num
;
9124 vec_num
= SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
);
9126 = group_size
- SLP_INSTANCE_GROUP_SIZE (slp_node_instance
);
9130 vec_num
= group_size
;
9132 ref_type
= get_group_alias_ptr_type (first_stmt_info
);
9136 first_stmt_info
= stmt_info
;
9137 first_dr_info
= dr_info
;
9138 group_size
= vec_num
= 1;
9140 ref_type
= reference_alias_ptr_type (DR_REF (first_dr_info
->dr
));
9143 alignment_support_scheme
9144 = vect_supportable_dr_alignment (first_dr_info
, false);
9145 gcc_assert (alignment_support_scheme
);
9146 vec_loop_masks
*loop_masks
9147 = (loop_vinfo
&& LOOP_VINFO_FULLY_MASKED_P (loop_vinfo
)
9148 ? &LOOP_VINFO_MASKS (loop_vinfo
)
9150 /* Targets with store-lane instructions must not require explicit
9151 realignment. vect_supportable_dr_alignment always returns either
9152 dr_aligned or dr_unaligned_supported for masked operations. */
9153 gcc_assert ((memory_access_type
!= VMAT_LOAD_STORE_LANES
9156 || alignment_support_scheme
== dr_aligned
9157 || alignment_support_scheme
== dr_unaligned_supported
);
9159 /* In case the vectorization factor (VF) is bigger than the number
9160 of elements that we can fit in a vectype (nunits), we have to generate
9161 more than one vector stmt - i.e - we need to "unroll" the
9162 vector stmt by a factor VF/nunits. In doing so, we record a pointer
9163 from one copy of the vector stmt to the next, in the field
9164 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
9165 stages to find the correct vector defs to be used when vectorizing
9166 stmts that use the defs of the current stmt. The example below
9167 illustrates the vectorization process when VF=16 and nunits=4 (i.e., we
9168 need to create 4 vectorized stmts):
9170 before vectorization:
9171 RELATED_STMT VEC_STMT
9175 step 1: vectorize stmt S1:
9176 We first create the vector stmt VS1_0, and, as usual, record a
9177 pointer to it in the STMT_VINFO_VEC_STMT of the scalar stmt S1.
9178 Next, we create the vector stmt VS1_1, and record a pointer to
9179 it in the STMT_VINFO_RELATED_STMT of the vector stmt VS1_0.
9180 Similarly, for VS1_2 and VS1_3. This is the resulting chain of
9182 RELATED_STMT VEC_STMT
9183 VS1_0: vx0 = memref0 VS1_1 -
9184 VS1_1: vx1 = memref1 VS1_2 -
9185 VS1_2: vx2 = memref2 VS1_3 -
9186 VS1_3: vx3 = memref3 - -
9187 S1: x = load - VS1_0
9190 See in documentation in vect_get_vec_def_for_stmt_copy for how the
9191 information we recorded in RELATED_STMT field is used to vectorize
9194 /* In case of interleaving (non-unit grouped access):
9201 Vectorized loads are created in the order of memory accesses
9202 starting from the access of the first stmt of the chain:
9205 VS2: vx1 = &base + vec_size*1
9206 VS3: vx3 = &base + vec_size*2
9207 VS4: vx4 = &base + vec_size*3
9209 Then permutation statements are generated:
9211 VS5: vx5 = VEC_PERM_EXPR < vx0, vx1, { 0, 2, ..., i*2 } >
9212 VS6: vx6 = VEC_PERM_EXPR < vx0, vx1, { 1, 3, ..., i*2+1 } >
9215 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
9216 (the order of the data-refs in the output of vect_permute_load_chain
9217 corresponds to the order of scalar stmts in the interleaving chain - see
9218 the documentation of vect_permute_load_chain()).
9219 The generation of permutation stmts and recording them in
9220 STMT_VINFO_VEC_STMT is done in vect_transform_grouped_load().
9222 In case of both multiple types and interleaving, the vector loads and
9223 permutation stmts above are created for every copy. The result vector
9224 stmts are put in STMT_VINFO_VEC_STMT for the first copy and in the
9225 corresponding STMT_VINFO_RELATED_STMT for the next copies. */
9227 /* If the data reference is aligned (dr_aligned) or potentially unaligned
9228 on a target that supports unaligned accesses (dr_unaligned_supported)
9229 we generate the following code:
9233 p = p + indx * vectype_size;
9238 Otherwise, the data reference is potentially unaligned on a target that
9239 does not support unaligned accesses (dr_explicit_realign_optimized) -
9240 then generate the following code, in which the data in each iteration is
9241 obtained by two vector loads, one from the previous iteration, and one
9242 from the current iteration:
9244 msq_init = *(floor(p1))
9245 p2 = initial_addr + VS - 1;
9246 realignment_token = call target_builtin;
9249 p2 = p2 + indx * vectype_size
9251 vec_dest = realign_load (msq, lsq, realignment_token)
9256 /* If the misalignment remains the same throughout the execution of the
9257 loop, we can create the init_addr and permutation mask at the loop
9258 preheader. Otherwise, it needs to be created inside the loop.
9259 This can only occur when vectorizing memory accesses in the inner-loop
9260 nested within an outer-loop that is being vectorized. */
9262 if (nested_in_vect_loop
9263 && !multiple_p (DR_STEP_ALIGNMENT (dr_info
->dr
),
9264 GET_MODE_SIZE (TYPE_MODE (vectype
))))
9266 gcc_assert (alignment_support_scheme
!= dr_explicit_realign_optimized
);
9267 compute_in_loop
= true;
9270 if ((alignment_support_scheme
== dr_explicit_realign_optimized
9271 || alignment_support_scheme
== dr_explicit_realign
)
9272 && !compute_in_loop
)
9274 msq
= vect_setup_realignment (first_stmt_info_for_drptr
9275 ? first_stmt_info_for_drptr
9276 : first_stmt_info
, gsi
, &realignment_token
,
9277 alignment_support_scheme
, NULL_TREE
,
9279 if (alignment_support_scheme
== dr_explicit_realign_optimized
)
9281 phi
= as_a
<gphi
*> (SSA_NAME_DEF_STMT (msq
));
9282 byte_offset
= size_binop (MINUS_EXPR
, TYPE_SIZE_UNIT (vectype
),
9289 if (memory_access_type
== VMAT_CONTIGUOUS_REVERSE
)
9290 offset
= size_int (-TYPE_VECTOR_SUBPARTS (vectype
) + 1);
9293 tree vec_offset
= NULL_TREE
;
9294 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info
))
9296 aggr_type
= NULL_TREE
;
9299 else if (memory_access_type
== VMAT_GATHER_SCATTER
)
9301 aggr_type
= elem_type
;
9302 vect_get_strided_load_store_ops (stmt_info
, loop_vinfo
, &gs_info
,
9303 &bump
, &vec_offset
);
9307 if (memory_access_type
== VMAT_LOAD_STORE_LANES
)
9308 aggr_type
= build_array_type_nelts (elem_type
, vec_num
* nunits
);
9310 aggr_type
= vectype
;
9311 bump
= vect_get_data_ptr_increment (dr_info
, aggr_type
,
9312 memory_access_type
);
9315 tree vec_mask
= NULL_TREE
;
9316 prev_stmt_info
= NULL
;
9317 poly_uint64 group_elt
= 0;
9318 for (j
= 0; j
< ncopies
; j
++)
9320 stmt_vec_info new_stmt_info
= NULL
;
9321 /* 1. Create the vector or array pointer update chain. */
9324 bool simd_lane_access_p
9325 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) != 0;
9326 if (simd_lane_access_p
9327 && TREE_CODE (DR_BASE_ADDRESS (first_dr_info
->dr
)) == ADDR_EXPR
9328 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr_info
->dr
), 0))
9329 && integer_zerop (DR_OFFSET (first_dr_info
->dr
))
9330 && integer_zerop (DR_INIT (first_dr_info
->dr
))
9331 && alias_sets_conflict_p (get_alias_set (aggr_type
),
9332 get_alias_set (TREE_TYPE (ref_type
)))
9333 && (alignment_support_scheme
== dr_aligned
9334 || alignment_support_scheme
== dr_unaligned_supported
))
9336 dataref_ptr
= unshare_expr (DR_BASE_ADDRESS (first_dr_info
->dr
));
9337 dataref_offset
= build_int_cst (ref_type
, 0);
9339 else if (first_stmt_info_for_drptr
9340 && first_stmt_info
!= first_stmt_info_for_drptr
)
9343 = vect_create_data_ref_ptr (first_stmt_info_for_drptr
,
9344 aggr_type
, at_loop
, offset
, &dummy
,
9345 gsi
, &ptr_incr
, simd_lane_access_p
,
9347 /* Adjust the pointer by the difference to first_stmt. */
9348 data_reference_p ptrdr
9349 = STMT_VINFO_DATA_REF (first_stmt_info_for_drptr
);
9351 = fold_convert (sizetype
,
9352 size_binop (MINUS_EXPR
,
9353 DR_INIT (first_dr_info
->dr
),
9355 dataref_ptr
= bump_vector_ptr (dataref_ptr
, ptr_incr
, gsi
,
9358 else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info
))
9359 vect_get_gather_scatter_ops (loop
, stmt_info
, &gs_info
,
9360 &dataref_ptr
, &vec_offset
);
9363 = vect_create_data_ref_ptr (first_stmt_info
, aggr_type
, at_loop
,
9364 offset
, &dummy
, gsi
, &ptr_incr
,
9371 auto_vec
<vec
<tree
> > vec_defs (1);
9372 vect_get_slp_defs (slp_node
, &vec_defs
);
9373 vec_mask
= vec_defs
[0][0];
9376 vec_mask
= vect_get_vec_def_for_operand (mask
, stmt_info
,
9383 dataref_offset
= int_const_binop (PLUS_EXPR
, dataref_offset
,
9385 else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info
))
9386 vec_offset
= vect_get_vec_def_for_stmt_copy (vinfo
, vec_offset
);
9388 dataref_ptr
= bump_vector_ptr (dataref_ptr
, ptr_incr
, gsi
,
9391 vec_mask
= vect_get_vec_def_for_stmt_copy (vinfo
, vec_mask
);
9394 if (grouped_load
|| slp_perm
)
9395 dr_chain
.create (vec_num
);
9397 if (memory_access_type
== VMAT_LOAD_STORE_LANES
)
9401 vec_array
= create_vector_array (vectype
, vec_num
);
9403 tree final_mask
= NULL_TREE
;
9405 final_mask
= vect_get_loop_mask (gsi
, loop_masks
, ncopies
,
9408 final_mask
= prepare_load_store_mask (mask_vectype
, final_mask
,
9415 VEC_ARRAY = MASK_LOAD_LANES (DATAREF_PTR, ALIAS_PTR,
9417 unsigned int align
= TYPE_ALIGN_UNIT (TREE_TYPE (vectype
));
9418 tree alias_ptr
= build_int_cst (ref_type
, align
);
9419 call
= gimple_build_call_internal (IFN_MASK_LOAD_LANES
, 3,
9420 dataref_ptr
, alias_ptr
,
9426 VEC_ARRAY = LOAD_LANES (MEM_REF[...all elements...]). */
9427 data_ref
= create_array_ref (aggr_type
, dataref_ptr
, ref_type
);
9428 call
= gimple_build_call_internal (IFN_LOAD_LANES
, 1, data_ref
);
9430 gimple_call_set_lhs (call
, vec_array
);
9431 gimple_call_set_nothrow (call
, true);
9432 new_stmt_info
= vect_finish_stmt_generation (stmt_info
, call
, gsi
);
9434 /* Extract each vector into an SSA_NAME. */
9435 for (i
= 0; i
< vec_num
; i
++)
9437 new_temp
= read_vector_array (stmt_info
, gsi
, scalar_dest
,
9439 dr_chain
.quick_push (new_temp
);
9442 /* Record the mapping between SSA_NAMEs and statements. */
9443 vect_record_grouped_load_vectors (stmt_info
, dr_chain
);
9445 /* Record that VEC_ARRAY is now dead. */
9446 vect_clobber_variable (stmt_info
, gsi
, vec_array
);
9450 for (i
= 0; i
< vec_num
; i
++)
9452 tree final_mask
= NULL_TREE
;
9454 && memory_access_type
!= VMAT_INVARIANT
)
9455 final_mask
= vect_get_loop_mask (gsi
, loop_masks
,
9457 vectype
, vec_num
* j
+ i
);
9459 final_mask
= prepare_load_store_mask (mask_vectype
, final_mask
,
9463 dataref_ptr
= bump_vector_ptr (dataref_ptr
, ptr_incr
, gsi
,
9466 /* 2. Create the vector-load in the loop. */
9467 gimple
*new_stmt
= NULL
;
9468 switch (alignment_support_scheme
)
9471 case dr_unaligned_supported
:
9473 unsigned int misalign
;
9474 unsigned HOST_WIDE_INT align
;
9476 if (memory_access_type
== VMAT_GATHER_SCATTER
)
9478 tree zero
= build_zero_cst (vectype
);
9479 tree scale
= size_int (gs_info
.scale
);
9482 call
= gimple_build_call_internal
9483 (IFN_MASK_GATHER_LOAD
, 5, dataref_ptr
,
9484 vec_offset
, scale
, zero
, final_mask
);
9486 call
= gimple_build_call_internal
9487 (IFN_GATHER_LOAD
, 4, dataref_ptr
,
9488 vec_offset
, scale
, zero
);
9489 gimple_call_set_nothrow (call
, true);
9491 data_ref
= NULL_TREE
;
9496 known_alignment (DR_TARGET_ALIGNMENT (first_dr_info
));
9497 if (alignment_support_scheme
== dr_aligned
)
9499 gcc_assert (aligned_access_p (first_dr_info
));
9502 else if (DR_MISALIGNMENT (first_dr_info
) == -1)
9504 align
= dr_alignment
9505 (vect_dr_behavior (first_dr_info
));
9509 misalign
= DR_MISALIGNMENT (first_dr_info
);
9510 if (dataref_offset
== NULL_TREE
9511 && TREE_CODE (dataref_ptr
) == SSA_NAME
)
9512 set_ptr_info_alignment (get_ptr_info (dataref_ptr
),
9517 align
= least_bit_hwi (misalign
| align
);
9518 tree ptr
= build_int_cst (ref_type
, align
);
9520 = gimple_build_call_internal (IFN_MASK_LOAD
, 3,
9523 gimple_call_set_nothrow (call
, true);
9525 data_ref
= NULL_TREE
;
9529 tree ltype
= vectype
;
9530 /* If there's no peeling for gaps but we have a gap
9531 with slp loads then load the lower half of the
9532 vector only. See get_group_load_store_type for
9533 when we apply this optimization. */
9536 && !LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo
)
9537 && DR_GROUP_GAP (first_stmt_info
) != 0
9538 && known_eq (nunits
,
9540 - DR_GROUP_GAP (first_stmt_info
)) * 2)
9541 && known_eq (nunits
, group_size
))
9542 ltype
= build_vector_type (TREE_TYPE (vectype
),
9545 (first_stmt_info
)));
9547 = fold_build2 (MEM_REF
, ltype
, dataref_ptr
,
9550 : build_int_cst (ref_type
, 0));
9551 if (alignment_support_scheme
== dr_aligned
)
9553 else if (DR_MISALIGNMENT (first_dr_info
) == -1)
9554 TREE_TYPE (data_ref
)
9555 = build_aligned_type (TREE_TYPE (data_ref
),
9556 align
* BITS_PER_UNIT
);
9558 TREE_TYPE (data_ref
)
9559 = build_aligned_type (TREE_TYPE (data_ref
),
9560 TYPE_ALIGN (elem_type
));
9561 if (ltype
!= vectype
)
9563 vect_copy_ref_info (data_ref
, DR_REF (first_dr_info
->dr
));
9564 tree tem
= make_ssa_name (ltype
);
9565 new_stmt
= gimple_build_assign (tem
, data_ref
);
9566 vect_finish_stmt_generation (stmt_info
, new_stmt
, gsi
);
9568 vec
<constructor_elt
, va_gc
> *v
;
9570 CONSTRUCTOR_APPEND_ELT (v
, NULL_TREE
, tem
);
9571 CONSTRUCTOR_APPEND_ELT (v
, NULL_TREE
,
9572 build_zero_cst (ltype
));
9574 = gimple_build_assign (vec_dest
,
9581 case dr_explicit_realign
:
9585 tree vs
= size_int (TYPE_VECTOR_SUBPARTS (vectype
));
9587 if (compute_in_loop
)
9588 msq
= vect_setup_realignment (first_stmt_info
, gsi
,
9590 dr_explicit_realign
,
9593 if (TREE_CODE (dataref_ptr
) == SSA_NAME
)
9594 ptr
= copy_ssa_name (dataref_ptr
);
9596 ptr
= make_ssa_name (TREE_TYPE (dataref_ptr
));
9597 // For explicit realign the target alignment should be
9598 // known at compile time.
9599 unsigned HOST_WIDE_INT align
=
9600 DR_TARGET_ALIGNMENT (first_dr_info
).to_constant ();
9601 new_stmt
= gimple_build_assign
9602 (ptr
, BIT_AND_EXPR
, dataref_ptr
,
9604 (TREE_TYPE (dataref_ptr
),
9605 -(HOST_WIDE_INT
) align
));
9606 vect_finish_stmt_generation (stmt_info
, new_stmt
, gsi
);
9608 = build2 (MEM_REF
, vectype
, ptr
,
9609 build_int_cst (ref_type
, 0));
9610 vect_copy_ref_info (data_ref
, DR_REF (first_dr_info
->dr
));
9611 vec_dest
= vect_create_destination_var (scalar_dest
,
9613 new_stmt
= gimple_build_assign (vec_dest
, data_ref
);
9614 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
9615 gimple_assign_set_lhs (new_stmt
, new_temp
);
9616 gimple_move_vops (new_stmt
, stmt_info
->stmt
);
9617 vect_finish_stmt_generation (stmt_info
, new_stmt
, gsi
);
9620 bump
= size_binop (MULT_EXPR
, vs
,
9621 TYPE_SIZE_UNIT (elem_type
));
9622 bump
= size_binop (MINUS_EXPR
, bump
, size_one_node
);
9623 ptr
= bump_vector_ptr (dataref_ptr
, NULL
, gsi
,
9625 new_stmt
= gimple_build_assign
9626 (NULL_TREE
, BIT_AND_EXPR
, ptr
,
9628 (TREE_TYPE (ptr
), -(HOST_WIDE_INT
) align
));
9629 ptr
= copy_ssa_name (ptr
, new_stmt
);
9630 gimple_assign_set_lhs (new_stmt
, ptr
);
9631 vect_finish_stmt_generation (stmt_info
, new_stmt
, gsi
);
9633 = build2 (MEM_REF
, vectype
, ptr
,
9634 build_int_cst (ref_type
, 0));
9637 case dr_explicit_realign_optimized
:
9639 if (TREE_CODE (dataref_ptr
) == SSA_NAME
)
9640 new_temp
= copy_ssa_name (dataref_ptr
);
9642 new_temp
= make_ssa_name (TREE_TYPE (dataref_ptr
));
9643 // We should only be doing this if we know the target
9644 // alignment at compile time.
9645 unsigned HOST_WIDE_INT align
=
9646 DR_TARGET_ALIGNMENT (first_dr_info
).to_constant ();
9647 new_stmt
= gimple_build_assign
9648 (new_temp
, BIT_AND_EXPR
, dataref_ptr
,
9649 build_int_cst (TREE_TYPE (dataref_ptr
),
9650 -(HOST_WIDE_INT
) align
));
9651 vect_finish_stmt_generation (stmt_info
, new_stmt
, gsi
);
9653 = build2 (MEM_REF
, vectype
, new_temp
,
9654 build_int_cst (ref_type
, 0));
9660 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
9661 /* DATA_REF is null if we've already built the statement. */
9664 vect_copy_ref_info (data_ref
, DR_REF (first_dr_info
->dr
));
9665 new_stmt
= gimple_build_assign (vec_dest
, data_ref
);
9667 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
9668 gimple_set_lhs (new_stmt
, new_temp
);
9670 = vect_finish_stmt_generation (stmt_info
, new_stmt
, gsi
);
9672 /* 3. Handle explicit realignment if necessary/supported.
9674 vec_dest = realign_load (msq, lsq, realignment_token) */
9675 if (alignment_support_scheme
== dr_explicit_realign_optimized
9676 || alignment_support_scheme
== dr_explicit_realign
)
9678 lsq
= gimple_assign_lhs (new_stmt
);
9679 if (!realignment_token
)
9680 realignment_token
= dataref_ptr
;
9681 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
9682 new_stmt
= gimple_build_assign (vec_dest
, REALIGN_LOAD_EXPR
,
9683 msq
, lsq
, realignment_token
);
9684 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
9685 gimple_assign_set_lhs (new_stmt
, new_temp
);
9687 = vect_finish_stmt_generation (stmt_info
, new_stmt
, gsi
);
9689 if (alignment_support_scheme
== dr_explicit_realign_optimized
)
9692 if (i
== vec_num
- 1 && j
== ncopies
- 1)
9693 add_phi_arg (phi
, lsq
,
9694 loop_latch_edge (containing_loop
),
9700 if (memory_access_type
== VMAT_CONTIGUOUS_REVERSE
)
9702 tree perm_mask
= perm_mask_for_reverse (vectype
);
9703 new_temp
= permute_vec_elements (new_temp
, new_temp
,
9704 perm_mask
, stmt_info
, gsi
);
9705 new_stmt_info
= vinfo
->lookup_def (new_temp
);
9708 /* Collect vector loads and later create their permutation in
9709 vect_transform_grouped_load (). */
9710 if (grouped_load
|| slp_perm
)
9711 dr_chain
.quick_push (new_temp
);
9713 /* Store vector loads in the corresponding SLP_NODE. */
9714 if (slp
&& !slp_perm
)
9715 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt_info
);
9717 /* With SLP permutation we load the gaps as well, without
9718 we need to skip the gaps after we manage to fully load
9719 all elements. group_gap_adj is DR_GROUP_SIZE here. */
9720 group_elt
+= nunits
;
9721 if (maybe_ne (group_gap_adj
, 0U)
9723 && known_eq (group_elt
, group_size
- group_gap_adj
))
9725 poly_wide_int bump_val
9726 = (wi::to_wide (TYPE_SIZE_UNIT (elem_type
))
9728 tree bump
= wide_int_to_tree (sizetype
, bump_val
);
9729 dataref_ptr
= bump_vector_ptr (dataref_ptr
, ptr_incr
, gsi
,
9734 /* Bump the vector pointer to account for a gap or for excess
9735 elements loaded for a permuted SLP load. */
9736 if (maybe_ne (group_gap_adj
, 0U) && slp_perm
)
9738 poly_wide_int bump_val
9739 = (wi::to_wide (TYPE_SIZE_UNIT (elem_type
))
9741 tree bump
= wide_int_to_tree (sizetype
, bump_val
);
9742 dataref_ptr
= bump_vector_ptr (dataref_ptr
, ptr_incr
, gsi
,
9747 if (slp
&& !slp_perm
)
9753 if (!vect_transform_slp_perm_load (slp_node
, dr_chain
, gsi
, vf
,
9754 slp_node_instance
, false,
9757 dr_chain
.release ();
9765 if (memory_access_type
!= VMAT_LOAD_STORE_LANES
)
9766 vect_transform_grouped_load (stmt_info
, dr_chain
,
9768 *vec_stmt
= STMT_VINFO_VEC_STMT (stmt_info
);
9773 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt_info
;
9775 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt_info
;
9776 prev_stmt_info
= new_stmt_info
;
9779 dr_chain
.release ();
9785 /* Function vect_is_simple_cond.
9788 LOOP - the loop that is being vectorized.
9789 COND - Condition that is checked for simple use.
9792 *COMP_VECTYPE - the vector type for the comparison.
9793 *DTS - The def types for the arguments of the comparison
9795 Returns whether a COND can be vectorized. Checks whether
9796 condition operands are supportable using vec_is_simple_use. */
/* NOTE(review): this extraction appears to have dropped source lines
   (returns, braces, and the function epilogue are missing); the comments
   below describe only the logic that is visible here.  */
9799 vect_is_simple_cond (tree cond
, vec_info
*vinfo
,
9800 tree
*comp_vectype
, enum vect_def_type
*dts
,
9804 tree vectype1
= NULL_TREE
, vectype2
= NULL_TREE
;
/* Case 1: COND is already a mask -- an SSA_NAME whose scalar type is
   boolean.  It must be a simple use and its vector type must itself be
   a vector boolean type.  */
9807 if (TREE_CODE (cond
) == SSA_NAME
9808 && VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (cond
)))
9810 if (!vect_is_simple_use (cond
, vinfo
, &dts
[0], comp_vectype
)
9812 || !VECTOR_BOOLEAN_TYPE_P (*comp_vectype
))
/* Case 2: otherwise COND must be a comparison expression with two
   operands.  */
9817 if (!COMPARISON_CLASS_P (cond
))
9820 lhs
= TREE_OPERAND (cond
, 0);
9821 rhs
= TREE_OPERAND (cond
, 1);
/* Each comparison operand is either a simple-use SSA_NAME (its def
   type is recorded in DTS[0]/DTS[1] and its vector type in
   vectype1/vectype2), or an integer/real/fixed-point constant, which
   is recorded as vect_constant_def.  */
9823 if (TREE_CODE (lhs
) == SSA_NAME
)
9825 if (!vect_is_simple_use (lhs
, vinfo
, &dts
[0], &vectype1
))
9828 else if (TREE_CODE (lhs
) == INTEGER_CST
|| TREE_CODE (lhs
) == REAL_CST
9829 || TREE_CODE (lhs
) == FIXED_CST
)
9830 dts
[0] = vect_constant_def
;
9834 if (TREE_CODE (rhs
) == SSA_NAME
)
9836 if (!vect_is_simple_use (rhs
, vinfo
, &dts
[1], &vectype2
))
9839 else if (TREE_CODE (rhs
) == INTEGER_CST
|| TREE_CODE (rhs
) == REAL_CST
9840 || TREE_CODE (rhs
) == FIXED_CST
)
9841 dts
[1] = vect_constant_def
;
/* When both operands have known vector types, their element counts
   (TYPE_VECTOR_SUBPARTS) must match; maybe_ne handles the
   possibly-variable (poly_int) element counts.  */
9845 if (vectype1
&& vectype2
9846 && maybe_ne (TYPE_VECTOR_SUBPARTS (vectype1
),
9847 TYPE_VECTOR_SUBPARTS (vectype2
)))
/* Prefer whichever operand produced a vector type; either one
   suffices to describe the comparison.  */
9850 *comp_vectype
= vectype1
? vectype1
: vectype2
;
9851 /* Invariant comparison. */
/* Neither operand had a vector type (both loop-invariant/constant):
   derive the comparison vector type from the scalar type of LHS.  */
9852 if (! *comp_vectype
)
9854 tree scalar_type
= TREE_TYPE (lhs
);
9855 /* If we can widen the comparison to match vectype do so. */
/* Widen an integral scalar_type to the element width of VECTYPE so
   the comparison vector matches the result vector's lanes; signedness
   of the original scalar type is preserved.  */
9856 if (INTEGRAL_TYPE_P (scalar_type
9858 && tree_int_cst_lt (TYPE_SIZE (scalar_type
),
9859 TYPE_SIZE (TREE_TYPE (vectype
))))
9860 scalar_type
= build_nonstandard_integer_type
9861 (tree_to_uhwi (TYPE_SIZE (TREE_TYPE (vectype
))),
9862 TYPE_UNSIGNED (scalar_type
));
9863 *comp_vectype
= get_vectype_for_scalar_type (vinfo
, scalar_type
);
9869 /* vectorizable_condition.
9871 Check if STMT_INFO is conditional modify expression that can be vectorized.
9872 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
9873 stmt using VEC_COND_EXPR to replace it, put it in VEC_STMT, and insert it
9876 When STMT_INFO is vectorized as a nested cycle, for_reduction is true.
9878 Return true if STMT_INFO is vectorizable in this way. */
9881 vectorizable_condition (stmt_vec_info stmt_info
, gimple_stmt_iterator
*gsi
,
9882 stmt_vec_info
*vec_stmt
,
9883 slp_tree slp_node
, stmt_vector_for_cost
*cost_vec
)
9885 vec_info
*vinfo
= stmt_info
->vinfo
;
9886 tree scalar_dest
= NULL_TREE
;
9887 tree vec_dest
= NULL_TREE
;
9888 tree cond_expr
, cond_expr0
= NULL_TREE
, cond_expr1
= NULL_TREE
;
9889 tree then_clause
, else_clause
;
9890 tree comp_vectype
= NULL_TREE
;
9891 tree vec_cond_lhs
= NULL_TREE
, vec_cond_rhs
= NULL_TREE
;
9892 tree vec_then_clause
= NULL_TREE
, vec_else_clause
= NULL_TREE
;
9895 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
9896 enum vect_def_type dts
[4]
9897 = {vect_unknown_def_type
, vect_unknown_def_type
,
9898 vect_unknown_def_type
, vect_unknown_def_type
};
9901 enum tree_code code
, cond_code
, bitop1
= NOP_EXPR
, bitop2
= NOP_EXPR
;
9902 stmt_vec_info prev_stmt_info
= NULL
;
9904 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
9905 vec
<tree
> vec_oprnds0
= vNULL
;
9906 vec
<tree
> vec_oprnds1
= vNULL
;
9907 vec
<tree
> vec_oprnds2
= vNULL
;
9908 vec
<tree
> vec_oprnds3
= vNULL
;
9910 bool masked
= false;
9912 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
9915 /* Is vectorizable conditional operation? */
9916 gassign
*stmt
= dyn_cast
<gassign
*> (stmt_info
->stmt
);
9920 code
= gimple_assign_rhs_code (stmt
);
9921 if (code
!= COND_EXPR
)
9924 stmt_vec_info reduc_info
= NULL
;
9925 int reduc_index
= -1;
9926 vect_reduction_type reduction_type
= TREE_CODE_REDUCTION
;
9928 = STMT_VINFO_REDUC_DEF (vect_orig_stmt (stmt_info
)) != NULL
;
9931 if (STMT_SLP_TYPE (stmt_info
))
9933 reduc_info
= info_for_reduction (stmt_info
);
9934 reduction_type
= STMT_VINFO_REDUC_TYPE (reduc_info
);
9935 reduc_index
= STMT_VINFO_REDUC_IDX (stmt_info
);
9936 gcc_assert (reduction_type
!= EXTRACT_LAST_REDUCTION
9937 || reduc_index
!= -1);
9941 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
)
9944 /* FORNOW: only supported as part of a reduction. */
9945 if (STMT_VINFO_LIVE_P (stmt_info
))
9947 if (dump_enabled_p ())
9948 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
9949 "value used after loop.\n");
9954 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
9955 tree vectype1
= NULL_TREE
, vectype2
= NULL_TREE
;
9960 ncopies
= vect_get_num_copies (loop_vinfo
, vectype
);
9962 gcc_assert (ncopies
>= 1);
9963 if (for_reduction
&& ncopies
> 1)
9964 return false; /* FORNOW */
9966 cond_expr
= gimple_assign_rhs1 (stmt
);
9967 then_clause
= gimple_assign_rhs2 (stmt
);
9968 else_clause
= gimple_assign_rhs3 (stmt
);
9970 if (!vect_is_simple_cond (cond_expr
, stmt_info
->vinfo
,
9971 &comp_vectype
, &dts
[0], slp_node
? NULL
: vectype
)
9975 if (!vect_is_simple_use (then_clause
, stmt_info
->vinfo
, &dts
[2], &vectype1
))
9977 if (!vect_is_simple_use (else_clause
, stmt_info
->vinfo
, &dts
[3], &vectype2
))
9980 if (vectype1
&& !useless_type_conversion_p (vectype
, vectype1
))
9983 if (vectype2
&& !useless_type_conversion_p (vectype
, vectype2
))
9986 masked
= !COMPARISON_CLASS_P (cond_expr
);
9987 vec_cmp_type
= truth_type_for (comp_vectype
);
9989 if (vec_cmp_type
== NULL_TREE
)
9992 cond_code
= TREE_CODE (cond_expr
);
9995 cond_expr0
= TREE_OPERAND (cond_expr
, 0);
9996 cond_expr1
= TREE_OPERAND (cond_expr
, 1);
9999 /* For conditional reductions, the "then" value needs to be the candidate
10000 value calculated by this iteration while the "else" value needs to be
10001 the result carried over from previous iterations. If the COND_EXPR
10002 is the other way around, we need to swap it. */
10003 bool must_invert_cmp_result
= false;
10004 if (reduction_type
== EXTRACT_LAST_REDUCTION
&& reduc_index
== 1)
10007 must_invert_cmp_result
= true;
10010 bool honor_nans
= HONOR_NANS (TREE_TYPE (cond_expr0
));
10011 tree_code new_code
= invert_tree_comparison (cond_code
, honor_nans
);
10012 if (new_code
== ERROR_MARK
)
10013 must_invert_cmp_result
= true;
10015 cond_code
= new_code
;
10017 /* Make sure we don't accidentally use the old condition. */
10018 cond_expr
= NULL_TREE
;
10019 std::swap (then_clause
, else_clause
);
10022 if (!masked
&& VECTOR_BOOLEAN_TYPE_P (comp_vectype
))
10024 /* Boolean values may have another representation in vectors
10025 and therefore we prefer bit operations over comparison for
10026 them (which also works for scalar masks). We store opcodes
10027 to use in bitop1 and bitop2. Statement is vectorized as
10028 BITOP2 (rhs1 BITOP1 rhs2) or rhs1 BITOP2 (BITOP1 rhs2)
10029 depending on bitop1 and bitop2 arity. */
10033 bitop1
= BIT_NOT_EXPR
;
10034 bitop2
= BIT_AND_EXPR
;
10037 bitop1
= BIT_NOT_EXPR
;
10038 bitop2
= BIT_IOR_EXPR
;
10041 bitop1
= BIT_NOT_EXPR
;
10042 bitop2
= BIT_AND_EXPR
;
10043 std::swap (cond_expr0
, cond_expr1
);
10046 bitop1
= BIT_NOT_EXPR
;
10047 bitop2
= BIT_IOR_EXPR
;
10048 std::swap (cond_expr0
, cond_expr1
);
10051 bitop1
= BIT_XOR_EXPR
;
10054 bitop1
= BIT_XOR_EXPR
;
10055 bitop2
= BIT_NOT_EXPR
;
10060 cond_code
= SSA_NAME
;
10065 if (bitop1
!= NOP_EXPR
)
10067 machine_mode mode
= TYPE_MODE (comp_vectype
);
10070 optab
= optab_for_tree_code (bitop1
, comp_vectype
, optab_default
);
10071 if (!optab
|| optab_handler (optab
, mode
) == CODE_FOR_nothing
)
10074 if (bitop2
!= NOP_EXPR
)
10076 optab
= optab_for_tree_code (bitop2
, comp_vectype
,
10078 if (!optab
|| optab_handler (optab
, mode
) == CODE_FOR_nothing
)
10082 if (expand_vec_cond_expr_p (vectype
, comp_vectype
,
10085 STMT_VINFO_TYPE (stmt_info
) = condition_vec_info_type
;
10086 vect_model_simple_cost (stmt_info
, ncopies
, dts
, ndts
, slp_node
,
10097 vec_oprnds0
.create (1);
10098 vec_oprnds1
.create (1);
10099 vec_oprnds2
.create (1);
10100 vec_oprnds3
.create (1);
10104 scalar_dest
= gimple_assign_lhs (stmt
);
10105 if (reduction_type
!= EXTRACT_LAST_REDUCTION
)
10106 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
10108 /* Handle cond expr. */
10109 for (j
= 0; j
< ncopies
; j
++)
10111 bool swap_cond_operands
= false;
10113 /* See whether another part of the vectorized code applies a loop
10114 mask to the condition, or to its inverse. */
10116 vec_loop_masks
*masks
= NULL
;
10117 if (loop_vinfo
&& LOOP_VINFO_FULLY_MASKED_P (loop_vinfo
))
10119 if (reduction_type
== EXTRACT_LAST_REDUCTION
)
10120 masks
= &LOOP_VINFO_MASKS (loop_vinfo
);
10123 scalar_cond_masked_key
cond (cond_expr
, ncopies
);
10124 if (loop_vinfo
->scalar_cond_masked_set
.contains (cond
))
10125 masks
= &LOOP_VINFO_MASKS (loop_vinfo
);
10128 bool honor_nans
= HONOR_NANS (TREE_TYPE (cond
.op0
));
10129 cond
.code
= invert_tree_comparison (cond
.code
, honor_nans
);
10130 if (loop_vinfo
->scalar_cond_masked_set
.contains (cond
))
10132 masks
= &LOOP_VINFO_MASKS (loop_vinfo
);
10133 cond_code
= cond
.code
;
10134 swap_cond_operands
= true;
10140 stmt_vec_info new_stmt_info
= NULL
;
10145 auto_vec
<vec
<tree
>, 4> vec_defs
;
10146 vect_get_slp_defs (slp_node
, &vec_defs
);
10147 vec_oprnds3
= vec_defs
.pop ();
10148 vec_oprnds2
= vec_defs
.pop ();
10150 vec_oprnds1
= vec_defs
.pop ();
10151 vec_oprnds0
= vec_defs
.pop ();
10158 = vect_get_vec_def_for_operand (cond_expr
, stmt_info
,
10164 = vect_get_vec_def_for_operand (cond_expr0
,
10165 stmt_info
, comp_vectype
);
10167 = vect_get_vec_def_for_operand (cond_expr1
,
10168 stmt_info
, comp_vectype
);
10170 vec_then_clause
= vect_get_vec_def_for_operand (then_clause
,
10172 if (reduction_type
!= EXTRACT_LAST_REDUCTION
)
10173 vec_else_clause
= vect_get_vec_def_for_operand (else_clause
,
10180 = vect_get_vec_def_for_stmt_copy (vinfo
, vec_oprnds0
.pop ());
10183 = vect_get_vec_def_for_stmt_copy (vinfo
, vec_oprnds1
.pop ());
10185 vec_then_clause
= vect_get_vec_def_for_stmt_copy (vinfo
,
10186 vec_oprnds2
.pop ());
10187 vec_else_clause
= vect_get_vec_def_for_stmt_copy (vinfo
,
10188 vec_oprnds3
.pop ());
10193 vec_oprnds0
.quick_push (vec_cond_lhs
);
10195 vec_oprnds1
.quick_push (vec_cond_rhs
);
10196 vec_oprnds2
.quick_push (vec_then_clause
);
10197 vec_oprnds3
.quick_push (vec_else_clause
);
10200 /* Arguments are ready. Create the new vector stmt. */
10201 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vec_cond_lhs
)
10203 vec_then_clause
= vec_oprnds2
[i
];
10204 vec_else_clause
= vec_oprnds3
[i
];
10206 if (swap_cond_operands
)
10207 std::swap (vec_then_clause
, vec_else_clause
);
10210 vec_compare
= vec_cond_lhs
;
10213 vec_cond_rhs
= vec_oprnds1
[i
];
10214 if (bitop1
== NOP_EXPR
)
10215 vec_compare
= build2 (cond_code
, vec_cmp_type
,
10216 vec_cond_lhs
, vec_cond_rhs
);
10219 new_temp
= make_ssa_name (vec_cmp_type
);
10221 if (bitop1
== BIT_NOT_EXPR
)
10222 new_stmt
= gimple_build_assign (new_temp
, bitop1
,
10226 = gimple_build_assign (new_temp
, bitop1
, vec_cond_lhs
,
10228 vect_finish_stmt_generation (stmt_info
, new_stmt
, gsi
);
10229 if (bitop2
== NOP_EXPR
)
10230 vec_compare
= new_temp
;
10231 else if (bitop2
== BIT_NOT_EXPR
)
10233 /* Instead of doing ~x ? y : z do x ? z : y. */
10234 vec_compare
= new_temp
;
10235 std::swap (vec_then_clause
, vec_else_clause
);
10239 vec_compare
= make_ssa_name (vec_cmp_type
);
10241 = gimple_build_assign (vec_compare
, bitop2
,
10242 vec_cond_lhs
, new_temp
);
10243 vect_finish_stmt_generation (stmt_info
, new_stmt
, gsi
);
10248 /* If we decided to apply a loop mask to the result of the vector
10249 comparison, AND the comparison with the mask now. Later passes
10250 should then be able to reuse the AND results between mulitple
10254 for (int i = 0; i < 100; ++i)
10255 x[i] = y[i] ? z[i] : 10;
10257 results in following optimized GIMPLE:
10259 mask__35.8_43 = vect__4.7_41 != { 0, ... };
10260 vec_mask_and_46 = loop_mask_40 & mask__35.8_43;
10261 _19 = &MEM[base: z_12(D), index: ivtmp_56, step: 4, offset: 0B];
10262 vect_iftmp.11_47 = .MASK_LOAD (_19, 4B, vec_mask_and_46);
10263 vect_iftmp.12_52 = VEC_COND_EXPR <vec_mask_and_46,
10264 vect_iftmp.11_47, { 10, ... }>;
10266 instead of using a masked and unmasked forms of
10267 vec != { 0, ... } (masked in the MASK_LOAD,
10268 unmasked in the VEC_COND_EXPR). */
10270 /* Force vec_compare to be an SSA_NAME rather than a comparison,
10271 in cases where that's necessary. */
10273 if (masks
|| reduction_type
== EXTRACT_LAST_REDUCTION
)
10275 if (!is_gimple_val (vec_compare
))
10277 tree vec_compare_name
= make_ssa_name (vec_cmp_type
);
10278 gassign
*new_stmt
= gimple_build_assign (vec_compare_name
,
10280 vect_finish_stmt_generation (stmt_info
, new_stmt
, gsi
);
10281 vec_compare
= vec_compare_name
;
10284 if (must_invert_cmp_result
)
10286 tree vec_compare_name
= make_ssa_name (vec_cmp_type
);
10287 gassign
*new_stmt
= gimple_build_assign (vec_compare_name
,
10290 vect_finish_stmt_generation (stmt_info
, new_stmt
, gsi
);
10291 vec_compare
= vec_compare_name
;
10296 unsigned vec_num
= vec_oprnds0
.length ();
10298 = vect_get_loop_mask (gsi
, masks
, vec_num
* ncopies
,
10299 vectype
, vec_num
* j
+ i
);
10300 tree tmp2
= make_ssa_name (vec_cmp_type
);
10302 = gimple_build_assign (tmp2
, BIT_AND_EXPR
, vec_compare
,
10304 vect_finish_stmt_generation (stmt_info
, g
, gsi
);
10305 vec_compare
= tmp2
;
10309 if (reduction_type
== EXTRACT_LAST_REDUCTION
)
10311 gcall
*new_stmt
= gimple_build_call_internal
10312 (IFN_FOLD_EXTRACT_LAST
, 3, else_clause
, vec_compare
,
10314 gimple_call_set_lhs (new_stmt
, scalar_dest
);
10315 SSA_NAME_DEF_STMT (scalar_dest
) = new_stmt
;
10316 if (stmt_info
->stmt
== gsi_stmt (*gsi
))
10317 new_stmt_info
= vect_finish_replace_stmt (stmt_info
, new_stmt
);
10320 /* In this case we're moving the definition to later in the
10321 block. That doesn't matter because the only uses of the
10322 lhs are in phi statements. */
10323 gimple_stmt_iterator old_gsi
10324 = gsi_for_stmt (stmt_info
->stmt
);
10325 gsi_remove (&old_gsi
, true);
10327 = vect_finish_stmt_generation (stmt_info
, new_stmt
, gsi
);
10332 new_temp
= make_ssa_name (vec_dest
);
10334 = gimple_build_assign (new_temp
, VEC_COND_EXPR
, vec_compare
,
10335 vec_then_clause
, vec_else_clause
);
10337 = vect_finish_stmt_generation (stmt_info
, new_stmt
, gsi
);
10340 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt_info
);
10347 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt_info
;
10349 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt_info
;
10351 prev_stmt_info
= new_stmt_info
;
10354 vec_oprnds0
.release ();
10355 vec_oprnds1
.release ();
10356 vec_oprnds2
.release ();
10357 vec_oprnds3
.release ();
10362 /* vectorizable_comparison.
10364 Check if STMT_INFO is comparison expression that can be vectorized.
10365 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
10366 comparison, put it in VEC_STMT, and insert it at GSI.
10368 Return true if STMT_INFO is vectorizable in this way. */
10371 vectorizable_comparison (stmt_vec_info stmt_info
, gimple_stmt_iterator
*gsi
,
10372 stmt_vec_info
*vec_stmt
,
10373 slp_tree slp_node
, stmt_vector_for_cost
*cost_vec
)
10375 vec_info
*vinfo
= stmt_info
->vinfo
;
10376 tree lhs
, rhs1
, rhs2
;
10377 tree vectype1
= NULL_TREE
, vectype2
= NULL_TREE
;
10378 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
10379 tree vec_rhs1
= NULL_TREE
, vec_rhs2
= NULL_TREE
;
10381 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
10382 enum vect_def_type dts
[2] = {vect_unknown_def_type
, vect_unknown_def_type
};
10384 poly_uint64 nunits
;
10386 enum tree_code code
, bitop1
= NOP_EXPR
, bitop2
= NOP_EXPR
;
10387 stmt_vec_info prev_stmt_info
= NULL
;
10389 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
10390 vec
<tree
> vec_oprnds0
= vNULL
;
10391 vec
<tree
> vec_oprnds1
= vNULL
;
10395 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
10398 if (!vectype
|| !VECTOR_BOOLEAN_TYPE_P (vectype
))
10401 mask_type
= vectype
;
10402 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
10407 ncopies
= vect_get_num_copies (loop_vinfo
, vectype
);
10409 gcc_assert (ncopies
>= 1);
10410 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
)
10413 if (STMT_VINFO_LIVE_P (stmt_info
))
10415 if (dump_enabled_p ())
10416 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
10417 "value used after loop.\n");
10421 gassign
*stmt
= dyn_cast
<gassign
*> (stmt_info
->stmt
);
10425 code
= gimple_assign_rhs_code (stmt
);
10427 if (TREE_CODE_CLASS (code
) != tcc_comparison
)
10430 rhs1
= gimple_assign_rhs1 (stmt
);
10431 rhs2
= gimple_assign_rhs2 (stmt
);
10433 if (!vect_is_simple_use (rhs1
, stmt_info
->vinfo
, &dts
[0], &vectype1
))
10436 if (!vect_is_simple_use (rhs2
, stmt_info
->vinfo
, &dts
[1], &vectype2
))
10439 if (vectype1
&& vectype2
10440 && maybe_ne (TYPE_VECTOR_SUBPARTS (vectype1
),
10441 TYPE_VECTOR_SUBPARTS (vectype2
)))
10444 vectype
= vectype1
? vectype1
: vectype2
;
10446 /* Invariant comparison. */
10449 vectype
= get_vectype_for_scalar_type (vinfo
, TREE_TYPE (rhs1
));
10450 if (maybe_ne (TYPE_VECTOR_SUBPARTS (vectype
), nunits
))
10453 else if (maybe_ne (nunits
, TYPE_VECTOR_SUBPARTS (vectype
)))
10456 /* Can't compare mask and non-mask types. */
10457 if (vectype1
&& vectype2
10458 && (VECTOR_BOOLEAN_TYPE_P (vectype1
) ^ VECTOR_BOOLEAN_TYPE_P (vectype2
)))
10461 /* Boolean values may have another representation in vectors
10462 and therefore we prefer bit operations over comparison for
10463 them (which also works for scalar masks). We store opcodes
10464 to use in bitop1 and bitop2. Statement is vectorized as
10465 BITOP2 (rhs1 BITOP1 rhs2) or
10466 rhs1 BITOP2 (BITOP1 rhs2)
10467 depending on bitop1 and bitop2 arity. */
10468 bool swap_p
= false;
10469 if (VECTOR_BOOLEAN_TYPE_P (vectype
))
10471 if (code
== GT_EXPR
)
10473 bitop1
= BIT_NOT_EXPR
;
10474 bitop2
= BIT_AND_EXPR
;
10476 else if (code
== GE_EXPR
)
10478 bitop1
= BIT_NOT_EXPR
;
10479 bitop2
= BIT_IOR_EXPR
;
10481 else if (code
== LT_EXPR
)
10483 bitop1
= BIT_NOT_EXPR
;
10484 bitop2
= BIT_AND_EXPR
;
10487 else if (code
== LE_EXPR
)
10489 bitop1
= BIT_NOT_EXPR
;
10490 bitop2
= BIT_IOR_EXPR
;
10495 bitop1
= BIT_XOR_EXPR
;
10496 if (code
== EQ_EXPR
)
10497 bitop2
= BIT_NOT_EXPR
;
10503 if (bitop1
== NOP_EXPR
)
10505 if (!expand_vec_cmp_expr_p (vectype
, mask_type
, code
))
10510 machine_mode mode
= TYPE_MODE (vectype
);
10513 optab
= optab_for_tree_code (bitop1
, vectype
, optab_default
);
10514 if (!optab
|| optab_handler (optab
, mode
) == CODE_FOR_nothing
)
10517 if (bitop2
!= NOP_EXPR
)
10519 optab
= optab_for_tree_code (bitop2
, vectype
, optab_default
);
10520 if (!optab
|| optab_handler (optab
, mode
) == CODE_FOR_nothing
)
10525 STMT_VINFO_TYPE (stmt_info
) = comparison_vec_info_type
;
10526 vect_model_simple_cost (stmt_info
, ncopies
* (1 + (bitop2
!= NOP_EXPR
)),
10527 dts
, ndts
, slp_node
, cost_vec
);
10534 vec_oprnds0
.create (1);
10535 vec_oprnds1
.create (1);
10539 lhs
= gimple_assign_lhs (stmt
);
10540 mask
= vect_create_destination_var (lhs
, mask_type
);
10542 /* Handle cmp expr. */
10543 for (j
= 0; j
< ncopies
; j
++)
10545 stmt_vec_info new_stmt_info
= NULL
;
10550 auto_vec
<vec
<tree
>, 2> vec_defs
;
10551 vect_get_slp_defs (slp_node
, &vec_defs
);
10552 vec_oprnds1
= vec_defs
.pop ();
10553 vec_oprnds0
= vec_defs
.pop ();
10555 std::swap (vec_oprnds0
, vec_oprnds1
);
10559 vec_rhs1
= vect_get_vec_def_for_operand (rhs1
, stmt_info
,
10561 vec_rhs2
= vect_get_vec_def_for_operand (rhs2
, stmt_info
,
10567 vec_rhs1
= vect_get_vec_def_for_stmt_copy (vinfo
,
10568 vec_oprnds0
.pop ());
10569 vec_rhs2
= vect_get_vec_def_for_stmt_copy (vinfo
,
10570 vec_oprnds1
.pop ());
10575 if (swap_p
&& j
== 0)
10576 std::swap (vec_rhs1
, vec_rhs2
);
10577 vec_oprnds0
.quick_push (vec_rhs1
);
10578 vec_oprnds1
.quick_push (vec_rhs2
);
10581 /* Arguments are ready. Create the new vector stmt. */
10582 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vec_rhs1
)
10584 vec_rhs2
= vec_oprnds1
[i
];
10586 new_temp
= make_ssa_name (mask
);
10587 if (bitop1
== NOP_EXPR
)
10589 gassign
*new_stmt
= gimple_build_assign (new_temp
, code
,
10590 vec_rhs1
, vec_rhs2
);
10592 = vect_finish_stmt_generation (stmt_info
, new_stmt
, gsi
);
10597 if (bitop1
== BIT_NOT_EXPR
)
10598 new_stmt
= gimple_build_assign (new_temp
, bitop1
, vec_rhs2
);
10600 new_stmt
= gimple_build_assign (new_temp
, bitop1
, vec_rhs1
,
10603 = vect_finish_stmt_generation (stmt_info
, new_stmt
, gsi
);
10604 if (bitop2
!= NOP_EXPR
)
10606 tree res
= make_ssa_name (mask
);
10607 if (bitop2
== BIT_NOT_EXPR
)
10608 new_stmt
= gimple_build_assign (res
, bitop2
, new_temp
);
10610 new_stmt
= gimple_build_assign (res
, bitop2
, vec_rhs1
,
10613 = vect_finish_stmt_generation (stmt_info
, new_stmt
, gsi
);
10617 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt_info
);
10624 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt_info
;
10626 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt_info
;
10628 prev_stmt_info
= new_stmt_info
;
10631 vec_oprnds0
.release ();
10632 vec_oprnds1
.release ();
10637 /* If SLP_NODE is nonnull, return true if vectorizable_live_operation
10638 can handle all live statements in the node. Otherwise return true
10639 if STMT_INFO is not live or if vectorizable_live_operation can handle it.
10640 GSI and VEC_STMT_P are as for vectorizable_live_operation. */
10643 can_vectorize_live_stmts (stmt_vec_info stmt_info
, gimple_stmt_iterator
*gsi
,
10644 slp_tree slp_node
, slp_instance slp_node_instance
,
10646 stmt_vector_for_cost
*cost_vec
)
10650 stmt_vec_info slp_stmt_info
;
10652 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (slp_node
), i
, slp_stmt_info
)
10654 if (STMT_VINFO_LIVE_P (slp_stmt_info
)
10655 && !vectorizable_live_operation (slp_stmt_info
, gsi
, slp_node
,
10656 slp_node_instance
, i
,
10657 vec_stmt_p
, cost_vec
))
10661 else if (STMT_VINFO_LIVE_P (stmt_info
)
10662 && !vectorizable_live_operation (stmt_info
, gsi
, slp_node
,
10663 slp_node_instance
, -1,
10664 vec_stmt_p
, cost_vec
))
10670 /* Make sure the statement is vectorizable. */
10673 vect_analyze_stmt (stmt_vec_info stmt_info
, bool *need_to_vectorize
,
10674 slp_tree node
, slp_instance node_instance
,
10675 stmt_vector_for_cost
*cost_vec
)
10677 vec_info
*vinfo
= stmt_info
->vinfo
;
10678 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
10679 enum vect_relevant relevance
= STMT_VINFO_RELEVANT (stmt_info
);
10681 gimple_seq pattern_def_seq
;
10683 if (dump_enabled_p ())
10684 dump_printf_loc (MSG_NOTE
, vect_location
, "==> examining statement: %G",
10687 if (gimple_has_volatile_ops (stmt_info
->stmt
))
10688 return opt_result::failure_at (stmt_info
->stmt
,
10690 " stmt has volatile operands: %G\n",
10693 if (STMT_VINFO_IN_PATTERN_P (stmt_info
)
10695 && (pattern_def_seq
= STMT_VINFO_PATTERN_DEF_SEQ (stmt_info
)))
10697 gimple_stmt_iterator si
;
10699 for (si
= gsi_start (pattern_def_seq
); !gsi_end_p (si
); gsi_next (&si
))
10701 stmt_vec_info pattern_def_stmt_info
10702 = vinfo
->lookup_stmt (gsi_stmt (si
));
10703 if (STMT_VINFO_RELEVANT_P (pattern_def_stmt_info
)
10704 || STMT_VINFO_LIVE_P (pattern_def_stmt_info
))
10706 /* Analyze def stmt of STMT if it's a pattern stmt. */
10707 if (dump_enabled_p ())
10708 dump_printf_loc (MSG_NOTE
, vect_location
,
10709 "==> examining pattern def statement: %G",
10710 pattern_def_stmt_info
->stmt
);
10713 = vect_analyze_stmt (pattern_def_stmt_info
,
10714 need_to_vectorize
, node
, node_instance
,
10722 /* Skip stmts that do not need to be vectorized. In loops this is expected
10724 - the COND_EXPR which is the loop exit condition
10725 - any LABEL_EXPRs in the loop
10726 - computations that are used only for array indexing or loop control.
10727 In basic blocks we only analyze statements that are a part of some SLP
10728 instance, therefore, all the statements are relevant.
10730 Pattern statement needs to be analyzed instead of the original statement
10731 if the original statement is not relevant. Otherwise, we analyze both
10732 statements. In basic blocks we are called from some SLP instance
10733 traversal, don't analyze pattern stmts instead, the pattern stmts
10734 already will be part of SLP instance. */
10736 stmt_vec_info pattern_stmt_info
= STMT_VINFO_RELATED_STMT (stmt_info
);
10737 if (!STMT_VINFO_RELEVANT_P (stmt_info
)
10738 && !STMT_VINFO_LIVE_P (stmt_info
))
10740 if (STMT_VINFO_IN_PATTERN_P (stmt_info
)
10741 && pattern_stmt_info
10742 && (STMT_VINFO_RELEVANT_P (pattern_stmt_info
)
10743 || STMT_VINFO_LIVE_P (pattern_stmt_info
)))
10745 /* Analyze PATTERN_STMT instead of the original stmt. */
10746 stmt_info
= pattern_stmt_info
;
10747 if (dump_enabled_p ())
10748 dump_printf_loc (MSG_NOTE
, vect_location
,
10749 "==> examining pattern statement: %G",
10754 if (dump_enabled_p ())
10755 dump_printf_loc (MSG_NOTE
, vect_location
, "irrelevant.\n");
10757 return opt_result::success ();
10760 else if (STMT_VINFO_IN_PATTERN_P (stmt_info
)
10762 && pattern_stmt_info
10763 && (STMT_VINFO_RELEVANT_P (pattern_stmt_info
)
10764 || STMT_VINFO_LIVE_P (pattern_stmt_info
)))
10766 /* Analyze PATTERN_STMT too. */
10767 if (dump_enabled_p ())
10768 dump_printf_loc (MSG_NOTE
, vect_location
,
10769 "==> examining pattern statement: %G",
10770 pattern_stmt_info
->stmt
);
10773 = vect_analyze_stmt (pattern_stmt_info
, need_to_vectorize
, node
,
10774 node_instance
, cost_vec
);
10779 switch (STMT_VINFO_DEF_TYPE (stmt_info
))
10781 case vect_internal_def
:
10784 case vect_reduction_def
:
10785 case vect_nested_cycle
:
10786 gcc_assert (!bb_vinfo
10787 && (relevance
== vect_used_in_outer
10788 || relevance
== vect_used_in_outer_by_reduction
10789 || relevance
== vect_used_by_reduction
10790 || relevance
== vect_unused_in_scope
10791 || relevance
== vect_used_only_live
));
10794 case vect_induction_def
:
10795 gcc_assert (!bb_vinfo
);
10798 case vect_constant_def
:
10799 case vect_external_def
:
10800 case vect_unknown_def_type
:
10802 gcc_unreachable ();
10805 if (STMT_VINFO_RELEVANT_P (stmt_info
))
10807 tree type
= gimple_expr_type (stmt_info
->stmt
);
10808 gcc_assert (!VECTOR_MODE_P (TYPE_MODE (type
)));
10809 gcall
*call
= dyn_cast
<gcall
*> (stmt_info
->stmt
);
10810 gcc_assert (STMT_VINFO_VECTYPE (stmt_info
)
10811 || (call
&& gimple_call_lhs (call
) == NULL_TREE
));
10812 *need_to_vectorize
= true;
10815 if (PURE_SLP_STMT (stmt_info
) && !node
)
10817 if (dump_enabled_p ())
10818 dump_printf_loc (MSG_NOTE
, vect_location
,
10819 "handled only by SLP analysis\n");
10820 return opt_result::success ();
10825 && (STMT_VINFO_RELEVANT_P (stmt_info
)
10826 || STMT_VINFO_DEF_TYPE (stmt_info
) == vect_reduction_def
))
10827 /* Prefer vectorizable_call over vectorizable_simd_clone_call so
10828 -mveclibabi= takes preference over library functions with
10829 the simd attribute. */
10830 ok
= (vectorizable_call (stmt_info
, NULL
, NULL
, node
, cost_vec
)
10831 || vectorizable_simd_clone_call (stmt_info
, NULL
, NULL
, node
,
10833 || vectorizable_conversion (stmt_info
, NULL
, NULL
, node
, cost_vec
)
10834 || vectorizable_operation (stmt_info
, NULL
, NULL
, node
, cost_vec
)
10835 || vectorizable_assignment (stmt_info
, NULL
, NULL
, node
, cost_vec
)
10836 || vectorizable_load (stmt_info
, NULL
, NULL
, node
, node_instance
,
10838 || vectorizable_store (stmt_info
, NULL
, NULL
, node
, cost_vec
)
10839 || vectorizable_reduction (stmt_info
, node
, node_instance
, cost_vec
)
10840 || vectorizable_induction (stmt_info
, NULL
, NULL
, node
, cost_vec
)
10841 || vectorizable_shift (stmt_info
, NULL
, NULL
, node
, cost_vec
)
10842 || vectorizable_condition (stmt_info
, NULL
, NULL
, node
, cost_vec
)
10843 || vectorizable_comparison (stmt_info
, NULL
, NULL
, node
,
10845 || vectorizable_lc_phi (stmt_info
, NULL
, node
));
10849 ok
= (vectorizable_call (stmt_info
, NULL
, NULL
, node
, cost_vec
)
10850 || vectorizable_simd_clone_call (stmt_info
, NULL
, NULL
, node
,
10852 || vectorizable_conversion (stmt_info
, NULL
, NULL
, node
,
10854 || vectorizable_shift (stmt_info
, NULL
, NULL
, node
, cost_vec
)
10855 || vectorizable_operation (stmt_info
, NULL
, NULL
, node
, cost_vec
)
10856 || vectorizable_assignment (stmt_info
, NULL
, NULL
, node
,
10858 || vectorizable_load (stmt_info
, NULL
, NULL
, node
, node_instance
,
10860 || vectorizable_store (stmt_info
, NULL
, NULL
, node
, cost_vec
)
10861 || vectorizable_condition (stmt_info
, NULL
, NULL
, node
, cost_vec
)
10862 || vectorizable_comparison (stmt_info
, NULL
, NULL
, node
,
10867 return opt_result::failure_at (stmt_info
->stmt
,
10869 " relevant stmt not supported: %G",
10872 /* Stmts that are (also) "live" (i.e. - that are used out of the loop)
10873 need extra handling, except for vectorizable reductions. */
10875 && STMT_VINFO_TYPE (stmt_info
) != reduc_vec_info_type
10876 && STMT_VINFO_TYPE (stmt_info
) != lc_phi_info_type
10877 && !can_vectorize_live_stmts (stmt_info
, NULL
, node
, node_instance
,
10879 return opt_result::failure_at (stmt_info
->stmt
,
10881 " live stmt not supported: %G",
10884 return opt_result::success ();
10888 /* Function vect_transform_stmt.
10890 Create a vectorized stmt to replace STMT_INFO, and insert it at GSI. */
10893 vect_transform_stmt (stmt_vec_info stmt_info
, gimple_stmt_iterator
*gsi
,
10894 slp_tree slp_node
, slp_instance slp_node_instance
)
10896 vec_info
*vinfo
= stmt_info
->vinfo
;
10897 bool is_store
= false;
10898 stmt_vec_info vec_stmt
= NULL
;
10901 gcc_assert (slp_node
|| !PURE_SLP_STMT (stmt_info
));
10902 stmt_vec_info old_vec_stmt_info
= STMT_VINFO_VEC_STMT (stmt_info
);
10904 bool nested_p
= (STMT_VINFO_LOOP_VINFO (stmt_info
)
10905 && nested_in_vect_loop_p
10906 (LOOP_VINFO_LOOP (STMT_VINFO_LOOP_VINFO (stmt_info
)),
10909 gimple
*stmt
= stmt_info
->stmt
;
10910 switch (STMT_VINFO_TYPE (stmt_info
))
10912 case type_demotion_vec_info_type
:
10913 case type_promotion_vec_info_type
:
10914 case type_conversion_vec_info_type
:
10915 done
= vectorizable_conversion (stmt_info
, gsi
, &vec_stmt
, slp_node
,
10920 case induc_vec_info_type
:
10921 done
= vectorizable_induction (stmt_info
, gsi
, &vec_stmt
, slp_node
,
10926 case shift_vec_info_type
:
10927 done
= vectorizable_shift (stmt_info
, gsi
, &vec_stmt
, slp_node
, NULL
);
10931 case op_vec_info_type
:
10932 done
= vectorizable_operation (stmt_info
, gsi
, &vec_stmt
, slp_node
,
10937 case assignment_vec_info_type
:
10938 done
= vectorizable_assignment (stmt_info
, gsi
, &vec_stmt
, slp_node
,
10943 case load_vec_info_type
:
10944 done
= vectorizable_load (stmt_info
, gsi
, &vec_stmt
, slp_node
,
10945 slp_node_instance
, NULL
);
10949 case store_vec_info_type
:
10950 done
= vectorizable_store (stmt_info
, gsi
, &vec_stmt
, slp_node
, NULL
);
10952 if (STMT_VINFO_GROUPED_ACCESS (stmt_info
) && !slp_node
)
10954 /* In case of interleaving, the whole chain is vectorized when the
10955 last store in the chain is reached. Store stmts before the last
10956 one are skipped, and there vec_stmt_info shouldn't be freed
10958 stmt_vec_info group_info
= DR_GROUP_FIRST_ELEMENT (stmt_info
);
10959 if (DR_GROUP_STORE_COUNT (group_info
) == DR_GROUP_SIZE (group_info
))
10966 case condition_vec_info_type
:
10967 done
= vectorizable_condition (stmt_info
, gsi
, &vec_stmt
, slp_node
, NULL
);
10971 case comparison_vec_info_type
:
10972 done
= vectorizable_comparison (stmt_info
, gsi
, &vec_stmt
,
10977 case call_vec_info_type
:
10978 done
= vectorizable_call (stmt_info
, gsi
, &vec_stmt
, slp_node
, NULL
);
10979 stmt
= gsi_stmt (*gsi
);
10982 case call_simd_clone_vec_info_type
:
10983 done
= vectorizable_simd_clone_call (stmt_info
, gsi
, &vec_stmt
,
10985 stmt
= gsi_stmt (*gsi
);
10988 case reduc_vec_info_type
:
10989 done
= vect_transform_reduction (stmt_info
, gsi
, &vec_stmt
, slp_node
);
10993 case cycle_phi_info_type
:
10994 done
= vect_transform_cycle_phi (stmt_info
, &vec_stmt
, slp_node
,
10995 slp_node_instance
);
10999 case lc_phi_info_type
:
11000 done
= vectorizable_lc_phi (stmt_info
, &vec_stmt
, slp_node
);
11005 if (!STMT_VINFO_LIVE_P (stmt_info
))
11007 if (dump_enabled_p ())
11008 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
11009 "stmt not supported.\n");
11010 gcc_unreachable ();
11014 /* Verify SLP vectorization doesn't mess with STMT_VINFO_VEC_STMT.
11015 This would break hybrid SLP vectorization. */
11017 gcc_assert (!vec_stmt
11018 && STMT_VINFO_VEC_STMT (stmt_info
) == old_vec_stmt_info
);
11020 /* Handle inner-loop stmts whose DEF is used in the loop-nest that
11021 is being vectorized, but outside the immediately enclosing loop. */
11024 && STMT_VINFO_TYPE (stmt_info
) != reduc_vec_info_type
11025 && (STMT_VINFO_RELEVANT (stmt_info
) == vect_used_in_outer
11026 || STMT_VINFO_RELEVANT (stmt_info
) ==
11027 vect_used_in_outer_by_reduction
))
11029 class loop
*innerloop
= LOOP_VINFO_LOOP (
11030 STMT_VINFO_LOOP_VINFO (stmt_info
))->inner
;
11031 imm_use_iterator imm_iter
;
11032 use_operand_p use_p
;
11035 if (dump_enabled_p ())
11036 dump_printf_loc (MSG_NOTE
, vect_location
,
11037 "Record the vdef for outer-loop vectorization.\n");
11039 /* Find the relevant loop-exit phi-node, and reord the vec_stmt there
11040 (to be used when vectorizing outer-loop stmts that use the DEF of
11042 if (gimple_code (stmt
) == GIMPLE_PHI
)
11043 scalar_dest
= PHI_RESULT (stmt
);
11045 scalar_dest
= gimple_get_lhs (stmt
);
11047 FOR_EACH_IMM_USE_FAST (use_p
, imm_iter
, scalar_dest
)
11048 if (!flow_bb_inside_loop_p (innerloop
, gimple_bb (USE_STMT (use_p
))))
11050 stmt_vec_info exit_phi_info
11051 = vinfo
->lookup_stmt (USE_STMT (use_p
));
11052 STMT_VINFO_VEC_STMT (exit_phi_info
) = vec_stmt
;
11057 STMT_VINFO_VEC_STMT (stmt_info
) = vec_stmt
;
11059 if (STMT_VINFO_TYPE (stmt_info
) == store_vec_info_type
)
11062 /* If this stmt defines a value used on a backedge, update the
11063 vectorized PHIs. */
11064 stmt_vec_info orig_stmt_info
= vect_orig_stmt (stmt_info
);
11065 stmt_vec_info reduc_info
;
11066 if (STMT_VINFO_REDUC_DEF (orig_stmt_info
)
11067 && vect_stmt_to_vectorize (orig_stmt_info
) == stmt_info
11068 && (reduc_info
= info_for_reduction (orig_stmt_info
))
11069 && STMT_VINFO_REDUC_TYPE (reduc_info
) != FOLD_LEFT_REDUCTION
11070 && STMT_VINFO_REDUC_TYPE (reduc_info
) != EXTRACT_LAST_REDUCTION
)
11075 && (phi
= dyn_cast
<gphi
*>
11076 (STMT_VINFO_REDUC_DEF (orig_stmt_info
)->stmt
))
11077 && dominated_by_p (CDI_DOMINATORS
,
11078 gimple_bb (orig_stmt_info
->stmt
), gimple_bb (phi
))
11079 && (e
= loop_latch_edge (gimple_bb (phi
)->loop_father
))
11080 && (PHI_ARG_DEF_FROM_EDGE (phi
, e
)
11081 == gimple_get_lhs (orig_stmt_info
->stmt
)))
11083 stmt_vec_info phi_info
11084 = STMT_VINFO_VEC_STMT (STMT_VINFO_REDUC_DEF (orig_stmt_info
));
11085 stmt_vec_info vec_stmt
= STMT_VINFO_VEC_STMT (stmt_info
);
11088 add_phi_arg (as_a
<gphi
*> (phi_info
->stmt
),
11089 gimple_get_lhs (vec_stmt
->stmt
), e
,
11090 gimple_phi_arg_location (phi
, e
->dest_idx
));
11091 phi_info
= STMT_VINFO_RELATED_STMT (phi_info
);
11092 vec_stmt
= STMT_VINFO_RELATED_STMT (vec_stmt
);
11095 gcc_assert (!vec_stmt
);
11098 && slp_node
!= slp_node_instance
->reduc_phis
)
11100 slp_tree phi_node
= slp_node_instance
->reduc_phis
;
11101 gphi
*phi
= as_a
<gphi
*> (SLP_TREE_SCALAR_STMTS (phi_node
)[0]->stmt
);
11102 e
= loop_latch_edge (gimple_bb (phi
)->loop_father
);
11103 gcc_assert (SLP_TREE_VEC_STMTS (phi_node
).length ()
11104 == SLP_TREE_VEC_STMTS (slp_node
).length ());
11105 for (unsigned i
= 0; i
< SLP_TREE_VEC_STMTS (phi_node
).length (); ++i
)
11106 add_phi_arg (as_a
<gphi
*> (SLP_TREE_VEC_STMTS (phi_node
)[i
]->stmt
),
11107 gimple_get_lhs (SLP_TREE_VEC_STMTS (slp_node
)[i
]->stmt
),
11108 e
, gimple_phi_arg_location (phi
, e
->dest_idx
));
11112 /* Handle stmts whose DEF is used outside the loop-nest that is
11113 being vectorized. */
11114 done
= can_vectorize_live_stmts (stmt_info
, gsi
, slp_node
,
11115 slp_node_instance
, true, NULL
);
11122 /* Remove a group of stores (for SLP or interleaving), free their
11126 vect_remove_stores (stmt_vec_info first_stmt_info
)
11128 vec_info
*vinfo
= first_stmt_info
->vinfo
;
11129 stmt_vec_info next_stmt_info
= first_stmt_info
;
11131 while (next_stmt_info
)
11133 stmt_vec_info tmp
= DR_GROUP_NEXT_ELEMENT (next_stmt_info
);
11134 next_stmt_info
= vect_orig_stmt (next_stmt_info
);
11135 /* Free the attached stmt_vec_info and remove the stmt. */
11136 vinfo
->remove_stmt (next_stmt_info
);
11137 next_stmt_info
= tmp
;
11141 /* Function get_vectype_for_scalar_type_and_size.
11143 Returns the vector type corresponding to SCALAR_TYPE and SIZE as supported
11147 get_vectype_for_scalar_type_and_size (tree scalar_type
, poly_uint64 size
)
11149 tree orig_scalar_type
= scalar_type
;
11150 scalar_mode inner_mode
;
11151 machine_mode simd_mode
;
11152 poly_uint64 nunits
;
11155 if (!is_int_mode (TYPE_MODE (scalar_type
), &inner_mode
)
11156 && !is_float_mode (TYPE_MODE (scalar_type
), &inner_mode
))
11159 unsigned int nbytes
= GET_MODE_SIZE (inner_mode
);
11161 /* For vector types of elements whose mode precision doesn't
11162 match their types precision we use a element type of mode
11163 precision. The vectorization routines will have to make sure
11164 they support the proper result truncation/extension.
11165 We also make sure to build vector types with INTEGER_TYPE
11166 component type only. */
11167 if (INTEGRAL_TYPE_P (scalar_type
)
11168 && (GET_MODE_BITSIZE (inner_mode
) != TYPE_PRECISION (scalar_type
)
11169 || TREE_CODE (scalar_type
) != INTEGER_TYPE
))
11170 scalar_type
= build_nonstandard_integer_type (GET_MODE_BITSIZE (inner_mode
),
11171 TYPE_UNSIGNED (scalar_type
));
11173 /* We shouldn't end up building VECTOR_TYPEs of non-scalar components.
11174 When the component mode passes the above test simply use a type
11175 corresponding to that mode. The theory is that any use that
11176 would cause problems with this will disable vectorization anyway. */
11177 else if (!SCALAR_FLOAT_TYPE_P (scalar_type
)
11178 && !INTEGRAL_TYPE_P (scalar_type
))
11179 scalar_type
= lang_hooks
.types
.type_for_mode (inner_mode
, 1);
11181 /* We can't build a vector type of elements with alignment bigger than
11183 else if (nbytes
< TYPE_ALIGN_UNIT (scalar_type
))
11184 scalar_type
= lang_hooks
.types
.type_for_mode (inner_mode
,
11185 TYPE_UNSIGNED (scalar_type
));
11187 /* If we felt back to using the mode fail if there was
11188 no scalar type for it. */
11189 if (scalar_type
== NULL_TREE
)
11192 /* If no size was supplied use the mode the target prefers. Otherwise
11193 lookup a vector mode of the specified size. */
11194 if (known_eq (size
, 0U))
11196 simd_mode
= targetm
.vectorize
.preferred_simd_mode (inner_mode
);
11197 if (SCALAR_INT_MODE_P (simd_mode
))
11199 /* Traditional behavior is not to take the integer mode
11200 literally, but simply to use it as a way of determining
11201 the vector size. It is up to mode_for_vector to decide
11202 what the TYPE_MODE should be.
11204 Note that nunits == 1 is allowed in order to support single
11205 element vector types. */
11206 if (!multiple_p (GET_MODE_SIZE (simd_mode
), nbytes
, &nunits
)
11207 || !mode_for_vector (inner_mode
, nunits
).exists (&simd_mode
))
11211 else if (!multiple_p (size
, nbytes
, &nunits
)
11212 || !mode_for_vector (inner_mode
, nunits
).exists (&simd_mode
))
11215 vectype
= build_vector_type_for_mode (scalar_type
, simd_mode
);
11217 /* In cases where the mode was chosen by mode_for_vector, check that
11218 the target actually supports the chosen mode, or that it at least
11219 allows the vector mode to be replaced by a like-sized integer. */
11220 if (!VECTOR_MODE_P (TYPE_MODE (vectype
))
11221 && !INTEGRAL_MODE_P (TYPE_MODE (vectype
)))
11224 /* Re-attach the address-space qualifier if we canonicalized the scalar
11226 if (TYPE_ADDR_SPACE (orig_scalar_type
) != TYPE_ADDR_SPACE (vectype
))
11227 return build_qualified_type
11228 (vectype
, KEEP_QUAL_ADDR_SPACE (TYPE_QUALS (orig_scalar_type
)));
11233 /* Function get_vectype_for_scalar_type.
11235 Returns the vector type corresponding to SCALAR_TYPE as supported
11239 get_vectype_for_scalar_type (vec_info
*vinfo
, tree scalar_type
)
11242 poly_uint64 vector_size
= GET_MODE_SIZE (vinfo
->vector_mode
);
11243 vectype
= get_vectype_for_scalar_type_and_size (scalar_type
, vector_size
);
11244 if (vectype
&& vinfo
->vector_mode
== VOIDmode
)
11245 vinfo
->vector_mode
= TYPE_MODE (vectype
);
11249 /* Function get_mask_type_for_scalar_type.
11251 Returns the mask type corresponding to a result of comparison
11252 of vectors of specified SCALAR_TYPE as supported by target. */
11255 get_mask_type_for_scalar_type (vec_info
*vinfo
, tree scalar_type
)
11257 tree vectype
= get_vectype_for_scalar_type (vinfo
, scalar_type
);
11262 return truth_type_for (vectype
);
11265 /* Function get_same_sized_vectype
11267 Returns a vector type corresponding to SCALAR_TYPE of size
11268 VECTOR_TYPE if supported by the target. */
11271 get_same_sized_vectype (tree scalar_type
, tree vector_type
)
11273 if (VECT_SCALAR_BOOLEAN_TYPE_P (scalar_type
))
11274 return truth_type_for (vector_type
);
11276 return get_vectype_for_scalar_type_and_size
11277 (scalar_type
, GET_MODE_SIZE (TYPE_MODE (vector_type
)));
11280 /* Function vect_is_simple_use.
11283 VINFO - the vect info of the loop or basic block that is being vectorized.
11284 OPERAND - operand in the loop or bb.
11286 DEF_STMT_INFO_OUT (optional) - information about the defining stmt in
11287 case OPERAND is an SSA_NAME that is defined in the vectorizable region
11288 DEF_STMT_OUT (optional) - the defining stmt in case OPERAND is an SSA_NAME;
11289 the definition could be anywhere in the function
11290 DT - the type of definition
11292 Returns whether a stmt with OPERAND can be vectorized.
11293 For loops, supportable operands are constants, loop invariants, and operands
11294 that are defined by the current iteration of the loop. Unsupportable
11295 operands are those that are defined by a previous iteration of the loop (as
11296 is the case in reduction/induction computations).
11297 For basic blocks, supportable operands are constants and bb invariants.
11298 For now, operands defined outside the basic block are not supported. */
11301 vect_is_simple_use (tree operand
, vec_info
*vinfo
, enum vect_def_type
*dt
,
11302 stmt_vec_info
*def_stmt_info_out
, gimple
**def_stmt_out
)
11304 if (def_stmt_info_out
)
11305 *def_stmt_info_out
= NULL
;
11307 *def_stmt_out
= NULL
;
11308 *dt
= vect_unknown_def_type
;
11310 if (dump_enabled_p ())
11312 dump_printf_loc (MSG_NOTE
, vect_location
,
11313 "vect_is_simple_use: operand ");
11314 if (TREE_CODE (operand
) == SSA_NAME
11315 && !SSA_NAME_IS_DEFAULT_DEF (operand
))
11316 dump_gimple_expr (MSG_NOTE
, TDF_SLIM
, SSA_NAME_DEF_STMT (operand
), 0);
11318 dump_generic_expr (MSG_NOTE
, TDF_SLIM
, operand
);
11321 if (CONSTANT_CLASS_P (operand
))
11322 *dt
= vect_constant_def
;
11323 else if (is_gimple_min_invariant (operand
))
11324 *dt
= vect_external_def
;
11325 else if (TREE_CODE (operand
) != SSA_NAME
)
11326 *dt
= vect_unknown_def_type
;
11327 else if (SSA_NAME_IS_DEFAULT_DEF (operand
))
11328 *dt
= vect_external_def
;
11331 gimple
*def_stmt
= SSA_NAME_DEF_STMT (operand
);
11332 stmt_vec_info stmt_vinfo
= vinfo
->lookup_def (operand
);
11334 *dt
= vect_external_def
;
11337 stmt_vinfo
= vect_stmt_to_vectorize (stmt_vinfo
);
11338 def_stmt
= stmt_vinfo
->stmt
;
11339 switch (gimple_code (def_stmt
))
11342 case GIMPLE_ASSIGN
:
11344 *dt
= STMT_VINFO_DEF_TYPE (stmt_vinfo
);
11347 *dt
= vect_unknown_def_type
;
11350 if (def_stmt_info_out
)
11351 *def_stmt_info_out
= stmt_vinfo
;
11354 *def_stmt_out
= def_stmt
;
11357 if (dump_enabled_p ())
11359 dump_printf (MSG_NOTE
, ", type of def: ");
11362 case vect_uninitialized_def
:
11363 dump_printf (MSG_NOTE
, "uninitialized\n");
11365 case vect_constant_def
:
11366 dump_printf (MSG_NOTE
, "constant\n");
11368 case vect_external_def
:
11369 dump_printf (MSG_NOTE
, "external\n");
11371 case vect_internal_def
:
11372 dump_printf (MSG_NOTE
, "internal\n");
11374 case vect_induction_def
:
11375 dump_printf (MSG_NOTE
, "induction\n");
11377 case vect_reduction_def
:
11378 dump_printf (MSG_NOTE
, "reduction\n");
11380 case vect_double_reduction_def
:
11381 dump_printf (MSG_NOTE
, "double reduction\n");
11383 case vect_nested_cycle
:
11384 dump_printf (MSG_NOTE
, "nested cycle\n");
11386 case vect_unknown_def_type
:
11387 dump_printf (MSG_NOTE
, "unknown\n");
11392 if (*dt
== vect_unknown_def_type
)
11394 if (dump_enabled_p ())
11395 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
11396 "Unsupported pattern.\n");
11403 /* Function vect_is_simple_use.
11405 Same as vect_is_simple_use but also determines the vector operand
11406 type of OPERAND and stores it to *VECTYPE. If the definition of
11407 OPERAND is vect_uninitialized_def, vect_constant_def or
11408 vect_external_def *VECTYPE will be set to NULL_TREE and the caller
11409 is responsible to compute the best suited vector type for the
11413 vect_is_simple_use (tree operand
, vec_info
*vinfo
, enum vect_def_type
*dt
,
11414 tree
*vectype
, stmt_vec_info
*def_stmt_info_out
,
11415 gimple
**def_stmt_out
)
11417 stmt_vec_info def_stmt_info
;
11419 if (!vect_is_simple_use (operand
, vinfo
, dt
, &def_stmt_info
, &def_stmt
))
11423 *def_stmt_out
= def_stmt
;
11424 if (def_stmt_info_out
)
11425 *def_stmt_info_out
= def_stmt_info
;
11427 /* Now get a vector type if the def is internal, otherwise supply
11428 NULL_TREE and leave it up to the caller to figure out a proper
11429 type for the use stmt. */
11430 if (*dt
== vect_internal_def
11431 || *dt
== vect_induction_def
11432 || *dt
== vect_reduction_def
11433 || *dt
== vect_double_reduction_def
11434 || *dt
== vect_nested_cycle
)
11436 *vectype
= STMT_VINFO_VECTYPE (def_stmt_info
);
11437 gcc_assert (*vectype
!= NULL_TREE
);
11438 if (dump_enabled_p ())
11439 dump_printf_loc (MSG_NOTE
, vect_location
,
11440 "vect_is_simple_use: vectype %T\n", *vectype
);
11442 else if (*dt
== vect_uninitialized_def
11443 || *dt
== vect_constant_def
11444 || *dt
== vect_external_def
)
11445 *vectype
= NULL_TREE
;
11447 gcc_unreachable ();
11453 /* Function supportable_widening_operation
11455 Check whether an operation represented by the code CODE is a
11456 widening operation that is supported by the target platform in
11457 vector form (i.e., when operating on arguments of type VECTYPE_IN
11458 producing a result of type VECTYPE_OUT).
11460 Widening operations we currently support are NOP (CONVERT), FLOAT,
11461 FIX_TRUNC and WIDEN_MULT. This function checks if these operations
11462 are supported by the target platform either directly (via vector
11463 tree-codes), or via target builtins.
11466 - CODE1 and CODE2 are codes of vector operations to be used when
11467 vectorizing the operation, if available.
11468 - MULTI_STEP_CVT determines the number of required intermediate steps in
11469 case of multi-step conversion (like char->short->int - in that case
11470 MULTI_STEP_CVT will be 1).
11471 - INTERM_TYPES contains the intermediate type required to perform the
11472 widening operation (short in the above example). */
11475 supportable_widening_operation (enum tree_code code
, stmt_vec_info stmt_info
,
11476 tree vectype_out
, tree vectype_in
,
11477 enum tree_code
*code1
, enum tree_code
*code2
,
11478 int *multi_step_cvt
,
11479 vec
<tree
> *interm_types
)
11481 loop_vec_info loop_info
= STMT_VINFO_LOOP_VINFO (stmt_info
);
11482 class loop
*vect_loop
= NULL
;
11483 machine_mode vec_mode
;
11484 enum insn_code icode1
, icode2
;
11485 optab optab1
, optab2
;
11486 tree vectype
= vectype_in
;
11487 tree wide_vectype
= vectype_out
;
11488 enum tree_code c1
, c2
;
11490 tree prev_type
, intermediate_type
;
11491 machine_mode intermediate_mode
, prev_mode
;
11492 optab optab3
, optab4
;
11494 *multi_step_cvt
= 0;
11496 vect_loop
= LOOP_VINFO_LOOP (loop_info
);
11500 case WIDEN_MULT_EXPR
:
11501 /* The result of a vectorized widening operation usually requires
11502 two vectors (because the widened results do not fit into one vector).
11503 The generated vector results would normally be expected to be
11504 generated in the same order as in the original scalar computation,
11505 i.e. if 8 results are generated in each vector iteration, they are
11506 to be organized as follows:
11507 vect1: [res1,res2,res3,res4],
11508 vect2: [res5,res6,res7,res8].
11510 However, in the special case that the result of the widening
11511 operation is used in a reduction computation only, the order doesn't
11512 matter (because when vectorizing a reduction we change the order of
11513 the computation). Some targets can take advantage of this and
11514 generate more efficient code. For example, targets like Altivec,
11515 that support widen_mult using a sequence of {mult_even,mult_odd}
11516 generate the following vectors:
11517 vect1: [res1,res3,res5,res7],
11518 vect2: [res2,res4,res6,res8].
11520 When vectorizing outer-loops, we execute the inner-loop sequentially
11521 (each vectorized inner-loop iteration contributes to VF outer-loop
11522 iterations in parallel). We therefore don't allow to change the
11523 order of the computation in the inner-loop during outer-loop
11525 /* TODO: Another case in which order doesn't *really* matter is when we
11526 widen and then contract again, e.g. (short)((int)x * y >> 8).
11527 Normally, pack_trunc performs an even/odd permute, whereas the
11528 repack from an even/odd expansion would be an interleave, which
11529 would be significantly simpler for e.g. AVX2. */
11530 /* In any case, in order to avoid duplicating the code below, recurse
11531 on VEC_WIDEN_MULT_EVEN_EXPR. If it succeeds, all the return values
11532 are properly set up for the caller. If we fail, we'll continue with
11533 a VEC_WIDEN_MULT_LO/HI_EXPR check. */
11535 && STMT_VINFO_RELEVANT (stmt_info
) == vect_used_by_reduction
11536 && !nested_in_vect_loop_p (vect_loop
, stmt_info
)
11537 && supportable_widening_operation (VEC_WIDEN_MULT_EVEN_EXPR
,
11538 stmt_info
, vectype_out
,
11539 vectype_in
, code1
, code2
,
11540 multi_step_cvt
, interm_types
))
11542 /* Elements in a vector with vect_used_by_reduction property cannot
11543 be reordered if the use chain with this property does not have the
11544 same operation. One such an example is s += a * b, where elements
11545 in a and b cannot be reordered. Here we check if the vector defined
11546 by STMT is only directly used in the reduction statement. */
11547 tree lhs
= gimple_assign_lhs (stmt_info
->stmt
);
11548 stmt_vec_info use_stmt_info
= loop_info
->lookup_single_use (lhs
);
11550 && STMT_VINFO_DEF_TYPE (use_stmt_info
) == vect_reduction_def
)
11553 c1
= VEC_WIDEN_MULT_LO_EXPR
;
11554 c2
= VEC_WIDEN_MULT_HI_EXPR
;
11557 case DOT_PROD_EXPR
:
11558 c1
= DOT_PROD_EXPR
;
11559 c2
= DOT_PROD_EXPR
;
11567 case VEC_WIDEN_MULT_EVEN_EXPR
:
11568 /* Support the recursion induced just above. */
11569 c1
= VEC_WIDEN_MULT_EVEN_EXPR
;
11570 c2
= VEC_WIDEN_MULT_ODD_EXPR
;
11573 case WIDEN_LSHIFT_EXPR
:
11574 c1
= VEC_WIDEN_LSHIFT_LO_EXPR
;
11575 c2
= VEC_WIDEN_LSHIFT_HI_EXPR
;
11579 c1
= VEC_UNPACK_LO_EXPR
;
11580 c2
= VEC_UNPACK_HI_EXPR
;
11584 c1
= VEC_UNPACK_FLOAT_LO_EXPR
;
11585 c2
= VEC_UNPACK_FLOAT_HI_EXPR
;
11588 case FIX_TRUNC_EXPR
:
11589 c1
= VEC_UNPACK_FIX_TRUNC_LO_EXPR
;
11590 c2
= VEC_UNPACK_FIX_TRUNC_HI_EXPR
;
11594 gcc_unreachable ();
11597 if (BYTES_BIG_ENDIAN
&& c1
!= VEC_WIDEN_MULT_EVEN_EXPR
)
11598 std::swap (c1
, c2
);
11600 if (code
== FIX_TRUNC_EXPR
)
11602 /* The signedness is determined from output operand. */
11603 optab1
= optab_for_tree_code (c1
, vectype_out
, optab_default
);
11604 optab2
= optab_for_tree_code (c2
, vectype_out
, optab_default
);
11606 else if (CONVERT_EXPR_CODE_P (code
)
11607 && VECTOR_BOOLEAN_TYPE_P (wide_vectype
)
11608 && VECTOR_BOOLEAN_TYPE_P (vectype
)
11609 && TYPE_MODE (wide_vectype
) == TYPE_MODE (vectype
)
11610 && SCALAR_INT_MODE_P (TYPE_MODE (vectype
)))
11612 /* If the input and result modes are the same, a different optab
11613 is needed where we pass in the number of units in vectype. */
11614 optab1
= vec_unpacks_sbool_lo_optab
;
11615 optab2
= vec_unpacks_sbool_hi_optab
;
11619 optab1
= optab_for_tree_code (c1
, vectype
, optab_default
);
11620 optab2
= optab_for_tree_code (c2
, vectype
, optab_default
);
11623 if (!optab1
|| !optab2
)
11626 vec_mode
= TYPE_MODE (vectype
);
11627 if ((icode1
= optab_handler (optab1
, vec_mode
)) == CODE_FOR_nothing
11628 || (icode2
= optab_handler (optab2
, vec_mode
)) == CODE_FOR_nothing
)
11634 if (insn_data
[icode1
].operand
[0].mode
== TYPE_MODE (wide_vectype
)
11635 && insn_data
[icode2
].operand
[0].mode
== TYPE_MODE (wide_vectype
))
11637 if (!VECTOR_BOOLEAN_TYPE_P (vectype
))
11639 /* For scalar masks we may have different boolean
11640 vector types having the same QImode. Thus we
11641 add additional check for elements number. */
11642 if (known_eq (TYPE_VECTOR_SUBPARTS (vectype
),
11643 TYPE_VECTOR_SUBPARTS (wide_vectype
) * 2))
11647 /* Check if it's a multi-step conversion that can be done using intermediate
11650 prev_type
= vectype
;
11651 prev_mode
= vec_mode
;
11653 if (!CONVERT_EXPR_CODE_P (code
))
11656 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
11657 intermediate steps in promotion sequence. We try
11658 MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do
11660 interm_types
->create (MAX_INTERM_CVT_STEPS
);
11661 for (i
= 0; i
< MAX_INTERM_CVT_STEPS
; i
++)
11663 intermediate_mode
= insn_data
[icode1
].operand
[0].mode
;
11664 if (VECTOR_BOOLEAN_TYPE_P (prev_type
))
11666 = vect_halve_mask_nunits (prev_type
, intermediate_mode
);
11669 = lang_hooks
.types
.type_for_mode (intermediate_mode
,
11670 TYPE_UNSIGNED (prev_type
));
11672 if (VECTOR_BOOLEAN_TYPE_P (intermediate_type
)
11673 && VECTOR_BOOLEAN_TYPE_P (prev_type
)
11674 && intermediate_mode
== prev_mode
11675 && SCALAR_INT_MODE_P (prev_mode
))
11677 /* If the input and result modes are the same, a different optab
11678 is needed where we pass in the number of units in vectype. */
11679 optab3
= vec_unpacks_sbool_lo_optab
;
11680 optab4
= vec_unpacks_sbool_hi_optab
;
11684 optab3
= optab_for_tree_code (c1
, intermediate_type
, optab_default
);
11685 optab4
= optab_for_tree_code (c2
, intermediate_type
, optab_default
);
11688 if (!optab3
|| !optab4
11689 || (icode1
= optab_handler (optab1
, prev_mode
)) == CODE_FOR_nothing
11690 || insn_data
[icode1
].operand
[0].mode
!= intermediate_mode
11691 || (icode2
= optab_handler (optab2
, prev_mode
)) == CODE_FOR_nothing
11692 || insn_data
[icode2
].operand
[0].mode
!= intermediate_mode
11693 || ((icode1
= optab_handler (optab3
, intermediate_mode
))
11694 == CODE_FOR_nothing
)
11695 || ((icode2
= optab_handler (optab4
, intermediate_mode
))
11696 == CODE_FOR_nothing
))
11699 interm_types
->quick_push (intermediate_type
);
11700 (*multi_step_cvt
)++;
11702 if (insn_data
[icode1
].operand
[0].mode
== TYPE_MODE (wide_vectype
)
11703 && insn_data
[icode2
].operand
[0].mode
== TYPE_MODE (wide_vectype
))
11705 if (!VECTOR_BOOLEAN_TYPE_P (vectype
))
11707 if (known_eq (TYPE_VECTOR_SUBPARTS (intermediate_type
),
11708 TYPE_VECTOR_SUBPARTS (wide_vectype
) * 2))
11712 prev_type
= intermediate_type
;
11713 prev_mode
= intermediate_mode
;
11716 interm_types
->release ();
11721 /* Function supportable_narrowing_operation
11723 Check whether an operation represented by the code CODE is a
11724 narrowing operation that is supported by the target platform in
11725 vector form (i.e., when operating on arguments of type VECTYPE_IN
11726 and producing a result of type VECTYPE_OUT).
11728 Narrowing operations we currently support are NOP (CONVERT), FIX_TRUNC
11729 and FLOAT. This function checks if these operations are supported by
11730 the target platform directly via vector tree-codes.
11733 - CODE1 is the code of a vector operation to be used when
11734 vectorizing the operation, if available.
11735 - MULTI_STEP_CVT determines the number of required intermediate steps in
11736 case of multi-step conversion (like int->short->char - in that case
11737 MULTI_STEP_CVT will be 1).
11738 - INTERM_TYPES contains the intermediate type required to perform the
11739 narrowing operation (short in the above example). */
11742 supportable_narrowing_operation (enum tree_code code
,
11743 tree vectype_out
, tree vectype_in
,
11744 enum tree_code
*code1
, int *multi_step_cvt
,
11745 vec
<tree
> *interm_types
)
11747 machine_mode vec_mode
;
11748 enum insn_code icode1
;
11749 optab optab1
, interm_optab
;
11750 tree vectype
= vectype_in
;
11751 tree narrow_vectype
= vectype_out
;
11753 tree intermediate_type
, prev_type
;
11754 machine_mode intermediate_mode
, prev_mode
;
11758 *multi_step_cvt
= 0;
11762 c1
= VEC_PACK_TRUNC_EXPR
;
11763 if (VECTOR_BOOLEAN_TYPE_P (narrow_vectype
)
11764 && VECTOR_BOOLEAN_TYPE_P (vectype
)
11765 && TYPE_MODE (narrow_vectype
) == TYPE_MODE (vectype
)
11766 && SCALAR_INT_MODE_P (TYPE_MODE (vectype
)))
11767 optab1
= vec_pack_sbool_trunc_optab
;
11769 optab1
= optab_for_tree_code (c1
, vectype
, optab_default
);
11772 case FIX_TRUNC_EXPR
:
11773 c1
= VEC_PACK_FIX_TRUNC_EXPR
;
11774 /* The signedness is determined from output operand. */
11775 optab1
= optab_for_tree_code (c1
, vectype_out
, optab_default
);
11779 c1
= VEC_PACK_FLOAT_EXPR
;
11780 optab1
= optab_for_tree_code (c1
, vectype
, optab_default
);
11784 gcc_unreachable ();
11790 vec_mode
= TYPE_MODE (vectype
);
11791 if ((icode1
= optab_handler (optab1
, vec_mode
)) == CODE_FOR_nothing
)
11796 if (insn_data
[icode1
].operand
[0].mode
== TYPE_MODE (narrow_vectype
))
11798 if (!VECTOR_BOOLEAN_TYPE_P (vectype
))
11800 /* For scalar masks we may have different boolean
11801 vector types having the same QImode. Thus we
11802 add additional check for elements number. */
11803 if (known_eq (TYPE_VECTOR_SUBPARTS (vectype
) * 2,
11804 TYPE_VECTOR_SUBPARTS (narrow_vectype
)))
11808 if (code
== FLOAT_EXPR
)
11811 /* Check if it's a multi-step conversion that can be done using intermediate
11813 prev_mode
= vec_mode
;
11814 prev_type
= vectype
;
11815 if (code
== FIX_TRUNC_EXPR
)
11816 uns
= TYPE_UNSIGNED (vectype_out
);
11818 uns
= TYPE_UNSIGNED (vectype
);
11820 /* For multi-step FIX_TRUNC_EXPR prefer signed floating to integer
11821 conversion over unsigned, as unsigned FIX_TRUNC_EXPR is often more
11822 costly than signed. */
11823 if (code
== FIX_TRUNC_EXPR
&& uns
)
11825 enum insn_code icode2
;
11828 = lang_hooks
.types
.type_for_mode (TYPE_MODE (vectype_out
), 0);
11830 = optab_for_tree_code (c1
, intermediate_type
, optab_default
);
11831 if (interm_optab
!= unknown_optab
11832 && (icode2
= optab_handler (optab1
, vec_mode
)) != CODE_FOR_nothing
11833 && insn_data
[icode1
].operand
[0].mode
11834 == insn_data
[icode2
].operand
[0].mode
)
11837 optab1
= interm_optab
;
11842 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
11843 intermediate steps in promotion sequence. We try
11844 MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do not. */
11845 interm_types
->create (MAX_INTERM_CVT_STEPS
);
11846 for (i
= 0; i
< MAX_INTERM_CVT_STEPS
; i
++)
11848 intermediate_mode
= insn_data
[icode1
].operand
[0].mode
;
11849 if (VECTOR_BOOLEAN_TYPE_P (prev_type
))
11851 = vect_double_mask_nunits (prev_type
, intermediate_mode
);
11854 = lang_hooks
.types
.type_for_mode (intermediate_mode
, uns
);
11855 if (VECTOR_BOOLEAN_TYPE_P (intermediate_type
)
11856 && VECTOR_BOOLEAN_TYPE_P (prev_type
)
11857 && intermediate_mode
== prev_mode
11858 && SCALAR_INT_MODE_P (prev_mode
))
11859 interm_optab
= vec_pack_sbool_trunc_optab
;
11862 = optab_for_tree_code (VEC_PACK_TRUNC_EXPR
, intermediate_type
,
11865 || ((icode1
= optab_handler (optab1
, prev_mode
)) == CODE_FOR_nothing
)
11866 || insn_data
[icode1
].operand
[0].mode
!= intermediate_mode
11867 || ((icode1
= optab_handler (interm_optab
, intermediate_mode
))
11868 == CODE_FOR_nothing
))
11871 interm_types
->quick_push (intermediate_type
);
11872 (*multi_step_cvt
)++;
11874 if (insn_data
[icode1
].operand
[0].mode
== TYPE_MODE (narrow_vectype
))
11876 if (!VECTOR_BOOLEAN_TYPE_P (vectype
))
11878 if (known_eq (TYPE_VECTOR_SUBPARTS (intermediate_type
) * 2,
11879 TYPE_VECTOR_SUBPARTS (narrow_vectype
)))
11883 prev_mode
= intermediate_mode
;
11884 prev_type
= intermediate_type
;
11885 optab1
= interm_optab
;
11888 interm_types
->release ();
11892 /* Generate and return a statement that sets vector mask MASK such that
11893 MASK[I] is true iff J + START_INDEX < END_INDEX for all J <= I. */
11896 vect_gen_while (tree mask
, tree start_index
, tree end_index
)
11898 tree cmp_type
= TREE_TYPE (start_index
);
11899 tree mask_type
= TREE_TYPE (mask
);
11900 gcc_checking_assert (direct_internal_fn_supported_p (IFN_WHILE_ULT
,
11901 cmp_type
, mask_type
,
11902 OPTIMIZE_FOR_SPEED
));
11903 gcall
*call
= gimple_build_call_internal (IFN_WHILE_ULT
, 3,
11904 start_index
, end_index
,
11905 build_zero_cst (mask_type
));
11906 gimple_call_set_lhs (call
, mask
);
11910 /* Generate a vector mask of type MASK_TYPE for which index I is false iff
11911 J + START_INDEX < END_INDEX for all J <= I. Add the statements to SEQ. */
11914 vect_gen_while_not (gimple_seq
*seq
, tree mask_type
, tree start_index
,
11917 tree tmp
= make_ssa_name (mask_type
);
11918 gcall
*call
= vect_gen_while (tmp
, start_index
, end_index
);
11919 gimple_seq_add_stmt (seq
, call
);
11920 return gimple_build (seq
, BIT_NOT_EXPR
, mask_type
, tmp
);
11923 /* Try to compute the vector types required to vectorize STMT_INFO,
11924 returning true on success and false if vectorization isn't possible.
11928 - Set *STMT_VECTYPE_OUT to:
11929 - NULL_TREE if the statement doesn't need to be vectorized;
11930 - boolean_type_node if the statement is a boolean operation whose
11931 vector type can only be determined once all the other vector types
11933 - the equivalent of STMT_VINFO_VECTYPE otherwise.
11935 - Set *NUNITS_VECTYPE_OUT to the vector type that contains the maximum
11936 number of units needed to vectorize STMT_INFO, or NULL_TREE if the
11937 statement does not help to determine the overall number of units. */
11940 vect_get_vector_types_for_stmt (stmt_vec_info stmt_info
,
11941 tree
*stmt_vectype_out
,
11942 tree
*nunits_vectype_out
)
11944 vec_info
*vinfo
= stmt_info
->vinfo
;
11945 gimple
*stmt
= stmt_info
->stmt
;
11947 *stmt_vectype_out
= NULL_TREE
;
11948 *nunits_vectype_out
= NULL_TREE
;
11950 if (gimple_get_lhs (stmt
) == NULL_TREE
11951 /* MASK_STORE has no lhs, but is ok. */
11952 && !gimple_call_internal_p (stmt
, IFN_MASK_STORE
))
11954 if (is_a
<gcall
*> (stmt
))
11956 /* Ignore calls with no lhs. These must be calls to
11957 #pragma omp simd functions, and what vectorization factor
11958 it really needs can't be determined until
11959 vectorizable_simd_clone_call. */
11960 if (dump_enabled_p ())
11961 dump_printf_loc (MSG_NOTE
, vect_location
,
11962 "defer to SIMD clone analysis.\n");
11963 return opt_result::success ();
11966 return opt_result::failure_at (stmt
,
11967 "not vectorized: irregular stmt.%G", stmt
);
11970 if (VECTOR_MODE_P (TYPE_MODE (gimple_expr_type (stmt
))))
11971 return opt_result::failure_at (stmt
,
11972 "not vectorized: vector stmt in loop:%G",
11976 tree scalar_type
= NULL_TREE
;
11977 if (STMT_VINFO_VECTYPE (stmt_info
))
11978 *stmt_vectype_out
= vectype
= STMT_VINFO_VECTYPE (stmt_info
);
11981 gcc_assert (!STMT_VINFO_DATA_REF (stmt_info
));
11982 if (gimple_call_internal_p (stmt
, IFN_MASK_STORE
))
11983 scalar_type
= TREE_TYPE (gimple_call_arg (stmt
, 3));
11985 scalar_type
= TREE_TYPE (gimple_get_lhs (stmt
));
11987 /* Pure bool ops don't participate in number-of-units computation.
11988 For comparisons use the types being compared. */
11989 if (VECT_SCALAR_BOOLEAN_TYPE_P (scalar_type
)
11990 && is_gimple_assign (stmt
)
11991 && gimple_assign_rhs_code (stmt
) != COND_EXPR
)
11993 *stmt_vectype_out
= boolean_type_node
;
11995 tree rhs1
= gimple_assign_rhs1 (stmt
);
11996 if (TREE_CODE_CLASS (gimple_assign_rhs_code (stmt
)) == tcc_comparison
11997 && !VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (rhs1
)))
11998 scalar_type
= TREE_TYPE (rhs1
);
12001 if (dump_enabled_p ())
12002 dump_printf_loc (MSG_NOTE
, vect_location
,
12003 "pure bool operation.\n");
12004 return opt_result::success ();
12008 if (dump_enabled_p ())
12009 dump_printf_loc (MSG_NOTE
, vect_location
,
12010 "get vectype for scalar type: %T\n", scalar_type
);
12011 vectype
= get_vectype_for_scalar_type (vinfo
, scalar_type
);
12013 return opt_result::failure_at (stmt
,
12015 " unsupported data-type %T\n",
12018 if (!*stmt_vectype_out
)
12019 *stmt_vectype_out
= vectype
;
12021 if (dump_enabled_p ())
12022 dump_printf_loc (MSG_NOTE
, vect_location
, "vectype: %T\n", vectype
);
12025 /* Don't try to compute scalar types if the stmt produces a boolean
12026 vector; use the existing vector type instead. */
12027 tree nunits_vectype
;
12028 if (VECTOR_BOOLEAN_TYPE_P (vectype
))
12029 nunits_vectype
= vectype
;
12032 /* The number of units is set according to the smallest scalar
12033 type (or the largest vector size, but we only support one
12034 vector size per vectorization). */
12035 if (*stmt_vectype_out
!= boolean_type_node
)
12037 HOST_WIDE_INT dummy
;
12038 scalar_type
= vect_get_smallest_scalar_type (stmt_info
,
12041 if (dump_enabled_p ())
12042 dump_printf_loc (MSG_NOTE
, vect_location
,
12043 "get vectype for scalar type: %T\n", scalar_type
);
12044 nunits_vectype
= get_vectype_for_scalar_type (vinfo
, scalar_type
);
12046 if (!nunits_vectype
)
12047 return opt_result::failure_at (stmt
,
12048 "not vectorized: unsupported data-type %T\n",
12051 if (maybe_ne (GET_MODE_SIZE (TYPE_MODE (vectype
)),
12052 GET_MODE_SIZE (TYPE_MODE (nunits_vectype
))))
12053 return opt_result::failure_at (stmt
,
12054 "not vectorized: different sized vector "
12055 "types in statement, %T and %T\n",
12056 vectype
, nunits_vectype
);
12058 if (dump_enabled_p ())
12060 dump_printf_loc (MSG_NOTE
, vect_location
, "vectype: %T\n",
12063 dump_printf_loc (MSG_NOTE
, vect_location
, "nunits = ");
12064 dump_dec (MSG_NOTE
, TYPE_VECTOR_SUBPARTS (nunits_vectype
));
12065 dump_printf (MSG_NOTE
, "\n");
12068 *nunits_vectype_out
= nunits_vectype
;
12069 return opt_result::success ();
12072 /* Try to determine the correct vector type for STMT_INFO, which is a
12073 statement that produces a scalar boolean result. Return the vector
12074 type on success, otherwise return NULL_TREE. */
12077 vect_get_mask_type_for_stmt (stmt_vec_info stmt_info
)
12079 vec_info
*vinfo
= stmt_info
->vinfo
;
12080 gimple
*stmt
= stmt_info
->stmt
;
12081 tree mask_type
= NULL
;
12082 tree vectype
, scalar_type
;
12084 if (is_gimple_assign (stmt
)
12085 && TREE_CODE_CLASS (gimple_assign_rhs_code (stmt
)) == tcc_comparison
12086 && !VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (gimple_assign_rhs1 (stmt
))))
12088 scalar_type
= TREE_TYPE (gimple_assign_rhs1 (stmt
));
12089 mask_type
= get_mask_type_for_scalar_type (vinfo
, scalar_type
);
12092 return opt_tree::failure_at (stmt
,
12093 "not vectorized: unsupported mask\n");
12099 enum vect_def_type dt
;
12101 FOR_EACH_SSA_TREE_OPERAND (rhs
, stmt
, iter
, SSA_OP_USE
)
12103 if (!vect_is_simple_use (rhs
, stmt_info
->vinfo
, &dt
, &vectype
))
12104 return opt_tree::failure_at (stmt
,
12105 "not vectorized:can't compute mask"
12106 " type for statement, %G", stmt
);
12108 /* No vectype probably means external definition.
12109 Allow it in case there is another operand which
12110 allows to determine mask type. */
12115 mask_type
= vectype
;
12116 else if (maybe_ne (TYPE_VECTOR_SUBPARTS (mask_type
),
12117 TYPE_VECTOR_SUBPARTS (vectype
)))
12118 return opt_tree::failure_at (stmt
,
12119 "not vectorized: different sized mask"
12120 " types in statement, %T and %T\n",
12121 mask_type
, vectype
);
12122 else if (VECTOR_BOOLEAN_TYPE_P (mask_type
)
12123 != VECTOR_BOOLEAN_TYPE_P (vectype
))
12124 return opt_tree::failure_at (stmt
,
12125 "not vectorized: mixed mask and "
12126 "nonmask vector types in statement, "
12128 mask_type
, vectype
);
12131 /* We may compare boolean value loaded as vector of integers.
12132 Fix mask_type in such case. */
12134 && !VECTOR_BOOLEAN_TYPE_P (mask_type
)
12135 && gimple_code (stmt
) == GIMPLE_ASSIGN
12136 && TREE_CODE_CLASS (gimple_assign_rhs_code (stmt
)) == tcc_comparison
)
12137 mask_type
= truth_type_for (mask_type
);
12140 /* No mask_type should mean loop invariant predicate.
12141 This is probably a subject for optimization in if-conversion. */
12143 return opt_tree::failure_at (stmt
,
12144 "not vectorized: can't compute mask type "
12145 "for statement: %G", stmt
);
12147 return opt_tree::success (mask_type
);