1 /* Statement Analysis and Transformation for Vectorization
2 Copyright (C) 2003-2020 Free Software Foundation, Inc.
3 Contributed by Dorit Naishlos <dorit@il.ibm.com>
4 and Ira Rosen <irar@il.ibm.com>
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
24 #include "coretypes.h"
31 #include "optabs-tree.h"
32 #include "insn-config.h"
33 #include "recog.h" /* FIXME: for insn_data */
37 #include "fold-const.h"
38 #include "stor-layout.h"
41 #include "gimple-iterator.h"
42 #include "gimplify-me.h"
44 #include "tree-ssa-loop-manip.h"
47 #include "tree-ssa-loop.h"
48 #include "tree-scalar-evolution.h"
49 #include "tree-vectorizer.h"
51 #include "internal-fn.h"
52 #include "tree-vector-builder.h"
53 #include "vec-perm-indices.h"
54 #include "tree-ssa-loop-niter.h"
55 #include "gimple-fold.h"
59 /* For lang_hooks.types.type_for_mode. */
60 #include "langhooks.h"
/* NOTE (review): extraction-garbled text — statements are split across lines
   and the elided original lines (return type, braces) must be restored from
   the upstream file before this compiles.  */
62 /* Return the vectorized type for the given statement. */
65 stmt_vectype (class _stmt_vec_info
*stmt_info
)
/* Simply forwards to the STMT_VINFO_VECTYPE accessor on STMT_INFO.  */
67 return STMT_VINFO_VECTYPE (stmt_info
);
/* NOTE (review): extraction-garbled text — restore elided lines (return type,
   braces, any null checks between original lines 77 and 83) from upstream.  */
70 /* Return TRUE iff the given statement is in an inner loop relative to
71 the loop being vectorized. */
73 stmt_in_inner_loop_p (class _stmt_vec_info
*stmt_info
)
/* Fetch the statement, its basic block and the enclosing loop_vec_info.  */
75 gimple
*stmt
= STMT_VINFO_STMT (stmt_info
);
76 basic_block bb
= gimple_bb (stmt
);
77 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
83 loop
= LOOP_VINFO_LOOP (loop_vinfo
);
/* True iff the block's loop_father is the inner loop of the loop being
   vectorized.  */
85 return (bb
->loop_father
== loop
->inner
);
/* NOTE (review): extraction-garbled text — the return-type line, braces and
   original lines 96/103/106/108 are elided; restore from upstream before
   editing.  */
88 /* Record the cost of a statement, either by directly informing the
89 target model or by saving it in a vector for later processing.
90 Return a preliminary estimate of the statement's cost. */
93 record_stmt_cost (stmt_vector_for_cost
*body_cost_vec
, int count
,
94 enum vect_cost_for_stmt kind
, stmt_vec_info stmt_info
,
95 int misalign
, enum vect_cost_model_location where
)
/* Re-classify plain/unaligned loads as gather loads when the stmt is a
   gather/scatter access.  */
97 if ((kind
== vector_load
|| kind
== unaligned_load
)
98 && STMT_VINFO_GATHER_SCATTER_P (stmt_info
))
99 kind
= vector_gather_load
;
/* Likewise for stores: becomes a scatter store.  */
100 if ((kind
== vector_store
|| kind
== unaligned_store
)
101 && STMT_VINFO_GATHER_SCATTER_P (stmt_info
))
102 kind
= vector_scatter_store
;
/* Save the cost entry for later processing by the target model.  */
104 stmt_info_for_cost si
= { count
, kind
, where
, stmt_info
, misalign
};
105 body_cost_vec
->safe_push (si
);
107 tree vectype
= stmt_info
? stmt_vectype (stmt_info
) : NULL_TREE
;
/* Preliminary estimate: per-stmt builtin cost scaled by COUNT.  */
109 (builtin_vectorization_cost (kind
, vectype
, misalign
) * count
);
/* NOTE (review): extraction-garbled text — trailing argument of
   create_tmp_var (presumably the variable name) and the closing lines are
   elided; restore from upstream.  */
112 /* Return a variable of type ELEM_TYPE[NELEMS]. */
115 create_vector_array (tree elem_type
, unsigned HOST_WIDE_INT nelems
)
117 return create_tmp_var (build_array_type_nelts (elem_type
, nelems
),
/* NOTE (review): extraction-garbled text — declaration of new_stmt (original
   line 139) and the trailing return of vect_name are elided; restore from
   upstream.  */
121 /* ARRAY is an array of vectors created by create_vector_array.
122 Return an SSA_NAME for the vector in index N. The reference
123 is part of the vectorization of STMT_INFO and the vector is associated
124 with scalar destination SCALAR_DEST. */
127 read_vector_array (stmt_vec_info stmt_info
, gimple_stmt_iterator
*gsi
,
128 tree scalar_dest
, tree array
, unsigned HOST_WIDE_INT n
)
130 tree vect_type
, vect
, vect_name
, array_ref
;
133 gcc_assert (TREE_CODE (TREE_TYPE (array
)) == ARRAY_TYPE
);
/* Element type of the array is the vector type being read.  */
134 vect_type
= TREE_TYPE (TREE_TYPE (array
));
135 vect
= vect_create_destination_var (scalar_dest
, vect_type
);
/* Build ARRAY[N] with constant index N.  */
136 array_ref
= build4 (ARRAY_REF
, vect_type
, array
,
137 build_int_cst (size_type_node
, n
),
138 NULL_TREE
, NULL_TREE
);
/* Assign the array element to a fresh SSA name and emit before *GSI.  */
140 new_stmt
= gimple_build_assign (vect
, array_ref
);
141 vect_name
= make_ssa_name (vect
, new_stmt
);
142 gimple_assign_set_lhs (new_stmt
, vect_name
);
143 vect_finish_stmt_generation (stmt_info
, new_stmt
, gsi
);
/* NOTE (review): extraction-garbled text — local declarations (original
   lines 155-158) and braces are elided; restore from upstream.  */
148 /* ARRAY is an array of vectors created by create_vector_array.
149 Emit code to store SSA_NAME VECT in index N of the array.
150 The store is part of the vectorization of STMT_INFO. */
153 write_vector_array (stmt_vec_info stmt_info
, gimple_stmt_iterator
*gsi
,
154 tree vect
, tree array
, unsigned HOST_WIDE_INT n
)
/* Build ARRAY[N] as the store destination.  */
159 array_ref
= build4 (ARRAY_REF
, TREE_TYPE (vect
), array
,
160 build_int_cst (size_type_node
, n
),
161 NULL_TREE
, NULL_TREE
);
/* Emit ARRAY[N] = VECT before *GSI.  */
163 new_stmt
= gimple_build_assign (array_ref
, vect
);
164 vect_finish_stmt_generation (stmt_info
, new_stmt
, gsi
);
/* NOTE (review): extraction-garbled text — part of the leading comment and
   the trailing return of mem_ref are elided; restore from upstream.  */
167 /* PTR is a pointer to an array of type TYPE. Return a representation
168 of *PTR. The memory reference replaces those in FIRST_DR
172 create_array_ref (tree type
, tree ptr
, tree alias_ptr_type
)
/* Build a MEM_REF *PTR with a zero offset of the given alias type.  */
176 mem_ref
= build2 (MEM_REF
, type
, ptr
, build_int_cst (alias_ptr_type
, 0));
177 /* Arrays have the same alignment as their type. */
178 set_ptr_info_alignment (get_ptr_info (ptr
), TYPE_ALIGN_UNIT (type
), 0);
/* NOTE (review): extraction-garbled text — the VAR parameter's declaration
   (original lines 187-188) is elided; restore from upstream.  */
182 /* Add a clobber of variable VAR to the vectorization of STMT_INFO.
183 Emit the clobber before *GSI. */
186 vect_clobber_variable (stmt_vec_info stmt_info
, gimple_stmt_iterator
*gsi
,
/* Emit VAR = {CLOBBER} before *GSI to end VAR's lifetime.  */
189 tree clobber
= build_clobber (TREE_TYPE (var
));
190 gimple
*new_stmt
= gimple_build_assign (var
, clobber
);
191 vect_finish_stmt_generation (stmt_info
, new_stmt
, gsi
);
194 /* Utility functions used by vect_mark_stmts_to_be_vectorized. */
/* NOTE (review): extraction-garbled text — braces and several original lines
   (e.g. 210-211, 244-246) are elided; restore from upstream before editing.  */
196 /* Function vect_mark_relevant.
198 Mark STMT_INFO as "relevant for vectorization" and add it to WORKLIST. */
201 vect_mark_relevant (vec
<stmt_vec_info
> *worklist
, stmt_vec_info stmt_info
,
202 enum vect_relevant relevant
, bool live_p
)
/* Remember the incoming relevance/liveness so we can detect no-op marks.  */
204 enum vect_relevant save_relevant
= STMT_VINFO_RELEVANT (stmt_info
);
205 bool save_live_p
= STMT_VINFO_LIVE_P (stmt_info
);
207 if (dump_enabled_p ())
208 dump_printf_loc (MSG_NOTE
, vect_location
,
209 "mark relevant %d, live %d: %G", relevant
, live_p
,
212 /* If this stmt is an original stmt in a pattern, we might need to mark its
213 related pattern stmt instead of the original stmt. However, such stmts
214 may have their own uses that are not in any pattern, in such cases the
215 stmt itself should be marked. */
216 if (STMT_VINFO_IN_PATTERN_P (stmt_info
))
218 /* This is the last stmt in a sequence that was detected as a
219 pattern that can potentially be vectorized. Don't mark the stmt
220 as relevant/live because it's not going to be vectorized.
221 Instead mark the pattern-stmt that replaces it. */
223 if (dump_enabled_p ())
224 dump_printf_loc (MSG_NOTE
, vect_location
,
225 "last stmt in pattern. don't mark"
226 " relevant/live.\n");
/* Redirect marking to the related pattern stmt and re-snapshot its state.  */
227 stmt_vec_info old_stmt_info
= stmt_info
;
228 stmt_info
= STMT_VINFO_RELATED_STMT (stmt_info
);
229 gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info
) == old_stmt_info
);
230 save_relevant
= STMT_VINFO_RELEVANT (stmt_info
);
231 save_live_p
= STMT_VINFO_LIVE_P (stmt_info
);
/* Merge in the new liveness and raise relevance monotonically.  */
234 STMT_VINFO_LIVE_P (stmt_info
) |= live_p
;
235 if (relevant
> STMT_VINFO_RELEVANT (stmt_info
))
236 STMT_VINFO_RELEVANT (stmt_info
) = relevant
;
/* Nothing changed — do not push the stmt onto the worklist again.  */
238 if (STMT_VINFO_RELEVANT (stmt_info
) == save_relevant
239 && STMT_VINFO_LIVE_P (stmt_info
) == save_live_p
)
241 if (dump_enabled_p ())
242 dump_printf_loc (MSG_NOTE
, vect_location
,
243 "already marked relevant/live.\n");
247 worklist
->safe_push (stmt_info
);
/* NOTE (review): extraction-garbled text — locals, braces and the return
   statements (e.g. original lines 258-265, 275-280) are elided; restore from
   upstream.  */
251 /* Function is_simple_and_all_uses_invariant
253 Return true if STMT_INFO is simple and all uses of it are invariant. */
256 is_simple_and_all_uses_invariant (stmt_vec_info stmt_info
,
257 loop_vec_info loop_vinfo
)
262 gassign
*stmt
= dyn_cast
<gassign
*> (stmt_info
->stmt
);
/* Check each SSA use operand of the assignment.  */
266 FOR_EACH_SSA_TREE_OPERAND (op
, stmt
, iter
, SSA_OP_USE
)
268 enum vect_def_type dt
= vect_uninitialized_def
;
270 if (!vect_is_simple_use (op
, loop_vinfo
, &dt
))
272 if (dump_enabled_p ())
273 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
274 "use not simple.\n");
/* Only external and constant defs count as invariant.  */
278 if (dt
!= vect_external_def
&& dt
!= vect_constant_def
)
/* NOTE (review): extraction-garbled text — braces, iterator declarations and
   the *live_p initialisation (around original lines 303-307) are elided;
   restore from upstream before editing.  */
284 /* Function vect_stmt_relevant_p.
286 Return true if STMT_INFO, in the loop that is represented by LOOP_VINFO,
287 is "relevant for vectorization".
289 A stmt is considered "relevant for vectorization" if:
290 - it has uses outside the loop.
291 - it has vdefs (it alters memory).
292 - control stmts in the loop (except for the exit condition).
294 CHECKME: what other side effects would the vectorizer allow? */
297 vect_stmt_relevant_p (stmt_vec_info stmt_info
, loop_vec_info loop_vinfo
,
298 enum vect_relevant
*relevant
, bool *live_p
)
300 class loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
302 imm_use_iterator imm_iter
;
306 *relevant
= vect_unused_in_scope
;
309 /* cond stmt other than loop exit cond. */
310 if (is_ctrl_stmt (stmt_info
->stmt
)
311 && STMT_VINFO_TYPE (stmt_info
) != loop_exit_ctrl_vec_info_type
)
312 *relevant
= vect_used_in_scope
;
314 /* changing memory. */
315 if (gimple_code (stmt_info
->stmt
) != GIMPLE_PHI
)
316 if (gimple_vdef (stmt_info
->stmt
)
317 && !gimple_clobber_p (stmt_info
->stmt
))
319 if (dump_enabled_p ())
320 dump_printf_loc (MSG_NOTE
, vect_location
,
321 "vec_stmt_relevant_p: stmt has vdefs.\n");
322 *relevant
= vect_used_in_scope
;
325 /* uses outside the loop. */
326 FOR_EACH_PHI_OR_STMT_DEF (def_p
, stmt_info
->stmt
, op_iter
, SSA_OP_DEF
)
328 FOR_EACH_IMM_USE_FAST (use_p
, imm_iter
, DEF_FROM_PTR (def_p
))
330 basic_block bb
= gimple_bb (USE_STMT (use_p
));
331 if (!flow_bb_inside_loop_p (loop
, bb
))
/* Debug uses do not make a stmt live.  */
333 if (is_gimple_debug (USE_STMT (use_p
)))
336 if (dump_enabled_p ())
337 dump_printf_loc (MSG_NOTE
, vect_location
,
338 "vec_stmt_relevant_p: used out of loop.\n");
340 /* We expect all such uses to be in the loop exit phis
341 (because of loop closed form) */
342 gcc_assert (gimple_code (USE_STMT (use_p
)) == GIMPLE_PHI
);
343 gcc_assert (bb
== single_exit (loop
)->dest
);
/* A live-but-unused stmt with non-invariant uses is forced to
   vect_used_only_live.  */
350 if (*live_p
&& *relevant
== vect_unused_in_scope
351 && !is_simple_and_all_uses_invariant (stmt_info
, loop_vinfo
))
353 if (dump_enabled_p ())
354 dump_printf_loc (MSG_NOTE
, vect_location
,
355 "vec_stmt_relevant_p: stmt live but not relevant.\n");
356 *relevant
= vect_used_only_live
;
359 return (*live_p
|| *relevant
);
/* NOTE (review): extraction-garbled text — several return statements and the
   final comparison (original lines after 417) are elided; restore from
   upstream before editing.  */
363 /* Function exist_non_indexing_operands_for_use_p
365 USE is one of the uses attached to STMT_INFO. Check if USE is
366 used in STMT_INFO for anything other than indexing an array. */
369 exist_non_indexing_operands_for_use_p (tree use
, stmt_vec_info stmt_info
)
373 /* USE corresponds to some operand in STMT. If there is no data
374 reference in STMT, then any operand that corresponds to USE
375 is not indexing an array. */
376 if (!STMT_VINFO_DATA_REF (stmt_info
))
379 /* STMT has a data_ref. FORNOW this means that its of one of
383 (This should have been verified in analyze_data_refs).
385 'var' in the second case corresponds to a def, not a use,
386 so USE cannot correspond to any operands that are not used
389 Therefore, all we need to check is if STMT falls into the
390 first case, and whether var corresponds to USE. */
392 gassign
*assign
= dyn_cast
<gassign
*> (stmt_info
->stmt
);
393 if (!assign
|| !gimple_assign_copy_p (assign
))
/* Internal calls: mask, stored-value and gather/scatter offset arguments
   are real (non-indexing) uses.  */
395 gcall
*call
= dyn_cast
<gcall
*> (stmt_info
->stmt
);
396 if (call
&& gimple_call_internal_p (call
))
398 internal_fn ifn
= gimple_call_internal_fn (call
);
399 int mask_index
= internal_fn_mask_index (ifn
);
401 && use
== gimple_call_arg (call
, mask_index
))
403 int stored_value_index
= internal_fn_stored_value_index (ifn
);
404 if (stored_value_index
>= 0
405 && use
== gimple_call_arg (call
, stored_value_index
))
407 if (internal_gather_scatter_fn_p (ifn
)
408 && use
== gimple_call_arg (call
, 1))
/* Plain copy: compare USE against the RHS operand.  */
414 if (TREE_CODE (gimple_assign_lhs (assign
)) == SSA_NAME
)
416 operand
= gimple_assign_rhs1 (assign
);
417 if (TREE_CODE (operand
) != SSA_NAME
)
/* NOTE (review): extraction-garbled text — the comment opener at original
   line 427, the FORCE parameter line, switch statements' heads, braces and
   break statements are elided; restore from upstream before editing.  */
428 Function process_use.
431 - a USE in STMT_VINFO in a loop represented by LOOP_VINFO
432 - RELEVANT - enum value to be set in the STMT_VINFO of the stmt
433 that defined USE. This is done by calling mark_relevant and passing it
434 the WORKLIST (to add DEF_STMT to the WORKLIST in case it is relevant).
435 - FORCE is true if exist_non_indexing_operands_for_use_p check shouldn't
439 Generally, LIVE_P and RELEVANT are used to define the liveness and
440 relevance info of the DEF_STMT of this USE:
441 STMT_VINFO_LIVE_P (DEF_stmt_vinfo) <-- live_p
442 STMT_VINFO_RELEVANT (DEF_stmt_vinfo) <-- relevant
444 - case 1: If USE is used only for address computations (e.g. array indexing),
445 which does not need to be directly vectorized, then the liveness/relevance
446 of the respective DEF_STMT is left unchanged.
447 - case 2: If STMT_VINFO is a reduction phi and DEF_STMT is a reduction stmt,
448 we skip DEF_STMT cause it had already been processed.
449 - case 3: If DEF_STMT and STMT_VINFO are in different nests, then
450 "relevant" will be modified accordingly.
452 Return true if everything is as expected. Return false otherwise. */
455 process_use (stmt_vec_info stmt_vinfo
, tree use
, loop_vec_info loop_vinfo
,
456 enum vect_relevant relevant
, vec
<stmt_vec_info
> *worklist
,
459 stmt_vec_info dstmt_vinfo
;
460 enum vect_def_type dt
;
462 /* case 1: we are only interested in uses that need to be vectorized. Uses
463 that are used for address computation are not considered relevant. */
464 if (!force
&& !exist_non_indexing_operands_for_use_p (use
, stmt_vinfo
))
465 return opt_result::success ();
467 if (!vect_is_simple_use (use
, loop_vinfo
, &dt
, &dstmt_vinfo
))
468 return opt_result::failure_at (stmt_vinfo
->stmt
,
470 " unsupported use in stmt.\n");
473 return opt_result::success ();
475 basic_block def_bb
= gimple_bb (dstmt_vinfo
->stmt
);
476 basic_block bb
= gimple_bb (stmt_vinfo
->stmt
);
478 /* case 2: A reduction phi (STMT) defined by a reduction stmt (DSTMT_VINFO).
479 We have to force the stmt live since the epilogue loop needs it to
480 continue computing the reduction. */
481 if (gimple_code (stmt_vinfo
->stmt
) == GIMPLE_PHI
482 && STMT_VINFO_DEF_TYPE (stmt_vinfo
) == vect_reduction_def
483 && gimple_code (dstmt_vinfo
->stmt
) != GIMPLE_PHI
484 && STMT_VINFO_DEF_TYPE (dstmt_vinfo
) == vect_reduction_def
485 && bb
->loop_father
== def_bb
->loop_father
)
487 if (dump_enabled_p ())
488 dump_printf_loc (MSG_NOTE
, vect_location
,
489 "reduc-stmt defining reduc-phi in the same nest.\n");
/* Mark the def live (last argument true) so the epilogue can use it.  */
490 vect_mark_relevant (worklist
, dstmt_vinfo
, relevant
, true);
491 return opt_result::success ();
494 /* case 3a: outer-loop stmt defining an inner-loop stmt:
495 outer-loop-header-bb:
/* Outer-loop def reaching an inner-loop use: translate RELEVANT via the
   (elided) switch below.  */
501 if (flow_loop_nested_p (def_bb
->loop_father
, bb
->loop_father
))
503 if (dump_enabled_p ())
504 dump_printf_loc (MSG_NOTE
, vect_location
,
505 "outer-loop def-stmt defining inner-loop stmt.\n");
509 case vect_unused_in_scope
:
510 relevant
= (STMT_VINFO_DEF_TYPE (stmt_vinfo
) == vect_nested_cycle
) ?
511 vect_used_in_scope
: vect_unused_in_scope
;
514 case vect_used_in_outer_by_reduction
:
515 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo
) != vect_reduction_def
);
516 relevant
= vect_used_by_reduction
;
519 case vect_used_in_outer
:
520 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo
) != vect_reduction_def
);
521 relevant
= vect_used_in_scope
;
524 case vect_used_in_scope
:
532 /* case 3b: inner-loop stmt defining an outer-loop stmt:
533 outer-loop-header-bb:
537 outer-loop-tail-bb (or outer-loop-exit-bb in double reduction):
539 else if (flow_loop_nested_p (bb
->loop_father
, def_bb
->loop_father
))
541 if (dump_enabled_p ())
542 dump_printf_loc (MSG_NOTE
, vect_location
,
543 "inner-loop def-stmt defining outer-loop stmt.\n");
547 case vect_unused_in_scope
:
548 relevant
= (STMT_VINFO_DEF_TYPE (stmt_vinfo
) == vect_reduction_def
549 || STMT_VINFO_DEF_TYPE (stmt_vinfo
) == vect_double_reduction_def
) ?
550 vect_used_in_outer_by_reduction
: vect_unused_in_scope
;
553 case vect_used_by_reduction
:
554 case vect_used_only_live
:
555 relevant
= vect_used_in_outer_by_reduction
;
558 case vect_used_in_scope
:
559 relevant
= vect_used_in_outer
;
566 /* We are also not interested in uses on loop PHI backedges that are
567 inductions. Otherwise we'll needlessly vectorize the IV increment
568 and cause hybrid SLP for SLP inductions. Unless the PHI is live
570 else if (gimple_code (stmt_vinfo
->stmt
) == GIMPLE_PHI
571 && STMT_VINFO_DEF_TYPE (stmt_vinfo
) == vect_induction_def
572 && ! STMT_VINFO_LIVE_P (stmt_vinfo
)
573 && (PHI_ARG_DEF_FROM_EDGE (stmt_vinfo
->stmt
,
574 loop_latch_edge (bb
->loop_father
))
577 if (dump_enabled_p ())
578 dump_printf_loc (MSG_NOTE
, vect_location
,
579 "induction value on backedge.\n");
580 return opt_result::success ();
/* Default: mark the defining stmt with the (possibly adjusted) relevance;
   not live.  */
584 vect_mark_relevant (worklist
, dstmt_vinfo
, relevant
, false);
585 return opt_result::success ();
/* NOTE (review): extraction-garbled text — braces, several declarations
   (bb, i, live_p, res/opt_result handling) and break statements are elided;
   restore from upstream before editing.  */
589 /* Function vect_mark_stmts_to_be_vectorized.
591 Not all stmts in the loop need to be vectorized. For example:
600 Stmt 1 and 3 do not need to be vectorized, because loop control and
601 addressing of vectorized data-refs are handled differently.
603 This pass detects such stmts. */
606 vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo
, bool *fatal
)
608 class loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
609 basic_block
*bbs
= LOOP_VINFO_BBS (loop_vinfo
);
610 unsigned int nbbs
= loop
->num_nodes
;
611 gimple_stmt_iterator si
;
615 enum vect_relevant relevant
;
617 DUMP_VECT_SCOPE ("vect_mark_stmts_to_be_vectorized");
619 auto_vec
<stmt_vec_info
, 64> worklist
;
621 /* 1. Init worklist. */
622 for (i
= 0; i
< nbbs
; i
++)
/* Seed the worklist with relevant PHIs ...  */
625 for (si
= gsi_start_phis (bb
); !gsi_end_p (si
); gsi_next (&si
))
627 stmt_vec_info phi_info
= loop_vinfo
->lookup_stmt (gsi_stmt (si
));
628 if (dump_enabled_p ())
629 dump_printf_loc (MSG_NOTE
, vect_location
, "init: phi relevant? %G",
632 if (vect_stmt_relevant_p (phi_info
, loop_vinfo
, &relevant
, &live_p
))
633 vect_mark_relevant (&worklist
, phi_info
, relevant
, live_p
);
/* ... and relevant statements.  */
635 for (si
= gsi_start_bb (bb
); !gsi_end_p (si
); gsi_next (&si
))
637 stmt_vec_info stmt_info
= loop_vinfo
->lookup_stmt (gsi_stmt (si
));
638 if (dump_enabled_p ())
639 dump_printf_loc (MSG_NOTE
, vect_location
,
640 "init: stmt relevant? %G", stmt_info
->stmt
);
642 if (vect_stmt_relevant_p (stmt_info
, loop_vinfo
, &relevant
, &live_p
))
643 vect_mark_relevant (&worklist
, stmt_info
, relevant
, live_p
);
647 /* 2. Process_worklist */
648 while (worklist
.length () > 0)
653 stmt_vec_info stmt_vinfo
= worklist
.pop ();
654 if (dump_enabled_p ())
655 dump_printf_loc (MSG_NOTE
, vect_location
,
656 "worklist: examine stmt: %G", stmt_vinfo
->stmt
);
658 /* Examine the USEs of STMT. For each USE, mark the stmt that defines it
659 (DEF_STMT) as relevant/irrelevant according to the relevance property
661 relevant
= STMT_VINFO_RELEVANT (stmt_vinfo
);
663 /* Generally, the relevance property of STMT (in STMT_VINFO_RELEVANT) is
664 propagated as is to the DEF_STMTs of its USEs.
666 One exception is when STMT has been identified as defining a reduction
667 variable; in this case we set the relevance to vect_used_by_reduction.
668 This is because we distinguish between two kinds of relevant stmts -
669 those that are used by a reduction computation, and those that are
670 (also) used by a regular computation. This allows us later on to
671 identify stmts that are used solely by a reduction, and therefore the
672 order of the results that they produce does not have to be kept. */
674 switch (STMT_VINFO_DEF_TYPE (stmt_vinfo
))
676 case vect_reduction_def
:
677 gcc_assert (relevant
!= vect_unused_in_scope
);
678 if (relevant
!= vect_unused_in_scope
679 && relevant
!= vect_used_in_scope
680 && relevant
!= vect_used_by_reduction
681 && relevant
!= vect_used_only_live
)
682 return opt_result::failure_at
683 (stmt_vinfo
->stmt
, "unsupported use of reduction.\n");
686 case vect_nested_cycle
:
687 if (relevant
!= vect_unused_in_scope
688 && relevant
!= vect_used_in_outer_by_reduction
689 && relevant
!= vect_used_in_outer
)
690 return opt_result::failure_at
691 (stmt_vinfo
->stmt
, "unsupported use of nested cycle.\n");
694 case vect_double_reduction_def
:
695 if (relevant
!= vect_unused_in_scope
696 && relevant
!= vect_used_by_reduction
697 && relevant
!= vect_used_only_live
)
698 return opt_result::failure_at
699 (stmt_vinfo
->stmt
, "unsupported use of double reduction.\n");
706 if (is_pattern_stmt_p (stmt_vinfo
))
708 /* Pattern statements are not inserted into the code, so
709 FOR_EACH_PHI_OR_STMT_USE optimizes their operands out, and we
710 have to scan the RHS or function arguments instead. */
711 if (gassign
*assign
= dyn_cast
<gassign
*> (stmt_vinfo
->stmt
))
713 enum tree_code rhs_code
= gimple_assign_rhs_code (assign
);
714 tree op
= gimple_assign_rhs1 (assign
);
/* COND_EXPR with a comparison first operand: process both comparison
   operands explicitly.  */
717 if (rhs_code
== COND_EXPR
&& COMPARISON_CLASS_P (op
))
720 = process_use (stmt_vinfo
, TREE_OPERAND (op
, 0),
721 loop_vinfo
, relevant
, &worklist
, false);
724 res
= process_use (stmt_vinfo
, TREE_OPERAND (op
, 1),
725 loop_vinfo
, relevant
, &worklist
, false);
/* Remaining SSA operands of the assignment.  */
730 for (; i
< gimple_num_ops (assign
); i
++)
732 op
= gimple_op (assign
, i
);
733 if (TREE_CODE (op
) == SSA_NAME
)
736 = process_use (stmt_vinfo
, op
, loop_vinfo
, relevant
,
/* Pattern calls: process each call argument.  */
743 else if (gcall
*call
= dyn_cast
<gcall
*> (stmt_vinfo
->stmt
))
745 for (i
= 0; i
< gimple_call_num_args (call
); i
++)
747 tree arg
= gimple_call_arg (call
, i
);
749 = process_use (stmt_vinfo
, arg
, loop_vinfo
, relevant
,
/* Non-pattern stmts: iterate over the real SSA uses.  */
757 FOR_EACH_PHI_OR_STMT_USE (use_p
, stmt_vinfo
->stmt
, iter
, SSA_OP_USE
)
759 tree op
= USE_FROM_PTR (use_p
);
761 = process_use (stmt_vinfo
, op
, loop_vinfo
, relevant
,
/* Gather/scatter: the offset operand is a forced use.  */
767 if (STMT_VINFO_GATHER_SCATTER_P (stmt_vinfo
))
769 gather_scatter_info gs_info
;
770 if (!vect_check_gather_scatter (stmt_vinfo
, loop_vinfo
, &gs_info
))
773 = process_use (stmt_vinfo
, gs_info
.offset
, loop_vinfo
, relevant
,
782 } /* while worklist */
784 return opt_result::success ();
/* NOTE (review): extraction-garbled text — declarations of nelt/nelt_limit,
   braces and parts of the inner loop are elided; restore from upstream
   before editing.  */
787 /* Compute the prologue cost for invariant or constant operands. */
790 vect_prologue_cost_for_slp_op (slp_tree node
, stmt_vec_info stmt_info
,
791 unsigned opno
, enum vect_def_type dt
,
792 stmt_vector_for_cost
*cost_vec
)
794 vec_info
*vinfo
= stmt_info
->vinfo
;
795 gimple
*stmt
= SLP_TREE_SCALAR_STMTS (node
)[0]->stmt
;
796 tree op
= gimple_op (stmt
, opno
);
797 unsigned prologue_cost
= 0;
799 /* Without looking at the actual initializer a vector of
800 constants can be implemented as load from the constant pool.
801 When all elements are the same we can use a splat. */
802 tree vectype
= get_vectype_for_scalar_type (vinfo
, TREE_TYPE (op
), node
);
803 unsigned group_size
= SLP_TREE_SCALAR_STMTS (node
).length ();
804 unsigned num_vects_to_check
;
805 unsigned HOST_WIDE_INT const_nunits
;
/* Constant-length vectors that don't divide the group: check each vector.  */
807 if (TYPE_VECTOR_SUBPARTS (vectype
).is_constant (&const_nunits
)
808 && ! multiple_p (const_nunits
, group_size
))
810 num_vects_to_check
= SLP_TREE_NUMBER_OF_VEC_STMTS (node
);
811 nelt_limit
= const_nunits
;
815 /* If either the vector has variable length or the vectors
816 are composed of repeated whole groups we only need to
817 cost construction once. All vectors will be the same. */
818 num_vects_to_check
= 1;
819 nelt_limit
= group_size
;
821 tree elt
= NULL_TREE
;
/* Track whether all operands of one vector initializer are the same ELT.  */
823 for (unsigned j
= 0; j
< num_vects_to_check
* nelt_limit
; ++j
)
825 unsigned si
= j
% group_size
;
827 elt
= gimple_op (SLP_TREE_SCALAR_STMTS (node
)[si
]->stmt
, opno
);
828 /* ??? We're just tracking whether all operands of a single
829 vector initializer are the same, ideally we'd check if
830 we emitted the same one already. */
831 else if (elt
!= gimple_op (SLP_TREE_SCALAR_STMTS (node
)[si
]->stmt
,
835 if (nelt
== nelt_limit
)
837 /* ??? We need to pass down stmt_info for a vector type
838 even if it points to the wrong stmt. */
/* Splat (scalar_to_vec) when uniform external, else full construct.  */
839 prologue_cost
+= record_stmt_cost
841 dt
== vect_external_def
842 ? (elt
? scalar_to_vec
: vec_construct
)
844 stmt_info
, 0, vect_prologue
);
849 return prologue_cost
;
/* NOTE (review): extraction-garbled text — the ndts/node parameter lines,
   braces and the opno argument of the prologue-cost call are elided; restore
   from upstream before editing.  */
852 /* Function vect_model_simple_cost.
854 Models cost for simple operations, i.e. those that only emit ncopies of a
855 single op. Right now, this does not account for multiple insns that could
856 be generated for the single vector op. We will handle that shortly. */
859 vect_model_simple_cost (stmt_vec_info stmt_info
, int ncopies
,
860 enum vect_def_type
*dt
,
863 stmt_vector_for_cost
*cost_vec
,
864 vect_cost_for_stmt kind
= vector_stmt
)
866 int inside_cost
= 0, prologue_cost
= 0;
868 gcc_assert (cost_vec
!= NULL
);
870 /* ??? Somehow we need to fix this at the callers. */
/* For SLP the number of copies comes from the node, not the caller.  */
872 ncopies
= SLP_TREE_NUMBER_OF_VEC_STMTS (node
);
876 /* Scan operands and account for prologue cost of constants/externals.
877 ??? This over-estimates cost for multiple uses and should be
879 gimple
*stmt
= SLP_TREE_SCALAR_STMTS (node
)[0]->stmt
;
880 tree lhs
= gimple_get_lhs (stmt
);
881 for (unsigned i
= 0; i
< gimple_num_ops (stmt
); ++i
)
883 tree op
= gimple_op (stmt
, i
);
884 enum vect_def_type dt
;
885 if (!op
|| op
== lhs
)
887 if (vect_is_simple_use (op
, stmt_info
->vinfo
, &dt
)
888 && (dt
== vect_constant_def
|| dt
== vect_external_def
))
889 prologue_cost
+= vect_prologue_cost_for_slp_op (node
, stmt_info
,
894 /* Cost the "broadcast" of a scalar operand in to a vector operand.
895 Use scalar_to_vec to cost the broadcast, as elsewhere in the vector
897 for (int i
= 0; i
< ndts
; i
++)
898 if (dt
[i
] == vect_constant_def
|| dt
[i
] == vect_external_def
)
899 prologue_cost
+= record_stmt_cost (cost_vec
, 1, scalar_to_vec
,
900 stmt_info
, 0, vect_prologue
);
902 /* Adjust for two-operator SLP nodes. */
903 if (node
&& SLP_TREE_TWO_OPERATORS (node
))
906 inside_cost
+= record_stmt_cost (cost_vec
, ncopies
, vec_perm
,
907 stmt_info
, 0, vect_body
);
910 /* Pass the inside-of-loop statements to the target-specific cost model. */
911 inside_cost
+= record_stmt_cost (cost_vec
, ncopies
, kind
,
912 stmt_info
, 0, vect_body
);
914 if (dump_enabled_p ())
915 dump_printf_loc (MSG_NOTE
, vect_location
,
916 "vect_model_simple_cost: inside_cost = %d, "
917 "prologue_cost = %d .\n", inside_cost
, prologue_cost
);
/* NOTE (review): extraction-garbled text — declaration of i, braces and the
   ncopies-doubling statement inside the first loop (original lines 941-943)
   are elided; restore from upstream before editing.  */
921 /* Model cost for type demotion and promotion operations. PWR is
922 normally zero for single-step promotions and demotions. It will be
923 one if two-step promotion/demotion is required, and so on. NCOPIES
924 is the number of vector results (and thus number of instructions)
925 for the narrowest end of the operation chain. Each additional
926 step doubles the number of instructions required. */
929 vect_model_promotion_demotion_cost (stmt_vec_info stmt_info
,
930 enum vect_def_type
*dt
,
931 unsigned int ncopies
, int pwr
,
932 stmt_vector_for_cost
*cost_vec
)
935 int inside_cost
= 0, prologue_cost
= 0;
/* One vec_promote_demote per step of the chain.  */
937 for (i
= 0; i
< pwr
+ 1; i
++)
939 inside_cost
+= record_stmt_cost (cost_vec
, ncopies
, vec_promote_demote
,
940 stmt_info
, 0, vect_body
);
944 /* FORNOW: Assuming maximum 2 args per stmts. */
945 for (i
= 0; i
< 2; i
++)
946 if (dt
[i
] == vect_constant_def
|| dt
[i
] == vect_external_def
)
947 prologue_cost
+= record_stmt_cost (cost_vec
, 1, vector_stmt
,
948 stmt_info
, 0, vect_prologue
);
950 if (dump_enabled_p ())
951 dump_printf_loc (MSG_NOTE
, vect_location
,
952 "vect_model_promotion_demotion_cost: inside_cost = %d, "
953 "prologue_cost = %d .\n", inside_cost
, prologue_cost
);
/* NOTE (review): extraction-garbled text — edge/iterator declarations, the
   null-check on RET, the do-loop head and the return statements are elided;
   restore from upstream before editing.  */
956 /* Returns true if the current function returns DECL. */
959 cfun_returns (tree decl
)
/* Inspect every predecessor of the exit block for a GIMPLE return.  */
963 FOR_EACH_EDGE (e
, ei
, EXIT_BLOCK_PTR_FOR_FN (cfun
)->preds
)
965 greturn
*ret
= safe_dyn_cast
<greturn
*> (last_stmt (e
->src
));
968 if (gimple_return_retval (ret
) == decl
)
970 /* We often end up with an aggregate copy to the result decl,
971 handle that case as well. First skip intermediate clobbers
/* Walk the vuse chain past clobbers to find the defining assignment.  */
976 def
= SSA_NAME_DEF_STMT (gimple_vuse (def
));
978 while (gimple_clobber_p (def
));
979 if (is_a
<gassign
*> (def
)
980 && gimple_assign_lhs (def
) == gimple_return_retval (ret
)
981 && gimple_assign_rhs1 (def
) == decl
)
/* NOTE (review): extraction-garbled text — braces, the first_stmt_p guard at
   original line 1031, the REG_P/PARALLEL handling around lines 1083-1100 and
   the cost-kind arguments of some record_stmt_cost calls are elided; restore
   from upstream before editing.  */
987 /* Function vect_model_store_cost
989 Models cost for stores. In the case of grouped accesses, one access
990 has the overhead of the grouped access attributed to it. */
993 vect_model_store_cost (stmt_vec_info stmt_info
, int ncopies
,
994 enum vect_def_type dt
,
995 vect_memory_access_type memory_access_type
,
996 vec_load_store_type vls_type
, slp_tree slp_node
,
997 stmt_vector_for_cost
*cost_vec
)
999 unsigned int inside_cost
= 0, prologue_cost
= 0;
1000 stmt_vec_info first_stmt_info
= stmt_info
;
1001 bool grouped_access_p
= STMT_VINFO_GROUPED_ACCESS (stmt_info
);
1003 /* ??? Somehow we need to fix this at the callers. */
1005 ncopies
= SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
);
/* Invariant stores pay a prologue cost to materialise the vector value.  */
1007 if (vls_type
== VLS_STORE_INVARIANT
)
1010 prologue_cost
+= vect_prologue_cost_for_slp_op (slp_node
, stmt_info
,
1013 prologue_cost
+= record_stmt_cost (cost_vec
, 1, scalar_to_vec
,
1014 stmt_info
, 0, vect_prologue
);
1017 /* Grouped stores update all elements in the group at once,
1018 so we want the DR for the first statement. */
1019 if (!slp_node
&& grouped_access_p
)
1020 first_stmt_info
= DR_GROUP_FIRST_ELEMENT (stmt_info
);
1022 /* True if we should include any once-per-group costs as well as
1023 the cost of the statement itself. For SLP we only get called
1024 once per group anyhow. */
1025 bool first_stmt_p
= (first_stmt_info
== stmt_info
);
1027 /* We assume that the cost of a single store-lanes instruction is
1028 equivalent to the cost of DR_GROUP_SIZE separate stores. If a grouped
1029 access is instead being provided by a permute-and-store operation,
1030 include the cost of the permutes. */
1032 && memory_access_type
== VMAT_CONTIGUOUS_PERMUTE
)
1034 /* Uses a high and low interleave or shuffle operations for each
1036 int group_size
= DR_GROUP_SIZE (first_stmt_info
);
1037 int nstmts
= ncopies
* ceil_log2 (group_size
) * group_size
;
1038 inside_cost
= record_stmt_cost (cost_vec
, nstmts
, vec_perm
,
1039 stmt_info
, 0, vect_body
);
1041 if (dump_enabled_p ())
1042 dump_printf_loc (MSG_NOTE
, vect_location
,
1043 "vect_model_store_cost: strided group_size = %d .\n",
1047 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
1048 /* Costs of the stores. */
1049 if (memory_access_type
== VMAT_ELEMENTWISE
1050 || memory_access_type
== VMAT_GATHER_SCATTER
)
1052 /* N scalar stores plus extracting the elements. */
1053 unsigned int assumed_nunits
= vect_nunits_for_cost (vectype
);
1054 inside_cost
+= record_stmt_cost (cost_vec
,
1055 ncopies
* assumed_nunits
,
1056 scalar_store
, stmt_info
, 0, vect_body
);
/* Otherwise the access-scheme-specific vector store cost.  */
1059 vect_get_store_cost (stmt_info
, ncopies
, &inside_cost
, cost_vec
);
1061 if (memory_access_type
== VMAT_ELEMENTWISE
1062 || memory_access_type
== VMAT_STRIDED_SLP
)
1064 /* N scalar stores plus extracting the elements. */
1065 unsigned int assumed_nunits
= vect_nunits_for_cost (vectype
);
1066 inside_cost
+= record_stmt_cost (cost_vec
,
1067 ncopies
* assumed_nunits
,
1068 vec_to_scalar
, stmt_info
, 0, vect_body
);
1071 /* When vectorizing a store into the function result assign
1072 a penalty if the function returns in a multi-register location.
1073 In this case we assume we'll end up with having to spill the
1074 vector result and do piecewise loads as a conservative estimate. */
1075 tree base
= get_base_address (STMT_VINFO_DATA_REF (stmt_info
)->ref
);
1077 && (TREE_CODE (base
) == RESULT_DECL
1078 || (DECL_P (base
) && cfun_returns (base
)))
1079 && !aggregate_value_p (base
, cfun
->decl
))
1081 rtx reg
= hard_function_value (TREE_TYPE (base
), cfun
->decl
, 0, 1);
1082 /* ??? Handle PARALLEL in some way. */
1085 int nregs
= hard_regno_nregs (REGNO (reg
), GET_MODE (reg
));
1086 /* Assume that a single reg-reg move is possible and cheap,
1087 do not account for vector to gp register move cost. */
/* Multi-register return: charge an epilogue spill plus per-reg loads.  */
1091 prologue_cost
+= record_stmt_cost (cost_vec
, ncopies
,
1093 stmt_info
, 0, vect_epilogue
);
1095 prologue_cost
+= record_stmt_cost (cost_vec
, ncopies
* nregs
,
1097 stmt_info
, 0, vect_epilogue
);
1102 if (dump_enabled_p ())
1103 dump_printf_loc (MSG_NOTE
, vect_location
,
1104 "vect_model_store_cost: inside_cost = %d, "
1105 "prologue_cost = %d .\n", inside_cost
, prologue_cost
);
1109 /* Calculate cost of DR's memory access. */
1111 vect_get_store_cost (stmt_vec_info stmt_info
, int ncopies
,
1112 unsigned int *inside_cost
,
1113 stmt_vector_for_cost
*body_cost_vec
)
1115 dr_vec_info
*dr_info
= STMT_VINFO_DR_INFO (stmt_info
);
1116 int alignment_support_scheme
1117 = vect_supportable_dr_alignment (dr_info
, false);
1119 switch (alignment_support_scheme
)
1123 *inside_cost
+= record_stmt_cost (body_cost_vec
, ncopies
,
1124 vector_store
, stmt_info
, 0,
1127 if (dump_enabled_p ())
1128 dump_printf_loc (MSG_NOTE
, vect_location
,
1129 "vect_model_store_cost: aligned.\n");
1133 case dr_unaligned_supported
:
1135 /* Here, we assign an additional cost for the unaligned store. */
1136 *inside_cost
+= record_stmt_cost (body_cost_vec
, ncopies
,
1137 unaligned_store
, stmt_info
,
1138 DR_MISALIGNMENT (dr_info
),
1140 if (dump_enabled_p ())
1141 dump_printf_loc (MSG_NOTE
, vect_location
,
1142 "vect_model_store_cost: unaligned supported by "
1147 case dr_unaligned_unsupported
:
1149 *inside_cost
= VECT_MAX_COST
;
1151 if (dump_enabled_p ())
1152 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1153 "vect_model_store_cost: unsupported access.\n");
1163 /* Function vect_model_load_cost
1165 Models cost for loads. In the case of grouped accesses, one access has
1166 the overhead of the grouped access attributed to it. Since unaligned
1167 accesses are supported for loads, we also account for the costs of the
1168 access scheme chosen. */
1171 vect_model_load_cost (stmt_vec_info stmt_info
, unsigned ncopies
,
1172 vect_memory_access_type memory_access_type
,
1173 slp_instance instance
,
1175 stmt_vector_for_cost
*cost_vec
)
1177 unsigned int inside_cost
= 0, prologue_cost
= 0;
1178 bool grouped_access_p
= STMT_VINFO_GROUPED_ACCESS (stmt_info
);
1180 gcc_assert (cost_vec
);
1182 /* ??? Somehow we need to fix this at the callers. */
1184 ncopies
= SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
);
1186 if (slp_node
&& SLP_TREE_LOAD_PERMUTATION (slp_node
).exists ())
1188 /* If the load is permuted then the alignment is determined by
1189 the first group element not by the first scalar stmt DR. */
1190 stmt_vec_info first_stmt_info
= DR_GROUP_FIRST_ELEMENT (stmt_info
);
1191 /* Record the cost for the permutation. */
1193 unsigned assumed_nunits
1194 = vect_nunits_for_cost (STMT_VINFO_VECTYPE (first_stmt_info
));
1195 unsigned slp_vf
= (ncopies
* assumed_nunits
) / instance
->group_size
;
1196 vect_transform_slp_perm_load (slp_node
, vNULL
, NULL
,
1197 slp_vf
, instance
, true,
1199 inside_cost
+= record_stmt_cost (cost_vec
, n_perms
, vec_perm
,
1200 first_stmt_info
, 0, vect_body
);
1201 /* And adjust the number of loads performed. This handles
1202 redundancies as well as loads that are later dead. */
1203 auto_sbitmap
perm (DR_GROUP_SIZE (first_stmt_info
));
1204 bitmap_clear (perm
);
1205 for (unsigned i
= 0;
1206 i
< SLP_TREE_LOAD_PERMUTATION (slp_node
).length (); ++i
)
1207 bitmap_set_bit (perm
, SLP_TREE_LOAD_PERMUTATION (slp_node
)[i
]);
1209 bool load_seen
= false;
1210 for (unsigned i
= 0; i
< DR_GROUP_SIZE (first_stmt_info
); ++i
)
1212 if (i
% assumed_nunits
== 0)
1218 if (bitmap_bit_p (perm
, i
))
1224 <= (DR_GROUP_SIZE (first_stmt_info
)
1225 - DR_GROUP_GAP (first_stmt_info
)
1226 + assumed_nunits
- 1) / assumed_nunits
);
1229 /* Grouped loads read all elements in the group at once,
1230 so we want the DR for the first statement. */
1231 stmt_vec_info first_stmt_info
= stmt_info
;
1232 if (!slp_node
&& grouped_access_p
)
1233 first_stmt_info
= DR_GROUP_FIRST_ELEMENT (stmt_info
);
1235 /* True if we should include any once-per-group costs as well as
1236 the cost of the statement itself. For SLP we only get called
1237 once per group anyhow. */
1238 bool first_stmt_p
= (first_stmt_info
== stmt_info
);
1240 /* We assume that the cost of a single load-lanes instruction is
1241 equivalent to the cost of DR_GROUP_SIZE separate loads. If a grouped
1242 access is instead being provided by a load-and-permute operation,
1243 include the cost of the permutes. */
1245 && memory_access_type
== VMAT_CONTIGUOUS_PERMUTE
)
1247 /* Uses an even and odd extract operations or shuffle operations
1248 for each needed permute. */
1249 int group_size
= DR_GROUP_SIZE (first_stmt_info
);
1250 int nstmts
= ncopies
* ceil_log2 (group_size
) * group_size
;
1251 inside_cost
+= record_stmt_cost (cost_vec
, nstmts
, vec_perm
,
1252 stmt_info
, 0, vect_body
);
1254 if (dump_enabled_p ())
1255 dump_printf_loc (MSG_NOTE
, vect_location
,
1256 "vect_model_load_cost: strided group_size = %d .\n",
1260 /* The loads themselves. */
1261 if (memory_access_type
== VMAT_ELEMENTWISE
1262 || memory_access_type
== VMAT_GATHER_SCATTER
)
1264 /* N scalar loads plus gathering them into a vector. */
1265 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
1266 unsigned int assumed_nunits
= vect_nunits_for_cost (vectype
);
1267 inside_cost
+= record_stmt_cost (cost_vec
,
1268 ncopies
* assumed_nunits
,
1269 scalar_load
, stmt_info
, 0, vect_body
);
1272 vect_get_load_cost (stmt_info
, ncopies
, first_stmt_p
,
1273 &inside_cost
, &prologue_cost
,
1274 cost_vec
, cost_vec
, true);
1275 if (memory_access_type
== VMAT_ELEMENTWISE
1276 || memory_access_type
== VMAT_STRIDED_SLP
)
1277 inside_cost
+= record_stmt_cost (cost_vec
, ncopies
, vec_construct
,
1278 stmt_info
, 0, vect_body
);
1280 if (dump_enabled_p ())
1281 dump_printf_loc (MSG_NOTE
, vect_location
,
1282 "vect_model_load_cost: inside_cost = %d, "
1283 "prologue_cost = %d .\n", inside_cost
, prologue_cost
);
1287 /* Calculate cost of DR's memory access. */
1289 vect_get_load_cost (stmt_vec_info stmt_info
, int ncopies
,
1290 bool add_realign_cost
, unsigned int *inside_cost
,
1291 unsigned int *prologue_cost
,
1292 stmt_vector_for_cost
*prologue_cost_vec
,
1293 stmt_vector_for_cost
*body_cost_vec
,
1294 bool record_prologue_costs
)
1296 dr_vec_info
*dr_info
= STMT_VINFO_DR_INFO (stmt_info
);
1297 int alignment_support_scheme
1298 = vect_supportable_dr_alignment (dr_info
, false);
1300 switch (alignment_support_scheme
)
1304 *inside_cost
+= record_stmt_cost (body_cost_vec
, ncopies
, vector_load
,
1305 stmt_info
, 0, vect_body
);
1307 if (dump_enabled_p ())
1308 dump_printf_loc (MSG_NOTE
, vect_location
,
1309 "vect_model_load_cost: aligned.\n");
1313 case dr_unaligned_supported
:
1315 /* Here, we assign an additional cost for the unaligned load. */
1316 *inside_cost
+= record_stmt_cost (body_cost_vec
, ncopies
,
1317 unaligned_load
, stmt_info
,
1318 DR_MISALIGNMENT (dr_info
),
1321 if (dump_enabled_p ())
1322 dump_printf_loc (MSG_NOTE
, vect_location
,
1323 "vect_model_load_cost: unaligned supported by "
1328 case dr_explicit_realign
:
1330 *inside_cost
+= record_stmt_cost (body_cost_vec
, ncopies
* 2,
1331 vector_load
, stmt_info
, 0, vect_body
);
1332 *inside_cost
+= record_stmt_cost (body_cost_vec
, ncopies
,
1333 vec_perm
, stmt_info
, 0, vect_body
);
1335 /* FIXME: If the misalignment remains fixed across the iterations of
1336 the containing loop, the following cost should be added to the
1338 if (targetm
.vectorize
.builtin_mask_for_load
)
1339 *inside_cost
+= record_stmt_cost (body_cost_vec
, 1, vector_stmt
,
1340 stmt_info
, 0, vect_body
);
1342 if (dump_enabled_p ())
1343 dump_printf_loc (MSG_NOTE
, vect_location
,
1344 "vect_model_load_cost: explicit realign\n");
1348 case dr_explicit_realign_optimized
:
1350 if (dump_enabled_p ())
1351 dump_printf_loc (MSG_NOTE
, vect_location
,
1352 "vect_model_load_cost: unaligned software "
1355 /* Unaligned software pipeline has a load of an address, an initial
1356 load, and possibly a mask operation to "prime" the loop. However,
1357 if this is an access in a group of loads, which provide grouped
1358 access, then the above cost should only be considered for one
1359 access in the group. Inside the loop, there is a load op
1360 and a realignment op. */
1362 if (add_realign_cost
&& record_prologue_costs
)
1364 *prologue_cost
+= record_stmt_cost (prologue_cost_vec
, 2,
1365 vector_stmt
, stmt_info
,
1367 if (targetm
.vectorize
.builtin_mask_for_load
)
1368 *prologue_cost
+= record_stmt_cost (prologue_cost_vec
, 1,
1369 vector_stmt
, stmt_info
,
1373 *inside_cost
+= record_stmt_cost (body_cost_vec
, ncopies
, vector_load
,
1374 stmt_info
, 0, vect_body
);
1375 *inside_cost
+= record_stmt_cost (body_cost_vec
, ncopies
, vec_perm
,
1376 stmt_info
, 0, vect_body
);
1378 if (dump_enabled_p ())
1379 dump_printf_loc (MSG_NOTE
, vect_location
,
1380 "vect_model_load_cost: explicit realign optimized"
1386 case dr_unaligned_unsupported
:
1388 *inside_cost
= VECT_MAX_COST
;
1390 if (dump_enabled_p ())
1391 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1392 "vect_model_load_cost: unsupported access.\n");
1401 /* Insert the new stmt NEW_STMT at *GSI or at the appropriate place in
1402 the loop preheader for the vectorized stmt STMT_VINFO. */
1405 vect_init_vector_1 (stmt_vec_info stmt_vinfo
, gimple
*new_stmt
,
1406 gimple_stmt_iterator
*gsi
)
1409 vect_finish_stmt_generation (stmt_vinfo
, new_stmt
, gsi
);
1412 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_vinfo
);
1416 class loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
1420 if (nested_in_vect_loop_p (loop
, stmt_vinfo
))
1423 pe
= loop_preheader_edge (loop
);
1424 new_bb
= gsi_insert_on_edge_immediate (pe
, new_stmt
);
1425 gcc_assert (!new_bb
);
1429 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_vinfo
);
1431 gimple_stmt_iterator gsi_bb_start
;
1433 gcc_assert (bb_vinfo
);
1434 bb
= BB_VINFO_BB (bb_vinfo
);
1435 gsi_bb_start
= gsi_after_labels (bb
);
1436 gsi_insert_before (&gsi_bb_start
, new_stmt
, GSI_SAME_STMT
);
1440 if (dump_enabled_p ())
1441 dump_printf_loc (MSG_NOTE
, vect_location
,
1442 "created new init_stmt: %G", new_stmt
);
1445 /* Function vect_init_vector.
1447 Insert a new stmt (INIT_STMT) that initializes a new variable of type
1448 TYPE with the value VAL. If TYPE is a vector type and VAL does not have
1449 vector type a vector with all elements equal to VAL is created first.
1450 Place the initialization at GSI if it is not NULL. Otherwise, place the
1451 initialization at the loop preheader.
1452 Return the DEF of INIT_STMT.
1453 It will be used in the vectorization of STMT_INFO. */
1456 vect_init_vector (stmt_vec_info stmt_info
, tree val
, tree type
,
1457 gimple_stmt_iterator
*gsi
)
1462 /* We abuse this function to push sth to a SSA name with initial 'val'. */
1463 if (! useless_type_conversion_p (type
, TREE_TYPE (val
)))
1465 gcc_assert (TREE_CODE (type
) == VECTOR_TYPE
);
1466 if (! types_compatible_p (TREE_TYPE (type
), TREE_TYPE (val
)))
1468 /* Scalar boolean value should be transformed into
1469 all zeros or all ones value before building a vector. */
1470 if (VECTOR_BOOLEAN_TYPE_P (type
))
1472 tree true_val
= build_all_ones_cst (TREE_TYPE (type
));
1473 tree false_val
= build_zero_cst (TREE_TYPE (type
));
1475 if (CONSTANT_CLASS_P (val
))
1476 val
= integer_zerop (val
) ? false_val
: true_val
;
1479 new_temp
= make_ssa_name (TREE_TYPE (type
));
1480 init_stmt
= gimple_build_assign (new_temp
, COND_EXPR
,
1481 val
, true_val
, false_val
);
1482 vect_init_vector_1 (stmt_info
, init_stmt
, gsi
);
1488 gimple_seq stmts
= NULL
;
1489 if (! INTEGRAL_TYPE_P (TREE_TYPE (val
)))
1490 val
= gimple_build (&stmts
, VIEW_CONVERT_EXPR
,
1491 TREE_TYPE (type
), val
);
1493 /* ??? Condition vectorization expects us to do
1494 promotion of invariant/external defs. */
1495 val
= gimple_convert (&stmts
, TREE_TYPE (type
), val
);
1496 for (gimple_stmt_iterator gsi2
= gsi_start (stmts
);
1497 !gsi_end_p (gsi2
); )
1499 init_stmt
= gsi_stmt (gsi2
);
1500 gsi_remove (&gsi2
, false);
1501 vect_init_vector_1 (stmt_info
, init_stmt
, gsi
);
1505 val
= build_vector_from_val (type
, val
);
1508 new_temp
= vect_get_new_ssa_name (type
, vect_simple_var
, "cst_");
1509 init_stmt
= gimple_build_assign (new_temp
, val
);
1510 vect_init_vector_1 (stmt_info
, init_stmt
, gsi
);
1514 /* Function vect_get_vec_def_for_operand_1.
1516 For a defining stmt DEF_STMT_INFO of a scalar stmt, return a vector def
1517 with type DT that will be used in the vectorized stmt. */
1520 vect_get_vec_def_for_operand_1 (stmt_vec_info def_stmt_info
,
1521 enum vect_def_type dt
)
1524 stmt_vec_info vec_stmt_info
;
1528 /* operand is a constant or a loop invariant. */
1529 case vect_constant_def
:
1530 case vect_external_def
:
1531 /* Code should use vect_get_vec_def_for_operand. */
1534 /* Operand is defined by a loop header phi. In case of nested
1535 cycles we also may have uses of the backedge def. */
1536 case vect_reduction_def
:
1537 case vect_double_reduction_def
:
1538 case vect_nested_cycle
:
1539 case vect_induction_def
:
1540 gcc_assert (gimple_code (def_stmt_info
->stmt
) == GIMPLE_PHI
1541 || dt
== vect_nested_cycle
);
1544 /* operand is defined inside the loop. */
1545 case vect_internal_def
:
1547 /* Get the def from the vectorized stmt. */
1548 vec_stmt_info
= STMT_VINFO_VEC_STMT (def_stmt_info
);
1549 /* Get vectorized pattern statement. */
1551 && STMT_VINFO_IN_PATTERN_P (def_stmt_info
)
1552 && !STMT_VINFO_RELEVANT (def_stmt_info
))
1553 vec_stmt_info
= (STMT_VINFO_VEC_STMT
1554 (STMT_VINFO_RELATED_STMT (def_stmt_info
)));
1555 gcc_assert (vec_stmt_info
);
1556 if (gphi
*phi
= dyn_cast
<gphi
*> (vec_stmt_info
->stmt
))
1557 vec_oprnd
= PHI_RESULT (phi
);
1559 vec_oprnd
= gimple_get_lhs (vec_stmt_info
->stmt
);
1569 /* Function vect_get_vec_def_for_operand.
1571 OP is an operand in STMT_VINFO. This function returns a (vector) def
1572 that will be used in the vectorized stmt for STMT_VINFO.
1574 In the case that OP is an SSA_NAME which is defined in the loop, then
1575 STMT_VINFO_VEC_STMT of the defining stmt holds the relevant def.
1577 In case OP is an invariant or constant, a new stmt that creates a vector def
1578 needs to be introduced. VECTYPE may be used to specify a required type for
1579 vector invariant. */
1582 vect_get_vec_def_for_operand (tree op
, stmt_vec_info stmt_vinfo
, tree vectype
)
1585 enum vect_def_type dt
;
1587 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_vinfo
);
1589 if (dump_enabled_p ())
1590 dump_printf_loc (MSG_NOTE
, vect_location
,
1591 "vect_get_vec_def_for_operand: %T\n", op
);
1593 stmt_vec_info def_stmt_info
;
1594 is_simple_use
= vect_is_simple_use (op
, loop_vinfo
, &dt
,
1595 &def_stmt_info
, &def_stmt
);
1596 gcc_assert (is_simple_use
);
1597 if (def_stmt
&& dump_enabled_p ())
1598 dump_printf_loc (MSG_NOTE
, vect_location
, " def_stmt = %G", def_stmt
);
1600 if (dt
== vect_constant_def
|| dt
== vect_external_def
)
1602 tree stmt_vectype
= STMT_VINFO_VECTYPE (stmt_vinfo
);
1606 vector_type
= vectype
;
1607 else if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op
))
1608 && VECTOR_BOOLEAN_TYPE_P (stmt_vectype
))
1609 vector_type
= truth_type_for (stmt_vectype
);
1611 vector_type
= get_vectype_for_scalar_type (loop_vinfo
, TREE_TYPE (op
));
1613 gcc_assert (vector_type
);
1614 return vect_init_vector (stmt_vinfo
, op
, vector_type
, NULL
);
1617 return vect_get_vec_def_for_operand_1 (def_stmt_info
, dt
);
1621 /* Function vect_get_vec_def_for_stmt_copy
1623 Return a vector-def for an operand. This function is used when the
1624 vectorized stmt to be created (by the caller to this function) is a "copy"
1625 created in case the vectorized result cannot fit in one vector, and several
1626 copies of the vector-stmt are required. In this case the vector-def is
1627 retrieved from the vector stmt recorded in the STMT_VINFO_RELATED_STMT field
1628 of the stmt that defines VEC_OPRND. VINFO describes the vectorization.
1631 In case the vectorization factor (VF) is bigger than the number
1632 of elements that can fit in a vectype (nunits), we have to generate
1633 more than one vector stmt to vectorize the scalar stmt. This situation
1634 arises when there are multiple data-types operated upon in the loop; the
1635 smallest data-type determines the VF, and as a result, when vectorizing
1636 stmts operating on wider types we need to create 'VF/nunits' "copies" of the
1637 vector stmt (each computing a vector of 'nunits' results, and together
1638 computing 'VF' results in each iteration). This function is called when
1639 vectorizing such a stmt (e.g. vectorizing S2 in the illustration below, in
1640 which VF=16 and nunits=4, so the number of copies required is 4):
1642 scalar stmt: vectorized into: STMT_VINFO_RELATED_STMT
1644 S1: x = load VS1.0: vx.0 = memref0 VS1.1
1645 VS1.1: vx.1 = memref1 VS1.2
1646 VS1.2: vx.2 = memref2 VS1.3
1647 VS1.3: vx.3 = memref3
1649 S2: z = x + ... VSnew.0: vz0 = vx.0 + ... VSnew.1
1650 VSnew.1: vz1 = vx.1 + ... VSnew.2
1651 VSnew.2: vz2 = vx.2 + ... VSnew.3
1652 VSnew.3: vz3 = vx.3 + ...
1654 The vectorization of S1 is explained in vectorizable_load.
1655 The vectorization of S2:
1656 To create the first vector-stmt out of the 4 copies - VSnew.0 -
1657 the function 'vect_get_vec_def_for_operand' is called to
1658 get the relevant vector-def for each operand of S2. For operand x it
1659 returns the vector-def 'vx.0'.
1661 To create the remaining copies of the vector-stmt (VSnew.j), this
1662 function is called to get the relevant vector-def for each operand. It is
1663 obtained from the respective VS1.j stmt, which is recorded in the
1664 STMT_VINFO_RELATED_STMT field of the stmt that defines VEC_OPRND.
1666 For example, to obtain the vector-def 'vx.1' in order to create the
1667 vector stmt 'VSnew.1', this function is called with VEC_OPRND='vx.0'.
1668 Given 'vx0' we obtain the stmt that defines it ('VS1.0'); from the
1669 STMT_VINFO_RELATED_STMT field of 'VS1.0' we obtain the next copy - 'VS1.1',
1670 and return its def ('vx.1').
1671 Overall, to create the above sequence this function will be called 3 times:
1672 vx.1 = vect_get_vec_def_for_stmt_copy (vinfo, vx.0);
1673 vx.2 = vect_get_vec_def_for_stmt_copy (vinfo, vx.1);
1674 vx.3 = vect_get_vec_def_for_stmt_copy (vinfo, vx.2); */
1677 vect_get_vec_def_for_stmt_copy (vec_info
*vinfo
, tree vec_oprnd
)
1679 stmt_vec_info def_stmt_info
= vinfo
->lookup_def (vec_oprnd
);
1681 /* Do nothing; can reuse same def. */
1684 def_stmt_info
= STMT_VINFO_RELATED_STMT (def_stmt_info
);
1685 gcc_assert (def_stmt_info
);
1686 if (gphi
*phi
= dyn_cast
<gphi
*> (def_stmt_info
->stmt
))
1687 vec_oprnd
= PHI_RESULT (phi
);
1689 vec_oprnd
= gimple_get_lhs (def_stmt_info
->stmt
);
1694 /* Get vectorized definitions for the operands to create a copy of an original
1695 stmt. See vect_get_vec_def_for_stmt_copy () for details. */
1698 vect_get_vec_defs_for_stmt_copy (vec_info
*vinfo
,
1699 vec
<tree
> *vec_oprnds0
,
1700 vec
<tree
> *vec_oprnds1
)
1702 tree vec_oprnd
= vec_oprnds0
->pop ();
1704 vec_oprnd
= vect_get_vec_def_for_stmt_copy (vinfo
, vec_oprnd
);
1705 vec_oprnds0
->quick_push (vec_oprnd
);
1707 if (vec_oprnds1
&& vec_oprnds1
->length ())
1709 vec_oprnd
= vec_oprnds1
->pop ();
1710 vec_oprnd
= vect_get_vec_def_for_stmt_copy (vinfo
, vec_oprnd
);
1711 vec_oprnds1
->quick_push (vec_oprnd
);
1716 /* Get vectorized definitions for OP0 and OP1. */
1719 vect_get_vec_defs (tree op0
, tree op1
, stmt_vec_info stmt_info
,
1720 vec
<tree
> *vec_oprnds0
,
1721 vec
<tree
> *vec_oprnds1
,
1726 auto_vec
<vec
<tree
> > vec_defs (SLP_TREE_CHILDREN (slp_node
).length ());
1727 vect_get_slp_defs (slp_node
, &vec_defs
, op1
? 2 : 1);
1728 *vec_oprnds0
= vec_defs
[0];
1730 *vec_oprnds1
= vec_defs
[1];
1736 vec_oprnds0
->create (1);
1737 vec_oprnd
= vect_get_vec_def_for_operand (op0
, stmt_info
);
1738 vec_oprnds0
->quick_push (vec_oprnd
);
1742 vec_oprnds1
->create (1);
1743 vec_oprnd
= vect_get_vec_def_for_operand (op1
, stmt_info
);
1744 vec_oprnds1
->quick_push (vec_oprnd
);
1749 /* Helper function called by vect_finish_replace_stmt and
1750 vect_finish_stmt_generation. Set the location of the new
1751 statement and create and return a stmt_vec_info for it. */
1753 static stmt_vec_info
1754 vect_finish_stmt_generation_1 (stmt_vec_info stmt_info
, gimple
*vec_stmt
)
1756 vec_info
*vinfo
= stmt_info
->vinfo
;
1758 stmt_vec_info vec_stmt_info
= vinfo
->add_stmt (vec_stmt
);
1760 if (dump_enabled_p ())
1761 dump_printf_loc (MSG_NOTE
, vect_location
, "add new stmt: %G", vec_stmt
);
1763 gimple_set_location (vec_stmt
, gimple_location (stmt_info
->stmt
));
1765 /* While EH edges will generally prevent vectorization, stmt might
1766 e.g. be in a must-not-throw region. Ensure newly created stmts
1767 that could throw are part of the same region. */
1768 int lp_nr
= lookup_stmt_eh_lp (stmt_info
->stmt
);
1769 if (lp_nr
!= 0 && stmt_could_throw_p (cfun
, vec_stmt
))
1770 add_stmt_to_eh_lp (vec_stmt
, lp_nr
);
1772 return vec_stmt_info
;
1775 /* Replace the scalar statement STMT_INFO with a new vector statement VEC_STMT,
1776 which sets the same scalar result as STMT_INFO did. Create and return a
1777 stmt_vec_info for VEC_STMT. */
1780 vect_finish_replace_stmt (stmt_vec_info stmt_info
, gimple
*vec_stmt
)
1782 gimple
*scalar_stmt
= vect_orig_stmt (stmt_info
)->stmt
;
1783 gcc_assert (gimple_get_lhs (scalar_stmt
) == gimple_get_lhs (vec_stmt
));
1785 gimple_stmt_iterator gsi
= gsi_for_stmt (scalar_stmt
);
1786 gsi_replace (&gsi
, vec_stmt
, true);
1788 return vect_finish_stmt_generation_1 (stmt_info
, vec_stmt
);
1791 /* Add VEC_STMT to the vectorized implementation of STMT_INFO and insert it
1792 before *GSI. Create and return a stmt_vec_info for VEC_STMT. */
1795 vect_finish_stmt_generation (stmt_vec_info stmt_info
, gimple
*vec_stmt
,
1796 gimple_stmt_iterator
*gsi
)
1798 gcc_assert (gimple_code (stmt_info
->stmt
) != GIMPLE_LABEL
);
1800 if (!gsi_end_p (*gsi
)
1801 && gimple_has_mem_ops (vec_stmt
))
1803 gimple
*at_stmt
= gsi_stmt (*gsi
);
1804 tree vuse
= gimple_vuse (at_stmt
);
1805 if (vuse
&& TREE_CODE (vuse
) == SSA_NAME
)
1807 tree vdef
= gimple_vdef (at_stmt
);
1808 gimple_set_vuse (vec_stmt
, gimple_vuse (at_stmt
));
1809 /* If we have an SSA vuse and insert a store, update virtual
1810 SSA form to avoid triggering the renamer. Do so only
1811 if we can easily see all uses - which is what almost always
1812 happens with the way vectorized stmts are inserted. */
1813 if ((vdef
&& TREE_CODE (vdef
) == SSA_NAME
)
1814 && ((is_gimple_assign (vec_stmt
)
1815 && !is_gimple_reg (gimple_assign_lhs (vec_stmt
)))
1816 || (is_gimple_call (vec_stmt
)
1817 && !(gimple_call_flags (vec_stmt
)
1818 & (ECF_CONST
|ECF_PURE
|ECF_NOVOPS
)))))
1820 tree new_vdef
= copy_ssa_name (vuse
, vec_stmt
);
1821 gimple_set_vdef (vec_stmt
, new_vdef
);
1822 SET_USE (gimple_vuse_op (at_stmt
), new_vdef
);
1826 gsi_insert_before (gsi
, vec_stmt
, GSI_SAME_STMT
);
1827 return vect_finish_stmt_generation_1 (stmt_info
, vec_stmt
);
1830 /* We want to vectorize a call to combined function CFN with function
1831 decl FNDECL, using VECTYPE_OUT as the type of the output and VECTYPE_IN
1832 as the types of all inputs. Check whether this is possible using
1833 an internal function, returning its code if so or IFN_LAST if not. */
1836 vectorizable_internal_function (combined_fn cfn
, tree fndecl
,
1837 tree vectype_out
, tree vectype_in
)
1840 if (internal_fn_p (cfn
))
1841 ifn
= as_internal_fn (cfn
);
1843 ifn
= associated_internal_fn (fndecl
);
1844 if (ifn
!= IFN_LAST
&& direct_internal_fn_p (ifn
))
1846 const direct_internal_fn_info
&info
= direct_internal_fn (ifn
);
1847 if (info
.vectorizable
)
1849 tree type0
= (info
.type0
< 0 ? vectype_out
: vectype_in
);
1850 tree type1
= (info
.type1
< 0 ? vectype_out
: vectype_in
);
1851 if (direct_internal_fn_supported_p (ifn
, tree_pair (type0
, type1
),
1852 OPTIMIZE_FOR_SPEED
))
1860 static tree
permute_vec_elements (tree
, tree
, tree
, stmt_vec_info
,
1861 gimple_stmt_iterator
*);
1863 /* Check whether a load or store statement in the loop described by
1864 LOOP_VINFO is possible in a fully-masked loop. This is testing
1865 whether the vectorizer pass has the appropriate support, as well as
1866 whether the target does.
1868 VLS_TYPE says whether the statement is a load or store and VECTYPE
1869 is the type of the vector being loaded or stored. MEMORY_ACCESS_TYPE
1870 says how the load or store is going to be implemented and GROUP_SIZE
1871 is the number of load or store statements in the containing group.
1872 If the access is a gather load or scatter store, GS_INFO describes
1873 its arguments. If the load or store is conditional, SCALAR_MASK is the
1874 condition under which it occurs.
1876 Clear LOOP_VINFO_CAN_FULLY_MASK_P if a fully-masked loop is not
1877 supported, otherwise record the required mask types. */
1880 check_load_store_masking (loop_vec_info loop_vinfo
, tree vectype
,
1881 vec_load_store_type vls_type
, int group_size
,
1882 vect_memory_access_type memory_access_type
,
1883 gather_scatter_info
*gs_info
, tree scalar_mask
)
1885 /* Invariant loads need no special support. */
1886 if (memory_access_type
== VMAT_INVARIANT
)
1889 vec_loop_masks
*masks
= &LOOP_VINFO_MASKS (loop_vinfo
);
1890 machine_mode vecmode
= TYPE_MODE (vectype
);
1891 bool is_load
= (vls_type
== VLS_LOAD
);
1892 if (memory_access_type
== VMAT_LOAD_STORE_LANES
)
1895 ? !vect_load_lanes_supported (vectype
, group_size
, true)
1896 : !vect_store_lanes_supported (vectype
, group_size
, true))
1898 if (dump_enabled_p ())
1899 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1900 "can't use a fully-masked loop because the"
1901 " target doesn't have an appropriate masked"
1902 " load/store-lanes instruction.\n");
1903 LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo
) = false;
1906 unsigned int ncopies
= vect_get_num_copies (loop_vinfo
, vectype
);
1907 vect_record_loop_mask (loop_vinfo
, masks
, ncopies
, vectype
, scalar_mask
);
1911 if (memory_access_type
== VMAT_GATHER_SCATTER
)
1913 internal_fn ifn
= (is_load
1914 ? IFN_MASK_GATHER_LOAD
1915 : IFN_MASK_SCATTER_STORE
);
1916 if (!internal_gather_scatter_fn_supported_p (ifn
, vectype
,
1917 gs_info
->memory_type
,
1918 gs_info
->offset_vectype
,
1921 if (dump_enabled_p ())
1922 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1923 "can't use a fully-masked loop because the"
1924 " target doesn't have an appropriate masked"
1925 " gather load or scatter store instruction.\n");
1926 LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo
) = false;
1929 unsigned int ncopies
= vect_get_num_copies (loop_vinfo
, vectype
);
1930 vect_record_loop_mask (loop_vinfo
, masks
, ncopies
, vectype
, scalar_mask
);
1934 if (memory_access_type
!= VMAT_CONTIGUOUS
1935 && memory_access_type
!= VMAT_CONTIGUOUS_PERMUTE
)
1937 /* Element X of the data must come from iteration i * VF + X of the
1938 scalar loop. We need more work to support other mappings. */
1939 if (dump_enabled_p ())
1940 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1941 "can't use a fully-masked loop because an access"
1942 " isn't contiguous.\n");
1943 LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo
) = false;
1947 machine_mode mask_mode
;
1948 if (!VECTOR_MODE_P (vecmode
)
1949 || !targetm
.vectorize
.get_mask_mode (vecmode
).exists (&mask_mode
)
1950 || !can_vec_mask_load_store_p (vecmode
, mask_mode
, is_load
))
1952 if (dump_enabled_p ())
1953 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1954 "can't use a fully-masked loop because the target"
1955 " doesn't have the appropriate masked load or"
1957 LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo
) = false;
1960 /* We might load more scalars than we need for permuting SLP loads.
1961 We checked in get_group_load_store_type that the extra elements
1962 don't leak into a new vector. */
1963 poly_uint64 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
1964 poly_uint64 vf
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
);
1965 unsigned int nvectors
;
1966 if (can_div_away_from_zero_p (group_size
* vf
, nunits
, &nvectors
))
1967 vect_record_loop_mask (loop_vinfo
, masks
, nvectors
, vectype
, scalar_mask
);
1972 /* Return the mask input to a masked load or store. VEC_MASK is the vectorized
1973 form of the scalar mask condition and LOOP_MASK, if nonnull, is the mask
1974 that needs to be applied to all loads and stores in a vectorized loop.
1975 Return VEC_MASK if LOOP_MASK is null, otherwise return VEC_MASK & LOOP_MASK.
1977 MASK_TYPE is the type of both masks. If new statements are needed,
1978 insert them before GSI. */
1981 prepare_load_store_mask (tree mask_type
, tree loop_mask
, tree vec_mask
,
1982 gimple_stmt_iterator
*gsi
)
1984 gcc_assert (useless_type_conversion_p (mask_type
, TREE_TYPE (vec_mask
)));
1988 gcc_assert (TREE_TYPE (loop_mask
) == mask_type
);
1989 tree and_res
= make_temp_ssa_name (mask_type
, NULL
, "vec_mask_and");
1990 gimple
*and_stmt
= gimple_build_assign (and_res
, BIT_AND_EXPR
,
1991 vec_mask
, loop_mask
);
1992 gsi_insert_before (gsi
, and_stmt
, GSI_SAME_STMT
);
1996 /* Determine whether we can use a gather load or scatter store to vectorize
1997 strided load or store STMT_INFO by truncating the current offset to a
1998 smaller width. We need to be able to construct an offset vector:
2000 { 0, X, X*2, X*3, ... }
2002 without loss of precision, where X is STMT_INFO's DR_STEP.
2004 Return true if this is possible, describing the gather load or scatter
2005 store in GS_INFO. MASKED_P is true if the load or store is conditional. */
2008 vect_truncate_gather_scatter_offset (stmt_vec_info stmt_info
,
2009 loop_vec_info loop_vinfo
, bool masked_p
,
2010 gather_scatter_info
*gs_info
)
2012 dr_vec_info
*dr_info
= STMT_VINFO_DR_INFO (stmt_info
);
2013 data_reference
*dr
= dr_info
->dr
;
2014 tree step
= DR_STEP (dr
);
2015 if (TREE_CODE (step
) != INTEGER_CST
)
2017 /* ??? Perhaps we could use range information here? */
2018 if (dump_enabled_p ())
2019 dump_printf_loc (MSG_NOTE
, vect_location
,
2020 "cannot truncate variable step.\n");
2024 /* Get the number of bits in an element. */
2025 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
2026 scalar_mode element_mode
= SCALAR_TYPE_MODE (TREE_TYPE (vectype
));
2027 unsigned int element_bits
= GET_MODE_BITSIZE (element_mode
);
2029 /* Set COUNT to the upper limit on the number of elements - 1.
2030 Start with the maximum vectorization factor. */
2031 unsigned HOST_WIDE_INT count
= vect_max_vf (loop_vinfo
) - 1;
2033 /* Try lowering COUNT to the number of scalar latch iterations. */
2034 class loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
2035 widest_int max_iters
;
2036 if (max_loop_iterations (loop
, &max_iters
)
2037 && max_iters
< count
)
2038 count
= max_iters
.to_shwi ();
2040 /* Try scales of 1 and the element size. */
2041 int scales
[] = { 1, vect_get_scalar_dr_size (dr_info
) };
2042 wi::overflow_type overflow
= wi::OVF_NONE
;
2043 for (int i
= 0; i
< 2; ++i
)
2045 int scale
= scales
[i
];
2047 if (!wi::multiple_of_p (wi::to_widest (step
), scale
, SIGNED
, &factor
))
2050 /* Determine the minimum precision of (COUNT - 1) * STEP / SCALE. */
2051 widest_int range
= wi::mul (count
, factor
, SIGNED
, &overflow
);
2054 signop sign
= range
>= 0 ? UNSIGNED
: SIGNED
;
2055 unsigned int min_offset_bits
= wi::min_precision (range
, sign
);
2057 /* Find the narrowest viable offset type. */
2058 unsigned int offset_bits
= 1U << ceil_log2 (min_offset_bits
);
2059 tree offset_type
= build_nonstandard_integer_type (offset_bits
,
2062 /* See whether the target supports the operation with an offset
2063 no narrower than OFFSET_TYPE. */
2064 tree memory_type
= TREE_TYPE (DR_REF (dr
));
2065 if (!vect_gather_scatter_fn_p (loop_vinfo
, DR_IS_READ (dr
), masked_p
,
2066 vectype
, memory_type
, offset_type
, scale
,
2067 &gs_info
->ifn
, &gs_info
->offset_vectype
))
2070 gs_info
->decl
= NULL_TREE
;
2071 /* Logically the sum of DR_BASE_ADDRESS, DR_INIT and DR_OFFSET,
2072 but we don't need to store that here. */
2073 gs_info
->base
= NULL_TREE
;
2074 gs_info
->element_type
= TREE_TYPE (vectype
);
2075 gs_info
->offset
= fold_convert (offset_type
, step
);
2076 gs_info
->offset_dt
= vect_constant_def
;
2077 gs_info
->scale
= scale
;
2078 gs_info
->memory_type
= memory_type
;
2082 if (overflow
&& dump_enabled_p ())
2083 dump_printf_loc (MSG_NOTE
, vect_location
,
2084 "truncating gather/scatter offset to %d bits"
2085 " might change its value.\n", element_bits
);
2090 /* Return true if we can use gather/scatter internal functions to
2091 vectorize STMT_INFO, which is a grouped or strided load or store.
2092 MASKED_P is true if load or store is conditional. When returning
2093 true, fill in GS_INFO with the information required to perform the
2097 vect_use_strided_gather_scatters_p (stmt_vec_info stmt_info
,
2098 loop_vec_info loop_vinfo
, bool masked_p
,
2099 gather_scatter_info
*gs_info
)
2101 if (!vect_check_gather_scatter (stmt_info
, loop_vinfo
, gs_info
)
2103 return vect_truncate_gather_scatter_offset (stmt_info
, loop_vinfo
,
2106 tree old_offset_type
= TREE_TYPE (gs_info
->offset
);
2107 tree new_offset_type
= TREE_TYPE (gs_info
->offset_vectype
);
2109 gcc_assert (TYPE_PRECISION (new_offset_type
)
2110 >= TYPE_PRECISION (old_offset_type
));
2111 gs_info
->offset
= fold_convert (new_offset_type
, gs_info
->offset
);
2113 if (dump_enabled_p ())
2114 dump_printf_loc (MSG_NOTE
, vect_location
,
2115 "using gather/scatter for strided/grouped access,"
2116 " scale = %d\n", gs_info
->scale
);
2121 /* STMT_INFO is a non-strided load or store, meaning that it accesses
2122 elements with a known constant step. Return -1 if that step
2123 is negative, 0 if it is zero, and 1 if it is greater than zero. */
2126 compare_step_with_zero (stmt_vec_info stmt_info
)
2128 dr_vec_info
*dr_info
= STMT_VINFO_DR_INFO (stmt_info
);
2129 return tree_int_cst_compare (vect_dr_behavior (dr_info
)->step
,
2133 /* If the target supports a permute mask that reverses the elements in
2134 a vector of type VECTYPE, return that mask, otherwise return null. */
2137 perm_mask_for_reverse (tree vectype
)
2139 poly_uint64 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
2141 /* The encoding has a single stepped pattern. */
2142 vec_perm_builder
sel (nunits
, 1, 3);
2143 for (int i
= 0; i
< 3; ++i
)
2144 sel
.quick_push (nunits
- 1 - i
);
2146 vec_perm_indices
indices (sel
, 1, nunits
);
2147 if (!can_vec_perm_const_p (TYPE_MODE (vectype
), indices
))
2149 return vect_gen_perm_mask_checked (vectype
, indices
);
2152 /* A subroutine of get_load_store_type, with a subset of the same
2153 arguments. Handle the case where STMT_INFO is a load or store that
2154 accesses consecutive elements with a negative step. */
2156 static vect_memory_access_type
2157 get_negative_load_store_type (stmt_vec_info stmt_info
, tree vectype
,
2158 vec_load_store_type vls_type
,
2159 unsigned int ncopies
)
2161 dr_vec_info
*dr_info
= STMT_VINFO_DR_INFO (stmt_info
);
2162 dr_alignment_support alignment_support_scheme
;
2166 if (dump_enabled_p ())
2167 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2168 "multiple types with negative step.\n");
2169 return VMAT_ELEMENTWISE
;
2172 alignment_support_scheme
= vect_supportable_dr_alignment (dr_info
, false);
2173 if (alignment_support_scheme
!= dr_aligned
2174 && alignment_support_scheme
!= dr_unaligned_supported
)
2176 if (dump_enabled_p ())
2177 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2178 "negative step but alignment required.\n");
2179 return VMAT_ELEMENTWISE
;
2182 if (vls_type
== VLS_STORE_INVARIANT
)
2184 if (dump_enabled_p ())
2185 dump_printf_loc (MSG_NOTE
, vect_location
,
2186 "negative step with invariant source;"
2187 " no permute needed.\n");
2188 return VMAT_CONTIGUOUS_DOWN
;
2191 if (!perm_mask_for_reverse (vectype
))
2193 if (dump_enabled_p ())
2194 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2195 "negative step and reversing not supported.\n");
2196 return VMAT_ELEMENTWISE
;
2199 return VMAT_CONTIGUOUS_REVERSE
;
2202 /* STMT_INFO is either a masked or unconditional store. Return the value
2206 vect_get_store_rhs (stmt_vec_info stmt_info
)
2208 if (gassign
*assign
= dyn_cast
<gassign
*> (stmt_info
->stmt
))
2210 gcc_assert (gimple_assign_single_p (assign
));
2211 return gimple_assign_rhs1 (assign
);
2213 if (gcall
*call
= dyn_cast
<gcall
*> (stmt_info
->stmt
))
2215 internal_fn ifn
= gimple_call_internal_fn (call
);
2216 int index
= internal_fn_stored_value_index (ifn
);
2217 gcc_assert (index
>= 0);
2218 return gimple_call_arg (call
, index
);
2223 /* A subroutine of get_load_store_type, with a subset of the same
2224 arguments. Handle the case where STMT_INFO is part of a grouped load
2227 For stores, the statements in the group are all consecutive
2228 and there is no gap at the end. For loads, the statements in the
2229 group might not be consecutive; there can be gaps between statements
2230 as well as at the end. */
2233 get_group_load_store_type (stmt_vec_info stmt_info
, tree vectype
, bool slp
,
2234 bool masked_p
, vec_load_store_type vls_type
,
2235 vect_memory_access_type
*memory_access_type
,
2236 gather_scatter_info
*gs_info
)
2238 vec_info
*vinfo
= stmt_info
->vinfo
;
2239 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
2240 class loop
*loop
= loop_vinfo
? LOOP_VINFO_LOOP (loop_vinfo
) : NULL
;
2241 stmt_vec_info first_stmt_info
= DR_GROUP_FIRST_ELEMENT (stmt_info
);
2242 dr_vec_info
*first_dr_info
= STMT_VINFO_DR_INFO (first_stmt_info
);
2243 unsigned int group_size
= DR_GROUP_SIZE (first_stmt_info
);
2244 bool single_element_p
= (stmt_info
== first_stmt_info
2245 && !DR_GROUP_NEXT_ELEMENT (stmt_info
));
2246 unsigned HOST_WIDE_INT gap
= DR_GROUP_GAP (first_stmt_info
);
2247 poly_uint64 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
2249 /* True if the vectorized statements would access beyond the last
2250 statement in the group. */
2251 bool overrun_p
= false;
2253 /* True if we can cope with such overrun by peeling for gaps, so that
2254 there is at least one final scalar iteration after the vector loop. */
2255 bool can_overrun_p
= (!masked_p
2256 && vls_type
== VLS_LOAD
2260 /* There can only be a gap at the end of the group if the stride is
2261 known at compile time. */
2262 gcc_assert (!STMT_VINFO_STRIDED_P (first_stmt_info
) || gap
== 0);
2264 /* Stores can't yet have gaps. */
2265 gcc_assert (slp
|| vls_type
== VLS_LOAD
|| gap
== 0);
2269 if (STMT_VINFO_STRIDED_P (first_stmt_info
))
2271 /* Try to use consecutive accesses of DR_GROUP_SIZE elements,
2272 separated by the stride, until we have a complete vector.
2273 Fall back to scalar accesses if that isn't possible. */
2274 if (multiple_p (nunits
, group_size
))
2275 *memory_access_type
= VMAT_STRIDED_SLP
;
2277 *memory_access_type
= VMAT_ELEMENTWISE
;
2281 overrun_p
= loop_vinfo
&& gap
!= 0;
2282 if (overrun_p
&& vls_type
!= VLS_LOAD
)
2284 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2285 "Grouped store with gaps requires"
2286 " non-consecutive accesses\n");
2289 /* An overrun is fine if the trailing elements are smaller
2290 than the alignment boundary B. Every vector access will
2291 be a multiple of B and so we are guaranteed to access a
2292 non-gap element in the same B-sized block. */
2294 && gap
< (vect_known_alignment_in_bytes (first_dr_info
)
2295 / vect_get_scalar_dr_size (first_dr_info
)))
2298 /* If the gap splits the vector in half and the target
2299 can do half-vector operations avoid the epilogue peeling
2300 by simply loading half of the vector only. Usually
2301 the construction with an upper zero half will be elided. */
2302 dr_alignment_support alignment_support_scheme
;
2303 scalar_mode elmode
= SCALAR_TYPE_MODE (TREE_TYPE (vectype
));
2307 && (((alignment_support_scheme
2308 = vect_supportable_dr_alignment (first_dr_info
, false)))
2310 || alignment_support_scheme
== dr_unaligned_supported
)
2311 && known_eq (nunits
, (group_size
- gap
) * 2)
2312 && known_eq (nunits
, group_size
)
2313 && VECTOR_MODE_P (TYPE_MODE (vectype
))
2314 && related_vector_mode (TYPE_MODE (vectype
), elmode
,
2315 group_size
- gap
).exists (&vmode
)
2316 && (convert_optab_handler (vec_init_optab
,
2317 TYPE_MODE (vectype
), vmode
)
2318 != CODE_FOR_nothing
))
2321 if (overrun_p
&& !can_overrun_p
)
2323 if (dump_enabled_p ())
2324 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2325 "Peeling for outer loop is not supported\n");
2328 int cmp
= compare_step_with_zero (stmt_info
);
2330 *memory_access_type
= get_negative_load_store_type
2331 (stmt_info
, vectype
, vls_type
, 1);
2334 gcc_assert (!loop_vinfo
|| cmp
> 0);
2335 *memory_access_type
= VMAT_CONTIGUOUS
;
2341 /* We can always handle this case using elementwise accesses,
2342 but see if something more efficient is available. */
2343 *memory_access_type
= VMAT_ELEMENTWISE
;
2345 /* If there is a gap at the end of the group then these optimizations
2346 would access excess elements in the last iteration. */
2347 bool would_overrun_p
= (gap
!= 0);
2348 /* An overrun is fine if the trailing elements are smaller than the
2349 alignment boundary B. Every vector access will be a multiple of B
2350 and so we are guaranteed to access a non-gap element in the
2351 same B-sized block. */
2354 && gap
< (vect_known_alignment_in_bytes (first_dr_info
)
2355 / vect_get_scalar_dr_size (first_dr_info
)))
2356 would_overrun_p
= false;
2358 if (!STMT_VINFO_STRIDED_P (first_stmt_info
)
2359 && (can_overrun_p
|| !would_overrun_p
)
2360 && compare_step_with_zero (stmt_info
) > 0)
2362 /* First cope with the degenerate case of a single-element
2364 if (known_eq (TYPE_VECTOR_SUBPARTS (vectype
), 1U))
2365 *memory_access_type
= VMAT_CONTIGUOUS
;
2367 /* Otherwise try using LOAD/STORE_LANES. */
2368 if (*memory_access_type
== VMAT_ELEMENTWISE
2369 && (vls_type
== VLS_LOAD
2370 ? vect_load_lanes_supported (vectype
, group_size
, masked_p
)
2371 : vect_store_lanes_supported (vectype
, group_size
,
2374 *memory_access_type
= VMAT_LOAD_STORE_LANES
;
2375 overrun_p
= would_overrun_p
;
2378 /* If that fails, try using permuting loads. */
2379 if (*memory_access_type
== VMAT_ELEMENTWISE
2380 && (vls_type
== VLS_LOAD
2381 ? vect_grouped_load_supported (vectype
, single_element_p
,
2383 : vect_grouped_store_supported (vectype
, group_size
)))
2385 *memory_access_type
= VMAT_CONTIGUOUS_PERMUTE
;
2386 overrun_p
= would_overrun_p
;
2390 /* As a last resort, trying using a gather load or scatter store.
2392 ??? Although the code can handle all group sizes correctly,
2393 it probably isn't a win to use separate strided accesses based
2394 on nearby locations. Or, even if it's a win over scalar code,
2395 it might not be a win over vectorizing at a lower VF, if that
2396 allows us to use contiguous accesses. */
2397 if (*memory_access_type
== VMAT_ELEMENTWISE
2400 && vect_use_strided_gather_scatters_p (stmt_info
, loop_vinfo
,
2402 *memory_access_type
= VMAT_GATHER_SCATTER
;
2405 if (vls_type
!= VLS_LOAD
&& first_stmt_info
== stmt_info
)
2407 /* STMT is the leader of the group. Check the operands of all the
2408 stmts of the group. */
2409 stmt_vec_info next_stmt_info
= DR_GROUP_NEXT_ELEMENT (stmt_info
);
2410 while (next_stmt_info
)
2412 tree op
= vect_get_store_rhs (next_stmt_info
);
2413 enum vect_def_type dt
;
2414 if (!vect_is_simple_use (op
, vinfo
, &dt
))
2416 if (dump_enabled_p ())
2417 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2418 "use not simple.\n");
2421 next_stmt_info
= DR_GROUP_NEXT_ELEMENT (next_stmt_info
);
2427 gcc_assert (can_overrun_p
);
2428 if (dump_enabled_p ())
2429 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2430 "Data access with gaps requires scalar "
2432 LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo
) = true;
2438 /* Analyze load or store statement STMT_INFO of type VLS_TYPE. Return true
2439 if there is a memory access type that the vectorized form can use,
2440 storing it in *MEMORY_ACCESS_TYPE if so. If we decide to use gathers
2441 or scatters, fill in GS_INFO accordingly.
2443 SLP says whether we're performing SLP rather than loop vectorization.
2444 MASKED_P is true if the statement is conditional on a vectorized mask.
2445 VECTYPE is the vector type that the vectorized statements will use.
2446 NCOPIES is the number of vector statements that will be needed. */
2449 get_load_store_type (stmt_vec_info stmt_info
, tree vectype
, bool slp
,
2450 bool masked_p
, vec_load_store_type vls_type
,
2451 unsigned int ncopies
,
2452 vect_memory_access_type
*memory_access_type
,
2453 gather_scatter_info
*gs_info
)
2455 vec_info
*vinfo
= stmt_info
->vinfo
;
2456 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
2457 poly_uint64 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
2458 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info
))
2460 *memory_access_type
= VMAT_GATHER_SCATTER
;
2461 if (!vect_check_gather_scatter (stmt_info
, loop_vinfo
, gs_info
))
2463 else if (!vect_is_simple_use (gs_info
->offset
, vinfo
,
2464 &gs_info
->offset_dt
,
2465 &gs_info
->offset_vectype
))
2467 if (dump_enabled_p ())
2468 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2469 "%s index use not simple.\n",
2470 vls_type
== VLS_LOAD
? "gather" : "scatter");
2474 else if (STMT_VINFO_GROUPED_ACCESS (stmt_info
))
2476 if (!get_group_load_store_type (stmt_info
, vectype
, slp
, masked_p
,
2477 vls_type
, memory_access_type
, gs_info
))
2480 else if (STMT_VINFO_STRIDED_P (stmt_info
))
2484 && vect_use_strided_gather_scatters_p (stmt_info
, loop_vinfo
,
2486 *memory_access_type
= VMAT_GATHER_SCATTER
;
2488 *memory_access_type
= VMAT_ELEMENTWISE
;
2492 int cmp
= compare_step_with_zero (stmt_info
);
2494 *memory_access_type
= get_negative_load_store_type
2495 (stmt_info
, vectype
, vls_type
, ncopies
);
2498 gcc_assert (vls_type
== VLS_LOAD
);
2499 *memory_access_type
= VMAT_INVARIANT
;
2502 *memory_access_type
= VMAT_CONTIGUOUS
;
2505 if ((*memory_access_type
== VMAT_ELEMENTWISE
2506 || *memory_access_type
== VMAT_STRIDED_SLP
)
2507 && !nunits
.is_constant ())
2509 if (dump_enabled_p ())
2510 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2511 "Not using elementwise accesses due to variable "
2512 "vectorization factor.\n");
2516 /* FIXME: At the moment the cost model seems to underestimate the
2517 cost of using elementwise accesses. This check preserves the
2518 traditional behavior until that can be fixed. */
2519 stmt_vec_info first_stmt_info
= DR_GROUP_FIRST_ELEMENT (stmt_info
);
2520 if (!first_stmt_info
)
2521 first_stmt_info
= stmt_info
;
2522 if (*memory_access_type
== VMAT_ELEMENTWISE
2523 && !STMT_VINFO_STRIDED_P (first_stmt_info
)
2524 && !(stmt_info
== DR_GROUP_FIRST_ELEMENT (stmt_info
)
2525 && !DR_GROUP_NEXT_ELEMENT (stmt_info
)
2526 && !pow2p_hwi (DR_GROUP_SIZE (stmt_info
))))
2528 if (dump_enabled_p ())
2529 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2530 "not falling back to elementwise accesses\n");
2536 /* Return true if boolean argument MASK is suitable for vectorizing
2537 conditional operation STMT_INFO. When returning true, store the type
2538 of the definition in *MASK_DT_OUT and the type of the vectorized mask
2539 in *MASK_VECTYPE_OUT. */
2542 vect_check_scalar_mask (stmt_vec_info stmt_info
, tree mask
,
2543 vect_def_type
*mask_dt_out
,
2544 tree
*mask_vectype_out
)
2546 vec_info
*vinfo
= stmt_info
->vinfo
;
2547 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (mask
)))
2549 if (dump_enabled_p ())
2550 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2551 "mask argument is not a boolean.\n");
2555 if (TREE_CODE (mask
) != SSA_NAME
)
2557 if (dump_enabled_p ())
2558 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2559 "mask argument is not an SSA name.\n");
2563 enum vect_def_type mask_dt
;
2565 if (!vect_is_simple_use (mask
, stmt_info
->vinfo
, &mask_dt
, &mask_vectype
))
2567 if (dump_enabled_p ())
2568 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2569 "mask use not simple.\n");
2573 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
2575 mask_vectype
= get_mask_type_for_scalar_type (vinfo
, TREE_TYPE (vectype
));
2577 if (!mask_vectype
|| !VECTOR_BOOLEAN_TYPE_P (mask_vectype
))
2579 if (dump_enabled_p ())
2580 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2581 "could not find an appropriate vector mask type.\n");
2585 if (maybe_ne (TYPE_VECTOR_SUBPARTS (mask_vectype
),
2586 TYPE_VECTOR_SUBPARTS (vectype
)))
2588 if (dump_enabled_p ())
2589 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2590 "vector mask type %T"
2591 " does not match vector data type %T.\n",
2592 mask_vectype
, vectype
);
2597 *mask_dt_out
= mask_dt
;
2598 *mask_vectype_out
= mask_vectype
;
2602 /* Return true if stored value RHS is suitable for vectorizing store
2603 statement STMT_INFO. When returning true, store the type of the
2604 definition in *RHS_DT_OUT, the type of the vectorized store value in
2605 *RHS_VECTYPE_OUT and the type of the store in *VLS_TYPE_OUT. */
2608 vect_check_store_rhs (stmt_vec_info stmt_info
, tree rhs
,
2609 vect_def_type
*rhs_dt_out
, tree
*rhs_vectype_out
,
2610 vec_load_store_type
*vls_type_out
)
2612 /* In the case this is a store from a constant make sure
2613 native_encode_expr can handle it. */
2614 if (CONSTANT_CLASS_P (rhs
) && native_encode_expr (rhs
, NULL
, 64) == 0)
2616 if (dump_enabled_p ())
2617 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2618 "cannot encode constant as a byte sequence.\n");
2622 enum vect_def_type rhs_dt
;
2624 if (!vect_is_simple_use (rhs
, stmt_info
->vinfo
, &rhs_dt
, &rhs_vectype
))
2626 if (dump_enabled_p ())
2627 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2628 "use not simple.\n");
2632 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
2633 if (rhs_vectype
&& !useless_type_conversion_p (vectype
, rhs_vectype
))
2635 if (dump_enabled_p ())
2636 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2637 "incompatible vector types.\n");
2641 *rhs_dt_out
= rhs_dt
;
2642 *rhs_vectype_out
= rhs_vectype
;
2643 if (rhs_dt
== vect_constant_def
|| rhs_dt
== vect_external_def
)
2644 *vls_type_out
= VLS_STORE_INVARIANT
;
2646 *vls_type_out
= VLS_STORE
;
2650 /* Build an all-ones vector mask of type MASKTYPE while vectorizing STMT_INFO.
2651 Note that we support masks with floating-point type, in which case the
2652 floats are interpreted as a bitmask. */
2655 vect_build_all_ones_mask (stmt_vec_info stmt_info
, tree masktype
)
2657 if (TREE_CODE (masktype
) == INTEGER_TYPE
)
2658 return build_int_cst (masktype
, -1);
2659 else if (TREE_CODE (TREE_TYPE (masktype
)) == INTEGER_TYPE
)
2661 tree mask
= build_int_cst (TREE_TYPE (masktype
), -1);
2662 mask
= build_vector_from_val (masktype
, mask
);
2663 return vect_init_vector (stmt_info
, mask
, masktype
, NULL
);
2665 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (masktype
)))
2669 for (int j
= 0; j
< 6; ++j
)
2671 real_from_target (&r
, tmp
, TYPE_MODE (TREE_TYPE (masktype
)));
2672 tree mask
= build_real (TREE_TYPE (masktype
), r
);
2673 mask
= build_vector_from_val (masktype
, mask
);
2674 return vect_init_vector (stmt_info
, mask
, masktype
, NULL
);
2679 /* Build an all-zero merge value of type VECTYPE while vectorizing
2680 STMT_INFO as a gather load. */
2683 vect_build_zero_merge_argument (stmt_vec_info stmt_info
, tree vectype
)
2686 if (TREE_CODE (TREE_TYPE (vectype
)) == INTEGER_TYPE
)
2687 merge
= build_int_cst (TREE_TYPE (vectype
), 0);
2688 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (vectype
)))
2692 for (int j
= 0; j
< 6; ++j
)
2694 real_from_target (&r
, tmp
, TYPE_MODE (TREE_TYPE (vectype
)));
2695 merge
= build_real (TREE_TYPE (vectype
), r
);
2699 merge
= build_vector_from_val (vectype
, merge
);
2700 return vect_init_vector (stmt_info
, merge
, vectype
, NULL
);
2703 /* Build a gather load call while vectorizing STMT_INFO. Insert new
2704 instructions before GSI and add them to VEC_STMT. GS_INFO describes
2705 the gather load operation. If the load is conditional, MASK is the
2706 unvectorized condition and MASK_DT is its definition type, otherwise
2710 vect_build_gather_load_calls (stmt_vec_info stmt_info
,
2711 gimple_stmt_iterator
*gsi
,
2712 stmt_vec_info
*vec_stmt
,
2713 gather_scatter_info
*gs_info
,
2716 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
2717 class loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
2718 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
2719 poly_uint64 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
2720 int ncopies
= vect_get_num_copies (loop_vinfo
, vectype
);
2721 edge pe
= loop_preheader_edge (loop
);
2722 enum { NARROW
, NONE
, WIDEN
} modifier
;
2723 poly_uint64 gather_off_nunits
2724 = TYPE_VECTOR_SUBPARTS (gs_info
->offset_vectype
);
2726 tree arglist
= TYPE_ARG_TYPES (TREE_TYPE (gs_info
->decl
));
2727 tree rettype
= TREE_TYPE (TREE_TYPE (gs_info
->decl
));
2728 tree srctype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
2729 tree ptrtype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
2730 tree idxtype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
2731 tree masktype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
2732 tree scaletype
= TREE_VALUE (arglist
);
2733 tree real_masktype
= masktype
;
2734 gcc_checking_assert (types_compatible_p (srctype
, rettype
)
2736 || TREE_CODE (masktype
) == INTEGER_TYPE
2737 || types_compatible_p (srctype
, masktype
)));
2738 if (mask
&& TREE_CODE (masktype
) == INTEGER_TYPE
)
2739 masktype
= truth_type_for (srctype
);
2741 tree mask_halftype
= masktype
;
2742 tree perm_mask
= NULL_TREE
;
2743 tree mask_perm_mask
= NULL_TREE
;
2744 if (known_eq (nunits
, gather_off_nunits
))
2746 else if (known_eq (nunits
* 2, gather_off_nunits
))
2750 /* Currently widening gathers and scatters are only supported for
2751 fixed-length vectors. */
2752 int count
= gather_off_nunits
.to_constant ();
2753 vec_perm_builder
sel (count
, count
, 1);
2754 for (int i
= 0; i
< count
; ++i
)
2755 sel
.quick_push (i
| (count
/ 2));
2757 vec_perm_indices
indices (sel
, 1, count
);
2758 perm_mask
= vect_gen_perm_mask_checked (gs_info
->offset_vectype
,
2761 else if (known_eq (nunits
, gather_off_nunits
* 2))
2765 /* Currently narrowing gathers and scatters are only supported for
2766 fixed-length vectors. */
2767 int count
= nunits
.to_constant ();
2768 vec_perm_builder
sel (count
, count
, 1);
2769 sel
.quick_grow (count
);
2770 for (int i
= 0; i
< count
; ++i
)
2771 sel
[i
] = i
< count
/ 2 ? i
: i
+ count
/ 2;
2772 vec_perm_indices
indices (sel
, 2, count
);
2773 perm_mask
= vect_gen_perm_mask_checked (vectype
, indices
);
2777 if (mask
&& masktype
== real_masktype
)
2779 for (int i
= 0; i
< count
; ++i
)
2780 sel
[i
] = i
| (count
/ 2);
2781 indices
.new_vector (sel
, 2, count
);
2782 mask_perm_mask
= vect_gen_perm_mask_checked (masktype
, indices
);
2785 mask_halftype
= truth_type_for (gs_info
->offset_vectype
);
2790 tree scalar_dest
= gimple_get_lhs (stmt_info
->stmt
);
2791 tree vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
2793 tree ptr
= fold_convert (ptrtype
, gs_info
->base
);
2794 if (!is_gimple_min_invariant (ptr
))
2797 ptr
= force_gimple_operand (ptr
, &seq
, true, NULL_TREE
);
2798 basic_block new_bb
= gsi_insert_seq_on_edge_immediate (pe
, seq
);
2799 gcc_assert (!new_bb
);
2802 tree scale
= build_int_cst (scaletype
, gs_info
->scale
);
2804 tree vec_oprnd0
= NULL_TREE
;
2805 tree vec_mask
= NULL_TREE
;
2806 tree src_op
= NULL_TREE
;
2807 tree mask_op
= NULL_TREE
;
2808 tree prev_res
= NULL_TREE
;
2809 stmt_vec_info prev_stmt_info
= NULL
;
2813 src_op
= vect_build_zero_merge_argument (stmt_info
, rettype
);
2814 mask_op
= vect_build_all_ones_mask (stmt_info
, masktype
);
2817 for (int j
= 0; j
< ncopies
; ++j
)
2820 if (modifier
== WIDEN
&& (j
& 1))
2821 op
= permute_vec_elements (vec_oprnd0
, vec_oprnd0
,
2822 perm_mask
, stmt_info
, gsi
);
2825 = vect_get_vec_def_for_operand (gs_info
->offset
, stmt_info
);
2827 op
= vec_oprnd0
= vect_get_vec_def_for_stmt_copy (loop_vinfo
,
2830 if (!useless_type_conversion_p (idxtype
, TREE_TYPE (op
)))
2832 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op
)),
2833 TYPE_VECTOR_SUBPARTS (idxtype
)));
2834 var
= vect_get_new_ssa_name (idxtype
, vect_simple_var
);
2835 op
= build1 (VIEW_CONVERT_EXPR
, idxtype
, op
);
2836 gassign
*new_stmt
= gimple_build_assign (var
, VIEW_CONVERT_EXPR
, op
);
2837 vect_finish_stmt_generation (stmt_info
, new_stmt
, gsi
);
2843 if (mask_perm_mask
&& (j
& 1))
2844 mask_op
= permute_vec_elements (mask_op
, mask_op
,
2845 mask_perm_mask
, stmt_info
, gsi
);
2849 vec_mask
= vect_get_vec_def_for_operand (mask
, stmt_info
);
2850 else if (modifier
!= NARROW
|| (j
& 1) == 0)
2851 vec_mask
= vect_get_vec_def_for_stmt_copy (loop_vinfo
,
2855 if (!useless_type_conversion_p (masktype
, TREE_TYPE (vec_mask
)))
2857 poly_uint64 sub1
= TYPE_VECTOR_SUBPARTS (TREE_TYPE (mask_op
));
2858 poly_uint64 sub2
= TYPE_VECTOR_SUBPARTS (masktype
);
2859 gcc_assert (known_eq (sub1
, sub2
));
2860 var
= vect_get_new_ssa_name (masktype
, vect_simple_var
);
2861 mask_op
= build1 (VIEW_CONVERT_EXPR
, masktype
, mask_op
);
2863 = gimple_build_assign (var
, VIEW_CONVERT_EXPR
, mask_op
);
2864 vect_finish_stmt_generation (stmt_info
, new_stmt
, gsi
);
2868 if (modifier
== NARROW
&& masktype
!= real_masktype
)
2870 var
= vect_get_new_ssa_name (mask_halftype
, vect_simple_var
);
2872 = gimple_build_assign (var
, (j
& 1) ? VEC_UNPACK_HI_EXPR
2873 : VEC_UNPACK_LO_EXPR
,
2875 vect_finish_stmt_generation (stmt_info
, new_stmt
, gsi
);
2881 tree mask_arg
= mask_op
;
2882 if (masktype
!= real_masktype
)
2884 tree utype
, optype
= TREE_TYPE (mask_op
);
2885 if (TYPE_MODE (real_masktype
) == TYPE_MODE (optype
))
2886 utype
= real_masktype
;
2888 utype
= lang_hooks
.types
.type_for_mode (TYPE_MODE (optype
), 1);
2889 var
= vect_get_new_ssa_name (utype
, vect_scalar_var
);
2890 mask_arg
= build1 (VIEW_CONVERT_EXPR
, utype
, mask_op
);
2892 = gimple_build_assign (var
, VIEW_CONVERT_EXPR
, mask_arg
);
2893 vect_finish_stmt_generation (stmt_info
, new_stmt
, gsi
);
2895 if (!useless_type_conversion_p (real_masktype
, utype
))
2897 gcc_assert (TYPE_PRECISION (utype
)
2898 <= TYPE_PRECISION (real_masktype
));
2899 var
= vect_get_new_ssa_name (real_masktype
, vect_scalar_var
);
2900 new_stmt
= gimple_build_assign (var
, NOP_EXPR
, mask_arg
);
2901 vect_finish_stmt_generation (stmt_info
, new_stmt
, gsi
);
2904 src_op
= build_zero_cst (srctype
);
2906 gcall
*new_call
= gimple_build_call (gs_info
->decl
, 5, src_op
, ptr
, op
,
2909 stmt_vec_info new_stmt_info
;
2910 if (!useless_type_conversion_p (vectype
, rettype
))
2912 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (vectype
),
2913 TYPE_VECTOR_SUBPARTS (rettype
)));
2914 op
= vect_get_new_ssa_name (rettype
, vect_simple_var
);
2915 gimple_call_set_lhs (new_call
, op
);
2916 vect_finish_stmt_generation (stmt_info
, new_call
, gsi
);
2917 var
= make_ssa_name (vec_dest
);
2918 op
= build1 (VIEW_CONVERT_EXPR
, vectype
, op
);
2919 gassign
*new_stmt
= gimple_build_assign (var
, VIEW_CONVERT_EXPR
, op
);
2921 = vect_finish_stmt_generation (stmt_info
, new_stmt
, gsi
);
2925 var
= make_ssa_name (vec_dest
, new_call
);
2926 gimple_call_set_lhs (new_call
, var
);
2928 = vect_finish_stmt_generation (stmt_info
, new_call
, gsi
);
2931 if (modifier
== NARROW
)
2938 var
= permute_vec_elements (prev_res
, var
, perm_mask
,
2940 new_stmt_info
= loop_vinfo
->lookup_def (var
);
2943 if (prev_stmt_info
== NULL
)
2944 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt_info
;
2946 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt_info
;
2947 prev_stmt_info
= new_stmt_info
;
2951 /* Prepare the base and offset in GS_INFO for vectorization.
2952 Set *DATAREF_PTR to the loop-invariant base address and *VEC_OFFSET
2953 to the vectorized offset argument for the first copy of STMT_INFO.
2954 STMT_INFO is the statement described by GS_INFO and LOOP is the
2958 vect_get_gather_scatter_ops (class loop
*loop
, stmt_vec_info stmt_info
,
2959 gather_scatter_info
*gs_info
,
2960 tree
*dataref_ptr
, tree
*vec_offset
)
2962 gimple_seq stmts
= NULL
;
2963 *dataref_ptr
= force_gimple_operand (gs_info
->base
, &stmts
, true, NULL_TREE
);
2967 edge pe
= loop_preheader_edge (loop
);
2968 new_bb
= gsi_insert_seq_on_edge_immediate (pe
, stmts
);
2969 gcc_assert (!new_bb
);
2971 *vec_offset
= vect_get_vec_def_for_operand (gs_info
->offset
, stmt_info
,
2972 gs_info
->offset_vectype
);
2975 /* Prepare to implement a grouped or strided load or store using
2976 the gather load or scatter store operation described by GS_INFO.
2977 STMT_INFO is the load or store statement.
2979 Set *DATAREF_BUMP to the amount that should be added to the base
2980 address after each copy of the vectorized statement. Set *VEC_OFFSET
2981 to an invariant offset vector in which element I has the value
2982 I * DR_STEP / SCALE. */
2985 vect_get_strided_load_store_ops (stmt_vec_info stmt_info
,
2986 loop_vec_info loop_vinfo
,
2987 gather_scatter_info
*gs_info
,
2988 tree
*dataref_bump
, tree
*vec_offset
)
2990 struct data_reference
*dr
= STMT_VINFO_DATA_REF (stmt_info
);
2991 class loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
2992 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
2995 tree bump
= size_binop (MULT_EXPR
,
2996 fold_convert (sizetype
, unshare_expr (DR_STEP (dr
))),
2997 size_int (TYPE_VECTOR_SUBPARTS (vectype
)));
2998 *dataref_bump
= force_gimple_operand (bump
, &stmts
, true, NULL_TREE
);
3000 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop
), stmts
);
3002 /* The offset given in GS_INFO can have pointer type, so use the element
3003 type of the vector instead. */
3004 tree offset_type
= TREE_TYPE (gs_info
->offset
);
3005 offset_type
= TREE_TYPE (gs_info
->offset_vectype
);
3007 /* Calculate X = DR_STEP / SCALE and convert it to the appropriate type. */
3008 tree step
= size_binop (EXACT_DIV_EXPR
, unshare_expr (DR_STEP (dr
)),
3009 ssize_int (gs_info
->scale
));
3010 step
= fold_convert (offset_type
, step
);
3011 step
= force_gimple_operand (step
, &stmts
, true, NULL_TREE
);
3013 /* Create {0, X, X*2, X*3, ...}. */
3014 *vec_offset
= gimple_build (&stmts
, VEC_SERIES_EXPR
, gs_info
->offset_vectype
,
3015 build_zero_cst (offset_type
), step
);
3017 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop
), stmts
);
3020 /* Return the amount that should be added to a vector pointer to move
3021 to the next or previous copy of AGGR_TYPE. DR_INFO is the data reference
3022 being vectorized and MEMORY_ACCESS_TYPE describes the type of
3026 vect_get_data_ptr_increment (dr_vec_info
*dr_info
, tree aggr_type
,
3027 vect_memory_access_type memory_access_type
)
3029 if (memory_access_type
== VMAT_INVARIANT
)
3030 return size_zero_node
;
3032 tree iv_step
= TYPE_SIZE_UNIT (aggr_type
);
3033 tree step
= vect_dr_behavior (dr_info
)->step
;
3034 if (tree_int_cst_sgn (step
) == -1)
3035 iv_step
= fold_build1 (NEGATE_EXPR
, TREE_TYPE (iv_step
), iv_step
);
3039 /* Check and perform vectorization of BUILT_IN_BSWAP{16,32,64}. */
3042 vectorizable_bswap (stmt_vec_info stmt_info
, gimple_stmt_iterator
*gsi
,
3043 stmt_vec_info
*vec_stmt
, slp_tree slp_node
,
3044 tree vectype_in
, stmt_vector_for_cost
*cost_vec
)
3047 gcall
*stmt
= as_a
<gcall
*> (stmt_info
->stmt
);
3048 vec_info
*vinfo
= stmt_info
->vinfo
;
3049 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
3052 op
= gimple_call_arg (stmt
, 0);
3053 vectype
= STMT_VINFO_VECTYPE (stmt_info
);
3054 poly_uint64 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
3056 /* Multiple types in SLP are handled by creating the appropriate number of
3057 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
3062 ncopies
= vect_get_num_copies (loop_vinfo
, vectype
);
3064 gcc_assert (ncopies
>= 1);
3066 tree char_vectype
= get_same_sized_vectype (char_type_node
, vectype_in
);
3070 poly_uint64 num_bytes
= TYPE_VECTOR_SUBPARTS (char_vectype
);
3071 unsigned word_bytes
;
3072 if (!constant_multiple_p (num_bytes
, nunits
, &word_bytes
))
3075 /* The encoding uses one stepped pattern for each byte in the word. */
3076 vec_perm_builder
elts (num_bytes
, word_bytes
, 3);
3077 for (unsigned i
= 0; i
< 3; ++i
)
3078 for (unsigned j
= 0; j
< word_bytes
; ++j
)
3079 elts
.quick_push ((i
+ 1) * word_bytes
- j
- 1);
3081 vec_perm_indices
indices (elts
, 1, num_bytes
);
3082 if (!can_vec_perm_const_p (TYPE_MODE (char_vectype
), indices
))
3087 STMT_VINFO_TYPE (stmt_info
) = call_vec_info_type
;
3088 DUMP_VECT_SCOPE ("vectorizable_bswap");
3091 record_stmt_cost (cost_vec
,
3092 1, vector_stmt
, stmt_info
, 0, vect_prologue
);
3093 record_stmt_cost (cost_vec
,
3094 ncopies
, vec_perm
, stmt_info
, 0, vect_body
);
3099 tree bswap_vconst
= vec_perm_indices_to_tree (char_vectype
, indices
);
3102 vec
<tree
> vec_oprnds
= vNULL
;
3103 stmt_vec_info new_stmt_info
= NULL
;
3104 stmt_vec_info prev_stmt_info
= NULL
;
3105 for (unsigned j
= 0; j
< ncopies
; j
++)
3109 vect_get_vec_defs (op
, NULL
, stmt_info
, &vec_oprnds
, NULL
, slp_node
);
3111 vect_get_vec_defs_for_stmt_copy (vinfo
, &vec_oprnds
, NULL
);
3113 /* Arguments are ready. create the new vector stmt. */
3116 FOR_EACH_VEC_ELT (vec_oprnds
, i
, vop
)
3119 tree tem
= make_ssa_name (char_vectype
);
3120 new_stmt
= gimple_build_assign (tem
, build1 (VIEW_CONVERT_EXPR
,
3121 char_vectype
, vop
));
3122 vect_finish_stmt_generation (stmt_info
, new_stmt
, gsi
);
3123 tree tem2
= make_ssa_name (char_vectype
);
3124 new_stmt
= gimple_build_assign (tem2
, VEC_PERM_EXPR
,
3125 tem
, tem
, bswap_vconst
);
3126 vect_finish_stmt_generation (stmt_info
, new_stmt
, gsi
);
3127 tem
= make_ssa_name (vectype
);
3128 new_stmt
= gimple_build_assign (tem
, build1 (VIEW_CONVERT_EXPR
,
3131 = vect_finish_stmt_generation (stmt_info
, new_stmt
, gsi
);
3133 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt_info
);
3140 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt_info
;
3142 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt_info
;
3144 prev_stmt_info
= new_stmt_info
;
3147 vec_oprnds
.release ();
3151 /* Return true if vector types VECTYPE_IN and VECTYPE_OUT have
3152 integer elements and if we can narrow VECTYPE_IN to VECTYPE_OUT
3153 in a single step. On success, store the binary pack code in
3157 simple_integer_narrowing (tree vectype_out
, tree vectype_in
,
3158 tree_code
*convert_code
)
3160 if (!INTEGRAL_TYPE_P (TREE_TYPE (vectype_out
))
3161 || !INTEGRAL_TYPE_P (TREE_TYPE (vectype_in
)))
3165 int multi_step_cvt
= 0;
3166 auto_vec
<tree
, 8> interm_types
;
3167 if (!supportable_narrowing_operation (NOP_EXPR
, vectype_out
, vectype_in
,
3168 &code
, &multi_step_cvt
, &interm_types
)
3172 *convert_code
= code
;
3176 /* Function vectorizable_call.
3178 Check if STMT_INFO performs a function call that can be vectorized.
3179 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
3180 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
3181 Return true if STMT_INFO is vectorizable in this way. */
3184 vectorizable_call (stmt_vec_info stmt_info
, gimple_stmt_iterator
*gsi
,
3185 stmt_vec_info
*vec_stmt
, slp_tree slp_node
,
3186 stmt_vector_for_cost
*cost_vec
)
3192 tree vec_oprnd0
= NULL_TREE
, vec_oprnd1
= NULL_TREE
;
3193 stmt_vec_info prev_stmt_info
;
3194 tree vectype_out
, vectype_in
;
3195 poly_uint64 nunits_in
;
3196 poly_uint64 nunits_out
;
3197 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
3198 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
3199 vec_info
*vinfo
= stmt_info
->vinfo
;
3200 tree fndecl
, new_temp
, rhs_type
;
3201 enum vect_def_type dt
[4]
3202 = { vect_unknown_def_type
, vect_unknown_def_type
, vect_unknown_def_type
,
3203 vect_unknown_def_type
};
3204 tree vectypes
[ARRAY_SIZE (dt
)] = {};
3205 int ndts
= ARRAY_SIZE (dt
);
3207 auto_vec
<tree
, 8> vargs
;
3208 auto_vec
<tree
, 8> orig_vargs
;
3209 enum { NARROW
, NONE
, WIDEN
} modifier
;
3213 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
3216 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
3220 /* Is STMT_INFO a vectorizable call? */
3221 stmt
= dyn_cast
<gcall
*> (stmt_info
->stmt
);
3225 if (gimple_call_internal_p (stmt
)
3226 && (internal_load_fn_p (gimple_call_internal_fn (stmt
))
3227 || internal_store_fn_p (gimple_call_internal_fn (stmt
))))
3228 /* Handled by vectorizable_load and vectorizable_store. */
3231 if (gimple_call_lhs (stmt
) == NULL_TREE
3232 || TREE_CODE (gimple_call_lhs (stmt
)) != SSA_NAME
)
3235 gcc_checking_assert (!stmt_can_throw_internal (cfun
, stmt
));
3237 vectype_out
= STMT_VINFO_VECTYPE (stmt_info
);
3239 /* Process function arguments. */
3240 rhs_type
= NULL_TREE
;
3241 vectype_in
= NULL_TREE
;
3242 nargs
= gimple_call_num_args (stmt
);
3244 /* Bail out if the function has more than three arguments, we do not have
3245 interesting builtin functions to vectorize with more than two arguments
3246 except for fma. No arguments is also not good. */
3247 if (nargs
== 0 || nargs
> 4)
3250 /* Ignore the arguments of IFN_GOMP_SIMD_LANE, they are magic. */
3251 combined_fn cfn
= gimple_call_combined_fn (stmt
);
3252 if (cfn
== CFN_GOMP_SIMD_LANE
)
3255 rhs_type
= unsigned_type_node
;
3259 if (internal_fn_p (cfn
))
3260 mask_opno
= internal_fn_mask_index (as_internal_fn (cfn
));
3262 for (i
= 0; i
< nargs
; i
++)
3264 op
= gimple_call_arg (stmt
, i
);
3266 if ((int) i
== mask_opno
)
3268 if (!vect_check_scalar_mask (stmt_info
, op
, &dt
[i
], &vectypes
[i
]))
3273 if (!vect_is_simple_use (op
, vinfo
, &dt
[i
], &vectypes
[i
]))
3275 if (dump_enabled_p ())
3276 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3277 "use not simple.\n");
3281 /* We can only handle calls with arguments of the same type. */
3283 && !types_compatible_p (rhs_type
, TREE_TYPE (op
)))
3285 if (dump_enabled_p ())
3286 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3287 "argument types differ.\n");
3291 rhs_type
= TREE_TYPE (op
);
3294 vectype_in
= vectypes
[i
];
3295 else if (vectypes
[i
]
3296 && !types_compatible_p (vectypes
[i
], vectype_in
))
3298 if (dump_enabled_p ())
3299 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3300 "argument vector types differ.\n");
3304 /* If all arguments are external or constant defs, infer the vector type
3305 from the scalar type. */
3307 vectype_in
= get_vectype_for_scalar_type (vinfo
, rhs_type
, slp_node
);
3309 gcc_assert (vectype_in
);
3312 if (dump_enabled_p ())
3313 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3314 "no vectype for scalar type %T\n", rhs_type
);
3318 /* FORNOW: we don't yet support mixtures of vector sizes for calls,
3319 just mixtures of nunits. E.g. DI->SI versions of __builtin_ctz*
3320 are traditionally vectorized as two VnDI->VnDI IFN_CTZs followed
3321 by a pack of the two vectors into an SI vector. We would need
3322 separate code to handle direct VnDI->VnSI IFN_CTZs. */
3323 if (TYPE_SIZE (vectype_in
) != TYPE_SIZE (vectype_out
))
3325 if (dump_enabled_p ())
3326 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3327 "mismatched vector sizes %T and %T\n",
3328 vectype_in
, vectype_out
);
3332 if (VECTOR_BOOLEAN_TYPE_P (vectype_out
)
3333 != VECTOR_BOOLEAN_TYPE_P (vectype_in
))
3335 if (dump_enabled_p ())
3336 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3337 "mixed mask and nonmask vector types\n");
3342 nunits_in
= TYPE_VECTOR_SUBPARTS (vectype_in
);
3343 nunits_out
= TYPE_VECTOR_SUBPARTS (vectype_out
);
3344 if (known_eq (nunits_in
* 2, nunits_out
))
3346 else if (known_eq (nunits_out
, nunits_in
))
3348 else if (known_eq (nunits_out
* 2, nunits_in
))
3353 /* We only handle functions that do not read or clobber memory. */
3354 if (gimple_vuse (stmt
))
3356 if (dump_enabled_p ())
3357 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3358 "function reads from or writes to memory.\n");
3362 /* For now, we only vectorize functions if a target specific builtin
3363 is available. TODO -- in some cases, it might be profitable to
3364 insert the calls for pieces of the vector, in order to be able
3365 to vectorize other operations in the loop. */
3367 internal_fn ifn
= IFN_LAST
;
3368 tree callee
= gimple_call_fndecl (stmt
);
3370 /* First try using an internal function. */
3371 tree_code convert_code
= ERROR_MARK
;
3373 && (modifier
== NONE
3374 || (modifier
== NARROW
3375 && simple_integer_narrowing (vectype_out
, vectype_in
,
3377 ifn
= vectorizable_internal_function (cfn
, callee
, vectype_out
,
3380 /* If that fails, try asking for a target-specific built-in function. */
3381 if (ifn
== IFN_LAST
)
3383 if (cfn
!= CFN_LAST
)
3384 fndecl
= targetm
.vectorize
.builtin_vectorized_function
3385 (cfn
, vectype_out
, vectype_in
);
3386 else if (callee
&& fndecl_built_in_p (callee
, BUILT_IN_MD
))
3387 fndecl
= targetm
.vectorize
.builtin_md_vectorized_function
3388 (callee
, vectype_out
, vectype_in
);
3391 if (ifn
== IFN_LAST
&& !fndecl
)
3393 if (cfn
== CFN_GOMP_SIMD_LANE
3396 && LOOP_VINFO_LOOP (loop_vinfo
)->simduid
3397 && TREE_CODE (gimple_call_arg (stmt
, 0)) == SSA_NAME
3398 && LOOP_VINFO_LOOP (loop_vinfo
)->simduid
3399 == SSA_NAME_VAR (gimple_call_arg (stmt
, 0)))
3401 /* We can handle IFN_GOMP_SIMD_LANE by returning a
3402 { 0, 1, 2, ... vf - 1 } vector. */
3403 gcc_assert (nargs
== 0);
3405 else if (modifier
== NONE
3406 && (gimple_call_builtin_p (stmt
, BUILT_IN_BSWAP16
)
3407 || gimple_call_builtin_p (stmt
, BUILT_IN_BSWAP32
)
3408 || gimple_call_builtin_p (stmt
, BUILT_IN_BSWAP64
)))
3409 return vectorizable_bswap (stmt_info
, gsi
, vec_stmt
, slp_node
,
3410 vectype_in
, cost_vec
);
3413 if (dump_enabled_p ())
3414 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3415 "function is not vectorizable.\n");
3422 else if (modifier
== NARROW
&& ifn
== IFN_LAST
)
3423 ncopies
= vect_get_num_copies (loop_vinfo
, vectype_out
);
3425 ncopies
= vect_get_num_copies (loop_vinfo
, vectype_in
);
3427 /* Sanity check: make sure that at least one copy of the vectorized stmt
3428 needs to be generated. */
3429 gcc_assert (ncopies
>= 1);
3431 vec_loop_masks
*masks
= (loop_vinfo
? &LOOP_VINFO_MASKS (loop_vinfo
) : NULL
);
3432 if (!vec_stmt
) /* transformation not required. */
3434 STMT_VINFO_TYPE (stmt_info
) = call_vec_info_type
;
3435 DUMP_VECT_SCOPE ("vectorizable_call");
3436 vect_model_simple_cost (stmt_info
, ncopies
, dt
, ndts
, slp_node
, cost_vec
);
3437 if (ifn
!= IFN_LAST
&& modifier
== NARROW
&& !slp_node
)
3438 record_stmt_cost (cost_vec
, ncopies
/ 2,
3439 vec_promote_demote
, stmt_info
, 0, vect_body
);
3441 if (loop_vinfo
&& mask_opno
>= 0)
3443 unsigned int nvectors
= (slp_node
3444 ? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
)
3446 tree scalar_mask
= gimple_call_arg (stmt_info
->stmt
, mask_opno
);
3447 vect_record_loop_mask (loop_vinfo
, masks
, nvectors
,
3448 vectype_out
, scalar_mask
);
3455 if (dump_enabled_p ())
3456 dump_printf_loc (MSG_NOTE
, vect_location
, "transform call.\n");
3459 scalar_dest
= gimple_call_lhs (stmt
);
3460 vec_dest
= vect_create_destination_var (scalar_dest
, vectype_out
);
3462 bool masked_loop_p
= loop_vinfo
&& LOOP_VINFO_FULLY_MASKED_P (loop_vinfo
);
3464 stmt_vec_info new_stmt_info
= NULL
;
3465 prev_stmt_info
= NULL
;
3466 if (modifier
== NONE
|| ifn
!= IFN_LAST
)
3468 tree prev_res
= NULL_TREE
;
3469 vargs
.safe_grow (nargs
);
3470 orig_vargs
.safe_grow (nargs
);
3471 for (j
= 0; j
< ncopies
; ++j
)
3473 /* Build argument list for the vectorized call. */
3476 auto_vec
<vec
<tree
> > vec_defs (nargs
);
3477 vec
<tree
> vec_oprnds0
;
3479 vect_get_slp_defs (slp_node
, &vec_defs
);
3480 vec_oprnds0
= vec_defs
[0];
3482 /* Arguments are ready. Create the new vector stmt. */
3483 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vec_oprnd0
)
3486 for (k
= 0; k
< nargs
; k
++)
3488 vec
<tree
> vec_oprndsk
= vec_defs
[k
];
3489 vargs
[k
] = vec_oprndsk
[i
];
3491 if (modifier
== NARROW
)
3493 /* We don't define any narrowing conditional functions
3495 gcc_assert (mask_opno
< 0);
3496 tree half_res
= make_ssa_name (vectype_in
);
3498 = gimple_build_call_internal_vec (ifn
, vargs
);
3499 gimple_call_set_lhs (call
, half_res
);
3500 gimple_call_set_nothrow (call
, true);
3501 vect_finish_stmt_generation (stmt_info
, call
, gsi
);
3504 prev_res
= half_res
;
3507 new_temp
= make_ssa_name (vec_dest
);
3509 = gimple_build_assign (new_temp
, convert_code
,
3510 prev_res
, half_res
);
3512 = vect_finish_stmt_generation (stmt_info
, new_stmt
,
3517 if (mask_opno
>= 0 && masked_loop_p
)
3519 unsigned int vec_num
= vec_oprnds0
.length ();
3520 /* Always true for SLP. */
3521 gcc_assert (ncopies
== 1);
3522 tree mask
= vect_get_loop_mask (gsi
, masks
, vec_num
,
3524 vargs
[mask_opno
] = prepare_load_store_mask
3525 (TREE_TYPE (mask
), mask
, vargs
[mask_opno
], gsi
);
3529 if (ifn
!= IFN_LAST
)
3530 call
= gimple_build_call_internal_vec (ifn
, vargs
);
3532 call
= gimple_build_call_vec (fndecl
, vargs
);
3533 new_temp
= make_ssa_name (vec_dest
, call
);
3534 gimple_call_set_lhs (call
, new_temp
);
3535 gimple_call_set_nothrow (call
, true);
3537 = vect_finish_stmt_generation (stmt_info
, call
, gsi
);
3539 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt_info
);
3542 for (i
= 0; i
< nargs
; i
++)
3544 vec
<tree
> vec_oprndsi
= vec_defs
[i
];
3545 vec_oprndsi
.release ();
3550 for (i
= 0; i
< nargs
; i
++)
3552 op
= gimple_call_arg (stmt
, i
);
3555 = vect_get_vec_def_for_operand (op
, stmt_info
, vectypes
[i
]);
3558 = vect_get_vec_def_for_stmt_copy (vinfo
, orig_vargs
[i
]);
3560 orig_vargs
[i
] = vargs
[i
] = vec_oprnd0
;
3563 if (mask_opno
>= 0 && masked_loop_p
)
3565 tree mask
= vect_get_loop_mask (gsi
, masks
, ncopies
,
3568 = prepare_load_store_mask (TREE_TYPE (mask
), mask
,
3569 vargs
[mask_opno
], gsi
);
3572 if (cfn
== CFN_GOMP_SIMD_LANE
)
3574 tree cst
= build_index_vector (vectype_out
, j
* nunits_out
, 1);
3576 = vect_get_new_ssa_name (vectype_out
, vect_simple_var
, "cst_");
3577 gimple
*init_stmt
= gimple_build_assign (new_var
, cst
);
3578 vect_init_vector_1 (stmt_info
, init_stmt
, NULL
);
3579 new_temp
= make_ssa_name (vec_dest
);
3580 gimple
*new_stmt
= gimple_build_assign (new_temp
, new_var
);
3582 = vect_finish_stmt_generation (stmt_info
, new_stmt
, gsi
);
3584 else if (modifier
== NARROW
)
3586 /* We don't define any narrowing conditional functions at
3588 gcc_assert (mask_opno
< 0);
3589 tree half_res
= make_ssa_name (vectype_in
);
3590 gcall
*call
= gimple_build_call_internal_vec (ifn
, vargs
);
3591 gimple_call_set_lhs (call
, half_res
);
3592 gimple_call_set_nothrow (call
, true);
3593 vect_finish_stmt_generation (stmt_info
, call
, gsi
);
3596 prev_res
= half_res
;
3599 new_temp
= make_ssa_name (vec_dest
);
3600 gassign
*new_stmt
= gimple_build_assign (new_temp
, convert_code
,
3601 prev_res
, half_res
);
3603 = vect_finish_stmt_generation (stmt_info
, new_stmt
, gsi
);
3608 if (ifn
!= IFN_LAST
)
3609 call
= gimple_build_call_internal_vec (ifn
, vargs
);
3611 call
= gimple_build_call_vec (fndecl
, vargs
);
3612 new_temp
= make_ssa_name (vec_dest
, call
);
3613 gimple_call_set_lhs (call
, new_temp
);
3614 gimple_call_set_nothrow (call
, true);
3616 = vect_finish_stmt_generation (stmt_info
, call
, gsi
);
3619 if (j
== (modifier
== NARROW
? 1 : 0))
3620 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt_info
;
3622 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt_info
;
3624 prev_stmt_info
= new_stmt_info
;
3627 else if (modifier
== NARROW
)
3629 /* We don't define any narrowing conditional functions at present. */
3630 gcc_assert (mask_opno
< 0);
3631 for (j
= 0; j
< ncopies
; ++j
)
3633 /* Build argument list for the vectorized call. */
3635 vargs
.create (nargs
* 2);
3641 auto_vec
<vec
<tree
> > vec_defs (nargs
);
3642 vec
<tree
> vec_oprnds0
;
3644 vect_get_slp_defs (slp_node
, &vec_defs
);
3645 vec_oprnds0
= vec_defs
[0];
3647 /* Arguments are ready. Create the new vector stmt. */
3648 for (i
= 0; vec_oprnds0
.iterate (i
, &vec_oprnd0
); i
+= 2)
3652 for (k
= 0; k
< nargs
; k
++)
3654 vec
<tree
> vec_oprndsk
= vec_defs
[k
];
3655 vargs
.quick_push (vec_oprndsk
[i
]);
3656 vargs
.quick_push (vec_oprndsk
[i
+ 1]);
3659 if (ifn
!= IFN_LAST
)
3660 call
= gimple_build_call_internal_vec (ifn
, vargs
);
3662 call
= gimple_build_call_vec (fndecl
, vargs
);
3663 new_temp
= make_ssa_name (vec_dest
, call
);
3664 gimple_call_set_lhs (call
, new_temp
);
3665 gimple_call_set_nothrow (call
, true);
3667 = vect_finish_stmt_generation (stmt_info
, call
, gsi
);
3668 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt_info
);
3671 for (i
= 0; i
< nargs
; i
++)
3673 vec
<tree
> vec_oprndsi
= vec_defs
[i
];
3674 vec_oprndsi
.release ();
3679 for (i
= 0; i
< nargs
; i
++)
3681 op
= gimple_call_arg (stmt
, i
);
3685 = vect_get_vec_def_for_operand (op
, stmt_info
,
3688 = vect_get_vec_def_for_stmt_copy (vinfo
, vec_oprnd0
);
3692 vec_oprnd1
= gimple_call_arg (new_stmt_info
->stmt
,
3695 = vect_get_vec_def_for_stmt_copy (vinfo
, vec_oprnd1
);
3697 = vect_get_vec_def_for_stmt_copy (vinfo
, vec_oprnd0
);
3700 vargs
.quick_push (vec_oprnd0
);
3701 vargs
.quick_push (vec_oprnd1
);
3704 gcall
*new_stmt
= gimple_build_call_vec (fndecl
, vargs
);
3705 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
3706 gimple_call_set_lhs (new_stmt
, new_temp
);
3708 = vect_finish_stmt_generation (stmt_info
, new_stmt
, gsi
);
3711 STMT_VINFO_VEC_STMT (stmt_info
) = new_stmt_info
;
3713 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt_info
;
3715 prev_stmt_info
= new_stmt_info
;
3718 *vec_stmt
= STMT_VINFO_VEC_STMT (stmt_info
);
3721 /* No current target implements this case. */
3726 /* The call in STMT might prevent it from being removed in dce.
3727 We however cannot remove it here, due to the way the ssa name
3728 it defines is mapped to the new definition. So just replace
3729 rhs of the statement with something harmless. */
3734 stmt_info
= vect_orig_stmt (stmt_info
);
3735 lhs
= gimple_get_lhs (stmt_info
->stmt
);
3738 = gimple_build_assign (lhs
, build_zero_cst (TREE_TYPE (lhs
)));
3739 vinfo
->replace_stmt (gsi
, stmt_info
, new_stmt
);
3745 struct simd_call_arg_info
3749 HOST_WIDE_INT linear_step
;
3750 enum vect_def_type dt
;
3752 bool simd_lane_linear
;
3755 /* Helper function of vectorizable_simd_clone_call. If OP, an SSA_NAME,
3756 is linear within simd lane (but not within whole loop), note it in
3760 vect_simd_lane_linear (tree op
, class loop
*loop
,
3761 struct simd_call_arg_info
*arginfo
)
3763 gimple
*def_stmt
= SSA_NAME_DEF_STMT (op
);
3765 if (!is_gimple_assign (def_stmt
)
3766 || gimple_assign_rhs_code (def_stmt
) != POINTER_PLUS_EXPR
3767 || !is_gimple_min_invariant (gimple_assign_rhs1 (def_stmt
)))
3770 tree base
= gimple_assign_rhs1 (def_stmt
);
3771 HOST_WIDE_INT linear_step
= 0;
3772 tree v
= gimple_assign_rhs2 (def_stmt
);
3773 while (TREE_CODE (v
) == SSA_NAME
)
3776 def_stmt
= SSA_NAME_DEF_STMT (v
);
3777 if (is_gimple_assign (def_stmt
))
3778 switch (gimple_assign_rhs_code (def_stmt
))
3781 t
= gimple_assign_rhs2 (def_stmt
);
3782 if (linear_step
|| TREE_CODE (t
) != INTEGER_CST
)
3784 base
= fold_build2 (POINTER_PLUS_EXPR
, TREE_TYPE (base
), base
, t
);
3785 v
= gimple_assign_rhs1 (def_stmt
);
3788 t
= gimple_assign_rhs2 (def_stmt
);
3789 if (linear_step
|| !tree_fits_shwi_p (t
) || integer_zerop (t
))
3791 linear_step
= tree_to_shwi (t
);
3792 v
= gimple_assign_rhs1 (def_stmt
);
3795 t
= gimple_assign_rhs1 (def_stmt
);
3796 if (TREE_CODE (TREE_TYPE (t
)) != INTEGER_TYPE
3797 || (TYPE_PRECISION (TREE_TYPE (v
))
3798 < TYPE_PRECISION (TREE_TYPE (t
))))
3807 else if (gimple_call_internal_p (def_stmt
, IFN_GOMP_SIMD_LANE
)
3809 && TREE_CODE (gimple_call_arg (def_stmt
, 0)) == SSA_NAME
3810 && (SSA_NAME_VAR (gimple_call_arg (def_stmt
, 0))
3815 arginfo
->linear_step
= linear_step
;
3817 arginfo
->simd_lane_linear
= true;
3823 /* Return the number of elements in vector type VECTYPE, which is associated
3824 with a SIMD clone. At present these vectors always have a constant
3827 static unsigned HOST_WIDE_INT
3828 simd_clone_subparts (tree vectype
)
3830 return TYPE_VECTOR_SUBPARTS (vectype
).to_constant ();
3833 /* Function vectorizable_simd_clone_call.
3835 Check if STMT_INFO performs a function call that can be vectorized
3836 by calling a simd clone of the function.
3837 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
3838 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
3839 Return true if STMT_INFO is vectorizable in this way. */
3842 vectorizable_simd_clone_call (stmt_vec_info stmt_info
,
3843 gimple_stmt_iterator
*gsi
,
3844 stmt_vec_info
*vec_stmt
, slp_tree slp_node
,
3845 stmt_vector_for_cost
*)
3850 tree vec_oprnd0
= NULL_TREE
;
3851 stmt_vec_info prev_stmt_info
;
3853 unsigned int nunits
;
3854 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
3855 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
3856 vec_info
*vinfo
= stmt_info
->vinfo
;
3857 class loop
*loop
= loop_vinfo
? LOOP_VINFO_LOOP (loop_vinfo
) : NULL
;
3858 tree fndecl
, new_temp
;
3860 auto_vec
<simd_call_arg_info
> arginfo
;
3861 vec
<tree
> vargs
= vNULL
;
3863 tree lhs
, rtype
, ratype
;
3864 vec
<constructor_elt
, va_gc
> *ret_ctor_elts
= NULL
;
3866 /* Is STMT a vectorizable call? */
3867 gcall
*stmt
= dyn_cast
<gcall
*> (stmt_info
->stmt
);
3871 fndecl
= gimple_call_fndecl (stmt
);
3872 if (fndecl
== NULL_TREE
)
3875 struct cgraph_node
*node
= cgraph_node::get (fndecl
);
3876 if (node
== NULL
|| node
->simd_clones
== NULL
)
3879 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
3882 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
3886 if (gimple_call_lhs (stmt
)
3887 && TREE_CODE (gimple_call_lhs (stmt
)) != SSA_NAME
)
3890 gcc_checking_assert (!stmt_can_throw_internal (cfun
, stmt
));
3892 vectype
= STMT_VINFO_VECTYPE (stmt_info
);
3894 if (loop_vinfo
&& nested_in_vect_loop_p (loop
, stmt_info
))
3901 /* Process function arguments. */
3902 nargs
= gimple_call_num_args (stmt
);
3904 /* Bail out if the function has zero arguments. */
3908 arginfo
.reserve (nargs
, true);
3910 for (i
= 0; i
< nargs
; i
++)
3912 simd_call_arg_info thisarginfo
;
3915 thisarginfo
.linear_step
= 0;
3916 thisarginfo
.align
= 0;
3917 thisarginfo
.op
= NULL_TREE
;
3918 thisarginfo
.simd_lane_linear
= false;
3920 op
= gimple_call_arg (stmt
, i
);
3921 if (!vect_is_simple_use (op
, vinfo
, &thisarginfo
.dt
,
3922 &thisarginfo
.vectype
)
3923 || thisarginfo
.dt
== vect_uninitialized_def
)
3925 if (dump_enabled_p ())
3926 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3927 "use not simple.\n");
3931 if (thisarginfo
.dt
== vect_constant_def
3932 || thisarginfo
.dt
== vect_external_def
)
3933 gcc_assert (thisarginfo
.vectype
== NULL_TREE
);
3936 gcc_assert (thisarginfo
.vectype
!= NULL_TREE
);
3937 if (VECTOR_BOOLEAN_TYPE_P (thisarginfo
.vectype
))
3939 if (dump_enabled_p ())
3940 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3941 "vector mask arguments are not supported\n");
3946 /* For linear arguments, the analyze phase should have saved
3947 the base and step in STMT_VINFO_SIMD_CLONE_INFO. */
3948 if (i
* 3 + 4 <= STMT_VINFO_SIMD_CLONE_INFO (stmt_info
).length ()
3949 && STMT_VINFO_SIMD_CLONE_INFO (stmt_info
)[i
* 3 + 2])
3951 gcc_assert (vec_stmt
);
3952 thisarginfo
.linear_step
3953 = tree_to_shwi (STMT_VINFO_SIMD_CLONE_INFO (stmt_info
)[i
* 3 + 2]);
3955 = STMT_VINFO_SIMD_CLONE_INFO (stmt_info
)[i
* 3 + 1];
3956 thisarginfo
.simd_lane_linear
3957 = (STMT_VINFO_SIMD_CLONE_INFO (stmt_info
)[i
* 3 + 3]
3958 == boolean_true_node
);
3959 /* If loop has been peeled for alignment, we need to adjust it. */
3960 tree n1
= LOOP_VINFO_NITERS_UNCHANGED (loop_vinfo
);
3961 tree n2
= LOOP_VINFO_NITERS (loop_vinfo
);
3962 if (n1
!= n2
&& !thisarginfo
.simd_lane_linear
)
3964 tree bias
= fold_build2 (MINUS_EXPR
, TREE_TYPE (n1
), n1
, n2
);
3965 tree step
= STMT_VINFO_SIMD_CLONE_INFO (stmt_info
)[i
* 3 + 2];
3966 tree opt
= TREE_TYPE (thisarginfo
.op
);
3967 bias
= fold_convert (TREE_TYPE (step
), bias
);
3968 bias
= fold_build2 (MULT_EXPR
, TREE_TYPE (step
), bias
, step
);
3970 = fold_build2 (POINTER_TYPE_P (opt
)
3971 ? POINTER_PLUS_EXPR
: PLUS_EXPR
, opt
,
3972 thisarginfo
.op
, bias
);
3976 && thisarginfo
.dt
!= vect_constant_def
3977 && thisarginfo
.dt
!= vect_external_def
3979 && TREE_CODE (op
) == SSA_NAME
3980 && simple_iv (loop
, loop_containing_stmt (stmt
), op
,
3982 && tree_fits_shwi_p (iv
.step
))
3984 thisarginfo
.linear_step
= tree_to_shwi (iv
.step
);
3985 thisarginfo
.op
= iv
.base
;
3987 else if ((thisarginfo
.dt
== vect_constant_def
3988 || thisarginfo
.dt
== vect_external_def
)
3989 && POINTER_TYPE_P (TREE_TYPE (op
)))
3990 thisarginfo
.align
= get_pointer_alignment (op
) / BITS_PER_UNIT
;
3991 /* Addresses of array elements indexed by GOMP_SIMD_LANE are
3993 if (POINTER_TYPE_P (TREE_TYPE (op
))
3994 && !thisarginfo
.linear_step
3996 && thisarginfo
.dt
!= vect_constant_def
3997 && thisarginfo
.dt
!= vect_external_def
4000 && TREE_CODE (op
) == SSA_NAME
)
4001 vect_simd_lane_linear (op
, loop
, &thisarginfo
);
4003 arginfo
.quick_push (thisarginfo
);
4006 unsigned HOST_WIDE_INT vf
;
4007 if (!LOOP_VINFO_VECT_FACTOR (loop_vinfo
).is_constant (&vf
))
4009 if (dump_enabled_p ())
4010 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4011 "not considering SIMD clones; not yet supported"
4012 " for variable-width vectors.\n");
4016 unsigned int badness
= 0;
4017 struct cgraph_node
*bestn
= NULL
;
4018 if (STMT_VINFO_SIMD_CLONE_INFO (stmt_info
).exists ())
4019 bestn
= cgraph_node::get (STMT_VINFO_SIMD_CLONE_INFO (stmt_info
)[0]);
4021 for (struct cgraph_node
*n
= node
->simd_clones
; n
!= NULL
;
4022 n
= n
->simdclone
->next_clone
)
4024 unsigned int this_badness
= 0;
4025 if (n
->simdclone
->simdlen
> vf
4026 || n
->simdclone
->nargs
!= nargs
)
4028 if (n
->simdclone
->simdlen
< vf
)
4029 this_badness
+= (exact_log2 (vf
)
4030 - exact_log2 (n
->simdclone
->simdlen
)) * 1024;
4031 if (n
->simdclone
->inbranch
)
4032 this_badness
+= 2048;
4033 int target_badness
= targetm
.simd_clone
.usable (n
);
4034 if (target_badness
< 0)
4036 this_badness
+= target_badness
* 512;
4037 /* FORNOW: Have to add code to add the mask argument. */
4038 if (n
->simdclone
->inbranch
)
4040 for (i
= 0; i
< nargs
; i
++)
4042 switch (n
->simdclone
->args
[i
].arg_type
)
4044 case SIMD_CLONE_ARG_TYPE_VECTOR
:
4045 if (!useless_type_conversion_p
4046 (n
->simdclone
->args
[i
].orig_type
,
4047 TREE_TYPE (gimple_call_arg (stmt
, i
))))
4049 else if (arginfo
[i
].dt
== vect_constant_def
4050 || arginfo
[i
].dt
== vect_external_def
4051 || arginfo
[i
].linear_step
)
4054 case SIMD_CLONE_ARG_TYPE_UNIFORM
:
4055 if (arginfo
[i
].dt
!= vect_constant_def
4056 && arginfo
[i
].dt
!= vect_external_def
)
4059 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP
:
4060 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP
:
4061 if (arginfo
[i
].dt
== vect_constant_def
4062 || arginfo
[i
].dt
== vect_external_def
4063 || (arginfo
[i
].linear_step
4064 != n
->simdclone
->args
[i
].linear_step
))
4067 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP
:
4068 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP
:
4069 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP
:
4070 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP
:
4071 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP
:
4072 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP
:
4076 case SIMD_CLONE_ARG_TYPE_MASK
:
4079 if (i
== (size_t) -1)
4081 if (n
->simdclone
->args
[i
].alignment
> arginfo
[i
].align
)
4086 if (arginfo
[i
].align
)
4087 this_badness
+= (exact_log2 (arginfo
[i
].align
)
4088 - exact_log2 (n
->simdclone
->args
[i
].alignment
));
4090 if (i
== (size_t) -1)
4092 if (bestn
== NULL
|| this_badness
< badness
)
4095 badness
= this_badness
;
4102 for (i
= 0; i
< nargs
; i
++)
4103 if ((arginfo
[i
].dt
== vect_constant_def
4104 || arginfo
[i
].dt
== vect_external_def
)
4105 && bestn
->simdclone
->args
[i
].arg_type
== SIMD_CLONE_ARG_TYPE_VECTOR
)
4107 tree arg_type
= TREE_TYPE (gimple_call_arg (stmt
, i
));
4108 arginfo
[i
].vectype
= get_vectype_for_scalar_type (vinfo
, arg_type
,
4110 if (arginfo
[i
].vectype
== NULL
4111 || (simd_clone_subparts (arginfo
[i
].vectype
)
4112 > bestn
->simdclone
->simdlen
))
4116 fndecl
= bestn
->decl
;
4117 nunits
= bestn
->simdclone
->simdlen
;
4118 ncopies
= vf
/ nunits
;
4120 /* If the function isn't const, only allow it in simd loops where user
4121 has asserted that at least nunits consecutive iterations can be
4122 performed using SIMD instructions. */
4123 if ((loop
== NULL
|| (unsigned) loop
->safelen
< nunits
)
4124 && gimple_vuse (stmt
))
4127 /* Sanity check: make sure that at least one copy of the vectorized stmt
4128 needs to be generated. */
4129 gcc_assert (ncopies
>= 1);
4131 if (!vec_stmt
) /* transformation not required. */
4133 STMT_VINFO_SIMD_CLONE_INFO (stmt_info
).safe_push (bestn
->decl
);
4134 for (i
= 0; i
< nargs
; i
++)
4135 if ((bestn
->simdclone
->args
[i
].arg_type
4136 == SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP
)
4137 || (bestn
->simdclone
->args
[i
].arg_type
4138 == SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP
))
4140 STMT_VINFO_SIMD_CLONE_INFO (stmt_info
).safe_grow_cleared (i
* 3
4142 STMT_VINFO_SIMD_CLONE_INFO (stmt_info
).safe_push (arginfo
[i
].op
);
4143 tree lst
= POINTER_TYPE_P (TREE_TYPE (arginfo
[i
].op
))
4144 ? size_type_node
: TREE_TYPE (arginfo
[i
].op
);
4145 tree ls
= build_int_cst (lst
, arginfo
[i
].linear_step
);
4146 STMT_VINFO_SIMD_CLONE_INFO (stmt_info
).safe_push (ls
);
4147 tree sll
= arginfo
[i
].simd_lane_linear
4148 ? boolean_true_node
: boolean_false_node
;
4149 STMT_VINFO_SIMD_CLONE_INFO (stmt_info
).safe_push (sll
);
4151 STMT_VINFO_TYPE (stmt_info
) = call_simd_clone_vec_info_type
;
4152 DUMP_VECT_SCOPE ("vectorizable_simd_clone_call");
4153 /* vect_model_simple_cost (stmt_info, ncopies, dt, slp_node, cost_vec); */
4159 if (dump_enabled_p ())
4160 dump_printf_loc (MSG_NOTE
, vect_location
, "transform call.\n");
4163 scalar_dest
= gimple_call_lhs (stmt
);
4164 vec_dest
= NULL_TREE
;
4169 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
4170 rtype
= TREE_TYPE (TREE_TYPE (fndecl
));
4171 if (TREE_CODE (rtype
) == ARRAY_TYPE
)
4174 rtype
= TREE_TYPE (ratype
);
4178 prev_stmt_info
= NULL
;
4179 for (j
= 0; j
< ncopies
; ++j
)
4181 /* Build argument list for the vectorized call. */
4183 vargs
.create (nargs
);
4187 for (i
= 0; i
< nargs
; i
++)
4189 unsigned int k
, l
, m
, o
;
4191 op
= gimple_call_arg (stmt
, i
);
4192 switch (bestn
->simdclone
->args
[i
].arg_type
)
4194 case SIMD_CLONE_ARG_TYPE_VECTOR
:
4195 atype
= bestn
->simdclone
->args
[i
].vector_type
;
4196 o
= nunits
/ simd_clone_subparts (atype
);
4197 for (m
= j
* o
; m
< (j
+ 1) * o
; m
++)
4199 if (simd_clone_subparts (atype
)
4200 < simd_clone_subparts (arginfo
[i
].vectype
))
4202 poly_uint64 prec
= GET_MODE_BITSIZE (TYPE_MODE (atype
));
4203 k
= (simd_clone_subparts (arginfo
[i
].vectype
)
4204 / simd_clone_subparts (atype
));
4205 gcc_assert ((k
& (k
- 1)) == 0);
4208 = vect_get_vec_def_for_operand (op
, stmt_info
);
4211 vec_oprnd0
= arginfo
[i
].op
;
4212 if ((m
& (k
- 1)) == 0)
4214 = vect_get_vec_def_for_stmt_copy (vinfo
,
4217 arginfo
[i
].op
= vec_oprnd0
;
4219 = build3 (BIT_FIELD_REF
, atype
, vec_oprnd0
,
4221 bitsize_int ((m
& (k
- 1)) * prec
));
4223 = gimple_build_assign (make_ssa_name (atype
),
4225 vect_finish_stmt_generation (stmt_info
, new_stmt
, gsi
);
4226 vargs
.safe_push (gimple_assign_lhs (new_stmt
));
4230 k
= (simd_clone_subparts (atype
)
4231 / simd_clone_subparts (arginfo
[i
].vectype
));
4232 gcc_assert ((k
& (k
- 1)) == 0);
4233 vec
<constructor_elt
, va_gc
> *ctor_elts
;
4235 vec_alloc (ctor_elts
, k
);
4238 for (l
= 0; l
< k
; l
++)
4240 if (m
== 0 && l
== 0)
4242 = vect_get_vec_def_for_operand (op
, stmt_info
);
4245 = vect_get_vec_def_for_stmt_copy (vinfo
,
4247 arginfo
[i
].op
= vec_oprnd0
;
4250 CONSTRUCTOR_APPEND_ELT (ctor_elts
, NULL_TREE
,
4254 vargs
.safe_push (vec_oprnd0
);
4257 vec_oprnd0
= build_constructor (atype
, ctor_elts
);
4259 = gimple_build_assign (make_ssa_name (atype
),
4261 vect_finish_stmt_generation (stmt_info
, new_stmt
,
4263 vargs
.safe_push (gimple_assign_lhs (new_stmt
));
4268 case SIMD_CLONE_ARG_TYPE_UNIFORM
:
4269 vargs
.safe_push (op
);
4271 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP
:
4272 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP
:
4277 = force_gimple_operand (unshare_expr (arginfo
[i
].op
),
4278 &stmts
, true, NULL_TREE
);
4282 edge pe
= loop_preheader_edge (loop
);
4283 new_bb
= gsi_insert_seq_on_edge_immediate (pe
, stmts
);
4284 gcc_assert (!new_bb
);
4286 if (arginfo
[i
].simd_lane_linear
)
4288 vargs
.safe_push (arginfo
[i
].op
);
4291 tree phi_res
= copy_ssa_name (op
);
4292 gphi
*new_phi
= create_phi_node (phi_res
, loop
->header
);
4293 loop_vinfo
->add_stmt (new_phi
);
4294 add_phi_arg (new_phi
, arginfo
[i
].op
,
4295 loop_preheader_edge (loop
), UNKNOWN_LOCATION
);
4297 = POINTER_TYPE_P (TREE_TYPE (op
))
4298 ? POINTER_PLUS_EXPR
: PLUS_EXPR
;
4299 tree type
= POINTER_TYPE_P (TREE_TYPE (op
))
4300 ? sizetype
: TREE_TYPE (op
);
4302 = wi::mul (bestn
->simdclone
->args
[i
].linear_step
,
4304 tree tcst
= wide_int_to_tree (type
, cst
);
4305 tree phi_arg
= copy_ssa_name (op
);
4307 = gimple_build_assign (phi_arg
, code
, phi_res
, tcst
);
4308 gimple_stmt_iterator si
= gsi_after_labels (loop
->header
);
4309 gsi_insert_after (&si
, new_stmt
, GSI_NEW_STMT
);
4310 loop_vinfo
->add_stmt (new_stmt
);
4311 add_phi_arg (new_phi
, phi_arg
, loop_latch_edge (loop
),
4313 arginfo
[i
].op
= phi_res
;
4314 vargs
.safe_push (phi_res
);
4319 = POINTER_TYPE_P (TREE_TYPE (op
))
4320 ? POINTER_PLUS_EXPR
: PLUS_EXPR
;
4321 tree type
= POINTER_TYPE_P (TREE_TYPE (op
))
4322 ? sizetype
: TREE_TYPE (op
);
4324 = wi::mul (bestn
->simdclone
->args
[i
].linear_step
,
4326 tree tcst
= wide_int_to_tree (type
, cst
);
4327 new_temp
= make_ssa_name (TREE_TYPE (op
));
4329 = gimple_build_assign (new_temp
, code
,
4330 arginfo
[i
].op
, tcst
);
4331 vect_finish_stmt_generation (stmt_info
, new_stmt
, gsi
);
4332 vargs
.safe_push (new_temp
);
4335 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP
:
4336 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP
:
4337 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP
:
4338 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP
:
4339 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP
:
4340 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP
:
4346 gcall
*new_call
= gimple_build_call_vec (fndecl
, vargs
);
4349 gcc_assert (ratype
|| simd_clone_subparts (rtype
) == nunits
);
4351 new_temp
= create_tmp_var (ratype
);
4352 else if (simd_clone_subparts (vectype
)
4353 == simd_clone_subparts (rtype
))
4354 new_temp
= make_ssa_name (vec_dest
, new_call
);
4356 new_temp
= make_ssa_name (rtype
, new_call
);
4357 gimple_call_set_lhs (new_call
, new_temp
);
4359 stmt_vec_info new_stmt_info
4360 = vect_finish_stmt_generation (stmt_info
, new_call
, gsi
);
4364 if (simd_clone_subparts (vectype
) < nunits
)
4367 poly_uint64 prec
= GET_MODE_BITSIZE (TYPE_MODE (vectype
));
4368 poly_uint64 bytes
= GET_MODE_SIZE (TYPE_MODE (vectype
));
4369 k
= nunits
/ simd_clone_subparts (vectype
);
4370 gcc_assert ((k
& (k
- 1)) == 0);
4371 for (l
= 0; l
< k
; l
++)
4376 t
= build_fold_addr_expr (new_temp
);
4377 t
= build2 (MEM_REF
, vectype
, t
,
4378 build_int_cst (TREE_TYPE (t
), l
* bytes
));
4381 t
= build3 (BIT_FIELD_REF
, vectype
, new_temp
,
4382 bitsize_int (prec
), bitsize_int (l
* prec
));
4384 = gimple_build_assign (make_ssa_name (vectype
), t
);
4386 = vect_finish_stmt_generation (stmt_info
, new_stmt
, gsi
);
4388 if (j
== 0 && l
== 0)
4389 STMT_VINFO_VEC_STMT (stmt_info
)
4390 = *vec_stmt
= new_stmt_info
;
4392 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt_info
;
4394 prev_stmt_info
= new_stmt_info
;
4398 vect_clobber_variable (stmt_info
, gsi
, new_temp
);
4401 else if (simd_clone_subparts (vectype
) > nunits
)
4403 unsigned int k
= (simd_clone_subparts (vectype
)
4404 / simd_clone_subparts (rtype
));
4405 gcc_assert ((k
& (k
- 1)) == 0);
4406 if ((j
& (k
- 1)) == 0)
4407 vec_alloc (ret_ctor_elts
, k
);
4410 unsigned int m
, o
= nunits
/ simd_clone_subparts (rtype
);
4411 for (m
= 0; m
< o
; m
++)
4413 tree tem
= build4 (ARRAY_REF
, rtype
, new_temp
,
4414 size_int (m
), NULL_TREE
, NULL_TREE
);
4416 = gimple_build_assign (make_ssa_name (rtype
), tem
);
4418 = vect_finish_stmt_generation (stmt_info
, new_stmt
,
4420 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts
, NULL_TREE
,
4421 gimple_assign_lhs (new_stmt
));
4423 vect_clobber_variable (stmt_info
, gsi
, new_temp
);
4426 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts
, NULL_TREE
, new_temp
);
4427 if ((j
& (k
- 1)) != k
- 1)
4429 vec_oprnd0
= build_constructor (vectype
, ret_ctor_elts
);
4431 = gimple_build_assign (make_ssa_name (vec_dest
), vec_oprnd0
);
4433 = vect_finish_stmt_generation (stmt_info
, new_stmt
, gsi
);
4435 if ((unsigned) j
== k
- 1)
4436 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt_info
;
4438 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt_info
;
4440 prev_stmt_info
= new_stmt_info
;
4445 tree t
= build_fold_addr_expr (new_temp
);
4446 t
= build2 (MEM_REF
, vectype
, t
,
4447 build_int_cst (TREE_TYPE (t
), 0));
4449 = gimple_build_assign (make_ssa_name (vec_dest
), t
);
4451 = vect_finish_stmt_generation (stmt_info
, new_stmt
, gsi
);
4452 vect_clobber_variable (stmt_info
, gsi
, new_temp
);
4457 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt_info
;
4459 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt_info
;
4461 prev_stmt_info
= new_stmt_info
;
4466 /* The call in STMT might prevent it from being removed in dce.
4467 We however cannot remove it here, due to the way the ssa name
4468 it defines is mapped to the new definition. So just replace
4469 rhs of the statement with something harmless. */
4477 type
= TREE_TYPE (scalar_dest
);
4478 lhs
= gimple_call_lhs (vect_orig_stmt (stmt_info
)->stmt
);
4479 new_stmt
= gimple_build_assign (lhs
, build_zero_cst (type
));
4482 new_stmt
= gimple_build_nop ();
4483 vinfo
->replace_stmt (gsi
, vect_orig_stmt (stmt_info
), new_stmt
);
4484 unlink_stmt_vdef (stmt
);
/* NOTE(review): this region is a damaged extraction of GCC's vectorizer
   (tree-vect-stmts.c).  Each original source line is split across several
   physical lines with its original line number fused into the text, and some
   original lines (locals, braces, the final return) are missing entirely --
   e.g. 4504-4507 and 4516-4518.  Code is kept byte-identical below; only
   comments were added.  Restore from a pristine checkout before compiling. */
4490 /* Function vect_gen_widened_results_half
4492 Create a vector stmt whose code, type, number of arguments, and result
4493 variable are CODE, OP_TYPE, and VEC_DEST, and its arguments are
4494 VEC_OPRND0 and VEC_OPRND1. The new vector stmt is to be inserted at GSI.
4495 In the case that CODE is a CALL_EXPR, this means that a call to DECL
4496 needs to be created (DECL is a function-decl of a target-builtin).
4497 STMT_INFO is the original scalar stmt that we are vectorizing. */
4500 vect_gen_widened_results_half (enum tree_code code
,
4501 tree vec_oprnd0
, tree vec_oprnd1
, int op_type
,
4502 tree vec_dest
, gimple_stmt_iterator
*gsi
,
4503 stmt_vec_info stmt_info
)
/* Sanity-check the arity, build the assignment, give it a fresh SSA lhs,
   and emit it at GSI via the vectorizer's stmt-insertion helper.  */
4508 /* Generate half of the widened result: */
4509 gcc_assert (op_type
== TREE_CODE_LENGTH (code
));
4510 if (op_type
!= binary_op
)
/* NOTE(review): the branch body for the unary case (original line 4511,
   presumably clearing vec_oprnd1) is missing from this extraction.  */
4512 new_stmt
= gimple_build_assign (vec_dest
, code
, vec_oprnd0
, vec_oprnd1
);
4513 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
4514 gimple_assign_set_lhs (new_stmt
, new_temp
);
4515 vect_finish_stmt_generation (stmt_info
, new_stmt
, gsi
);
/* NOTE(review): damaged extraction -- original lines are split across
   physical lines with their line numbers fused in, and several original
   lines (declarations, braces, the recursion guard around 4548-4553) are
   missing.  Code kept byte-identical; comments only.  */
4521 /* Get vectorized definitions for loop-based vectorization of STMT_INFO.
4522 For the first operand we call vect_get_vec_def_for_operand (with OPRND
4523 containing scalar operand), and for the rest we get a copy with
4524 vect_get_vec_def_for_stmt_copy() using the previous vector definition
4525 (stored in OPRND). See vect_get_vec_def_for_stmt_copy() for details.
4526 The vectors are collected into VEC_OPRNDS. */
4529 vect_get_loop_based_defs (tree
*oprnd
, stmt_vec_info stmt_info
,
4530 vec
<tree
> *vec_oprnds
, int multi_step_cvt
)
4532 vec_info
*vinfo
= stmt_info
->vinfo
;
/* First operand: a scalar *OPRND gets a fresh vector def; an already-vector
   *OPRND is a copy of a previous definition.  */
4535 /* Get first vector operand. */
4536 /* All the vector operands except the very first one (that is scalar oprnd)
4538 if (TREE_CODE (TREE_TYPE (*oprnd
)) != VECTOR_TYPE
)
4539 vec_oprnd
= vect_get_vec_def_for_operand (*oprnd
, stmt_info
);
4541 vec_oprnd
= vect_get_vec_def_for_stmt_copy (vinfo
, *oprnd
);
4543 vec_oprnds
->quick_push (vec_oprnd
);
4545 /* Get second vector operand. */
4546 vec_oprnd
= vect_get_vec_def_for_stmt_copy (vinfo
, vec_oprnd
);
4547 vec_oprnds
->quick_push (vec_oprnd
);
/* Recurse for multi-step conversions; MULTI_STEP_CVT counts the remaining
   levels.  NOTE(review): the guard that stops the recursion (around
   original lines 4548-4553) is missing from this extraction.  */
4551 /* For conversion in multiple steps, continue to get operands
4554 vect_get_loop_based_defs (oprnd
, stmt_info
, vec_oprnds
,
4555 multi_step_cvt
- 1);
/* NOTE(review): damaged extraction -- original source lines are split across
   physical lines with line numbers fused in; several original lines
   (parameter lines, braces, the multi-step/last-step branch structure)
   are missing.  Code kept byte-identical; comments only.  */
4559 /* Create vectorized demotion statements for vector operands from VEC_OPRNDS.
4560 For multi-step conversions store the resulting vectors and call the function
4564 vect_create_vectorized_demotion_stmts (vec
<tree
> *vec_oprnds
,
4566 stmt_vec_info stmt_info
,
4568 gimple_stmt_iterator
*gsi
,
4569 slp_tree slp_node
, enum tree_code code
,
4570 stmt_vec_info
*prev_stmt_info
)
4573 tree vop0
, vop1
, new_tmp
, vec_dest
;
4575 vec_dest
= vec_dsts
.pop ();
/* Pair up consecutive operands; each CODE (pack) stmt demotes two input
   vectors into one result.  */
4577 for (i
= 0; i
< vec_oprnds
->length (); i
+= 2)
4579 /* Create demotion operation. */
4580 vop0
= (*vec_oprnds
)[i
];
4581 vop1
= (*vec_oprnds
)[i
+ 1];
4582 gassign
*new_stmt
= gimple_build_assign (vec_dest
, code
, vop0
, vop1
);
4583 new_tmp
= make_ssa_name (vec_dest
, new_stmt
);
4584 gimple_assign_set_lhs (new_stmt
, new_tmp
);
4585 stmt_vec_info new_stmt_info
4586 = vect_finish_stmt_generation (stmt_info
, new_stmt
, gsi
);
/* Compact results in place so the recursive call sees half as many
   operands.  */
4589 /* Store the resulting vector for next recursive call. */
4590 (*vec_oprnds
)[i
/2] = new_tmp
;
4593 /* This is the last step of the conversion sequence. Store the
4594 vectors in SLP_NODE or in vector info of the scalar statement
4595 (or in STMT_VINFO_RELATED_STMT chain). */
4597 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt_info
);
4600 if (!*prev_stmt_info
)
4601 STMT_VINFO_VEC_STMT (stmt_info
) = new_stmt_info
;
4603 STMT_VINFO_RELATED_STMT (*prev_stmt_info
) = new_stmt_info
;
4605 *prev_stmt_info
= new_stmt_info
;
/* Multi-step case: recurse with MULTI_STEP_CVT - 1, always packing with
   VEC_PACK_TRUNC_EXPR at the inner levels.  */
4610 /* For multi-step demotion operations we first generate demotion operations
4611 from the source type to the intermediate types, and then combine the
4612 results (stored in VEC_OPRNDS) in demotion operation to the destination
4616 /* At each level of recursion we have half of the operands we had at the
4618 vec_oprnds
->truncate ((i
+1)/2);
4619 vect_create_vectorized_demotion_stmts (vec_oprnds
, multi_step_cvt
- 1,
4620 stmt_info
, vec_dsts
, gsi
,
4621 slp_node
, VEC_PACK_TRUNC_EXPR
,
/* Restore the destination popped at entry for the caller's level.  */
4625 vec_dsts
.quick_push (vec_dest
);
/* NOTE(review): damaged extraction -- original lines split across physical
   lines with line numbers fused in; some original lines (the trailing
   stmt_info arguments of the two helper calls, braces, loop counter decls)
   are missing.  Code kept byte-identical; comments only.  */
4629 /* Create vectorized promotion statements for vector operands from VEC_OPRNDS0
4630 and VEC_OPRNDS1, for a binary operation associated with scalar statement
4631 STMT_INFO. For multi-step conversions store the resulting vectors and
4632 call the function recursively. */
4635 vect_create_vectorized_promotion_stmts (vec
<tree
> *vec_oprnds0
,
4636 vec
<tree
> *vec_oprnds1
,
4637 stmt_vec_info stmt_info
, tree vec_dest
,
4638 gimple_stmt_iterator
*gsi
,
4639 enum tree_code code1
,
4640 enum tree_code code2
, int op_type
)
4643 tree vop0
, vop1
, new_tmp1
, new_tmp2
;
4644 gimple
*new_stmt1
, *new_stmt2
;
4645 vec
<tree
> vec_tmp
= vNULL
;
/* Each input produces a low half and a high half, hence 2x capacity.  */
4647 vec_tmp
.create (vec_oprnds0
->length () * 2);
4648 FOR_EACH_VEC_ELT (*vec_oprnds0
, i
, vop0
)
4650 if (op_type
== binary_op
)
4651 vop1
= (*vec_oprnds1
)[i
];
4655 /* Generate the two halves of promotion operation. */
4656 new_stmt1
= vect_gen_widened_results_half (code1
, vop0
, vop1
,
4657 op_type
, vec_dest
, gsi
,
4659 new_stmt2
= vect_gen_widened_results_half (code2
, vop0
, vop1
,
4660 op_type
, vec_dest
, gsi
,
/* The halves may come back as calls (target builtins) or assigns; fetch
   the lhs accordingly.  */
4662 if (is_gimple_call (new_stmt1
))
4664 new_tmp1
= gimple_call_lhs (new_stmt1
);
4665 new_tmp2
= gimple_call_lhs (new_stmt2
);
4669 new_tmp1
= gimple_assign_lhs (new_stmt1
);
4670 new_tmp2
= gimple_assign_lhs (new_stmt2
);
4673 /* Store the results for the next step. */
4674 vec_tmp
.quick_push (new_tmp1
);
4675 vec_tmp
.quick_push (new_tmp2
);
/* Hand the doubled operand list back to the caller through VEC_OPRNDS0.  */
4678 vec_oprnds0
->release ();
4679 *vec_oprnds0
= vec_tmp
;
/* NOTE(review): damaged extraction of vectorizable_conversion -- original
   source lines are split across physical lines with their original line
   numbers fused in, and many original lines (braces, `return false`/`return
   true` statements, several argument lines) are missing throughout.  The
   text below is kept byte-identical; only comments were added.  Restore
   from a pristine checkout before attempting to compile or edit logic.  */
4683 /* Check if STMT_INFO performs a conversion operation that can be vectorized.
4684 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
4685 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
4686 Return true if STMT_INFO is vectorizable in this way. */
4689 vectorizable_conversion (stmt_vec_info stmt_info
, gimple_stmt_iterator
*gsi
,
4690 stmt_vec_info
*vec_stmt
, slp_tree slp_node
,
4691 stmt_vector_for_cost
*cost_vec
)
4695 tree op0
, op1
= NULL_TREE
;
4696 tree vec_oprnd0
= NULL_TREE
, vec_oprnd1
= NULL_TREE
;
4697 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
4698 enum tree_code code
, code1
= ERROR_MARK
, code2
= ERROR_MARK
;
4699 enum tree_code codecvt1
= ERROR_MARK
, codecvt2
= ERROR_MARK
;
4701 enum vect_def_type dt
[2] = {vect_unknown_def_type
, vect_unknown_def_type
};
4703 stmt_vec_info prev_stmt_info
;
4704 poly_uint64 nunits_in
;
4705 poly_uint64 nunits_out
;
4706 tree vectype_out
, vectype_in
;
4708 tree lhs_type
, rhs_type
;
/* MODIFIER classifies the conversion: same width (NONE), widening (WIDEN)
   or narrowing (NARROW), decided below from nunits_in vs nunits_out.  */
4709 enum { NARROW
, NONE
, WIDEN
} modifier
;
4710 vec
<tree
> vec_oprnds0
= vNULL
;
4711 vec
<tree
> vec_oprnds1
= vNULL
;
4713 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
4714 vec_info
*vinfo
= stmt_info
->vinfo
;
4715 int multi_step_cvt
= 0;
4716 vec
<tree
> interm_types
= vNULL
;
4717 tree last_oprnd
, intermediate_type
, cvt_type
= NULL_TREE
;
4719 unsigned short fltsz
;
4721 /* Is STMT a vectorizable conversion? */
4723 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
4726 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
4730 gassign
*stmt
= dyn_cast
<gassign
*> (stmt_info
->stmt
);
4734 if (TREE_CODE (gimple_assign_lhs (stmt
)) != SSA_NAME
)
/* Only conversions and the widening mult/shift codes are handled here.  */
4737 code
= gimple_assign_rhs_code (stmt
);
4738 if (!CONVERT_EXPR_CODE_P (code
)
4739 && code
!= FIX_TRUNC_EXPR
4740 && code
!= FLOAT_EXPR
4741 && code
!= WIDEN_MULT_EXPR
4742 && code
!= WIDEN_LSHIFT_EXPR
)
4745 op_type
= TREE_CODE_LENGTH (code
);
4747 /* Check types of lhs and rhs. */
4748 scalar_dest
= gimple_assign_lhs (stmt
);
4749 lhs_type
= TREE_TYPE (scalar_dest
);
4750 vectype_out
= STMT_VINFO_VECTYPE (stmt_info
);
4752 op0
= gimple_assign_rhs1 (stmt
);
4753 rhs_type
= TREE_TYPE (op0
);
4755 if ((code
!= FIX_TRUNC_EXPR
&& code
!= FLOAT_EXPR
)
4756 && !((INTEGRAL_TYPE_P (lhs_type
)
4757 && INTEGRAL_TYPE_P (rhs_type
))
4758 || (SCALAR_FLOAT_TYPE_P (lhs_type
)
4759 && SCALAR_FLOAT_TYPE_P (rhs_type
))))
4762 if (!VECTOR_BOOLEAN_TYPE_P (vectype_out
)
4763 && ((INTEGRAL_TYPE_P (lhs_type
)
4764 && !type_has_mode_precision_p (lhs_type
))
4765 || (INTEGRAL_TYPE_P (rhs_type
)
4766 && !type_has_mode_precision_p (rhs_type
))))
4768 if (dump_enabled_p ())
4769 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4770 "type conversion to/from bit-precision unsupported."
4775 /* Check the operands of the operation. */
4776 if (!vect_is_simple_use (op0
, vinfo
, &dt
[0], &vectype_in
))
4778 if (dump_enabled_p ())
4779 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4780 "use not simple.\n");
4783 if (op_type
== binary_op
)
4787 op1
= gimple_assign_rhs2 (stmt
);
4788 gcc_assert (code
== WIDEN_MULT_EXPR
|| code
== WIDEN_LSHIFT_EXPR
);
4789 /* For WIDEN_MULT_EXPR, if OP0 is a constant, use the type of
4791 if (CONSTANT_CLASS_P (op0
))
4792 ok
= vect_is_simple_use (op1
, vinfo
, &dt
[1], &vectype_in
);
4794 ok
= vect_is_simple_use (op1
, vinfo
, &dt
[1]);
4798 if (dump_enabled_p ())
4799 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4800 "use not simple.\n");
4805 /* If op0 is an external or constant def, infer the vector type
4806 from the scalar type. */
4808 vectype_in
= get_vectype_for_scalar_type (vinfo
, rhs_type
, slp_node
);
4810 gcc_assert (vectype_in
);
4813 if (dump_enabled_p ())
4814 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4815 "no vectype for scalar type %T\n", rhs_type
);
4820 if (VECTOR_BOOLEAN_TYPE_P (vectype_out
)
4821 && !VECTOR_BOOLEAN_TYPE_P (vectype_in
))
4823 if (dump_enabled_p ())
4824 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4825 "can't convert between boolean and non "
4826 "boolean vectors %T\n", rhs_type
);
/* Classify the conversion kind by comparing element counts of the input
   and output vector types (NONE / NARROW / WIDEN assignments themselves
   are among the lines missing from this extraction).  */
4831 nunits_in
= TYPE_VECTOR_SUBPARTS (vectype_in
);
4832 nunits_out
= TYPE_VECTOR_SUBPARTS (vectype_out
);
4833 if (known_eq (nunits_out
, nunits_in
))
4835 else if (multiple_p (nunits_out
, nunits_in
))
4839 gcc_checking_assert (multiple_p (nunits_in
, nunits_out
));
4843 /* Multiple types in SLP are handled by creating the appropriate number of
4844 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
4848 else if (modifier
== NARROW
)
4849 ncopies
= vect_get_num_copies (loop_vinfo
, vectype_out
);
4851 ncopies
= vect_get_num_copies (loop_vinfo
, vectype_in
);
4853 /* Sanity check: make sure that at least one copy of the vectorized stmt
4854 needs to be generated. */
4855 gcc_assert (ncopies
>= 1);
4857 bool found_mode
= false;
4858 scalar_mode lhs_mode
= SCALAR_TYPE_MODE (lhs_type
);
4859 scalar_mode rhs_mode
= SCALAR_TYPE_MODE (rhs_type
);
4860 opt_scalar_mode rhs_mode_iter
;
4862 /* Supportable by target? */
4866 if (code
!= FIX_TRUNC_EXPR
4867 && code
!= FLOAT_EXPR
4868 && !CONVERT_EXPR_CODE_P (code
))
4870 if (supportable_convert_operation (code
, vectype_out
, vectype_in
, &code1
))
4874 if (dump_enabled_p ())
4875 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4876 "conversion not supported by target.\n");
/* WIDEN path: try a direct widening op first, then (for FLOAT_EXPR from a
   narrower integer mode) search progressively wider integer modes for a
   two-stage widen-then-convert sequence.  */
4880 if (supportable_widening_operation (code
, stmt_info
, vectype_out
,
4881 vectype_in
, &code1
, &code2
,
4882 &multi_step_cvt
, &interm_types
))
4884 /* Binary widening operation can only be supported directly by the
4886 gcc_assert (!(multi_step_cvt
&& op_type
== binary_op
));
4890 if (code
!= FLOAT_EXPR
4891 || GET_MODE_SIZE (lhs_mode
) <= GET_MODE_SIZE (rhs_mode
))
4894 fltsz
= GET_MODE_SIZE (lhs_mode
);
4895 FOR_EACH_2XWIDER_MODE (rhs_mode_iter
, rhs_mode
)
4897 rhs_mode
= rhs_mode_iter
.require ();
4898 if (GET_MODE_SIZE (rhs_mode
) > fltsz
)
4902 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode
), 0);
4903 cvt_type
= get_same_sized_vectype (cvt_type
, vectype_in
);
4904 if (cvt_type
== NULL_TREE
)
4907 if (GET_MODE_SIZE (rhs_mode
) == fltsz
)
4909 if (!supportable_convert_operation (code
, vectype_out
,
4910 cvt_type
, &codecvt1
))
4913 else if (!supportable_widening_operation (code
, stmt_info
,
4914 vectype_out
, cvt_type
,
4915 &codecvt1
, &codecvt2
,
4920 gcc_assert (multi_step_cvt
== 0);
4922 if (supportable_widening_operation (NOP_EXPR
, stmt_info
, cvt_type
,
4923 vectype_in
, &code1
, &code2
,
4924 &multi_step_cvt
, &interm_types
))
4934 if (GET_MODE_SIZE (rhs_mode
) == fltsz
)
4935 codecvt2
= ERROR_MARK
;
4939 interm_types
.safe_push (cvt_type
);
4940 cvt_type
= NULL_TREE
;
/* NARROW path: direct narrowing, or (for FIX_TRUNC from a wider mode)
   convert first then narrow.  */
4945 gcc_assert (op_type
== unary_op
);
4946 if (supportable_narrowing_operation (code
, vectype_out
, vectype_in
,
4947 &code1
, &multi_step_cvt
,
4951 if (code
!= FIX_TRUNC_EXPR
4952 || GET_MODE_SIZE (lhs_mode
) >= GET_MODE_SIZE (rhs_mode
))
4956 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode
), 0);
4957 cvt_type
= get_same_sized_vectype (cvt_type
, vectype_in
);
4958 if (cvt_type
== NULL_TREE
)
4960 if (!supportable_convert_operation (code
, cvt_type
, vectype_in
,
4963 if (supportable_narrowing_operation (NOP_EXPR
, vectype_out
, cvt_type
,
4964 &code1
, &multi_step_cvt
,
/* Analysis-only phase: record the vectorization kind and its cost.  */
4973 if (!vec_stmt
) /* transformation not required. */
4975 DUMP_VECT_SCOPE ("vectorizable_conversion");
4976 if (modifier
== NONE
)
4978 STMT_VINFO_TYPE (stmt_info
) = type_conversion_vec_info_type
;
4979 vect_model_simple_cost (stmt_info
, ncopies
, dt
, ndts
, slp_node
,
4982 else if (modifier
== NARROW
)
4984 STMT_VINFO_TYPE (stmt_info
) = type_demotion_vec_info_type
;
4985 /* The final packing step produces one vector result per copy. */
4986 unsigned int nvectors
4987 = (slp_node
? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
) : ncopies
);
4988 vect_model_promotion_demotion_cost (stmt_info
, dt
, nvectors
,
4989 multi_step_cvt
, cost_vec
);
4993 STMT_VINFO_TYPE (stmt_info
) = type_promotion_vec_info_type
;
4994 /* The initial unpacking step produces two vector results
4995 per copy. MULTI_STEP_CVT is 0 for a single conversion,
4996 so >> MULTI_STEP_CVT divides by 2^(number of steps - 1). */
4997 unsigned int nvectors
4999 ? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
) >> multi_step_cvt
5001 vect_model_promotion_demotion_cost (stmt_info
, dt
, nvectors
,
5002 multi_step_cvt
, cost_vec
);
5004 interm_types
.release ();
/* Transform phase starts here.  */
5009 if (dump_enabled_p ())
5010 dump_printf_loc (MSG_NOTE
, vect_location
,
5011 "transform conversion. ncopies = %d.\n", ncopies
);
5013 if (op_type
== binary_op
)
5015 if (CONSTANT_CLASS_P (op0
))
5016 op0
= fold_convert (TREE_TYPE (op1
), op0
);
5017 else if (CONSTANT_CLASS_P (op1
))
5018 op1
= fold_convert (TREE_TYPE (op0
), op1
);
5021 /* In case of multi-step conversion, we first generate conversion operations
5022 to the intermediate types, and then from that types to the final one.
5023 We create vector destinations for the intermediate type (TYPES) received
5024 from supportable_*_operation, and store them in the correct order
5025 for future use in vect_create_vectorized_*_stmts (). */
5026 auto_vec
<tree
> vec_dsts (multi_step_cvt
+ 1);
5027 vec_dest
= vect_create_destination_var (scalar_dest
,
5028 (cvt_type
&& modifier
== WIDEN
)
5029 ? cvt_type
: vectype_out
);
5030 vec_dsts
.quick_push (vec_dest
);
5034 for (i
= interm_types
.length () - 1;
5035 interm_types
.iterate (i
, &intermediate_type
); i
--)
5037 vec_dest
= vect_create_destination_var (scalar_dest
,
5039 vec_dsts
.quick_push (vec_dest
);
5044 vec_dest
= vect_create_destination_var (scalar_dest
,
5046 ? vectype_out
: cvt_type
);
/* Pre-size the operand vectors; widening doubles per step, narrowing
   consumes pairs.  */
5050 if (modifier
== WIDEN
)
5052 vec_oprnds0
.create (multi_step_cvt
? vect_pow2 (multi_step_cvt
) : 1);
5053 if (op_type
== binary_op
)
5054 vec_oprnds1
.create (1);
5056 else if (modifier
== NARROW
)
5057 vec_oprnds0
.create (
5058 2 * (multi_step_cvt
? vect_pow2 (multi_step_cvt
) : 1));
5060 else if (code
== WIDEN_LSHIFT_EXPR
)
5061 vec_oprnds1
.create (slp_node
->vec_stmts_size
);
5064 prev_stmt_info
= NULL
;
/* MODIFIER == NONE: one unary conversion stmt per operand per copy.  */
5068 for (j
= 0; j
< ncopies
; j
++)
5071 vect_get_vec_defs (op0
, NULL
, stmt_info
, &vec_oprnds0
,
5074 vect_get_vec_defs_for_stmt_copy (vinfo
, &vec_oprnds0
, NULL
);
5076 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vop0
)
5078 stmt_vec_info new_stmt_info
;
5079 /* Arguments are ready, create the new vector stmt. */
5080 gcc_assert (TREE_CODE_LENGTH (code1
) == unary_op
);
5081 gassign
*new_stmt
= gimple_build_assign (vec_dest
, code1
, vop0
);
5082 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
5083 gimple_assign_set_lhs (new_stmt
, new_temp
);
5085 = vect_finish_stmt_generation (stmt_info
, new_stmt
, gsi
);
5088 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt_info
);
5091 if (!prev_stmt_info
)
5092 STMT_VINFO_VEC_STMT (stmt_info
)
5093 = *vec_stmt
= new_stmt_info
;
5095 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt_info
;
5096 prev_stmt_info
= new_stmt_info
;
/* MODIFIER == WIDEN: unpack operands, promote level by level.  */
5103 /* In case the vectorization factor (VF) is bigger than the number
5104 of elements that we can fit in a vectype (nunits), we have to
5105 generate more than one vector stmt - i.e - we need to "unroll"
5106 the vector stmt by a factor VF/nunits. */
5107 for (j
= 0; j
< ncopies
; j
++)
5114 if (code
== WIDEN_LSHIFT_EXPR
)
5119 /* Store vec_oprnd1 for every vector stmt to be created
5120 for SLP_NODE. We check during the analysis that all
5121 the shift arguments are the same. */
5122 for (k
= 0; k
< slp_node
->vec_stmts_size
- 1; k
++)
5123 vec_oprnds1
.quick_push (vec_oprnd1
);
5125 vect_get_vec_defs (op0
, NULL_TREE
, stmt_info
,
5126 &vec_oprnds0
, NULL
, slp_node
);
5129 vect_get_vec_defs (op0
, op1
, stmt_info
, &vec_oprnds0
,
5130 &vec_oprnds1
, slp_node
);
5134 vec_oprnd0
= vect_get_vec_def_for_operand (op0
, stmt_info
);
5135 vec_oprnds0
.quick_push (vec_oprnd0
);
5136 if (op_type
== binary_op
)
5138 if (code
== WIDEN_LSHIFT_EXPR
)
5142 = vect_get_vec_def_for_operand (op1
, stmt_info
);
5143 vec_oprnds1
.quick_push (vec_oprnd1
);
5149 vec_oprnd0
= vect_get_vec_def_for_stmt_copy (vinfo
, vec_oprnd0
);
5150 vec_oprnds0
.truncate (0);
5151 vec_oprnds0
.quick_push (vec_oprnd0
);
5152 if (op_type
== binary_op
)
5154 if (code
== WIDEN_LSHIFT_EXPR
)
5157 vec_oprnd1
= vect_get_vec_def_for_stmt_copy (vinfo
,
5159 vec_oprnds1
.truncate (0);
5160 vec_oprnds1
.quick_push (vec_oprnd1
);
5164 /* Arguments are ready. Create the new vector stmts. */
5165 for (i
= multi_step_cvt
; i
>= 0; i
--)
5167 tree this_dest
= vec_dsts
[i
];
5168 enum tree_code c1
= code1
, c2
= code2
;
5169 if (i
== 0 && codecvt2
!= ERROR_MARK
)
5174 vect_create_vectorized_promotion_stmts (&vec_oprnds0
,
5175 &vec_oprnds1
, stmt_info
,
5180 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vop0
)
5182 stmt_vec_info new_stmt_info
;
5185 gcc_assert (TREE_CODE_LENGTH (codecvt1
) == unary_op
);
5186 new_temp
= make_ssa_name (vec_dest
);
5188 = gimple_build_assign (new_temp
, codecvt1
, vop0
);
5190 = vect_finish_stmt_generation (stmt_info
, new_stmt
, gsi
);
5193 new_stmt_info
= vinfo
->lookup_def (vop0
);
5196 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt_info
);
5199 if (!prev_stmt_info
)
5200 STMT_VINFO_VEC_STMT (stmt_info
) = new_stmt_info
;
5202 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt_info
;
5203 prev_stmt_info
= new_stmt_info
;
5208 *vec_stmt
= STMT_VINFO_VEC_STMT (stmt_info
);
/* MODIFIER == NARROW: optionally convert, then pack pairs down.  */
5212 /* In case the vectorization factor (VF) is bigger than the number
5213 of elements that we can fit in a vectype (nunits), we have to
5214 generate more than one vector stmt - i.e - we need to "unroll"
5215 the vector stmt by a factor VF/nunits. */
5216 for (j
= 0; j
< ncopies
; j
++)
5220 vect_get_vec_defs (op0
, NULL_TREE
, stmt_info
, &vec_oprnds0
, NULL
,
5224 vec_oprnds0
.truncate (0);
5225 vect_get_loop_based_defs (&last_oprnd
, stmt_info
, &vec_oprnds0
,
5226 vect_pow2 (multi_step_cvt
) - 1);
5229 /* Arguments are ready. Create the new vector stmts. */
5231 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vop0
)
5233 gcc_assert (TREE_CODE_LENGTH (codecvt1
) == unary_op
);
5234 new_temp
= make_ssa_name (vec_dest
);
5236 = gimple_build_assign (new_temp
, codecvt1
, vop0
);
5237 vect_finish_stmt_generation (stmt_info
, new_stmt
, gsi
);
5238 vec_oprnds0
[i
] = new_temp
;
5241 vect_create_vectorized_demotion_stmts (&vec_oprnds0
, multi_step_cvt
,
5242 stmt_info
, vec_dsts
, gsi
,
5247 *vec_stmt
= STMT_VINFO_VEC_STMT (stmt_info
);
5251 vec_oprnds0
.release ();
5252 vec_oprnds1
.release ();
5253 interm_types
.release ();
/* NOTE(review): damaged extraction -- original lines split across physical
   lines with line numbers fused in; the null-check after the dyn_cast and
   the `return true`/`return false` lines (around 5266-5268, 5274, 5278-5280)
   are missing.  Code kept byte-identical; comments only.  */
5258 /* Return true if we can assume from the scalar form of STMT_INFO that
5259 neither the scalar nor the vector forms will generate code. STMT_INFO
5260 is known not to involve a data reference. */
5263 vect_nop_conversion_p (stmt_vec_info stmt_info
)
5265 gassign
*stmt
= dyn_cast
<gassign
*> (stmt_info
->stmt
);
5269 tree lhs
= gimple_assign_lhs (stmt
);
5270 tree_code code
= gimple_assign_rhs_code (stmt
);
5271 tree rhs
= gimple_assign_rhs1 (stmt
);
/* Plain copies and VIEW_CONVERT_EXPRs are always no-ops here.  */
5273 if (code
== SSA_NAME
|| code
== VIEW_CONVERT_EXPR
)
/* A conversion is a no-op when it does not change the value's bits.  */
5276 if (CONVERT_EXPR_CODE_P (code
))
5277 return tree_nop_conversion_p (TREE_TYPE (lhs
), TREE_TYPE (rhs
));
/* NOTE(review): damaged extraction of vectorizable_assignment -- original
   lines are split across physical lines with line numbers fused in, and
   many original lines (braces, `return false` exits, some declarations and
   arguments) are missing.  Code kept byte-identical; comments only.  */
5282 /* Function vectorizable_assignment.
5284 Check if STMT_INFO performs an assignment (copy) that can be vectorized.
5285 If VEC_STMT is also passed, vectorize the STMT_INFO: create a vectorized
5286 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
5287 Return true if STMT_INFO is vectorizable in this way. */
5290 vectorizable_assignment (stmt_vec_info stmt_info
, gimple_stmt_iterator
*gsi
,
5291 stmt_vec_info
*vec_stmt
, slp_tree slp_node
,
5292 stmt_vector_for_cost
*cost_vec
)
5297 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
5299 enum vect_def_type dt
[1] = {vect_unknown_def_type
};
5303 vec
<tree
> vec_oprnds
= vNULL
;
5305 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
5306 vec_info
*vinfo
= stmt_info
->vinfo
;
5307 stmt_vec_info prev_stmt_info
= NULL
;
5308 enum tree_code code
;
/* Early rejection: irrelevant stmts and non-internal defs.  */
5311 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
5314 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
5318 /* Is vectorizable assignment? */
5319 gassign
*stmt
= dyn_cast
<gassign
*> (stmt_info
->stmt
);
5323 scalar_dest
= gimple_assign_lhs (stmt
);
5324 if (TREE_CODE (scalar_dest
) != SSA_NAME
)
/* Accept plain copies, PAREN_EXPR and conversions only.  */
5327 code
= gimple_assign_rhs_code (stmt
);
5328 if (gimple_assign_single_p (stmt
)
5329 || code
== PAREN_EXPR
5330 || CONVERT_EXPR_CODE_P (code
))
5331 op
= gimple_assign_rhs1 (stmt
);
5335 if (code
== VIEW_CONVERT_EXPR
)
5336 op
= TREE_OPERAND (op
, 0);
5338 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
5339 poly_uint64 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
5341 /* Multiple types in SLP are handled by creating the appropriate number of
5342 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
5347 ncopies
= vect_get_num_copies (loop_vinfo
, vectype
);
5349 gcc_assert (ncopies
>= 1);
5351 if (!vect_is_simple_use (op
, vinfo
, &dt
[0], &vectype_in
))
5353 if (dump_enabled_p ())
5354 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5355 "use not simple.\n");
5359 /* We can handle NOP_EXPR conversions that do not change the number
5360 of elements or the vector size. */
5361 if ((CONVERT_EXPR_CODE_P (code
)
5362 || code
== VIEW_CONVERT_EXPR
)
5364 || maybe_ne (TYPE_VECTOR_SUBPARTS (vectype_in
), nunits
)
5365 || maybe_ne (GET_MODE_SIZE (TYPE_MODE (vectype
)),
5366 GET_MODE_SIZE (TYPE_MODE (vectype_in
)))))
5369 /* We do not handle bit-precision changes. */
5370 if ((CONVERT_EXPR_CODE_P (code
)
5371 || code
== VIEW_CONVERT_EXPR
)
5372 && INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest
))
5373 && (!type_has_mode_precision_p (TREE_TYPE (scalar_dest
))
5374 || !type_has_mode_precision_p (TREE_TYPE (op
)))
5375 /* But a conversion that does not change the bit-pattern is ok. */
5376 && !((TYPE_PRECISION (TREE_TYPE (scalar_dest
))
5377 > TYPE_PRECISION (TREE_TYPE (op
)))
5378 && TYPE_UNSIGNED (TREE_TYPE (op
)))
5379 /* Conversion between boolean types of different sizes is
5380 a simple assignment in case their vectypes are same
5382 && (!VECTOR_BOOLEAN_TYPE_P (vectype
)
5383 || !VECTOR_BOOLEAN_TYPE_P (vectype_in
)))
5385 if (dump_enabled_p ())
5386 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5387 "type conversion to/from bit-precision "
/* Analysis-only phase: record the stmt kind and its cost (no cost is
   charged when the copy is a no-op conversion).  */
5392 if (!vec_stmt
) /* transformation not required. */
5394 STMT_VINFO_TYPE (stmt_info
) = assignment_vec_info_type
;
5395 DUMP_VECT_SCOPE ("vectorizable_assignment");
5396 if (!vect_nop_conversion_p (stmt_info
))
5397 vect_model_simple_cost (stmt_info
, ncopies
, dt
, ndts
, slp_node
,
/* Transform phase.  */
5403 if (dump_enabled_p ())
5404 dump_printf_loc (MSG_NOTE
, vect_location
, "transform assignment.\n");
5407 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
5410 for (j
= 0; j
< ncopies
; j
++)
5414 vect_get_vec_defs (op
, NULL
, stmt_info
, &vec_oprnds
, NULL
, slp_node
);
5416 vect_get_vec_defs_for_stmt_copy (vinfo
, &vec_oprnds
, NULL
);
5418 /* Arguments are ready. create the new vector stmt. */
5419 stmt_vec_info new_stmt_info
= NULL
;
5420 FOR_EACH_VEC_ELT (vec_oprnds
, i
, vop
)
5422 if (CONVERT_EXPR_CODE_P (code
)
5423 || code
== VIEW_CONVERT_EXPR
)
5424 vop
= build1 (VIEW_CONVERT_EXPR
, vectype
, vop
);
5425 gassign
*new_stmt
= gimple_build_assign (vec_dest
, vop
);
5426 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
5427 gimple_assign_set_lhs (new_stmt
, new_temp
);
5429 = vect_finish_stmt_generation (stmt_info
, new_stmt
, gsi
);
5431 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt_info
);
5438 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt_info
;
5440 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt_info
;
5442 prev_stmt_info
= new_stmt_info
;
5445 vec_oprnds
.release ();
/* NOTE(review): damaged extraction -- original lines split across physical
   lines with line numbers fused in; several original lines (declarations,
   null checks, `return` statements around 5463-5464, 5474-5475, 5480-5483)
   are missing.  Code kept byte-identical; comments only.  */
5450 /* Return TRUE if CODE (a shift operation) is supported for SCALAR_TYPE
5451 either as shift by a scalar or by a vector. */
5454 vect_supportable_shift (vec_info
*vinfo
, enum tree_code code
, tree scalar_type
)
5457 machine_mode vec_mode
;
5462 vectype
= get_vectype_for_scalar_type (vinfo
, scalar_type
);
/* Try the shift-by-scalar optab first, then fall back to the
   shift-by-vector optab.  */
5466 optab
= optab_for_tree_code (code
, vectype
, optab_scalar
);
5468 || optab_handler (optab
, TYPE_MODE (vectype
)) == CODE_FOR_nothing
)
5470 optab
= optab_for_tree_code (code
, vectype
, optab_vector
);
5472 || (optab_handler (optab
, TYPE_MODE (vectype
))
5473 == CODE_FOR_nothing
))
/* Confirm the chosen optab has an insn for the vector mode.  */
5477 vec_mode
= TYPE_MODE (vectype
);
5478 icode
= (int) optab_handler (optab
, vec_mode
);
5479 if (icode
== CODE_FOR_nothing
)
/* Function vectorizable_shift.

   Check if STMT_INFO performs a shift operation that can be vectorized.
   If VEC_STMT is also passed, vectorize the STMT_INFO: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at GSI.
   Return true if STMT_INFO is vectorizable in this way.  */

/* NOTE(review): this extract is lossy -- many original lines (braces,
   guards, early returns and parts of conditions) were dropped between the
   fragments below, so the function as shown is incomplete.  Added comments
   only describe what the visible code establishes.  */

vectorizable_shift (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
		    stmt_vec_info *vec_stmt, slp_tree slp_node,
		    stmt_vector_for_cost *cost_vec)
  tree op0, op1 = NULL;
  tree vec_oprnd1 = NULL_TREE;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  enum tree_code code;
  machine_mode vec_mode;
  machine_mode optab_op2_mode;
  /* Def kinds of the two operands, filled in by vect_is_simple_use.  */
  enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
  stmt_vec_info prev_stmt_info;
  poly_uint64 nunits_in;
  poly_uint64 nunits_out;
  vec<tree> vec_oprnds0 = vNULL;
  vec<tree> vec_oprnds1 = vNULL;
  /* Assume a scalar shift amount until shown otherwise below.  */
  bool scalar_shift_arg = true;
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
  vec_info *vinfo = stmt_info->vinfo;
  bool incompatible_op1_vectype_p = false;

  /* Only handle statements relevant to vectorization (or anything in
     basic-block SLP mode).  */
  if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)

  if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
      && STMT_VINFO_DEF_TYPE (stmt_info) != vect_nested_cycle

  /* Is STMT a vectorizable binary/unary operation?   */
  gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);

  if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)

  code = gimple_assign_rhs_code (stmt);

  /* Only shift and rotate codes are handled by this function.  */
  if (!(code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
	|| code == RROTATE_EXPR))

  scalar_dest = gimple_assign_lhs (stmt);
  vectype_out = STMT_VINFO_VECTYPE (stmt_info);
  /* Reject destinations whose precision differs from the mode precision.  */
  if (!type_has_mode_precision_p (TREE_TYPE (scalar_dest)))
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "bit-precision shifts not supported.\n");

  op0 = gimple_assign_rhs1 (stmt);
  if (!vect_is_simple_use (op0, vinfo, &dt[0], &vectype))
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "use not simple.\n");
  /* If op0 is an external or constant def, infer the vector type
     from the scalar type.  */
    vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (op0), slp_node);
    gcc_assert (vectype);
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "no vectype for scalar type\n");

  /* Input and output vectors must have the same number of lanes.  */
  nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
  nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
  if (maybe_ne (nunits_out, nunits_in))

  op1 = gimple_assign_rhs2 (stmt);
  stmt_vec_info op1_def_stmt_info;
  if (!vect_is_simple_use (op1, vinfo, &dt[1], &op1_vectype,
			   &op1_def_stmt_info))
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "use not simple.\n");

  /* Multiple types in SLP are handled by creating the appropriate number of
     vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
     case of SLP.  */
    ncopies = vect_get_num_copies (loop_vinfo, vectype);

  gcc_assert (ncopies >= 1);

  /* Determine whether the shift amount is a vector, or scalar.  If the
     shift/rotate amount is a vector, use the vector/vector shift optabs.  */

  if ((dt[1] == vect_internal_def
       || dt[1] == vect_induction_def
       || dt[1] == vect_nested_cycle)
    scalar_shift_arg = false;
  else if (dt[1] == vect_constant_def
	   || dt[1] == vect_external_def
	   || dt[1] == vect_internal_def)
      /* In SLP, need to check whether the shift count is the same,
	 in loops if it is a constant or invariant, it is always
	 a scalar shift.  */
	  vec<stmt_vec_info> stmts = SLP_TREE_SCALAR_STMTS (slp_node);
	  stmt_vec_info slpstmt_info;

	  /* Every SLP lane must use the same shift amount for the scalar
	     shift argument to remain usable.  */
	  FOR_EACH_VEC_ELT (stmts, k, slpstmt_info)
	      gassign *slpstmt = as_a <gassign *> (slpstmt_info->stmt);
	      if (!operand_equal_p (gimple_assign_rhs2 (slpstmt), op1, 0))
		scalar_shift_arg = false;

	  /* For internal SLP defs we have to make sure we see scalar stmts
	     for all vector elements.
	     ???  For different vectors we could resort to a different
	     scalar shift operand but code-generation below simply always
	     takes the first.  */
	  if (dt[1] == vect_internal_def
	      && maybe_ne (nunits_out * SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node),
	    scalar_shift_arg = false;

      /* If the shift amount is computed by a pattern stmt we cannot
	 use the scalar amount directly thus give up and use a vector
	 shift.  */
      if (op1_def_stmt_info && is_pattern_stmt_p (op1_def_stmt_info))
	scalar_shift_arg = false;
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "operand mode requires invariant argument.\n");

  /* Vector shifted by vector.  */
  bool was_scalar_shift_arg = scalar_shift_arg;
  if (!scalar_shift_arg)
      optab = optab_for_tree_code (code, vectype, optab_vector);
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "vector/vector shift/rotate found.\n");
	op1_vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (op1),
      /* The shift-amount vector type must agree with VECTYPE in lane
	 count and mode.  */
      incompatible_op1_vectype_p
	= (op1_vectype == NULL_TREE
	   || maybe_ne (TYPE_VECTOR_SUBPARTS (op1_vectype),
			TYPE_VECTOR_SUBPARTS (vectype))
	   || TYPE_MODE (op1_vectype) != TYPE_MODE (vectype));
      if (incompatible_op1_vectype_p
	      || SLP_TREE_DEF_TYPE
		   (SLP_TREE_CHILDREN (slp_node)[1]) != vect_constant_def))
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			     "unusable type for last operand in"
			     " vector/vector shift/rotate.\n");
  /* See if the machine has a vector shifted by scalar insn and if not
     then see if it has a vector shifted by vector insn.  */
      optab = optab_for_tree_code (code, vectype, optab_scalar);
	  && optab_handler (optab, TYPE_MODE (vectype)) != CODE_FOR_nothing)
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_NOTE, vect_location,
			     "vector/scalar shift/rotate found.\n");
	  /* No vector/scalar insn -- fall back to vector/vector.  */
	  optab = optab_for_tree_code (code, vectype, optab_vector);
	      && (optab_handler (optab, TYPE_MODE (vectype))
		  != CODE_FOR_nothing))
	      scalar_shift_arg = false;

	      if (dump_enabled_p ())
		dump_printf_loc (MSG_NOTE, vect_location,
				 "vector/vector shift/rotate found.\n");

	      /* Unlike the other binary operators, shifts/rotates have
		 the rhs being int, instead of the same type as the lhs,
		 so make sure the scalar is the right type if we are
		 dealing with vectors of long long/long/short/char.  */
	      incompatible_op1_vectype_p
		= !tree_nop_conversion_p (TREE_TYPE (vectype),

  /* Supportable by target?  */
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
  vec_mode = TYPE_MODE (vectype);
  icode = (int) optab_handler (optab, vec_mode);
  if (icode == CODE_FOR_nothing)
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "op not supported by target.\n");
      /* Check only during analysis.  */
      if (maybe_ne (GET_MODE_SIZE (vec_mode), UNITS_PER_WORD)
	  && !vect_worthwhile_without_simd_p (vinfo, code)))
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "proceeding using word mode.\n");

  /* Worthwhile without SIMD support?  Check only during analysis.  */
      && !VECTOR_MODE_P (TYPE_MODE (vectype))
      && !vect_worthwhile_without_simd_p (vinfo, code))
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "not worthwhile without SIMD support.\n");

  if (!vec_stmt) /* transformation not required.  */
      /* Analysis phase: record the statement kind and its cost only.  */
      STMT_VINFO_TYPE (stmt_info) = shift_vec_info_type;
      DUMP_VECT_SCOPE ("vectorizable_shift");
      vect_model_simple_cost (stmt_info, ncopies, dt,
			      scalar_shift_arg ? 1 : ndts, slp_node, cost_vec);

  /* Transformation phase.  */

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "transform binary/unary operation.\n");

  /* For non-SLP with an incompatible shift-amount type, convert the
     scalar amount to the vector element type up front.  */
  if (incompatible_op1_vectype_p && !slp_node)
      op1 = fold_convert (TREE_TYPE (vectype), op1);
      if (dt[1] != vect_constant_def)
	op1 = vect_init_vector (stmt_info, op1,
				TREE_TYPE (vectype), NULL);

  /* Handle def.  */
  vec_dest = vect_create_destination_var (scalar_dest, vectype);

  prev_stmt_info = NULL;
  for (j = 0; j < ncopies; j++)
      if (scalar_shift_arg)
	  /* Vector shl and shr insn patterns can be defined with scalar
	     operand 2 (shift operand).  In this case, use constant or loop
	     invariant op1 directly, without extending it to vector mode
	     first.  */
	  optab_op2_mode = insn_data[icode].operand[2].mode;
	  if (!VECTOR_MODE_P (optab_op2_mode))
	      if (dump_enabled_p ())
		dump_printf_loc (MSG_NOTE, vect_location,
				 "operand 1 using scalar mode.\n");
	      vec_oprnds1.create (slp_node ? slp_node->vec_stmts_size : 1);
	      vec_oprnds1.quick_push (vec_oprnd1);
		  /* Store vec_oprnd1 for every vector stmt to be created
		     for SLP_NODE.  We check during the analysis that all
		     the shift arguments are the same.
		     TODO: Allow different constants for different vector
		     stmts generated for an SLP instance.  */
		  for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
		    vec_oprnds1.quick_push (vec_oprnd1);
      else if (slp_node && incompatible_op1_vectype_p)
	  if (was_scalar_shift_arg)
	      /* If the argument was the same in all lanes create
		 the correctly typed vector shift amount directly.  */
	      op1 = fold_convert (TREE_TYPE (vectype), op1);
	      op1 = vect_init_vector (stmt_info, op1, TREE_TYPE (vectype),
				      !loop_vinfo ? gsi : NULL);
	      vec_oprnd1 = vect_init_vector (stmt_info, op1, vectype,
					     !loop_vinfo ? gsi : NULL);
	      vec_oprnds1.create (slp_node->vec_stmts_size);
	      for (k = 0; k < slp_node->vec_stmts_size; k++)
		vec_oprnds1.quick_push (vec_oprnd1);
	  else if (dt[1] == vect_constant_def)
	      /* Convert the scalar constant shift amounts in-place.  */
	      slp_tree shift = SLP_TREE_CHILDREN (slp_node)[1];
	      gcc_assert (SLP_TREE_DEF_TYPE (shift) == vect_constant_def);
	      for (unsigned i = 0;
		   i < SLP_TREE_SCALAR_OPS (shift).length (); ++i)
		  SLP_TREE_SCALAR_OPS (shift)[i]
		    = fold_convert (TREE_TYPE (vectype),
				    SLP_TREE_SCALAR_OPS (shift)[i]);
		  gcc_assert ((TREE_CODE (SLP_TREE_SCALAR_OPS (shift)[i])
	    gcc_assert (TYPE_MODE (op1_vectype) == TYPE_MODE (vectype));

      /* vec_oprnd1 is available if operand 1 should be of a scalar-type
	 (a special case for certain kind of vector shifts); otherwise,
	 operand 1 should be of a vector type (the usual case).  */
	vect_get_vec_defs (op0, NULL_TREE, stmt_info, &vec_oprnds0, NULL,
	vect_get_vec_defs (op0, op1, stmt_info, &vec_oprnds0, &vec_oprnds1,
	vect_get_vec_defs_for_stmt_copy (vinfo, &vec_oprnds0, &vec_oprnds1);

      /* Arguments are ready.  Create the new vector stmt.  */
      stmt_vec_info new_stmt_info = NULL;
      FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
	  vop1 = vec_oprnds1[i];
	  gassign *new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1);
	  new_temp = make_ssa_name (vec_dest, new_stmt);
	  gimple_assign_set_lhs (new_stmt, new_temp);
	    = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
	    SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);

	/* Chain the copies via STMT_VINFO_RELATED_STMT; the first copy is
	   recorded as the main vectorized stmt.  */
	STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
	STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
      prev_stmt_info = new_stmt_info;

  vec_oprnds0.release ();
  vec_oprnds1.release ();
/* Function vectorizable_operation.

   Check if STMT_INFO performs a binary, unary or ternary operation that can
   be vectorized.
   If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at GSI.
   Return true if STMT_INFO is vectorizable in this way.  */

/* NOTE(review): this extract is lossy -- many original lines (braces,
   guards, early returns, parts of conditions and of calls) were dropped
   between the fragments below, so the function as shown is incomplete.
   Added comments only describe what the visible code establishes.  */

vectorizable_operation (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
			stmt_vec_info *vec_stmt, slp_tree slp_node,
			stmt_vector_for_cost *cost_vec)
  tree op0, op1 = NULL_TREE, op2 = NULL_TREE;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  enum tree_code code, orig_code;
  machine_mode vec_mode;
  bool target_support_p;
  /* Def kinds of up to three operands, from vect_is_simple_use.  */
  enum vect_def_type dt[3]
    = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
  stmt_vec_info prev_stmt_info;
  poly_uint64 nunits_in;
  poly_uint64 nunits_out;
  int ncopies, vec_num;
  vec<tree> vec_oprnds0 = vNULL;
  vec<tree> vec_oprnds1 = vNULL;
  vec<tree> vec_oprnds2 = vNULL;
  tree vop0, vop1, vop2;
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
  vec_info *vinfo = stmt_info->vinfo;

  /* Only handle statements relevant to vectorization (or anything in
     basic-block SLP mode).  */
  if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)

  if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def

  /* Is STMT a vectorizable binary/unary operation?   */
  gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);

  if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)

  /* ORIG_CODE is kept because CODE may be rewritten below (e.g. for
     POINTER_DIFF_EXPR) while the original code still matters for the
     final VIEW_CONVERT_EXPR handling.  */
  orig_code = code = gimple_assign_rhs_code (stmt);

  /* Shifts are handled in vectorizable_shift.  */
  if (code == LSHIFT_EXPR
      || code == RSHIFT_EXPR
      || code == LROTATE_EXPR
      || code == RROTATE_EXPR)

  /* Comparisons are handled in vectorizable_comparison.  */
  if (TREE_CODE_CLASS (code) == tcc_comparison)

  /* Conditions are handled in vectorizable_condition.  */
  if (code == COND_EXPR)

  /* For pointer addition and subtraction, we should use the normal
     plus and minus for the vector operation.  */
  if (code == POINTER_PLUS_EXPR)
  if (code == POINTER_DIFF_EXPR)

  /* Support only unary or binary operations.  */
  op_type = TREE_CODE_LENGTH (code);
  if (op_type != unary_op && op_type != binary_op && op_type != ternary_op)
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "num. args = %d (not unary/binary/ternary op).\n",

  scalar_dest = gimple_assign_lhs (stmt);
  vectype_out = STMT_VINFO_VECTYPE (stmt_info);

  /* Most operations cannot handle bit-precision types without extra
     truncations.  */
  bool mask_op_p = VECTOR_BOOLEAN_TYPE_P (vectype_out);
      && !type_has_mode_precision_p (TREE_TYPE (scalar_dest))
      /* Exception are bitwise binary operations.  */
      && code != BIT_IOR_EXPR
      && code != BIT_XOR_EXPR
      && code != BIT_AND_EXPR)
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "bit-precision arithmetic not supported.\n");

  op0 = gimple_assign_rhs1 (stmt);
  if (!vect_is_simple_use (op0, vinfo, &dt[0], &vectype))
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "use not simple.\n");
  /* If op0 is an external or constant def, infer the vector type
     from the scalar type.  */
      /* For boolean type we cannot determine vectype by
	 invariant value (don't know whether it is a vector
	 of booleans or vector of integers).  We use output
	 vectype because operations on boolean don't change
	 type.  */
      if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op0)))
	  if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (scalar_dest)))
	      if (dump_enabled_p ())
		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				 "not supported operation on bool value.\n");
	  vectype = vectype_out;
	vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (op0),
    gcc_assert (vectype);
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "no vectype for scalar type %T\n",

  /* Input and output vectors must have the same number of lanes.  */
  nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
  nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
  if (maybe_ne (nunits_out, nunits_in))

  tree vectype2 = NULL_TREE, vectype3 = NULL_TREE;
  if (op_type == binary_op || op_type == ternary_op)
      op1 = gimple_assign_rhs2 (stmt);
      if (!vect_is_simple_use (op1, vinfo, &dt[1], &vectype2))
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			     "use not simple.\n");
  if (op_type == ternary_op)
      op2 = gimple_assign_rhs3 (stmt);
      if (!vect_is_simple_use (op2, vinfo, &dt[2], &vectype3))
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			     "use not simple.\n");

  /* Multiple types in SLP are handled by creating the appropriate number of
     vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
     case of SLP.  */
      vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
      ncopies = vect_get_num_copies (loop_vinfo, vectype);

  gcc_assert (ncopies >= 1);

  /* Reject attempts to combine mask types with nonmask types, e.g. if
     we have an AND between a (nonmask) boolean loaded from memory and
     a (mask) boolean result of a comparison.

     TODO: We could easily fix these cases up using pattern statements.  */
  if (VECTOR_BOOLEAN_TYPE_P (vectype) != mask_op_p
      || (vectype2 && VECTOR_BOOLEAN_TYPE_P (vectype2) != mask_op_p)
      || (vectype3 && VECTOR_BOOLEAN_TYPE_P (vectype3) != mask_op_p))
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "mixed mask and nonmask vector types\n");

  /* Supportable by target?  */

  vec_mode = TYPE_MODE (vectype);
  if (code == MULT_HIGHPART_EXPR)
    target_support_p = can_mult_highpart_p (vec_mode, TYPE_UNSIGNED (vectype));
      optab = optab_for_tree_code (code, vectype, optab_default);
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
      target_support_p = (optab_handler (optab, vec_mode)
			  != CODE_FOR_nothing);

  if (!target_support_p)
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "op not supported by target.\n");
      /* Check only during analysis.  */
      if (maybe_ne (GET_MODE_SIZE (vec_mode), UNITS_PER_WORD)
	  || (!vec_stmt && !vect_worthwhile_without_simd_p (vinfo, code)))
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "proceeding using word mode.\n");

  /* Worthwhile without SIMD support?  Check only during analysis.  */
  if (!VECTOR_MODE_P (vec_mode)
      && !vect_worthwhile_without_simd_p (vinfo, code))
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "not worthwhile without SIMD support.\n");

  int reduc_idx = STMT_VINFO_REDUC_IDX (stmt_info);
  vec_loop_masks *masks = (loop_vinfo ? &LOOP_VINFO_MASKS (loop_vinfo) : NULL);
  internal_fn cond_fn = get_conditional_internal_fn (code);

  if (!vec_stmt) /* transformation not required.  */
      /* If this operation is part of a reduction, a fully-masked loop
	 should only change the active lanes of the reduction chain,
	 keeping the inactive lanes as-is.  */
	  && LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo)
	  if (cond_fn == IFN_LAST
	      || !direct_internal_fn_supported_p (cond_fn, vectype,
						  OPTIMIZE_FOR_SPEED))
	      if (dump_enabled_p ())
		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				 "can't use a fully-masked loop because no"
				 " conditional operation is available.\n");
	      LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo) = false;
	    vect_record_loop_mask (loop_vinfo, masks, ncopies * vec_num,

      /* Analysis phase: record the statement kind and its cost only.  */
      STMT_VINFO_TYPE (stmt_info) = op_vec_info_type;
      DUMP_VECT_SCOPE ("vectorizable_operation");
      vect_model_simple_cost (stmt_info, ncopies, dt, ndts, slp_node, cost_vec);

  /* Transformation phase.  */

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "transform binary/unary operation.\n");

  bool masked_loop_p = loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo);

  /* POINTER_DIFF_EXPR has pointer arguments which are vectorized as
     vectors with unsigned elements, but the result is signed.  So, we
     need to compute the MINUS_EXPR into vectype temporary and
     VIEW_CONVERT_EXPR it into the final vectype_out result.  */
  tree vec_cvt_dest = NULL_TREE;
  if (orig_code == POINTER_DIFF_EXPR)
      vec_dest = vect_create_destination_var (scalar_dest, vectype);
      vec_cvt_dest = vect_create_destination_var (scalar_dest, vectype_out);
    vec_dest = vect_create_destination_var (scalar_dest, vectype_out);

  /* In case the vectorization factor (VF) is bigger than the number
     of elements that we can fit in a vectype (nunits), we have to generate
     more than one vector stmt - i.e - we need to "unroll" the
     vector stmt by a factor VF/nunits.  In doing so, we record a pointer
     from one copy of the vector stmt to the next, in the field
     STMT_VINFO_RELATED_STMT.  This is necessary in order to allow following
     stages to find the correct vector defs to be used when vectorizing
     stmts that use the defs of the current stmt.  The example below
     illustrates the vectorization process when VF=16 and nunits=4 (i.e.,
     we need to create 4 vectorized stmts):

     before vectorization:
                                RELATED_STMT    VEC_STMT

     step 1: vectorize stmt S1 (done in vectorizable_load.  See more details
     below):
                                RELATED_STMT    VEC_STMT
     VS1_0:  vx0 = memref0      VS1_1           -
     VS1_1:  vx1 = memref1      VS1_2           -
     VS1_2:  vx2 = memref2      VS1_3           -
     VS1_3:  vx3 = memref3      -               -
     S1:     x = load           -               VS1_0

     step2: vectorize stmt S2 (done here):
     To vectorize stmt S2 we first need to find the relevant vector
     def for the first operand 'x'.  This is, as usual, obtained from
     the vector stmt recorded in the STMT_VINFO_VEC_STMT of the stmt
     that defines 'x' (S1).  This way we find the stmt VS1_0, and the
     relevant vector def 'vx0'.  Having found 'vx0' we can generate
     the vector stmt VS2_0, and as usual, record it in the
     STMT_VINFO_VEC_STMT of stmt S2.
     When creating the second copy (VS2_1), we obtain the relevant vector
     def from the vector stmt recorded in the STMT_VINFO_RELATED_STMT of
     stmt VS1_0.  This way we find the stmt VS1_1 and the relevant
     vector def 'vx1'.  Using 'vx1' we create stmt VS2_1 and record a
     pointer to it in the STMT_VINFO_RELATED_STMT of the vector stmt VS2_0.
     Similarly when creating stmts VS2_2 and VS2_3.  This is the resulting
     chain of stmts and pointers:
                                RELATED_STMT    VEC_STMT
     VS1_0:  vx0 = memref0      VS1_1           -
     VS1_1:  vx1 = memref1      VS1_2           -
     VS1_2:  vx2 = memref2      VS1_3           -
     VS1_3:  vx3 = memref3      -               -
     S1:     x = load           -               VS1_0
     VS2_0:  vz0 = vx0 + v1     VS2_1           -
     VS2_1:  vz1 = vx1 + v1     VS2_2           -
     VS2_2:  vz2 = vx2 + v1     VS2_3           -
     VS2_3:  vz3 = vx3 + v1     -               -
     S2:     z = x + 1          -               VS2_0  */

  prev_stmt_info = NULL;
  for (j = 0; j < ncopies; j++)
	  if (op_type == binary_op)
	    vect_get_vec_defs (op0, op1, stmt_info, &vec_oprnds0, &vec_oprnds1,
	  else if (op_type == ternary_op)
	      /* In SLP mode all three operand vectors come from the SLP
		 children in one call.  */
	      auto_vec<vec<tree> > vec_defs(3);
	      vect_get_slp_defs (slp_node, &vec_defs);
	      vec_oprnds0 = vec_defs[0];
	      vec_oprnds1 = vec_defs[1];
	      vec_oprnds2 = vec_defs[2];
	      vect_get_vec_defs (op0, op1, stmt_info, &vec_oprnds0,
				 &vec_oprnds1, NULL);
	      vect_get_vec_defs (op2, NULL_TREE, stmt_info, &vec_oprnds2,
	    vect_get_vec_defs (op0, NULL_TREE, stmt_info, &vec_oprnds0, NULL,
	  /* Subsequent copies reuse the defs of the previous copy.  */
	  vect_get_vec_defs_for_stmt_copy (vinfo, &vec_oprnds0, &vec_oprnds1);
	  if (op_type == ternary_op)
	      tree vec_oprnd = vec_oprnds2.pop ();
	      vec_oprnds2.quick_push (vect_get_vec_def_for_stmt_copy (vinfo,

      /* Arguments are ready.  Create the new vector stmt.  */
      stmt_vec_info new_stmt_info = NULL;
      FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
	  vop1 = ((op_type == binary_op || op_type == ternary_op)
		  ? vec_oprnds1[i] : NULL_TREE);
	  vop2 = ((op_type == ternary_op)
		  ? vec_oprnds2[i] : NULL_TREE);
	  if (masked_loop_p && reduc_idx >= 0)
	      /* Perform the operation on active elements only and take
		 inactive elements from the reduction chain input.  */
	      vop2 = reduc_idx == 1 ? vop1 : vop0;
	      tree mask = vect_get_loop_mask (gsi, masks, vec_num * ncopies,
					      vectype, i * ncopies + j);
	      gcall *call = gimple_build_call_internal (cond_fn, 4, mask,
	      new_temp = make_ssa_name (vec_dest, call);
	      gimple_call_set_lhs (call, new_temp);
	      gimple_call_set_nothrow (call, true);
		= vect_finish_stmt_generation (stmt_info, call, gsi);
	      /* Unmasked path: emit a plain vector assignment.  */
	      gassign *new_stmt = gimple_build_assign (vec_dest, code,
	      new_temp = make_ssa_name (vec_dest, new_stmt);
	      gimple_assign_set_lhs (new_stmt, new_temp);
		= vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
	      /* For POINTER_DIFF_EXPR, view-convert the unsigned MINUS
		 result into the signed VECTYPE_OUT.  */
	      new_temp = build1 (VIEW_CONVERT_EXPR, vectype_out, new_temp);
		= gimple_build_assign (vec_cvt_dest, VIEW_CONVERT_EXPR,
	      new_temp = make_ssa_name (vec_cvt_dest, new_stmt);
	      gimple_assign_set_lhs (new_stmt, new_temp);
		= vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
	    SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);

	/* Chain the copies via STMT_VINFO_RELATED_STMT; the first copy is
	   recorded as the main vectorized stmt.  */
	STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
	STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
      prev_stmt_info = new_stmt_info;

  vec_oprnds0.release ();
  vec_oprnds1.release ();
  vec_oprnds2.release ();
/* A helper function to ensure data reference DR_INFO's base alignment.  */

/* NOTE(review): this extract is lossy -- braces and an early return were
   dropped between the fragments below.  */

ensure_base_align (dr_vec_info *dr_info)
  /* Nothing to do before the misalignment has been computed.  */
  if (dr_info->misalignment == DR_MISALIGNMENT_UNINITIALIZED)

  if (dr_info->base_misaligned)
      tree base_decl = dr_info->base_decl;

      // We should only be able to increase the alignment of a base object if
      // we know what its new alignment should be at compile time.
      unsigned HOST_WIDE_INT align_base_to =
	DR_TARGET_ALIGNMENT (dr_info).to_constant () * BITS_PER_UNIT;

      /* Symtab-visible decls get their alignment raised through the
	 symbol table; local decls are updated directly.  */
      if (decl_in_symtab_p (base_decl))
	symtab_node::get (base_decl)->increase_alignment (align_base_to);
      else if (DECL_ALIGN (base_decl) < align_base_to)
	  SET_DECL_ALIGN (base_decl, align_base_to);
	  DECL_USER_ALIGN (base_decl) = 1;
      /* Record that the base is now sufficiently aligned.  */
      dr_info->base_misaligned = false;
/* Function get_group_alias_ptr_type.

   Return the alias type for the group starting at FIRST_STMT_INFO.  */

/* NOTE(review): this extract is lossy -- the return type line and braces
   were dropped between the fragments below.  */

get_group_alias_ptr_type (stmt_vec_info first_stmt_info)
  struct data_reference *first_dr, *next_dr;

  first_dr = STMT_VINFO_DATA_REF (first_stmt_info);
  stmt_vec_info next_stmt_info = DR_GROUP_NEXT_ELEMENT (first_stmt_info);
  /* Walk the group; if any member's alias set conflicts with the first
     member's, fall back to the universal pointer alias type.  */
  while (next_stmt_info)
      next_dr = STMT_VINFO_DATA_REF (next_stmt_info);
      if (get_alias_set (DR_REF (first_dr))
	  != get_alias_set (DR_REF (next_dr)))
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_NOTE, vect_location,
			     "conflicting alias set types.\n");
	  return ptr_type_node;
      next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
  /* All members agree -- use the first reference's alias pointer type.  */
  return reference_alias_ptr_type (DR_REF (first_dr));
/* Function scan_operand_equal_p.

   Helper function for check_scan_store.  Compare two references
   with .GOMP_SIMD_LANE bases.  */

/* NOTE(review): this extract is lossy -- some declarations (e.g. the
   `mode' and `step' locals used below), braces, guards and returns were
   dropped between the fragments below.  */

scan_operand_equal_p (tree ref1, tree ref2)
  tree ref[2] = { ref1, ref2 };
  poly_int64 bitsize[2], bitpos[2];
  tree offset[2], base[2];
  /* Decompose both references into base, offset and position.  */
  for (int i = 0; i < 2; ++i)
      int unsignedp, reversep, volatilep = 0;
      base[i] = get_inner_reference (ref[i], &bitsize[i], &bitpos[i],
				     &offset[i], &mode, &unsignedp,
				     &reversep, &volatilep);
      /* Reverse storage order, volatile accesses and non-zero bit
	 positions are not handled.  */
      if (reversep || volatilep || maybe_ne (bitpos[i], 0))
      /* Strip a MEM_REF whose SSA base is an ADDR_EXPR plus an SSA
	 offset, recording that offset separately.  */
      if (TREE_CODE (base[i]) == MEM_REF
	  && offset[i] == NULL_TREE
	  && TREE_CODE (TREE_OPERAND (base[i], 0)) == SSA_NAME)
	  gimple *def_stmt = SSA_NAME_DEF_STMT (TREE_OPERAND (base[i], 0));
	  if (is_gimple_assign (def_stmt)
	      && gimple_assign_rhs_code (def_stmt) == POINTER_PLUS_EXPR
	      && TREE_CODE (gimple_assign_rhs1 (def_stmt)) == ADDR_EXPR
	      && TREE_CODE (gimple_assign_rhs2 (def_stmt)) == SSA_NAME)
	      if (maybe_ne (mem_ref_offset (base[i]), 0))
	      base[i] = TREE_OPERAND (gimple_assign_rhs1 (def_stmt), 0);
	      offset[i] = gimple_assign_rhs2 (def_stmt);

  /* Bases and access sizes must match exactly.  */
  if (!operand_equal_p (base[0], base[1], 0))
  if (maybe_ne (bitsize[0], bitsize[1]))
  if (offset[0] != offset[1])
      if (!offset[0] || !offset[1])
      if (!operand_equal_p (offset[0], offset[1], 0))
	  /* Offsets differ syntactically; try to normalize each into
	     offset * step form before comparing again.  */
	  for (int i = 0; i < 2; ++i)
	      step[i] = integer_one_node;
	      if (TREE_CODE (offset[i]) == SSA_NAME)
		  gimple *def_stmt = SSA_NAME_DEF_STMT (offset[i]);
		  if (is_gimple_assign (def_stmt)
		      && gimple_assign_rhs_code (def_stmt) == MULT_EXPR
		      && (TREE_CODE (gimple_assign_rhs2 (def_stmt))
		      step[i] = gimple_assign_rhs2 (def_stmt);
		      offset[i] = gimple_assign_rhs1 (def_stmt);
	      else if (TREE_CODE (offset[i]) == MULT_EXPR)
		  step[i] = TREE_OPERAND (offset[i], 1);
		  offset[i] = TREE_OPERAND (offset[i], 0);
	      /* Also look through widening integral conversions.  */
	      tree rhs1 = NULL_TREE;
	      if (TREE_CODE (offset[i]) == SSA_NAME)
		  gimple *def_stmt = SSA_NAME_DEF_STMT (offset[i]);
		  if (gimple_assign_cast_p (def_stmt))
		    rhs1 = gimple_assign_rhs1 (def_stmt);
	      else if (CONVERT_EXPR_P (offset[i]))
		rhs1 = TREE_OPERAND (offset[i], 0);
		  && INTEGRAL_TYPE_P (TREE_TYPE (rhs1))
		  && INTEGRAL_TYPE_P (TREE_TYPE (offset[i]))
		  && (TYPE_PRECISION (TREE_TYPE (offset[i]))
		      >= TYPE_PRECISION (TREE_TYPE (rhs1))))
	  /* After normalization both offset and step must agree.  */
	  if (!operand_equal_p (offset[0], offset[1], 0)
	      || !operand_equal_p (step[0], step[1], 0))
/* Kinds of operations used when emitting an OpenMP scan-store sequence;
   computed per step by scan_store_can_perm_p below.  */
enum scan_store_kind
{
  /* Normal permutation.  */
  scan_store_kind_perm,

  /* Whole vector left shift permutation with zero init.  */
  scan_store_kind_lshift_zero,

  /* Whole vector left shift permutation and VEC_COND_EXPR.  */
  scan_store_kind_lshift_cond
/* Function scan_store_can_perm_p.

   Verify if we can perform the needed permutations or whole vector shifts.
   Return -1 on failure, otherwise exact log2 of vectype's nunits.
   USE_WHOLE_VECTOR is a vector of enum scan_store_kind which operation
   to do at each step.  */

/* NOTE(review): this extract is lossy -- the return type, some local
   declarations (e.g. `i'), braces, returns and one assignment in the
   permutation setup were dropped between the fragments below.  */

scan_store_can_perm_p (tree vectype, tree init,
		       vec<enum scan_store_kind> *use_whole_vector = NULL)
  enum machine_mode vec_mode = TYPE_MODE (vectype);
  unsigned HOST_WIDE_INT nunits;
  /* Only constant lane counts that are powers of two (> 1) are
     supported.  */
  if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant (&nunits))
  int units_log2 = exact_log2 (nunits);
  if (units_log2 <= 0)

  enum scan_store_kind whole_vector_shift_kind = scan_store_kind_perm;
  /* One permutation per scan step, plus a final broadcast of the last
     lane at i == units_log2.  */
  for (i = 0; i <= units_log2; ++i)
      unsigned HOST_WIDE_INT j, k;
      enum scan_store_kind kind = scan_store_kind_perm;
      vec_perm_builder sel (nunits, nunits, 1);
      sel.quick_grow (nunits);
      if (i == units_log2)
	  /* Last step: splat the final lane across the vector.  */
	  for (j = 0; j < nunits; ++j)
	    sel[j] = nunits - 1;
	  /* Intermediate step: keep the low 2^i lanes, take the rest
	     from the second input.  */
	  for (j = 0; j < (HOST_WIDE_INT_1U << i); ++j)
	  for (k = 0; j < nunits; ++j, ++k)
	    sel[j] = nunits + k;
      vec_perm_indices indices (sel, i == units_log2 ? 1 : 2, nunits);
      if (!can_vec_perm_const_p (vec_mode, indices))
	  /* The final broadcast has no whole-vector-shift fallback.  */
	  if (i == units_log2)
	  if (whole_vector_shift_kind == scan_store_kind_perm)
	      if (optab_handler (vec_shl_optab, vec_mode) == CODE_FOR_nothing)
	      whole_vector_shift_kind = scan_store_kind_lshift_zero;
	      /* Whole vector shifts shift in zeros, so if init is all zero
		 constant, there is no need to do anything further.  */
	      if ((TREE_CODE (init) != INTEGER_CST
		   && TREE_CODE (init) != REAL_CST)
		  || !initializer_zerop (init))
		  tree masktype = truth_type_for (vectype);
		  if (!expand_vec_cond_expr_p (vectype, masktype, VECTOR_CST))
		  whole_vector_shift_kind = scan_store_kind_lshift_cond;
	  kind = whole_vector_shift_kind;
      /* Record the per-step kind for the caller if requested.  */
      if (use_whole_vector)
	  if (kind != scan_store_kind_perm && use_whole_vector->is_empty ())
	    use_whole_vector->safe_grow_cleared (i);
	  if (kind != scan_store_kind_perm || !use_whole_vector->is_empty ())
	    use_whole_vector->safe_push (kind);
6610 /* Function check_scan_store.
6612 Check magic stores for #pragma omp scan {in,ex}clusive reductions. */
/* NOTE(review): garbled extraction — original lines are fragmented and some
   statements (early returns, braces) are missing between the numbered
   fragments; comments annotate only the visible logic.  */
6615 check_scan_store (stmt_vec_info stmt_info
, tree vectype
,
6616 enum vect_def_type rhs_dt
, bool slp
, tree mask
,
6617 vect_memory_access_type memory_access_type
)
6619 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
6620 dr_vec_info
*dr_info
= STMT_VINFO_DR_INFO (stmt_info
);
/* Only "magic" SIMD-lane accesses (kind > 1) reach this function.  */
6623 gcc_assert (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) > 1);
/* Reject anything but a plain contiguous, zero-offset store into a known
   variable inside a non-fully-masked loop whose alias set matches.  */
6626 || memory_access_type
!= VMAT_CONTIGUOUS
6627 || TREE_CODE (DR_BASE_ADDRESS (dr_info
->dr
)) != ADDR_EXPR
6628 || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (dr_info
->dr
), 0))
6629 || loop_vinfo
== NULL
6630 || LOOP_VINFO_FULLY_MASKED_P (loop_vinfo
)
6631 || STMT_VINFO_GROUPED_ACCESS (stmt_info
)
6632 || !integer_zerop (get_dr_vinfo_offset (dr_info
))
6633 || !integer_zerop (DR_INIT (dr_info
->dr
))
6634 || !(ref_type
= reference_alias_ptr_type (DR_REF (dr_info
->dr
)))
6635 || !alias_sets_conflict_p (get_alias_set (vectype
),
6636 get_alias_set (TREE_TYPE (ref_type
))))
6638 if (dump_enabled_p ())
6639 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6640 "unsupported OpenMP scan store.\n");
6644 /* We need to pattern match code built by OpenMP lowering and simplified
6645 by following optimizations into something we can handle.
6646 #pragma omp simd reduction(inscan,+:r)
6650 #pragma omp scan inclusive (r)
6653 shall have body with:
6654 // Initialization for input phase, store the reduction initializer:
6655 _20 = .GOMP_SIMD_LANE (simduid.3_14(D), 0);
6656 _21 = .GOMP_SIMD_LANE (simduid.3_14(D), 1);
6658 // Actual input phase:
6660 r.0_5 = D.2042[_20];
6663 // Initialization for scan phase:
6664 _25 = .GOMP_SIMD_LANE (simduid.3_14(D), 2);
6670 // Actual scan phase:
6672 r.1_8 = D.2042[_20];
6674 The "omp simd array" variable D.2042 holds the privatized copy used
6675 inside of the loop and D.2043 is another one that holds copies of
6676 the current original list item. The separate GOMP_SIMD_LANE ifn
6677 kinds are there in order to allow optimizing the initializer store
6678 and combiner sequence, e.g. if it is originally some C++ish user
6679 defined reduction, but allow the vectorizer to pattern recognize it
6680 and turn into the appropriate vectorized scan.
6682 For exclusive scan, this is slightly different:
6683 #pragma omp simd reduction(inscan,+:r)
6687 #pragma omp scan exclusive (r)
6690 shall have body with:
6691 // Initialization for input phase, store the reduction initializer:
6692 _20 = .GOMP_SIMD_LANE (simduid.3_14(D), 0);
6693 _21 = .GOMP_SIMD_LANE (simduid.3_14(D), 1);
6695 // Actual input phase:
6697 r.0_5 = D.2042[_20];
6700 // Initialization for scan phase:
6701 _25 = .GOMP_SIMD_LANE (simduid.3_14(D), 3);
6707 // Actual scan phase:
6709 r.1_8 = D.2044[_20];
6712 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) == 2)
6714 /* Match the D.2042[_21] = 0; store above. Just require that
6715 it is a constant or external definition store. */
6716 if (rhs_dt
!= vect_constant_def
&& rhs_dt
!= vect_external_def
)
6719 if (dump_enabled_p ())
6720 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6721 "unsupported OpenMP scan initializer store.\n");
/* Cache the initializer's rhs in loop_vinfo->scan_map, keyed by the
   destination "omp simd array" variable, for use during transform.  */
6725 if (! loop_vinfo
->scan_map
)
6726 loop_vinfo
->scan_map
= new hash_map
<tree
, tree
>;
6727 tree var
= TREE_OPERAND (DR_BASE_ADDRESS (dr_info
->dr
), 0);
6728 tree
&cached
= loop_vinfo
->scan_map
->get_or_insert (var
);
6731 cached
= gimple_assign_rhs1 (STMT_VINFO_STMT (stmt_info
));
6733 /* These stores can be vectorized normally. */
/* Beyond this point: kind 3 (inclusive) or 4 (exclusive) combiner stores;
   the stored rhs must be loop-internal SSA.  */
6737 if (rhs_dt
!= vect_internal_def
)
6740 if (dump_enabled_p ())
6741 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6742 "unsupported OpenMP scan combiner pattern.\n");
6746 gimple
*stmt
= STMT_VINFO_STMT (stmt_info
);
6747 tree rhs
= gimple_assign_rhs1 (stmt
);
6748 if (TREE_CODE (rhs
) != SSA_NAME
)
6751 gimple
*other_store_stmt
= NULL
;
6752 tree var
= TREE_OPERAND (DR_BASE_ADDRESS (dr_info
->dr
), 0);
6753 bool inscan_var_store
6754 = lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var
)) != NULL
;
/* Exclusive scan (kind 4): locate the paired store via the immediate uses
   of RHS in the same basic block.  */
6756 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) == 4)
6758 if (!inscan_var_store
)
6760 use_operand_p use_p
;
6761 imm_use_iterator iter
;
6762 FOR_EACH_IMM_USE_FAST (use_p
, iter
, rhs
)
6764 gimple
*use_stmt
= USE_STMT (use_p
);
6765 if (use_stmt
== stmt
|| is_gimple_debug (use_stmt
))
6767 if (gimple_bb (use_stmt
) != gimple_bb (stmt
)
6768 || !is_gimple_assign (use_stmt
)
6769 || gimple_assign_rhs_class (use_stmt
) != GIMPLE_BINARY_RHS
6771 || TREE_CODE (gimple_assign_lhs (use_stmt
)) != SSA_NAME
)
6773 other_store_stmt
= use_stmt
;
6775 if (other_store_stmt
== NULL
)
6777 rhs
= gimple_assign_lhs (other_store_stmt
);
6778 if (!single_imm_use (rhs
, &use_p
, &other_store_stmt
))
/* Inclusive scan (kind 3): RHS must have exactly one non-debug use other
   than STMT, which is the other store.  */
6782 else if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) == 3)
6784 use_operand_p use_p
;
6785 imm_use_iterator iter
;
6786 FOR_EACH_IMM_USE_FAST (use_p
, iter
, rhs
)
6788 gimple
*use_stmt
= USE_STMT (use_p
);
6789 if (use_stmt
== stmt
|| is_gimple_debug (use_stmt
))
6791 if (other_store_stmt
)
6793 other_store_stmt
= use_stmt
;
/* The combiner must be a commutative binary assign in the same block.  */
6799 gimple
*def_stmt
= SSA_NAME_DEF_STMT (rhs
);
6800 if (gimple_bb (def_stmt
) != gimple_bb (stmt
)
6801 || !is_gimple_assign (def_stmt
)
6802 || gimple_assign_rhs_class (def_stmt
) != GIMPLE_BINARY_RHS
)
6805 enum tree_code code
= gimple_assign_rhs_code (def_stmt
);
6806 /* For pointer addition, we should use the normal plus for the vector
6810 case POINTER_PLUS_EXPR
:
6813 case MULT_HIGHPART_EXPR
:
6818 if (TREE_CODE_LENGTH (code
) != binary_op
|| !commutative_tree_code (code
))
/* Both combiner operands must be loads from "omp simd array" variables in
   the same block, with matching SIMD-lane access kinds.  */
6821 tree rhs1
= gimple_assign_rhs1 (def_stmt
);
6822 tree rhs2
= gimple_assign_rhs2 (def_stmt
);
6823 if (TREE_CODE (rhs1
) != SSA_NAME
|| TREE_CODE (rhs2
) != SSA_NAME
)
6826 gimple
*load1_stmt
= SSA_NAME_DEF_STMT (rhs1
);
6827 gimple
*load2_stmt
= SSA_NAME_DEF_STMT (rhs2
);
6828 if (gimple_bb (load1_stmt
) != gimple_bb (stmt
)
6829 || !gimple_assign_load_p (load1_stmt
)
6830 || gimple_bb (load2_stmt
) != gimple_bb (stmt
)
6831 || !gimple_assign_load_p (load2_stmt
))
6834 stmt_vec_info load1_stmt_info
= loop_vinfo
->lookup_stmt (load1_stmt
);
6835 stmt_vec_info load2_stmt_info
= loop_vinfo
->lookup_stmt (load2_stmt
);
6836 if (load1_stmt_info
== NULL
6837 || load2_stmt_info
== NULL
6838 || (STMT_VINFO_SIMD_LANE_ACCESS_P (load1_stmt_info
)
6839 != STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
))
6840 || (STMT_VINFO_SIMD_LANE_ACCESS_P (load2_stmt_info
)
6841 != STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
)))
6844 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) == 4 && inscan_var_store
)
6846 dr_vec_info
*load1_dr_info
= STMT_VINFO_DR_INFO (load1_stmt_info
);
6847 if (TREE_CODE (DR_BASE_ADDRESS (load1_dr_info
->dr
)) != ADDR_EXPR
6848 || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (load1_dr_info
->dr
), 0)))
6850 tree var1
= TREE_OPERAND (DR_BASE_ADDRESS (load1_dr_info
->dr
), 0);
6852 if (lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1
)))
6856 use_operand_p use_p
;
6857 imm_use_iterator iter
;
6858 FOR_EACH_IMM_USE_FAST (use_p
, iter
, lrhs
)
6860 gimple
*use_stmt
= USE_STMT (use_p
);
6861 if (use_stmt
== def_stmt
|| is_gimple_debug (use_stmt
))
6863 if (other_store_stmt
)
6865 other_store_stmt
= use_stmt
;
6869 if (other_store_stmt
== NULL
)
6871 if (gimple_bb (other_store_stmt
) != gimple_bb (stmt
)
6872 || !gimple_store_p (other_store_stmt
))
6875 stmt_vec_info other_store_stmt_info
6876 = loop_vinfo
->lookup_stmt (other_store_stmt
);
6877 if (other_store_stmt_info
== NULL
6878 || (STMT_VINFO_SIMD_LANE_ACCESS_P (other_store_stmt_info
)
6879 != STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
)))
/* Canonicalize so that STMT1/LOAD1 correspond: swap when STMT1's lhs
   matches load2's source instead of load1's.  */
6882 gimple
*stmt1
= stmt
;
6883 gimple
*stmt2
= other_store_stmt
;
6884 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) == 4 && !inscan_var_store
)
6885 std::swap (stmt1
, stmt2
);
6886 if (scan_operand_equal_p (gimple_assign_lhs (stmt1
),
6887 gimple_assign_rhs1 (load2_stmt
)))
6889 std::swap (rhs1
, rhs2
);
6890 std::swap (load1_stmt
, load2_stmt
);
6891 std::swap (load1_stmt_info
, load2_stmt_info
);
6893 if (!scan_operand_equal_p (gimple_assign_lhs (stmt1
),
6894 gimple_assign_rhs1 (load1_stmt
)))
6897 tree var3
= NULL_TREE
;
6898 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) == 3
6899 && !scan_operand_equal_p (gimple_assign_lhs (stmt2
),
6900 gimple_assign_rhs1 (load2_stmt
)))
6902 else if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) == 4)
6904 dr_vec_info
*load2_dr_info
= STMT_VINFO_DR_INFO (load2_stmt_info
);
6905 if (TREE_CODE (DR_BASE_ADDRESS (load2_dr_info
->dr
)) != ADDR_EXPR
6906 || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (load2_dr_info
->dr
), 0)))
6908 var3
= TREE_OPERAND (DR_BASE_ADDRESS (load2_dr_info
->dr
), 0);
6909 if (!lookup_attribute ("omp simd array", DECL_ATTRIBUTES (var3
))
6910 || lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var3
))
6911 || lookup_attribute ("omp simd inscan exclusive",
6912 DECL_ATTRIBUTES (var3
)))
/* The two stores must target one "omp simd inscan" array and one plain
   "omp simd array"; canonicalize VAR1 to the non-inscan one first.  */
6916 dr_vec_info
*other_dr_info
= STMT_VINFO_DR_INFO (other_store_stmt_info
);
6917 if (TREE_CODE (DR_BASE_ADDRESS (other_dr_info
->dr
)) != ADDR_EXPR
6918 || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (other_dr_info
->dr
), 0)))
6921 tree var1
= TREE_OPERAND (DR_BASE_ADDRESS (dr_info
->dr
), 0);
6922 tree var2
= TREE_OPERAND (DR_BASE_ADDRESS (other_dr_info
->dr
), 0);
6923 if (!lookup_attribute ("omp simd array", DECL_ATTRIBUTES (var1
))
6924 || !lookup_attribute ("omp simd array", DECL_ATTRIBUTES (var2
))
6925 || (!lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1
)))
6926 == (!lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var2
))))
6929 if (lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1
)))
6930 std::swap (var1
, var2
);
6932 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) == 4)
6934 if (!lookup_attribute ("omp simd inscan exclusive",
6935 DECL_ATTRIBUTES (var1
)))
/* The reduction initializer for VAR1 must have been cached earlier by the
   kind == 2 path above.  */
6940 if (loop_vinfo
->scan_map
== NULL
)
6942 tree
*init
= loop_vinfo
->scan_map
->get (var1
);
6946 /* The IL is as expected, now check if we can actually vectorize it.
6953 should be vectorized as (where _40 is the vectorized rhs
6954 from the D.2042[_21] = 0; store):
6955 _30 = MEM <vector(8) int> [(int *)&D.2043];
6956 _31 = MEM <vector(8) int> [(int *)&D.2042];
6957 _32 = VEC_PERM_EXPR <_40, _31, { 0, 8, 9, 10, 11, 12, 13, 14 }>;
6959 // _33 = { _31[0], _31[0]+_31[1], _31[1]+_31[2], ..., _31[6]+_31[7] };
6960 _34 = VEC_PERM_EXPR <_40, _33, { 0, 1, 8, 9, 10, 11, 12, 13 }>;
6962 // _35 = { _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
6963 // _31[1]+.._31[4], ... _31[4]+.._31[7] };
6964 _36 = VEC_PERM_EXPR <_40, _35, { 0, 1, 2, 3, 8, 9, 10, 11 }>;
6966 // _37 = { _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
6967 // _31[0]+.._31[4], ... _31[0]+.._31[7] };
6969 _39 = VEC_PERM_EXPR <_38, _38, { 7, 7, 7, 7, 7, 7, 7, 7 }>;
6970 MEM <vector(8) int> [(int *)&D.2043] = _39;
6971 MEM <vector(8) int> [(int *)&D.2042] = _38;
6978 should be vectorized as (where _40 is the vectorized rhs
6979 from the D.2042[_21] = 0; store):
6980 _30 = MEM <vector(8) int> [(int *)&D.2043];
6981 _31 = MEM <vector(8) int> [(int *)&D.2042];
6982 _32 = VEC_PERM_EXPR <_40, _31, { 0, 8, 9, 10, 11, 12, 13, 14 }>;
6983 _33 = VEC_PERM_EXPR <_40, _32, { 0, 8, 9, 10, 11, 12, 13, 14 }>;
6985 // _34 = { 0, _31[0], _31[0]+_31[1], _31[1]+_31[2], _31[2]+_31[3],
6986 // _31[3]+_31[4], ... _31[5]+.._31[6] };
6987 _35 = VEC_PERM_EXPR <_40, _34, { 0, 1, 8, 9, 10, 11, 12, 13 }>;
6989 // _36 = { 0, _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
6990 // _31[1]+.._31[4], ... _31[3]+.._31[6] };
6991 _37 = VEC_PERM_EXPR <_40, _36, { 0, 1, 2, 3, 8, 9, 10, 11 }>;
6993 // _38 = { 0, _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
6994 // _31[0]+.._31[4], ... _31[0]+.._31[6] };
6997 _51 = VEC_PERM_EXPR <_50, _50, { 7, 7, 7, 7, 7, 7, 7, 7 }>;
6998 MEM <vector(8) int> [(int *)&D.2044] = _39;
6999 MEM <vector(8) int> [(int *)&D.2042] = _51; */
/* Finally require target support for the vectorized combiner operation and
   for the scan permutations (checked by scan_store_can_perm_p).  */
7000 enum machine_mode vec_mode
= TYPE_MODE (vectype
);
7001 optab optab
= optab_for_tree_code (code
, vectype
, optab_default
);
7002 if (!optab
|| optab_handler (optab
, vec_mode
) == CODE_FOR_nothing
)
7005 int units_log2
= scan_store_can_perm_p (vectype
, *init
);
7006 if (units_log2
== -1)
7013 /* Function vectorizable_scan_store.
7015 Helper of vectorizable_score, arguments like on vectorizable_store.
7016 Handle only the transformation, checking is done in check_scan_store. */
/* NOTE(review): garbled extraction — original lines are fragmented and some
   statements (returns, braces, a few operands) are missing between the
   numbered fragments.  Comments annotate only the visible logic.  Also note
   "vectorizable_score" in the original comment above is presumably a typo
   for "vectorizable_store" — left untouched here.  */
7019 vectorizable_scan_store (stmt_vec_info stmt_info
, gimple_stmt_iterator
*gsi
,
7020 stmt_vec_info
*vec_stmt
, int ncopies
)
7022 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
7023 dr_vec_info
*dr_info
= STMT_VINFO_DR_INFO (stmt_info
);
7024 tree ref_type
= reference_alias_ptr_type (DR_REF (dr_info
->dr
));
7025 vec_info
*vinfo
= stmt_info
->vinfo
;
7026 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
7028 if (dump_enabled_p ())
7029 dump_printf_loc (MSG_NOTE
, vect_location
,
7030 "transform scan store. ncopies = %d\n", ncopies
);
7032 gimple
*stmt
= STMT_VINFO_STMT (stmt_info
);
7033 tree rhs
= gimple_assign_rhs1 (stmt
);
7034 gcc_assert (TREE_CODE (rhs
) == SSA_NAME
);
7036 tree var
= TREE_OPERAND (DR_BASE_ADDRESS (dr_info
->dr
), 0);
7037 bool inscan_var_store
7038 = lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var
)) != NULL
;
/* For exclusive scan (kind 4) not storing to the inscan variable, chase
   RHS through its use to the combiner statement's lhs, mirroring the walk
   done during analysis in check_scan_store.  */
7040 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) == 4 && !inscan_var_store
)
7042 use_operand_p use_p
;
7043 imm_use_iterator iter
;
7044 FOR_EACH_IMM_USE_FAST (use_p
, iter
, rhs
)
7046 gimple
*use_stmt
= USE_STMT (use_p
);
7047 if (use_stmt
== stmt
|| is_gimple_debug (use_stmt
))
7049 rhs
= gimple_assign_lhs (use_stmt
);
7054 gimple
*def_stmt
= SSA_NAME_DEF_STMT (rhs
);
7055 enum tree_code code
= gimple_assign_rhs_code (def_stmt
);
7056 if (code
== POINTER_PLUS_EXPR
)
7058 gcc_assert (TREE_CODE_LENGTH (code
) == binary_op
7059 && commutative_tree_code (code
));
7060 tree rhs1
= gimple_assign_rhs1 (def_stmt
);
7061 tree rhs2
= gimple_assign_rhs2 (def_stmt
);
7062 gcc_assert (TREE_CODE (rhs1
) == SSA_NAME
&& TREE_CODE (rhs2
) == SSA_NAME
);
7063 gimple
*load1_stmt
= SSA_NAME_DEF_STMT (rhs1
);
7064 gimple
*load2_stmt
= SSA_NAME_DEF_STMT (rhs2
);
7065 stmt_vec_info load1_stmt_info
= loop_vinfo
->lookup_stmt (load1_stmt
);
7066 stmt_vec_info load2_stmt_info
= loop_vinfo
->lookup_stmt (load2_stmt
);
7067 dr_vec_info
*load1_dr_info
= STMT_VINFO_DR_INFO (load1_stmt_info
);
7068 dr_vec_info
*load2_dr_info
= STMT_VINFO_DR_INFO (load2_stmt_info
);
7069 tree var1
= TREE_OPERAND (DR_BASE_ADDRESS (load1_dr_info
->dr
), 0);
7070 tree var2
= TREE_OPERAND (DR_BASE_ADDRESS (load2_dr_info
->dr
), 0);
/* Canonicalize: VAR1/RHS1/LOAD1 refer to the non-inscan array.  */
7072 if (lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1
)))
7074 std::swap (rhs1
, rhs2
);
7075 std::swap (var1
, var2
);
7076 std::swap (load1_dr_info
, load2_dr_info
);
/* Retrieve the reduction initializer cached by check_scan_store.  */
7079 tree
*init
= loop_vinfo
->scan_map
->get (var1
);
/* Re-derive the per-step permutation kinds; analysis already succeeded in
   check_scan_store, hence the assert.  */
7082 unsigned HOST_WIDE_INT nunits
;
7083 if (!TYPE_VECTOR_SUBPARTS (vectype
).is_constant (&nunits
))
7085 auto_vec
<enum scan_store_kind
, 16> use_whole_vector
;
7086 int units_log2
= scan_store_can_perm_p (vectype
, *init
, &use_whole_vector
);
7087 gcc_assert (units_log2
> 0);
7088 auto_vec
<tree
, 16> perms
;
7089 perms
.quick_grow (units_log2
+ 1);
7090 tree zero_vec
= NULL_TREE
, masktype
= NULL_TREE
;
/* Pre-build the permutation mask for every scan step plus the final
   last-lane broadcast (i == units_log2).  */
7091 for (int i
= 0; i
<= units_log2
; ++i
)
7093 unsigned HOST_WIDE_INT j
, k
;
7094 vec_perm_builder
sel (nunits
, nunits
, 1);
7095 sel
.quick_grow (nunits
);
7096 if (i
== units_log2
)
7097 for (j
= 0; j
< nunits
; ++j
)
7098 sel
[j
] = nunits
- 1;
7101 for (j
= 0; j
< (HOST_WIDE_INT_1U
<< i
); ++j
)
7103 for (k
= 0; j
< nunits
; ++j
, ++k
)
7104 sel
[j
] = nunits
+ k
;
7106 vec_perm_indices
indices (sel
, i
== units_log2
? 1 : 2, nunits
);
7107 if (!use_whole_vector
.is_empty ()
7108 && use_whole_vector
[i
] != scan_store_kind_perm
)
7110 if (zero_vec
== NULL_TREE
)
7111 zero_vec
= build_zero_cst (vectype
);
7112 if (masktype
== NULL_TREE
7113 && use_whole_vector
[i
] == scan_store_kind_lshift_cond
)
7114 masktype
= truth_type_for (vectype
);
7115 perms
[i
] = vect_gen_perm_mask_any (vectype
, indices
);
7118 perms
[i
] = vect_gen_perm_mask_checked (vectype
, indices
);
/* Emit the vectorized scan: one chain of permute+combine steps per copy.  */
7121 stmt_vec_info prev_stmt_info
= NULL
;
7122 tree vec_oprnd1
= NULL_TREE
;
7123 tree vec_oprnd2
= NULL_TREE
;
7124 tree vec_oprnd3
= NULL_TREE
;
7125 tree dataref_ptr
= DR_BASE_ADDRESS (dr_info
->dr
);
7126 tree dataref_offset
= build_int_cst (ref_type
, 0);
7127 tree bump
= vect_get_data_ptr_increment (dr_info
, vectype
, VMAT_CONTIGUOUS
);
7128 tree ldataref_ptr
= NULL_TREE
;
7129 tree orig
= NULL_TREE
;
7130 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) == 4 && !inscan_var_store
)
7131 ldataref_ptr
= DR_BASE_ADDRESS (load1_dr_info
->dr
);
7132 for (int j
= 0; j
< ncopies
; j
++)
7134 stmt_vec_info new_stmt_info
;
7137 vec_oprnd1
= vect_get_vec_def_for_operand (*init
, stmt_info
);
7138 if (ldataref_ptr
== NULL
)
7139 vec_oprnd2
= vect_get_vec_def_for_operand (rhs1
, stmt_info
);
7140 vec_oprnd3
= vect_get_vec_def_for_operand (rhs2
, stmt_info
);
7145 vec_oprnd1
= vect_get_vec_def_for_stmt_copy (vinfo
, vec_oprnd1
);
7146 if (ldataref_ptr
== NULL
)
7147 vec_oprnd2
= vect_get_vec_def_for_stmt_copy (vinfo
, vec_oprnd2
);
7148 vec_oprnd3
= vect_get_vec_def_for_stmt_copy (vinfo
, vec_oprnd3
);
7149 if (!inscan_var_store
)
7150 dataref_offset
= int_const_binop (PLUS_EXPR
, dataref_offset
, bump
);
/* When reading through LDATAREF_PTR, load the input vector directly.  */
7155 vec_oprnd2
= make_ssa_name (vectype
);
7156 tree data_ref
= fold_build2 (MEM_REF
, vectype
,
7157 unshare_expr (ldataref_ptr
),
7159 vect_copy_ref_info (data_ref
, DR_REF (load1_dr_info
->dr
));
7160 gimple
*g
= gimple_build_assign (vec_oprnd2
, data_ref
);
7161 new_stmt_info
= vect_finish_stmt_generation (stmt_info
, g
, gsi
);
7162 if (prev_stmt_info
== NULL
)
7163 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt_info
;
7165 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt_info
;
7166 prev_stmt_info
= new_stmt_info
;
/* The log2 scan steps: permute (or whole-vector shift) then combine.  */
7169 tree v
= vec_oprnd2
;
7170 for (int i
= 0; i
< units_log2
; ++i
)
7172 tree new_temp
= make_ssa_name (vectype
);
7173 gimple
*g
= gimple_build_assign (new_temp
, VEC_PERM_EXPR
,
7175 && (use_whole_vector
[i
]
7176 != scan_store_kind_perm
))
7177 ? zero_vec
: vec_oprnd1
, v
,
7179 new_stmt_info
= vect_finish_stmt_generation (stmt_info
, g
, gsi
);
7180 if (prev_stmt_info
== NULL
)
7181 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt_info
;
7183 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt_info
;
7184 prev_stmt_info
= new_stmt_info
;
7186 if (zero_vec
&& use_whole_vector
[i
] == scan_store_kind_lshift_cond
)
7188 /* Whole vector shift shifted in zero bits, but if *init
7189 is not initializer_zerop, we need to replace those elements
7190 with elements from vec_oprnd1. */
7191 tree_vector_builder
vb (masktype
, nunits
, 1);
7192 for (unsigned HOST_WIDE_INT k
= 0; k
< nunits
; ++k
)
7193 vb
.quick_push (k
< (HOST_WIDE_INT_1U
<< i
)
7194 ? boolean_false_node
: boolean_true_node
);
7196 tree new_temp2
= make_ssa_name (vectype
);
7197 g
= gimple_build_assign (new_temp2
, VEC_COND_EXPR
, vb
.build (),
7198 new_temp
, vec_oprnd1
);
7199 new_stmt_info
= vect_finish_stmt_generation (stmt_info
, g
, gsi
);
7200 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt_info
;
7201 prev_stmt_info
= new_stmt_info
;
7202 new_temp
= new_temp2
;
7205 /* For exclusive scan, perform the perms[i] permutation once
7208 && STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) == 4
7216 tree new_temp2
= make_ssa_name (vectype
);
7217 g
= gimple_build_assign (new_temp2
, code
, v
, new_temp
);
7218 new_stmt_info
= vect_finish_stmt_generation (stmt_info
, g
, gsi
);
7219 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt_info
;
7220 prev_stmt_info
= new_stmt_info
;
/* On subsequent copies, fold in ORIG (the carried total from the previous
   vector iteration) before the final permutation.  */
7225 tree new_temp
= make_ssa_name (vectype
);
7226 gimple
*g
= gimple_build_assign (new_temp
, code
, orig
, v
);
7227 new_stmt_info
= vect_finish_stmt_generation (stmt_info
, g
, gsi
);
7228 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt_info
;
7229 prev_stmt_info
= new_stmt_info
;
7231 tree last_perm_arg
= new_temp
;
7232 /* For exclusive scan, new_temp computed above is the exclusive scan
7233 prefix sum. Turn it into inclusive prefix sum for the broadcast
7234 of the last element into orig. */
7235 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) == 4)
7237 last_perm_arg
= make_ssa_name (vectype
);
7238 g
= gimple_build_assign (last_perm_arg
, code
, new_temp
, vec_oprnd2
);
7239 new_stmt_info
= vect_finish_stmt_generation (stmt_info
, g
, gsi
);
7240 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt_info
;
7241 prev_stmt_info
= new_stmt_info
;
/* Broadcast the last lane into ORIG via perms[units_log2].  */
7244 orig
= make_ssa_name (vectype
);
7245 g
= gimple_build_assign (orig
, VEC_PERM_EXPR
, last_perm_arg
,
7246 last_perm_arg
, perms
[units_log2
]);
7247 new_stmt_info
= vect_finish_stmt_generation (stmt_info
, g
, gsi
);
7248 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt_info
;
7249 prev_stmt_info
= new_stmt_info
;
7251 if (!inscan_var_store
)
7253 tree data_ref
= fold_build2 (MEM_REF
, vectype
,
7254 unshare_expr (dataref_ptr
),
7256 vect_copy_ref_info (data_ref
, DR_REF (dr_info
->dr
));
7257 g
= gimple_build_assign (data_ref
, new_temp
);
7258 new_stmt_info
= vect_finish_stmt_generation (stmt_info
, g
, gsi
);
7259 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt_info
;
7260 prev_stmt_info
= new_stmt_info
;
/* For stores to the inscan variable, emit the final stores of ORIG for
   each copy after the main loop above.  */
7264 if (inscan_var_store
)
7265 for (int j
= 0; j
< ncopies
; j
++)
7268 dataref_offset
= int_const_binop (PLUS_EXPR
, dataref_offset
, bump
);
7270 tree data_ref
= fold_build2 (MEM_REF
, vectype
,
7271 unshare_expr (dataref_ptr
),
7273 vect_copy_ref_info (data_ref
, DR_REF (dr_info
->dr
));
7274 gimple
*g
= gimple_build_assign (data_ref
, orig
);
7275 stmt_vec_info new_stmt_info
7276 = vect_finish_stmt_generation (stmt_info
, g
, gsi
);
7277 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt_info
;
7278 prev_stmt_info
= new_stmt_info
;
7284 /* Function vectorizable_store.
7286 Check if STMT_INFO defines a non scalar data-ref (array/pointer/structure)
7287 that can be vectorized.
7288 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
7289 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
7290 Return true if STMT_INFO is vectorizable in this way. */
7293 vectorizable_store (stmt_vec_info stmt_info
, gimple_stmt_iterator
*gsi
,
7294 stmt_vec_info
*vec_stmt
, slp_tree slp_node
,
7295 stmt_vector_for_cost
*cost_vec
)
7299 tree vec_oprnd
= NULL_TREE
;
7301 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
7302 class loop
*loop
= NULL
;
7303 machine_mode vec_mode
;
7305 enum dr_alignment_support alignment_support_scheme
;
7306 enum vect_def_type rhs_dt
= vect_unknown_def_type
;
7307 enum vect_def_type mask_dt
= vect_unknown_def_type
;
7308 stmt_vec_info prev_stmt_info
= NULL
;
7309 tree dataref_ptr
= NULL_TREE
;
7310 tree dataref_offset
= NULL_TREE
;
7311 gimple
*ptr_incr
= NULL
;
7314 stmt_vec_info first_stmt_info
;
7316 unsigned int group_size
, i
;
7317 vec
<tree
> oprnds
= vNULL
;
7318 vec
<tree
> result_chain
= vNULL
;
7319 tree offset
= NULL_TREE
;
7320 vec
<tree
> vec_oprnds
= vNULL
;
7321 bool slp
= (slp_node
!= NULL
);
7322 unsigned int vec_num
;
7323 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
7324 vec_info
*vinfo
= stmt_info
->vinfo
;
7326 gather_scatter_info gs_info
;
7328 vec_load_store_type vls_type
;
7331 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
7334 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
7338 /* Is vectorizable store? */
7340 tree mask
= NULL_TREE
, mask_vectype
= NULL_TREE
;
7341 if (gassign
*assign
= dyn_cast
<gassign
*> (stmt_info
->stmt
))
7343 tree scalar_dest
= gimple_assign_lhs (assign
);
7344 if (TREE_CODE (scalar_dest
) == VIEW_CONVERT_EXPR
7345 && is_pattern_stmt_p (stmt_info
))
7346 scalar_dest
= TREE_OPERAND (scalar_dest
, 0);
7347 if (TREE_CODE (scalar_dest
) != ARRAY_REF
7348 && TREE_CODE (scalar_dest
) != BIT_FIELD_REF
7349 && TREE_CODE (scalar_dest
) != INDIRECT_REF
7350 && TREE_CODE (scalar_dest
) != COMPONENT_REF
7351 && TREE_CODE (scalar_dest
) != IMAGPART_EXPR
7352 && TREE_CODE (scalar_dest
) != REALPART_EXPR
7353 && TREE_CODE (scalar_dest
) != MEM_REF
)
7358 gcall
*call
= dyn_cast
<gcall
*> (stmt_info
->stmt
);
7359 if (!call
|| !gimple_call_internal_p (call
))
7362 internal_fn ifn
= gimple_call_internal_fn (call
);
7363 if (!internal_store_fn_p (ifn
))
7366 if (slp_node
!= NULL
)
7368 if (dump_enabled_p ())
7369 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
7370 "SLP of masked stores not supported.\n");
7374 int mask_index
= internal_fn_mask_index (ifn
);
7375 if (mask_index
>= 0)
7377 mask
= gimple_call_arg (call
, mask_index
);
7378 if (!vect_check_scalar_mask (stmt_info
, mask
, &mask_dt
,
7384 op
= vect_get_store_rhs (stmt_info
);
7386 /* Cannot have hybrid store SLP -- that would mean storing to the
7387 same location twice. */
7388 gcc_assert (slp
== PURE_SLP_STMT (stmt_info
));
7390 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
), rhs_vectype
= NULL_TREE
;
7391 poly_uint64 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
7395 loop
= LOOP_VINFO_LOOP (loop_vinfo
);
7396 vf
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
);
7401 /* Multiple types in SLP are handled by creating the appropriate number of
7402 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
7407 ncopies
= vect_get_num_copies (loop_vinfo
, vectype
);
7409 gcc_assert (ncopies
>= 1);
7411 /* FORNOW. This restriction should be relaxed. */
7412 if (loop
&& nested_in_vect_loop_p (loop
, stmt_info
) && ncopies
> 1)
7414 if (dump_enabled_p ())
7415 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
7416 "multiple types in nested loop.\n");
7420 if (!vect_check_store_rhs (stmt_info
, op
, &rhs_dt
, &rhs_vectype
, &vls_type
))
7423 elem_type
= TREE_TYPE (vectype
);
7424 vec_mode
= TYPE_MODE (vectype
);
7426 if (!STMT_VINFO_DATA_REF (stmt_info
))
7429 vect_memory_access_type memory_access_type
;
7430 if (!get_load_store_type (stmt_info
, vectype
, slp
, mask
, vls_type
, ncopies
,
7431 &memory_access_type
, &gs_info
))
7436 if (memory_access_type
== VMAT_CONTIGUOUS
)
7438 if (!VECTOR_MODE_P (vec_mode
)
7439 || !can_vec_mask_load_store_p (vec_mode
,
7440 TYPE_MODE (mask_vectype
), false))
7443 else if (memory_access_type
!= VMAT_LOAD_STORE_LANES
7444 && (memory_access_type
!= VMAT_GATHER_SCATTER
7445 || (gs_info
.decl
&& !VECTOR_BOOLEAN_TYPE_P (mask_vectype
))))
7447 if (dump_enabled_p ())
7448 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
7449 "unsupported access type for masked store.\n");
7455 /* FORNOW. In some cases can vectorize even if data-type not supported
7456 (e.g. - array initialization with 0). */
7457 if (optab_handler (mov_optab
, vec_mode
) == CODE_FOR_nothing
)
7461 dr_vec_info
*dr_info
= STMT_VINFO_DR_INFO (stmt_info
), *first_dr_info
= NULL
;
7462 grouped_store
= (STMT_VINFO_GROUPED_ACCESS (stmt_info
)
7463 && memory_access_type
!= VMAT_GATHER_SCATTER
7464 && (slp
|| memory_access_type
!= VMAT_CONTIGUOUS
));
7467 first_stmt_info
= DR_GROUP_FIRST_ELEMENT (stmt_info
);
7468 first_dr_info
= STMT_VINFO_DR_INFO (first_stmt_info
);
7469 group_size
= DR_GROUP_SIZE (first_stmt_info
);
7473 first_stmt_info
= stmt_info
;
7474 first_dr_info
= dr_info
;
7475 group_size
= vec_num
= 1;
7478 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) > 1 && !vec_stmt
)
7480 if (!check_scan_store (stmt_info
, vectype
, rhs_dt
, slp
, mask
,
7481 memory_access_type
))
7485 if (!vec_stmt
) /* transformation not required. */
7487 STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info
) = memory_access_type
;
7490 && LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo
))
7491 check_load_store_masking (loop_vinfo
, vectype
, vls_type
, group_size
,
7492 memory_access_type
, &gs_info
, mask
);
7494 STMT_VINFO_TYPE (stmt_info
) = store_vec_info_type
;
7495 vect_model_store_cost (stmt_info
, ncopies
, rhs_dt
, memory_access_type
,
7496 vls_type
, slp_node
, cost_vec
);
7499 gcc_assert (memory_access_type
== STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info
));
7503 ensure_base_align (dr_info
);
7505 if (memory_access_type
== VMAT_GATHER_SCATTER
&& gs_info
.decl
)
7507 tree vec_oprnd0
= NULL_TREE
, vec_oprnd1
= NULL_TREE
, src
;
7508 tree arglist
= TYPE_ARG_TYPES (TREE_TYPE (gs_info
.decl
));
7509 tree rettype
, srctype
, ptrtype
, idxtype
, masktype
, scaletype
;
7510 tree ptr
, var
, scale
, vec_mask
;
7511 tree mask_arg
= NULL_TREE
, mask_op
= NULL_TREE
, perm_mask
= NULL_TREE
;
7512 tree mask_halfvectype
= mask_vectype
;
7513 edge pe
= loop_preheader_edge (loop
);
7516 enum { NARROW
, NONE
, WIDEN
} modifier
;
7517 poly_uint64 scatter_off_nunits
7518 = TYPE_VECTOR_SUBPARTS (gs_info
.offset_vectype
);
7520 if (known_eq (nunits
, scatter_off_nunits
))
7522 else if (known_eq (nunits
* 2, scatter_off_nunits
))
7526 /* Currently gathers and scatters are only supported for
7527 fixed-length vectors. */
7528 unsigned int count
= scatter_off_nunits
.to_constant ();
7529 vec_perm_builder
sel (count
, count
, 1);
7530 for (i
= 0; i
< (unsigned int) count
; ++i
)
7531 sel
.quick_push (i
| (count
/ 2));
7533 vec_perm_indices
indices (sel
, 1, count
);
7534 perm_mask
= vect_gen_perm_mask_checked (gs_info
.offset_vectype
,
7536 gcc_assert (perm_mask
!= NULL_TREE
);
7538 else if (known_eq (nunits
, scatter_off_nunits
* 2))
7542 /* Currently gathers and scatters are only supported for
7543 fixed-length vectors. */
7544 unsigned int count
= nunits
.to_constant ();
7545 vec_perm_builder
sel (count
, count
, 1);
7546 for (i
= 0; i
< (unsigned int) count
; ++i
)
7547 sel
.quick_push (i
| (count
/ 2));
7549 vec_perm_indices
indices (sel
, 2, count
);
7550 perm_mask
= vect_gen_perm_mask_checked (vectype
, indices
);
7551 gcc_assert (perm_mask
!= NULL_TREE
);
7555 mask_halfvectype
= truth_type_for (gs_info
.offset_vectype
);
7560 rettype
= TREE_TYPE (TREE_TYPE (gs_info
.decl
));
7561 ptrtype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
7562 masktype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
7563 idxtype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
7564 srctype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
7565 scaletype
= TREE_VALUE (arglist
);
7567 gcc_checking_assert (TREE_CODE (masktype
) == INTEGER_TYPE
7568 && TREE_CODE (rettype
) == VOID_TYPE
);
7570 ptr
= fold_convert (ptrtype
, gs_info
.base
);
7571 if (!is_gimple_min_invariant (ptr
))
7573 ptr
= force_gimple_operand (ptr
, &seq
, true, NULL_TREE
);
7574 new_bb
= gsi_insert_seq_on_edge_immediate (pe
, seq
);
7575 gcc_assert (!new_bb
);
7578 if (mask
== NULL_TREE
)
7580 mask_arg
= build_int_cst (masktype
, -1);
7581 mask_arg
= vect_init_vector (stmt_info
, mask_arg
, masktype
, NULL
);
7584 scale
= build_int_cst (scaletype
, gs_info
.scale
);
7586 prev_stmt_info
= NULL
;
7587 for (j
= 0; j
< ncopies
; ++j
)
7591 src
= vec_oprnd1
= vect_get_vec_def_for_operand (op
, stmt_info
);
7592 op
= vec_oprnd0
= vect_get_vec_def_for_operand (gs_info
.offset
,
7595 mask_op
= vec_mask
= vect_get_vec_def_for_operand (mask
,
7598 else if (modifier
!= NONE
&& (j
& 1))
7600 if (modifier
== WIDEN
)
7603 = vec_oprnd1
= vect_get_vec_def_for_stmt_copy (vinfo
,
7605 op
= permute_vec_elements (vec_oprnd0
, vec_oprnd0
, perm_mask
,
7609 = vec_mask
= vect_get_vec_def_for_stmt_copy (vinfo
,
7612 else if (modifier
== NARROW
)
7614 src
= permute_vec_elements (vec_oprnd1
, vec_oprnd1
, perm_mask
,
7616 op
= vec_oprnd0
= vect_get_vec_def_for_stmt_copy (vinfo
,
7624 src
= vec_oprnd1
= vect_get_vec_def_for_stmt_copy (vinfo
,
7626 op
= vec_oprnd0
= vect_get_vec_def_for_stmt_copy (vinfo
,
7629 mask_op
= vec_mask
= vect_get_vec_def_for_stmt_copy (vinfo
,
7633 if (!useless_type_conversion_p (srctype
, TREE_TYPE (src
)))
7635 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (src
)),
7636 TYPE_VECTOR_SUBPARTS (srctype
)));
7637 var
= vect_get_new_ssa_name (srctype
, vect_simple_var
);
7638 src
= build1 (VIEW_CONVERT_EXPR
, srctype
, src
);
7640 = gimple_build_assign (var
, VIEW_CONVERT_EXPR
, src
);
7641 vect_finish_stmt_generation (stmt_info
, new_stmt
, gsi
);
7645 if (!useless_type_conversion_p (idxtype
, TREE_TYPE (op
)))
7647 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op
)),
7648 TYPE_VECTOR_SUBPARTS (idxtype
)));
7649 var
= vect_get_new_ssa_name (idxtype
, vect_simple_var
);
7650 op
= build1 (VIEW_CONVERT_EXPR
, idxtype
, op
);
7652 = gimple_build_assign (var
, VIEW_CONVERT_EXPR
, op
);
7653 vect_finish_stmt_generation (stmt_info
, new_stmt
, gsi
);
7661 if (modifier
== NARROW
)
7663 var
= vect_get_new_ssa_name (mask_halfvectype
,
7666 = gimple_build_assign (var
, (j
& 1) ? VEC_UNPACK_HI_EXPR
7667 : VEC_UNPACK_LO_EXPR
,
7669 vect_finish_stmt_generation (stmt_info
, new_stmt
, gsi
);
7672 tree optype
= TREE_TYPE (mask_arg
);
7673 if (TYPE_MODE (masktype
) == TYPE_MODE (optype
))
7676 utype
= lang_hooks
.types
.type_for_mode (TYPE_MODE (optype
), 1);
7677 var
= vect_get_new_ssa_name (utype
, vect_scalar_var
);
7678 mask_arg
= build1 (VIEW_CONVERT_EXPR
, utype
, mask_arg
);
7680 = gimple_build_assign (var
, VIEW_CONVERT_EXPR
, mask_arg
);
7681 vect_finish_stmt_generation (stmt_info
, new_stmt
, gsi
);
7683 if (!useless_type_conversion_p (masktype
, utype
))
7685 gcc_assert (TYPE_PRECISION (utype
)
7686 <= TYPE_PRECISION (masktype
));
7687 var
= vect_get_new_ssa_name (masktype
, vect_scalar_var
);
7688 new_stmt
= gimple_build_assign (var
, NOP_EXPR
, mask_arg
);
7689 vect_finish_stmt_generation (stmt_info
, new_stmt
, gsi
);
7695 = gimple_build_call (gs_info
.decl
, 5, ptr
, mask_arg
, op
, src
, scale
);
7696 stmt_vec_info new_stmt_info
7697 = vect_finish_stmt_generation (stmt_info
, new_stmt
, gsi
);
7699 if (prev_stmt_info
== NULL
)
7700 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt_info
;
7702 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt_info
;
7703 prev_stmt_info
= new_stmt_info
;
7707 else if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) >= 3)
7708 return vectorizable_scan_store (stmt_info
, gsi
, vec_stmt
, ncopies
);
7710 if (STMT_VINFO_GROUPED_ACCESS (stmt_info
))
7711 DR_GROUP_STORE_COUNT (DR_GROUP_FIRST_ELEMENT (stmt_info
))++;
7716 gcc_assert (!loop
|| !nested_in_vect_loop_p (loop
, stmt_info
));
7718 /* We vectorize all the stmts of the interleaving group when we
7719 reach the last stmt in the group. */
7720 if (DR_GROUP_STORE_COUNT (first_stmt_info
)
7721 < DR_GROUP_SIZE (first_stmt_info
)
7730 grouped_store
= false;
7731 /* VEC_NUM is the number of vect stmts to be created for this
7733 vec_num
= SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
);
7734 first_stmt_info
= SLP_TREE_SCALAR_STMTS (slp_node
)[0];
7735 gcc_assert (DR_GROUP_FIRST_ELEMENT (first_stmt_info
)
7736 == first_stmt_info
);
7737 first_dr_info
= STMT_VINFO_DR_INFO (first_stmt_info
);
7738 op
= vect_get_store_rhs (first_stmt_info
);
7741 /* VEC_NUM is the number of vect stmts to be created for this
7743 vec_num
= group_size
;
7745 ref_type
= get_group_alias_ptr_type (first_stmt_info
);
7748 ref_type
= reference_alias_ptr_type (DR_REF (first_dr_info
->dr
));
7750 if (dump_enabled_p ())
7751 dump_printf_loc (MSG_NOTE
, vect_location
,
7752 "transform store. ncopies = %d\n", ncopies
);
7754 if (memory_access_type
== VMAT_ELEMENTWISE
7755 || memory_access_type
== VMAT_STRIDED_SLP
)
7757 gimple_stmt_iterator incr_gsi
;
7763 tree stride_base
, stride_step
, alias_off
;
7767 /* Checked by get_load_store_type. */
7768 unsigned int const_nunits
= nunits
.to_constant ();
7770 gcc_assert (!LOOP_VINFO_FULLY_MASKED_P (loop_vinfo
));
7771 gcc_assert (!nested_in_vect_loop_p (loop
, stmt_info
));
7773 dr_offset
= get_dr_vinfo_offset (first_dr_info
);
7775 = fold_build_pointer_plus
7776 (DR_BASE_ADDRESS (first_dr_info
->dr
),
7777 size_binop (PLUS_EXPR
,
7778 convert_to_ptrofftype (dr_offset
),
7779 convert_to_ptrofftype (DR_INIT (first_dr_info
->dr
))));
7780 stride_step
= fold_convert (sizetype
, DR_STEP (first_dr_info
->dr
));
7782 /* For a store with loop-invariant (but other than power-of-2)
7783 stride (i.e. not a grouped access) like so:
7785 for (i = 0; i < n; i += stride)
7788 we generate a new induction variable and new stores from
7789 the components of the (vectorized) rhs:
7791 for (j = 0; ; j += VF*stride)
7796 array[j + stride] = tmp2;
7800 unsigned nstores
= const_nunits
;
7802 tree ltype
= elem_type
;
7803 tree lvectype
= vectype
;
7806 if (group_size
< const_nunits
7807 && const_nunits
% group_size
== 0)
7809 nstores
= const_nunits
/ group_size
;
7811 ltype
= build_vector_type (elem_type
, group_size
);
7814 /* First check if vec_extract optab doesn't support extraction
7815 of vector elts directly. */
7816 scalar_mode elmode
= SCALAR_TYPE_MODE (elem_type
);
7818 if (!VECTOR_MODE_P (TYPE_MODE (vectype
))
7819 || !related_vector_mode (TYPE_MODE (vectype
), elmode
,
7820 group_size
).exists (&vmode
)
7821 || (convert_optab_handler (vec_extract_optab
,
7822 TYPE_MODE (vectype
), vmode
)
7823 == CODE_FOR_nothing
))
7825 /* Try to avoid emitting an extract of vector elements
7826 by performing the extracts using an integer type of the
7827 same size, extracting from a vector of those and then
7828 re-interpreting it as the original vector type if
7831 = group_size
* GET_MODE_BITSIZE (elmode
);
7832 unsigned int lnunits
= const_nunits
/ group_size
;
7833 /* If we can't construct such a vector fall back to
7834 element extracts from the original vector type and
7835 element size stores. */
7836 if (int_mode_for_size (lsize
, 0).exists (&elmode
)
7837 && VECTOR_MODE_P (TYPE_MODE (vectype
))
7838 && related_vector_mode (TYPE_MODE (vectype
), elmode
,
7839 lnunits
).exists (&vmode
)
7840 && (convert_optab_handler (vec_extract_optab
,
7842 != CODE_FOR_nothing
))
7846 ltype
= build_nonstandard_integer_type (lsize
, 1);
7847 lvectype
= build_vector_type (ltype
, nstores
);
7849 /* Else fall back to vector extraction anyway.
7850 Fewer stores are more important than avoiding spilling
7851 of the vector we extract from. Compared to the
7852 construction case in vectorizable_load no store-forwarding
7853 issue exists here for reasonable archs. */
7856 else if (group_size
>= const_nunits
7857 && group_size
% const_nunits
== 0)
7860 lnel
= const_nunits
;
7864 ltype
= build_aligned_type (ltype
, TYPE_ALIGN (elem_type
));
7865 ncopies
= SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
);
7868 ivstep
= stride_step
;
7869 ivstep
= fold_build2 (MULT_EXPR
, TREE_TYPE (ivstep
), ivstep
,
7870 build_int_cst (TREE_TYPE (ivstep
), vf
));
7872 standard_iv_increment_position (loop
, &incr_gsi
, &insert_after
);
7874 stride_base
= cse_and_gimplify_to_preheader (loop_vinfo
, stride_base
);
7875 ivstep
= cse_and_gimplify_to_preheader (loop_vinfo
, ivstep
);
7876 create_iv (stride_base
, ivstep
, NULL
,
7877 loop
, &incr_gsi
, insert_after
,
7879 incr
= gsi_stmt (incr_gsi
);
7880 loop_vinfo
->add_stmt (incr
);
7882 stride_step
= cse_and_gimplify_to_preheader (loop_vinfo
, stride_step
);
7884 prev_stmt_info
= NULL
;
7885 alias_off
= build_int_cst (ref_type
, 0);
7886 stmt_vec_info next_stmt_info
= first_stmt_info
;
7887 for (g
= 0; g
< group_size
; g
++)
7889 running_off
= offvar
;
7892 tree size
= TYPE_SIZE_UNIT (ltype
);
7893 tree pos
= fold_build2 (MULT_EXPR
, sizetype
, size_int (g
),
7895 tree newoff
= copy_ssa_name (running_off
, NULL
);
7896 incr
= gimple_build_assign (newoff
, POINTER_PLUS_EXPR
,
7898 vect_finish_stmt_generation (stmt_info
, incr
, gsi
);
7899 running_off
= newoff
;
7901 unsigned int group_el
= 0;
7902 unsigned HOST_WIDE_INT
7903 elsz
= tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype
)));
7904 for (j
= 0; j
< ncopies
; j
++)
7906 /* We've set op and dt above, from vect_get_store_rhs,
7907 and first_stmt_info == stmt_info. */
7912 vect_get_vec_defs (op
, NULL_TREE
, stmt_info
,
7913 &vec_oprnds
, NULL
, slp_node
);
7914 vec_oprnd
= vec_oprnds
[0];
7918 op
= vect_get_store_rhs (next_stmt_info
);
7919 vec_oprnd
= vect_get_vec_def_for_operand
7920 (op
, next_stmt_info
);
7926 vec_oprnd
= vec_oprnds
[j
];
7928 vec_oprnd
= vect_get_vec_def_for_stmt_copy (vinfo
,
7931 /* Pun the vector to extract from if necessary. */
7932 if (lvectype
!= vectype
)
7934 tree tem
= make_ssa_name (lvectype
);
7936 = gimple_build_assign (tem
, build1 (VIEW_CONVERT_EXPR
,
7937 lvectype
, vec_oprnd
));
7938 vect_finish_stmt_generation (stmt_info
, pun
, gsi
);
7941 for (i
= 0; i
< nstores
; i
++)
7943 tree newref
, newoff
;
7944 gimple
*incr
, *assign
;
7945 tree size
= TYPE_SIZE (ltype
);
7946 /* Extract the i'th component. */
7947 tree pos
= fold_build2 (MULT_EXPR
, bitsizetype
,
7948 bitsize_int (i
), size
);
7949 tree elem
= fold_build3 (BIT_FIELD_REF
, ltype
, vec_oprnd
,
7952 elem
= force_gimple_operand_gsi (gsi
, elem
, true,
7956 tree this_off
= build_int_cst (TREE_TYPE (alias_off
),
7958 newref
= build2 (MEM_REF
, ltype
,
7959 running_off
, this_off
);
7960 vect_copy_ref_info (newref
, DR_REF (first_dr_info
->dr
));
7962 /* And store it to *running_off. */
7963 assign
= gimple_build_assign (newref
, elem
);
7964 stmt_vec_info assign_info
7965 = vect_finish_stmt_generation (stmt_info
, assign
, gsi
);
7969 || group_el
== group_size
)
7971 newoff
= copy_ssa_name (running_off
, NULL
);
7972 incr
= gimple_build_assign (newoff
, POINTER_PLUS_EXPR
,
7973 running_off
, stride_step
);
7974 vect_finish_stmt_generation (stmt_info
, incr
, gsi
);
7976 running_off
= newoff
;
7979 if (g
== group_size
- 1
7982 if (j
== 0 && i
== 0)
7983 STMT_VINFO_VEC_STMT (stmt_info
)
7984 = *vec_stmt
= assign_info
;
7986 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = assign_info
;
7987 prev_stmt_info
= assign_info
;
7991 next_stmt_info
= DR_GROUP_NEXT_ELEMENT (next_stmt_info
);
7996 vec_oprnds
.release ();
8000 auto_vec
<tree
> dr_chain (group_size
);
8001 oprnds
.create (group_size
);
8003 alignment_support_scheme
8004 = vect_supportable_dr_alignment (first_dr_info
, false);
8005 gcc_assert (alignment_support_scheme
);
8006 vec_loop_masks
*loop_masks
8007 = (loop_vinfo
&& LOOP_VINFO_FULLY_MASKED_P (loop_vinfo
)
8008 ? &LOOP_VINFO_MASKS (loop_vinfo
)
8010 /* Targets with store-lane instructions must not require explicit
8011 realignment. vect_supportable_dr_alignment always returns either
8012 dr_aligned or dr_unaligned_supported for masked operations. */
8013 gcc_assert ((memory_access_type
!= VMAT_LOAD_STORE_LANES
8016 || alignment_support_scheme
== dr_aligned
8017 || alignment_support_scheme
== dr_unaligned_supported
);
8019 if (memory_access_type
== VMAT_CONTIGUOUS_DOWN
8020 || memory_access_type
== VMAT_CONTIGUOUS_REVERSE
)
8021 offset
= size_int (-TYPE_VECTOR_SUBPARTS (vectype
) + 1);
8024 tree vec_offset
= NULL_TREE
;
8025 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info
))
8027 aggr_type
= NULL_TREE
;
8030 else if (memory_access_type
== VMAT_GATHER_SCATTER
)
8032 aggr_type
= elem_type
;
8033 vect_get_strided_load_store_ops (stmt_info
, loop_vinfo
, &gs_info
,
8034 &bump
, &vec_offset
);
8038 if (memory_access_type
== VMAT_LOAD_STORE_LANES
)
8039 aggr_type
= build_array_type_nelts (elem_type
, vec_num
* nunits
);
8041 aggr_type
= vectype
;
8042 bump
= vect_get_data_ptr_increment (dr_info
, aggr_type
,
8043 memory_access_type
);
8047 LOOP_VINFO_HAS_MASK_STORE (loop_vinfo
) = true;
8049 /* In case the vectorization factor (VF) is bigger than the number
8050 of elements that we can fit in a vectype (nunits), we have to generate
8051 more than one vector stmt - i.e - we need to "unroll" the
8052 vector stmt by a factor VF/nunits. For more details see documentation in
8053 vect_get_vec_def_for_copy_stmt. */
8055 /* In case of interleaving (non-unit grouped access):
8062 We create vectorized stores starting from base address (the access of the
8063 first stmt in the chain (S2 in the above example), when the last store stmt
8064 of the chain (S4) is reached:
8067 VS2: &base + vec_size*1 = vx0
8068 VS3: &base + vec_size*2 = vx1
8069 VS4: &base + vec_size*3 = vx3
8071 Then permutation statements are generated:
8073 VS5: vx5 = VEC_PERM_EXPR < vx0, vx3, {0, 8, 1, 9, 2, 10, 3, 11} >
8074 VS6: vx6 = VEC_PERM_EXPR < vx0, vx3, {4, 12, 5, 13, 6, 14, 7, 15} >
8077 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
8078 (the order of the data-refs in the output of vect_permute_store_chain
8079 corresponds to the order of scalar stmts in the interleaving chain - see
8080 the documentation of vect_permute_store_chain()).
8082 In case of both multiple types and interleaving, above vector stores and
8083 permutation stmts are created for every copy. The result vector stmts are
8084 put in STMT_VINFO_VEC_STMT for the first copy and in the corresponding
8085 STMT_VINFO_RELATED_STMT for the next copies.
8088 prev_stmt_info
= NULL
;
8089 tree vec_mask
= NULL_TREE
;
8090 for (j
= 0; j
< ncopies
; j
++)
8092 stmt_vec_info new_stmt_info
;
8097 /* Get vectorized arguments for SLP_NODE. */
8098 vect_get_vec_defs (op
, NULL_TREE
, stmt_info
, &vec_oprnds
,
8101 vec_oprnd
= vec_oprnds
[0];
8105 /* For interleaved stores we collect vectorized defs for all the
8106 stores in the group in DR_CHAIN and OPRNDS. DR_CHAIN is then
8107 used as an input to vect_permute_store_chain(), and OPRNDS as
8108 an input to vect_get_vec_def_for_stmt_copy() for the next copy.
8110 If the store is not grouped, DR_GROUP_SIZE is 1, and DR_CHAIN and
8111 OPRNDS are of size 1. */
8112 stmt_vec_info next_stmt_info
= first_stmt_info
;
8113 for (i
= 0; i
< group_size
; i
++)
8115 /* Since gaps are not supported for interleaved stores,
8116 DR_GROUP_SIZE is the exact number of stmts in the chain.
8117 Therefore, NEXT_STMT_INFO can't be NULL_TREE. In case
8118 that there is no interleaving, DR_GROUP_SIZE is 1,
8119 and only one iteration of the loop will be executed. */
8120 op
= vect_get_store_rhs (next_stmt_info
);
8121 vec_oprnd
= vect_get_vec_def_for_operand
8122 (op
, next_stmt_info
);
8123 dr_chain
.quick_push (vec_oprnd
);
8124 oprnds
.quick_push (vec_oprnd
);
8125 next_stmt_info
= DR_GROUP_NEXT_ELEMENT (next_stmt_info
);
8128 vec_mask
= vect_get_vec_def_for_operand (mask
, stmt_info
,
8132 /* We should have catched mismatched types earlier. */
8133 gcc_assert (useless_type_conversion_p (vectype
,
8134 TREE_TYPE (vec_oprnd
)));
8135 bool simd_lane_access_p
8136 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) != 0;
8137 if (simd_lane_access_p
8139 && TREE_CODE (DR_BASE_ADDRESS (first_dr_info
->dr
)) == ADDR_EXPR
8140 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr_info
->dr
), 0))
8141 && integer_zerop (get_dr_vinfo_offset (first_dr_info
))
8142 && integer_zerop (DR_INIT (first_dr_info
->dr
))
8143 && alias_sets_conflict_p (get_alias_set (aggr_type
),
8144 get_alias_set (TREE_TYPE (ref_type
))))
8146 dataref_ptr
= unshare_expr (DR_BASE_ADDRESS (first_dr_info
->dr
));
8147 dataref_offset
= build_int_cst (ref_type
, 0);
8149 else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info
))
8150 vect_get_gather_scatter_ops (loop
, stmt_info
, &gs_info
,
8151 &dataref_ptr
, &vec_offset
);
8154 = vect_create_data_ref_ptr (first_stmt_info
, aggr_type
,
8155 simd_lane_access_p
? loop
: NULL
,
8156 offset
, &dummy
, gsi
, &ptr_incr
,
8157 simd_lane_access_p
, NULL_TREE
, bump
);
8161 /* For interleaved stores we created vectorized defs for all the
8162 defs stored in OPRNDS in the previous iteration (previous copy).
8163 DR_CHAIN is then used as an input to vect_permute_store_chain(),
8164 and OPRNDS as an input to vect_get_vec_def_for_stmt_copy() for the
8166 If the store is not grouped, DR_GROUP_SIZE is 1, and DR_CHAIN and
8167 OPRNDS are of size 1. */
8168 for (i
= 0; i
< group_size
; i
++)
8171 vec_oprnd
= vect_get_vec_def_for_stmt_copy (vinfo
, op
);
8172 dr_chain
[i
] = vec_oprnd
;
8173 oprnds
[i
] = vec_oprnd
;
8176 vec_mask
= vect_get_vec_def_for_stmt_copy (vinfo
, vec_mask
);
8179 = int_const_binop (PLUS_EXPR
, dataref_offset
, bump
);
8180 else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info
))
8181 vec_offset
= vect_get_vec_def_for_stmt_copy (vinfo
, vec_offset
);
8183 dataref_ptr
= bump_vector_ptr (dataref_ptr
, ptr_incr
, gsi
,
8187 if (memory_access_type
== VMAT_LOAD_STORE_LANES
)
8191 /* Get an array into which we can store the individual vectors. */
8192 vec_array
= create_vector_array (vectype
, vec_num
);
8194 /* Invalidate the current contents of VEC_ARRAY. This should
8195 become an RTL clobber too, which prevents the vector registers
8196 from being upward-exposed. */
8197 vect_clobber_variable (stmt_info
, gsi
, vec_array
);
8199 /* Store the individual vectors into the array. */
8200 for (i
= 0; i
< vec_num
; i
++)
8202 vec_oprnd
= dr_chain
[i
];
8203 write_vector_array (stmt_info
, gsi
, vec_oprnd
, vec_array
, i
);
8206 tree final_mask
= NULL
;
8208 final_mask
= vect_get_loop_mask (gsi
, loop_masks
, ncopies
,
8211 final_mask
= prepare_load_store_mask (mask_vectype
, final_mask
,
8218 MASK_STORE_LANES (DATAREF_PTR, ALIAS_PTR, VEC_MASK,
8220 unsigned int align
= TYPE_ALIGN_UNIT (TREE_TYPE (vectype
));
8221 tree alias_ptr
= build_int_cst (ref_type
, align
);
8222 call
= gimple_build_call_internal (IFN_MASK_STORE_LANES
, 4,
8223 dataref_ptr
, alias_ptr
,
8224 final_mask
, vec_array
);
8229 MEM_REF[...all elements...] = STORE_LANES (VEC_ARRAY). */
8230 data_ref
= create_array_ref (aggr_type
, dataref_ptr
, ref_type
);
8231 call
= gimple_build_call_internal (IFN_STORE_LANES
, 1,
8233 gimple_call_set_lhs (call
, data_ref
);
8235 gimple_call_set_nothrow (call
, true);
8236 new_stmt_info
= vect_finish_stmt_generation (stmt_info
, call
, gsi
);
8238 /* Record that VEC_ARRAY is now dead. */
8239 vect_clobber_variable (stmt_info
, gsi
, vec_array
);
8243 new_stmt_info
= NULL
;
8247 result_chain
.create (group_size
);
8249 vect_permute_store_chain (dr_chain
, group_size
, stmt_info
, gsi
,
8253 stmt_vec_info next_stmt_info
= first_stmt_info
;
8254 for (i
= 0; i
< vec_num
; i
++)
8257 unsigned HOST_WIDE_INT align
;
8259 tree final_mask
= NULL_TREE
;
8261 final_mask
= vect_get_loop_mask (gsi
, loop_masks
,
8263 vectype
, vec_num
* j
+ i
);
8265 final_mask
= prepare_load_store_mask (mask_vectype
, final_mask
,
8268 if (memory_access_type
== VMAT_GATHER_SCATTER
)
8270 tree scale
= size_int (gs_info
.scale
);
8273 call
= gimple_build_call_internal
8274 (IFN_MASK_SCATTER_STORE
, 5, dataref_ptr
, vec_offset
,
8275 scale
, vec_oprnd
, final_mask
);
8277 call
= gimple_build_call_internal
8278 (IFN_SCATTER_STORE
, 4, dataref_ptr
, vec_offset
,
8280 gimple_call_set_nothrow (call
, true);
8282 = vect_finish_stmt_generation (stmt_info
, call
, gsi
);
8287 /* Bump the vector pointer. */
8288 dataref_ptr
= bump_vector_ptr (dataref_ptr
, ptr_incr
, gsi
,
8292 vec_oprnd
= vec_oprnds
[i
];
8293 else if (grouped_store
)
8294 /* For grouped stores vectorized defs are interleaved in
8295 vect_permute_store_chain(). */
8296 vec_oprnd
= result_chain
[i
];
8298 align
= known_alignment (DR_TARGET_ALIGNMENT (first_dr_info
));
8299 if (aligned_access_p (first_dr_info
))
8301 else if (DR_MISALIGNMENT (first_dr_info
) == -1)
8303 align
= dr_alignment (vect_dr_behavior (first_dr_info
));
8307 misalign
= DR_MISALIGNMENT (first_dr_info
);
8308 if (dataref_offset
== NULL_TREE
8309 && TREE_CODE (dataref_ptr
) == SSA_NAME
)
8310 set_ptr_info_alignment (get_ptr_info (dataref_ptr
), align
,
8313 if (memory_access_type
== VMAT_CONTIGUOUS_REVERSE
)
8315 tree perm_mask
= perm_mask_for_reverse (vectype
);
8316 tree perm_dest
= vect_create_destination_var
8317 (vect_get_store_rhs (stmt_info
), vectype
);
8318 tree new_temp
= make_ssa_name (perm_dest
);
8320 /* Generate the permute statement. */
8322 = gimple_build_assign (new_temp
, VEC_PERM_EXPR
, vec_oprnd
,
8323 vec_oprnd
, perm_mask
);
8324 vect_finish_stmt_generation (stmt_info
, perm_stmt
, gsi
);
8326 perm_stmt
= SSA_NAME_DEF_STMT (new_temp
);
8327 vec_oprnd
= new_temp
;
8330 /* Arguments are ready. Create the new vector stmt. */
8333 align
= least_bit_hwi (misalign
| align
);
8334 tree ptr
= build_int_cst (ref_type
, align
);
8336 = gimple_build_call_internal (IFN_MASK_STORE
, 4,
8338 final_mask
, vec_oprnd
);
8339 gimple_call_set_nothrow (call
, true);
8341 = vect_finish_stmt_generation (stmt_info
, call
, gsi
);
8345 data_ref
= fold_build2 (MEM_REF
, vectype
,
8349 : build_int_cst (ref_type
, 0));
8350 if (aligned_access_p (first_dr_info
))
8352 else if (DR_MISALIGNMENT (first_dr_info
) == -1)
8353 TREE_TYPE (data_ref
)
8354 = build_aligned_type (TREE_TYPE (data_ref
),
8355 align
* BITS_PER_UNIT
);
8357 TREE_TYPE (data_ref
)
8358 = build_aligned_type (TREE_TYPE (data_ref
),
8359 TYPE_ALIGN (elem_type
));
8360 vect_copy_ref_info (data_ref
, DR_REF (first_dr_info
->dr
));
8362 = gimple_build_assign (data_ref
, vec_oprnd
);
8364 = vect_finish_stmt_generation (stmt_info
, new_stmt
, gsi
);
8370 next_stmt_info
= DR_GROUP_NEXT_ELEMENT (next_stmt_info
);
8371 if (!next_stmt_info
)
8378 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt_info
;
8380 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt_info
;
8381 prev_stmt_info
= new_stmt_info
;
8386 result_chain
.release ();
8387 vec_oprnds
.release ();
8392 /* Given a vector type VECTYPE, turns permutation SEL into the equivalent
8393 VECTOR_CST mask. No checks are made that the target platform supports the
8394 mask, so callers may wish to test can_vec_perm_const_p separately, or use
8395 vect_gen_perm_mask_checked. */
8398 vect_gen_perm_mask_any (tree vectype
, const vec_perm_indices
&sel
)
8402 poly_uint64 nunits
= sel
.length ();
8403 gcc_assert (known_eq (nunits
, TYPE_VECTOR_SUBPARTS (vectype
)));
8405 mask_type
= build_vector_type (ssizetype
, nunits
);
8406 return vec_perm_indices_to_tree (mask_type
, sel
);
8409 /* Checked version of vect_gen_perm_mask_any. Asserts can_vec_perm_const_p,
8410 i.e. that the target supports the pattern _for arbitrary input vectors_. */
8413 vect_gen_perm_mask_checked (tree vectype
, const vec_perm_indices
&sel
)
8415 gcc_assert (can_vec_perm_const_p (TYPE_MODE (vectype
), sel
));
8416 return vect_gen_perm_mask_any (vectype
, sel
);
8419 /* Given a vector variable X and Y, that was generated for the scalar
8420 STMT_INFO, generate instructions to permute the vector elements of X and Y
8421 using permutation mask MASK_VEC, insert them at *GSI and return the
8422 permuted vector variable. */
8425 permute_vec_elements (tree x
, tree y
, tree mask_vec
, stmt_vec_info stmt_info
,
8426 gimple_stmt_iterator
*gsi
)
8428 tree vectype
= TREE_TYPE (x
);
8429 tree perm_dest
, data_ref
;
8432 tree scalar_dest
= gimple_get_lhs (stmt_info
->stmt
);
8433 if (scalar_dest
&& TREE_CODE (scalar_dest
) == SSA_NAME
)
8434 perm_dest
= vect_create_destination_var (scalar_dest
, vectype
);
8436 perm_dest
= vect_get_new_vect_var (vectype
, vect_simple_var
, NULL
);
8437 data_ref
= make_ssa_name (perm_dest
);
8439 /* Generate the permute statement. */
8440 perm_stmt
= gimple_build_assign (data_ref
, VEC_PERM_EXPR
, x
, y
, mask_vec
);
8441 vect_finish_stmt_generation (stmt_info
, perm_stmt
, gsi
);
8446 /* Hoist the definitions of all SSA uses on STMT_INFO out of the loop LOOP,
8447 inserting them on the loops preheader edge. Returns true if we
8448 were successful in doing so (and thus STMT_INFO can be moved then),
8449 otherwise returns false. */
8452 hoist_defs_of_uses (stmt_vec_info stmt_info
, class loop
*loop
)
8458 FOR_EACH_SSA_TREE_OPERAND (op
, stmt_info
->stmt
, i
, SSA_OP_USE
)
8460 gimple
*def_stmt
= SSA_NAME_DEF_STMT (op
);
8461 if (!gimple_nop_p (def_stmt
)
8462 && flow_bb_inside_loop_p (loop
, gimple_bb (def_stmt
)))
8464 /* Make sure we don't need to recurse. While we could do
8465 so in simple cases when there are more complex use webs
8466 we don't have an easy way to preserve stmt order to fulfil
8467 dependencies within them. */
8470 if (gimple_code (def_stmt
) == GIMPLE_PHI
)
8472 FOR_EACH_SSA_TREE_OPERAND (op2
, def_stmt
, i2
, SSA_OP_USE
)
8474 gimple
*def_stmt2
= SSA_NAME_DEF_STMT (op2
);
8475 if (!gimple_nop_p (def_stmt2
)
8476 && flow_bb_inside_loop_p (loop
, gimple_bb (def_stmt2
)))
8486 FOR_EACH_SSA_TREE_OPERAND (op
, stmt_info
->stmt
, i
, SSA_OP_USE
)
8488 gimple
*def_stmt
= SSA_NAME_DEF_STMT (op
);
8489 if (!gimple_nop_p (def_stmt
)
8490 && flow_bb_inside_loop_p (loop
, gimple_bb (def_stmt
)))
8492 gimple_stmt_iterator gsi
= gsi_for_stmt (def_stmt
);
8493 gsi_remove (&gsi
, false);
8494 gsi_insert_on_edge_immediate (loop_preheader_edge (loop
), def_stmt
);
8501 /* vectorizable_load.
8503 Check if STMT_INFO reads a non scalar data-ref (array/pointer/structure)
8504 that can be vectorized.
8505 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
8506 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
8507 Return true if STMT_INFO is vectorizable in this way. */
8510 vectorizable_load (stmt_vec_info stmt_info
, gimple_stmt_iterator
*gsi
,
8511 stmt_vec_info
*vec_stmt
, slp_tree slp_node
,
8512 slp_instance slp_node_instance
,
8513 stmt_vector_for_cost
*cost_vec
)
8516 tree vec_dest
= NULL
;
8517 tree data_ref
= NULL
;
8518 stmt_vec_info prev_stmt_info
;
8519 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
8520 class loop
*loop
= NULL
;
8521 class loop
*containing_loop
= gimple_bb (stmt_info
->stmt
)->loop_father
;
8522 bool nested_in_vect_loop
= false;
8527 enum dr_alignment_support alignment_support_scheme
;
8528 tree dataref_ptr
= NULL_TREE
;
8529 tree dataref_offset
= NULL_TREE
;
8530 gimple
*ptr_incr
= NULL
;
8533 unsigned int group_size
;
8534 poly_uint64 group_gap_adj
;
8535 tree msq
= NULL_TREE
, lsq
;
8536 tree offset
= NULL_TREE
;
8537 tree byte_offset
= NULL_TREE
;
8538 tree realignment_token
= NULL_TREE
;
8540 vec
<tree
> dr_chain
= vNULL
;
8541 bool grouped_load
= false;
8542 stmt_vec_info first_stmt_info
;
8543 stmt_vec_info first_stmt_info_for_drptr
= NULL
;
8544 bool compute_in_loop
= false;
8545 class loop
*at_loop
;
8547 bool slp
= (slp_node
!= NULL
);
8548 bool slp_perm
= false;
8549 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
8552 gather_scatter_info gs_info
;
8553 vec_info
*vinfo
= stmt_info
->vinfo
;
8555 enum vect_def_type mask_dt
= vect_unknown_def_type
;
8557 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
8560 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
8564 tree mask
= NULL_TREE
, mask_vectype
= NULL_TREE
;
8565 if (gassign
*assign
= dyn_cast
<gassign
*> (stmt_info
->stmt
))
8567 scalar_dest
= gimple_assign_lhs (assign
);
8568 if (TREE_CODE (scalar_dest
) != SSA_NAME
)
8571 tree_code code
= gimple_assign_rhs_code (assign
);
8572 if (code
!= ARRAY_REF
8573 && code
!= BIT_FIELD_REF
8574 && code
!= INDIRECT_REF
8575 && code
!= COMPONENT_REF
8576 && code
!= IMAGPART_EXPR
8577 && code
!= REALPART_EXPR
8579 && TREE_CODE_CLASS (code
) != tcc_declaration
)
8584 gcall
*call
= dyn_cast
<gcall
*> (stmt_info
->stmt
);
8585 if (!call
|| !gimple_call_internal_p (call
))
8588 internal_fn ifn
= gimple_call_internal_fn (call
);
8589 if (!internal_load_fn_p (ifn
))
8592 scalar_dest
= gimple_call_lhs (call
);
8596 int mask_index
= internal_fn_mask_index (ifn
);
8597 if (mask_index
>= 0)
8599 mask
= gimple_call_arg (call
, mask_index
);
8600 if (!vect_check_scalar_mask (stmt_info
, mask
, &mask_dt
,
8606 if (!STMT_VINFO_DATA_REF (stmt_info
))
8609 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
8610 poly_uint64 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
8614 loop
= LOOP_VINFO_LOOP (loop_vinfo
);
8615 nested_in_vect_loop
= nested_in_vect_loop_p (loop
, stmt_info
);
8616 vf
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
);
8621 /* Multiple types in SLP are handled by creating the appropriate number of
8622 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
8627 ncopies
= vect_get_num_copies (loop_vinfo
, vectype
);
8629 gcc_assert (ncopies
>= 1);
8631 /* FORNOW. This restriction should be relaxed. */
8632 if (nested_in_vect_loop
&& ncopies
> 1)
8634 if (dump_enabled_p ())
8635 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
8636 "multiple types in nested loop.\n");
8640 /* Invalidate assumptions made by dependence analysis when vectorization
8641 on the unrolled body effectively re-orders stmts. */
8643 && STMT_VINFO_MIN_NEG_DIST (stmt_info
) != 0
8644 && maybe_gt (LOOP_VINFO_VECT_FACTOR (loop_vinfo
),
8645 STMT_VINFO_MIN_NEG_DIST (stmt_info
)))
8647 if (dump_enabled_p ())
8648 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
8649 "cannot perform implicit CSE when unrolling "
8650 "with negative dependence distance\n");
8654 elem_type
= TREE_TYPE (vectype
);
8655 mode
= TYPE_MODE (vectype
);
8657 /* FORNOW. In some cases can vectorize even if data-type not supported
8658 (e.g. - data copies). */
8659 if (optab_handler (mov_optab
, mode
) == CODE_FOR_nothing
)
8661 if (dump_enabled_p ())
8662 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
8663 "Aligned load, but unsupported type.\n");
8667 /* Check if the load is a part of an interleaving chain. */
8668 if (STMT_VINFO_GROUPED_ACCESS (stmt_info
))
8670 grouped_load
= true;
8672 gcc_assert (!nested_in_vect_loop
);
8673 gcc_assert (!STMT_VINFO_GATHER_SCATTER_P (stmt_info
));
8675 first_stmt_info
= DR_GROUP_FIRST_ELEMENT (stmt_info
);
8676 group_size
= DR_GROUP_SIZE (first_stmt_info
);
8678 /* Refuse non-SLP vectorization of SLP-only groups. */
8679 if (!slp
&& STMT_VINFO_SLP_VECT_ONLY (first_stmt_info
))
8681 if (dump_enabled_p ())
8682 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
8683 "cannot vectorize load in non-SLP mode.\n");
8687 if (slp
&& SLP_TREE_LOAD_PERMUTATION (slp_node
).exists ())
8690 /* Invalidate assumptions made by dependence analysis when vectorization
8691 on the unrolled body effectively re-orders stmts. */
8692 if (!PURE_SLP_STMT (stmt_info
)
8693 && STMT_VINFO_MIN_NEG_DIST (stmt_info
) != 0
8694 && maybe_gt (LOOP_VINFO_VECT_FACTOR (loop_vinfo
),
8695 STMT_VINFO_MIN_NEG_DIST (stmt_info
)))
8697 if (dump_enabled_p ())
8698 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
8699 "cannot perform implicit CSE when performing "
8700 "group loads with negative dependence distance\n");
8707 vect_memory_access_type memory_access_type
;
8708 if (!get_load_store_type (stmt_info
, vectype
, slp
, mask
, VLS_LOAD
, ncopies
,
8709 &memory_access_type
, &gs_info
))
8714 if (memory_access_type
== VMAT_CONTIGUOUS
)
8716 machine_mode vec_mode
= TYPE_MODE (vectype
);
8717 if (!VECTOR_MODE_P (vec_mode
)
8718 || !can_vec_mask_load_store_p (vec_mode
,
8719 TYPE_MODE (mask_vectype
), true))
8722 else if (memory_access_type
!= VMAT_LOAD_STORE_LANES
8723 && memory_access_type
!= VMAT_GATHER_SCATTER
)
8725 if (dump_enabled_p ())
8726 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
8727 "unsupported access type for masked load.\n");
8732 if (!vec_stmt
) /* transformation not required. */
8735 STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info
) = memory_access_type
;
8738 && LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo
))
8739 check_load_store_masking (loop_vinfo
, vectype
, VLS_LOAD
, group_size
,
8740 memory_access_type
, &gs_info
, mask
);
8742 STMT_VINFO_TYPE (stmt_info
) = load_vec_info_type
;
8743 vect_model_load_cost (stmt_info
, ncopies
, memory_access_type
,
8744 slp_node_instance
, slp_node
, cost_vec
);
8749 gcc_assert (memory_access_type
8750 == STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info
));
8752 if (dump_enabled_p ())
8753 dump_printf_loc (MSG_NOTE
, vect_location
,
8754 "transform load. ncopies = %d\n", ncopies
);
8758 dr_vec_info
*dr_info
= STMT_VINFO_DR_INFO (stmt_info
), *first_dr_info
= NULL
;
8759 ensure_base_align (dr_info
);
8761 if (memory_access_type
== VMAT_GATHER_SCATTER
&& gs_info
.decl
)
8763 vect_build_gather_load_calls (stmt_info
, gsi
, vec_stmt
, &gs_info
, mask
);
8767 if (memory_access_type
== VMAT_INVARIANT
)
8769 gcc_assert (!grouped_load
&& !mask
&& !bb_vinfo
);
8770 /* If we have versioned for aliasing or the loop doesn't
8771 have any data dependencies that would preclude this,
8772 then we are sure this is a loop invariant load and
8773 thus we can insert it on the preheader edge. */
8774 bool hoist_p
= (LOOP_VINFO_NO_DATA_DEPENDENCIES (loop_vinfo
)
8775 && !nested_in_vect_loop
8776 && hoist_defs_of_uses (stmt_info
, loop
));
8779 gassign
*stmt
= as_a
<gassign
*> (stmt_info
->stmt
);
8780 if (dump_enabled_p ())
8781 dump_printf_loc (MSG_NOTE
, vect_location
,
8782 "hoisting out of the vectorized loop: %G", stmt
);
8783 scalar_dest
= copy_ssa_name (scalar_dest
);
8784 tree rhs
= unshare_expr (gimple_assign_rhs1 (stmt
));
8785 gsi_insert_on_edge_immediate
8786 (loop_preheader_edge (loop
),
8787 gimple_build_assign (scalar_dest
, rhs
));
8789 /* These copies are all equivalent, but currently the representation
8790 requires a separate STMT_VINFO_VEC_STMT for each one. */
8791 prev_stmt_info
= NULL
;
8792 gimple_stmt_iterator gsi2
= *gsi
;
8794 for (j
= 0; j
< ncopies
; j
++)
8796 stmt_vec_info new_stmt_info
;
8799 new_temp
= vect_init_vector (stmt_info
, scalar_dest
,
8801 gimple
*new_stmt
= SSA_NAME_DEF_STMT (new_temp
);
8802 new_stmt_info
= vinfo
->add_stmt (new_stmt
);
8806 new_temp
= vect_init_vector (stmt_info
, scalar_dest
,
8808 new_stmt_info
= vinfo
->lookup_def (new_temp
);
8811 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt_info
);
8813 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt_info
;
8815 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt_info
;
8816 prev_stmt_info
= new_stmt_info
;
8821 if (memory_access_type
== VMAT_ELEMENTWISE
8822 || memory_access_type
== VMAT_STRIDED_SLP
)
8824 gimple_stmt_iterator incr_gsi
;
8830 vec
<constructor_elt
, va_gc
> *v
= NULL
;
8831 tree stride_base
, stride_step
, alias_off
;
8832 /* Checked by get_load_store_type. */
8833 unsigned int const_nunits
= nunits
.to_constant ();
8834 unsigned HOST_WIDE_INT cst_offset
= 0;
8837 gcc_assert (!LOOP_VINFO_FULLY_MASKED_P (loop_vinfo
));
8838 gcc_assert (!nested_in_vect_loop
);
8842 first_stmt_info
= DR_GROUP_FIRST_ELEMENT (stmt_info
);
8843 first_dr_info
= STMT_VINFO_DR_INFO (first_stmt_info
);
8847 first_stmt_info
= stmt_info
;
8848 first_dr_info
= dr_info
;
8850 if (slp
&& grouped_load
)
8852 group_size
= DR_GROUP_SIZE (first_stmt_info
);
8853 ref_type
= get_group_alias_ptr_type (first_stmt_info
);
8859 = (tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype
)))
8860 * vect_get_place_in_interleaving_chain (stmt_info
,
8863 ref_type
= reference_alias_ptr_type (DR_REF (dr_info
->dr
));
8866 dr_offset
= get_dr_vinfo_offset (first_dr_info
);
8868 = fold_build_pointer_plus
8869 (DR_BASE_ADDRESS (first_dr_info
->dr
),
8870 size_binop (PLUS_EXPR
,
8871 convert_to_ptrofftype (dr_offset
),
8872 convert_to_ptrofftype (DR_INIT (first_dr_info
->dr
))));
8873 stride_step
= fold_convert (sizetype
, DR_STEP (first_dr_info
->dr
));
8875 /* For a load with loop-invariant (but other than power-of-2)
8876 stride (i.e. not a grouped access) like so:
8878 for (i = 0; i < n; i += stride)
8881 we generate a new induction variable and new accesses to
8882 form a new vector (or vectors, depending on ncopies):
8884 for (j = 0; ; j += VF*stride)
8886 tmp2 = array[j + stride];
8888 vectemp = {tmp1, tmp2, ...}
8891 ivstep
= fold_build2 (MULT_EXPR
, TREE_TYPE (stride_step
), stride_step
,
8892 build_int_cst (TREE_TYPE (stride_step
), vf
));
8894 standard_iv_increment_position (loop
, &incr_gsi
, &insert_after
);
8896 stride_base
= cse_and_gimplify_to_preheader (loop_vinfo
, stride_base
);
8897 ivstep
= cse_and_gimplify_to_preheader (loop_vinfo
, ivstep
);
8898 create_iv (stride_base
, ivstep
, NULL
,
8899 loop
, &incr_gsi
, insert_after
,
8901 incr
= gsi_stmt (incr_gsi
);
8902 loop_vinfo
->add_stmt (incr
);
8904 stride_step
= cse_and_gimplify_to_preheader (loop_vinfo
, stride_step
);
8906 prev_stmt_info
= NULL
;
8907 running_off
= offvar
;
8908 alias_off
= build_int_cst (ref_type
, 0);
8909 int nloads
= const_nunits
;
8911 tree ltype
= TREE_TYPE (vectype
);
8912 tree lvectype
= vectype
;
8913 auto_vec
<tree
> dr_chain
;
8914 if (memory_access_type
== VMAT_STRIDED_SLP
)
8916 if (group_size
< const_nunits
)
8918 /* First check if vec_init optab supports construction from
8919 vector elts directly. */
8920 scalar_mode elmode
= SCALAR_TYPE_MODE (TREE_TYPE (vectype
));
8922 if (VECTOR_MODE_P (TYPE_MODE (vectype
))
8923 && related_vector_mode (TYPE_MODE (vectype
), elmode
,
8924 group_size
).exists (&vmode
)
8925 && (convert_optab_handler (vec_init_optab
,
8926 TYPE_MODE (vectype
), vmode
)
8927 != CODE_FOR_nothing
))
8929 nloads
= const_nunits
/ group_size
;
8931 ltype
= build_vector_type (TREE_TYPE (vectype
), group_size
);
8935 /* Otherwise avoid emitting a constructor of vector elements
8936 by performing the loads using an integer type of the same
8937 size, constructing a vector of those and then
8938 re-interpreting it as the original vector type.
8939 This avoids a huge runtime penalty due to the general
8940 inability to perform store forwarding from smaller stores
8941 to a larger load. */
8943 = group_size
* TYPE_PRECISION (TREE_TYPE (vectype
));
8944 unsigned int lnunits
= const_nunits
/ group_size
;
8945 /* If we can't construct such a vector fall back to
8946 element loads of the original vector type. */
8947 if (int_mode_for_size (lsize
, 0).exists (&elmode
)
8948 && VECTOR_MODE_P (TYPE_MODE (vectype
))
8949 && related_vector_mode (TYPE_MODE (vectype
), elmode
,
8950 lnunits
).exists (&vmode
)
8951 && (convert_optab_handler (vec_init_optab
, vmode
, elmode
)
8952 != CODE_FOR_nothing
))
8956 ltype
= build_nonstandard_integer_type (lsize
, 1);
8957 lvectype
= build_vector_type (ltype
, nloads
);
8964 lnel
= const_nunits
;
8967 ltype
= build_aligned_type (ltype
, TYPE_ALIGN (TREE_TYPE (vectype
)));
8969 /* Load vector(1) scalar_type if it's 1 element-wise vectype. */
8970 else if (nloads
== 1)
8975 /* For SLP permutation support we need to load the whole group,
8976 not only the number of vector stmts the permutation result
8980 /* We don't yet generate SLP_TREE_LOAD_PERMUTATIONs for
8982 unsigned int const_vf
= vf
.to_constant ();
8983 ncopies
= CEIL (group_size
* const_vf
, const_nunits
);
8984 dr_chain
.create (ncopies
);
8987 ncopies
= SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
);
8989 unsigned int group_el
= 0;
8990 unsigned HOST_WIDE_INT
8991 elsz
= tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype
)));
8992 for (j
= 0; j
< ncopies
; j
++)
8995 vec_alloc (v
, nloads
);
8996 stmt_vec_info new_stmt_info
= NULL
;
8997 for (i
= 0; i
< nloads
; i
++)
8999 tree this_off
= build_int_cst (TREE_TYPE (alias_off
),
9000 group_el
* elsz
+ cst_offset
);
9001 tree data_ref
= build2 (MEM_REF
, ltype
, running_off
, this_off
);
9002 vect_copy_ref_info (data_ref
, DR_REF (first_dr_info
->dr
));
9004 = gimple_build_assign (make_ssa_name (ltype
), data_ref
);
9006 = vect_finish_stmt_generation (stmt_info
, new_stmt
, gsi
);
9008 CONSTRUCTOR_APPEND_ELT (v
, NULL_TREE
,
9009 gimple_assign_lhs (new_stmt
));
9013 || group_el
== group_size
)
9015 tree newoff
= copy_ssa_name (running_off
);
9016 gimple
*incr
= gimple_build_assign (newoff
, POINTER_PLUS_EXPR
,
9017 running_off
, stride_step
);
9018 vect_finish_stmt_generation (stmt_info
, incr
, gsi
);
9020 running_off
= newoff
;
9026 tree vec_inv
= build_constructor (lvectype
, v
);
9027 new_temp
= vect_init_vector (stmt_info
, vec_inv
, lvectype
, gsi
);
9028 new_stmt_info
= vinfo
->lookup_def (new_temp
);
9029 if (lvectype
!= vectype
)
9032 = gimple_build_assign (make_ssa_name (vectype
),
9034 build1 (VIEW_CONVERT_EXPR
,
9035 vectype
, new_temp
));
9037 = vect_finish_stmt_generation (stmt_info
, new_stmt
, gsi
);
9044 dr_chain
.quick_push (gimple_assign_lhs (new_stmt_info
->stmt
));
9046 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt_info
);
9051 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt_info
;
9053 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt_info
;
9054 prev_stmt_info
= new_stmt_info
;
9060 vect_transform_slp_perm_load (slp_node
, dr_chain
, gsi
, vf
,
9061 slp_node_instance
, false, &n_perms
);
9066 if (memory_access_type
== VMAT_GATHER_SCATTER
9067 || (!slp
&& memory_access_type
== VMAT_CONTIGUOUS
))
9068 grouped_load
= false;
9072 first_stmt_info
= DR_GROUP_FIRST_ELEMENT (stmt_info
);
9073 group_size
= DR_GROUP_SIZE (first_stmt_info
);
9074 /* For SLP vectorization we directly vectorize a subchain
9075 without permutation. */
9076 if (slp
&& ! SLP_TREE_LOAD_PERMUTATION (slp_node
).exists ())
9077 first_stmt_info
= SLP_TREE_SCALAR_STMTS (slp_node
)[0];
9078 /* For BB vectorization always use the first stmt to base
9079 the data ref pointer on. */
9081 first_stmt_info_for_drptr
= SLP_TREE_SCALAR_STMTS (slp_node
)[0];
9083 /* Check if the chain of loads is already vectorized. */
9084 if (STMT_VINFO_VEC_STMT (first_stmt_info
)
9085 /* For SLP we would need to copy over SLP_TREE_VEC_STMTS.
9086 ??? But we can only do so if there is exactly one
9087 as we have no way to get at the rest. Leave the CSE
9089 ??? With the group load eventually participating
9090 in multiple different permutations (having multiple
9091 slp nodes which refer to the same group) the CSE
9092 is even wrong code. See PR56270. */
9095 *vec_stmt
= STMT_VINFO_VEC_STMT (stmt_info
);
9098 first_dr_info
= STMT_VINFO_DR_INFO (first_stmt_info
);
9101 /* VEC_NUM is the number of vect stmts to be created for this group. */
9104 grouped_load
= false;
9105 /* If an SLP permutation is from N elements to N elements,
9106 and if one vector holds a whole number of N, we can load
9107 the inputs to the permutation in the same way as an
9108 unpermuted sequence. In other cases we need to load the
9109 whole group, not only the number of vector stmts the
9110 permutation result fits in. */
9112 && (group_size
!= SLP_INSTANCE_GROUP_SIZE (slp_node_instance
)
9113 || !multiple_p (nunits
, group_size
)))
9115 /* We don't yet generate such SLP_TREE_LOAD_PERMUTATIONs for
9116 variable VF; see vect_transform_slp_perm_load. */
9117 unsigned int const_vf
= vf
.to_constant ();
9118 unsigned int const_nunits
= nunits
.to_constant ();
9119 vec_num
= CEIL (group_size
* const_vf
, const_nunits
);
9120 group_gap_adj
= vf
* group_size
- nunits
* vec_num
;
9124 vec_num
= SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
);
9126 = group_size
- SLP_INSTANCE_GROUP_SIZE (slp_node_instance
);
9130 vec_num
= group_size
;
9132 ref_type
= get_group_alias_ptr_type (first_stmt_info
);
9136 first_stmt_info
= stmt_info
;
9137 first_dr_info
= dr_info
;
9138 group_size
= vec_num
= 1;
9140 ref_type
= reference_alias_ptr_type (DR_REF (first_dr_info
->dr
));
9143 alignment_support_scheme
9144 = vect_supportable_dr_alignment (first_dr_info
, false);
9145 gcc_assert (alignment_support_scheme
);
9146 vec_loop_masks
*loop_masks
9147 = (loop_vinfo
&& LOOP_VINFO_FULLY_MASKED_P (loop_vinfo
)
9148 ? &LOOP_VINFO_MASKS (loop_vinfo
)
9150 /* Targets with store-lane instructions must not require explicit
9151 realignment. vect_supportable_dr_alignment always returns either
9152 dr_aligned or dr_unaligned_supported for masked operations. */
9153 gcc_assert ((memory_access_type
!= VMAT_LOAD_STORE_LANES
9156 || alignment_support_scheme
== dr_aligned
9157 || alignment_support_scheme
== dr_unaligned_supported
);
9159 /* In case the vectorization factor (VF) is bigger than the number
9160 of elements that we can fit in a vectype (nunits), we have to generate
9161 more than one vector stmt - i.e - we need to "unroll" the
9162 vector stmt by a factor VF/nunits. In doing so, we record a pointer
9163 from one copy of the vector stmt to the next, in the field
9164 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
9165 stages to find the correct vector defs to be used when vectorizing
9166 stmts that use the defs of the current stmt. The example below
9167 illustrates the vectorization process when VF=16 and nunits=4 (i.e., we
9168 need to create 4 vectorized stmts):
9170 before vectorization:
9171 RELATED_STMT VEC_STMT
9175 step 1: vectorize stmt S1:
9176 We first create the vector stmt VS1_0, and, as usual, record a
9177 pointer to it in the STMT_VINFO_VEC_STMT of the scalar stmt S1.
9178 Next, we create the vector stmt VS1_1, and record a pointer to
9179 it in the STMT_VINFO_RELATED_STMT of the vector stmt VS1_0.
9180 Similarly, for VS1_2 and VS1_3. This is the resulting chain of
9182 RELATED_STMT VEC_STMT
9183 VS1_0: vx0 = memref0 VS1_1 -
9184 VS1_1: vx1 = memref1 VS1_2 -
9185 VS1_2: vx2 = memref2 VS1_3 -
9186 VS1_3: vx3 = memref3 - -
9187 S1: x = load - VS1_0
9190 See in documentation in vect_get_vec_def_for_stmt_copy for how the
9191 information we recorded in RELATED_STMT field is used to vectorize
9194 /* In case of interleaving (non-unit grouped access):
9201 Vectorized loads are created in the order of memory accesses
9202 starting from the access of the first stmt of the chain:
9205 VS2: vx1 = &base + vec_size*1
9206 VS3: vx3 = &base + vec_size*2
9207 VS4: vx4 = &base + vec_size*3
9209 Then permutation statements are generated:
9211 VS5: vx5 = VEC_PERM_EXPR < vx0, vx1, { 0, 2, ..., i*2 } >
9212 VS6: vx6 = VEC_PERM_EXPR < vx0, vx1, { 1, 3, ..., i*2+1 } >
9215 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
9216 (the order of the data-refs in the output of vect_permute_load_chain
9217 corresponds to the order of scalar stmts in the interleaving chain - see
9218 the documentation of vect_permute_load_chain()).
9219 The generation of permutation stmts and recording them in
9220 STMT_VINFO_VEC_STMT is done in vect_transform_grouped_load().
9222 In case of both multiple types and interleaving, the vector loads and
9223 permutation stmts above are created for every copy. The result vector
9224 stmts are put in STMT_VINFO_VEC_STMT for the first copy and in the
9225 corresponding STMT_VINFO_RELATED_STMT for the next copies. */
9227 /* If the data reference is aligned (dr_aligned) or potentially unaligned
9228 on a target that supports unaligned accesses (dr_unaligned_supported)
9229 we generate the following code:
9233 p = p + indx * vectype_size;
9238 Otherwise, the data reference is potentially unaligned on a target that
9239 does not support unaligned accesses (dr_explicit_realign_optimized) -
9240 then generate the following code, in which the data in each iteration is
9241 obtained by two vector loads, one from the previous iteration, and one
9242 from the current iteration:
9244 msq_init = *(floor(p1))
9245 p2 = initial_addr + VS - 1;
9246 realignment_token = call target_builtin;
9249 p2 = p2 + indx * vectype_size
9251 vec_dest = realign_load (msq, lsq, realignment_token)
9256 /* If the misalignment remains the same throughout the execution of the
9257 loop, we can create the init_addr and permutation mask at the loop
9258 preheader. Otherwise, it needs to be created inside the loop.
9259 This can only occur when vectorizing memory accesses in the inner-loop
9260 nested within an outer-loop that is being vectorized. */
9262 if (nested_in_vect_loop
9263 && !multiple_p (DR_STEP_ALIGNMENT (dr_info
->dr
),
9264 GET_MODE_SIZE (TYPE_MODE (vectype
))))
9266 gcc_assert (alignment_support_scheme
!= dr_explicit_realign_optimized
);
9267 compute_in_loop
= true;
9270 bool diff_first_stmt_info
9271 = first_stmt_info_for_drptr
&& first_stmt_info
!= first_stmt_info_for_drptr
;
9273 if ((alignment_support_scheme
== dr_explicit_realign_optimized
9274 || alignment_support_scheme
== dr_explicit_realign
)
9275 && !compute_in_loop
)
9277 /* If we have different first_stmt_info, we can't set up realignment
9278 here, since we can't guarantee first_stmt_info DR has been
9279 initialized yet, use first_stmt_info_for_drptr DR by bumping the
9280 distance from first_stmt_info DR instead as below. */
9281 if (!diff_first_stmt_info
)
9282 msq
= vect_setup_realignment (first_stmt_info
, gsi
, &realignment_token
,
9283 alignment_support_scheme
, NULL_TREE
,
9285 if (alignment_support_scheme
== dr_explicit_realign_optimized
)
9287 phi
= as_a
<gphi
*> (SSA_NAME_DEF_STMT (msq
));
9288 byte_offset
= size_binop (MINUS_EXPR
, TYPE_SIZE_UNIT (vectype
),
9290 gcc_assert (!first_stmt_info_for_drptr
);
9296 if (memory_access_type
== VMAT_CONTIGUOUS_REVERSE
)
9297 offset
= size_int (-TYPE_VECTOR_SUBPARTS (vectype
) + 1);
9300 tree vec_offset
= NULL_TREE
;
9301 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info
))
9303 aggr_type
= NULL_TREE
;
9306 else if (memory_access_type
== VMAT_GATHER_SCATTER
)
9308 aggr_type
= elem_type
;
9309 vect_get_strided_load_store_ops (stmt_info
, loop_vinfo
, &gs_info
,
9310 &bump
, &vec_offset
);
9314 if (memory_access_type
== VMAT_LOAD_STORE_LANES
)
9315 aggr_type
= build_array_type_nelts (elem_type
, vec_num
* nunits
);
9317 aggr_type
= vectype
;
9318 bump
= vect_get_data_ptr_increment (dr_info
, aggr_type
,
9319 memory_access_type
);
9322 tree vec_mask
= NULL_TREE
;
9323 prev_stmt_info
= NULL
;
9324 poly_uint64 group_elt
= 0;
9325 for (j
= 0; j
< ncopies
; j
++)
9327 stmt_vec_info new_stmt_info
= NULL
;
9328 /* 1. Create the vector or array pointer update chain. */
9331 bool simd_lane_access_p
9332 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) != 0;
9333 if (simd_lane_access_p
9334 && TREE_CODE (DR_BASE_ADDRESS (first_dr_info
->dr
)) == ADDR_EXPR
9335 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr_info
->dr
), 0))
9336 && integer_zerop (get_dr_vinfo_offset (first_dr_info
))
9337 && integer_zerop (DR_INIT (first_dr_info
->dr
))
9338 && alias_sets_conflict_p (get_alias_set (aggr_type
),
9339 get_alias_set (TREE_TYPE (ref_type
)))
9340 && (alignment_support_scheme
== dr_aligned
9341 || alignment_support_scheme
== dr_unaligned_supported
))
9343 dataref_ptr
= unshare_expr (DR_BASE_ADDRESS (first_dr_info
->dr
));
9344 dataref_offset
= build_int_cst (ref_type
, 0);
9346 else if (diff_first_stmt_info
)
9349 = vect_create_data_ref_ptr (first_stmt_info_for_drptr
,
9350 aggr_type
, at_loop
, offset
, &dummy
,
9351 gsi
, &ptr_incr
, simd_lane_access_p
,
9353 /* Adjust the pointer by the difference to first_stmt. */
9354 data_reference_p ptrdr
9355 = STMT_VINFO_DATA_REF (first_stmt_info_for_drptr
);
9357 = fold_convert (sizetype
,
9358 size_binop (MINUS_EXPR
,
9359 DR_INIT (first_dr_info
->dr
),
9361 dataref_ptr
= bump_vector_ptr (dataref_ptr
, ptr_incr
, gsi
,
9363 if (alignment_support_scheme
== dr_explicit_realign
)
9365 msq
= vect_setup_realignment (first_stmt_info_for_drptr
, gsi
,
9367 alignment_support_scheme
,
9368 dataref_ptr
, &at_loop
);
9369 gcc_assert (!compute_in_loop
);
9372 else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info
))
9373 vect_get_gather_scatter_ops (loop
, stmt_info
, &gs_info
,
9374 &dataref_ptr
, &vec_offset
);
9377 = vect_create_data_ref_ptr (first_stmt_info
, aggr_type
, at_loop
,
9378 offset
, &dummy
, gsi
, &ptr_incr
,
9385 auto_vec
<vec
<tree
> > vec_defs (1);
9386 vect_get_slp_defs (slp_node
, &vec_defs
);
9387 vec_mask
= vec_defs
[0][0];
9390 vec_mask
= vect_get_vec_def_for_operand (mask
, stmt_info
,
9397 dataref_offset
= int_const_binop (PLUS_EXPR
, dataref_offset
,
9399 else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info
))
9400 vec_offset
= vect_get_vec_def_for_stmt_copy (vinfo
, vec_offset
);
9402 dataref_ptr
= bump_vector_ptr (dataref_ptr
, ptr_incr
, gsi
,
9405 vec_mask
= vect_get_vec_def_for_stmt_copy (vinfo
, vec_mask
);
9408 if (grouped_load
|| slp_perm
)
9409 dr_chain
.create (vec_num
);
9411 if (memory_access_type
== VMAT_LOAD_STORE_LANES
)
9415 vec_array
= create_vector_array (vectype
, vec_num
);
9417 tree final_mask
= NULL_TREE
;
9419 final_mask
= vect_get_loop_mask (gsi
, loop_masks
, ncopies
,
9422 final_mask
= prepare_load_store_mask (mask_vectype
, final_mask
,
9429 VEC_ARRAY = MASK_LOAD_LANES (DATAREF_PTR, ALIAS_PTR,
9431 unsigned int align
= TYPE_ALIGN_UNIT (TREE_TYPE (vectype
));
9432 tree alias_ptr
= build_int_cst (ref_type
, align
);
9433 call
= gimple_build_call_internal (IFN_MASK_LOAD_LANES
, 3,
9434 dataref_ptr
, alias_ptr
,
9440 VEC_ARRAY = LOAD_LANES (MEM_REF[...all elements...]). */
9441 data_ref
= create_array_ref (aggr_type
, dataref_ptr
, ref_type
);
9442 call
= gimple_build_call_internal (IFN_LOAD_LANES
, 1, data_ref
);
9444 gimple_call_set_lhs (call
, vec_array
);
9445 gimple_call_set_nothrow (call
, true);
9446 new_stmt_info
= vect_finish_stmt_generation (stmt_info
, call
, gsi
);
9448 /* Extract each vector into an SSA_NAME. */
9449 for (i
= 0; i
< vec_num
; i
++)
9451 new_temp
= read_vector_array (stmt_info
, gsi
, scalar_dest
,
9453 dr_chain
.quick_push (new_temp
);
9456 /* Record the mapping between SSA_NAMEs and statements. */
9457 vect_record_grouped_load_vectors (stmt_info
, dr_chain
);
9459 /* Record that VEC_ARRAY is now dead. */
9460 vect_clobber_variable (stmt_info
, gsi
, vec_array
);
9464 for (i
= 0; i
< vec_num
; i
++)
9466 tree final_mask
= NULL_TREE
;
9468 && memory_access_type
!= VMAT_INVARIANT
)
9469 final_mask
= vect_get_loop_mask (gsi
, loop_masks
,
9471 vectype
, vec_num
* j
+ i
);
9473 final_mask
= prepare_load_store_mask (mask_vectype
, final_mask
,
9477 dataref_ptr
= bump_vector_ptr (dataref_ptr
, ptr_incr
, gsi
,
9480 /* 2. Create the vector-load in the loop. */
9481 gimple
*new_stmt
= NULL
;
9482 switch (alignment_support_scheme
)
9485 case dr_unaligned_supported
:
9487 unsigned int misalign
;
9488 unsigned HOST_WIDE_INT align
;
9490 if (memory_access_type
== VMAT_GATHER_SCATTER
)
9492 tree zero
= build_zero_cst (vectype
);
9493 tree scale
= size_int (gs_info
.scale
);
9496 call
= gimple_build_call_internal
9497 (IFN_MASK_GATHER_LOAD
, 5, dataref_ptr
,
9498 vec_offset
, scale
, zero
, final_mask
);
9500 call
= gimple_build_call_internal
9501 (IFN_GATHER_LOAD
, 4, dataref_ptr
,
9502 vec_offset
, scale
, zero
);
9503 gimple_call_set_nothrow (call
, true);
9505 data_ref
= NULL_TREE
;
9510 known_alignment (DR_TARGET_ALIGNMENT (first_dr_info
));
9511 if (alignment_support_scheme
== dr_aligned
)
9513 gcc_assert (aligned_access_p (first_dr_info
));
9516 else if (DR_MISALIGNMENT (first_dr_info
) == -1)
9518 align
= dr_alignment
9519 (vect_dr_behavior (first_dr_info
));
9523 misalign
= DR_MISALIGNMENT (first_dr_info
);
9524 if (dataref_offset
== NULL_TREE
9525 && TREE_CODE (dataref_ptr
) == SSA_NAME
)
9526 set_ptr_info_alignment (get_ptr_info (dataref_ptr
),
9531 align
= least_bit_hwi (misalign
| align
);
9532 tree ptr
= build_int_cst (ref_type
, align
);
9534 = gimple_build_call_internal (IFN_MASK_LOAD
, 3,
9537 gimple_call_set_nothrow (call
, true);
9539 data_ref
= NULL_TREE
;
9543 tree ltype
= vectype
;
9544 /* If there's no peeling for gaps but we have a gap
9545 with slp loads then load the lower half of the
9546 vector only. See get_group_load_store_type for
9547 when we apply this optimization. */
9550 && !LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo
)
9551 && DR_GROUP_GAP (first_stmt_info
) != 0
9552 && known_eq (nunits
,
9554 - DR_GROUP_GAP (first_stmt_info
)) * 2)
9555 && known_eq (nunits
, group_size
))
9556 ltype
= build_vector_type (TREE_TYPE (vectype
),
9559 (first_stmt_info
)));
9561 = fold_build2 (MEM_REF
, ltype
, dataref_ptr
,
9564 : build_int_cst (ref_type
, 0));
9565 if (alignment_support_scheme
== dr_aligned
)
9567 else if (DR_MISALIGNMENT (first_dr_info
) == -1)
9568 TREE_TYPE (data_ref
)
9569 = build_aligned_type (TREE_TYPE (data_ref
),
9570 align
* BITS_PER_UNIT
);
9572 TREE_TYPE (data_ref
)
9573 = build_aligned_type (TREE_TYPE (data_ref
),
9574 TYPE_ALIGN (elem_type
));
9575 if (ltype
!= vectype
)
9577 vect_copy_ref_info (data_ref
, DR_REF (first_dr_info
->dr
));
9578 tree tem
= make_ssa_name (ltype
);
9579 new_stmt
= gimple_build_assign (tem
, data_ref
);
9580 vect_finish_stmt_generation (stmt_info
, new_stmt
, gsi
);
9582 vec
<constructor_elt
, va_gc
> *v
;
9584 CONSTRUCTOR_APPEND_ELT (v
, NULL_TREE
, tem
);
9585 CONSTRUCTOR_APPEND_ELT (v
, NULL_TREE
,
9586 build_zero_cst (ltype
));
9588 = gimple_build_assign (vec_dest
,
9595 case dr_explicit_realign
:
9599 tree vs
= size_int (TYPE_VECTOR_SUBPARTS (vectype
));
9601 if (compute_in_loop
)
9602 msq
= vect_setup_realignment (first_stmt_info
, gsi
,
9604 dr_explicit_realign
,
9607 if (TREE_CODE (dataref_ptr
) == SSA_NAME
)
9608 ptr
= copy_ssa_name (dataref_ptr
);
9610 ptr
= make_ssa_name (TREE_TYPE (dataref_ptr
));
9611 // For explicit realign the target alignment should be
9612 // known at compile time.
9613 unsigned HOST_WIDE_INT align
=
9614 DR_TARGET_ALIGNMENT (first_dr_info
).to_constant ();
9615 new_stmt
= gimple_build_assign
9616 (ptr
, BIT_AND_EXPR
, dataref_ptr
,
9618 (TREE_TYPE (dataref_ptr
),
9619 -(HOST_WIDE_INT
) align
));
9620 vect_finish_stmt_generation (stmt_info
, new_stmt
, gsi
);
9622 = build2 (MEM_REF
, vectype
, ptr
,
9623 build_int_cst (ref_type
, 0));
9624 vect_copy_ref_info (data_ref
, DR_REF (first_dr_info
->dr
));
9625 vec_dest
= vect_create_destination_var (scalar_dest
,
9627 new_stmt
= gimple_build_assign (vec_dest
, data_ref
);
9628 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
9629 gimple_assign_set_lhs (new_stmt
, new_temp
);
9630 gimple_move_vops (new_stmt
, stmt_info
->stmt
);
9631 vect_finish_stmt_generation (stmt_info
, new_stmt
, gsi
);
9634 bump
= size_binop (MULT_EXPR
, vs
,
9635 TYPE_SIZE_UNIT (elem_type
));
9636 bump
= size_binop (MINUS_EXPR
, bump
, size_one_node
);
9637 ptr
= bump_vector_ptr (dataref_ptr
, NULL
, gsi
,
9639 new_stmt
= gimple_build_assign
9640 (NULL_TREE
, BIT_AND_EXPR
, ptr
,
9642 (TREE_TYPE (ptr
), -(HOST_WIDE_INT
) align
));
9643 ptr
= copy_ssa_name (ptr
, new_stmt
);
9644 gimple_assign_set_lhs (new_stmt
, ptr
);
9645 vect_finish_stmt_generation (stmt_info
, new_stmt
, gsi
);
9647 = build2 (MEM_REF
, vectype
, ptr
,
9648 build_int_cst (ref_type
, 0));
9651 case dr_explicit_realign_optimized
:
9653 if (TREE_CODE (dataref_ptr
) == SSA_NAME
)
9654 new_temp
= copy_ssa_name (dataref_ptr
);
9656 new_temp
= make_ssa_name (TREE_TYPE (dataref_ptr
));
9657 // We should only be doing this if we know the target
9658 // alignment at compile time.
9659 unsigned HOST_WIDE_INT align
=
9660 DR_TARGET_ALIGNMENT (first_dr_info
).to_constant ();
9661 new_stmt
= gimple_build_assign
9662 (new_temp
, BIT_AND_EXPR
, dataref_ptr
,
9663 build_int_cst (TREE_TYPE (dataref_ptr
),
9664 -(HOST_WIDE_INT
) align
));
9665 vect_finish_stmt_generation (stmt_info
, new_stmt
, gsi
);
9667 = build2 (MEM_REF
, vectype
, new_temp
,
9668 build_int_cst (ref_type
, 0));
9674 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
9675 /* DATA_REF is null if we've already built the statement. */
9678 vect_copy_ref_info (data_ref
, DR_REF (first_dr_info
->dr
));
9679 new_stmt
= gimple_build_assign (vec_dest
, data_ref
);
9681 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
9682 gimple_set_lhs (new_stmt
, new_temp
);
9684 = vect_finish_stmt_generation (stmt_info
, new_stmt
, gsi
);
9686 /* 3. Handle explicit realignment if necessary/supported.
9688 vec_dest = realign_load (msq, lsq, realignment_token) */
9689 if (alignment_support_scheme
== dr_explicit_realign_optimized
9690 || alignment_support_scheme
== dr_explicit_realign
)
9692 lsq
= gimple_assign_lhs (new_stmt
);
9693 if (!realignment_token
)
9694 realignment_token
= dataref_ptr
;
9695 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
9696 new_stmt
= gimple_build_assign (vec_dest
, REALIGN_LOAD_EXPR
,
9697 msq
, lsq
, realignment_token
);
9698 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
9699 gimple_assign_set_lhs (new_stmt
, new_temp
);
9701 = vect_finish_stmt_generation (stmt_info
, new_stmt
, gsi
);
9703 if (alignment_support_scheme
== dr_explicit_realign_optimized
)
9706 if (i
== vec_num
- 1 && j
== ncopies
- 1)
9707 add_phi_arg (phi
, lsq
,
9708 loop_latch_edge (containing_loop
),
9714 if (memory_access_type
== VMAT_CONTIGUOUS_REVERSE
)
9716 tree perm_mask
= perm_mask_for_reverse (vectype
);
9717 new_temp
= permute_vec_elements (new_temp
, new_temp
,
9718 perm_mask
, stmt_info
, gsi
);
9719 new_stmt_info
= vinfo
->lookup_def (new_temp
);
9722 /* Collect vector loads and later create their permutation in
9723 vect_transform_grouped_load (). */
9724 if (grouped_load
|| slp_perm
)
9725 dr_chain
.quick_push (new_temp
);
9727 /* Store vector loads in the corresponding SLP_NODE. */
9728 if (slp
&& !slp_perm
)
9729 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt_info
);
9731 /* With SLP permutation we load the gaps as well, without
9732 we need to skip the gaps after we manage to fully load
9733 all elements. group_gap_adj is DR_GROUP_SIZE here. */
9734 group_elt
+= nunits
;
9735 if (maybe_ne (group_gap_adj
, 0U)
9737 && known_eq (group_elt
, group_size
- group_gap_adj
))
9739 poly_wide_int bump_val
9740 = (wi::to_wide (TYPE_SIZE_UNIT (elem_type
))
9742 tree bump
= wide_int_to_tree (sizetype
, bump_val
);
9743 dataref_ptr
= bump_vector_ptr (dataref_ptr
, ptr_incr
, gsi
,
9748 /* Bump the vector pointer to account for a gap or for excess
9749 elements loaded for a permuted SLP load. */
9750 if (maybe_ne (group_gap_adj
, 0U) && slp_perm
)
9752 poly_wide_int bump_val
9753 = (wi::to_wide (TYPE_SIZE_UNIT (elem_type
))
9755 tree bump
= wide_int_to_tree (sizetype
, bump_val
);
9756 dataref_ptr
= bump_vector_ptr (dataref_ptr
, ptr_incr
, gsi
,
9761 if (slp
&& !slp_perm
)
9767 if (!vect_transform_slp_perm_load (slp_node
, dr_chain
, gsi
, vf
,
9768 slp_node_instance
, false,
9771 dr_chain
.release ();
9779 if (memory_access_type
!= VMAT_LOAD_STORE_LANES
)
9780 vect_transform_grouped_load (stmt_info
, dr_chain
,
9782 *vec_stmt
= STMT_VINFO_VEC_STMT (stmt_info
);
9787 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt_info
;
9789 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt_info
;
9790 prev_stmt_info
= new_stmt_info
;
9793 dr_chain
.release ();
9799 /* Function vect_is_simple_cond.
9802 LOOP - the loop that is being vectorized.
9803 COND - Condition that is checked for simple use.
9806 *COMP_VECTYPE - the vector type for the comparison.
9807 *DTS - The def types for the arguments of the comparison
9809 Returns whether a COND can be vectorized. Checks whether
9810 condition operands are supportable using vect_is_simple_use. */
9813 vect_is_simple_cond (tree cond
, vec_info
*vinfo
, slp_tree slp_node
,
9814 tree
*comp_vectype
, enum vect_def_type
*dts
,
/* Vector types discovered for the two comparison operands; either stays
   NULL_TREE when the corresponding operand is a constant (invariant).  */
9818 tree vectype1
= NULL_TREE
, vectype2
= NULL_TREE
;
/* Case 1: COND is already a scalar boolean SSA name (a mask).  It is
   only usable if it is a simple use whose vector type is a vector
   boolean type.  NOTE(review): the failure-path return statements are
   elided in this extraction.  */
9821 if (TREE_CODE (cond
) == SSA_NAME
9822 && VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (cond
)))
9824 if (!vect_is_simple_use (cond
, vinfo
, &dts
[0], comp_vectype
)
9826 || !VECTOR_BOOLEAN_TYPE_P (*comp_vectype
))
/* Case 2: otherwise COND must be an explicit comparison tree.  */
9831 if (!COMPARISON_CLASS_P (cond
))
/* Decompose the comparison into its two operands.  */
9834 lhs
= TREE_OPERAND (cond
, 0);
9835 rhs
= TREE_OPERAND (cond
, 1);
/* Classify the LHS: an SSA name must be a simple use (its vector type is
   recorded in VECTYPE1); integer/real/fixed-point constants are recorded
   as vect_constant_def in DTS[0].  */
9837 if (TREE_CODE (lhs
) == SSA_NAME
)
9839 if (!vect_is_simple_use (lhs
, vinfo
, &dts
[0], &vectype1
))
9842 else if (TREE_CODE (lhs
) == INTEGER_CST
|| TREE_CODE (lhs
) == REAL_CST
9843 || TREE_CODE (lhs
) == FIXED_CST
)
9844 dts
[0] = vect_constant_def
;
/* Classify the RHS symmetrically, filling DTS[1] / VECTYPE2.  */
9848 if (TREE_CODE (rhs
) == SSA_NAME
)
9850 if (!vect_is_simple_use (rhs
, vinfo
, &dts
[1], &vectype2
))
9853 else if (TREE_CODE (rhs
) == INTEGER_CST
|| TREE_CODE (rhs
) == REAL_CST
9854 || TREE_CODE (rhs
) == FIXED_CST
)
9855 dts
[1] = vect_constant_def
;
/* When both operand vector types are known they must agree in number
   of elements, otherwise the comparison is not vectorizable.  */
9859 if (vectype1
&& vectype2
9860 && maybe_ne (TYPE_VECTOR_SUBPARTS (vectype1
),
9861 TYPE_VECTOR_SUBPARTS (vectype2
)))
/* Report whichever operand vector type was found.  */
9864 *comp_vectype
= vectype1
? vectype1
: vectype2
;
9865 /* Invariant comparison: both operands were constants, so no vector
   type could be derived from a use; build one from the scalar type.  */
9866 if (! *comp_vectype
)
9868 tree scalar_type
= TREE_TYPE (lhs
);
9869 /* If we can widen the comparison to match vectype do so.  Widening
   is only done for integral types narrower than VECTYPE's element.  */
9870 if (INTEGRAL_TYPE_P (scalar_type
)
9872 && tree_int_cst_lt (TYPE_SIZE (scalar_type
),
9873 TYPE_SIZE (TREE_TYPE (vectype
))))
9874 scalar_type
= build_nonstandard_integer_type
9875 (tree_to_uhwi (TYPE_SIZE (TREE_TYPE (vectype
))),
9876 TYPE_UNSIGNED (scalar_type
));
/* Derive the comparison vector type from the (possibly widened)
   scalar type.  */
9877 *comp_vectype
= get_vectype_for_scalar_type (vinfo
, scalar_type
,
9884 /* vectorizable_condition.
9886 Check if STMT_INFO is conditional modify expression that can be vectorized.
9887 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
9888 stmt using VEC_COND_EXPR to replace it, put it in VEC_STMT, and insert it
9891 When STMT_INFO is vectorized as a nested cycle, for_reduction is true.
9893 Return true if STMT_INFO is vectorizable in this way. */
9896 vectorizable_condition (stmt_vec_info stmt_info
, gimple_stmt_iterator
*gsi
,
9897 stmt_vec_info
*vec_stmt
,
9898 slp_tree slp_node
, stmt_vector_for_cost
*cost_vec
)
9900 vec_info
*vinfo
= stmt_info
->vinfo
;
9901 tree scalar_dest
= NULL_TREE
;
9902 tree vec_dest
= NULL_TREE
;
9903 tree cond_expr
, cond_expr0
= NULL_TREE
, cond_expr1
= NULL_TREE
;
9904 tree then_clause
, else_clause
;
9905 tree comp_vectype
= NULL_TREE
;
9906 tree vec_cond_lhs
= NULL_TREE
, vec_cond_rhs
= NULL_TREE
;
9907 tree vec_then_clause
= NULL_TREE
, vec_else_clause
= NULL_TREE
;
9910 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
9911 enum vect_def_type dts
[4]
9912 = {vect_unknown_def_type
, vect_unknown_def_type
,
9913 vect_unknown_def_type
, vect_unknown_def_type
};
9917 enum tree_code code
, cond_code
, bitop1
= NOP_EXPR
, bitop2
= NOP_EXPR
;
9918 stmt_vec_info prev_stmt_info
= NULL
;
9920 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
9921 vec
<tree
> vec_oprnds0
= vNULL
;
9922 vec
<tree
> vec_oprnds1
= vNULL
;
9923 vec
<tree
> vec_oprnds2
= vNULL
;
9924 vec
<tree
> vec_oprnds3
= vNULL
;
9926 bool masked
= false;
9928 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
9931 /* Is vectorizable conditional operation? */
9932 gassign
*stmt
= dyn_cast
<gassign
*> (stmt_info
->stmt
);
9936 code
= gimple_assign_rhs_code (stmt
);
9937 if (code
!= COND_EXPR
)
9940 stmt_vec_info reduc_info
= NULL
;
9941 int reduc_index
= -1;
9942 vect_reduction_type reduction_type
= TREE_CODE_REDUCTION
;
9944 = STMT_VINFO_REDUC_DEF (vect_orig_stmt (stmt_info
)) != NULL
;
9947 if (STMT_SLP_TYPE (stmt_info
))
9949 reduc_info
= info_for_reduction (stmt_info
);
9950 reduction_type
= STMT_VINFO_REDUC_TYPE (reduc_info
);
9951 reduc_index
= STMT_VINFO_REDUC_IDX (stmt_info
);
9952 gcc_assert (reduction_type
!= EXTRACT_LAST_REDUCTION
9953 || reduc_index
!= -1);
9957 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
)
9960 /* FORNOW: only supported as part of a reduction. */
9961 if (STMT_VINFO_LIVE_P (stmt_info
))
9963 if (dump_enabled_p ())
9964 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
9965 "value used after loop.\n");
9970 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
9971 tree vectype1
= NULL_TREE
, vectype2
= NULL_TREE
;
9976 vec_num
= SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
);
9980 ncopies
= vect_get_num_copies (loop_vinfo
, vectype
);
9984 gcc_assert (ncopies
>= 1);
9985 if (for_reduction
&& ncopies
> 1)
9986 return false; /* FORNOW */
9988 cond_expr
= gimple_assign_rhs1 (stmt
);
9989 then_clause
= gimple_assign_rhs2 (stmt
);
9990 else_clause
= gimple_assign_rhs3 (stmt
);
9992 if (!vect_is_simple_cond (cond_expr
, stmt_info
->vinfo
, slp_node
,
9993 &comp_vectype
, &dts
[0], slp_node
? NULL
: vectype
)
9997 if (!vect_is_simple_use (then_clause
, stmt_info
->vinfo
, &dts
[2], &vectype1
))
9999 if (!vect_is_simple_use (else_clause
, stmt_info
->vinfo
, &dts
[3], &vectype2
))
10002 if (vectype1
&& !useless_type_conversion_p (vectype
, vectype1
))
10005 if (vectype2
&& !useless_type_conversion_p (vectype
, vectype2
))
10008 masked
= !COMPARISON_CLASS_P (cond_expr
);
10009 vec_cmp_type
= truth_type_for (comp_vectype
);
10011 if (vec_cmp_type
== NULL_TREE
)
10014 cond_code
= TREE_CODE (cond_expr
);
10017 cond_expr0
= TREE_OPERAND (cond_expr
, 0);
10018 cond_expr1
= TREE_OPERAND (cond_expr
, 1);
10021 /* For conditional reductions, the "then" value needs to be the candidate
10022 value calculated by this iteration while the "else" value needs to be
10023 the result carried over from previous iterations. If the COND_EXPR
10024 is the other way around, we need to swap it. */
10025 bool must_invert_cmp_result
= false;
10026 if (reduction_type
== EXTRACT_LAST_REDUCTION
&& reduc_index
== 1)
10029 must_invert_cmp_result
= true;
10032 bool honor_nans
= HONOR_NANS (TREE_TYPE (cond_expr0
));
10033 tree_code new_code
= invert_tree_comparison (cond_code
, honor_nans
);
10034 if (new_code
== ERROR_MARK
)
10035 must_invert_cmp_result
= true;
10038 cond_code
= new_code
;
10039 /* Make sure we don't accidentally use the old condition. */
10040 cond_expr
= NULL_TREE
;
10043 std::swap (then_clause
, else_clause
);
10046 if (!masked
&& VECTOR_BOOLEAN_TYPE_P (comp_vectype
))
10048 /* Boolean values may have another representation in vectors
10049 and therefore we prefer bit operations over comparison for
10050 them (which also works for scalar masks). We store opcodes
10051 to use in bitop1 and bitop2. Statement is vectorized as
10052 BITOP2 (rhs1 BITOP1 rhs2) or rhs1 BITOP2 (BITOP1 rhs2)
10053 depending on bitop1 and bitop2 arity. */
10057 bitop1
= BIT_NOT_EXPR
;
10058 bitop2
= BIT_AND_EXPR
;
10061 bitop1
= BIT_NOT_EXPR
;
10062 bitop2
= BIT_IOR_EXPR
;
10065 bitop1
= BIT_NOT_EXPR
;
10066 bitop2
= BIT_AND_EXPR
;
10067 std::swap (cond_expr0
, cond_expr1
);
10070 bitop1
= BIT_NOT_EXPR
;
10071 bitop2
= BIT_IOR_EXPR
;
10072 std::swap (cond_expr0
, cond_expr1
);
10075 bitop1
= BIT_XOR_EXPR
;
10078 bitop1
= BIT_XOR_EXPR
;
10079 bitop2
= BIT_NOT_EXPR
;
10084 cond_code
= SSA_NAME
;
10087 if (TREE_CODE_CLASS (cond_code
) == tcc_comparison
10088 && reduction_type
== EXTRACT_LAST_REDUCTION
10089 && !expand_vec_cmp_expr_p (comp_vectype
, vec_cmp_type
, cond_code
))
10091 if (dump_enabled_p ())
10092 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
10093 "reduction comparison operation not supported.\n");
10099 if (bitop1
!= NOP_EXPR
)
10101 machine_mode mode
= TYPE_MODE (comp_vectype
);
10104 optab
= optab_for_tree_code (bitop1
, comp_vectype
, optab_default
);
10105 if (!optab
|| optab_handler (optab
, mode
) == CODE_FOR_nothing
)
10108 if (bitop2
!= NOP_EXPR
)
10110 optab
= optab_for_tree_code (bitop2
, comp_vectype
,
10112 if (!optab
|| optab_handler (optab
, mode
) == CODE_FOR_nothing
)
10118 && LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo
)
10119 && reduction_type
== EXTRACT_LAST_REDUCTION
)
10120 vect_record_loop_mask (loop_vinfo
, &LOOP_VINFO_MASKS (loop_vinfo
),
10121 ncopies
* vec_num
, vectype
, NULL
);
10123 vect_cost_for_stmt kind
= vector_stmt
;
10124 if (reduction_type
== EXTRACT_LAST_REDUCTION
)
10125 /* Count one reduction-like operation per vector. */
10126 kind
= vec_to_scalar
;
10127 else if (!expand_vec_cond_expr_p (vectype
, comp_vectype
, cond_code
))
10130 STMT_VINFO_TYPE (stmt_info
) = condition_vec_info_type
;
10131 vect_model_simple_cost (stmt_info
, ncopies
, dts
, ndts
, slp_node
,
10140 vec_oprnds0
.create (1);
10141 vec_oprnds1
.create (1);
10142 vec_oprnds2
.create (1);
10143 vec_oprnds3
.create (1);
10147 scalar_dest
= gimple_assign_lhs (stmt
);
10148 if (reduction_type
!= EXTRACT_LAST_REDUCTION
)
10149 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
10151 /* Handle cond expr. */
10152 for (j
= 0; j
< ncopies
; j
++)
10154 bool swap_cond_operands
= false;
10156 /* See whether another part of the vectorized code applies a loop
10157 mask to the condition, or to its inverse. */
10159 vec_loop_masks
*masks
= NULL
;
10160 if (loop_vinfo
&& LOOP_VINFO_FULLY_MASKED_P (loop_vinfo
))
10162 if (reduction_type
== EXTRACT_LAST_REDUCTION
)
10163 masks
= &LOOP_VINFO_MASKS (loop_vinfo
);
10166 scalar_cond_masked_key
cond (cond_expr
, ncopies
);
10167 if (loop_vinfo
->scalar_cond_masked_set
.contains (cond
))
10168 masks
= &LOOP_VINFO_MASKS (loop_vinfo
);
10171 bool honor_nans
= HONOR_NANS (TREE_TYPE (cond
.op0
));
10172 cond
.code
= invert_tree_comparison (cond
.code
, honor_nans
);
10173 if (loop_vinfo
->scalar_cond_masked_set
.contains (cond
))
10175 masks
= &LOOP_VINFO_MASKS (loop_vinfo
);
10176 cond_code
= cond
.code
;
10177 swap_cond_operands
= true;
10183 stmt_vec_info new_stmt_info
= NULL
;
10188 auto_vec
<vec
<tree
>, 4> vec_defs
;
10189 vect_get_slp_defs (slp_node
, &vec_defs
);
10190 vec_oprnds3
= vec_defs
.pop ();
10191 vec_oprnds2
= vec_defs
.pop ();
10193 vec_oprnds1
= vec_defs
.pop ();
10194 vec_oprnds0
= vec_defs
.pop ();
10201 = vect_get_vec_def_for_operand (cond_expr
, stmt_info
,
10207 = vect_get_vec_def_for_operand (cond_expr0
,
10208 stmt_info
, comp_vectype
);
10210 = vect_get_vec_def_for_operand (cond_expr1
,
10211 stmt_info
, comp_vectype
);
10213 vec_then_clause
= vect_get_vec_def_for_operand (then_clause
,
10215 if (reduction_type
!= EXTRACT_LAST_REDUCTION
)
10216 vec_else_clause
= vect_get_vec_def_for_operand (else_clause
,
10223 = vect_get_vec_def_for_stmt_copy (vinfo
, vec_oprnds0
.pop ());
10226 = vect_get_vec_def_for_stmt_copy (vinfo
, vec_oprnds1
.pop ());
10228 vec_then_clause
= vect_get_vec_def_for_stmt_copy (vinfo
,
10229 vec_oprnds2
.pop ());
10230 vec_else_clause
= vect_get_vec_def_for_stmt_copy (vinfo
,
10231 vec_oprnds3
.pop ());
10236 vec_oprnds0
.quick_push (vec_cond_lhs
);
10238 vec_oprnds1
.quick_push (vec_cond_rhs
);
10239 vec_oprnds2
.quick_push (vec_then_clause
);
10240 vec_oprnds3
.quick_push (vec_else_clause
);
10243 /* Arguments are ready. Create the new vector stmt. */
10244 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vec_cond_lhs
)
10246 vec_then_clause
= vec_oprnds2
[i
];
10247 vec_else_clause
= vec_oprnds3
[i
];
10249 if (swap_cond_operands
)
10250 std::swap (vec_then_clause
, vec_else_clause
);
10253 vec_compare
= vec_cond_lhs
;
10256 vec_cond_rhs
= vec_oprnds1
[i
];
10257 if (bitop1
== NOP_EXPR
)
10258 vec_compare
= build2 (cond_code
, vec_cmp_type
,
10259 vec_cond_lhs
, vec_cond_rhs
);
10262 new_temp
= make_ssa_name (vec_cmp_type
);
10264 if (bitop1
== BIT_NOT_EXPR
)
10265 new_stmt
= gimple_build_assign (new_temp
, bitop1
,
10269 = gimple_build_assign (new_temp
, bitop1
, vec_cond_lhs
,
10271 vect_finish_stmt_generation (stmt_info
, new_stmt
, gsi
);
10272 if (bitop2
== NOP_EXPR
)
10273 vec_compare
= new_temp
;
10274 else if (bitop2
== BIT_NOT_EXPR
)
10276 /* Instead of doing ~x ? y : z do x ? z : y. */
10277 vec_compare
= new_temp
;
10278 std::swap (vec_then_clause
, vec_else_clause
);
10282 vec_compare
= make_ssa_name (vec_cmp_type
);
10284 = gimple_build_assign (vec_compare
, bitop2
,
10285 vec_cond_lhs
, new_temp
);
10286 vect_finish_stmt_generation (stmt_info
, new_stmt
, gsi
);
10291 /* If we decided to apply a loop mask to the result of the vector
10292 comparison, AND the comparison with the mask now. Later passes
10293 should then be able to reuse the AND results between mulitple
10297 for (int i = 0; i < 100; ++i)
10298 x[i] = y[i] ? z[i] : 10;
10300 results in following optimized GIMPLE:
10302 mask__35.8_43 = vect__4.7_41 != { 0, ... };
10303 vec_mask_and_46 = loop_mask_40 & mask__35.8_43;
10304 _19 = &MEM[base: z_12(D), index: ivtmp_56, step: 4, offset: 0B];
10305 vect_iftmp.11_47 = .MASK_LOAD (_19, 4B, vec_mask_and_46);
10306 vect_iftmp.12_52 = VEC_COND_EXPR <vec_mask_and_46,
10307 vect_iftmp.11_47, { 10, ... }>;
10309 instead of using a masked and unmasked forms of
10310 vec != { 0, ... } (masked in the MASK_LOAD,
10311 unmasked in the VEC_COND_EXPR). */
10313 /* Force vec_compare to be an SSA_NAME rather than a comparison,
10314 in cases where that's necessary. */
10316 if (masks
|| reduction_type
== EXTRACT_LAST_REDUCTION
)
10318 if (!is_gimple_val (vec_compare
))
10320 tree vec_compare_name
= make_ssa_name (vec_cmp_type
);
10321 gassign
*new_stmt
= gimple_build_assign (vec_compare_name
,
10323 vect_finish_stmt_generation (stmt_info
, new_stmt
, gsi
);
10324 vec_compare
= vec_compare_name
;
10327 if (must_invert_cmp_result
)
10329 tree vec_compare_name
= make_ssa_name (vec_cmp_type
);
10330 gassign
*new_stmt
= gimple_build_assign (vec_compare_name
,
10333 vect_finish_stmt_generation (stmt_info
, new_stmt
, gsi
);
10334 vec_compare
= vec_compare_name
;
10339 unsigned vec_num
= vec_oprnds0
.length ();
10341 = vect_get_loop_mask (gsi
, masks
, vec_num
* ncopies
,
10342 vectype
, vec_num
* j
+ i
);
10343 tree tmp2
= make_ssa_name (vec_cmp_type
);
10345 = gimple_build_assign (tmp2
, BIT_AND_EXPR
, vec_compare
,
10347 vect_finish_stmt_generation (stmt_info
, g
, gsi
);
10348 vec_compare
= tmp2
;
10352 if (reduction_type
== EXTRACT_LAST_REDUCTION
)
10354 gimple
*old_stmt
= vect_orig_stmt (stmt_info
)->stmt
;
10355 tree lhs
= gimple_get_lhs (old_stmt
);
10356 gcall
*new_stmt
= gimple_build_call_internal
10357 (IFN_FOLD_EXTRACT_LAST
, 3, else_clause
, vec_compare
,
10359 gimple_call_set_lhs (new_stmt
, lhs
);
10360 SSA_NAME_DEF_STMT (lhs
) = new_stmt
;
10361 if (old_stmt
== gsi_stmt (*gsi
))
10362 new_stmt_info
= vect_finish_replace_stmt (stmt_info
, new_stmt
);
10365 /* In this case we're moving the definition to later in the
10366 block. That doesn't matter because the only uses of the
10367 lhs are in phi statements. */
10368 gimple_stmt_iterator old_gsi
= gsi_for_stmt (old_stmt
);
10369 gsi_remove (&old_gsi
, true);
10371 = vect_finish_stmt_generation (stmt_info
, new_stmt
, gsi
);
10376 new_temp
= make_ssa_name (vec_dest
);
10378 = gimple_build_assign (new_temp
, VEC_COND_EXPR
, vec_compare
,
10379 vec_then_clause
, vec_else_clause
);
10381 = vect_finish_stmt_generation (stmt_info
, new_stmt
, gsi
);
10384 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt_info
);
10391 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt_info
;
10393 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt_info
;
10395 prev_stmt_info
= new_stmt_info
;
10398 vec_oprnds0
.release ();
10399 vec_oprnds1
.release ();
10400 vec_oprnds2
.release ();
10401 vec_oprnds3
.release ();
10406 /* vectorizable_comparison.
10408 Check if STMT_INFO is comparison expression that can be vectorized.
10409 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
10410 comparison, put it in VEC_STMT, and insert it at GSI.
10412 Return true if STMT_INFO is vectorizable in this way. */
10415 vectorizable_comparison (stmt_vec_info stmt_info
, gimple_stmt_iterator
*gsi
,
10416 stmt_vec_info
*vec_stmt
,
10417 slp_tree slp_node
, stmt_vector_for_cost
*cost_vec
)
10419 vec_info
*vinfo
= stmt_info
->vinfo
;
10420 tree lhs
, rhs1
, rhs2
;
10421 tree vectype1
= NULL_TREE
, vectype2
= NULL_TREE
;
10422 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
10423 tree vec_rhs1
= NULL_TREE
, vec_rhs2
= NULL_TREE
;
10425 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
10426 enum vect_def_type dts
[2] = {vect_unknown_def_type
, vect_unknown_def_type
};
10428 poly_uint64 nunits
;
10430 enum tree_code code
, bitop1
= NOP_EXPR
, bitop2
= NOP_EXPR
;
10431 stmt_vec_info prev_stmt_info
= NULL
;
10433 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
10434 vec
<tree
> vec_oprnds0
= vNULL
;
10435 vec
<tree
> vec_oprnds1
= vNULL
;
10439 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
10442 if (!vectype
|| !VECTOR_BOOLEAN_TYPE_P (vectype
))
10445 mask_type
= vectype
;
10446 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
10451 ncopies
= vect_get_num_copies (loop_vinfo
, vectype
);
10453 gcc_assert (ncopies
>= 1);
10454 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
)
10457 if (STMT_VINFO_LIVE_P (stmt_info
))
10459 if (dump_enabled_p ())
10460 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
10461 "value used after loop.\n");
10465 gassign
*stmt
= dyn_cast
<gassign
*> (stmt_info
->stmt
);
10469 code
= gimple_assign_rhs_code (stmt
);
10471 if (TREE_CODE_CLASS (code
) != tcc_comparison
)
10474 rhs1
= gimple_assign_rhs1 (stmt
);
10475 rhs2
= gimple_assign_rhs2 (stmt
);
10477 if (!vect_is_simple_use (rhs1
, stmt_info
->vinfo
, &dts
[0], &vectype1
))
10480 if (!vect_is_simple_use (rhs2
, stmt_info
->vinfo
, &dts
[1], &vectype2
))
10483 if (vectype1
&& vectype2
10484 && maybe_ne (TYPE_VECTOR_SUBPARTS (vectype1
),
10485 TYPE_VECTOR_SUBPARTS (vectype2
)))
10488 vectype
= vectype1
? vectype1
: vectype2
;
10490 /* Invariant comparison. */
10493 vectype
= get_vectype_for_scalar_type (vinfo
, TREE_TYPE (rhs1
),
10495 if (!vectype
|| maybe_ne (TYPE_VECTOR_SUBPARTS (vectype
), nunits
))
10498 else if (maybe_ne (nunits
, TYPE_VECTOR_SUBPARTS (vectype
)))
10501 /* Can't compare mask and non-mask types. */
10502 if (vectype1
&& vectype2
10503 && (VECTOR_BOOLEAN_TYPE_P (vectype1
) ^ VECTOR_BOOLEAN_TYPE_P (vectype2
)))
10506 /* Boolean values may have another representation in vectors
10507 and therefore we prefer bit operations over comparison for
10508 them (which also works for scalar masks). We store opcodes
10509 to use in bitop1 and bitop2. Statement is vectorized as
10510 BITOP2 (rhs1 BITOP1 rhs2) or
10511 rhs1 BITOP2 (BITOP1 rhs2)
10512 depending on bitop1 and bitop2 arity. */
10513 bool swap_p
= false;
10514 if (VECTOR_BOOLEAN_TYPE_P (vectype
))
10516 if (code
== GT_EXPR
)
10518 bitop1
= BIT_NOT_EXPR
;
10519 bitop2
= BIT_AND_EXPR
;
10521 else if (code
== GE_EXPR
)
10523 bitop1
= BIT_NOT_EXPR
;
10524 bitop2
= BIT_IOR_EXPR
;
10526 else if (code
== LT_EXPR
)
10528 bitop1
= BIT_NOT_EXPR
;
10529 bitop2
= BIT_AND_EXPR
;
10532 else if (code
== LE_EXPR
)
10534 bitop1
= BIT_NOT_EXPR
;
10535 bitop2
= BIT_IOR_EXPR
;
10540 bitop1
= BIT_XOR_EXPR
;
10541 if (code
== EQ_EXPR
)
10542 bitop2
= BIT_NOT_EXPR
;
10548 if (bitop1
== NOP_EXPR
)
10550 if (!expand_vec_cmp_expr_p (vectype
, mask_type
, code
))
10555 machine_mode mode
= TYPE_MODE (vectype
);
10558 optab
= optab_for_tree_code (bitop1
, vectype
, optab_default
);
10559 if (!optab
|| optab_handler (optab
, mode
) == CODE_FOR_nothing
)
10562 if (bitop2
!= NOP_EXPR
)
10564 optab
= optab_for_tree_code (bitop2
, vectype
, optab_default
);
10565 if (!optab
|| optab_handler (optab
, mode
) == CODE_FOR_nothing
)
10570 STMT_VINFO_TYPE (stmt_info
) = comparison_vec_info_type
;
10571 vect_model_simple_cost (stmt_info
, ncopies
* (1 + (bitop2
!= NOP_EXPR
)),
10572 dts
, ndts
, slp_node
, cost_vec
);
10579 vec_oprnds0
.create (1);
10580 vec_oprnds1
.create (1);
10584 lhs
= gimple_assign_lhs (stmt
);
10585 mask
= vect_create_destination_var (lhs
, mask_type
);
10587 /* Handle cmp expr. */
10588 for (j
= 0; j
< ncopies
; j
++)
10590 stmt_vec_info new_stmt_info
= NULL
;
10595 auto_vec
<vec
<tree
>, 2> vec_defs
;
10596 vect_get_slp_defs (slp_node
, &vec_defs
);
10597 vec_oprnds1
= vec_defs
.pop ();
10598 vec_oprnds0
= vec_defs
.pop ();
10600 std::swap (vec_oprnds0
, vec_oprnds1
);
10604 vec_rhs1
= vect_get_vec_def_for_operand (rhs1
, stmt_info
,
10606 vec_rhs2
= vect_get_vec_def_for_operand (rhs2
, stmt_info
,
10612 vec_rhs1
= vect_get_vec_def_for_stmt_copy (vinfo
,
10613 vec_oprnds0
.pop ());
10614 vec_rhs2
= vect_get_vec_def_for_stmt_copy (vinfo
,
10615 vec_oprnds1
.pop ());
10620 if (swap_p
&& j
== 0)
10621 std::swap (vec_rhs1
, vec_rhs2
);
10622 vec_oprnds0
.quick_push (vec_rhs1
);
10623 vec_oprnds1
.quick_push (vec_rhs2
);
10626 /* Arguments are ready. Create the new vector stmt. */
10627 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vec_rhs1
)
10629 vec_rhs2
= vec_oprnds1
[i
];
10631 new_temp
= make_ssa_name (mask
);
10632 if (bitop1
== NOP_EXPR
)
10634 gassign
*new_stmt
= gimple_build_assign (new_temp
, code
,
10635 vec_rhs1
, vec_rhs2
);
10637 = vect_finish_stmt_generation (stmt_info
, new_stmt
, gsi
);
10642 if (bitop1
== BIT_NOT_EXPR
)
10643 new_stmt
= gimple_build_assign (new_temp
, bitop1
, vec_rhs2
);
10645 new_stmt
= gimple_build_assign (new_temp
, bitop1
, vec_rhs1
,
10648 = vect_finish_stmt_generation (stmt_info
, new_stmt
, gsi
);
10649 if (bitop2
!= NOP_EXPR
)
10651 tree res
= make_ssa_name (mask
);
10652 if (bitop2
== BIT_NOT_EXPR
)
10653 new_stmt
= gimple_build_assign (res
, bitop2
, new_temp
);
10655 new_stmt
= gimple_build_assign (res
, bitop2
, vec_rhs1
,
10658 = vect_finish_stmt_generation (stmt_info
, new_stmt
, gsi
);
10662 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt_info
);
10669 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt_info
;
10671 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt_info
;
10673 prev_stmt_info
= new_stmt_info
;
10676 vec_oprnds0
.release ();
10677 vec_oprnds1
.release ();
10682 /* If SLP_NODE is nonnull, return true if vectorizable_live_operation
10683 can handle all live statements in the node. Otherwise return true
10684 if STMT_INFO is not live or if vectorizable_live_operation can handle it.
10685 GSI and VEC_STMT_P are as for vectorizable_live_operation. */
10688 can_vectorize_live_stmts (stmt_vec_info stmt_info
, gimple_stmt_iterator
*gsi
,
10689 slp_tree slp_node
, slp_instance slp_node_instance
,
10691 stmt_vector_for_cost
*cost_vec
)
10695 stmt_vec_info slp_stmt_info
;
10697 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (slp_node
), i
, slp_stmt_info
)
10699 if (STMT_VINFO_LIVE_P (slp_stmt_info
)
10700 && !vectorizable_live_operation (slp_stmt_info
, gsi
, slp_node
,
10701 slp_node_instance
, i
,
10702 vec_stmt_p
, cost_vec
))
10706 else if (STMT_VINFO_LIVE_P (stmt_info
)
10707 && !vectorizable_live_operation (stmt_info
, gsi
, slp_node
,
10708 slp_node_instance
, -1,
10709 vec_stmt_p
, cost_vec
))
10715 /* Make sure the statement is vectorizable. */
10718 vect_analyze_stmt (stmt_vec_info stmt_info
, bool *need_to_vectorize
,
10719 slp_tree node
, slp_instance node_instance
,
10720 stmt_vector_for_cost
*cost_vec
)
10722 vec_info
*vinfo
= stmt_info
->vinfo
;
10723 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
10724 enum vect_relevant relevance
= STMT_VINFO_RELEVANT (stmt_info
);
10726 gimple_seq pattern_def_seq
;
10728 if (dump_enabled_p ())
10729 dump_printf_loc (MSG_NOTE
, vect_location
, "==> examining statement: %G",
10732 if (gimple_has_volatile_ops (stmt_info
->stmt
))
10733 return opt_result::failure_at (stmt_info
->stmt
,
10735 " stmt has volatile operands: %G\n",
10738 if (STMT_VINFO_IN_PATTERN_P (stmt_info
)
10740 && (pattern_def_seq
= STMT_VINFO_PATTERN_DEF_SEQ (stmt_info
)))
10742 gimple_stmt_iterator si
;
10744 for (si
= gsi_start (pattern_def_seq
); !gsi_end_p (si
); gsi_next (&si
))
10746 stmt_vec_info pattern_def_stmt_info
10747 = vinfo
->lookup_stmt (gsi_stmt (si
));
10748 if (STMT_VINFO_RELEVANT_P (pattern_def_stmt_info
)
10749 || STMT_VINFO_LIVE_P (pattern_def_stmt_info
))
10751 /* Analyze def stmt of STMT if it's a pattern stmt. */
10752 if (dump_enabled_p ())
10753 dump_printf_loc (MSG_NOTE
, vect_location
,
10754 "==> examining pattern def statement: %G",
10755 pattern_def_stmt_info
->stmt
);
10758 = vect_analyze_stmt (pattern_def_stmt_info
,
10759 need_to_vectorize
, node
, node_instance
,
10767 /* Skip stmts that do not need to be vectorized. In loops this is expected
10769 - the COND_EXPR which is the loop exit condition
10770 - any LABEL_EXPRs in the loop
10771 - computations that are used only for array indexing or loop control.
10772 In basic blocks we only analyze statements that are a part of some SLP
10773 instance, therefore, all the statements are relevant.
10775 Pattern statement needs to be analyzed instead of the original statement
10776 if the original statement is not relevant. Otherwise, we analyze both
10777 statements. In basic blocks we are called from some SLP instance
10778 traversal, don't analyze pattern stmts instead, the pattern stmts
10779 already will be part of SLP instance. */
10781 stmt_vec_info pattern_stmt_info
= STMT_VINFO_RELATED_STMT (stmt_info
);
10782 if (!STMT_VINFO_RELEVANT_P (stmt_info
)
10783 && !STMT_VINFO_LIVE_P (stmt_info
))
10785 if (STMT_VINFO_IN_PATTERN_P (stmt_info
)
10786 && pattern_stmt_info
10787 && (STMT_VINFO_RELEVANT_P (pattern_stmt_info
)
10788 || STMT_VINFO_LIVE_P (pattern_stmt_info
)))
10790 /* Analyze PATTERN_STMT instead of the original stmt. */
10791 stmt_info
= pattern_stmt_info
;
10792 if (dump_enabled_p ())
10793 dump_printf_loc (MSG_NOTE
, vect_location
,
10794 "==> examining pattern statement: %G",
10799 if (dump_enabled_p ())
10800 dump_printf_loc (MSG_NOTE
, vect_location
, "irrelevant.\n");
10802 return opt_result::success ();
10805 else if (STMT_VINFO_IN_PATTERN_P (stmt_info
)
10807 && pattern_stmt_info
10808 && (STMT_VINFO_RELEVANT_P (pattern_stmt_info
)
10809 || STMT_VINFO_LIVE_P (pattern_stmt_info
)))
10811 /* Analyze PATTERN_STMT too. */
10812 if (dump_enabled_p ())
10813 dump_printf_loc (MSG_NOTE
, vect_location
,
10814 "==> examining pattern statement: %G",
10815 pattern_stmt_info
->stmt
);
10818 = vect_analyze_stmt (pattern_stmt_info
, need_to_vectorize
, node
,
10819 node_instance
, cost_vec
);
10824 switch (STMT_VINFO_DEF_TYPE (stmt_info
))
10826 case vect_internal_def
:
10829 case vect_reduction_def
:
10830 case vect_nested_cycle
:
10831 gcc_assert (!bb_vinfo
10832 && (relevance
== vect_used_in_outer
10833 || relevance
== vect_used_in_outer_by_reduction
10834 || relevance
== vect_used_by_reduction
10835 || relevance
== vect_unused_in_scope
10836 || relevance
== vect_used_only_live
));
10839 case vect_induction_def
:
10840 gcc_assert (!bb_vinfo
);
10843 case vect_constant_def
:
10844 case vect_external_def
:
10845 case vect_unknown_def_type
:
10847 gcc_unreachable ();
10850 if (STMT_VINFO_RELEVANT_P (stmt_info
))
10852 tree type
= gimple_expr_type (stmt_info
->stmt
);
10853 gcc_assert (!VECTOR_MODE_P (TYPE_MODE (type
)));
10854 gcall
*call
= dyn_cast
<gcall
*> (stmt_info
->stmt
);
10855 gcc_assert (STMT_VINFO_VECTYPE (stmt_info
)
10856 || (call
&& gimple_call_lhs (call
) == NULL_TREE
));
10857 *need_to_vectorize
= true;
10860 if (PURE_SLP_STMT (stmt_info
) && !node
)
10862 if (dump_enabled_p ())
10863 dump_printf_loc (MSG_NOTE
, vect_location
,
10864 "handled only by SLP analysis\n");
10865 return opt_result::success ();
10870 && (STMT_VINFO_RELEVANT_P (stmt_info
)
10871 || STMT_VINFO_DEF_TYPE (stmt_info
) == vect_reduction_def
))
10872 /* Prefer vectorizable_call over vectorizable_simd_clone_call so
10873 -mveclibabi= takes preference over library functions with
10874 the simd attribute. */
10875 ok
= (vectorizable_call (stmt_info
, NULL
, NULL
, node
, cost_vec
)
10876 || vectorizable_simd_clone_call (stmt_info
, NULL
, NULL
, node
,
10878 || vectorizable_conversion (stmt_info
, NULL
, NULL
, node
, cost_vec
)
10879 || vectorizable_operation (stmt_info
, NULL
, NULL
, node
, cost_vec
)
10880 || vectorizable_assignment (stmt_info
, NULL
, NULL
, node
, cost_vec
)
10881 || vectorizable_load (stmt_info
, NULL
, NULL
, node
, node_instance
,
10883 || vectorizable_store (stmt_info
, NULL
, NULL
, node
, cost_vec
)
10884 || vectorizable_reduction (stmt_info
, node
, node_instance
, cost_vec
)
10885 || vectorizable_induction (stmt_info
, NULL
, NULL
, node
, cost_vec
)
10886 || vectorizable_shift (stmt_info
, NULL
, NULL
, node
, cost_vec
)
10887 || vectorizable_condition (stmt_info
, NULL
, NULL
, node
, cost_vec
)
10888 || vectorizable_comparison (stmt_info
, NULL
, NULL
, node
,
10890 || vectorizable_lc_phi (stmt_info
, NULL
, node
));
10894 ok
= (vectorizable_call (stmt_info
, NULL
, NULL
, node
, cost_vec
)
10895 || vectorizable_simd_clone_call (stmt_info
, NULL
, NULL
, node
,
10897 || vectorizable_conversion (stmt_info
, NULL
, NULL
, node
,
10899 || vectorizable_shift (stmt_info
, NULL
, NULL
, node
, cost_vec
)
10900 || vectorizable_operation (stmt_info
, NULL
, NULL
, node
, cost_vec
)
10901 || vectorizable_assignment (stmt_info
, NULL
, NULL
, node
,
10903 || vectorizable_load (stmt_info
, NULL
, NULL
, node
, node_instance
,
10905 || vectorizable_store (stmt_info
, NULL
, NULL
, node
, cost_vec
)
10906 || vectorizable_condition (stmt_info
, NULL
, NULL
, node
, cost_vec
)
10907 || vectorizable_comparison (stmt_info
, NULL
, NULL
, node
,
10912 return opt_result::failure_at (stmt_info
->stmt
,
10914 " relevant stmt not supported: %G",
10917 /* Stmts that are (also) "live" (i.e. - that are used out of the loop)
10918 need extra handling, except for vectorizable reductions. */
10920 && STMT_VINFO_TYPE (stmt_info
) != reduc_vec_info_type
10921 && STMT_VINFO_TYPE (stmt_info
) != lc_phi_info_type
10922 && !can_vectorize_live_stmts (stmt_info
, NULL
, node
, node_instance
,
10924 return opt_result::failure_at (stmt_info
->stmt
,
10926 " live stmt not supported: %G",
10929 return opt_result::success ();
10933 /* Function vect_transform_stmt.
10935 Create a vectorized stmt to replace STMT_INFO, and insert it at GSI. */
10938 vect_transform_stmt (stmt_vec_info stmt_info
, gimple_stmt_iterator
*gsi
,
10939 slp_tree slp_node
, slp_instance slp_node_instance
)
10941 vec_info
*vinfo
= stmt_info
->vinfo
;
10942 bool is_store
= false;
10943 stmt_vec_info vec_stmt
= NULL
;
10946 gcc_assert (slp_node
|| !PURE_SLP_STMT (stmt_info
));
10947 stmt_vec_info old_vec_stmt_info
= STMT_VINFO_VEC_STMT (stmt_info
);
10949 bool nested_p
= (STMT_VINFO_LOOP_VINFO (stmt_info
)
10950 && nested_in_vect_loop_p
10951 (LOOP_VINFO_LOOP (STMT_VINFO_LOOP_VINFO (stmt_info
)),
10954 gimple
*stmt
= stmt_info
->stmt
;
10955 switch (STMT_VINFO_TYPE (stmt_info
))
10957 case type_demotion_vec_info_type
:
10958 case type_promotion_vec_info_type
:
10959 case type_conversion_vec_info_type
:
10960 done
= vectorizable_conversion (stmt_info
, gsi
, &vec_stmt
, slp_node
,
10965 case induc_vec_info_type
:
10966 done
= vectorizable_induction (stmt_info
, gsi
, &vec_stmt
, slp_node
,
10971 case shift_vec_info_type
:
10972 done
= vectorizable_shift (stmt_info
, gsi
, &vec_stmt
, slp_node
, NULL
);
10976 case op_vec_info_type
:
10977 done
= vectorizable_operation (stmt_info
, gsi
, &vec_stmt
, slp_node
,
10982 case assignment_vec_info_type
:
10983 done
= vectorizable_assignment (stmt_info
, gsi
, &vec_stmt
, slp_node
,
10988 case load_vec_info_type
:
10989 done
= vectorizable_load (stmt_info
, gsi
, &vec_stmt
, slp_node
,
10990 slp_node_instance
, NULL
);
10994 case store_vec_info_type
:
10995 done
= vectorizable_store (stmt_info
, gsi
, &vec_stmt
, slp_node
, NULL
);
10997 if (STMT_VINFO_GROUPED_ACCESS (stmt_info
) && !slp_node
)
10999 /* In case of interleaving, the whole chain is vectorized when the
11000 last store in the chain is reached. Store stmts before the last
11001 one are skipped, and there vec_stmt_info shouldn't be freed
11003 stmt_vec_info group_info
= DR_GROUP_FIRST_ELEMENT (stmt_info
);
11004 if (DR_GROUP_STORE_COUNT (group_info
) == DR_GROUP_SIZE (group_info
))
11011 case condition_vec_info_type
:
11012 done
= vectorizable_condition (stmt_info
, gsi
, &vec_stmt
, slp_node
, NULL
);
11016 case comparison_vec_info_type
:
11017 done
= vectorizable_comparison (stmt_info
, gsi
, &vec_stmt
,
11022 case call_vec_info_type
:
11023 done
= vectorizable_call (stmt_info
, gsi
, &vec_stmt
, slp_node
, NULL
);
11024 stmt
= gsi_stmt (*gsi
);
11027 case call_simd_clone_vec_info_type
:
11028 done
= vectorizable_simd_clone_call (stmt_info
, gsi
, &vec_stmt
,
11030 stmt
= gsi_stmt (*gsi
);
11033 case reduc_vec_info_type
:
11034 done
= vect_transform_reduction (stmt_info
, gsi
, &vec_stmt
, slp_node
);
11038 case cycle_phi_info_type
:
11039 done
= vect_transform_cycle_phi (stmt_info
, &vec_stmt
, slp_node
,
11040 slp_node_instance
);
11044 case lc_phi_info_type
:
11045 done
= vectorizable_lc_phi (stmt_info
, &vec_stmt
, slp_node
);
11050 if (!STMT_VINFO_LIVE_P (stmt_info
))
11052 if (dump_enabled_p ())
11053 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
11054 "stmt not supported.\n");
11055 gcc_unreachable ();
11059 /* Verify SLP vectorization doesn't mess with STMT_VINFO_VEC_STMT.
11060 This would break hybrid SLP vectorization. */
11062 gcc_assert (!vec_stmt
11063 && STMT_VINFO_VEC_STMT (stmt_info
) == old_vec_stmt_info
);
11065 /* Handle inner-loop stmts whose DEF is used in the loop-nest that
11066 is being vectorized, but outside the immediately enclosing loop. */
11069 && STMT_VINFO_TYPE (stmt_info
) != reduc_vec_info_type
11070 && (STMT_VINFO_RELEVANT (stmt_info
) == vect_used_in_outer
11071 || STMT_VINFO_RELEVANT (stmt_info
) ==
11072 vect_used_in_outer_by_reduction
))
11074 class loop
*innerloop
= LOOP_VINFO_LOOP (
11075 STMT_VINFO_LOOP_VINFO (stmt_info
))->inner
;
11076 imm_use_iterator imm_iter
;
11077 use_operand_p use_p
;
11080 if (dump_enabled_p ())
11081 dump_printf_loc (MSG_NOTE
, vect_location
,
11082 "Record the vdef for outer-loop vectorization.\n");
11084 /* Find the relevant loop-exit phi-node, and reord the vec_stmt there
11085 (to be used when vectorizing outer-loop stmts that use the DEF of
11087 if (gimple_code (stmt
) == GIMPLE_PHI
)
11088 scalar_dest
= PHI_RESULT (stmt
);
11090 scalar_dest
= gimple_get_lhs (stmt
);
11092 FOR_EACH_IMM_USE_FAST (use_p
, imm_iter
, scalar_dest
)
11093 if (!flow_bb_inside_loop_p (innerloop
, gimple_bb (USE_STMT (use_p
))))
11095 stmt_vec_info exit_phi_info
11096 = vinfo
->lookup_stmt (USE_STMT (use_p
));
11097 STMT_VINFO_VEC_STMT (exit_phi_info
) = vec_stmt
;
11102 STMT_VINFO_VEC_STMT (stmt_info
) = vec_stmt
;
11104 if (STMT_VINFO_TYPE (stmt_info
) == store_vec_info_type
)
11107 /* If this stmt defines a value used on a backedge, update the
11108 vectorized PHIs. */
11109 stmt_vec_info orig_stmt_info
= vect_orig_stmt (stmt_info
);
11110 stmt_vec_info reduc_info
;
11111 if (STMT_VINFO_REDUC_DEF (orig_stmt_info
)
11112 && vect_stmt_to_vectorize (orig_stmt_info
) == stmt_info
11113 && (reduc_info
= info_for_reduction (orig_stmt_info
))
11114 && STMT_VINFO_REDUC_TYPE (reduc_info
) != FOLD_LEFT_REDUCTION
11115 && STMT_VINFO_REDUC_TYPE (reduc_info
) != EXTRACT_LAST_REDUCTION
)
11120 && (phi
= dyn_cast
<gphi
*>
11121 (STMT_VINFO_REDUC_DEF (orig_stmt_info
)->stmt
))
11122 && dominated_by_p (CDI_DOMINATORS
,
11123 gimple_bb (orig_stmt_info
->stmt
), gimple_bb (phi
))
11124 && (e
= loop_latch_edge (gimple_bb (phi
)->loop_father
))
11125 && (PHI_ARG_DEF_FROM_EDGE (phi
, e
)
11126 == gimple_get_lhs (orig_stmt_info
->stmt
)))
11128 stmt_vec_info phi_info
11129 = STMT_VINFO_VEC_STMT (STMT_VINFO_REDUC_DEF (orig_stmt_info
));
11130 stmt_vec_info vec_stmt
= STMT_VINFO_VEC_STMT (stmt_info
);
11133 add_phi_arg (as_a
<gphi
*> (phi_info
->stmt
),
11134 gimple_get_lhs (vec_stmt
->stmt
), e
,
11135 gimple_phi_arg_location (phi
, e
->dest_idx
));
11136 phi_info
= STMT_VINFO_RELATED_STMT (phi_info
);
11137 vec_stmt
= STMT_VINFO_RELATED_STMT (vec_stmt
);
11140 gcc_assert (!vec_stmt
);
11143 && slp_node
!= slp_node_instance
->reduc_phis
)
11145 slp_tree phi_node
= slp_node_instance
->reduc_phis
;
11146 gphi
*phi
= as_a
<gphi
*> (SLP_TREE_SCALAR_STMTS (phi_node
)[0]->stmt
);
11147 e
= loop_latch_edge (gimple_bb (phi
)->loop_father
);
11148 gcc_assert (SLP_TREE_VEC_STMTS (phi_node
).length ()
11149 == SLP_TREE_VEC_STMTS (slp_node
).length ());
11150 for (unsigned i
= 0; i
< SLP_TREE_VEC_STMTS (phi_node
).length (); ++i
)
11151 add_phi_arg (as_a
<gphi
*> (SLP_TREE_VEC_STMTS (phi_node
)[i
]->stmt
),
11152 gimple_get_lhs (SLP_TREE_VEC_STMTS (slp_node
)[i
]->stmt
),
11153 e
, gimple_phi_arg_location (phi
, e
->dest_idx
));
11157 /* Handle stmts whose DEF is used outside the loop-nest that is
11158 being vectorized. */
11159 done
= can_vectorize_live_stmts (stmt_info
, gsi
, slp_node
,
11160 slp_node_instance
, true, NULL
);
11167 /* Remove a group of stores (for SLP or interleaving), free their
11171 vect_remove_stores (stmt_vec_info first_stmt_info
)
11173 vec_info
*vinfo
= first_stmt_info
->vinfo
;
11174 stmt_vec_info next_stmt_info
= first_stmt_info
;
11176 while (next_stmt_info
)
11178 stmt_vec_info tmp
= DR_GROUP_NEXT_ELEMENT (next_stmt_info
);
11179 next_stmt_info
= vect_orig_stmt (next_stmt_info
);
11180 /* Free the attached stmt_vec_info and remove the stmt. */
11181 vinfo
->remove_stmt (next_stmt_info
);
11182 next_stmt_info
= tmp
;
11186 /* If NUNITS is nonzero, return a vector type that contains NUNITS
11187 elements of type SCALAR_TYPE, or null if the target doesn't support
11190 If NUNITS is zero, return a vector type that contains elements of
11191 type SCALAR_TYPE, choosing whichever vector size the target prefers.
11193 If PREVAILING_MODE is VOIDmode, we have not yet chosen a vector mode
11194 for this vectorization region and want to "autodetect" the best choice.
11195 Otherwise, PREVAILING_MODE is a previously-chosen vector TYPE_MODE
11196 and we want the new type to be interoperable with it. PREVAILING_MODE
11197 in this case can be a scalar integer mode or a vector mode; when it
11198 is a vector mode, the function acts like a tree-level version of
11199 related_vector_mode. */
11202 get_related_vectype_for_scalar_type (machine_mode prevailing_mode
,
11203 tree scalar_type
, poly_uint64 nunits
)
11205 tree orig_scalar_type
= scalar_type
;
11206 scalar_mode inner_mode
;
11207 machine_mode simd_mode
;
11210 if (!is_int_mode (TYPE_MODE (scalar_type
), &inner_mode
)
11211 && !is_float_mode (TYPE_MODE (scalar_type
), &inner_mode
))
11214 unsigned int nbytes
= GET_MODE_SIZE (inner_mode
);
11216 /* For vector types of elements whose mode precision doesn't
11217 match their types precision we use a element type of mode
11218 precision. The vectorization routines will have to make sure
11219 they support the proper result truncation/extension.
11220 We also make sure to build vector types with INTEGER_TYPE
11221 component type only. */
11222 if (INTEGRAL_TYPE_P (scalar_type
)
11223 && (GET_MODE_BITSIZE (inner_mode
) != TYPE_PRECISION (scalar_type
)
11224 || TREE_CODE (scalar_type
) != INTEGER_TYPE
))
11225 scalar_type
= build_nonstandard_integer_type (GET_MODE_BITSIZE (inner_mode
),
11226 TYPE_UNSIGNED (scalar_type
));
11228 /* We shouldn't end up building VECTOR_TYPEs of non-scalar components.
11229 When the component mode passes the above test simply use a type
11230 corresponding to that mode. The theory is that any use that
11231 would cause problems with this will disable vectorization anyway. */
11232 else if (!SCALAR_FLOAT_TYPE_P (scalar_type
)
11233 && !INTEGRAL_TYPE_P (scalar_type
))
11234 scalar_type
= lang_hooks
.types
.type_for_mode (inner_mode
, 1);
11236 /* We can't build a vector type of elements with alignment bigger than
11238 else if (nbytes
< TYPE_ALIGN_UNIT (scalar_type
))
11239 scalar_type
= lang_hooks
.types
.type_for_mode (inner_mode
,
11240 TYPE_UNSIGNED (scalar_type
));
11242 /* If we felt back to using the mode fail if there was
11243 no scalar type for it. */
11244 if (scalar_type
== NULL_TREE
)
11247 /* If no prevailing mode was supplied, use the mode the target prefers.
11248 Otherwise lookup a vector mode based on the prevailing mode. */
11249 if (prevailing_mode
== VOIDmode
)
11251 gcc_assert (known_eq (nunits
, 0U));
11252 simd_mode
= targetm
.vectorize
.preferred_simd_mode (inner_mode
);
11253 if (SCALAR_INT_MODE_P (simd_mode
))
11255 /* Traditional behavior is not to take the integer mode
11256 literally, but simply to use it as a way of determining
11257 the vector size. It is up to mode_for_vector to decide
11258 what the TYPE_MODE should be.
11260 Note that nunits == 1 is allowed in order to support single
11261 element vector types. */
11262 if (!multiple_p (GET_MODE_SIZE (simd_mode
), nbytes
, &nunits
)
11263 || !mode_for_vector (inner_mode
, nunits
).exists (&simd_mode
))
11267 else if (SCALAR_INT_MODE_P (prevailing_mode
)
11268 || !related_vector_mode (prevailing_mode
,
11269 inner_mode
, nunits
).exists (&simd_mode
))
11271 /* Fall back to using mode_for_vector, mostly in the hope of being
11272 able to use an integer mode. */
11273 if (known_eq (nunits
, 0U)
11274 && !multiple_p (GET_MODE_SIZE (prevailing_mode
), nbytes
, &nunits
))
11277 if (!mode_for_vector (inner_mode
, nunits
).exists (&simd_mode
))
11281 vectype
= build_vector_type_for_mode (scalar_type
, simd_mode
);
11283 /* In cases where the mode was chosen by mode_for_vector, check that
11284 the target actually supports the chosen mode, or that it at least
11285 allows the vector mode to be replaced by a like-sized integer. */
11286 if (!VECTOR_MODE_P (TYPE_MODE (vectype
))
11287 && !INTEGRAL_MODE_P (TYPE_MODE (vectype
)))
11290 /* Re-attach the address-space qualifier if we canonicalized the scalar
11292 if (TYPE_ADDR_SPACE (orig_scalar_type
) != TYPE_ADDR_SPACE (vectype
))
11293 return build_qualified_type
11294 (vectype
, KEEP_QUAL_ADDR_SPACE (TYPE_QUALS (orig_scalar_type
)));
11299 /* Function get_vectype_for_scalar_type.
11301 Returns the vector type corresponding to SCALAR_TYPE as supported
11302 by the target. If GROUP_SIZE is nonzero and we're performing BB
11303 vectorization, make sure that the number of elements in the vector
11304 is no bigger than GROUP_SIZE. */
11307 get_vectype_for_scalar_type (vec_info
*vinfo
, tree scalar_type
,
11308 unsigned int group_size
)
11310 /* For BB vectorization, we should always have a group size once we've
11311 constructed the SLP tree; the only valid uses of zero GROUP_SIZEs
11312 are tentative requests during things like early data reference
11313 analysis and pattern recognition. */
11314 if (is_a
<bb_vec_info
> (vinfo
))
11315 gcc_assert (vinfo
->slp_instances
.is_empty () || group_size
!= 0);
11319 tree vectype
= get_related_vectype_for_scalar_type (vinfo
->vector_mode
,
11321 if (vectype
&& vinfo
->vector_mode
== VOIDmode
)
11322 vinfo
->vector_mode
= TYPE_MODE (vectype
);
11324 /* Register the natural choice of vector type, before the group size
11325 has been applied. */
11327 vinfo
->used_vector_modes
.add (TYPE_MODE (vectype
));
11329 /* If the natural choice of vector type doesn't satisfy GROUP_SIZE,
11330 try again with an explicit number of elements. */
11333 && maybe_ge (TYPE_VECTOR_SUBPARTS (vectype
), group_size
))
11335 /* Start with the biggest number of units that fits within
11336 GROUP_SIZE and halve it until we find a valid vector type.
11337 Usually either the first attempt will succeed or all will
11338 fail (in the latter case because GROUP_SIZE is too small
11339 for the target), but it's possible that a target could have
11340 a hole between supported vector types.
11342 If GROUP_SIZE is not a power of 2, this has the effect of
11343 trying the largest power of 2 that fits within the group,
11344 even though the group is not a multiple of that vector size.
11345 The BB vectorizer will then try to carve up the group into
11347 unsigned int nunits
= 1 << floor_log2 (group_size
);
11350 vectype
= get_related_vectype_for_scalar_type (vinfo
->vector_mode
,
11351 scalar_type
, nunits
);
11354 while (nunits
> 1 && !vectype
);
11360 /* Return the vector type corresponding to SCALAR_TYPE as supported
11361 by the target. NODE, if nonnull, is the SLP tree node that will
11362 use the returned vector type. */
11365 get_vectype_for_scalar_type (vec_info
*vinfo
, tree scalar_type
, slp_tree node
)
11367 unsigned int group_size
= 0;
11370 group_size
= SLP_TREE_SCALAR_OPS (node
).length ();
11371 if (group_size
== 0)
11372 group_size
= SLP_TREE_SCALAR_STMTS (node
).length ();
11374 return get_vectype_for_scalar_type (vinfo
, scalar_type
, group_size
);
11377 /* Function get_mask_type_for_scalar_type.
11379 Returns the mask type corresponding to a result of comparison
11380 of vectors of specified SCALAR_TYPE as supported by target.
11381 If GROUP_SIZE is nonzero and we're performing BB vectorization,
11382 make sure that the number of elements in the vector is no bigger
11383 than GROUP_SIZE. */
11386 get_mask_type_for_scalar_type (vec_info
*vinfo
, tree scalar_type
,
11387 unsigned int group_size
)
11389 tree vectype
= get_vectype_for_scalar_type (vinfo
, scalar_type
, group_size
);
11394 return truth_type_for (vectype
);
11397 /* Function get_same_sized_vectype
11399 Returns a vector type corresponding to SCALAR_TYPE of size
11400 VECTOR_TYPE if supported by the target. */
11403 get_same_sized_vectype (tree scalar_type
, tree vector_type
)
11405 if (VECT_SCALAR_BOOLEAN_TYPE_P (scalar_type
))
11406 return truth_type_for (vector_type
);
11408 poly_uint64 nunits
;
11409 if (!multiple_p (GET_MODE_SIZE (TYPE_MODE (vector_type
)),
11410 GET_MODE_SIZE (TYPE_MODE (scalar_type
)), &nunits
))
11413 return get_related_vectype_for_scalar_type (TYPE_MODE (vector_type
),
11414 scalar_type
, nunits
);
11417 /* Return true if replacing LOOP_VINFO->vector_mode with VECTOR_MODE
11418 would not change the chosen vector modes. */
11421 vect_chooses_same_modes_p (vec_info
*vinfo
, machine_mode vector_mode
)
11423 for (vec_info::mode_set::iterator i
= vinfo
->used_vector_modes
.begin ();
11424 i
!= vinfo
->used_vector_modes
.end (); ++i
)
11425 if (!VECTOR_MODE_P (*i
)
11426 || related_vector_mode (vector_mode
, GET_MODE_INNER (*i
), 0) != *i
)
11431 /* Function vect_is_simple_use.
11434 VINFO - the vect info of the loop or basic block that is being vectorized.
11435 OPERAND - operand in the loop or bb.
11437 DEF_STMT_INFO_OUT (optional) - information about the defining stmt in
11438 case OPERAND is an SSA_NAME that is defined in the vectorizable region
11439 DEF_STMT_OUT (optional) - the defining stmt in case OPERAND is an SSA_NAME;
11440 the definition could be anywhere in the function
11441 DT - the type of definition
11443 Returns whether a stmt with OPERAND can be vectorized.
11444 For loops, supportable operands are constants, loop invariants, and operands
11445 that are defined by the current iteration of the loop. Unsupportable
11446 operands are those that are defined by a previous iteration of the loop (as
11447 is the case in reduction/induction computations).
11448 For basic blocks, supportable operands are constants and bb invariants.
11449 For now, operands defined outside the basic block are not supported. */
11452 vect_is_simple_use (tree operand
, vec_info
*vinfo
, enum vect_def_type
*dt
,
11453 stmt_vec_info
*def_stmt_info_out
, gimple
**def_stmt_out
)
11455 if (def_stmt_info_out
)
11456 *def_stmt_info_out
= NULL
;
11458 *def_stmt_out
= NULL
;
11459 *dt
= vect_unknown_def_type
;
11461 if (dump_enabled_p ())
11463 dump_printf_loc (MSG_NOTE
, vect_location
,
11464 "vect_is_simple_use: operand ");
11465 if (TREE_CODE (operand
) == SSA_NAME
11466 && !SSA_NAME_IS_DEFAULT_DEF (operand
))
11467 dump_gimple_expr (MSG_NOTE
, TDF_SLIM
, SSA_NAME_DEF_STMT (operand
), 0);
11469 dump_generic_expr (MSG_NOTE
, TDF_SLIM
, operand
);
11472 if (CONSTANT_CLASS_P (operand
))
11473 *dt
= vect_constant_def
;
11474 else if (is_gimple_min_invariant (operand
))
11475 *dt
= vect_external_def
;
11476 else if (TREE_CODE (operand
) != SSA_NAME
)
11477 *dt
= vect_unknown_def_type
;
11478 else if (SSA_NAME_IS_DEFAULT_DEF (operand
))
11479 *dt
= vect_external_def
;
11482 gimple
*def_stmt
= SSA_NAME_DEF_STMT (operand
);
11483 stmt_vec_info stmt_vinfo
= vinfo
->lookup_def (operand
);
11485 *dt
= vect_external_def
;
11488 stmt_vinfo
= vect_stmt_to_vectorize (stmt_vinfo
);
11489 def_stmt
= stmt_vinfo
->stmt
;
11490 switch (gimple_code (def_stmt
))
11493 case GIMPLE_ASSIGN
:
11495 *dt
= STMT_VINFO_DEF_TYPE (stmt_vinfo
);
11498 *dt
= vect_unknown_def_type
;
11501 if (def_stmt_info_out
)
11502 *def_stmt_info_out
= stmt_vinfo
;
11505 *def_stmt_out
= def_stmt
;
11508 if (dump_enabled_p ())
11510 dump_printf (MSG_NOTE
, ", type of def: ");
11513 case vect_uninitialized_def
:
11514 dump_printf (MSG_NOTE
, "uninitialized\n");
11516 case vect_constant_def
:
11517 dump_printf (MSG_NOTE
, "constant\n");
11519 case vect_external_def
:
11520 dump_printf (MSG_NOTE
, "external\n");
11522 case vect_internal_def
:
11523 dump_printf (MSG_NOTE
, "internal\n");
11525 case vect_induction_def
:
11526 dump_printf (MSG_NOTE
, "induction\n");
11528 case vect_reduction_def
:
11529 dump_printf (MSG_NOTE
, "reduction\n");
11531 case vect_double_reduction_def
:
11532 dump_printf (MSG_NOTE
, "double reduction\n");
11534 case vect_nested_cycle
:
11535 dump_printf (MSG_NOTE
, "nested cycle\n");
11537 case vect_unknown_def_type
:
11538 dump_printf (MSG_NOTE
, "unknown\n");
11543 if (*dt
== vect_unknown_def_type
)
11545 if (dump_enabled_p ())
11546 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
11547 "Unsupported pattern.\n");
11554 /* Function vect_is_simple_use.
11556 Same as vect_is_simple_use but also determines the vector operand
11557 type of OPERAND and stores it to *VECTYPE. If the definition of
11558 OPERAND is vect_uninitialized_def, vect_constant_def or
11559 vect_external_def *VECTYPE will be set to NULL_TREE and the caller
11560 is responsible to compute the best suited vector type for the
11564 vect_is_simple_use (tree operand
, vec_info
*vinfo
, enum vect_def_type
*dt
,
11565 tree
*vectype
, stmt_vec_info
*def_stmt_info_out
,
11566 gimple
**def_stmt_out
)
11568 stmt_vec_info def_stmt_info
;
11570 if (!vect_is_simple_use (operand
, vinfo
, dt
, &def_stmt_info
, &def_stmt
))
11574 *def_stmt_out
= def_stmt
;
11575 if (def_stmt_info_out
)
11576 *def_stmt_info_out
= def_stmt_info
;
11578 /* Now get a vector type if the def is internal, otherwise supply
11579 NULL_TREE and leave it up to the caller to figure out a proper
11580 type for the use stmt. */
11581 if (*dt
== vect_internal_def
11582 || *dt
== vect_induction_def
11583 || *dt
== vect_reduction_def
11584 || *dt
== vect_double_reduction_def
11585 || *dt
== vect_nested_cycle
)
11587 *vectype
= STMT_VINFO_VECTYPE (def_stmt_info
);
11588 gcc_assert (*vectype
!= NULL_TREE
);
11589 if (dump_enabled_p ())
11590 dump_printf_loc (MSG_NOTE
, vect_location
,
11591 "vect_is_simple_use: vectype %T\n", *vectype
);
11593 else if (*dt
== vect_uninitialized_def
11594 || *dt
== vect_constant_def
11595 || *dt
== vect_external_def
)
11596 *vectype
= NULL_TREE
;
11598 gcc_unreachable ();
11604 /* Function supportable_widening_operation
11606 Check whether an operation represented by the code CODE is a
11607 widening operation that is supported by the target platform in
11608 vector form (i.e., when operating on arguments of type VECTYPE_IN
11609 producing a result of type VECTYPE_OUT).
11611 Widening operations we currently support are NOP (CONVERT), FLOAT,
11612 FIX_TRUNC and WIDEN_MULT. This function checks if these operations
11613 are supported by the target platform either directly (via vector
11614 tree-codes), or via target builtins.
11617 - CODE1 and CODE2 are codes of vector operations to be used when
11618 vectorizing the operation, if available.
11619 - MULTI_STEP_CVT determines the number of required intermediate steps in
11620 case of multi-step conversion (like char->short->int - in that case
11621 MULTI_STEP_CVT will be 1).
11622 - INTERM_TYPES contains the intermediate type required to perform the
11623 widening operation (short in the above example). */
11626 supportable_widening_operation (enum tree_code code
, stmt_vec_info stmt_info
,
11627 tree vectype_out
, tree vectype_in
,
11628 enum tree_code
*code1
, enum tree_code
*code2
,
11629 int *multi_step_cvt
,
11630 vec
<tree
> *interm_types
)
11632 loop_vec_info loop_info
= STMT_VINFO_LOOP_VINFO (stmt_info
);
11633 class loop
*vect_loop
= NULL
;
11634 machine_mode vec_mode
;
11635 enum insn_code icode1
, icode2
;
11636 optab optab1
, optab2
;
11637 tree vectype
= vectype_in
;
11638 tree wide_vectype
= vectype_out
;
11639 enum tree_code c1
, c2
;
11641 tree prev_type
, intermediate_type
;
11642 machine_mode intermediate_mode
, prev_mode
;
11643 optab optab3
, optab4
;
11645 *multi_step_cvt
= 0;
11647 vect_loop
= LOOP_VINFO_LOOP (loop_info
);
11651 case WIDEN_MULT_EXPR
:
11652 /* The result of a vectorized widening operation usually requires
11653 two vectors (because the widened results do not fit into one vector).
11654 The generated vector results would normally be expected to be
11655 generated in the same order as in the original scalar computation,
11656 i.e. if 8 results are generated in each vector iteration, they are
11657 to be organized as follows:
11658 vect1: [res1,res2,res3,res4],
11659 vect2: [res5,res6,res7,res8].
11661 However, in the special case that the result of the widening
11662 operation is used in a reduction computation only, the order doesn't
11663 matter (because when vectorizing a reduction we change the order of
11664 the computation). Some targets can take advantage of this and
11665 generate more efficient code. For example, targets like Altivec,
11666 that support widen_mult using a sequence of {mult_even,mult_odd}
11667 generate the following vectors:
11668 vect1: [res1,res3,res5,res7],
11669 vect2: [res2,res4,res6,res8].
11671 When vectorizing outer-loops, we execute the inner-loop sequentially
11672 (each vectorized inner-loop iteration contributes to VF outer-loop
11673 iterations in parallel). We therefore don't allow to change the
11674 order of the computation in the inner-loop during outer-loop
11676 /* TODO: Another case in which order doesn't *really* matter is when we
11677 widen and then contract again, e.g. (short)((int)x * y >> 8).
11678 Normally, pack_trunc performs an even/odd permute, whereas the
11679 repack from an even/odd expansion would be an interleave, which
11680 would be significantly simpler for e.g. AVX2. */
11681 /* In any case, in order to avoid duplicating the code below, recurse
11682 on VEC_WIDEN_MULT_EVEN_EXPR. If it succeeds, all the return values
11683 are properly set up for the caller. If we fail, we'll continue with
11684 a VEC_WIDEN_MULT_LO/HI_EXPR check. */
11686 && STMT_VINFO_RELEVANT (stmt_info
) == vect_used_by_reduction
11687 && !nested_in_vect_loop_p (vect_loop
, stmt_info
)
11688 && supportable_widening_operation (VEC_WIDEN_MULT_EVEN_EXPR
,
11689 stmt_info
, vectype_out
,
11690 vectype_in
, code1
, code2
,
11691 multi_step_cvt
, interm_types
))
11693 /* Elements in a vector with vect_used_by_reduction property cannot
11694 be reordered if the use chain with this property does not have the
11695 same operation. One such an example is s += a * b, where elements
11696 in a and b cannot be reordered. Here we check if the vector defined
11697 by STMT is only directly used in the reduction statement. */
11698 tree lhs
= gimple_assign_lhs (stmt_info
->stmt
);
11699 stmt_vec_info use_stmt_info
= loop_info
->lookup_single_use (lhs
);
11701 && STMT_VINFO_DEF_TYPE (use_stmt_info
) == vect_reduction_def
)
11704 c1
= VEC_WIDEN_MULT_LO_EXPR
;
11705 c2
= VEC_WIDEN_MULT_HI_EXPR
;
11708 case DOT_PROD_EXPR
:
11709 c1
= DOT_PROD_EXPR
;
11710 c2
= DOT_PROD_EXPR
;
11718 case VEC_WIDEN_MULT_EVEN_EXPR
:
11719 /* Support the recursion induced just above. */
11720 c1
= VEC_WIDEN_MULT_EVEN_EXPR
;
11721 c2
= VEC_WIDEN_MULT_ODD_EXPR
;
11724 case WIDEN_LSHIFT_EXPR
:
11725 c1
= VEC_WIDEN_LSHIFT_LO_EXPR
;
11726 c2
= VEC_WIDEN_LSHIFT_HI_EXPR
;
11730 c1
= VEC_UNPACK_LO_EXPR
;
11731 c2
= VEC_UNPACK_HI_EXPR
;
11735 c1
= VEC_UNPACK_FLOAT_LO_EXPR
;
11736 c2
= VEC_UNPACK_FLOAT_HI_EXPR
;
11739 case FIX_TRUNC_EXPR
:
11740 c1
= VEC_UNPACK_FIX_TRUNC_LO_EXPR
;
11741 c2
= VEC_UNPACK_FIX_TRUNC_HI_EXPR
;
11745 gcc_unreachable ();
11748 if (BYTES_BIG_ENDIAN
&& c1
!= VEC_WIDEN_MULT_EVEN_EXPR
)
11749 std::swap (c1
, c2
);
11751 if (code
== FIX_TRUNC_EXPR
)
11753 /* The signedness is determined from output operand. */
11754 optab1
= optab_for_tree_code (c1
, vectype_out
, optab_default
);
11755 optab2
= optab_for_tree_code (c2
, vectype_out
, optab_default
);
11757 else if (CONVERT_EXPR_CODE_P (code
)
11758 && VECTOR_BOOLEAN_TYPE_P (wide_vectype
)
11759 && VECTOR_BOOLEAN_TYPE_P (vectype
)
11760 && TYPE_MODE (wide_vectype
) == TYPE_MODE (vectype
)
11761 && SCALAR_INT_MODE_P (TYPE_MODE (vectype
)))
11763 /* If the input and result modes are the same, a different optab
11764 is needed where we pass in the number of units in vectype. */
11765 optab1
= vec_unpacks_sbool_lo_optab
;
11766 optab2
= vec_unpacks_sbool_hi_optab
;
11770 optab1
= optab_for_tree_code (c1
, vectype
, optab_default
);
11771 optab2
= optab_for_tree_code (c2
, vectype
, optab_default
);
11774 if (!optab1
|| !optab2
)
11777 vec_mode
= TYPE_MODE (vectype
);
11778 if ((icode1
= optab_handler (optab1
, vec_mode
)) == CODE_FOR_nothing
11779 || (icode2
= optab_handler (optab2
, vec_mode
)) == CODE_FOR_nothing
)
11785 if (insn_data
[icode1
].operand
[0].mode
== TYPE_MODE (wide_vectype
)
11786 && insn_data
[icode2
].operand
[0].mode
== TYPE_MODE (wide_vectype
))
11788 if (!VECTOR_BOOLEAN_TYPE_P (vectype
))
11790 /* For scalar masks we may have different boolean
11791 vector types having the same QImode. Thus we
11792 add additional check for elements number. */
11793 if (known_eq (TYPE_VECTOR_SUBPARTS (vectype
),
11794 TYPE_VECTOR_SUBPARTS (wide_vectype
) * 2))
11798 /* Check if it's a multi-step conversion that can be done using intermediate
11801 prev_type
= vectype
;
11802 prev_mode
= vec_mode
;
11804 if (!CONVERT_EXPR_CODE_P (code
))
11807 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
11808 intermediate steps in promotion sequence. We try
11809 MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do
11811 interm_types
->create (MAX_INTERM_CVT_STEPS
);
11812 for (i
= 0; i
< MAX_INTERM_CVT_STEPS
; i
++)
11814 intermediate_mode
= insn_data
[icode1
].operand
[0].mode
;
11815 if (VECTOR_BOOLEAN_TYPE_P (prev_type
))
11817 = vect_halve_mask_nunits (prev_type
, intermediate_mode
);
11820 = lang_hooks
.types
.type_for_mode (intermediate_mode
,
11821 TYPE_UNSIGNED (prev_type
));
11823 if (VECTOR_BOOLEAN_TYPE_P (intermediate_type
)
11824 && VECTOR_BOOLEAN_TYPE_P (prev_type
)
11825 && intermediate_mode
== prev_mode
11826 && SCALAR_INT_MODE_P (prev_mode
))
11828 /* If the input and result modes are the same, a different optab
11829 is needed where we pass in the number of units in vectype. */
11830 optab3
= vec_unpacks_sbool_lo_optab
;
11831 optab4
= vec_unpacks_sbool_hi_optab
;
11835 optab3
= optab_for_tree_code (c1
, intermediate_type
, optab_default
);
11836 optab4
= optab_for_tree_code (c2
, intermediate_type
, optab_default
);
11839 if (!optab3
|| !optab4
11840 || (icode1
= optab_handler (optab1
, prev_mode
)) == CODE_FOR_nothing
11841 || insn_data
[icode1
].operand
[0].mode
!= intermediate_mode
11842 || (icode2
= optab_handler (optab2
, prev_mode
)) == CODE_FOR_nothing
11843 || insn_data
[icode2
].operand
[0].mode
!= intermediate_mode
11844 || ((icode1
= optab_handler (optab3
, intermediate_mode
))
11845 == CODE_FOR_nothing
)
11846 || ((icode2
= optab_handler (optab4
, intermediate_mode
))
11847 == CODE_FOR_nothing
))
11850 interm_types
->quick_push (intermediate_type
);
11851 (*multi_step_cvt
)++;
11853 if (insn_data
[icode1
].operand
[0].mode
== TYPE_MODE (wide_vectype
)
11854 && insn_data
[icode2
].operand
[0].mode
== TYPE_MODE (wide_vectype
))
11856 if (!VECTOR_BOOLEAN_TYPE_P (vectype
))
11858 if (known_eq (TYPE_VECTOR_SUBPARTS (intermediate_type
),
11859 TYPE_VECTOR_SUBPARTS (wide_vectype
) * 2))
11863 prev_type
= intermediate_type
;
11864 prev_mode
= intermediate_mode
;
11867 interm_types
->release ();
11872 /* Function supportable_narrowing_operation
11874 Check whether an operation represented by the code CODE is a
11875 narrowing operation that is supported by the target platform in
11876 vector form (i.e., when operating on arguments of type VECTYPE_IN
11877 and producing a result of type VECTYPE_OUT).
11879 Narrowing operations we currently support are NOP (CONVERT), FIX_TRUNC
11880 and FLOAT. This function checks if these operations are supported by
11881 the target platform directly via vector tree-codes.
11884 - CODE1 is the code of a vector operation to be used when
11885 vectorizing the operation, if available.
11886 - MULTI_STEP_CVT determines the number of required intermediate steps in
11887 case of multi-step conversion (like int->short->char - in that case
11888 MULTI_STEP_CVT will be 1).
11889 - INTERM_TYPES contains the intermediate type required to perform the
11890 narrowing operation (short in the above example). */
11893 supportable_narrowing_operation (enum tree_code code
,
11894 tree vectype_out
, tree vectype_in
,
11895 enum tree_code
*code1
, int *multi_step_cvt
,
11896 vec
<tree
> *interm_types
)
11898 machine_mode vec_mode
;
11899 enum insn_code icode1
;
11900 optab optab1
, interm_optab
;
11901 tree vectype
= vectype_in
;
11902 tree narrow_vectype
= vectype_out
;
11904 tree intermediate_type
, prev_type
;
11905 machine_mode intermediate_mode
, prev_mode
;
11909 *multi_step_cvt
= 0;
11913 c1
= VEC_PACK_TRUNC_EXPR
;
11914 if (VECTOR_BOOLEAN_TYPE_P (narrow_vectype
)
11915 && VECTOR_BOOLEAN_TYPE_P (vectype
)
11916 && TYPE_MODE (narrow_vectype
) == TYPE_MODE (vectype
)
11917 && SCALAR_INT_MODE_P (TYPE_MODE (vectype
)))
11918 optab1
= vec_pack_sbool_trunc_optab
;
11920 optab1
= optab_for_tree_code (c1
, vectype
, optab_default
);
11923 case FIX_TRUNC_EXPR
:
11924 c1
= VEC_PACK_FIX_TRUNC_EXPR
;
11925 /* The signedness is determined from output operand. */
11926 optab1
= optab_for_tree_code (c1
, vectype_out
, optab_default
);
11930 c1
= VEC_PACK_FLOAT_EXPR
;
11931 optab1
= optab_for_tree_code (c1
, vectype
, optab_default
);
11935 gcc_unreachable ();
11941 vec_mode
= TYPE_MODE (vectype
);
11942 if ((icode1
= optab_handler (optab1
, vec_mode
)) == CODE_FOR_nothing
)
11947 if (insn_data
[icode1
].operand
[0].mode
== TYPE_MODE (narrow_vectype
))
11949 if (!VECTOR_BOOLEAN_TYPE_P (vectype
))
11951 /* For scalar masks we may have different boolean
11952 vector types having the same QImode. Thus we
11953 add additional check for elements number. */
11954 if (known_eq (TYPE_VECTOR_SUBPARTS (vectype
) * 2,
11955 TYPE_VECTOR_SUBPARTS (narrow_vectype
)))
11959 if (code
== FLOAT_EXPR
)
11962 /* Check if it's a multi-step conversion that can be done using intermediate
11964 prev_mode
= vec_mode
;
11965 prev_type
= vectype
;
11966 if (code
== FIX_TRUNC_EXPR
)
11967 uns
= TYPE_UNSIGNED (vectype_out
);
11969 uns
= TYPE_UNSIGNED (vectype
);
11971 /* For multi-step FIX_TRUNC_EXPR prefer signed floating to integer
11972 conversion over unsigned, as unsigned FIX_TRUNC_EXPR is often more
11973 costly than signed. */
11974 if (code
== FIX_TRUNC_EXPR
&& uns
)
11976 enum insn_code icode2
;
11979 = lang_hooks
.types
.type_for_mode (TYPE_MODE (vectype_out
), 0);
11981 = optab_for_tree_code (c1
, intermediate_type
, optab_default
);
11982 if (interm_optab
!= unknown_optab
11983 && (icode2
= optab_handler (optab1
, vec_mode
)) != CODE_FOR_nothing
11984 && insn_data
[icode1
].operand
[0].mode
11985 == insn_data
[icode2
].operand
[0].mode
)
11988 optab1
= interm_optab
;
11993 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
11994 intermediate steps in promotion sequence. We try
11995 MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do not. */
11996 interm_types
->create (MAX_INTERM_CVT_STEPS
);
11997 for (i
= 0; i
< MAX_INTERM_CVT_STEPS
; i
++)
11999 intermediate_mode
= insn_data
[icode1
].operand
[0].mode
;
12000 if (VECTOR_BOOLEAN_TYPE_P (prev_type
))
12002 = vect_double_mask_nunits (prev_type
, intermediate_mode
);
12005 = lang_hooks
.types
.type_for_mode (intermediate_mode
, uns
);
12006 if (VECTOR_BOOLEAN_TYPE_P (intermediate_type
)
12007 && VECTOR_BOOLEAN_TYPE_P (prev_type
)
12008 && intermediate_mode
== prev_mode
12009 && SCALAR_INT_MODE_P (prev_mode
))
12010 interm_optab
= vec_pack_sbool_trunc_optab
;
12013 = optab_for_tree_code (VEC_PACK_TRUNC_EXPR
, intermediate_type
,
12016 || ((icode1
= optab_handler (optab1
, prev_mode
)) == CODE_FOR_nothing
)
12017 || insn_data
[icode1
].operand
[0].mode
!= intermediate_mode
12018 || ((icode1
= optab_handler (interm_optab
, intermediate_mode
))
12019 == CODE_FOR_nothing
))
12022 interm_types
->quick_push (intermediate_type
);
12023 (*multi_step_cvt
)++;
12025 if (insn_data
[icode1
].operand
[0].mode
== TYPE_MODE (narrow_vectype
))
12027 if (!VECTOR_BOOLEAN_TYPE_P (vectype
))
12029 if (known_eq (TYPE_VECTOR_SUBPARTS (intermediate_type
) * 2,
12030 TYPE_VECTOR_SUBPARTS (narrow_vectype
)))
12034 prev_mode
= intermediate_mode
;
12035 prev_type
= intermediate_type
;
12036 optab1
= interm_optab
;
12039 interm_types
->release ();
12043 /* Generate and return a statement that sets vector mask MASK such that
12044 MASK[I] is true iff J + START_INDEX < END_INDEX for all J <= I. */
12047 vect_gen_while (tree mask
, tree start_index
, tree end_index
)
12049 tree cmp_type
= TREE_TYPE (start_index
);
12050 tree mask_type
= TREE_TYPE (mask
);
12051 gcc_checking_assert (direct_internal_fn_supported_p (IFN_WHILE_ULT
,
12052 cmp_type
, mask_type
,
12053 OPTIMIZE_FOR_SPEED
));
12054 gcall
*call
= gimple_build_call_internal (IFN_WHILE_ULT
, 3,
12055 start_index
, end_index
,
12056 build_zero_cst (mask_type
));
12057 gimple_call_set_lhs (call
, mask
);
12061 /* Generate a vector mask of type MASK_TYPE for which index I is false iff
12062 J + START_INDEX < END_INDEX for all J <= I. Add the statements to SEQ. */
12065 vect_gen_while_not (gimple_seq
*seq
, tree mask_type
, tree start_index
,
12068 tree tmp
= make_ssa_name (mask_type
);
12069 gcall
*call
= vect_gen_while (tmp
, start_index
, end_index
);
12070 gimple_seq_add_stmt (seq
, call
);
12071 return gimple_build (seq
, BIT_NOT_EXPR
, mask_type
, tmp
);
12074 /* Try to compute the vector types required to vectorize STMT_INFO,
12075 returning true on success and false if vectorization isn't possible.
12076 If GROUP_SIZE is nonzero and we're performing BB vectorization,
12077 make sure that the number of elements in the vectors is no bigger
12082 - Set *STMT_VECTYPE_OUT to:
12083 - NULL_TREE if the statement doesn't need to be vectorized;
12084 - the equivalent of STMT_VINFO_VECTYPE otherwise.
12086 - Set *NUNITS_VECTYPE_OUT to the vector type that contains the maximum
12087 number of units needed to vectorize STMT_INFO, or NULL_TREE if the
12088 statement does not help to determine the overall number of units. */
12091 vect_get_vector_types_for_stmt (stmt_vec_info stmt_info
,
12092 tree
*stmt_vectype_out
,
12093 tree
*nunits_vectype_out
,
12094 unsigned int group_size
)
12096 vec_info
*vinfo
= stmt_info
->vinfo
;
12097 gimple
*stmt
= stmt_info
->stmt
;
12099 /* For BB vectorization, we should always have a group size once we've
12100 constructed the SLP tree; the only valid uses of zero GROUP_SIZEs
12101 are tentative requests during things like early data reference
12102 analysis and pattern recognition. */
12103 if (is_a
<bb_vec_info
> (vinfo
))
12104 gcc_assert (vinfo
->slp_instances
.is_empty () || group_size
!= 0);
12108 *stmt_vectype_out
= NULL_TREE
;
12109 *nunits_vectype_out
= NULL_TREE
;
12111 if (gimple_get_lhs (stmt
) == NULL_TREE
12112 /* MASK_STORE has no lhs, but is ok. */
12113 && !gimple_call_internal_p (stmt
, IFN_MASK_STORE
))
12115 if (is_a
<gcall
*> (stmt
))
12117 /* Ignore calls with no lhs. These must be calls to
12118 #pragma omp simd functions, and what vectorization factor
12119 it really needs can't be determined until
12120 vectorizable_simd_clone_call. */
12121 if (dump_enabled_p ())
12122 dump_printf_loc (MSG_NOTE
, vect_location
,
12123 "defer to SIMD clone analysis.\n");
12124 return opt_result::success ();
12127 return opt_result::failure_at (stmt
,
12128 "not vectorized: irregular stmt.%G", stmt
);
12131 if (VECTOR_MODE_P (TYPE_MODE (gimple_expr_type (stmt
))))
12132 return opt_result::failure_at (stmt
,
12133 "not vectorized: vector stmt in loop:%G",
12137 tree scalar_type
= NULL_TREE
;
12138 if (group_size
== 0 && STMT_VINFO_VECTYPE (stmt_info
))
12140 vectype
= STMT_VINFO_VECTYPE (stmt_info
);
12141 if (dump_enabled_p ())
12142 dump_printf_loc (MSG_NOTE
, vect_location
,
12143 "precomputed vectype: %T\n", vectype
);
12145 else if (vect_use_mask_type_p (stmt_info
))
12147 unsigned int precision
= stmt_info
->mask_precision
;
12148 scalar_type
= build_nonstandard_integer_type (precision
, 1);
12149 vectype
= get_mask_type_for_scalar_type (vinfo
, scalar_type
, group_size
);
12151 return opt_result::failure_at (stmt
, "not vectorized: unsupported"
12152 " data-type %T\n", scalar_type
);
12153 if (dump_enabled_p ())
12154 dump_printf_loc (MSG_NOTE
, vect_location
, "vectype: %T\n", vectype
);
12158 if (data_reference
*dr
= STMT_VINFO_DATA_REF (stmt_info
))
12159 scalar_type
= TREE_TYPE (DR_REF (dr
));
12160 else if (gimple_call_internal_p (stmt
, IFN_MASK_STORE
))
12161 scalar_type
= TREE_TYPE (gimple_call_arg (stmt
, 3));
12163 scalar_type
= TREE_TYPE (gimple_get_lhs (stmt
));
12165 if (dump_enabled_p ())
12168 dump_printf_loc (MSG_NOTE
, vect_location
,
12169 "get vectype for scalar type (group size %d):"
12170 " %T\n", group_size
, scalar_type
);
12172 dump_printf_loc (MSG_NOTE
, vect_location
,
12173 "get vectype for scalar type: %T\n", scalar_type
);
12175 vectype
= get_vectype_for_scalar_type (vinfo
, scalar_type
, group_size
);
12177 return opt_result::failure_at (stmt
,
12179 " unsupported data-type %T\n",
12182 if (dump_enabled_p ())
12183 dump_printf_loc (MSG_NOTE
, vect_location
, "vectype: %T\n", vectype
);
12185 *stmt_vectype_out
= vectype
;
12187 /* Don't try to compute scalar types if the stmt produces a boolean
12188 vector; use the existing vector type instead. */
12189 tree nunits_vectype
= vectype
;
12190 if (!VECTOR_BOOLEAN_TYPE_P (vectype
))
12192 /* The number of units is set according to the smallest scalar
12193 type (or the largest vector size, but we only support one
12194 vector size per vectorization). */
12195 HOST_WIDE_INT dummy
;
12196 scalar_type
= vect_get_smallest_scalar_type (stmt_info
, &dummy
, &dummy
);
12197 if (scalar_type
!= TREE_TYPE (vectype
))
12199 if (dump_enabled_p ())
12200 dump_printf_loc (MSG_NOTE
, vect_location
,
12201 "get vectype for smallest scalar type: %T\n",
12203 nunits_vectype
= get_vectype_for_scalar_type (vinfo
, scalar_type
,
12205 if (!nunits_vectype
)
12206 return opt_result::failure_at
12207 (stmt
, "not vectorized: unsupported data-type %T\n",
12209 if (dump_enabled_p ())
12210 dump_printf_loc (MSG_NOTE
, vect_location
, "nunits vectype: %T\n",
12215 gcc_assert (multiple_p (TYPE_VECTOR_SUBPARTS (nunits_vectype
),
12216 TYPE_VECTOR_SUBPARTS (*stmt_vectype_out
)));
12218 if (dump_enabled_p ())
12220 dump_printf_loc (MSG_NOTE
, vect_location
, "nunits = ");
12221 dump_dec (MSG_NOTE
, TYPE_VECTOR_SUBPARTS (nunits_vectype
));
12222 dump_printf (MSG_NOTE
, "\n");
12225 *nunits_vectype_out
= nunits_vectype
;
12226 return opt_result::success ();