/* Statement Analysis and Transformation for Vectorization
   Copyright (C) 2003-2020 Free Software Foundation, Inc.
   Contributed by Dorit Naishlos <dorit@il.ibm.com>
   and Ira Rosen <irar@il.ibm.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
#include "coretypes.h"
#include "optabs-tree.h"
#include "insn-config.h"
#include "recog.h"		/* FIXME: for insn_data */
#include "fold-const.h"
#include "stor-layout.h"
#include "gimple-iterator.h"
#include "gimplify-me.h"
#include "tree-ssa-loop-manip.h"
#include "tree-ssa-loop.h"
#include "tree-scalar-evolution.h"
#include "tree-vectorizer.h"
#include "internal-fn.h"
#include "tree-vector-builder.h"
#include "vec-perm-indices.h"
#include "tree-ssa-loop-niter.h"
#include "gimple-fold.h"

/* For lang_hooks.types.type_for_mode.  */
#include "langhooks.h"
/* Return the vectorized type for the given statement.  */

static tree
stmt_vectype (class _stmt_vec_info *stmt_info)
{
  return STMT_VINFO_VECTYPE (stmt_info);
}
/* Return TRUE iff the given statement is in an inner loop relative to
   the loop being vectorized.  */

bool
stmt_in_inner_loop_p (vec_info *vinfo, class _stmt_vec_info *stmt_info)
{
  gimple *stmt = STMT_VINFO_STMT (stmt_info);
  basic_block bb = gimple_bb (stmt);
  loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
  class loop *loop;

  if (!loop_vinfo)
    return false;

  loop = LOOP_VINFO_LOOP (loop_vinfo);

  return (bb->loop_father == loop->inner);
}
/* Record the cost of a statement, either by directly informing the
   target model or by saving it in a vector for later processing.
   Return a preliminary estimate of the statement's cost.  */

unsigned
record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
		  enum vect_cost_for_stmt kind, stmt_vec_info stmt_info,
		  tree vectype, int misalign,
		  enum vect_cost_model_location where)
{
  if ((kind == vector_load || kind == unaligned_load)
      && (stmt_info && STMT_VINFO_GATHER_SCATTER_P (stmt_info)))
    kind = vector_gather_load;
  if ((kind == vector_store || kind == unaligned_store)
      && (stmt_info && STMT_VINFO_GATHER_SCATTER_P (stmt_info)))
    kind = vector_scatter_store;

  stmt_info_for_cost si = { count, kind, where, stmt_info, vectype, misalign };
  body_cost_vec->safe_push (si);

  return (unsigned)
    (builtin_vectorization_cost (kind, vectype, misalign) * count);
}
/* Return a variable of type ELEM_TYPE[NELEMS].  */

static tree
create_vector_array (tree elem_type, unsigned HOST_WIDE_INT nelems)
{
  return create_tmp_var (build_array_type_nelts (elem_type, nelems),
			 "vect_array");
}
/* ARRAY is an array of vectors created by create_vector_array.
   Return an SSA_NAME for the vector in index N.  The reference
   is part of the vectorization of STMT_INFO and the vector is associated
   with scalar destination SCALAR_DEST.  */

static tree
read_vector_array (vec_info *vinfo,
		   stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
		   tree scalar_dest, tree array, unsigned HOST_WIDE_INT n)
{
  tree vect_type, vect, vect_name, array_ref;
  gimple *new_stmt;

  gcc_assert (TREE_CODE (TREE_TYPE (array)) == ARRAY_TYPE);
  vect_type = TREE_TYPE (TREE_TYPE (array));
  vect = vect_create_destination_var (scalar_dest, vect_type);
  array_ref = build4 (ARRAY_REF, vect_type, array,
		      build_int_cst (size_type_node, n),
		      NULL_TREE, NULL_TREE);

  new_stmt = gimple_build_assign (vect, array_ref);
  vect_name = make_ssa_name (vect, new_stmt);
  gimple_assign_set_lhs (new_stmt, vect_name);
  vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);

  return vect_name;
}
/* ARRAY is an array of vectors created by create_vector_array.
   Emit code to store SSA_NAME VECT in index N of the array.
   The store is part of the vectorization of STMT_INFO.  */

static void
write_vector_array (vec_info *vinfo,
		    stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
		    tree vect, tree array, unsigned HOST_WIDE_INT n)
{
  tree array_ref;
  gimple *new_stmt;

  array_ref = build4 (ARRAY_REF, TREE_TYPE (vect), array,
		      build_int_cst (size_type_node, n),
		      NULL_TREE, NULL_TREE);

  new_stmt = gimple_build_assign (array_ref, vect);
  vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
}
/* PTR is a pointer to an array of type TYPE.  Return a representation
   of *PTR.  The memory reference replaces those in FIRST_DR
   (and its group).  */

static tree
create_array_ref (tree type, tree ptr, tree alias_ptr_type)
{
  tree mem_ref;

  mem_ref = build2 (MEM_REF, type, ptr, build_int_cst (alias_ptr_type, 0));
  /* Arrays have the same alignment as their type.  */
  set_ptr_info_alignment (get_ptr_info (ptr), TYPE_ALIGN_UNIT (type), 0);
  return mem_ref;
}
/* Add a clobber of variable VAR to the vectorization of STMT_INFO.
   Emit the clobber before *GSI.  */

static void
vect_clobber_variable (vec_info *vinfo, stmt_vec_info stmt_info,
		       gimple_stmt_iterator *gsi, tree var)
{
  tree clobber = build_clobber (TREE_TYPE (var));
  gimple *new_stmt = gimple_build_assign (var, clobber);
  vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
}
/* Utility functions used by vect_mark_stmts_to_be_vectorized.  */

/* Function vect_mark_relevant.

   Mark STMT_INFO as "relevant for vectorization" and add it to WORKLIST.  */

static void
vect_mark_relevant (vec<stmt_vec_info> *worklist, stmt_vec_info stmt_info,
		    enum vect_relevant relevant, bool live_p)
{
  enum vect_relevant save_relevant = STMT_VINFO_RELEVANT (stmt_info);
  bool save_live_p = STMT_VINFO_LIVE_P (stmt_info);

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "mark relevant %d, live %d: %G", relevant, live_p,
		     stmt_info->stmt);

  /* If this stmt is an original stmt in a pattern, we might need to mark its
     related pattern stmt instead of the original stmt.  However, such stmts
     may have their own uses that are not in any pattern, in such cases the
     stmt itself should be marked.  */
  if (STMT_VINFO_IN_PATTERN_P (stmt_info))
    {
      /* This is the last stmt in a sequence that was detected as a
	 pattern that can potentially be vectorized.  Don't mark the stmt
	 as relevant/live because it's not going to be vectorized.
	 Instead mark the pattern-stmt that replaces it.  */

      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "last stmt in pattern. don't mark"
			 " relevant/live.\n");
      stmt_vec_info old_stmt_info = stmt_info;
      stmt_info = STMT_VINFO_RELATED_STMT (stmt_info);
      gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info) == old_stmt_info);
      save_relevant = STMT_VINFO_RELEVANT (stmt_info);
      save_live_p = STMT_VINFO_LIVE_P (stmt_info);
    }

  STMT_VINFO_LIVE_P (stmt_info) |= live_p;
  if (relevant > STMT_VINFO_RELEVANT (stmt_info))
    STMT_VINFO_RELEVANT (stmt_info) = relevant;

  if (STMT_VINFO_RELEVANT (stmt_info) == save_relevant
      && STMT_VINFO_LIVE_P (stmt_info) == save_live_p)
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "already marked relevant/live.\n");
      return;
    }

  worklist->safe_push (stmt_info);
}
/* Function is_simple_and_all_uses_invariant

   Return true if STMT_INFO is simple and all uses of it are invariant.  */

bool
is_simple_and_all_uses_invariant (stmt_vec_info stmt_info,
				  loop_vec_info loop_vinfo)
{
  tree op;
  ssa_op_iter iter;

  gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
  if (!stmt)
    return false;

  FOR_EACH_SSA_TREE_OPERAND (op, stmt, iter, SSA_OP_USE)
    {
      enum vect_def_type dt = vect_uninitialized_def;

      if (!vect_is_simple_use (op, loop_vinfo, &dt))
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			     "use not simple.\n");
	  return false;
	}

      if (dt != vect_external_def && dt != vect_constant_def)
	return false;
    }
  return true;
}
/* Function vect_stmt_relevant_p.

   Return true if STMT_INFO, in the loop that is represented by LOOP_VINFO,
   is "relevant for vectorization".

   A stmt is considered "relevant for vectorization" if:
   - it has uses outside the loop.
   - it has vdefs (it alters memory).
   - control stmts in the loop (except for the exit condition).

   CHECKME: what other side effects would the vectorizer allow?  */

static bool
vect_stmt_relevant_p (stmt_vec_info stmt_info, loop_vec_info loop_vinfo,
		      enum vect_relevant *relevant, bool *live_p)
{
  class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  ssa_op_iter op_iter;
  imm_use_iterator imm_iter;
  use_operand_p use_p;
  def_operand_p def_p;

  *relevant = vect_unused_in_scope;
  *live_p = false;

  /* cond stmt other than loop exit cond.  */
  if (is_ctrl_stmt (stmt_info->stmt)
      && STMT_VINFO_TYPE (stmt_info) != loop_exit_ctrl_vec_info_type)
    *relevant = vect_used_in_scope;

  /* changing memory.  */
  if (gimple_code (stmt_info->stmt) != GIMPLE_PHI)
    if (gimple_vdef (stmt_info->stmt)
	&& !gimple_clobber_p (stmt_info->stmt))
      {
	if (dump_enabled_p ())
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "vec_stmt_relevant_p: stmt has vdefs.\n");
	*relevant = vect_used_in_scope;
      }

  /* uses outside the loop.  */
  FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt_info->stmt, op_iter, SSA_OP_DEF)
    {
      FOR_EACH_IMM_USE_FAST (use_p, imm_iter, DEF_FROM_PTR (def_p))
	{
	  basic_block bb = gimple_bb (USE_STMT (use_p));
	  if (!flow_bb_inside_loop_p (loop, bb))
	    {
	      if (is_gimple_debug (USE_STMT (use_p)))
		continue;

	      if (dump_enabled_p ())
		dump_printf_loc (MSG_NOTE, vect_location,
				 "vec_stmt_relevant_p: used out of loop.\n");

	      /* We expect all such uses to be in the loop exit phis
		 (because of loop closed form)   */
	      gcc_assert (gimple_code (USE_STMT (use_p)) == GIMPLE_PHI);
	      gcc_assert (bb == single_exit (loop)->dest);

	      *live_p = true;
	    }
	}
    }

  if (*live_p && *relevant == vect_unused_in_scope
      && !is_simple_and_all_uses_invariant (stmt_info, loop_vinfo))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "vec_stmt_relevant_p: stmt live but not relevant.\n");
      *relevant = vect_used_only_live;
    }

  return (*live_p || *relevant);
}
/* Function exist_non_indexing_operands_for_use_p

   USE is one of the uses attached to STMT_INFO.  Check if USE is
   used in STMT_INFO for anything other than indexing an array.  */

static bool
exist_non_indexing_operands_for_use_p (tree use, stmt_vec_info stmt_info)
{
  tree operand;

  /* USE corresponds to some operand in STMT.  If there is no data
     reference in STMT, then any operand that corresponds to USE
     is not indexing an array.  */
  if (!STMT_VINFO_DATA_REF (stmt_info))
    return true;

  /* STMT has a data_ref.  FORNOW this means that it's of one of
     the following forms:
     -1- ARRAY_REF = var
     -2- var = ARRAY_REF
     (This should have been verified in analyze_data_refs).

     'var' in the second case corresponds to a def, not a use,
     so USE cannot correspond to any operands that are not used
     for array indexing.

     Therefore, all we need to check is if STMT falls into the
     first case, and whether var corresponds to USE.  */

  gassign *assign = dyn_cast <gassign *> (stmt_info->stmt);
  if (!assign || !gimple_assign_copy_p (assign))
    {
      gcall *call = dyn_cast <gcall *> (stmt_info->stmt);
      if (call && gimple_call_internal_p (call))
	{
	  internal_fn ifn = gimple_call_internal_fn (call);
	  int mask_index = internal_fn_mask_index (ifn);
	  if (mask_index >= 0
	      && use == gimple_call_arg (call, mask_index))
	    return true;
	  int stored_value_index = internal_fn_stored_value_index (ifn);
	  if (stored_value_index >= 0
	      && use == gimple_call_arg (call, stored_value_index))
	    return true;
	  if (internal_gather_scatter_fn_p (ifn)
	      && use == gimple_call_arg (call, 1))
	    return true;
	}
      return false;
    }

  if (TREE_CODE (gimple_assign_lhs (assign)) == SSA_NAME)
    return false;
  operand = gimple_assign_rhs1 (assign);
  if (TREE_CODE (operand) != SSA_NAME)
    return false;

  if (operand == use)
    return true;

  return false;
}
/* Function process_use.

   Inputs:
   - a USE in STMT_VINFO in a loop represented by LOOP_VINFO
   - RELEVANT - enum value to be set in the STMT_VINFO of the stmt
     that defined USE.  This is done by calling mark_relevant and passing it
     the WORKLIST (to add DEF_STMT to the WORKLIST in case it is relevant).
   - FORCE is true if exist_non_indexing_operands_for_use_p check shouldn't
     be performed.

   Outputs:
   Generally, LIVE_P and RELEVANT are used to define the liveness and
   relevance info of the DEF_STMT of this USE:
       STMT_VINFO_LIVE_P (DEF_stmt_vinfo) <-- live_p
       STMT_VINFO_RELEVANT (DEF_stmt_vinfo) <-- relevant
   Exceptions:
   - case 1: If USE is used only for address computations (e.g. array indexing),
   which does not need to be directly vectorized, then the liveness/relevance
   of the respective DEF_STMT is left unchanged.
   - case 2: If STMT_VINFO is a reduction phi and DEF_STMT is a reduction stmt,
   we skip DEF_STMT because it had already been processed.
   - case 3: If DEF_STMT and STMT_VINFO are in different nests, then
   "relevant" will be modified accordingly.

   Return true if everything is as expected.  Return false otherwise.  */

static opt_result
process_use (stmt_vec_info stmt_vinfo, tree use, loop_vec_info loop_vinfo,
	     enum vect_relevant relevant, vec<stmt_vec_info> *worklist,
	     bool force)
{
  stmt_vec_info dstmt_vinfo;
  enum vect_def_type dt;

  /* case 1: we are only interested in uses that need to be vectorized.  Uses
     that are used for address computation are not considered relevant.  */
  if (!force && !exist_non_indexing_operands_for_use_p (use, stmt_vinfo))
    return opt_result::success ();

  if (!vect_is_simple_use (use, loop_vinfo, &dt, &dstmt_vinfo))
    return opt_result::failure_at (stmt_vinfo->stmt,
				   "not vectorized:"
				   " unsupported use in stmt.\n");

  if (!dstmt_vinfo)
    return opt_result::success ();

  basic_block def_bb = gimple_bb (dstmt_vinfo->stmt);
  basic_block bb = gimple_bb (stmt_vinfo->stmt);

  /* case 2: A reduction phi (STMT) defined by a reduction stmt (DSTMT_VINFO).
     We have to force the stmt live since the epilogue loop needs it to
     continue computing the reduction.  */
  if (gimple_code (stmt_vinfo->stmt) == GIMPLE_PHI
      && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
      && gimple_code (dstmt_vinfo->stmt) != GIMPLE_PHI
      && STMT_VINFO_DEF_TYPE (dstmt_vinfo) == vect_reduction_def
      && bb->loop_father == def_bb->loop_father)
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "reduc-stmt defining reduc-phi in the same nest.\n");
      vect_mark_relevant (worklist, dstmt_vinfo, relevant, true);
      return opt_result::success ();
    }

  /* case 3a: outer-loop stmt defining an inner-loop stmt:
	outer-loop-header-bb:
		d = dstmt_vinfo
	inner-loop:
		stmt # use (d)
	outer-loop-tail-bb:
		...		  */
  if (flow_loop_nested_p (def_bb->loop_father, bb->loop_father))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "outer-loop def-stmt defining inner-loop stmt.\n");

      switch (relevant)
	{
	case vect_unused_in_scope:
	  relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_nested_cycle) ?
		     vect_used_in_scope : vect_unused_in_scope;
	  break;

	case vect_used_in_outer_by_reduction:
	  gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
	  relevant = vect_used_by_reduction;
	  break;

	case vect_used_in_outer:
	  gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
	  relevant = vect_used_in_scope;
	  break;

	case vect_used_in_scope:
	  break;

	default:
	  gcc_unreachable ();
	}
    }

  /* case 3b: inner-loop stmt defining an outer-loop stmt:
	outer-loop-header-bb:
		...
	inner-loop:
		d = dstmt_vinfo
	outer-loop-tail-bb (or outer-loop-exit-bb in double reduction):
		stmt # use (d)		*/
  else if (flow_loop_nested_p (bb->loop_father, def_bb->loop_father))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "inner-loop def-stmt defining outer-loop stmt.\n");

      switch (relevant)
	{
	case vect_unused_in_scope:
	  relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
	    || STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_double_reduction_def) ?
		     vect_used_in_outer_by_reduction : vect_unused_in_scope;
	  break;

	case vect_used_by_reduction:
	case vect_used_only_live:
	  relevant = vect_used_in_outer_by_reduction;
	  break;

	case vect_used_in_scope:
	  relevant = vect_used_in_outer;
	  break;

	default:
	  gcc_unreachable ();
	}
    }
  /* We are also not interested in uses on loop PHI backedges that are
     inductions.  Otherwise we'll needlessly vectorize the IV increment
     and cause hybrid SLP for SLP inductions.  Unless the PHI is live
     of course.  */
  else if (gimple_code (stmt_vinfo->stmt) == GIMPLE_PHI
	   && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_induction_def
	   && ! STMT_VINFO_LIVE_P (stmt_vinfo)
	   && (PHI_ARG_DEF_FROM_EDGE (stmt_vinfo->stmt,
				      loop_latch_edge (bb->loop_father))
	       == use))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "induction value on backedge.\n");
      return opt_result::success ();
    }

  vect_mark_relevant (worklist, dstmt_vinfo, relevant, false);
  return opt_result::success ();
}
/* Function vect_mark_stmts_to_be_vectorized.

   Not all stmts in the loop need to be vectorized.  For example:

     for i...
       for j...
   1.    T0 = i + j
   2.	 T1 = a[T0]

   3.    j = j + 1

   Stmt 1 and 3 do not need to be vectorized, because loop control and
   addressing of vectorized data-refs are handled differently.

   This pass detects such stmts.  */

opt_result
vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo, bool *fatal)
{
  class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
  unsigned int nbbs = loop->num_nodes;
  gimple_stmt_iterator si;
  unsigned int i;
  basic_block bb;
  bool live_p;
  enum vect_relevant relevant;

  DUMP_VECT_SCOPE ("vect_mark_stmts_to_be_vectorized");

  auto_vec<stmt_vec_info, 64> worklist;

  /* 1. Init worklist.  */
  for (i = 0; i < nbbs; i++)
    {
      bb = bbs[i];
      for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
	{
	  stmt_vec_info phi_info = loop_vinfo->lookup_stmt (gsi_stmt (si));
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_NOTE, vect_location, "init: phi relevant? %G",
			     phi_info->stmt);

	  if (vect_stmt_relevant_p (phi_info, loop_vinfo, &relevant, &live_p))
	    vect_mark_relevant (&worklist, phi_info, relevant, live_p);
	}
      for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
	{
	  if (is_gimple_debug (gsi_stmt (si)))
	    continue;
	  stmt_vec_info stmt_info = loop_vinfo->lookup_stmt (gsi_stmt (si));
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_NOTE, vect_location,
			     "init: stmt relevant? %G", stmt_info->stmt);

	  if (vect_stmt_relevant_p (stmt_info, loop_vinfo, &relevant, &live_p))
	    vect_mark_relevant (&worklist, stmt_info, relevant, live_p);
	}
    }

  /* 2. Process_worklist */
  while (worklist.length () > 0)
    {
      use_operand_p use_p;
      ssa_op_iter iter;

      stmt_vec_info stmt_vinfo = worklist.pop ();
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "worklist: examine stmt: %G", stmt_vinfo->stmt);

      /* Examine the USEs of STMT.  For each USE, mark the stmt that defines it
	 (DEF_STMT) as relevant/irrelevant according to the relevance property
	 of STMT.  */
      relevant = STMT_VINFO_RELEVANT (stmt_vinfo);

      /* Generally, the relevance property of STMT (in STMT_VINFO_RELEVANT) is
	 propagated as is to the DEF_STMTs of its USEs.

	 One exception is when STMT has been identified as defining a reduction
	 variable; in this case we set the relevance to vect_used_by_reduction.
	 This is because we distinguish between two kinds of relevant stmts -
	 those that are used by a reduction computation, and those that are
	 (also) used by a regular computation.  This allows us later on to
	 identify stmts that are used solely by a reduction, and therefore the
	 order of the results that they produce does not have to be kept.  */

      switch (STMT_VINFO_DEF_TYPE (stmt_vinfo))
	{
	case vect_reduction_def:
	  gcc_assert (relevant != vect_unused_in_scope);
	  if (relevant != vect_unused_in_scope
	      && relevant != vect_used_in_scope
	      && relevant != vect_used_by_reduction
	      && relevant != vect_used_only_live)
	    return opt_result::failure_at
	      (stmt_vinfo->stmt, "unsupported use of reduction.\n");
	  break;

	case vect_nested_cycle:
	  if (relevant != vect_unused_in_scope
	      && relevant != vect_used_in_outer_by_reduction
	      && relevant != vect_used_in_outer)
	    return opt_result::failure_at
	      (stmt_vinfo->stmt, "unsupported use of nested cycle.\n");
	  break;

	case vect_double_reduction_def:
	  if (relevant != vect_unused_in_scope
	      && relevant != vect_used_by_reduction
	      && relevant != vect_used_only_live)
	    return opt_result::failure_at
	      (stmt_vinfo->stmt, "unsupported use of double reduction.\n");
	  break;

	default:
	  break;
	}

      if (is_pattern_stmt_p (stmt_vinfo))
	{
	  /* Pattern statements are not inserted into the code, so
	     FOR_EACH_PHI_OR_STMT_USE optimizes their operands out, and we
	     have to scan the RHS or function arguments instead.  */
	  if (gassign *assign = dyn_cast <gassign *> (stmt_vinfo->stmt))
	    {
	      enum tree_code rhs_code = gimple_assign_rhs_code (assign);
	      tree op = gimple_assign_rhs1 (assign);

	      i = 1;
	      if (rhs_code == COND_EXPR && COMPARISON_CLASS_P (op))
		{
		  opt_result res
		    = process_use (stmt_vinfo, TREE_OPERAND (op, 0),
				   loop_vinfo, relevant, &worklist, false);
		  if (!res)
		    return res;
		  res = process_use (stmt_vinfo, TREE_OPERAND (op, 1),
				     loop_vinfo, relevant, &worklist, false);
		  if (!res)
		    return res;
		  i = 2;
		}
	      for (; i < gimple_num_ops (assign); i++)
		{
		  op = gimple_op (assign, i);
		  if (TREE_CODE (op) == SSA_NAME)
		    {
		      opt_result res
			= process_use (stmt_vinfo, op, loop_vinfo, relevant,
				       &worklist, false);
		      if (!res)
			return res;
		    }
		}
	    }
	  else if (gcall *call = dyn_cast <gcall *> (stmt_vinfo->stmt))
	    {
	      for (i = 0; i < gimple_call_num_args (call); i++)
		{
		  tree arg = gimple_call_arg (call, i);
		  opt_result res
		    = process_use (stmt_vinfo, arg, loop_vinfo, relevant,
				   &worklist, false);
		  if (!res)
		    return res;
		}
	    }
	}
      else
	FOR_EACH_PHI_OR_STMT_USE (use_p, stmt_vinfo->stmt, iter, SSA_OP_USE)
	  {
	    tree op = USE_FROM_PTR (use_p);
	    opt_result res
	      = process_use (stmt_vinfo, op, loop_vinfo, relevant,
			     &worklist, false);
	    if (!res)
	      return res;
	  }

      if (STMT_VINFO_GATHER_SCATTER_P (stmt_vinfo))
	{
	  gather_scatter_info gs_info;
	  if (!vect_check_gather_scatter (stmt_vinfo, loop_vinfo, &gs_info))
	    gcc_unreachable ();
	  opt_result res
	    = process_use (stmt_vinfo, gs_info.offset, loop_vinfo, relevant,
			   &worklist, true);
	  if (!res)
	    {
	      if (fatal)
		*fatal = false;
	      return res;
	    }
	}
    } /* while worklist */

  return opt_result::success ();
}
/* Function vect_model_simple_cost.

   Models cost for simple operations, i.e. those that only emit ncopies of a
   single op.  Right now, this does not account for multiple insns that could
   be generated for the single vector op.  We will handle that shortly.  */

static void
vect_model_simple_cost (vec_info *,
			stmt_vec_info stmt_info, int ncopies,
			enum vect_def_type *dt,
			int ndts,
			slp_tree node,
			stmt_vector_for_cost *cost_vec,
			vect_cost_for_stmt kind = vector_stmt)
{
  int inside_cost = 0, prologue_cost = 0;

  gcc_assert (cost_vec != NULL);

  /* ??? Somehow we need to fix this at the callers.  */
  if (node)
    ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (node);

  /* Cost the "broadcast" of a scalar operand in to a vector operand.
     Use scalar_to_vec to cost the broadcast, as elsewhere in the vector
     cost model.  */
  for (int i = 0; i < ndts; i++)
    if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
      prologue_cost += record_stmt_cost (cost_vec, 1, scalar_to_vec,
					 stmt_info, 0, vect_prologue);

  /* Pass the inside-of-loop statements to the target-specific cost model.  */
  inside_cost += record_stmt_cost (cost_vec, ncopies, kind,
				   stmt_info, 0, vect_body);

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "vect_model_simple_cost: inside_cost = %d, "
		     "prologue_cost = %d .\n", inside_cost, prologue_cost);
}
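/* As a rough illustration of the accounting above: a simple vector
   operation with NCOPIES == 2 whose second operand is loop-invariant
   records one scalar_to_vec broadcast in the prologue and two copies of
   KIND in the loop body; the weights attached to those entries are
   whatever the target's cost hooks return.  */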
/* Model cost for type demotion and promotion operations.  PWR is
   normally zero for single-step promotions and demotions.  It will be
   one if two-step promotion/demotion is required, and so on.  NCOPIES
   is the number of vector results (and thus number of instructions)
   for the narrowest end of the operation chain.  Each additional
   step doubles the number of instructions required.  */

static void
vect_model_promotion_demotion_cost (stmt_vec_info stmt_info,
				    enum vect_def_type *dt,
				    unsigned int ncopies, int pwr,
				    stmt_vector_for_cost *cost_vec)
{
  int i;
  int inside_cost = 0, prologue_cost = 0;

  for (i = 0; i < pwr + 1; i++)
    {
      inside_cost += record_stmt_cost (cost_vec, ncopies, vec_promote_demote,
				       stmt_info, 0, vect_body);
      ncopies *= 2;
    }

  /* FORNOW: Assuming maximum 2 args per stmts.  */
  for (i = 0; i < 2; i++)
    if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
      prologue_cost += record_stmt_cost (cost_vec, 1, vector_stmt,
					 stmt_info, 0, vect_prologue);

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "vect_model_promotion_demotion_cost: inside_cost = %d, "
		     "prologue_cost = %d .\n", inside_cost, prologue_cost);
}
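/* For instance, a two-step promotion chain (PWR == 1) starting from
   NCOPIES == 2 records 2 vec_promote_demote operations for the first step
   and 4 for the second, 6 in total; each further step would double the
   count again.  */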
/* Returns true if the current function returns DECL.  */

static bool
cfun_returns (tree decl)
{
  edge_iterator ei;
  edge e;
  FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
    {
      greturn *ret = safe_dyn_cast <greturn *> (last_stmt (e->src));
      if (!ret)
	continue;
      if (gimple_return_retval (ret) == decl)
	return true;
      /* We often end up with an aggregate copy to the result decl,
	 handle that case as well.  First skip intermediate clobbers
	 though.  */
      gimple *def = ret;
      do
	{
	  def = SSA_NAME_DEF_STMT (gimple_vuse (def));
	}
      while (gimple_clobber_p (def));
      if (is_a <gassign *> (def)
	  && gimple_assign_lhs (def) == gimple_return_retval (ret)
	  && gimple_assign_rhs1 (def) == decl)
	return true;
    }
  return false;
}
/* Function vect_model_store_cost

   Models cost for stores.  In the case of grouped accesses, one access
   has the overhead of the grouped access attributed to it.  */

static void
vect_model_store_cost (vec_info *vinfo, stmt_vec_info stmt_info, int ncopies,
		       vect_memory_access_type memory_access_type,
		       vec_load_store_type vls_type, slp_tree slp_node,
		       stmt_vector_for_cost *cost_vec)
{
  unsigned int inside_cost = 0, prologue_cost = 0;
  stmt_vec_info first_stmt_info = stmt_info;
  bool grouped_access_p = STMT_VINFO_GROUPED_ACCESS (stmt_info);

  /* ??? Somehow we need to fix this at the callers.  */
  if (slp_node)
    ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);

  if (vls_type == VLS_STORE_INVARIANT)
    {
      if (!slp_node)
	prologue_cost += record_stmt_cost (cost_vec, 1, scalar_to_vec,
					   stmt_info, 0, vect_prologue);
    }

  /* Grouped stores update all elements in the group at once,
     so we want the DR for the first statement.  */
  if (!slp_node && grouped_access_p)
    first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);

  /* True if we should include any once-per-group costs as well as
     the cost of the statement itself.  For SLP we only get called
     once per group anyhow.  */
  bool first_stmt_p = (first_stmt_info == stmt_info);

  /* We assume that the cost of a single store-lanes instruction is
     equivalent to the cost of DR_GROUP_SIZE separate stores.  If a grouped
     access is instead being provided by a permute-and-store operation,
     include the cost of the permutes.  */
  if (first_stmt_p
      && memory_access_type == VMAT_CONTIGUOUS_PERMUTE)
    {
      /* Uses a high and low interleave or shuffle operations for each
	 needed permute.  */
      int group_size = DR_GROUP_SIZE (first_stmt_info);
      int nstmts = ncopies * ceil_log2 (group_size) * group_size;
      inside_cost = record_stmt_cost (cost_vec, nstmts, vec_perm,
				      stmt_info, 0, vect_body);

      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "vect_model_store_cost: strided group_size = %d .\n",
			 group_size);
    }

  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  /* Costs of the stores.  */
  if (memory_access_type == VMAT_ELEMENTWISE
      || memory_access_type == VMAT_GATHER_SCATTER)
    {
      /* N scalar stores plus extracting the elements.  */
      unsigned int assumed_nunits = vect_nunits_for_cost (vectype);
      inside_cost += record_stmt_cost (cost_vec,
				       ncopies * assumed_nunits,
				       scalar_store, stmt_info, 0, vect_body);
    }
  else
    vect_get_store_cost (vinfo, stmt_info, ncopies, &inside_cost, cost_vec);

  if (memory_access_type == VMAT_ELEMENTWISE
      || memory_access_type == VMAT_STRIDED_SLP)
    {
      /* N scalar stores plus extracting the elements.  */
      unsigned int assumed_nunits = vect_nunits_for_cost (vectype);
      inside_cost += record_stmt_cost (cost_vec,
				       ncopies * assumed_nunits,
				       vec_to_scalar, stmt_info, 0, vect_body);
    }

  /* When vectorizing a store into the function result assign
     a penalty if the function returns in a multi-register location.
     In this case we assume we'll end up with having to spill the
     vector result and do piecewise loads as a conservative estimate.  */
  tree base = get_base_address (STMT_VINFO_DATA_REF (stmt_info)->ref);
  if (base
      && (TREE_CODE (base) == RESULT_DECL
	  || (DECL_P (base) && cfun_returns (base)))
      && !aggregate_value_p (base, cfun->decl))
    {
      rtx reg = hard_function_value (TREE_TYPE (base), cfun->decl, 0, 1);
      /* ???  Handle PARALLEL in some way.  */
      if (REG_P (reg))
	{
	  int nregs = hard_regno_nregs (REGNO (reg), GET_MODE (reg));
	  /* Assume that a single reg-reg move is possible and cheap,
	     do not account for vector to gp register move cost.  */
	  if (nregs > 1)
	    {
	      /* Spill.  */
	      prologue_cost += record_stmt_cost (cost_vec, ncopies,
						 vector_store,
						 stmt_info, 0, vect_epilogue);
	      /* Loads.  */
	      prologue_cost += record_stmt_cost (cost_vec, ncopies * nregs,
						 scalar_load,
						 stmt_info, 0, vect_epilogue);
	    }
	}
    }

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "vect_model_store_cost: inside_cost = %d, "
		     "prologue_cost = %d .\n", inside_cost, prologue_cost);
}
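/* A worked example for the interleaving cost above: a grouped store with
   DR_GROUP_SIZE == 4 and NCOPIES == 1 gives
   nstmts = 1 * ceil_log2 (4) * 4 = 8 vec_perm operations, i.e. two rounds
   of high/low interleaves over the four vectors of the group.  */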
/* Calculate cost of DR's memory access.  */
void
vect_get_store_cost (vec_info *vinfo, stmt_vec_info stmt_info, int ncopies,
		     unsigned int *inside_cost,
		     stmt_vector_for_cost *body_cost_vec)
{
  dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
  int alignment_support_scheme
    = vect_supportable_dr_alignment (vinfo, dr_info, false);

  switch (alignment_support_scheme)
    {
    case dr_aligned:
      {
	*inside_cost += record_stmt_cost (body_cost_vec, ncopies,
					  vector_store, stmt_info, 0,
					  vect_body);

	if (dump_enabled_p ())
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "vect_model_store_cost: aligned.\n");
	break;
      }

    case dr_unaligned_supported:
      {
	/* Here, we assign an additional cost for the unaligned store.  */
	*inside_cost += record_stmt_cost (body_cost_vec, ncopies,
					  unaligned_store, stmt_info,
					  DR_MISALIGNMENT (dr_info),
					  vect_body);
	if (dump_enabled_p ())
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "vect_model_store_cost: unaligned supported by "
			   "hardware.\n");
	break;
      }

    case dr_unaligned_unsupported:
      {
	*inside_cost = VECT_MAX_COST;

	if (dump_enabled_p ())
	  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			   "vect_model_store_cost: unsupported access.\n");
	break;
      }

    default:
      gcc_unreachable ();
    }
}
/* Function vect_model_load_cost

   Models cost for loads.  In the case of grouped accesses, one access has
   the overhead of the grouped access attributed to it.  Since unaligned
   accesses are supported for loads, we also account for the costs of the
   access scheme chosen.  */

static void
vect_model_load_cost (vec_info *vinfo,
		      stmt_vec_info stmt_info, unsigned ncopies, poly_uint64 vf,
		      vect_memory_access_type memory_access_type,
		      slp_tree slp_node,
		      stmt_vector_for_cost *cost_vec)
{
  unsigned int inside_cost = 0, prologue_cost = 0;
  bool grouped_access_p = STMT_VINFO_GROUPED_ACCESS (stmt_info);

  gcc_assert (cost_vec);

  /* ??? Somehow we need to fix this at the callers.  */
  if (slp_node)
    ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);

  if (slp_node && SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
    {
      /* If the load is permuted then the alignment is determined by
	 the first group element not by the first scalar stmt DR.  */
      stmt_vec_info first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
      /* Record the cost for the permutation.  */
      unsigned n_perms, n_loads;
      vect_transform_slp_perm_load (vinfo, slp_node, vNULL, NULL,
				    vf, true, &n_perms, &n_loads);
      inside_cost += record_stmt_cost (cost_vec, n_perms, vec_perm,
				       first_stmt_info, 0, vect_body);

      /* And adjust the number of loads performed.  This handles
	 redundancies as well as loads that are later dead.  */
      ncopies = n_loads;
    }
  else
    {
      /* Grouped loads read all elements in the group at once,
	 so we want the DR for the first statement.  */
      stmt_vec_info first_stmt_info = stmt_info;
      if (!slp_node && grouped_access_p)
	first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);

      /* True if we should include any once-per-group costs as well as
	 the cost of the statement itself.  For SLP we only get called
	 once per group anyhow.  */
      bool first_stmt_p = (first_stmt_info == stmt_info);

      /* We assume that the cost of a single load-lanes instruction is
	 equivalent to the cost of DR_GROUP_SIZE separate loads.  If a grouped
	 access is instead being provided by a load-and-permute operation,
	 include the cost of the permutes.  */
      if (first_stmt_p
	  && memory_access_type == VMAT_CONTIGUOUS_PERMUTE)
	{
	  /* Uses an even and odd extract operations or shuffle operations
	     for each needed permute.  */
	  int group_size = DR_GROUP_SIZE (first_stmt_info);
	  int nstmts = ncopies * ceil_log2 (group_size) * group_size;
	  inside_cost += record_stmt_cost (cost_vec, nstmts, vec_perm,
					   stmt_info, 0, vect_body);

	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_NOTE, vect_location,
			     "vect_model_load_cost: strided group_size = %d .\n",
			     group_size);
	}

      /* The loads themselves.  */
      if (memory_access_type == VMAT_ELEMENTWISE
	  || memory_access_type == VMAT_GATHER_SCATTER)
	{
	  /* N scalar loads plus gathering them into a vector.  */
	  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
	  unsigned int assumed_nunits = vect_nunits_for_cost (vectype);
	  inside_cost += record_stmt_cost (cost_vec,
					   ncopies * assumed_nunits,
					   scalar_load, stmt_info, 0, vect_body);
	}
      else
	vect_get_load_cost (vinfo, stmt_info, ncopies, first_stmt_p,
			    &inside_cost, &prologue_cost,
			    cost_vec, cost_vec, true);
      if (memory_access_type == VMAT_ELEMENTWISE
	  || memory_access_type == VMAT_STRIDED_SLP)
	inside_cost += record_stmt_cost (cost_vec, ncopies, vec_construct,
					 stmt_info, 0, vect_body);
    }

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "vect_model_load_cost: inside_cost = %d, "
		     "prologue_cost = %d .\n", inside_cost, prologue_cost);
}
/* Calculate cost of DR's memory access.  */
void
vect_get_load_cost (vec_info *vinfo, stmt_vec_info stmt_info, int ncopies,
		    bool add_realign_cost, unsigned int *inside_cost,
		    unsigned int *prologue_cost,
		    stmt_vector_for_cost *prologue_cost_vec,
		    stmt_vector_for_cost *body_cost_vec,
		    bool record_prologue_costs)
{
  dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
  int alignment_support_scheme
    = vect_supportable_dr_alignment (vinfo, dr_info, false);

  switch (alignment_support_scheme)
    {
    case dr_aligned:
      {
	*inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
					  stmt_info, 0, vect_body);

	if (dump_enabled_p ())
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "vect_model_load_cost: aligned.\n");

	break;
      }
    case dr_unaligned_supported:
      {
	/* Here, we assign an additional cost for the unaligned load.  */
	*inside_cost += record_stmt_cost (body_cost_vec, ncopies,
					  unaligned_load, stmt_info,
					  DR_MISALIGNMENT (dr_info),
					  vect_body);

	if (dump_enabled_p ())
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "vect_model_load_cost: unaligned supported by "
			   "hardware.\n");

	break;
      }
    case dr_explicit_realign:
      {
	*inside_cost += record_stmt_cost (body_cost_vec, ncopies * 2,
					  vector_load, stmt_info, 0, vect_body);
	*inside_cost += record_stmt_cost (body_cost_vec, ncopies,
					  vec_perm, stmt_info, 0, vect_body);

	/* FIXME: If the misalignment remains fixed across the iterations of
	   the containing loop, the following cost should be added to the
	   prologue costs.  */
	if (targetm.vectorize.builtin_mask_for_load)
	  *inside_cost += record_stmt_cost (body_cost_vec, 1, vector_stmt,
					    stmt_info, 0, vect_body);

	if (dump_enabled_p ())
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "vect_model_load_cost: explicit realign\n");

	break;
      }
    case dr_explicit_realign_optimized:
      {
	if (dump_enabled_p ())
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "vect_model_load_cost: unaligned software "
			   "pipelined.\n");

	/* Unaligned software pipeline has a load of an address, an initial
	   load, and possibly a mask operation to "prime" the loop.  However,
	   if this is an access in a group of loads, which provide grouped
	   access, then the above cost should only be considered for one
	   access in the group.  Inside the loop, there is a load op
	   and a realignment op.  */

	if (add_realign_cost && record_prologue_costs)
	  {
	    *prologue_cost += record_stmt_cost (prologue_cost_vec, 2,
						vector_stmt, stmt_info,
						0, vect_prologue);
	    if (targetm.vectorize.builtin_mask_for_load)
	      *prologue_cost += record_stmt_cost (prologue_cost_vec, 1,
						  vector_stmt, stmt_info,
						  0, vect_prologue);
	  }

	*inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
					  stmt_info, 0, vect_body);
	*inside_cost += record_stmt_cost (body_cost_vec, ncopies, vec_perm,
					  stmt_info, 0, vect_body);

	if (dump_enabled_p ())
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "vect_model_load_cost: explicit realign optimized"
			   "\n");

	break;
      }

    case dr_unaligned_unsupported:
      {
	*inside_cost = VECT_MAX_COST;

	if (dump_enabled_p ())
	  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			   "vect_model_load_cost: unsupported access.\n");
	break;
      }

    default:
      gcc_unreachable ();
    }
}
/* Insert the new stmt NEW_STMT at *GSI or at the appropriate place in
   the loop preheader for the vectorized stmt STMT_VINFO.  */

static void
vect_init_vector_1 (vec_info *vinfo, stmt_vec_info stmt_vinfo, gimple *new_stmt,
		    gimple_stmt_iterator *gsi)
{
  if (gsi)
    vect_finish_stmt_generation (vinfo, stmt_vinfo, new_stmt, gsi);
  else
    vinfo->insert_on_entry (stmt_vinfo, new_stmt);

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "created new init_stmt: %G", new_stmt);
}
/* Function vect_init_vector.

   Insert a new stmt (INIT_STMT) that initializes a new variable of type
   TYPE with the value VAL.  If TYPE is a vector type and VAL does not have
   vector type a vector with all elements equal to VAL is created first.
   Place the initialization at GSI if it is not NULL.  Otherwise, place the
   initialization at the loop preheader.
   Return the DEF of INIT_STMT.
   It will be used in the vectorization of STMT_INFO.  */

tree
vect_init_vector (vec_info *vinfo, stmt_vec_info stmt_info, tree val, tree type,
		  gimple_stmt_iterator *gsi)
{
  gimple *init_stmt;
  tree new_temp;

  /* We abuse this function to push sth to a SSA name with initial 'val'.  */
  if (! useless_type_conversion_p (type, TREE_TYPE (val)))
    {
      gcc_assert (TREE_CODE (type) == VECTOR_TYPE);
      if (! types_compatible_p (TREE_TYPE (type), TREE_TYPE (val)))
	{
	  /* Scalar boolean value should be transformed into
	     all zeros or all ones value before building a vector.  */
	  if (VECTOR_BOOLEAN_TYPE_P (type))
	    {
	      tree true_val = build_all_ones_cst (TREE_TYPE (type));
	      tree false_val = build_zero_cst (TREE_TYPE (type));

	      if (CONSTANT_CLASS_P (val))
		val = integer_zerop (val) ? false_val : true_val;
	      else
		{
		  new_temp = make_ssa_name (TREE_TYPE (type));
		  init_stmt = gimple_build_assign (new_temp, COND_EXPR,
						   val, true_val, false_val);
		  vect_init_vector_1 (vinfo, stmt_info, init_stmt, gsi);
		  val = new_temp;
		}
	    }
	  else
	    {
	      gimple_seq stmts = NULL;
	      if (! INTEGRAL_TYPE_P (TREE_TYPE (val)))
		val = gimple_build (&stmts, VIEW_CONVERT_EXPR,
				    TREE_TYPE (type), val);
	      else
		/* ???  Condition vectorization expects us to do
		   promotion of invariant/external defs.  */
		val = gimple_convert (&stmts, TREE_TYPE (type), val);
	      for (gimple_stmt_iterator gsi2 = gsi_start (stmts);
		   !gsi_end_p (gsi2); )
		{
		  init_stmt = gsi_stmt (gsi2);
		  gsi_remove (&gsi2, false);
		  vect_init_vector_1 (vinfo, stmt_info, init_stmt, gsi);
		}
	    }
	}
      val = build_vector_from_val (type, val);
    }

  new_temp = vect_get_new_ssa_name (type, vect_simple_var, "cst_");
  init_stmt = gimple_build_assign (new_temp, val);
  vect_init_vector_1 (vinfo, stmt_info, init_stmt, gsi);
  return new_temp;
}
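/* For example, initializing a vector(4) int operand from the invariant 5
   emits (roughly) the following in the loop preheader:

     cst_1 = { 5, 5, 5, 5 };

   and returns the new "cst_" SSA name for use by the vectorized
   statements; the exact name suffix is assigned by the SSA machinery.  */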
/* Function vect_get_vec_defs_for_operand.

   OP is an operand in STMT_VINFO.  This function returns a vector of
   NCOPIES defs that will be used in the vectorized stmts for STMT_VINFO.

   In the case that OP is an SSA_NAME which is defined in the loop, then
   STMT_VINFO_VEC_STMTS of the defining stmt holds the relevant defs.

   In case OP is an invariant or constant, a new stmt that creates a vector def
   needs to be introduced.  VECTYPE may be used to specify a required type for
   vector invariant.  */

void
vect_get_vec_defs_for_operand (vec_info *vinfo, stmt_vec_info stmt_vinfo,
			       unsigned ncopies,
			       tree op, vec<tree> *vec_oprnds, tree vectype)
{
  gimple *def_stmt;
  enum vect_def_type dt;
  bool is_simple_use;
  loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "vect_get_vec_defs_for_operand: %T\n", op);

  stmt_vec_info def_stmt_info;
  is_simple_use = vect_is_simple_use (op, loop_vinfo, &dt,
				      &def_stmt_info, &def_stmt);
  gcc_assert (is_simple_use);
  if (def_stmt && dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location, "  def_stmt =  %G", def_stmt);

  vec_oprnds->create (ncopies);
  if (dt == vect_constant_def || dt == vect_external_def)
    {
      tree stmt_vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
      tree vector_type;

      if (vectype)
	vector_type = vectype;
      else if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op))
	       && VECTOR_BOOLEAN_TYPE_P (stmt_vectype))
	vector_type = truth_type_for (stmt_vectype);
      else
	vector_type = get_vectype_for_scalar_type (loop_vinfo, TREE_TYPE (op));

      gcc_assert (vector_type);
      tree vop = vect_init_vector (vinfo, stmt_vinfo, op, vector_type, NULL);
      while (ncopies--)
	vec_oprnds->quick_push (vop);
    }
  else
    {
      def_stmt_info = vect_stmt_to_vectorize (def_stmt_info);
      gcc_assert (STMT_VINFO_VEC_STMTS (def_stmt_info).length () == ncopies);
      for (unsigned i = 0; i < ncopies; ++i)
	vec_oprnds->quick_push (gimple_get_lhs
				  (STMT_VINFO_VEC_STMTS (def_stmt_info)[i]));
    }
}
/* Get vectorized definitions for OP0 and OP1.  */

void
vect_get_vec_defs (vec_info *vinfo, stmt_vec_info stmt_info, slp_tree slp_node,
		   unsigned ncopies,
		   tree op0, vec<tree> *vec_oprnds0, tree vectype0,
		   tree op1, vec<tree> *vec_oprnds1, tree vectype1,
		   tree op2, vec<tree> *vec_oprnds2, tree vectype2,
		   tree op3, vec<tree> *vec_oprnds3, tree vectype3)
{
  if (slp_node)
    {
      if (op0)
	vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node)[0], vec_oprnds0);
      if (op1)
	vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node)[1], vec_oprnds1);
      if (op2)
	vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node)[2], vec_oprnds2);
      if (op3)
	vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node)[3], vec_oprnds3);
    }
  else
    {
      if (op0)
	vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies,
				       op0, vec_oprnds0, vectype0);
      if (op1)
	vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies,
				       op1, vec_oprnds1, vectype1);
      if (op2)
	vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies,
				       op2, vec_oprnds2, vectype2);
      if (op3)
	vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies,
				       op3, vec_oprnds3, vectype3);
    }
}

void
vect_get_vec_defs (vec_info *vinfo, stmt_vec_info stmt_info, slp_tree slp_node,
		   unsigned ncopies,
		   tree op0, vec<tree> *vec_oprnds0,
		   tree op1, vec<tree> *vec_oprnds1,
		   tree op2, vec<tree> *vec_oprnds2,
		   tree op3, vec<tree> *vec_oprnds3)
{
  vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
		     op0, vec_oprnds0, NULL_TREE,
		     op1, vec_oprnds1, NULL_TREE,
		     op2, vec_oprnds2, NULL_TREE,
		     op3, vec_oprnds3, NULL_TREE);
}
/* Helper function called by vect_finish_replace_stmt and
   vect_finish_stmt_generation.  Set the location of the new
   statement and create and return a stmt_vec_info for it.  */

static void
vect_finish_stmt_generation_1 (vec_info *,
			       stmt_vec_info stmt_info, gimple *vec_stmt)
{
  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location, "add new stmt: %G", vec_stmt);

  if (stmt_info)
    {
      gimple_set_location (vec_stmt, gimple_location (stmt_info->stmt));

      /* While EH edges will generally prevent vectorization, stmt might
	 e.g. be in a must-not-throw region.  Ensure newly created stmts
	 that could throw are part of the same region.  */
      int lp_nr = lookup_stmt_eh_lp (stmt_info->stmt);
      if (lp_nr != 0 && stmt_could_throw_p (cfun, vec_stmt))
	add_stmt_to_eh_lp (vec_stmt, lp_nr);
    }
  else
    gcc_assert (!stmt_could_throw_p (cfun, vec_stmt));
}
/* Replace the scalar statement STMT_INFO with a new vector statement VEC_STMT,
   which sets the same scalar result as STMT_INFO did.  Create and return a
   stmt_vec_info for VEC_STMT.  */

void
vect_finish_replace_stmt (vec_info *vinfo,
			  stmt_vec_info stmt_info, gimple *vec_stmt)
{
  gimple *scalar_stmt = vect_orig_stmt (stmt_info)->stmt;
  gcc_assert (gimple_get_lhs (scalar_stmt) == gimple_get_lhs (vec_stmt));

  gimple_stmt_iterator gsi = gsi_for_stmt (scalar_stmt);
  gsi_replace (&gsi, vec_stmt, true);

  vect_finish_stmt_generation_1 (vinfo, stmt_info, vec_stmt);
}
/* Add VEC_STMT to the vectorized implementation of STMT_INFO and insert it
   before *GSI.  Create and return a stmt_vec_info for VEC_STMT.  */

void
vect_finish_stmt_generation (vec_info *vinfo,
			     stmt_vec_info stmt_info, gimple *vec_stmt,
			     gimple_stmt_iterator *gsi)
{
  gcc_assert (!stmt_info || gimple_code (stmt_info->stmt) != GIMPLE_LABEL);

  if (!gsi_end_p (*gsi)
      && gimple_has_mem_ops (vec_stmt))
    {
      gimple *at_stmt = gsi_stmt (*gsi);
      tree vuse = gimple_vuse (at_stmt);
      if (vuse && TREE_CODE (vuse) == SSA_NAME)
	{
	  tree vdef = gimple_vdef (at_stmt);
	  gimple_set_vuse (vec_stmt, gimple_vuse (at_stmt));
	  gimple_set_modified (vec_stmt, true);
	  /* If we have an SSA vuse and insert a store, update virtual
	     SSA form to avoid triggering the renamer.  Do so only
	     if we can easily see all uses - which is what almost always
	     happens with the way vectorized stmts are inserted.  */
	  if ((vdef && TREE_CODE (vdef) == SSA_NAME)
	      && ((is_gimple_assign (vec_stmt)
		   && !is_gimple_reg (gimple_assign_lhs (vec_stmt)))
		  || (is_gimple_call (vec_stmt)
		      && !(gimple_call_flags (vec_stmt)
			   & (ECF_CONST|ECF_PURE|ECF_NOVOPS)))))
	    {
	      tree new_vdef = copy_ssa_name (vuse, vec_stmt);
	      gimple_set_vdef (vec_stmt, new_vdef);
	      SET_USE (gimple_vuse_op (at_stmt), new_vdef);
	    }
	}
    }
  gsi_insert_before (gsi, vec_stmt, GSI_SAME_STMT);
  vect_finish_stmt_generation_1 (vinfo, stmt_info, vec_stmt);
}
/* We want to vectorize a call to combined function CFN with function
   decl FNDECL, using VECTYPE_OUT as the type of the output and VECTYPE_IN
   as the types of all inputs.  Check whether this is possible using
   an internal function, returning its code if so or IFN_LAST if not.  */

static internal_fn
vectorizable_internal_function (combined_fn cfn, tree fndecl,
				tree vectype_out, tree vectype_in)
{
  internal_fn ifn;
  if (internal_fn_p (cfn))
    ifn = as_internal_fn (cfn);
  else
    ifn = associated_internal_fn (fndecl);
  if (ifn != IFN_LAST && direct_internal_fn_p (ifn))
    {
      const direct_internal_fn_info &info = direct_internal_fn (ifn);
      if (info.vectorizable)
	{
	  tree type0 = (info.type0 < 0 ? vectype_out : vectype_in);
	  tree type1 = (info.type1 < 0 ? vectype_out : vectype_in);
	  if (direct_internal_fn_supported_p (ifn, tree_pair (type0, type1),
					      OPTIMIZE_FOR_SPEED))
	    return ifn;
	}
    }
  return IFN_LAST;
}
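/* For example, a call to sqrt in a loop over doubles can be vectorized
   through IFN_SQRT when the target provides a direct implementation for
   the chosen vector type (say V2DF); otherwise IFN_LAST is returned and
   the caller has to fall back to other strategies.  */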
static tree permute_vec_elements (vec_info *, tree, tree, tree, stmt_vec_info,
				  gimple_stmt_iterator *);
/* Check whether a load or store statement in the loop described by
   LOOP_VINFO is possible in a loop using partial vectors.  This is
   testing whether the vectorizer pass has the appropriate support,
   as well as whether the target does.

   VLS_TYPE says whether the statement is a load or store and VECTYPE
   is the type of the vector being loaded or stored.  MEMORY_ACCESS_TYPE
   says how the load or store is going to be implemented and GROUP_SIZE
   is the number of load or store statements in the containing group.
   If the access is a gather load or scatter store, GS_INFO describes
   its arguments.  If the load or store is conditional, SCALAR_MASK is the
   condition under which it occurs.

   Clear LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P if a loop using partial
   vectors is not supported, otherwise record the required rgroup control
   types.  */

static void
check_load_store_for_partial_vectors (loop_vec_info loop_vinfo, tree vectype,
				      vec_load_store_type vls_type,
				      int group_size,
				      vect_memory_access_type
				      memory_access_type,
				      gather_scatter_info *gs_info,
				      tree scalar_mask)
{
  /* Invariant loads need no special support.  */
  if (memory_access_type == VMAT_INVARIANT)
    return;

  vec_loop_masks *masks = &LOOP_VINFO_MASKS (loop_vinfo);
  machine_mode vecmode = TYPE_MODE (vectype);
  bool is_load = (vls_type == VLS_LOAD);
  if (memory_access_type == VMAT_LOAD_STORE_LANES)
    {
      if (is_load
	  ? !vect_load_lanes_supported (vectype, group_size, true)
	  : !vect_store_lanes_supported (vectype, group_size, true))
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			     "can't operate on partial vectors because"
			     " the target doesn't have an appropriate"
			     " load/store-lanes instruction.\n");
	  LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
	  return;
	}
      unsigned int ncopies = vect_get_num_copies (loop_vinfo, vectype);
      vect_record_loop_mask (loop_vinfo, masks, ncopies, vectype, scalar_mask);
      return;
    }

  if (memory_access_type == VMAT_GATHER_SCATTER)
    {
      internal_fn ifn = (is_load
			 ? IFN_MASK_GATHER_LOAD
			 : IFN_MASK_SCATTER_STORE);
      if (!internal_gather_scatter_fn_supported_p (ifn, vectype,
						   gs_info->memory_type,
						   gs_info->offset_vectype,
						   gs_info->scale))
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			     "can't operate on partial vectors because"
			     " the target doesn't have an appropriate"
			     " gather load or scatter store instruction.\n");
	  LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
	  return;
	}
      unsigned int ncopies = vect_get_num_copies (loop_vinfo, vectype);
      vect_record_loop_mask (loop_vinfo, masks, ncopies, vectype, scalar_mask);
      return;
    }

  if (memory_access_type != VMAT_CONTIGUOUS
      && memory_access_type != VMAT_CONTIGUOUS_PERMUTE)
    {
      /* Element X of the data must come from iteration i * VF + X of the
	 scalar loop.  We need more work to support other mappings.  */
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "can't operate on partial vectors because an"
			 " access isn't contiguous.\n");
      LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
      return;
    }

  if (!VECTOR_MODE_P (vecmode))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "can't operate on partial vectors when emulating"
			 " vector operations.\n");
      LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
      return;
    }

  /* We might load more scalars than we need for permuting SLP loads.
     We checked in get_group_load_store_type that the extra elements
     don't leak into a new vector.  */
  auto get_valid_nvectors = [] (poly_uint64 size, poly_uint64 nunits)
  {
    unsigned int nvectors;
    if (can_div_away_from_zero_p (size, nunits, &nvectors))
      return nvectors;
    gcc_unreachable ();
  };

  poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
  poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
  machine_mode mask_mode;
  bool using_partial_vectors_p = false;
  if (targetm.vectorize.get_mask_mode (vecmode).exists (&mask_mode)
      && can_vec_mask_load_store_p (vecmode, mask_mode, is_load))
    {
      unsigned int nvectors = get_valid_nvectors (group_size * vf, nunits);
      vect_record_loop_mask (loop_vinfo, masks, nvectors, vectype, scalar_mask);
      using_partial_vectors_p = true;
    }

  machine_mode vmode;
  if (get_len_load_store_mode (vecmode, is_load).exists (&vmode))
    {
      unsigned int nvectors = get_valid_nvectors (group_size * vf, nunits);
      vec_loop_lens *lens = &LOOP_VINFO_LENS (loop_vinfo);
      unsigned factor = (vecmode == vmode) ? 1 : GET_MODE_UNIT_SIZE (vecmode);
      vect_record_loop_len (loop_vinfo, lens, nvectors, vectype, factor);
      using_partial_vectors_p = true;
    }

  if (!using_partial_vectors_p)
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "can't operate on partial vectors because the"
			 " target doesn't have the appropriate partial"
			 " vectorization load or store.\n");
      LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
    }
}
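/* As a concrete example of the bookkeeping above: for a contiguous access
   with GROUP_SIZE == 2, a vectorization factor of 8 and 4-element vectors,
   get_valid_nvectors (2 * 8, 4) == 4, so four rgroup masks (or lengths)
   are recorded for the loop.  */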
/* Return the mask input to a masked load or store.  VEC_MASK is the vectorized
   form of the scalar mask condition and LOOP_MASK, if nonnull, is the mask
   that needs to be applied to all loads and stores in a vectorized loop.
   Return VEC_MASK if LOOP_MASK is null, otherwise return VEC_MASK & LOOP_MASK.

   MASK_TYPE is the type of both masks.  If new statements are needed,
   insert them before GSI.  */

static tree
prepare_load_store_mask (tree mask_type, tree loop_mask, tree vec_mask,
			 gimple_stmt_iterator *gsi)
{
  gcc_assert (useless_type_conversion_p (mask_type, TREE_TYPE (vec_mask)));
  if (!loop_mask)
    return vec_mask;

  gcc_assert (TREE_TYPE (loop_mask) == mask_type);
  tree and_res = make_temp_ssa_name (mask_type, NULL, "vec_mask_and");
  gimple *and_stmt = gimple_build_assign (and_res, BIT_AND_EXPR,
					  vec_mask, loop_mask);
  gsi_insert_before (gsi, and_stmt, GSI_SAME_STMT);
  return and_res;
}
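/* E.g. when both masks are present this emits, before GSI, roughly

     vec_mask_and_1 = vec_mask & loop_mask;

   and returns the new SSA name (the exact name suffix is chosen by
   make_temp_ssa_name).  */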
/* Determine whether we can use a gather load or scatter store to vectorize
   strided load or store STMT_INFO by truncating the current offset to a
   smaller width.  We need to be able to construct an offset vector:

     { 0, X, X*2, X*3, ... }

   without loss of precision, where X is STMT_INFO's DR_STEP.

   Return true if this is possible, describing the gather load or scatter
   store in GS_INFO.  MASKED_P is true if the load or store is conditional.  */

static bool
vect_truncate_gather_scatter_offset (stmt_vec_info stmt_info,
				     loop_vec_info loop_vinfo, bool masked_p,
				     gather_scatter_info *gs_info)
{
  dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
  data_reference *dr = dr_info->dr;
  tree step = DR_STEP (dr);
  if (TREE_CODE (step) != INTEGER_CST)
    {
      /* ??? Perhaps we could use range information here?  */
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "cannot truncate variable step.\n");
      return false;
    }

  /* Get the number of bits in an element.  */
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  scalar_mode element_mode = SCALAR_TYPE_MODE (TREE_TYPE (vectype));
  unsigned int element_bits = GET_MODE_BITSIZE (element_mode);

  /* Set COUNT to the upper limit on the number of elements - 1.
     Start with the maximum vectorization factor.  */
  unsigned HOST_WIDE_INT count = vect_max_vf (loop_vinfo) - 1;

  /* Try lowering COUNT to the number of scalar latch iterations.  */
  class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  widest_int max_iters;
  if (max_loop_iterations (loop, &max_iters)
      && max_iters < count)
    count = max_iters.to_shwi ();

  /* Try scales of 1 and the element size.  */
  int scales[] = { 1, vect_get_scalar_dr_size (dr_info) };
  wi::overflow_type overflow = wi::OVF_NONE;
  for (int i = 0; i < 2; ++i)
    {
      int scale = scales[i];
      widest_int factor;
      if (!wi::multiple_of_p (wi::to_widest (step), scale, SIGNED, &factor))
	continue;

      /* Determine the minimum precision of (COUNT - 1) * STEP / SCALE.  */
      widest_int range = wi::mul (count, factor, SIGNED, &overflow);
      if (overflow)
	continue;
      signop sign = range >= 0 ? UNSIGNED : SIGNED;
      unsigned int min_offset_bits = wi::min_precision (range, sign);

      /* Find the narrowest viable offset type.  */
      unsigned int offset_bits = 1U << ceil_log2 (min_offset_bits);
      tree offset_type = build_nonstandard_integer_type (offset_bits,
							  sign == UNSIGNED);

      /* See whether the target supports the operation with an offset
	 no narrower than OFFSET_TYPE.  */
      tree memory_type = TREE_TYPE (DR_REF (dr));
      if (!vect_gather_scatter_fn_p (loop_vinfo, DR_IS_READ (dr), masked_p,
				     vectype, memory_type, offset_type, scale,
				     &gs_info->ifn, &gs_info->offset_vectype))
	continue;

      gs_info->decl = NULL_TREE;
      /* Logically the sum of DR_BASE_ADDRESS, DR_INIT and DR_OFFSET,
	 but we don't need to store that here.  */
      gs_info->base = NULL_TREE;
      gs_info->element_type = TREE_TYPE (vectype);
      gs_info->offset = fold_convert (offset_type, step);
      gs_info->offset_dt = vect_constant_def;
      gs_info->scale = scale;
      gs_info->memory_type = memory_type;
      return true;
    }

  if (overflow && dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "truncating gather/scatter offset to %d bits"
		     " might change its value.\n", element_bits);

  return false;
}
/* Return true if we can use gather/scatter internal functions to
   vectorize STMT_INFO, which is a grouped or strided load or store.
   MASKED_P is true if load or store is conditional.  When returning
   true, fill in GS_INFO with the information required to perform the
   gather load or scatter store.  */
static bool
vect_use_strided_gather_scatters_p (stmt_vec_info stmt_info,
				    loop_vec_info loop_vinfo, bool masked_p,
				    gather_scatter_info *gs_info)
{
  if (!vect_check_gather_scatter (stmt_info, loop_vinfo, gs_info)
      || gs_info->decl)
    return vect_truncate_gather_scatter_offset (stmt_info, loop_vinfo,
						masked_p, gs_info);

  tree old_offset_type = TREE_TYPE (gs_info->offset);
  tree new_offset_type = TREE_TYPE (gs_info->offset_vectype);

  gcc_assert (TYPE_PRECISION (new_offset_type)
	      >= TYPE_PRECISION (old_offset_type));
  gs_info->offset = fold_convert (new_offset_type, gs_info->offset);

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "using gather/scatter for strided/grouped access,"
		     " scale = %d\n", gs_info->scale);

  return true;
}
/* STMT_INFO is a non-strided load or store, meaning that it accesses
   elements with a known constant step.  Return -1 if that step
   is negative, 0 if it is zero, and 1 if it is greater than zero.  */
static int
compare_step_with_zero (vec_info *vinfo, stmt_vec_info stmt_info)
{
  dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
  return tree_int_cst_compare (vect_dr_behavior (vinfo, dr_info)->step,
			       size_zero_node);
}
/* If the target supports a permute mask that reverses the elements in
   a vector of type VECTYPE, return that mask, otherwise return null.  */
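/* For example (illustrative only): for a four-element vector the reversing
   permutation is { 3, 2, 1, 0 }.  The builder below encodes it as the single
   stepped pattern nunits-1, nunits-2, nunits-3, which also describes the
   permutation for variable-length vectors.  */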
static tree
perm_mask_for_reverse (tree vectype)
{
  poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);

  /* The encoding has a single stepped pattern.  */
  vec_perm_builder sel (nunits, 1, 3);
  for (int i = 0; i < 3; ++i)
    sel.quick_push (nunits - 1 - i);

  vec_perm_indices indices (sel, 1, nunits);
  if (!can_vec_perm_const_p (TYPE_MODE (vectype), indices))
    return NULL_TREE;
  return vect_gen_perm_mask_checked (vectype, indices);
}
/* A subroutine of get_load_store_type, with a subset of the same
   arguments.  Handle the case where STMT_INFO is a load or store that
   accesses consecutive elements with a negative step.  */
static vect_memory_access_type
get_negative_load_store_type (vec_info *vinfo,
			      stmt_vec_info stmt_info, tree vectype,
			      vec_load_store_type vls_type,
			      unsigned int ncopies)
{
  dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
  dr_alignment_support alignment_support_scheme;

  if (ncopies > 1)
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "multiple types with negative step.\n");
      return VMAT_ELEMENTWISE;
    }

  alignment_support_scheme = vect_supportable_dr_alignment (vinfo,
							     dr_info, false);
  if (alignment_support_scheme != dr_aligned
      && alignment_support_scheme != dr_unaligned_supported)
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "negative step but alignment required.\n");
      return VMAT_ELEMENTWISE;
    }

  if (vls_type == VLS_STORE_INVARIANT)
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "negative step with invariant source;"
			 " no permute needed.\n");
      return VMAT_CONTIGUOUS_DOWN;
    }

  if (!perm_mask_for_reverse (vectype))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "negative step and reversing not supported.\n");
      return VMAT_ELEMENTWISE;
    }

  return VMAT_CONTIGUOUS_REVERSE;
}
/* STMT_INFO is either a masked or unconditional store.  Return the value
   being stored.  */

tree
vect_get_store_rhs (stmt_vec_info stmt_info)
{
  if (gassign *assign = dyn_cast <gassign *> (stmt_info->stmt))
    {
      gcc_assert (gimple_assign_single_p (assign));
      return gimple_assign_rhs1 (assign);
    }
  if (gcall *call = dyn_cast <gcall *> (stmt_info->stmt))
    {
      internal_fn ifn = gimple_call_internal_fn (call);
      int index = internal_fn_stored_value_index (ifn);
      gcc_assert (index >= 0);
      return gimple_call_arg (call, index);
    }
  gcc_unreachable ();
}
/* Function VECTOR_VECTOR_COMPOSITION_TYPE

   This function returns a vector type which can be composed with NELTS
   pieces, whose type is recorded in PTYPE.  VTYPE should be a vector type,
   and has the same vector size as the return vector.  It first checks
   whether the target supports a vector mode of the piece size for the
   construction; if not, it then checks whether a scalar mode of the piece
   size can be used instead.  It returns NULL_TREE if no usable composition
   can be found.

   For example, for (vtype=V16QI, nelts=4), we can probably get:
     - V16QI with PTYPE V4QI.
     - V4SI with PTYPE SI.
     - NULL_TREE if neither form is supported.  */
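/* As a usage note (illustrative): with the (vtype=V16QI, nelts=4) example
   above, the second form matters when the target cannot construct a V16QI
   from V4QI sub-vectors but can build a V4SI from SImode scalars; each
   32-bit piece then carries four of the QImode elements.  */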
static tree
vector_vector_composition_type (tree vtype, poly_uint64 nelts, tree *ptype)
{
  gcc_assert (VECTOR_TYPE_P (vtype));
  gcc_assert (known_gt (nelts, 0U));

  machine_mode vmode = TYPE_MODE (vtype);
  if (!VECTOR_MODE_P (vmode))
    return NULL_TREE;

  poly_uint64 vbsize = GET_MODE_BITSIZE (vmode);
  unsigned int pbsize;
  if (constant_multiple_p (vbsize, nelts, &pbsize))
    {
      /* First check if vec_init optab supports construction from
	 vector pieces directly.  */
      scalar_mode elmode = SCALAR_TYPE_MODE (TREE_TYPE (vtype));
      poly_uint64 inelts = pbsize / GET_MODE_BITSIZE (elmode);
      machine_mode rmode;
      if (related_vector_mode (vmode, elmode, inelts).exists (&rmode)
	  && (convert_optab_handler (vec_init_optab, vmode, rmode)
	      != CODE_FOR_nothing))
	{
	  *ptype = build_vector_type (TREE_TYPE (vtype), inelts);
	  return vtype;
	}

      /* Otherwise check if exists an integer type of the same piece size and
	 if vec_init optab supports construction from it directly.  */
      if (int_mode_for_size (pbsize, 0).exists (&elmode)
	  && related_vector_mode (vmode, elmode, nelts).exists (&rmode)
	  && (convert_optab_handler (vec_init_optab, rmode, elmode)
	      != CODE_FOR_nothing))
	{
	  *ptype = build_nonstandard_integer_type (pbsize, 1);
	  return build_vector_type (*ptype, nelts);
	}
    }

  return NULL_TREE;
}
/* A subroutine of get_load_store_type, with a subset of the same
   arguments.  Handle the case where STMT_INFO is part of a grouped load
   or store.

   For stores, the statements in the group are all consecutive
   and there is no gap at the end.  For loads, the statements in the
   group might not be consecutive; there can be gaps between statements
   as well as at the end.  */
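/* Illustrative example (not taken from a particular testcase): loads of
   a[4*i], a[4*i+1] and a[4*i+2] form a group of size 4 with a gap of 1 at
   the end, so loading the whole group with full vectors would read one
   element beyond the final scalar access in the last iteration; the code
   below decides whether such an overrun is acceptable or must be avoided.  */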
2065 get_group_load_store_type (vec_info
*vinfo
, stmt_vec_info stmt_info
,
2066 tree vectype
, slp_tree slp_node
,
2067 bool masked_p
, vec_load_store_type vls_type
,
2068 vect_memory_access_type
*memory_access_type
,
2069 dr_alignment_support
*alignment_support_scheme
,
2070 gather_scatter_info
*gs_info
)
2072 loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
);
2073 class loop
*loop
= loop_vinfo
? LOOP_VINFO_LOOP (loop_vinfo
) : NULL
;
2074 stmt_vec_info first_stmt_info
= DR_GROUP_FIRST_ELEMENT (stmt_info
);
2075 dr_vec_info
*first_dr_info
= STMT_VINFO_DR_INFO (first_stmt_info
);
2076 unsigned int group_size
= DR_GROUP_SIZE (first_stmt_info
);
2077 bool single_element_p
= (stmt_info
== first_stmt_info
2078 && !DR_GROUP_NEXT_ELEMENT (stmt_info
));
2079 unsigned HOST_WIDE_INT gap
= DR_GROUP_GAP (first_stmt_info
);
2080 poly_uint64 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
2082 /* True if the vectorized statements would access beyond the last
2083 statement in the group. */
2084 bool overrun_p
= false;
2086 /* True if we can cope with such overrun by peeling for gaps, so that
2087 there is at least one final scalar iteration after the vector loop. */
2088 bool can_overrun_p
= (!masked_p
2089 && vls_type
== VLS_LOAD
2093 /* There can only be a gap at the end of the group if the stride is
2094 known at compile time. */
2095 gcc_assert (!STMT_VINFO_STRIDED_P (first_stmt_info
) || gap
== 0);
2097 /* Stores can't yet have gaps. */
2098 gcc_assert (slp_node
|| vls_type
== VLS_LOAD
|| gap
== 0);
2102 /* For SLP vectorization we directly vectorize a subchain
2103 without permutation. */
2104 if (! SLP_TREE_LOAD_PERMUTATION (slp_node
).exists ())
2106 = STMT_VINFO_DR_INFO (SLP_TREE_SCALAR_STMTS (slp_node
)[0]);
2107 if (STMT_VINFO_STRIDED_P (first_stmt_info
))
2109 /* Try to use consecutive accesses of DR_GROUP_SIZE elements,
2110 separated by the stride, until we have a complete vector.
2111 Fall back to scalar accesses if that isn't possible. */
2112 if (multiple_p (nunits
, group_size
))
2113 *memory_access_type
= VMAT_STRIDED_SLP
;
2115 *memory_access_type
= VMAT_ELEMENTWISE
;
2119 overrun_p
= loop_vinfo
&& gap
!= 0;
2120 if (overrun_p
&& vls_type
!= VLS_LOAD
)
2122 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2123 "Grouped store with gaps requires"
2124 " non-consecutive accesses\n");
2127 /* An overrun is fine if the trailing elements are smaller
2128 than the alignment boundary B. Every vector access will
2129 be a multiple of B and so we are guaranteed to access a
2130 non-gap element in the same B-sized block. */
2132 && gap
< (vect_known_alignment_in_bytes (first_dr_info
)
2133 / vect_get_scalar_dr_size (first_dr_info
)))
2136 /* If the gap splits the vector in half and the target
2137 can do half-vector operations avoid the epilogue peeling
2138 by simply loading half of the vector only. Usually
2139 the construction with an upper zero half will be elided. */
2140 dr_alignment_support alignment_support_scheme
;
2144 && (((alignment_support_scheme
2145 = vect_supportable_dr_alignment (vinfo
,
2146 first_dr_info
, false)))
2148 || alignment_support_scheme
== dr_unaligned_supported
)
2149 && known_eq (nunits
, (group_size
- gap
) * 2)
2150 && known_eq (nunits
, group_size
)
2151 && (vector_vector_composition_type (vectype
, 2, &half_vtype
)
2155 if (overrun_p
&& !can_overrun_p
)
2157 if (dump_enabled_p ())
2158 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2159 "Peeling for outer loop is not supported\n");
2162 int cmp
= compare_step_with_zero (vinfo
, stmt_info
);
2165 if (single_element_p
)
2166 /* ??? The VMAT_CONTIGUOUS_REVERSE code generation is
2167 only correct for single element "interleaving" SLP. */
2168 *memory_access_type
= get_negative_load_store_type
2169 (vinfo
, stmt_info
, vectype
, vls_type
, 1);
2172 /* Try to use consecutive accesses of DR_GROUP_SIZE elements,
2173 separated by the stride, until we have a complete vector.
2174 Fall back to scalar accesses if that isn't possible. */
2175 if (multiple_p (nunits
, group_size
))
2176 *memory_access_type
= VMAT_STRIDED_SLP
;
2178 *memory_access_type
= VMAT_ELEMENTWISE
;
2183 gcc_assert (!loop_vinfo
|| cmp
> 0);
2184 *memory_access_type
= VMAT_CONTIGUOUS
;
2190 /* We can always handle this case using elementwise accesses,
2191 but see if something more efficient is available. */
2192 *memory_access_type
= VMAT_ELEMENTWISE
;
2194 /* If there is a gap at the end of the group then these optimizations
2195 would access excess elements in the last iteration. */
2196 bool would_overrun_p
= (gap
!= 0);
2197 /* An overrun is fine if the trailing elements are smaller than the
2198 alignment boundary B. Every vector access will be a multiple of B
2199 and so we are guaranteed to access a non-gap element in the
2200 same B-sized block. */
2203 && gap
< (vect_known_alignment_in_bytes (first_dr_info
)
2204 / vect_get_scalar_dr_size (first_dr_info
)))
2205 would_overrun_p
= false;
2207 if (!STMT_VINFO_STRIDED_P (first_stmt_info
)
2208 && (can_overrun_p
|| !would_overrun_p
)
2209 && compare_step_with_zero (vinfo
, stmt_info
) > 0)
2211 /* First cope with the degenerate case of a single-element
2213 if (known_eq (TYPE_VECTOR_SUBPARTS (vectype
), 1U))
2216 /* Otherwise try using LOAD/STORE_LANES. */
2217 else if (vls_type
== VLS_LOAD
2218 ? vect_load_lanes_supported (vectype
, group_size
, masked_p
)
2219 : vect_store_lanes_supported (vectype
, group_size
,
2222 *memory_access_type
= VMAT_LOAD_STORE_LANES
;
2223 overrun_p
= would_overrun_p
;
2226 /* If that fails, try using permuting loads. */
2227 else if (vls_type
== VLS_LOAD
2228 ? vect_grouped_load_supported (vectype
, single_element_p
,
2230 : vect_grouped_store_supported (vectype
, group_size
))
2232 *memory_access_type
= VMAT_CONTIGUOUS_PERMUTE
;
2233 overrun_p
= would_overrun_p
;
2237 /* As a last resort, trying using a gather load or scatter store.
2239 ??? Although the code can handle all group sizes correctly,
2240 it probably isn't a win to use separate strided accesses based
2241 on nearby locations. Or, even if it's a win over scalar code,
2242 it might not be a win over vectorizing at a lower VF, if that
2243 allows us to use contiguous accesses. */
2244 if (*memory_access_type
== VMAT_ELEMENTWISE
2247 && vect_use_strided_gather_scatters_p (stmt_info
, loop_vinfo
,
2249 *memory_access_type
= VMAT_GATHER_SCATTER
;
2252 if (*memory_access_type
== VMAT_GATHER_SCATTER
2253 || *memory_access_type
== VMAT_ELEMENTWISE
)
2254 *alignment_support_scheme
= dr_unaligned_supported
;
2256 *alignment_support_scheme
2257 = vect_supportable_dr_alignment (vinfo
, first_dr_info
, false);
2259 if (vls_type
!= VLS_LOAD
&& first_stmt_info
== stmt_info
)
2261 /* STMT is the leader of the group. Check the operands of all the
2262 stmts of the group. */
2263 stmt_vec_info next_stmt_info
= DR_GROUP_NEXT_ELEMENT (stmt_info
);
2264 while (next_stmt_info
)
2266 tree op
= vect_get_store_rhs (next_stmt_info
);
2267 enum vect_def_type dt
;
2268 if (!vect_is_simple_use (op
, vinfo
, &dt
))
2270 if (dump_enabled_p ())
2271 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2272 "use not simple.\n");
2275 next_stmt_info
= DR_GROUP_NEXT_ELEMENT (next_stmt_info
);
2281 gcc_assert (can_overrun_p
);
2282 if (dump_enabled_p ())
2283 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2284 "Data access with gaps requires scalar "
2286 LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo
) = true;
/* Analyze load or store statement STMT_INFO of type VLS_TYPE.  Return true
   if there is a memory access type that the vectorized form can use,
   storing it in *MEMORY_ACCESS_TYPE if so.  If we decide to use gathers
   or scatters, fill in GS_INFO accordingly.  In addition
   *ALIGNMENT_SUPPORT_SCHEME is filled out and false is returned if
   the target does not support the alignment scheme.

   SLP says whether we're performing SLP rather than loop vectorization.
   MASKED_P is true if the statement is conditional on a vectorized mask.
   VECTYPE is the vector type that the vectorized statements will use.
   NCOPIES is the number of vector statements that will be needed.  */
2305 get_load_store_type (vec_info
*vinfo
, stmt_vec_info stmt_info
,
2306 tree vectype
, slp_tree slp_node
,
2307 bool masked_p
, vec_load_store_type vls_type
,
2308 unsigned int ncopies
,
2309 vect_memory_access_type
*memory_access_type
,
2310 dr_alignment_support
*alignment_support_scheme
,
2311 gather_scatter_info
*gs_info
)
2313 loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
);
2314 poly_uint64 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
2315 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info
))
2317 *memory_access_type
= VMAT_GATHER_SCATTER
;
2318 if (!vect_check_gather_scatter (stmt_info
, loop_vinfo
, gs_info
))
2320 else if (!vect_is_simple_use (gs_info
->offset
, vinfo
,
2321 &gs_info
->offset_dt
,
2322 &gs_info
->offset_vectype
))
2324 if (dump_enabled_p ())
2325 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2326 "%s index use not simple.\n",
2327 vls_type
== VLS_LOAD
? "gather" : "scatter");
2330 /* Gather-scatter accesses perform only component accesses, alignment
2331 is irrelevant for them. */
2332 *alignment_support_scheme
= dr_unaligned_supported
;
2334 else if (STMT_VINFO_GROUPED_ACCESS (stmt_info
))
2336 if (!get_group_load_store_type (vinfo
, stmt_info
, vectype
, slp_node
,
2338 vls_type
, memory_access_type
,
2339 alignment_support_scheme
, gs_info
))
2342 else if (STMT_VINFO_STRIDED_P (stmt_info
))
2344 gcc_assert (!slp_node
);
2346 && vect_use_strided_gather_scatters_p (stmt_info
, loop_vinfo
,
2348 *memory_access_type
= VMAT_GATHER_SCATTER
;
2350 *memory_access_type
= VMAT_ELEMENTWISE
;
2351 /* Alignment is irrelevant here. */
2352 *alignment_support_scheme
= dr_unaligned_supported
;
2356 int cmp
= compare_step_with_zero (vinfo
, stmt_info
);
2358 *memory_access_type
= get_negative_load_store_type
2359 (vinfo
, stmt_info
, vectype
, vls_type
, ncopies
);
2362 gcc_assert (vls_type
== VLS_LOAD
);
2363 *memory_access_type
= VMAT_INVARIANT
;
2366 *memory_access_type
= VMAT_CONTIGUOUS
;
2367 *alignment_support_scheme
2368 = vect_supportable_dr_alignment (vinfo
,
2369 STMT_VINFO_DR_INFO (stmt_info
), false);
2372 if ((*memory_access_type
== VMAT_ELEMENTWISE
2373 || *memory_access_type
== VMAT_STRIDED_SLP
)
2374 && !nunits
.is_constant ())
2376 if (dump_enabled_p ())
2377 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2378 "Not using elementwise accesses due to variable "
2379 "vectorization factor.\n");
2383 if (*alignment_support_scheme
== dr_unaligned_unsupported
)
2385 if (dump_enabled_p ())
2386 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2387 "unsupported unaligned access\n");
2391 /* FIXME: At the moment the cost model seems to underestimate the
2392 cost of using elementwise accesses. This check preserves the
2393 traditional behavior until that can be fixed. */
2394 stmt_vec_info first_stmt_info
= DR_GROUP_FIRST_ELEMENT (stmt_info
);
2395 if (!first_stmt_info
)
2396 first_stmt_info
= stmt_info
;
2397 if (*memory_access_type
== VMAT_ELEMENTWISE
2398 && !STMT_VINFO_STRIDED_P (first_stmt_info
)
2399 && !(stmt_info
== DR_GROUP_FIRST_ELEMENT (stmt_info
)
2400 && !DR_GROUP_NEXT_ELEMENT (stmt_info
)
2401 && !pow2p_hwi (DR_GROUP_SIZE (stmt_info
))))
2403 if (dump_enabled_p ())
2404 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2405 "not falling back to elementwise accesses\n");
/* Return true if boolean argument MASK is suitable for vectorizing
   conditional operation STMT_INFO.  When returning true, store the type
   of the definition in *MASK_DT_OUT and the type of the vectorized mask
   in *MASK_VECTYPE_OUT.  */
2417 vect_check_scalar_mask (vec_info
*vinfo
, stmt_vec_info stmt_info
, tree mask
,
2418 vect_def_type
*mask_dt_out
,
2419 tree
*mask_vectype_out
)
2421 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (mask
)))
2423 if (dump_enabled_p ())
2424 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2425 "mask argument is not a boolean.\n");
2429 if (TREE_CODE (mask
) != SSA_NAME
)
2431 if (dump_enabled_p ())
2432 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2433 "mask argument is not an SSA name.\n");
2437 enum vect_def_type mask_dt
;
2439 if (!vect_is_simple_use (mask
, vinfo
, &mask_dt
, &mask_vectype
))
2441 if (dump_enabled_p ())
2442 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2443 "mask use not simple.\n");
2447 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
2449 mask_vectype
= get_mask_type_for_scalar_type (vinfo
, TREE_TYPE (vectype
));
2451 if (!mask_vectype
|| !VECTOR_BOOLEAN_TYPE_P (mask_vectype
))
2453 if (dump_enabled_p ())
2454 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2455 "could not find an appropriate vector mask type.\n");
2459 if (maybe_ne (TYPE_VECTOR_SUBPARTS (mask_vectype
),
2460 TYPE_VECTOR_SUBPARTS (vectype
)))
2462 if (dump_enabled_p ())
2463 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2464 "vector mask type %T"
2465 " does not match vector data type %T.\n",
2466 mask_vectype
, vectype
);
2471 *mask_dt_out
= mask_dt
;
2472 *mask_vectype_out
= mask_vectype
;
/* Return true if stored value RHS is suitable for vectorizing store
   statement STMT_INFO.  When returning true, store the type of the
   definition in *RHS_DT_OUT, the type of the vectorized store value in
   *RHS_VECTYPE_OUT and the type of the store in *VLS_TYPE_OUT.  */
2482 vect_check_store_rhs (vec_info
*vinfo
, stmt_vec_info stmt_info
,
2483 slp_tree slp_node
, tree rhs
,
2484 vect_def_type
*rhs_dt_out
, tree
*rhs_vectype_out
,
2485 vec_load_store_type
*vls_type_out
)
2487 /* In the case this is a store from a constant make sure
2488 native_encode_expr can handle it. */
2489 if (CONSTANT_CLASS_P (rhs
) && native_encode_expr (rhs
, NULL
, 64) == 0)
2491 if (dump_enabled_p ())
2492 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2493 "cannot encode constant as a byte sequence.\n");
2497 enum vect_def_type rhs_dt
;
2500 if (!vect_is_simple_use (vinfo
, stmt_info
, slp_node
, 0,
2501 &rhs
, &slp_op
, &rhs_dt
, &rhs_vectype
))
2503 if (dump_enabled_p ())
2504 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2505 "use not simple.\n");
2509 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
2510 if (rhs_vectype
&& !useless_type_conversion_p (vectype
, rhs_vectype
))
2512 if (dump_enabled_p ())
2513 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2514 "incompatible vector types.\n");
2518 *rhs_dt_out
= rhs_dt
;
2519 *rhs_vectype_out
= rhs_vectype
;
2520 if (rhs_dt
== vect_constant_def
|| rhs_dt
== vect_external_def
)
2521 *vls_type_out
= VLS_STORE_INVARIANT
;
2523 *vls_type_out
= VLS_STORE
;
/* Build an all-ones vector mask of type MASKTYPE while vectorizing STMT_INFO.
   Note that we support masks with floating-point type, in which case the
   floats are interpreted as a bitmask.  */

static tree
vect_build_all_ones_mask (vec_info *vinfo,
			  stmt_vec_info stmt_info, tree masktype)
{
  if (TREE_CODE (masktype) == INTEGER_TYPE)
    return build_int_cst (masktype, -1);
  else if (TREE_CODE (TREE_TYPE (masktype)) == INTEGER_TYPE)
    {
      tree mask = build_int_cst (TREE_TYPE (masktype), -1);
      mask = build_vector_from_val (masktype, mask);
      return vect_init_vector (vinfo, stmt_info, mask, masktype, NULL);
    }
  else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (masktype)))
    {
      REAL_VALUE_TYPE r;
      long tmp[6];
      for (int j = 0; j < 6; ++j)
	tmp[j] = -1;
      real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (masktype)));
      tree mask = build_real (TREE_TYPE (masktype), r);
      mask = build_vector_from_val (masktype, mask);
      return vect_init_vector (vinfo, stmt_info, mask, masktype, NULL);
    }
  gcc_unreachable ();
}
/* Build an all-zero merge value of type VECTYPE while vectorizing
   STMT_INFO as a gather load.  */

static tree
vect_build_zero_merge_argument (vec_info *vinfo,
				stmt_vec_info stmt_info, tree vectype)
{
  tree merge;
  if (TREE_CODE (TREE_TYPE (vectype)) == INTEGER_TYPE)
    merge = build_int_cst (TREE_TYPE (vectype), 0);
  else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (vectype)))
    {
      REAL_VALUE_TYPE r;
      long tmp[6];
      for (int j = 0; j < 6; ++j)
	tmp[j] = 0;
      real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (vectype)));
      merge = build_real (TREE_TYPE (vectype), r);
    }
  else
    gcc_unreachable ();
  merge = build_vector_from_val (vectype, merge);
  return vect_init_vector (vinfo, stmt_info, merge, vectype, NULL);
}
/* Build a gather load call while vectorizing STMT_INFO.  Insert new
   instructions before GSI and add them to VEC_STMT.  GS_INFO describes
   the gather load operation.  If the load is conditional, MASK is the
   unvectorized condition and MASK_DT is its definition type, otherwise
   MASK is null.  */
2589 vect_build_gather_load_calls (vec_info
*vinfo
, stmt_vec_info stmt_info
,
2590 gimple_stmt_iterator
*gsi
,
2592 gather_scatter_info
*gs_info
,
2595 loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
);
2596 class loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
2597 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
2598 poly_uint64 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
2599 int ncopies
= vect_get_num_copies (loop_vinfo
, vectype
);
2600 edge pe
= loop_preheader_edge (loop
);
2601 enum { NARROW
, NONE
, WIDEN
} modifier
;
2602 poly_uint64 gather_off_nunits
2603 = TYPE_VECTOR_SUBPARTS (gs_info
->offset_vectype
);
2605 tree arglist
= TYPE_ARG_TYPES (TREE_TYPE (gs_info
->decl
));
2606 tree rettype
= TREE_TYPE (TREE_TYPE (gs_info
->decl
));
2607 tree srctype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
2608 tree ptrtype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
2609 tree idxtype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
2610 tree masktype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
2611 tree scaletype
= TREE_VALUE (arglist
);
2612 tree real_masktype
= masktype
;
2613 gcc_checking_assert (types_compatible_p (srctype
, rettype
)
2615 || TREE_CODE (masktype
) == INTEGER_TYPE
2616 || types_compatible_p (srctype
, masktype
)));
2617 if (mask
&& TREE_CODE (masktype
) == INTEGER_TYPE
)
2618 masktype
= truth_type_for (srctype
);
2620 tree mask_halftype
= masktype
;
2621 tree perm_mask
= NULL_TREE
;
2622 tree mask_perm_mask
= NULL_TREE
;
2623 if (known_eq (nunits
, gather_off_nunits
))
2625 else if (known_eq (nunits
* 2, gather_off_nunits
))
2629 /* Currently widening gathers and scatters are only supported for
2630 fixed-length vectors. */
2631 int count
= gather_off_nunits
.to_constant ();
2632 vec_perm_builder
sel (count
, count
, 1);
2633 for (int i
= 0; i
< count
; ++i
)
2634 sel
.quick_push (i
| (count
/ 2));
2636 vec_perm_indices
indices (sel
, 1, count
);
2637 perm_mask
= vect_gen_perm_mask_checked (gs_info
->offset_vectype
,
2640 else if (known_eq (nunits
, gather_off_nunits
* 2))
2644 /* Currently narrowing gathers and scatters are only supported for
2645 fixed-length vectors. */
2646 int count
= nunits
.to_constant ();
2647 vec_perm_builder
sel (count
, count
, 1);
2648 sel
.quick_grow (count
);
2649 for (int i
= 0; i
< count
; ++i
)
2650 sel
[i
] = i
< count
/ 2 ? i
: i
+ count
/ 2;
2651 vec_perm_indices
indices (sel
, 2, count
);
2652 perm_mask
= vect_gen_perm_mask_checked (vectype
, indices
);
2656 if (mask
&& masktype
== real_masktype
)
2658 for (int i
= 0; i
< count
; ++i
)
2659 sel
[i
] = i
| (count
/ 2);
2660 indices
.new_vector (sel
, 2, count
);
2661 mask_perm_mask
= vect_gen_perm_mask_checked (masktype
, indices
);
2664 mask_halftype
= truth_type_for (gs_info
->offset_vectype
);
2669 tree scalar_dest
= gimple_get_lhs (stmt_info
->stmt
);
2670 tree vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
2672 tree ptr
= fold_convert (ptrtype
, gs_info
->base
);
2673 if (!is_gimple_min_invariant (ptr
))
2676 ptr
= force_gimple_operand (ptr
, &seq
, true, NULL_TREE
);
2677 basic_block new_bb
= gsi_insert_seq_on_edge_immediate (pe
, seq
);
2678 gcc_assert (!new_bb
);
2681 tree scale
= build_int_cst (scaletype
, gs_info
->scale
);
2683 tree vec_oprnd0
= NULL_TREE
;
2684 tree vec_mask
= NULL_TREE
;
2685 tree src_op
= NULL_TREE
;
2686 tree mask_op
= NULL_TREE
;
2687 tree prev_res
= NULL_TREE
;
2691 src_op
= vect_build_zero_merge_argument (vinfo
, stmt_info
, rettype
);
2692 mask_op
= vect_build_all_ones_mask (vinfo
, stmt_info
, masktype
);
2695 auto_vec
<tree
> vec_oprnds0
;
2696 auto_vec
<tree
> vec_masks
;
2697 vect_get_vec_defs_for_operand (vinfo
, stmt_info
,
2698 modifier
== WIDEN
? ncopies
/ 2 : ncopies
,
2699 gs_info
->offset
, &vec_oprnds0
);
2701 vect_get_vec_defs_for_operand (vinfo
, stmt_info
,
2702 modifier
== NARROW
? ncopies
/ 2 : ncopies
,
2704 for (int j
= 0; j
< ncopies
; ++j
)
2707 if (modifier
== WIDEN
&& (j
& 1))
2708 op
= permute_vec_elements (vinfo
, vec_oprnd0
, vec_oprnd0
,
2709 perm_mask
, stmt_info
, gsi
);
2711 op
= vec_oprnd0
= vec_oprnds0
[modifier
== WIDEN
? j
/ 2 : j
];
2713 if (!useless_type_conversion_p (idxtype
, TREE_TYPE (op
)))
2715 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op
)),
2716 TYPE_VECTOR_SUBPARTS (idxtype
)));
2717 var
= vect_get_new_ssa_name (idxtype
, vect_simple_var
);
2718 op
= build1 (VIEW_CONVERT_EXPR
, idxtype
, op
);
2719 gassign
*new_stmt
= gimple_build_assign (var
, VIEW_CONVERT_EXPR
, op
);
2720 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
2726 if (mask_perm_mask
&& (j
& 1))
2727 mask_op
= permute_vec_elements (vinfo
, mask_op
, mask_op
,
2728 mask_perm_mask
, stmt_info
, gsi
);
2731 if (modifier
== NARROW
)
2734 vec_mask
= vec_masks
[j
/ 2];
2737 vec_mask
= vec_masks
[j
];
2740 if (!useless_type_conversion_p (masktype
, TREE_TYPE (vec_mask
)))
2742 poly_uint64 sub1
= TYPE_VECTOR_SUBPARTS (TREE_TYPE (mask_op
));
2743 poly_uint64 sub2
= TYPE_VECTOR_SUBPARTS (masktype
);
2744 gcc_assert (known_eq (sub1
, sub2
));
2745 var
= vect_get_new_ssa_name (masktype
, vect_simple_var
);
2746 mask_op
= build1 (VIEW_CONVERT_EXPR
, masktype
, mask_op
);
2748 = gimple_build_assign (var
, VIEW_CONVERT_EXPR
, mask_op
);
2749 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
2753 if (modifier
== NARROW
&& masktype
!= real_masktype
)
2755 var
= vect_get_new_ssa_name (mask_halftype
, vect_simple_var
);
2757 = gimple_build_assign (var
, (j
& 1) ? VEC_UNPACK_HI_EXPR
2758 : VEC_UNPACK_LO_EXPR
,
2760 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
2766 tree mask_arg
= mask_op
;
2767 if (masktype
!= real_masktype
)
2769 tree utype
, optype
= TREE_TYPE (mask_op
);
2770 if (TYPE_MODE (real_masktype
) == TYPE_MODE (optype
))
2771 utype
= real_masktype
;
2773 utype
= lang_hooks
.types
.type_for_mode (TYPE_MODE (optype
), 1);
2774 var
= vect_get_new_ssa_name (utype
, vect_scalar_var
);
2775 mask_arg
= build1 (VIEW_CONVERT_EXPR
, utype
, mask_op
);
2777 = gimple_build_assign (var
, VIEW_CONVERT_EXPR
, mask_arg
);
2778 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
2780 if (!useless_type_conversion_p (real_masktype
, utype
))
2782 gcc_assert (TYPE_PRECISION (utype
)
2783 <= TYPE_PRECISION (real_masktype
));
2784 var
= vect_get_new_ssa_name (real_masktype
, vect_scalar_var
);
2785 new_stmt
= gimple_build_assign (var
, NOP_EXPR
, mask_arg
);
2786 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
2789 src_op
= build_zero_cst (srctype
);
2791 gimple
*new_stmt
= gimple_build_call (gs_info
->decl
, 5, src_op
, ptr
, op
,
2794 if (!useless_type_conversion_p (vectype
, rettype
))
2796 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (vectype
),
2797 TYPE_VECTOR_SUBPARTS (rettype
)));
2798 op
= vect_get_new_ssa_name (rettype
, vect_simple_var
);
2799 gimple_call_set_lhs (new_stmt
, op
);
2800 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
2801 var
= make_ssa_name (vec_dest
);
2802 op
= build1 (VIEW_CONVERT_EXPR
, vectype
, op
);
2803 new_stmt
= gimple_build_assign (var
, VIEW_CONVERT_EXPR
, op
);
2804 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
2808 var
= make_ssa_name (vec_dest
, new_stmt
);
2809 gimple_call_set_lhs (new_stmt
, var
);
2810 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
2813 if (modifier
== NARROW
)
2820 var
= permute_vec_elements (vinfo
, prev_res
, var
, perm_mask
,
2822 new_stmt
= SSA_NAME_DEF_STMT (var
);
2825 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (new_stmt
);
2827 *vec_stmt
= STMT_VINFO_VEC_STMTS (stmt_info
)[0];
/* Prepare the base and offset in GS_INFO for vectorization.
   Set *DATAREF_PTR to the loop-invariant base address and *VEC_OFFSET
   to the vectorized offset argument for the first copy of STMT_INFO.
   STMT_INFO is the statement described by GS_INFO and LOOP is the
   containing loop.  */

static void
vect_get_gather_scatter_ops (vec_info *vinfo,
			     class loop *loop, stmt_vec_info stmt_info,
			     gather_scatter_info *gs_info,
			     tree *dataref_ptr, vec<tree> *vec_offset,
			     unsigned ncopies)
{
  gimple_seq stmts = NULL;
  *dataref_ptr = force_gimple_operand (gs_info->base, &stmts, true, NULL_TREE);
  if (stmts != NULL)
    {
      basic_block new_bb;
      edge pe = loop_preheader_edge (loop);
      new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
      gcc_assert (!new_bb);
    }
  vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies, gs_info->offset,
				 vec_offset, gs_info->offset_vectype);
}
/* Prepare to implement a grouped or strided load or store using
   the gather load or scatter store operation described by GS_INFO.
   STMT_INFO is the load or store statement.

   Set *DATAREF_BUMP to the amount that should be added to the base
   address after each copy of the vectorized statement.  Set *VEC_OFFSET
   to an invariant offset vector in which element I has the value
   I * DR_STEP / SCALE.  */
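/* Worked example (values chosen only for exposition): for DR_STEP = 32
   bytes, a four-element vector type and SCALE = 8, X = 32 / 8 = 4, so
   *VEC_OFFSET becomes { 0, 4, 8, 12 } and *DATAREF_BUMP is 32 * 4 = 128
   bytes per copy of the vectorized statement.  */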
static void
vect_get_strided_load_store_ops (stmt_vec_info stmt_info,
				 loop_vec_info loop_vinfo,
				 gather_scatter_info *gs_info,
				 tree *dataref_bump, tree *vec_offset)
{
  struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);

  tree bump = size_binop (MULT_EXPR,
			  fold_convert (sizetype, unshare_expr (DR_STEP (dr))),
			  size_int (TYPE_VECTOR_SUBPARTS (vectype)));
  *dataref_bump = cse_and_gimplify_to_preheader (loop_vinfo, bump);

  /* The offset given in GS_INFO can have pointer type, so use the element
     type of the vector instead.  */
  tree offset_type = TREE_TYPE (gs_info->offset_vectype);

  /* Calculate X = DR_STEP / SCALE and convert it to the appropriate type.  */
  tree step = size_binop (EXACT_DIV_EXPR, unshare_expr (DR_STEP (dr)),
			  ssize_int (gs_info->scale));
  step = fold_convert (offset_type, step);

  /* Create {0, X, X*2, X*3, ...}.  */
  tree offset = fold_build2 (VEC_SERIES_EXPR, gs_info->offset_vectype,
			     build_zero_cst (offset_type), step);
  *vec_offset = cse_and_gimplify_to_preheader (loop_vinfo, offset);
}
/* Return the amount that should be added to a vector pointer to move
   to the next or previous copy of AGGR_TYPE.  DR_INFO is the data reference
   being vectorized and MEMORY_ACCESS_TYPE describes the type of
   vectorization.  */

static tree
vect_get_data_ptr_increment (vec_info *vinfo,
			     dr_vec_info *dr_info, tree aggr_type,
			     vect_memory_access_type memory_access_type)
{
  if (memory_access_type == VMAT_INVARIANT)
    return size_zero_node;

  tree iv_step = TYPE_SIZE_UNIT (aggr_type);
  tree step = vect_dr_behavior (vinfo, dr_info)->step;
  if (tree_int_cst_sgn (step) == -1)
    iv_step = fold_build1 (NEGATE_EXPR, TREE_TYPE (iv_step), iv_step);
  return iv_step;
}
/* Check and perform vectorization of BUILT_IN_BSWAP{16,32,64,128}.  */
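/* For example (illustrative only): __builtin_bswap32 on a V4SI vector is
   vectorized by viewing the operand as V16QI and applying the permutation
   { 3, 2, 1, 0,  7, 6, 5, 4,  11, 10, 9, 8,  15, 14, 13, 12 }, i.e. the
   bytes of each 32-bit word are reversed in place.  */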
2917 vectorizable_bswap (vec_info
*vinfo
,
2918 stmt_vec_info stmt_info
, gimple_stmt_iterator
*gsi
,
2919 gimple
**vec_stmt
, slp_tree slp_node
,
2921 tree vectype_in
, stmt_vector_for_cost
*cost_vec
)
2924 gcall
*stmt
= as_a
<gcall
*> (stmt_info
->stmt
);
2925 loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
);
2928 op
= gimple_call_arg (stmt
, 0);
2929 vectype
= STMT_VINFO_VECTYPE (stmt_info
);
2930 poly_uint64 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
2932 /* Multiple types in SLP are handled by creating the appropriate number of
2933 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
2938 ncopies
= vect_get_num_copies (loop_vinfo
, vectype
);
2940 gcc_assert (ncopies
>= 1);
2942 tree char_vectype
= get_same_sized_vectype (char_type_node
, vectype_in
);
2946 poly_uint64 num_bytes
= TYPE_VECTOR_SUBPARTS (char_vectype
);
2947 unsigned word_bytes
;
2948 if (!constant_multiple_p (num_bytes
, nunits
, &word_bytes
))
2951 /* The encoding uses one stepped pattern for each byte in the word. */
2952 vec_perm_builder
elts (num_bytes
, word_bytes
, 3);
2953 for (unsigned i
= 0; i
< 3; ++i
)
2954 for (unsigned j
= 0; j
< word_bytes
; ++j
)
2955 elts
.quick_push ((i
+ 1) * word_bytes
- j
- 1);
2957 vec_perm_indices
indices (elts
, 1, num_bytes
);
2958 if (!can_vec_perm_const_p (TYPE_MODE (char_vectype
), indices
))
2964 && !vect_maybe_update_slp_op_vectype (slp_op
[0], vectype_in
))
2966 if (dump_enabled_p ())
2967 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2968 "incompatible vector types for invariants\n");
2972 STMT_VINFO_TYPE (stmt_info
) = call_vec_info_type
;
2973 DUMP_VECT_SCOPE ("vectorizable_bswap");
2976 record_stmt_cost (cost_vec
,
2977 1, vector_stmt
, stmt_info
, 0, vect_prologue
);
2978 record_stmt_cost (cost_vec
,
2979 ncopies
, vec_perm
, stmt_info
, 0, vect_body
);
2984 tree bswap_vconst
= vec_perm_indices_to_tree (char_vectype
, indices
);
2987 vec
<tree
> vec_oprnds
= vNULL
;
2988 vect_get_vec_defs (vinfo
, stmt_info
, slp_node
, ncopies
,
2990 /* Arguments are ready. create the new vector stmt. */
2993 FOR_EACH_VEC_ELT (vec_oprnds
, i
, vop
)
2996 tree tem
= make_ssa_name (char_vectype
);
2997 new_stmt
= gimple_build_assign (tem
, build1 (VIEW_CONVERT_EXPR
,
2998 char_vectype
, vop
));
2999 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
3000 tree tem2
= make_ssa_name (char_vectype
);
3001 new_stmt
= gimple_build_assign (tem2
, VEC_PERM_EXPR
,
3002 tem
, tem
, bswap_vconst
);
3003 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
3004 tem
= make_ssa_name (vectype
);
3005 new_stmt
= gimple_build_assign (tem
, build1 (VIEW_CONVERT_EXPR
,
3007 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
3009 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
3011 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (new_stmt
);
3015 *vec_stmt
= STMT_VINFO_VEC_STMTS (stmt_info
)[0];
3017 vec_oprnds
.release ();
/* Return true if vector types VECTYPE_IN and VECTYPE_OUT have
   integer elements and if we can narrow VECTYPE_IN to VECTYPE_OUT
   in a single step.  On success, store the binary pack code in
   *CONVERT_CODE.  */
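/* For instance (illustrative only): two V2DI results can be narrowed to a
   single V4SI result in one step when supportable_narrowing_operation
   reports a single-step conversion such as VEC_PACK_TRUNC_EXPR; multi-step
   narrowings are rejected here.  */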
static bool
simple_integer_narrowing (tree vectype_out, tree vectype_in,
			  tree_code *convert_code)
{
  if (!INTEGRAL_TYPE_P (TREE_TYPE (vectype_out))
      || !INTEGRAL_TYPE_P (TREE_TYPE (vectype_in)))
    return false;

  tree_code code;
  int multi_step_cvt = 0;
  auto_vec <tree, 8> interm_types;
  if (!supportable_narrowing_operation (NOP_EXPR, vectype_out, vectype_in,
					&code, &multi_step_cvt, &interm_types)
      || multi_step_cvt)
    return false;

  *convert_code = code;
  return true;
}
/* Function vectorizable_call.

   Check if STMT_INFO performs a function call that can be vectorized.
   If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at GSI.
   Return true if STMT_INFO is vectorizable in this way.  */
3054 vectorizable_call (vec_info
*vinfo
,
3055 stmt_vec_info stmt_info
, gimple_stmt_iterator
*gsi
,
3056 gimple
**vec_stmt
, slp_tree slp_node
,
3057 stmt_vector_for_cost
*cost_vec
)
3063 tree vec_oprnd0
= NULL_TREE
, vec_oprnd1
= NULL_TREE
;
3064 tree vectype_out
, vectype_in
;
3065 poly_uint64 nunits_in
;
3066 poly_uint64 nunits_out
;
3067 loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
);
3068 bb_vec_info bb_vinfo
= dyn_cast
<bb_vec_info
> (vinfo
);
3069 tree fndecl
, new_temp
, rhs_type
;
3070 enum vect_def_type dt
[4]
3071 = { vect_unknown_def_type
, vect_unknown_def_type
, vect_unknown_def_type
,
3072 vect_unknown_def_type
};
3073 tree vectypes
[ARRAY_SIZE (dt
)] = {};
3074 slp_tree slp_op
[ARRAY_SIZE (dt
)] = {};
3075 int ndts
= ARRAY_SIZE (dt
);
3077 auto_vec
<tree
, 8> vargs
;
3078 auto_vec
<tree
, 8> orig_vargs
;
3079 enum { NARROW
, NONE
, WIDEN
} modifier
;
3083 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
3086 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
3090 /* Is STMT_INFO a vectorizable call? */
3091 stmt
= dyn_cast
<gcall
*> (stmt_info
->stmt
);
3095 if (gimple_call_internal_p (stmt
)
3096 && (internal_load_fn_p (gimple_call_internal_fn (stmt
))
3097 || internal_store_fn_p (gimple_call_internal_fn (stmt
))))
3098 /* Handled by vectorizable_load and vectorizable_store. */
3101 if (gimple_call_lhs (stmt
) == NULL_TREE
3102 || TREE_CODE (gimple_call_lhs (stmt
)) != SSA_NAME
)
3105 gcc_checking_assert (!stmt_can_throw_internal (cfun
, stmt
));
3107 vectype_out
= STMT_VINFO_VECTYPE (stmt_info
);
3109 /* Process function arguments. */
3110 rhs_type
= NULL_TREE
;
3111 vectype_in
= NULL_TREE
;
3112 nargs
= gimple_call_num_args (stmt
);
3114 /* Bail out if the function has more than four arguments, we do not have
3115 interesting builtin functions to vectorize with more than two arguments
3116 except for fma. No arguments is also not good. */
3117 if (nargs
== 0 || nargs
> 4)
3120 /* Ignore the arguments of IFN_GOMP_SIMD_LANE, they are magic. */
3121 combined_fn cfn
= gimple_call_combined_fn (stmt
);
3122 if (cfn
== CFN_GOMP_SIMD_LANE
)
3125 rhs_type
= unsigned_type_node
;
3129 if (internal_fn_p (cfn
))
3130 mask_opno
= internal_fn_mask_index (as_internal_fn (cfn
));
3132 for (i
= 0; i
< nargs
; i
++)
3134 if ((int) i
== mask_opno
)
3136 op
= gimple_call_arg (stmt
, i
);
3137 if (!vect_check_scalar_mask (vinfo
,
3138 stmt_info
, op
, &dt
[i
], &vectypes
[i
]))
3143 if (!vect_is_simple_use (vinfo
, stmt_info
, slp_node
,
3144 i
, &op
, &slp_op
[i
], &dt
[i
], &vectypes
[i
]))
3146 if (dump_enabled_p ())
3147 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3148 "use not simple.\n");
3152 /* We can only handle calls with arguments of the same type. */
3154 && !types_compatible_p (rhs_type
, TREE_TYPE (op
)))
3156 if (dump_enabled_p ())
3157 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3158 "argument types differ.\n");
3162 rhs_type
= TREE_TYPE (op
);
3165 vectype_in
= vectypes
[i
];
3166 else if (vectypes
[i
]
3167 && !types_compatible_p (vectypes
[i
], vectype_in
))
3169 if (dump_enabled_p ())
3170 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3171 "argument vector types differ.\n");
3175 /* If all arguments are external or constant defs, infer the vector type
3176 from the scalar type. */
3178 vectype_in
= get_vectype_for_scalar_type (vinfo
, rhs_type
, slp_node
);
3180 gcc_assert (vectype_in
);
3183 if (dump_enabled_p ())
3184 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3185 "no vectype for scalar type %T\n", rhs_type
);
3189 /* FORNOW: we don't yet support mixtures of vector sizes for calls,
3190 just mixtures of nunits. E.g. DI->SI versions of __builtin_ctz*
3191 are traditionally vectorized as two VnDI->VnDI IFN_CTZs followed
3192 by a pack of the two vectors into an SI vector. We would need
3193 separate code to handle direct VnDI->VnSI IFN_CTZs. */
3194 if (TYPE_SIZE (vectype_in
) != TYPE_SIZE (vectype_out
))
3196 if (dump_enabled_p ())
3197 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3198 "mismatched vector sizes %T and %T\n",
3199 vectype_in
, vectype_out
);
3203 if (VECTOR_BOOLEAN_TYPE_P (vectype_out
)
3204 != VECTOR_BOOLEAN_TYPE_P (vectype_in
))
3206 if (dump_enabled_p ())
3207 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3208 "mixed mask and nonmask vector types\n");
3213 nunits_in
= TYPE_VECTOR_SUBPARTS (vectype_in
);
3214 nunits_out
= TYPE_VECTOR_SUBPARTS (vectype_out
);
3215 if (known_eq (nunits_in
* 2, nunits_out
))
3217 else if (known_eq (nunits_out
, nunits_in
))
3219 else if (known_eq (nunits_out
* 2, nunits_in
))
3224 /* We only handle functions that do not read or clobber memory. */
3225 if (gimple_vuse (stmt
))
3227 if (dump_enabled_p ())
3228 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3229 "function reads from or writes to memory.\n");
3233 /* For now, we only vectorize functions if a target specific builtin
3234 is available. TODO -- in some cases, it might be profitable to
3235 insert the calls for pieces of the vector, in order to be able
3236 to vectorize other operations in the loop. */
3238 internal_fn ifn
= IFN_LAST
;
3239 tree callee
= gimple_call_fndecl (stmt
);
3241 /* First try using an internal function. */
3242 tree_code convert_code
= ERROR_MARK
;
3244 && (modifier
== NONE
3245 || (modifier
== NARROW
3246 && simple_integer_narrowing (vectype_out
, vectype_in
,
3248 ifn
= vectorizable_internal_function (cfn
, callee
, vectype_out
,
3251 /* If that fails, try asking for a target-specific built-in function. */
3252 if (ifn
== IFN_LAST
)
3254 if (cfn
!= CFN_LAST
)
3255 fndecl
= targetm
.vectorize
.builtin_vectorized_function
3256 (cfn
, vectype_out
, vectype_in
);
3257 else if (callee
&& fndecl_built_in_p (callee
, BUILT_IN_MD
))
3258 fndecl
= targetm
.vectorize
.builtin_md_vectorized_function
3259 (callee
, vectype_out
, vectype_in
);
3262 if (ifn
== IFN_LAST
&& !fndecl
)
3264 if (cfn
== CFN_GOMP_SIMD_LANE
3267 && LOOP_VINFO_LOOP (loop_vinfo
)->simduid
3268 && TREE_CODE (gimple_call_arg (stmt
, 0)) == SSA_NAME
3269 && LOOP_VINFO_LOOP (loop_vinfo
)->simduid
3270 == SSA_NAME_VAR (gimple_call_arg (stmt
, 0)))
3272 /* We can handle IFN_GOMP_SIMD_LANE by returning a
3273 { 0, 1, 2, ... vf - 1 } vector. */
3274 gcc_assert (nargs
== 0);
3276 else if (modifier
== NONE
3277 && (gimple_call_builtin_p (stmt
, BUILT_IN_BSWAP16
)
3278 || gimple_call_builtin_p (stmt
, BUILT_IN_BSWAP32
)
3279 || gimple_call_builtin_p (stmt
, BUILT_IN_BSWAP64
)
3280 || gimple_call_builtin_p (stmt
, BUILT_IN_BSWAP128
)))
3281 return vectorizable_bswap (vinfo
, stmt_info
, gsi
, vec_stmt
, slp_node
,
3282 slp_op
, vectype_in
, cost_vec
);
3285 if (dump_enabled_p ())
3286 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3287 "function is not vectorizable.\n");
3294 else if (modifier
== NARROW
&& ifn
== IFN_LAST
)
3295 ncopies
= vect_get_num_copies (loop_vinfo
, vectype_out
);
3297 ncopies
= vect_get_num_copies (loop_vinfo
, vectype_in
);
3299 /* Sanity check: make sure that at least one copy of the vectorized stmt
3300 needs to be generated. */
3301 gcc_assert (ncopies
>= 1);
3303 vec_loop_masks
*masks
= (loop_vinfo
? &LOOP_VINFO_MASKS (loop_vinfo
) : NULL
);
3304 if (!vec_stmt
) /* transformation not required. */
3307 for (i
= 0; i
< nargs
; ++i
)
3308 if (!vect_maybe_update_slp_op_vectype (slp_op
[i
], vectype_in
))
3310 if (dump_enabled_p ())
3311 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3312 "incompatible vector types for invariants\n");
3315 STMT_VINFO_TYPE (stmt_info
) = call_vec_info_type
;
3316 DUMP_VECT_SCOPE ("vectorizable_call");
3317 vect_model_simple_cost (vinfo
, stmt_info
,
3318 ncopies
, dt
, ndts
, slp_node
, cost_vec
);
3319 if (ifn
!= IFN_LAST
&& modifier
== NARROW
&& !slp_node
)
3320 record_stmt_cost (cost_vec
, ncopies
/ 2,
3321 vec_promote_demote
, stmt_info
, 0, vect_body
);
3323 if (loop_vinfo
&& mask_opno
>= 0)
3325 unsigned int nvectors
= (slp_node
3326 ? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
)
3328 tree scalar_mask
= gimple_call_arg (stmt_info
->stmt
, mask_opno
);
3329 vect_record_loop_mask (loop_vinfo
, masks
, nvectors
,
3330 vectype_out
, scalar_mask
);
3337 if (dump_enabled_p ())
3338 dump_printf_loc (MSG_NOTE
, vect_location
, "transform call.\n");
3341 scalar_dest
= gimple_call_lhs (stmt
);
3342 vec_dest
= vect_create_destination_var (scalar_dest
, vectype_out
);
3344 bool masked_loop_p
= loop_vinfo
&& LOOP_VINFO_FULLY_MASKED_P (loop_vinfo
);
3346 if (modifier
== NONE
|| ifn
!= IFN_LAST
)
3348 tree prev_res
= NULL_TREE
;
3349 vargs
.safe_grow (nargs
, true);
3350 orig_vargs
.safe_grow (nargs
, true);
3351 auto_vec
<vec
<tree
> > vec_defs (nargs
);
3352 for (j
= 0; j
< ncopies
; ++j
)
3354 /* Build argument list for the vectorized call. */
3357 vec
<tree
> vec_oprnds0
;
3359 vect_get_slp_defs (vinfo
, slp_node
, &vec_defs
);
3360 vec_oprnds0
= vec_defs
[0];
3362 /* Arguments are ready. Create the new vector stmt. */
3363 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vec_oprnd0
)
3366 for (k
= 0; k
< nargs
; k
++)
3368 vec
<tree
> vec_oprndsk
= vec_defs
[k
];
3369 vargs
[k
] = vec_oprndsk
[i
];
3372 if (modifier
== NARROW
)
3374 /* We don't define any narrowing conditional functions
3376 gcc_assert (mask_opno
< 0);
3377 tree half_res
= make_ssa_name (vectype_in
);
3379 = gimple_build_call_internal_vec (ifn
, vargs
);
3380 gimple_call_set_lhs (call
, half_res
);
3381 gimple_call_set_nothrow (call
, true);
3382 vect_finish_stmt_generation (vinfo
, stmt_info
, call
, gsi
);
3385 prev_res
= half_res
;
3388 new_temp
= make_ssa_name (vec_dest
);
3389 new_stmt
= gimple_build_assign (new_temp
, convert_code
,
3390 prev_res
, half_res
);
3391 vect_finish_stmt_generation (vinfo
, stmt_info
,
3396 if (mask_opno
>= 0 && masked_loop_p
)
3398 unsigned int vec_num
= vec_oprnds0
.length ();
3399 /* Always true for SLP. */
3400 gcc_assert (ncopies
== 1);
3401 tree mask
= vect_get_loop_mask (gsi
, masks
, vec_num
,
3403 vargs
[mask_opno
] = prepare_load_store_mask
3404 (TREE_TYPE (mask
), mask
, vargs
[mask_opno
], gsi
);
3408 if (ifn
!= IFN_LAST
)
3409 call
= gimple_build_call_internal_vec (ifn
, vargs
);
3411 call
= gimple_build_call_vec (fndecl
, vargs
);
3412 new_temp
= make_ssa_name (vec_dest
, call
);
3413 gimple_call_set_lhs (call
, new_temp
);
3414 gimple_call_set_nothrow (call
, true);
3415 vect_finish_stmt_generation (vinfo
, stmt_info
, call
, gsi
);
3418 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
3423 for (i
= 0; i
< nargs
; i
++)
3425 op
= gimple_call_arg (stmt
, i
);
3428 vec_defs
.quick_push (vNULL
);
3429 vect_get_vec_defs_for_operand (vinfo
, stmt_info
, ncopies
,
3432 orig_vargs
[i
] = vargs
[i
] = vec_defs
[i
][j
];
3435 if (mask_opno
>= 0 && masked_loop_p
)
3437 tree mask
= vect_get_loop_mask (gsi
, masks
, ncopies
,
3440 = prepare_load_store_mask (TREE_TYPE (mask
), mask
,
3441 vargs
[mask_opno
], gsi
);
3445 if (cfn
== CFN_GOMP_SIMD_LANE
)
3447 tree cst
= build_index_vector (vectype_out
, j
* nunits_out
, 1);
3449 = vect_get_new_ssa_name (vectype_out
, vect_simple_var
, "cst_");
3450 gimple
*init_stmt
= gimple_build_assign (new_var
, cst
);
3451 vect_init_vector_1 (vinfo
, stmt_info
, init_stmt
, NULL
);
3452 new_temp
= make_ssa_name (vec_dest
);
3453 new_stmt
= gimple_build_assign (new_temp
, new_var
);
3454 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
3456 else if (modifier
== NARROW
)
3458 /* We don't define any narrowing conditional functions at
3460 gcc_assert (mask_opno
< 0);
3461 tree half_res
= make_ssa_name (vectype_in
);
3462 gcall
*call
= gimple_build_call_internal_vec (ifn
, vargs
);
3463 gimple_call_set_lhs (call
, half_res
);
3464 gimple_call_set_nothrow (call
, true);
3465 vect_finish_stmt_generation (vinfo
, stmt_info
, call
, gsi
);
3468 prev_res
= half_res
;
3471 new_temp
= make_ssa_name (vec_dest
);
3472 new_stmt
= gimple_build_assign (new_temp
, convert_code
,
3473 prev_res
, half_res
);
3474 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
3479 if (ifn
!= IFN_LAST
)
3480 call
= gimple_build_call_internal_vec (ifn
, vargs
);
3482 call
= gimple_build_call_vec (fndecl
, vargs
);
3483 new_temp
= make_ssa_name (vec_dest
, call
);
3484 gimple_call_set_lhs (call
, new_temp
);
3485 gimple_call_set_nothrow (call
, true);
3486 vect_finish_stmt_generation (vinfo
, stmt_info
, call
, gsi
);
3490 if (j
== (modifier
== NARROW
? 1 : 0))
3491 *vec_stmt
= new_stmt
;
3492 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (new_stmt
);
3494 for (i
= 0; i
< nargs
; i
++)
3496 vec
<tree
> vec_oprndsi
= vec_defs
[i
];
3497 vec_oprndsi
.release ();
3500 else if (modifier
== NARROW
)
3502 auto_vec
<vec
<tree
> > vec_defs (nargs
);
3503 /* We don't define any narrowing conditional functions at present. */
3504 gcc_assert (mask_opno
< 0);
3505 for (j
= 0; j
< ncopies
; ++j
)
3507 /* Build argument list for the vectorized call. */
3509 vargs
.create (nargs
* 2);
3515 vec
<tree
> vec_oprnds0
;
3517 vect_get_slp_defs (vinfo
, slp_node
, &vec_defs
);
3518 vec_oprnds0
= vec_defs
[0];
3520 /* Arguments are ready. Create the new vector stmt. */
3521 for (i
= 0; vec_oprnds0
.iterate (i
, &vec_oprnd0
); i
+= 2)
3525 for (k
= 0; k
< nargs
; k
++)
3527 vec
<tree
> vec_oprndsk
= vec_defs
[k
];
3528 vargs
.quick_push (vec_oprndsk
[i
]);
3529 vargs
.quick_push (vec_oprndsk
[i
+ 1]);
3532 if (ifn
!= IFN_LAST
)
3533 call
= gimple_build_call_internal_vec (ifn
, vargs
);
3535 call
= gimple_build_call_vec (fndecl
, vargs
);
3536 new_temp
= make_ssa_name (vec_dest
, call
);
3537 gimple_call_set_lhs (call
, new_temp
);
3538 gimple_call_set_nothrow (call
, true);
3539 vect_finish_stmt_generation (vinfo
, stmt_info
, call
, gsi
);
3540 SLP_TREE_VEC_STMTS (slp_node
).quick_push (call
);
3545 for (i
= 0; i
< nargs
; i
++)
3547 op
= gimple_call_arg (stmt
, i
);
3550 vec_defs
.quick_push (vNULL
);
3551 vect_get_vec_defs_for_operand (vinfo
, stmt_info
, 2 * ncopies
,
3552 op
, &vec_defs
[i
], vectypes
[i
]);
3554 vec_oprnd0
= vec_defs
[i
][2*j
];
3555 vec_oprnd1
= vec_defs
[i
][2*j
+1];
3557 vargs
.quick_push (vec_oprnd0
);
3558 vargs
.quick_push (vec_oprnd1
);
3561 gcall
*new_stmt
= gimple_build_call_vec (fndecl
, vargs
);
3562 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
3563 gimple_call_set_lhs (new_stmt
, new_temp
);
3564 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
3566 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (new_stmt
);
3570 *vec_stmt
= STMT_VINFO_VEC_STMTS (stmt_info
)[0];
3572 for (i
= 0; i
< nargs
; i
++)
3574 vec
<tree
> vec_oprndsi
= vec_defs
[i
];
3575 vec_oprndsi
.release ();
  /* No current target implements this case.  */

  /* The call in STMT might prevent it from being removed in dce.
     We however cannot remove it here, due to the way the ssa name
     it defines is mapped to the new definition.  So just replace
     rhs of the statement with something harmless.  */
3592 stmt_info
= vect_orig_stmt (stmt_info
);
3593 lhs
= gimple_get_lhs (stmt_info
->stmt
);
3596 = gimple_build_assign (lhs
, build_zero_cst (TREE_TYPE (lhs
)));
3597 vinfo
->replace_stmt (gsi
, stmt_info
, new_stmt
);
3603 struct simd_call_arg_info
3607 HOST_WIDE_INT linear_step
;
3608 enum vect_def_type dt
;
3610 bool simd_lane_linear
;
/* Helper function of vectorizable_simd_clone_call.  If OP, an SSA_NAME,
   is linear within simd lane (but not within whole loop), note it in
   *ARGINFO.  */
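/* Illustrative example (SSA names invented for exposition): if OP is
   defined as
     _1 = _simduid * 4;
     _2 = _1 + 16;
     op = &base_array + _2;
   where _simduid is the result of IFN_GOMP_SIMD_LANE, then OP advances by
   4 bytes per SIMD lane from the invariant address &base_array + 16, so
   the walk below records a linear step of 4 and the invariant base in
   ARGINFO.  */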
3618 vect_simd_lane_linear (tree op
, class loop
*loop
,
3619 struct simd_call_arg_info
*arginfo
)
3621 gimple
*def_stmt
= SSA_NAME_DEF_STMT (op
);
3623 if (!is_gimple_assign (def_stmt
)
3624 || gimple_assign_rhs_code (def_stmt
) != POINTER_PLUS_EXPR
3625 || !is_gimple_min_invariant (gimple_assign_rhs1 (def_stmt
)))
3628 tree base
= gimple_assign_rhs1 (def_stmt
);
3629 HOST_WIDE_INT linear_step
= 0;
3630 tree v
= gimple_assign_rhs2 (def_stmt
);
3631 while (TREE_CODE (v
) == SSA_NAME
)
3634 def_stmt
= SSA_NAME_DEF_STMT (v
);
3635 if (is_gimple_assign (def_stmt
))
3636 switch (gimple_assign_rhs_code (def_stmt
))
3639 t
= gimple_assign_rhs2 (def_stmt
);
3640 if (linear_step
|| TREE_CODE (t
) != INTEGER_CST
)
3642 base
= fold_build2 (POINTER_PLUS_EXPR
, TREE_TYPE (base
), base
, t
);
3643 v
= gimple_assign_rhs1 (def_stmt
);
3646 t
= gimple_assign_rhs2 (def_stmt
);
3647 if (linear_step
|| !tree_fits_shwi_p (t
) || integer_zerop (t
))
3649 linear_step
= tree_to_shwi (t
);
3650 v
= gimple_assign_rhs1 (def_stmt
);
3653 t
= gimple_assign_rhs1 (def_stmt
);
3654 if (TREE_CODE (TREE_TYPE (t
)) != INTEGER_TYPE
3655 || (TYPE_PRECISION (TREE_TYPE (v
))
3656 < TYPE_PRECISION (TREE_TYPE (t
))))
3665 else if (gimple_call_internal_p (def_stmt
, IFN_GOMP_SIMD_LANE
)
3667 && TREE_CODE (gimple_call_arg (def_stmt
, 0)) == SSA_NAME
3668 && (SSA_NAME_VAR (gimple_call_arg (def_stmt
, 0))
3673 arginfo
->linear_step
= linear_step
;
3675 arginfo
->simd_lane_linear
= true;
/* Return the number of elements in vector type VECTYPE, which is associated
   with a SIMD clone.  At present these vectors always have a constant
   length.  */

static unsigned HOST_WIDE_INT
simd_clone_subparts (tree vectype)
{
  return TYPE_VECTOR_SUBPARTS (vectype).to_constant ();
}
/* Function vectorizable_simd_clone_call.

   Check if STMT_INFO performs a function call that can be vectorized
   by calling a simd clone of the function.
   If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at GSI.
   Return true if STMT_INFO is vectorizable in this way.  */
3700 vectorizable_simd_clone_call (vec_info
*vinfo
, stmt_vec_info stmt_info
,
3701 gimple_stmt_iterator
*gsi
,
3702 gimple
**vec_stmt
, slp_tree slp_node
,
3703 stmt_vector_for_cost
*)
3708 tree vec_oprnd0
= NULL_TREE
;
3711 loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
);
3712 bb_vec_info bb_vinfo
= dyn_cast
<bb_vec_info
> (vinfo
);
3713 class loop
*loop
= loop_vinfo
? LOOP_VINFO_LOOP (loop_vinfo
) : NULL
;
3714 tree fndecl
, new_temp
;
3716 auto_vec
<simd_call_arg_info
> arginfo
;
3717 vec
<tree
> vargs
= vNULL
;
3719 tree lhs
, rtype
, ratype
;
3720 vec
<constructor_elt
, va_gc
> *ret_ctor_elts
= NULL
;
3722 /* Is STMT a vectorizable call? */
3723 gcall
*stmt
= dyn_cast
<gcall
*> (stmt_info
->stmt
);
3727 fndecl
= gimple_call_fndecl (stmt
);
3728 if (fndecl
== NULL_TREE
)
3731 struct cgraph_node
*node
= cgraph_node::get (fndecl
);
3732 if (node
== NULL
|| node
->simd_clones
== NULL
)
3735 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
3738 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
3742 if (gimple_call_lhs (stmt
)
3743 && TREE_CODE (gimple_call_lhs (stmt
)) != SSA_NAME
)
3746 gcc_checking_assert (!stmt_can_throw_internal (cfun
, stmt
));
3748 vectype
= STMT_VINFO_VECTYPE (stmt_info
);
3750 if (loop_vinfo
&& nested_in_vect_loop_p (loop
, stmt_info
))
3757 /* Process function arguments. */
3758 nargs
= gimple_call_num_args (stmt
);
3760 /* Bail out if the function has zero arguments. */
3764 arginfo
.reserve (nargs
, true);
3766 for (i
= 0; i
< nargs
; i
++)
3768 simd_call_arg_info thisarginfo
;
3771 thisarginfo
.linear_step
= 0;
3772 thisarginfo
.align
= 0;
3773 thisarginfo
.op
= NULL_TREE
;
3774 thisarginfo
.simd_lane_linear
= false;
3776 op
= gimple_call_arg (stmt
, i
);
3777 if (!vect_is_simple_use (op
, vinfo
, &thisarginfo
.dt
,
3778 &thisarginfo
.vectype
)
3779 || thisarginfo
.dt
== vect_uninitialized_def
)
3781 if (dump_enabled_p ())
3782 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3783 "use not simple.\n");
3787 if (thisarginfo
.dt
== vect_constant_def
3788 || thisarginfo
.dt
== vect_external_def
)
3789 gcc_assert (thisarginfo
.vectype
== NULL_TREE
);
3792 gcc_assert (thisarginfo
.vectype
!= NULL_TREE
);
3793 if (VECTOR_BOOLEAN_TYPE_P (thisarginfo
.vectype
))
3795 if (dump_enabled_p ())
3796 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3797 "vector mask arguments are not supported\n");
3802 /* For linear arguments, the analyze phase should have saved
3803 the base and step in STMT_VINFO_SIMD_CLONE_INFO. */
3804 if (i
* 3 + 4 <= STMT_VINFO_SIMD_CLONE_INFO (stmt_info
).length ()
3805 && STMT_VINFO_SIMD_CLONE_INFO (stmt_info
)[i
* 3 + 2])
3807 gcc_assert (vec_stmt
);
3808 thisarginfo
.linear_step
3809 = tree_to_shwi (STMT_VINFO_SIMD_CLONE_INFO (stmt_info
)[i
* 3 + 2]);
3811 = STMT_VINFO_SIMD_CLONE_INFO (stmt_info
)[i
* 3 + 1];
3812 thisarginfo
.simd_lane_linear
3813 = (STMT_VINFO_SIMD_CLONE_INFO (stmt_info
)[i
* 3 + 3]
3814 == boolean_true_node
);
3815 /* If loop has been peeled for alignment, we need to adjust it. */
3816 tree n1
= LOOP_VINFO_NITERS_UNCHANGED (loop_vinfo
);
3817 tree n2
= LOOP_VINFO_NITERS (loop_vinfo
);
3818 if (n1
!= n2
&& !thisarginfo
.simd_lane_linear
)
3820 tree bias
= fold_build2 (MINUS_EXPR
, TREE_TYPE (n1
), n1
, n2
);
3821 tree step
= STMT_VINFO_SIMD_CLONE_INFO (stmt_info
)[i
* 3 + 2];
3822 tree opt
= TREE_TYPE (thisarginfo
.op
);
3823 bias
= fold_convert (TREE_TYPE (step
), bias
);
3824 bias
= fold_build2 (MULT_EXPR
, TREE_TYPE (step
), bias
, step
);
3826 = fold_build2 (POINTER_TYPE_P (opt
)
3827 ? POINTER_PLUS_EXPR
: PLUS_EXPR
, opt
,
3828 thisarginfo
.op
, bias
);
3832 && thisarginfo
.dt
!= vect_constant_def
3833 && thisarginfo
.dt
!= vect_external_def
3835 && TREE_CODE (op
) == SSA_NAME
3836 && simple_iv (loop
, loop_containing_stmt (stmt
), op
,
3838 && tree_fits_shwi_p (iv
.step
))
3840 thisarginfo
.linear_step
= tree_to_shwi (iv
.step
);
3841 thisarginfo
.op
= iv
.base
;
3843 else if ((thisarginfo
.dt
== vect_constant_def
3844 || thisarginfo
.dt
== vect_external_def
)
3845 && POINTER_TYPE_P (TREE_TYPE (op
)))
3846 thisarginfo
.align
= get_pointer_alignment (op
) / BITS_PER_UNIT
;
3847 /* Addresses of array elements indexed by GOMP_SIMD_LANE are
3849 if (POINTER_TYPE_P (TREE_TYPE (op
))
3850 && !thisarginfo
.linear_step
3852 && thisarginfo
.dt
!= vect_constant_def
3853 && thisarginfo
.dt
!= vect_external_def
3856 && TREE_CODE (op
) == SSA_NAME
)
3857 vect_simd_lane_linear (op
, loop
, &thisarginfo
);
3859 arginfo
.quick_push (thisarginfo
);
3862 poly_uint64 vf
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
);
3863 if (!vf
.is_constant ())
3865 if (dump_enabled_p ())
3866 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3867 "not considering SIMD clones; not yet supported"
3868 " for variable-width vectors.\n");
3872 unsigned int badness
= 0;
3873 struct cgraph_node
*bestn
= NULL
;
3874 if (STMT_VINFO_SIMD_CLONE_INFO (stmt_info
).exists ())
3875 bestn
= cgraph_node::get (STMT_VINFO_SIMD_CLONE_INFO (stmt_info
)[0]);
3877 for (struct cgraph_node
*n
= node
->simd_clones
; n
!= NULL
;
3878 n
= n
->simdclone
->next_clone
)
3880 unsigned int this_badness
= 0;
3881 unsigned int num_calls
;
3882 if (!constant_multiple_p (vf
, n
->simdclone
->simdlen
, &num_calls
)
3883 || n
->simdclone
->nargs
!= nargs
)
3886 this_badness
+= exact_log2 (num_calls
) * 1024;
3887 if (n
->simdclone
->inbranch
)
3888 this_badness
+= 2048;
3889 int target_badness
= targetm
.simd_clone
.usable (n
);
3890 if (target_badness
< 0)
3892 this_badness
+= target_badness
* 512;
3893 /* FORNOW: Have to add code to add the mask argument. */
3894 if (n
->simdclone
->inbranch
)
3896 for (i
= 0; i
< nargs
; i
++)
3898 switch (n
->simdclone
->args
[i
].arg_type
)
3900 case SIMD_CLONE_ARG_TYPE_VECTOR
:
3901 if (!useless_type_conversion_p
3902 (n
->simdclone
->args
[i
].orig_type
,
3903 TREE_TYPE (gimple_call_arg (stmt
, i
))))
3905 else if (arginfo
[i
].dt
== vect_constant_def
3906 || arginfo
[i
].dt
== vect_external_def
3907 || arginfo
[i
].linear_step
)
3910 case SIMD_CLONE_ARG_TYPE_UNIFORM
:
3911 if (arginfo
[i
].dt
!= vect_constant_def
3912 && arginfo
[i
].dt
!= vect_external_def
)
3915 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP
:
3916 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP
:
3917 if (arginfo
[i
].dt
== vect_constant_def
3918 || arginfo
[i
].dt
== vect_external_def
3919 || (arginfo
[i
].linear_step
3920 != n
->simdclone
->args
[i
].linear_step
))
3923 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP
:
3924 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP
:
3925 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP
:
3926 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP
:
3927 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP
:
3928 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP
:
3932 case SIMD_CLONE_ARG_TYPE_MASK
:
3935 if (i
== (size_t) -1)
3937 if (n
->simdclone
->args
[i
].alignment
> arginfo
[i
].align
)
3942 if (arginfo
[i
].align
)
3943 this_badness
+= (exact_log2 (arginfo
[i
].align
)
3944 - exact_log2 (n
->simdclone
->args
[i
].alignment
));
3946 if (i
== (size_t) -1)
3948 if (bestn
== NULL
|| this_badness
< badness
)
3951 badness
= this_badness
;
3958 for (i
= 0; i
< nargs
; i
++)
3959 if ((arginfo
[i
].dt
== vect_constant_def
3960 || arginfo
[i
].dt
== vect_external_def
)
3961 && bestn
->simdclone
->args
[i
].arg_type
== SIMD_CLONE_ARG_TYPE_VECTOR
)
3963 tree arg_type
= TREE_TYPE (gimple_call_arg (stmt
, i
));
3964 arginfo
[i
].vectype
= get_vectype_for_scalar_type (vinfo
, arg_type
,
3966 if (arginfo
[i
].vectype
== NULL
3967 || !constant_multiple_p (bestn
->simdclone
->simdlen
,
3968 simd_clone_subparts (arginfo
[i
].vectype
)))
3972 fndecl
= bestn
->decl
;
3973 nunits
= bestn
->simdclone
->simdlen
;
3974 ncopies
= vector_unroll_factor (vf
, nunits
);
3976 /* If the function isn't const, only allow it in simd loops where user
3977 has asserted that at least nunits consecutive iterations can be
3978 performed using SIMD instructions. */
3979 if ((loop
== NULL
|| maybe_lt ((unsigned) loop
->safelen
, nunits
))
3980 && gimple_vuse (stmt
))
3983 /* Sanity check: make sure that at least one copy of the vectorized stmt
3984 needs to be generated. */
3985 gcc_assert (ncopies
>= 1);
3987 if (!vec_stmt
) /* transformation not required. */
3989 STMT_VINFO_SIMD_CLONE_INFO (stmt_info
).safe_push (bestn
->decl
);
3990 for (i
= 0; i
< nargs
; i
++)
3991 if ((bestn
->simdclone
->args
[i
].arg_type
3992 == SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP
)
3993 || (bestn
->simdclone
->args
[i
].arg_type
3994 == SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP
))
3996 STMT_VINFO_SIMD_CLONE_INFO (stmt_info
).safe_grow_cleared (i
* 3
3999 STMT_VINFO_SIMD_CLONE_INFO (stmt_info
).safe_push (arginfo
[i
].op
);
4000 tree lst
= POINTER_TYPE_P (TREE_TYPE (arginfo
[i
].op
))
4001 ? size_type_node
: TREE_TYPE (arginfo
[i
].op
);
4002 tree ls
= build_int_cst (lst
, arginfo
[i
].linear_step
);
4003 STMT_VINFO_SIMD_CLONE_INFO (stmt_info
).safe_push (ls
);
4004 tree sll
= arginfo
[i
].simd_lane_linear
4005 ? boolean_true_node
: boolean_false_node
;
4006 STMT_VINFO_SIMD_CLONE_INFO (stmt_info
).safe_push (sll
);
4008 STMT_VINFO_TYPE (stmt_info
) = call_simd_clone_vec_info_type
;
4009 DUMP_VECT_SCOPE ("vectorizable_simd_clone_call");
4010 /* vect_model_simple_cost (vinfo, stmt_info, ncopies,
4011 dt, slp_node, cost_vec); */
4017 if (dump_enabled_p ())
4018 dump_printf_loc (MSG_NOTE
, vect_location
, "transform call.\n");
4021 scalar_dest
= gimple_call_lhs (stmt
);
4022 vec_dest
= NULL_TREE
;
4027 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
4028 rtype
= TREE_TYPE (TREE_TYPE (fndecl
));
4029 if (TREE_CODE (rtype
) == ARRAY_TYPE
)
4032 rtype
= TREE_TYPE (ratype
);
4036 auto_vec
<vec
<tree
> > vec_oprnds
;
4037 auto_vec
<unsigned> vec_oprnds_i
;
4038 vec_oprnds
.safe_grow_cleared (nargs
, true);
4039 vec_oprnds_i
.safe_grow_cleared (nargs
, true);
4040 for (j
= 0; j
< ncopies
; ++j
)
4042 /* Build argument list for the vectorized call. */
4044 vargs
.create (nargs
);
4048 for (i
= 0; i
< nargs
; i
++)
4050 unsigned int k
, l
, m
, o
;
4052 op
= gimple_call_arg (stmt
, i
);
4053 switch (bestn
->simdclone
->args
[i
].arg_type
)
4055 case SIMD_CLONE_ARG_TYPE_VECTOR
:
4056 atype
= bestn
->simdclone
->args
[i
].vector_type
;
4057 o
= vector_unroll_factor (nunits
,
4058 simd_clone_subparts (atype
));
4059 for (m
= j
* o
; m
< (j
+ 1) * o
; m
++)
4061 if (simd_clone_subparts (atype
)
4062 < simd_clone_subparts (arginfo
[i
].vectype
))
4064 poly_uint64 prec
= GET_MODE_BITSIZE (TYPE_MODE (atype
));
4065 k
= (simd_clone_subparts (arginfo
[i
].vectype
)
4066 / simd_clone_subparts (atype
));
4067 gcc_assert ((k
& (k
- 1)) == 0);
4070 vect_get_vec_defs_for_operand (vinfo
, stmt_info
,
4071 ncopies
* o
/ k
, op
,
4073 vec_oprnds_i
[i
] = 0;
4074 vec_oprnd0
= vec_oprnds
[i
][vec_oprnds_i
[i
]++];
4078 vec_oprnd0
= arginfo
[i
].op
;
4079 if ((m
& (k
- 1)) == 0)
4080 vec_oprnd0
= vec_oprnds
[i
][vec_oprnds_i
[i
]++];
4082 arginfo
[i
].op
= vec_oprnd0
;
4084 = build3 (BIT_FIELD_REF
, atype
, vec_oprnd0
,
4086 bitsize_int ((m
& (k
- 1)) * prec
));
4088 = gimple_build_assign (make_ssa_name (atype
),
4090 vect_finish_stmt_generation (vinfo
, stmt_info
,
4092 vargs
.safe_push (gimple_assign_lhs (new_stmt
));
4096 k
= (simd_clone_subparts (atype
)
4097 / simd_clone_subparts (arginfo
[i
].vectype
));
4098 gcc_assert ((k
& (k
- 1)) == 0);
4099 vec
<constructor_elt
, va_gc
> *ctor_elts
;
4101 vec_alloc (ctor_elts
, k
);
4104 for (l
= 0; l
< k
; l
++)
4106 if (m
== 0 && l
== 0)
4108 vect_get_vec_defs_for_operand (vinfo
, stmt_info
,
4112 vec_oprnds_i
[i
] = 0;
4113 vec_oprnd0
= vec_oprnds
[i
][vec_oprnds_i
[i
]++];
4116 vec_oprnd0
= vec_oprnds
[i
][vec_oprnds_i
[i
]++];
4117 arginfo
[i
].op
= vec_oprnd0
;
4120 CONSTRUCTOR_APPEND_ELT (ctor_elts
, NULL_TREE
,
4124 if (!useless_type_conversion_p (TREE_TYPE (vec_oprnd0
),
4128 = build1 (VIEW_CONVERT_EXPR
, atype
, vec_oprnd0
);
4130 = gimple_build_assign (make_ssa_name (atype
),
4132 vect_finish_stmt_generation (vinfo
, stmt_info
,
4134 vargs
.safe_push (gimple_assign_lhs (new_stmt
));
4137 vargs
.safe_push (vec_oprnd0
);
4140 vec_oprnd0
= build_constructor (atype
, ctor_elts
);
4142 = gimple_build_assign (make_ssa_name (atype
),
4144 vect_finish_stmt_generation (vinfo
, stmt_info
,
4146 vargs
.safe_push (gimple_assign_lhs (new_stmt
));
4151 case SIMD_CLONE_ARG_TYPE_UNIFORM
:
4152 vargs
.safe_push (op
);
4154 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP
:
4155 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP
:
4160 = force_gimple_operand (unshare_expr (arginfo
[i
].op
),
4161 &stmts
, true, NULL_TREE
);
4165 edge pe
= loop_preheader_edge (loop
);
4166 new_bb
= gsi_insert_seq_on_edge_immediate (pe
, stmts
);
4167 gcc_assert (!new_bb
);
4169 if (arginfo
[i
].simd_lane_linear
)
4171 vargs
.safe_push (arginfo
[i
].op
);
4174 tree phi_res
= copy_ssa_name (op
);
4175 gphi
*new_phi
= create_phi_node (phi_res
, loop
->header
);
4176 add_phi_arg (new_phi
, arginfo
[i
].op
,
4177 loop_preheader_edge (loop
), UNKNOWN_LOCATION
);
4179 = POINTER_TYPE_P (TREE_TYPE (op
))
4180 ? POINTER_PLUS_EXPR
: PLUS_EXPR
;
4181 tree type
= POINTER_TYPE_P (TREE_TYPE (op
))
4182 ? sizetype
: TREE_TYPE (op
);
4184 = wi::mul (bestn
->simdclone
->args
[i
].linear_step
,
4186 tree tcst
= wide_int_to_tree (type
, cst
);
4187 tree phi_arg
= copy_ssa_name (op
);
4189 = gimple_build_assign (phi_arg
, code
, phi_res
, tcst
);
4190 gimple_stmt_iterator si
= gsi_after_labels (loop
->header
);
4191 gsi_insert_after (&si
, new_stmt
, GSI_NEW_STMT
);
4192 add_phi_arg (new_phi
, phi_arg
, loop_latch_edge (loop
),
4194 arginfo
[i
].op
= phi_res
;
4195 vargs
.safe_push (phi_res
);
4200 = POINTER_TYPE_P (TREE_TYPE (op
))
4201 ? POINTER_PLUS_EXPR
: PLUS_EXPR
;
4202 tree type
= POINTER_TYPE_P (TREE_TYPE (op
))
4203 ? sizetype
: TREE_TYPE (op
);
4205 = wi::mul (bestn
->simdclone
->args
[i
].linear_step
,
4207 tree tcst
= wide_int_to_tree (type
, cst
);
4208 new_temp
= make_ssa_name (TREE_TYPE (op
));
4210 = gimple_build_assign (new_temp
, code
,
4211 arginfo
[i
].op
, tcst
);
4212 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
4213 vargs
.safe_push (new_temp
);
4216 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP
:
4217 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP
:
4218 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP
:
4219 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP
:
4220 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP
:
4221 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP
:
4227 gcall
*new_call
= gimple_build_call_vec (fndecl
, vargs
);
4231 || known_eq (simd_clone_subparts (rtype
), nunits
));
4233 new_temp
= create_tmp_var (ratype
);
4234 else if (useless_type_conversion_p (vectype
, rtype
))
4235 new_temp
= make_ssa_name (vec_dest
, new_call
);
4237 new_temp
= make_ssa_name (rtype
, new_call
);
4238 gimple_call_set_lhs (new_call
, new_temp
);
4240 vect_finish_stmt_generation (vinfo
, stmt_info
, new_call
, gsi
);
4241 gimple
*new_stmt
= new_call
;
4245 if (!multiple_p (simd_clone_subparts (vectype
), nunits
))
4248 poly_uint64 prec
= GET_MODE_BITSIZE (TYPE_MODE (vectype
));
4249 poly_uint64 bytes
= GET_MODE_SIZE (TYPE_MODE (vectype
));
4250 k
= vector_unroll_factor (nunits
,
4251 simd_clone_subparts (vectype
));
4252 gcc_assert ((k
& (k
- 1)) == 0);
4253 for (l
= 0; l
< k
; l
++)
4258 t
= build_fold_addr_expr (new_temp
);
4259 t
= build2 (MEM_REF
, vectype
, t
,
4260 build_int_cst (TREE_TYPE (t
), l
* bytes
));
4263 t
= build3 (BIT_FIELD_REF
, vectype
, new_temp
,
4264 bitsize_int (prec
), bitsize_int (l
* prec
));
4265 new_stmt
= gimple_build_assign (make_ssa_name (vectype
), t
);
4266 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
4268 if (j
== 0 && l
== 0)
4269 *vec_stmt
= new_stmt
;
4270 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (new_stmt
);
4274 vect_clobber_variable (vinfo
, stmt_info
, gsi
, new_temp
);
4277 else if (!multiple_p (nunits
, simd_clone_subparts (vectype
)))
4279 unsigned int k
= (simd_clone_subparts (vectype
)
4280 / simd_clone_subparts (rtype
));
4281 gcc_assert ((k
& (k
- 1)) == 0);
4282 if ((j
& (k
- 1)) == 0)
4283 vec_alloc (ret_ctor_elts
, k
);
4287 o
= vector_unroll_factor (nunits
,
4288 simd_clone_subparts (rtype
));
4289 for (m
= 0; m
< o
; m
++)
4291 tree tem
= build4 (ARRAY_REF
, rtype
, new_temp
,
4292 size_int (m
), NULL_TREE
, NULL_TREE
);
4293 new_stmt
= gimple_build_assign (make_ssa_name (rtype
),
4295 vect_finish_stmt_generation (vinfo
, stmt_info
,
4297 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts
, NULL_TREE
,
4298 gimple_assign_lhs (new_stmt
));
4300 vect_clobber_variable (vinfo
, stmt_info
, gsi
, new_temp
);
4303 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts
, NULL_TREE
, new_temp
);
4304 if ((j
& (k
- 1)) != k
- 1)
4306 vec_oprnd0
= build_constructor (vectype
, ret_ctor_elts
);
4308 = gimple_build_assign (make_ssa_name (vec_dest
), vec_oprnd0
);
4309 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
4311 if ((unsigned) j
== k
- 1)
4312 *vec_stmt
= new_stmt
;
4313 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (new_stmt
);
4318 tree t
= build_fold_addr_expr (new_temp
);
4319 t
= build2 (MEM_REF
, vectype
, t
,
4320 build_int_cst (TREE_TYPE (t
), 0));
4321 new_stmt
= gimple_build_assign (make_ssa_name (vec_dest
), t
);
4322 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
4323 vect_clobber_variable (vinfo
, stmt_info
, gsi
, new_temp
);
4325 else if (!useless_type_conversion_p (vectype
, rtype
))
4327 vec_oprnd0
= build1 (VIEW_CONVERT_EXPR
, vectype
, new_temp
);
4329 = gimple_build_assign (make_ssa_name (vec_dest
), vec_oprnd0
);
4330 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
4335 *vec_stmt
= new_stmt
;
4336 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (new_stmt
);
4339 for (i
= 0; i
< nargs
; ++i
)
4341 vec
<tree
> oprndsi
= vec_oprnds
[i
];
4346 /* The call in STMT might prevent it from being removed in dce.
4347 We however cannot remove it here, due to the way the ssa name
4348 it defines is mapped to the new definition. So just replace
4349 rhs of the statement with something harmless. */
4357 type
= TREE_TYPE (scalar_dest
);
4358 lhs
= gimple_call_lhs (vect_orig_stmt (stmt_info
)->stmt
);
4359 new_stmt
= gimple_build_assign (lhs
, build_zero_cst (type
));
4362 new_stmt
= gimple_build_nop ();
4363 vinfo
->replace_stmt (gsi
, vect_orig_stmt (stmt_info
), new_stmt
);
4364 unlink_stmt_vdef (stmt
);
/* Function vect_gen_widened_results_half

   Create a vector stmt whose code, type, number of arguments, and result
   variable are CODE, OP_TYPE, and VEC_DEST, and its arguments are
   VEC_OPRND0 and VEC_OPRND1.  The new vector stmt is to be inserted at GSI.
   In the case that CODE is a CALL_EXPR, this means that a call to DECL
   needs to be created (DECL is a function-decl of a target-builtin).
   STMT_INFO is the original scalar stmt that we are vectorizing.  */

static gimple *
vect_gen_widened_results_half (vec_info *vinfo, enum tree_code code,
                               tree vec_oprnd0, tree vec_oprnd1, int op_type,
                               tree vec_dest, gimple_stmt_iterator *gsi,
                               stmt_vec_info stmt_info)
{
  gimple *new_stmt;
  tree new_temp;

  /* Generate half of the widened result:  */
  gcc_assert (op_type == TREE_CODE_LENGTH (code));
  if (op_type != binary_op)
    vec_oprnd1 = NULL;
  new_stmt = gimple_build_assign (vec_dest, code, vec_oprnd0, vec_oprnd1);
  new_temp = make_ssa_name (vec_dest, new_stmt);
  gimple_assign_set_lhs (new_stmt, new_temp);
  vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);

  return new_stmt;
}
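
/* Illustrative sketch: on a target with 128-bit vectors, widening a V8HI
   multiplication to eight 32-bit products is emitted as two halves,

     lo = VEC_WIDEN_MULT_LO_EXPR <va, vb>;    lanes 0..3 -> V4SI
     hi = VEC_WIDEN_MULT_HI_EXPR <va, vb>;    lanes 4..7 -> V4SI

   and this helper is called once per half with the two codes returned by
   supportable_widening_operation.  */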
/* Create vectorized demotion statements for vector operands from VEC_OPRNDS.
   For multi-step conversions store the resulting vectors and call the function
   recursively.  */

static void
vect_create_vectorized_demotion_stmts (vec_info *vinfo, vec<tree> *vec_oprnds,
                                       int multi_step_cvt,
                                       stmt_vec_info stmt_info,
                                       vec<tree> vec_dsts,
                                       gimple_stmt_iterator *gsi,
                                       slp_tree slp_node, enum tree_code code)
{
  unsigned int i;
  tree vop0, vop1, new_tmp, vec_dest;

  vec_dest = vec_dsts.pop ();

  for (i = 0; i < vec_oprnds->length (); i += 2)
    {
      /* Create demotion operation.  */
      vop0 = (*vec_oprnds)[i];
      vop1 = (*vec_oprnds)[i + 1];
      gassign *new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1);
      new_tmp = make_ssa_name (vec_dest, new_stmt);
      gimple_assign_set_lhs (new_stmt, new_tmp);
      vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);

      if (multi_step_cvt)
        /* Store the resulting vector for next recursive call.  */
        (*vec_oprnds)[i/2] = new_tmp;
      else
        {
          /* This is the last step of the conversion sequence.  Store the
             vectors in SLP_NODE or in vector info of the scalar statement
             (or in STMT_VINFO_RELATED_STMT chain).  */
          if (slp_node)
            SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
          else
            STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
        }
    }

  /* For multi-step demotion operations we first generate demotion operations
     from the source type to the intermediate types, and then combine the
     results (stored in VEC_OPRNDS) in demotion operation to the destination
     type.  */
  if (multi_step_cvt)
    {
      /* At each level of recursion we have half of the operands we had at the
         previous level.  */
      vec_oprnds->truncate ((i+1)/2);
      vect_create_vectorized_demotion_stmts (vinfo, vec_oprnds,
                                             multi_step_cvt - 1,
                                             stmt_info, vec_dsts, gsi,
                                             slp_node, VEC_PACK_TRUNC_EXPR);
    }

  vec_dsts.quick_push (vec_dest);
}

/* Create vectorized promotion statements for vector operands from VEC_OPRNDS0
   and VEC_OPRNDS1, for a binary operation associated with scalar statement
   STMT_INFO.  For multi-step conversions store the resulting vectors and
   call the function recursively.  */

static void
vect_create_vectorized_promotion_stmts (vec_info *vinfo,
                                        vec<tree> *vec_oprnds0,
                                        vec<tree> *vec_oprnds1,
                                        stmt_vec_info stmt_info, tree vec_dest,
                                        gimple_stmt_iterator *gsi,
                                        enum tree_code code1,
                                        enum tree_code code2, int op_type)
{
  int i;
  tree vop0, vop1, new_tmp1, new_tmp2;
  gimple *new_stmt1, *new_stmt2;
  vec<tree> vec_tmp = vNULL;

  vec_tmp.create (vec_oprnds0->length () * 2);
  FOR_EACH_VEC_ELT (*vec_oprnds0, i, vop0)
    {
      if (op_type == binary_op)
        vop1 = (*vec_oprnds1)[i];
      else
        vop1 = NULL_TREE;

      /* Generate the two halves of promotion operation.  */
      new_stmt1 = vect_gen_widened_results_half (vinfo, code1, vop0, vop1,
                                                 op_type, vec_dest, gsi,
                                                 stmt_info);
      new_stmt2 = vect_gen_widened_results_half (vinfo, code2, vop0, vop1,
                                                 op_type, vec_dest, gsi,
                                                 stmt_info);
      if (is_gimple_call (new_stmt1))
        {
          new_tmp1 = gimple_call_lhs (new_stmt1);
          new_tmp2 = gimple_call_lhs (new_stmt2);
        }
      else
        {
          new_tmp1 = gimple_assign_lhs (new_stmt1);
          new_tmp2 = gimple_assign_lhs (new_stmt2);
        }

      /* Store the results for the next step.  */
      vec_tmp.quick_push (new_tmp1);
      vec_tmp.quick_push (new_tmp2);
    }

  vec_oprnds0->release ();
  *vec_oprnds0 = vec_tmp;
}
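
/* Illustrative sketch of the two directions handled above.  Demoting four
   V4SI operands to V16QI proceeds pairwise,

     t0 = VEC_PACK_TRUNC_EXPR <v0, v1>;
     t1 = VEC_PACK_TRUNC_EXPR <v2, v3>;
     r  = VEC_PACK_TRUNC_EXPR <t0, t1>;

   which is why each recursion level halves the number of operands.
   Promotion goes the other way: one V8HI input yields a lo/hi pair of
   V4SI results,

     lo = VEC_UNPACK_LO_EXPR <v>;
     hi = VEC_UNPACK_HI_EXPR <v>;

   so VEC_TMP ends up with twice as many entries as VEC_OPRNDS0.  */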
/* Check if STMT_INFO performs a conversion operation that can be vectorized.
   If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at GSI.
   Return true if STMT_INFO is vectorizable in this way.  */
4523 vectorizable_conversion (vec_info
*vinfo
,
4524 stmt_vec_info stmt_info
, gimple_stmt_iterator
*gsi
,
4525 gimple
**vec_stmt
, slp_tree slp_node
,
4526 stmt_vector_for_cost
*cost_vec
)
4530 tree op0
, op1
= NULL_TREE
;
4531 loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
);
4532 enum tree_code code
, code1
= ERROR_MARK
, code2
= ERROR_MARK
;
4533 enum tree_code codecvt1
= ERROR_MARK
, codecvt2
= ERROR_MARK
;
4535 enum vect_def_type dt
[2] = {vect_unknown_def_type
, vect_unknown_def_type
};
4537 poly_uint64 nunits_in
;
4538 poly_uint64 nunits_out
;
4539 tree vectype_out
, vectype_in
;
4541 tree lhs_type
, rhs_type
;
4542 enum { NARROW
, NONE
, WIDEN
} modifier
;
4543 vec
<tree
> vec_oprnds0
= vNULL
;
4544 vec
<tree
> vec_oprnds1
= vNULL
;
4546 bb_vec_info bb_vinfo
= dyn_cast
<bb_vec_info
> (vinfo
);
4547 int multi_step_cvt
= 0;
4548 vec
<tree
> interm_types
= vNULL
;
4549 tree intermediate_type
, cvt_type
= NULL_TREE
;
4551 unsigned short fltsz
;
4553 /* Is STMT a vectorizable conversion? */
4555 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
4558 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
4562 gassign
*stmt
= dyn_cast
<gassign
*> (stmt_info
->stmt
);
4566 if (TREE_CODE (gimple_assign_lhs (stmt
)) != SSA_NAME
)
4569 code
= gimple_assign_rhs_code (stmt
);
4570 if (!CONVERT_EXPR_CODE_P (code
)
4571 && code
!= FIX_TRUNC_EXPR
4572 && code
!= FLOAT_EXPR
4573 && code
!= WIDEN_MULT_EXPR
4574 && code
!= WIDEN_LSHIFT_EXPR
)
4577 op_type
= TREE_CODE_LENGTH (code
);
4579 /* Check types of lhs and rhs. */
4580 scalar_dest
= gimple_assign_lhs (stmt
);
4581 lhs_type
= TREE_TYPE (scalar_dest
);
4582 vectype_out
= STMT_VINFO_VECTYPE (stmt_info
);
4584 /* Check the operands of the operation. */
4585 slp_tree slp_op0
, slp_op1
= NULL
;
4586 if (!vect_is_simple_use (vinfo
, stmt_info
, slp_node
,
4587 0, &op0
, &slp_op0
, &dt
[0], &vectype_in
))
4589 if (dump_enabled_p ())
4590 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4591 "use not simple.\n");
4595 rhs_type
= TREE_TYPE (op0
);
4596 if ((code
!= FIX_TRUNC_EXPR
&& code
!= FLOAT_EXPR
)
4597 && !((INTEGRAL_TYPE_P (lhs_type
)
4598 && INTEGRAL_TYPE_P (rhs_type
))
4599 || (SCALAR_FLOAT_TYPE_P (lhs_type
)
4600 && SCALAR_FLOAT_TYPE_P (rhs_type
))))
4603 if (!VECTOR_BOOLEAN_TYPE_P (vectype_out
)
4604 && ((INTEGRAL_TYPE_P (lhs_type
)
4605 && !type_has_mode_precision_p (lhs_type
))
4606 || (INTEGRAL_TYPE_P (rhs_type
)
4607 && !type_has_mode_precision_p (rhs_type
))))
4609 if (dump_enabled_p ())
4610 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4611 "type conversion to/from bit-precision unsupported."
4616 if (op_type
== binary_op
)
4618 gcc_assert (code
== WIDEN_MULT_EXPR
|| code
== WIDEN_LSHIFT_EXPR
);
4620 op1
= gimple_assign_rhs2 (stmt
);
4622 if (!vect_is_simple_use (vinfo
, stmt_info
, slp_node
, 1,
4623 &op1
, &slp_op1
, &dt
[1], &vectype1_in
))
4625 if (dump_enabled_p ())
4626 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4627 "use not simple.\n");
4630 /* For WIDEN_MULT_EXPR, if OP0 is a constant, use the type of
4633 vectype_in
= vectype1_in
;
4636 /* If op0 is an external or constant def, infer the vector type
4637 from the scalar type. */
4639 vectype_in
= get_vectype_for_scalar_type (vinfo
, rhs_type
, slp_node
);
4641 gcc_assert (vectype_in
);
4644 if (dump_enabled_p ())
4645 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4646 "no vectype for scalar type %T\n", rhs_type
);
4651 if (VECTOR_BOOLEAN_TYPE_P (vectype_out
)
4652 && !VECTOR_BOOLEAN_TYPE_P (vectype_in
))
4654 if (dump_enabled_p ())
4655 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4656 "can't convert between boolean and non "
4657 "boolean vectors %T\n", rhs_type
);
4662 nunits_in
= TYPE_VECTOR_SUBPARTS (vectype_in
);
4663 nunits_out
= TYPE_VECTOR_SUBPARTS (vectype_out
);
4664 if (known_eq (nunits_out
, nunits_in
))
4666 else if (multiple_p (nunits_out
, nunits_in
))
4670 gcc_checking_assert (multiple_p (nunits_in
, nunits_out
));
4674 /* Multiple types in SLP are handled by creating the appropriate number of
4675 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
4679 else if (modifier
== NARROW
)
4680 ncopies
= vect_get_num_copies (loop_vinfo
, vectype_out
);
4682 ncopies
= vect_get_num_copies (loop_vinfo
, vectype_in
);
4684 /* Sanity check: make sure that at least one copy of the vectorized stmt
4685 needs to be generated. */
4686 gcc_assert (ncopies
>= 1);
4688 bool found_mode
= false;
4689 scalar_mode lhs_mode
= SCALAR_TYPE_MODE (lhs_type
);
4690 scalar_mode rhs_mode
= SCALAR_TYPE_MODE (rhs_type
);
4691 opt_scalar_mode rhs_mode_iter
;
4693 /* Supportable by target? */
4697 if (code
!= FIX_TRUNC_EXPR
4698 && code
!= FLOAT_EXPR
4699 && !CONVERT_EXPR_CODE_P (code
))
4701 if (supportable_convert_operation (code
, vectype_out
, vectype_in
, &code1
))
4705 if (dump_enabled_p ())
4706 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4707 "conversion not supported by target.\n");
4711 if (supportable_widening_operation (vinfo
, code
, stmt_info
, vectype_out
,
4712 vectype_in
, &code1
, &code2
,
4713 &multi_step_cvt
, &interm_types
))
4715 /* Binary widening operation can only be supported directly by the
4717 gcc_assert (!(multi_step_cvt
&& op_type
== binary_op
));
4721 if (code
!= FLOAT_EXPR
4722 || GET_MODE_SIZE (lhs_mode
) <= GET_MODE_SIZE (rhs_mode
))
4725 fltsz
= GET_MODE_SIZE (lhs_mode
);
4726 FOR_EACH_2XWIDER_MODE (rhs_mode_iter
, rhs_mode
)
4728 rhs_mode
= rhs_mode_iter
.require ();
4729 if (GET_MODE_SIZE (rhs_mode
) > fltsz
)
4733 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode
), 0);
4734 cvt_type
= get_same_sized_vectype (cvt_type
, vectype_in
);
4735 if (cvt_type
== NULL_TREE
)
4738 if (GET_MODE_SIZE (rhs_mode
) == fltsz
)
4740 if (!supportable_convert_operation (code
, vectype_out
,
4741 cvt_type
, &codecvt1
))
4744 else if (!supportable_widening_operation (vinfo
, code
, stmt_info
,
4745 vectype_out
, cvt_type
,
4746 &codecvt1
, &codecvt2
,
4751 gcc_assert (multi_step_cvt
== 0);
4753 if (supportable_widening_operation (vinfo
, NOP_EXPR
, stmt_info
,
4755 vectype_in
, &code1
, &code2
,
4756 &multi_step_cvt
, &interm_types
))
4766 if (GET_MODE_SIZE (rhs_mode
) == fltsz
)
4767 codecvt2
= ERROR_MARK
;
4771 interm_types
.safe_push (cvt_type
);
4772 cvt_type
= NULL_TREE
;
4777 gcc_assert (op_type
== unary_op
);
4778 if (supportable_narrowing_operation (code
, vectype_out
, vectype_in
,
4779 &code1
, &multi_step_cvt
,
4783 if (code
!= FIX_TRUNC_EXPR
4784 || GET_MODE_SIZE (lhs_mode
) >= GET_MODE_SIZE (rhs_mode
))
4788 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode
), 0);
4789 cvt_type
= get_same_sized_vectype (cvt_type
, vectype_in
);
4790 if (cvt_type
== NULL_TREE
)
4792 if (!supportable_convert_operation (code
, cvt_type
, vectype_in
,
4795 if (supportable_narrowing_operation (NOP_EXPR
, vectype_out
, cvt_type
,
4796 &code1
, &multi_step_cvt
,
4805 if (!vec_stmt
) /* transformation not required. */
4808 && (!vect_maybe_update_slp_op_vectype (slp_op0
, vectype_in
)
4809 || !vect_maybe_update_slp_op_vectype (slp_op1
, vectype_in
)))
4811 if (dump_enabled_p ())
4812 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4813 "incompatible vector types for invariants\n");
4816 DUMP_VECT_SCOPE ("vectorizable_conversion");
4817 if (modifier
== NONE
)
4819 STMT_VINFO_TYPE (stmt_info
) = type_conversion_vec_info_type
;
4820 vect_model_simple_cost (vinfo
, stmt_info
, ncopies
, dt
, ndts
, slp_node
,
4823 else if (modifier
== NARROW
)
4825 STMT_VINFO_TYPE (stmt_info
) = type_demotion_vec_info_type
;
4826 /* The final packing step produces one vector result per copy. */
4827 unsigned int nvectors
4828 = (slp_node
? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
) : ncopies
);
4829 vect_model_promotion_demotion_cost (stmt_info
, dt
, nvectors
,
4830 multi_step_cvt
, cost_vec
);
4834 STMT_VINFO_TYPE (stmt_info
) = type_promotion_vec_info_type
;
4835 /* The initial unpacking step produces two vector results
4836 per copy. MULTI_STEP_CVT is 0 for a single conversion,
4837 so >> MULTI_STEP_CVT divides by 2^(number of steps - 1). */
4838 unsigned int nvectors
4840 ? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
) >> multi_step_cvt
4842 vect_model_promotion_demotion_cost (stmt_info
, dt
, nvectors
,
4843 multi_step_cvt
, cost_vec
);
4845 interm_types
.release ();
4850 if (dump_enabled_p ())
4851 dump_printf_loc (MSG_NOTE
, vect_location
,
4852 "transform conversion. ncopies = %d.\n", ncopies
);
4854 if (op_type
== binary_op
)
4856 if (CONSTANT_CLASS_P (op0
))
4857 op0
= fold_convert (TREE_TYPE (op1
), op0
);
4858 else if (CONSTANT_CLASS_P (op1
))
4859 op1
= fold_convert (TREE_TYPE (op0
), op1
);
4862 /* In case of multi-step conversion, we first generate conversion operations
4863 to the intermediate types, and then from that types to the final one.
4864 We create vector destinations for the intermediate type (TYPES) received
4865 from supportable_*_operation, and store them in the correct order
4866 for future use in vect_create_vectorized_*_stmts (). */
4867 auto_vec
<tree
> vec_dsts (multi_step_cvt
+ 1);
4868 vec_dest
= vect_create_destination_var (scalar_dest
,
4869 (cvt_type
&& modifier
== WIDEN
)
4870 ? cvt_type
: vectype_out
);
4871 vec_dsts
.quick_push (vec_dest
);
4875 for (i
= interm_types
.length () - 1;
4876 interm_types
.iterate (i
, &intermediate_type
); i
--)
4878 vec_dest
= vect_create_destination_var (scalar_dest
,
4880 vec_dsts
.quick_push (vec_dest
);
4885 vec_dest
= vect_create_destination_var (scalar_dest
,
4887 ? vectype_out
: cvt_type
);
4892 if (modifier
== WIDEN
)
4894 else if (modifier
== NARROW
)
4897 ninputs
= vect_pow2 (multi_step_cvt
);
4905 vect_get_vec_defs (vinfo
, stmt_info
, slp_node
, ncopies
,
4907 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vop0
)
4909 /* Arguments are ready, create the new vector stmt. */
4910 gcc_assert (TREE_CODE_LENGTH (code1
) == unary_op
);
4911 gassign
*new_stmt
= gimple_build_assign (vec_dest
, code1
, vop0
);
4912 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
4913 gimple_assign_set_lhs (new_stmt
, new_temp
);
4914 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
4917 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
4919 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (new_stmt
);
4924 /* In case the vectorization factor (VF) is bigger than the number
4925 of elements that we can fit in a vectype (nunits), we have to
4926 generate more than one vector stmt - i.e - we need to "unroll"
4927 the vector stmt by a factor VF/nunits. */
4928 vect_get_vec_defs (vinfo
, stmt_info
, slp_node
, ncopies
* ninputs
,
4930 code
== WIDEN_LSHIFT_EXPR
? NULL_TREE
: op1
,
4932 if (code
== WIDEN_LSHIFT_EXPR
)
4934 vec_oprnds1
.create (ncopies
* ninputs
);
4935 for (i
= 0; i
< ncopies
* ninputs
; ++i
)
4936 vec_oprnds1
.quick_push (op1
);
4938 /* Arguments are ready. Create the new vector stmts. */
4939 for (i
= multi_step_cvt
; i
>= 0; i
--)
4941 tree this_dest
= vec_dsts
[i
];
4942 enum tree_code c1
= code1
, c2
= code2
;
4943 if (i
== 0 && codecvt2
!= ERROR_MARK
)
4948 vect_create_vectorized_promotion_stmts (vinfo
, &vec_oprnds0
,
4949 &vec_oprnds1
, stmt_info
,
4954 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vop0
)
4959 gcc_assert (TREE_CODE_LENGTH (codecvt1
) == unary_op
);
4960 new_temp
= make_ssa_name (vec_dest
);
4961 new_stmt
= gimple_build_assign (new_temp
, codecvt1
, vop0
);
4962 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
4965 new_stmt
= SSA_NAME_DEF_STMT (vop0
);
4968 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
4970 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (new_stmt
);
4975 /* In case the vectorization factor (VF) is bigger than the number
4976 of elements that we can fit in a vectype (nunits), we have to
4977 generate more than one vector stmt - i.e - we need to "unroll"
4978 the vector stmt by a factor VF/nunits. */
4979 vect_get_vec_defs (vinfo
, stmt_info
, slp_node
, ncopies
* ninputs
,
4981 /* Arguments are ready. Create the new vector stmts. */
4983 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vop0
)
4985 gcc_assert (TREE_CODE_LENGTH (codecvt1
) == unary_op
);
4986 new_temp
= make_ssa_name (vec_dest
);
4988 = gimple_build_assign (new_temp
, codecvt1
, vop0
);
4989 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
4990 vec_oprnds0
[i
] = new_temp
;
4993 vect_create_vectorized_demotion_stmts (vinfo
, &vec_oprnds0
,
4995 stmt_info
, vec_dsts
, gsi
,
5000 *vec_stmt
= STMT_VINFO_VEC_STMTS (stmt_info
)[0];
5002 vec_oprnds0
.release ();
5003 vec_oprnds1
.release ();
5004 interm_types
.release ();
/* Return true if we can assume from the scalar form of STMT_INFO that
   neither the scalar nor the vector forms will generate code.  STMT_INFO
   is known not to involve a data reference.  */

bool
vect_nop_conversion_p (stmt_vec_info stmt_info)
{
  gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
  if (!stmt)
    return false;

  tree lhs = gimple_assign_lhs (stmt);
  tree_code code = gimple_assign_rhs_code (stmt);
  tree rhs = gimple_assign_rhs1 (stmt);

  if (code == SSA_NAME || code == VIEW_CONVERT_EXPR)
    return true;

  if (CONVERT_EXPR_CODE_P (code))
    return tree_nop_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs));

  return false;
}
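
/* Examples (illustrative, assuming 32-bit int and 64-bit long):

     unsigned _2 = (unsigned) _1;    int -> unsigned keeps the precision,
                                     a "nop" that generates no code
     long _3 = (long) _1;            widening, a real conversion

   Only conversions of the first kind, plus plain copies and
   VIEW_CONVERT_EXPRs, are treated as free here.  */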
/* Function vectorizable_assignment.

   Check if STMT_INFO performs an assignment (copy) that can be vectorized.
   If VEC_STMT is also passed, vectorize the STMT_INFO: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at GSI.
   Return true if STMT_INFO is vectorizable in this way.  */
5041 vectorizable_assignment (vec_info
*vinfo
,
5042 stmt_vec_info stmt_info
, gimple_stmt_iterator
*gsi
,
5043 gimple
**vec_stmt
, slp_tree slp_node
,
5044 stmt_vector_for_cost
*cost_vec
)
5049 loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
);
5051 enum vect_def_type dt
[1] = {vect_unknown_def_type
};
5055 vec
<tree
> vec_oprnds
= vNULL
;
5057 bb_vec_info bb_vinfo
= dyn_cast
<bb_vec_info
> (vinfo
);
5058 enum tree_code code
;
5061 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
5064 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
5068 /* Is vectorizable assignment? */
5069 gassign
*stmt
= dyn_cast
<gassign
*> (stmt_info
->stmt
);
5073 scalar_dest
= gimple_assign_lhs (stmt
);
5074 if (TREE_CODE (scalar_dest
) != SSA_NAME
)
5077 if (STMT_VINFO_DATA_REF (stmt_info
))
5080 code
= gimple_assign_rhs_code (stmt
);
5081 if (!(gimple_assign_single_p (stmt
)
5082 || code
== PAREN_EXPR
5083 || CONVERT_EXPR_CODE_P (code
)))
5086 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
5087 poly_uint64 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
5089 /* Multiple types in SLP are handled by creating the appropriate number of
5090 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
5095 ncopies
= vect_get_num_copies (loop_vinfo
, vectype
);
5097 gcc_assert (ncopies
>= 1);
5100 if (!vect_is_simple_use (vinfo
, stmt_info
, slp_node
, 0, &op
, &slp_op
,
5101 &dt
[0], &vectype_in
))
5103 if (dump_enabled_p ())
5104 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5105 "use not simple.\n");
5109 vectype_in
= get_vectype_for_scalar_type (vinfo
, TREE_TYPE (op
), slp_node
);
5111 /* We can handle NOP_EXPR conversions that do not change the number
5112 of elements or the vector size. */
5113 if ((CONVERT_EXPR_CODE_P (code
)
5114 || code
== VIEW_CONVERT_EXPR
)
5116 || maybe_ne (TYPE_VECTOR_SUBPARTS (vectype_in
), nunits
)
5117 || maybe_ne (GET_MODE_SIZE (TYPE_MODE (vectype
)),
5118 GET_MODE_SIZE (TYPE_MODE (vectype_in
)))))
5121 /* We do not handle bit-precision changes. */
5122 if ((CONVERT_EXPR_CODE_P (code
)
5123 || code
== VIEW_CONVERT_EXPR
)
5124 && INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest
))
5125 && (!type_has_mode_precision_p (TREE_TYPE (scalar_dest
))
5126 || !type_has_mode_precision_p (TREE_TYPE (op
)))
5127 /* But a conversion that does not change the bit-pattern is ok. */
5128 && !((TYPE_PRECISION (TREE_TYPE (scalar_dest
))
5129 > TYPE_PRECISION (TREE_TYPE (op
)))
5130 && TYPE_UNSIGNED (TREE_TYPE (op
)))
5131 /* Conversion between boolean types of different sizes is
5132 a simple assignment in case their vectypes are same
5134 && (!VECTOR_BOOLEAN_TYPE_P (vectype
)
5135 || !VECTOR_BOOLEAN_TYPE_P (vectype_in
)))
5137 if (dump_enabled_p ())
5138 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5139 "type conversion to/from bit-precision "
5144 if (!vec_stmt
) /* transformation not required. */
5147 && !vect_maybe_update_slp_op_vectype (slp_op
, vectype_in
))
5149 if (dump_enabled_p ())
5150 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5151 "incompatible vector types for invariants\n");
5154 STMT_VINFO_TYPE (stmt_info
) = assignment_vec_info_type
;
5155 DUMP_VECT_SCOPE ("vectorizable_assignment");
5156 if (!vect_nop_conversion_p (stmt_info
))
5157 vect_model_simple_cost (vinfo
, stmt_info
, ncopies
, dt
, ndts
, slp_node
,
5163 if (dump_enabled_p ())
5164 dump_printf_loc (MSG_NOTE
, vect_location
, "transform assignment.\n");
5167 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
5170 vect_get_vec_defs (vinfo
, stmt_info
, slp_node
, ncopies
, op
, &vec_oprnds
);
5172 /* Arguments are ready. create the new vector stmt. */
5173 FOR_EACH_VEC_ELT (vec_oprnds
, i
, vop
)
5175 if (CONVERT_EXPR_CODE_P (code
)
5176 || code
== VIEW_CONVERT_EXPR
)
5177 vop
= build1 (VIEW_CONVERT_EXPR
, vectype
, vop
);
5178 gassign
*new_stmt
= gimple_build_assign (vec_dest
, vop
);
5179 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
5180 gimple_assign_set_lhs (new_stmt
, new_temp
);
5181 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
5183 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
5185 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (new_stmt
);
5188 *vec_stmt
= STMT_VINFO_VEC_STMTS (stmt_info
)[0];
5190 vec_oprnds
.release ();
/* Return TRUE if CODE (a shift operation) is supported for SCALAR_TYPE
   either as shift by a scalar or by a vector.  */

bool
vect_supportable_shift (vec_info *vinfo, enum tree_code code, tree scalar_type)
{
  machine_mode vec_mode;
  optab optab;
  int icode;
  tree vectype;

  vectype = get_vectype_for_scalar_type (vinfo, scalar_type);
  if (!vectype)
    return false;

  optab = optab_for_tree_code (code, vectype, optab_scalar);
  if (!optab
      || optab_handler (optab, TYPE_MODE (vectype)) == CODE_FOR_nothing)
    {
      optab = optab_for_tree_code (code, vectype, optab_vector);
      if (!optab
          || (optab_handler (optab, TYPE_MODE (vectype))
              == CODE_FOR_nothing))
        return false;
    }

  vec_mode = TYPE_MODE (vectype);
  icode = (int) optab_handler (optab, vec_mode);
  if (icode == CODE_FOR_nothing)
    return false;

  return true;
}
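
/* A minimal usage sketch (hypothetical caller): pattern recognizers ask
   this before deciding to synthesize a shift, e.g.

     if (!vect_supportable_shift (vinfo, RSHIFT_EXPR, itype))
       return NULL;

   i.e. the shift must be supported with either a scalar or a vector
   shift amount for the vector type corresponding to ITYPE.  */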
/* Function vectorizable_shift.

   Check if STMT_INFO performs a shift operation that can be vectorized.
   If VEC_STMT is also passed, vectorize the STMT_INFO: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at GSI.
   Return true if STMT_INFO is vectorizable in this way.  */
5239 vectorizable_shift (vec_info
*vinfo
,
5240 stmt_vec_info stmt_info
, gimple_stmt_iterator
*gsi
,
5241 gimple
**vec_stmt
, slp_tree slp_node
,
5242 stmt_vector_for_cost
*cost_vec
)
5246 tree op0
, op1
= NULL
;
5247 tree vec_oprnd1
= NULL_TREE
;
5249 loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
);
5250 enum tree_code code
;
5251 machine_mode vec_mode
;
5255 machine_mode optab_op2_mode
;
5256 enum vect_def_type dt
[2] = {vect_unknown_def_type
, vect_unknown_def_type
};
5258 poly_uint64 nunits_in
;
5259 poly_uint64 nunits_out
;
5264 vec
<tree
> vec_oprnds0
= vNULL
;
5265 vec
<tree
> vec_oprnds1
= vNULL
;
5268 bool scalar_shift_arg
= true;
5269 bb_vec_info bb_vinfo
= dyn_cast
<bb_vec_info
> (vinfo
);
5270 bool incompatible_op1_vectype_p
= false;
5272 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
5275 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
5276 && STMT_VINFO_DEF_TYPE (stmt_info
) != vect_nested_cycle
5280 /* Is STMT a vectorizable binary/unary operation? */
5281 gassign
*stmt
= dyn_cast
<gassign
*> (stmt_info
->stmt
);
5285 if (TREE_CODE (gimple_assign_lhs (stmt
)) != SSA_NAME
)
5288 code
= gimple_assign_rhs_code (stmt
);
5290 if (!(code
== LSHIFT_EXPR
|| code
== RSHIFT_EXPR
|| code
== LROTATE_EXPR
5291 || code
== RROTATE_EXPR
))
5294 scalar_dest
= gimple_assign_lhs (stmt
);
5295 vectype_out
= STMT_VINFO_VECTYPE (stmt_info
);
5296 if (!type_has_mode_precision_p (TREE_TYPE (scalar_dest
)))
5298 if (dump_enabled_p ())
5299 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5300 "bit-precision shifts not supported.\n");
5305 if (!vect_is_simple_use (vinfo
, stmt_info
, slp_node
,
5306 0, &op0
, &slp_op0
, &dt
[0], &vectype
))
5308 if (dump_enabled_p ())
5309 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5310 "use not simple.\n");
5313 /* If op0 is an external or constant def, infer the vector type
5314 from the scalar type. */
5316 vectype
= get_vectype_for_scalar_type (vinfo
, TREE_TYPE (op0
), slp_node
);
5318 gcc_assert (vectype
);
5321 if (dump_enabled_p ())
5322 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5323 "no vectype for scalar type\n");
5327 nunits_out
= TYPE_VECTOR_SUBPARTS (vectype_out
);
5328 nunits_in
= TYPE_VECTOR_SUBPARTS (vectype
);
5329 if (maybe_ne (nunits_out
, nunits_in
))
5332 stmt_vec_info op1_def_stmt_info
;
5334 if (!vect_is_simple_use (vinfo
, stmt_info
, slp_node
, 1, &op1
, &slp_op1
,
5335 &dt
[1], &op1_vectype
, &op1_def_stmt_info
))
5337 if (dump_enabled_p ())
5338 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5339 "use not simple.\n");
5343 /* Multiple types in SLP are handled by creating the appropriate number of
5344 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
5349 ncopies
= vect_get_num_copies (loop_vinfo
, vectype
);
5351 gcc_assert (ncopies
>= 1);
5353 /* Determine whether the shift amount is a vector, or scalar. If the
5354 shift/rotate amount is a vector, use the vector/vector shift optabs. */
5356 if ((dt
[1] == vect_internal_def
5357 || dt
[1] == vect_induction_def
5358 || dt
[1] == vect_nested_cycle
)
5360 scalar_shift_arg
= false;
5361 else if (dt
[1] == vect_constant_def
5362 || dt
[1] == vect_external_def
5363 || dt
[1] == vect_internal_def
)
5365 /* In SLP, need to check whether the shift count is the same,
5366 in loops if it is a constant or invariant, it is always
5370 vec
<stmt_vec_info
> stmts
= SLP_TREE_SCALAR_STMTS (slp_node
);
5371 stmt_vec_info slpstmt_info
;
5373 FOR_EACH_VEC_ELT (stmts
, k
, slpstmt_info
)
5375 gassign
*slpstmt
= as_a
<gassign
*> (slpstmt_info
->stmt
);
5376 if (!operand_equal_p (gimple_assign_rhs2 (slpstmt
), op1
, 0))
5377 scalar_shift_arg
= false;
5380 /* For internal SLP defs we have to make sure we see scalar stmts
5381 for all vector elements.
5382 ??? For different vectors we could resort to a different
5383 scalar shift operand but code-generation below simply always
5385 if (dt
[1] == vect_internal_def
5386 && maybe_ne (nunits_out
* SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
),
5388 scalar_shift_arg
= false;
5391 /* If the shift amount is computed by a pattern stmt we cannot
5392 use the scalar amount directly thus give up and use a vector
5394 if (op1_def_stmt_info
&& is_pattern_stmt_p (op1_def_stmt_info
))
5395 scalar_shift_arg
= false;
5399 if (dump_enabled_p ())
5400 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5401 "operand mode requires invariant argument.\n");
5405 /* Vector shifted by vector. */
5406 bool was_scalar_shift_arg
= scalar_shift_arg
;
5407 if (!scalar_shift_arg
)
5409 optab
= optab_for_tree_code (code
, vectype
, optab_vector
);
5410 if (dump_enabled_p ())
5411 dump_printf_loc (MSG_NOTE
, vect_location
,
5412 "vector/vector shift/rotate found.\n");
5415 op1_vectype
= get_vectype_for_scalar_type (vinfo
, TREE_TYPE (op1
),
5417 incompatible_op1_vectype_p
5418 = (op1_vectype
== NULL_TREE
5419 || maybe_ne (TYPE_VECTOR_SUBPARTS (op1_vectype
),
5420 TYPE_VECTOR_SUBPARTS (vectype
))
5421 || TYPE_MODE (op1_vectype
) != TYPE_MODE (vectype
));
5422 if (incompatible_op1_vectype_p
5424 || SLP_TREE_DEF_TYPE (slp_op1
) != vect_constant_def
5425 || slp_op1
->refcnt
!= 1))
5427 if (dump_enabled_p ())
5428 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5429 "unusable type for last operand in"
5430 " vector/vector shift/rotate.\n");
5434 /* See if the machine has a vector shifted by scalar insn and if not
5435 then see if it has a vector shifted by vector insn. */
5438 optab
= optab_for_tree_code (code
, vectype
, optab_scalar
);
5440 && optab_handler (optab
, TYPE_MODE (vectype
)) != CODE_FOR_nothing
)
5442 if (dump_enabled_p ())
5443 dump_printf_loc (MSG_NOTE
, vect_location
,
5444 "vector/scalar shift/rotate found.\n");
5448 optab
= optab_for_tree_code (code
, vectype
, optab_vector
);
5450 && (optab_handler (optab
, TYPE_MODE (vectype
))
5451 != CODE_FOR_nothing
))
5453 scalar_shift_arg
= false;
5455 if (dump_enabled_p ())
5456 dump_printf_loc (MSG_NOTE
, vect_location
,
5457 "vector/vector shift/rotate found.\n");
5460 op1_vectype
= get_vectype_for_scalar_type (vinfo
,
5464 /* Unlike the other binary operators, shifts/rotates have
5465 the rhs being int, instead of the same type as the lhs,
5466 so make sure the scalar is the right type if we are
5467 dealing with vectors of long long/long/short/char. */
5468 incompatible_op1_vectype_p
5470 || !tree_nop_conversion_p (TREE_TYPE (vectype
),
5472 if (incompatible_op1_vectype_p
5473 && dt
[1] == vect_internal_def
)
5475 if (dump_enabled_p ())
5476 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5477 "unusable type for last operand in"
5478 " vector/vector shift/rotate.\n");
5485 /* Supportable by target? */
5488 if (dump_enabled_p ())
5489 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5493 vec_mode
= TYPE_MODE (vectype
);
5494 icode
= (int) optab_handler (optab
, vec_mode
);
5495 if (icode
== CODE_FOR_nothing
)
5497 if (dump_enabled_p ())
5498 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5499 "op not supported by target.\n");
5500 /* Check only during analysis. */
5501 if (maybe_ne (GET_MODE_SIZE (vec_mode
), UNITS_PER_WORD
)
5503 && !vect_worthwhile_without_simd_p (vinfo
, code
)))
5505 if (dump_enabled_p ())
5506 dump_printf_loc (MSG_NOTE
, vect_location
,
5507 "proceeding using word mode.\n");
5510 /* Worthwhile without SIMD support? Check only during analysis. */
5512 && !VECTOR_MODE_P (TYPE_MODE (vectype
))
5513 && !vect_worthwhile_without_simd_p (vinfo
, code
))
5515 if (dump_enabled_p ())
5516 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5517 "not worthwhile without SIMD support.\n");
5521 if (!vec_stmt
) /* transformation not required. */
5524 && (!vect_maybe_update_slp_op_vectype (slp_op0
, vectype
)
5525 || ((!scalar_shift_arg
|| dt
[1] == vect_internal_def
)
5526 && (!incompatible_op1_vectype_p
5527 || dt
[1] == vect_constant_def
)
5528 && !vect_maybe_update_slp_op_vectype
5530 incompatible_op1_vectype_p
? vectype
: op1_vectype
))))
5532 if (dump_enabled_p ())
5533 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5534 "incompatible vector types for invariants\n");
5537 /* Now adjust the constant shift amount in place. */
5539 && incompatible_op1_vectype_p
5540 && dt
[1] == vect_constant_def
)
5542 for (unsigned i
= 0;
5543 i
< SLP_TREE_SCALAR_OPS (slp_op1
).length (); ++i
)
5545 SLP_TREE_SCALAR_OPS (slp_op1
)[i
]
5546 = fold_convert (TREE_TYPE (vectype
),
5547 SLP_TREE_SCALAR_OPS (slp_op1
)[i
]);
5548 gcc_assert ((TREE_CODE (SLP_TREE_SCALAR_OPS (slp_op1
)[i
])
5552 STMT_VINFO_TYPE (stmt_info
) = shift_vec_info_type
;
5553 DUMP_VECT_SCOPE ("vectorizable_shift");
5554 vect_model_simple_cost (vinfo
, stmt_info
, ncopies
, dt
,
5555 scalar_shift_arg
? 1 : ndts
, slp_node
, cost_vec
);
5561 if (dump_enabled_p ())
5562 dump_printf_loc (MSG_NOTE
, vect_location
,
5563 "transform binary/unary operation.\n");
5565 if (incompatible_op1_vectype_p
&& !slp_node
)
5567 gcc_assert (!scalar_shift_arg
&& was_scalar_shift_arg
);
5568 op1
= fold_convert (TREE_TYPE (vectype
), op1
);
5569 if (dt
[1] != vect_constant_def
)
5570 op1
= vect_init_vector (vinfo
, stmt_info
, op1
,
5571 TREE_TYPE (vectype
), NULL
);
5575 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
5577 if (scalar_shift_arg
&& dt
[1] != vect_internal_def
)
5579 /* Vector shl and shr insn patterns can be defined with scalar
5580 operand 2 (shift operand). In this case, use constant or loop
5581 invariant op1 directly, without extending it to vector mode
5583 optab_op2_mode
= insn_data
[icode
].operand
[2].mode
;
5584 if (!VECTOR_MODE_P (optab_op2_mode
))
5586 if (dump_enabled_p ())
5587 dump_printf_loc (MSG_NOTE
, vect_location
,
5588 "operand 1 using scalar mode.\n");
5590 vec_oprnds1
.create (slp_node
? slp_node
->vec_stmts_size
: ncopies
);
5591 vec_oprnds1
.quick_push (vec_oprnd1
);
5592 /* Store vec_oprnd1 for every vector stmt to be created.
5593 We check during the analysis that all the shift arguments
5595 TODO: Allow different constants for different vector
5596 stmts generated for an SLP instance. */
5598 k
< (slp_node
? slp_node
->vec_stmts_size
- 1 : ncopies
- 1); k
++)
5599 vec_oprnds1
.quick_push (vec_oprnd1
);
5602 else if (!scalar_shift_arg
&& slp_node
&& incompatible_op1_vectype_p
)
5604 if (was_scalar_shift_arg
)
5606 /* If the argument was the same in all lanes create
5607 the correctly typed vector shift amount directly. */
5608 op1
= fold_convert (TREE_TYPE (vectype
), op1
);
5609 op1
= vect_init_vector (vinfo
, stmt_info
, op1
, TREE_TYPE (vectype
),
5610 !loop_vinfo
? gsi
: NULL
);
5611 vec_oprnd1
= vect_init_vector (vinfo
, stmt_info
, op1
, vectype
,
5612 !loop_vinfo
? gsi
: NULL
);
5613 vec_oprnds1
.create (slp_node
->vec_stmts_size
);
5614 for (k
= 0; k
< slp_node
->vec_stmts_size
; k
++)
5615 vec_oprnds1
.quick_push (vec_oprnd1
);
5617 else if (dt
[1] == vect_constant_def
)
5618 /* The constant shift amount has been adjusted in place. */
5621 gcc_assert (TYPE_MODE (op1_vectype
) == TYPE_MODE (vectype
));
5624 /* vec_oprnd1 is available if operand 1 should be of a scalar-type
5625 (a special case for certain kind of vector shifts); otherwise,
5626 operand 1 should be of a vector type (the usual case). */
5627 vect_get_vec_defs (vinfo
, stmt_info
, slp_node
, ncopies
,
5629 vec_oprnd1
? NULL_TREE
: op1
, &vec_oprnds1
);
5631 /* Arguments are ready. Create the new vector stmt. */
5632 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vop0
)
5634 /* For internal defs where we need to use a scalar shift arg
5635 extract the first lane. */
5636 if (scalar_shift_arg
&& dt
[1] == vect_internal_def
)
5638 vop1
= vec_oprnds1
[0];
5639 new_temp
= make_ssa_name (TREE_TYPE (TREE_TYPE (vop1
)));
5641 = gimple_build_assign (new_temp
,
5642 build3 (BIT_FIELD_REF
, TREE_TYPE (new_temp
),
5644 TYPE_SIZE (TREE_TYPE (new_temp
)),
5645 bitsize_zero_node
));
5646 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
5650 vop1
= vec_oprnds1
[i
];
5651 gassign
*new_stmt
= gimple_build_assign (vec_dest
, code
, vop0
, vop1
);
5652 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
5653 gimple_assign_set_lhs (new_stmt
, new_temp
);
5654 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
5656 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
5658 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (new_stmt
);
5662 *vec_stmt
= STMT_VINFO_VEC_STMTS (stmt_info
)[0];
5664 vec_oprnds0
.release ();
5665 vec_oprnds1
.release ();
/* Function vectorizable_operation.

   Check if STMT_INFO performs a binary, unary or ternary operation that can
   be vectorized.
   If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at GSI.
   Return true if STMT_INFO is vectorizable in this way.  */
5680 vectorizable_operation (vec_info
*vinfo
,
5681 stmt_vec_info stmt_info
, gimple_stmt_iterator
*gsi
,
5682 gimple
**vec_stmt
, slp_tree slp_node
,
5683 stmt_vector_for_cost
*cost_vec
)
5687 tree op0
, op1
= NULL_TREE
, op2
= NULL_TREE
;
5689 loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
);
5690 enum tree_code code
, orig_code
;
5691 machine_mode vec_mode
;
5695 bool target_support_p
;
5696 enum vect_def_type dt
[3]
5697 = {vect_unknown_def_type
, vect_unknown_def_type
, vect_unknown_def_type
};
5699 poly_uint64 nunits_in
;
5700 poly_uint64 nunits_out
;
5702 int ncopies
, vec_num
;
5704 vec
<tree
> vec_oprnds0
= vNULL
;
5705 vec
<tree
> vec_oprnds1
= vNULL
;
5706 vec
<tree
> vec_oprnds2
= vNULL
;
5707 tree vop0
, vop1
, vop2
;
5708 bb_vec_info bb_vinfo
= dyn_cast
<bb_vec_info
> (vinfo
);
5710 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
5713 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
5717 /* Is STMT a vectorizable binary/unary operation? */
5718 gassign
*stmt
= dyn_cast
<gassign
*> (stmt_info
->stmt
);
5722 /* Loads and stores are handled in vectorizable_{load,store}. */
5723 if (STMT_VINFO_DATA_REF (stmt_info
))
5726 orig_code
= code
= gimple_assign_rhs_code (stmt
);
5728 /* Shifts are handled in vectorizable_shift. */
5729 if (code
== LSHIFT_EXPR
5730 || code
== RSHIFT_EXPR
5731 || code
== LROTATE_EXPR
5732 || code
== RROTATE_EXPR
)
5735 /* Comparisons are handled in vectorizable_comparison. */
5736 if (TREE_CODE_CLASS (code
) == tcc_comparison
)
5739 /* Conditions are handled in vectorizable_condition. */
5740 if (code
== COND_EXPR
)
5743 /* For pointer addition and subtraction, we should use the normal
5744 plus and minus for the vector operation. */
5745 if (code
== POINTER_PLUS_EXPR
)
5747 if (code
== POINTER_DIFF_EXPR
)
5750 /* Support only unary or binary operations. */
5751 op_type
= TREE_CODE_LENGTH (code
);
5752 if (op_type
!= unary_op
&& op_type
!= binary_op
&& op_type
!= ternary_op
)
5754 if (dump_enabled_p ())
5755 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5756 "num. args = %d (not unary/binary/ternary op).\n",
5761 scalar_dest
= gimple_assign_lhs (stmt
);
5762 vectype_out
= STMT_VINFO_VECTYPE (stmt_info
);
5764 /* Most operations cannot handle bit-precision types without extra
5766 bool mask_op_p
= VECTOR_BOOLEAN_TYPE_P (vectype_out
);
5768 && !type_has_mode_precision_p (TREE_TYPE (scalar_dest
))
5769 /* Exception are bitwise binary operations. */
5770 && code
!= BIT_IOR_EXPR
5771 && code
!= BIT_XOR_EXPR
5772 && code
!= BIT_AND_EXPR
)
5774 if (dump_enabled_p ())
5775 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5776 "bit-precision arithmetic not supported.\n");
5781 if (!vect_is_simple_use (vinfo
, stmt_info
, slp_node
,
5782 0, &op0
, &slp_op0
, &dt
[0], &vectype
))
5784 if (dump_enabled_p ())
5785 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5786 "use not simple.\n");
5789 /* If op0 is an external or constant def, infer the vector type
5790 from the scalar type. */
5793 /* For boolean type we cannot determine vectype by
5794 invariant value (don't know whether it is a vector
5795 of booleans or vector of integers). We use output
5796 vectype because operations on boolean don't change
5798 if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op0
)))
5800 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (scalar_dest
)))
5802 if (dump_enabled_p ())
5803 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5804 "not supported operation on bool value.\n");
5807 vectype
= vectype_out
;
5810 vectype
= get_vectype_for_scalar_type (vinfo
, TREE_TYPE (op0
),
5814 gcc_assert (vectype
);
5817 if (dump_enabled_p ())
5818 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5819 "no vectype for scalar type %T\n",
5825 nunits_out
= TYPE_VECTOR_SUBPARTS (vectype_out
);
5826 nunits_in
= TYPE_VECTOR_SUBPARTS (vectype
);
5827 if (maybe_ne (nunits_out
, nunits_in
))
5830 tree vectype2
= NULL_TREE
, vectype3
= NULL_TREE
;
5831 slp_tree slp_op1
= NULL
, slp_op2
= NULL
;
5832 if (op_type
== binary_op
|| op_type
== ternary_op
)
5834 if (!vect_is_simple_use (vinfo
, stmt_info
, slp_node
,
5835 1, &op1
, &slp_op1
, &dt
[1], &vectype2
))
5837 if (dump_enabled_p ())
5838 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5839 "use not simple.\n");
5843 if (op_type
== ternary_op
)
5845 if (!vect_is_simple_use (vinfo
, stmt_info
, slp_node
,
5846 2, &op2
, &slp_op2
, &dt
[2], &vectype3
))
5848 if (dump_enabled_p ())
5849 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5850 "use not simple.\n");
5855 /* Multiple types in SLP are handled by creating the appropriate number of
5856 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
5861 vec_num
= SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
);
5865 ncopies
= vect_get_num_copies (loop_vinfo
, vectype
);
5869 gcc_assert (ncopies
>= 1);
5871 /* Reject attempts to combine mask types with nonmask types, e.g. if
5872 we have an AND between a (nonmask) boolean loaded from memory and
5873 a (mask) boolean result of a comparison.
5875 TODO: We could easily fix these cases up using pattern statements. */
5876 if (VECTOR_BOOLEAN_TYPE_P (vectype
) != mask_op_p
5877 || (vectype2
&& VECTOR_BOOLEAN_TYPE_P (vectype2
) != mask_op_p
)
5878 || (vectype3
&& VECTOR_BOOLEAN_TYPE_P (vectype3
) != mask_op_p
))
5880 if (dump_enabled_p ())
5881 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5882 "mixed mask and nonmask vector types\n");
5886 /* Supportable by target? */
5888 vec_mode
= TYPE_MODE (vectype
);
5889 if (code
== MULT_HIGHPART_EXPR
)
5890 target_support_p
= can_mult_highpart_p (vec_mode
, TYPE_UNSIGNED (vectype
));
5893 optab
= optab_for_tree_code (code
, vectype
, optab_default
);
5896 if (dump_enabled_p ())
5897 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5901 target_support_p
= (optab_handler (optab
, vec_mode
)
5902 != CODE_FOR_nothing
);
5905 if (!target_support_p
)
5907 if (dump_enabled_p ())
5908 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5909 "op not supported by target.\n");
5910 /* Check only during analysis. */
5911 if (maybe_ne (GET_MODE_SIZE (vec_mode
), UNITS_PER_WORD
)
5912 || (!vec_stmt
&& !vect_worthwhile_without_simd_p (vinfo
, code
)))
5914 if (dump_enabled_p ())
5915 dump_printf_loc (MSG_NOTE
, vect_location
,
5916 "proceeding using word mode.\n");
5919 /* Worthwhile without SIMD support? Check only during analysis. */
5920 if (!VECTOR_MODE_P (vec_mode
)
5922 && !vect_worthwhile_without_simd_p (vinfo
, code
))
5924 if (dump_enabled_p ())
5925 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5926 "not worthwhile without SIMD support.\n");
5930 int reduc_idx
= STMT_VINFO_REDUC_IDX (stmt_info
);
5931 vec_loop_masks
*masks
= (loop_vinfo
? &LOOP_VINFO_MASKS (loop_vinfo
) : NULL
);
5932 internal_fn cond_fn
= get_conditional_internal_fn (code
);
5934 if (!vec_stmt
) /* transformation not required. */
5936 /* If this operation is part of a reduction, a fully-masked loop
5937 should only change the active lanes of the reduction chain,
5938 keeping the inactive lanes as-is. */
5940 && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo
)
5943 if (cond_fn
== IFN_LAST
5944 || !direct_internal_fn_supported_p (cond_fn
, vectype
,
5945 OPTIMIZE_FOR_SPEED
))
5947 if (dump_enabled_p ())
5948 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5949 "can't use a fully-masked loop because no"
5950 " conditional operation is available.\n");
5951 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo
) = false;
5954 vect_record_loop_mask (loop_vinfo
, masks
, ncopies
* vec_num
,
5958 /* Put types on constant and invariant SLP children. */
5960 && (!vect_maybe_update_slp_op_vectype (slp_op0
, vectype
)
5961 || !vect_maybe_update_slp_op_vectype (slp_op1
, vectype
)
5962 || !vect_maybe_update_slp_op_vectype (slp_op2
, vectype
)))
5964 if (dump_enabled_p ())
5965 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5966 "incompatible vector types for invariants\n");
5970 STMT_VINFO_TYPE (stmt_info
) = op_vec_info_type
;
5971 DUMP_VECT_SCOPE ("vectorizable_operation");
5972 vect_model_simple_cost (vinfo
, stmt_info
,
5973 ncopies
, dt
, ndts
, slp_node
, cost_vec
);
5979 if (dump_enabled_p ())
5980 dump_printf_loc (MSG_NOTE
, vect_location
,
5981 "transform binary/unary operation.\n");
5983 bool masked_loop_p
= loop_vinfo
&& LOOP_VINFO_FULLY_MASKED_P (loop_vinfo
);
  /* POINTER_DIFF_EXPR has pointer arguments which are vectorized as
     vectors with unsigned elements, but the result is signed.  So, we
     need to compute the MINUS_EXPR into vectype temporary and
     VIEW_CONVERT_EXPR it into the final vectype_out result.  */
  tree vec_cvt_dest = NULL_TREE;
  if (orig_code == POINTER_DIFF_EXPR)
    {
      vec_dest = vect_create_destination_var (scalar_dest, vectype);
      vec_cvt_dest = vect_create_destination_var (scalar_dest, vectype_out);
    }
  else
    vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
  /* In case the vectorization factor (VF) is bigger than the number
     of elements that we can fit in a vectype (nunits), we have to generate
     more than one vector stmt - i.e - we need to "unroll" the
     vector stmt by a factor VF/nunits.  In doing so, we record a pointer
     from one copy of the vector stmt to the next, in the field
     STMT_VINFO_RELATED_STMT.  This is necessary in order to allow following
     stages to find the correct vector defs to be used when vectorizing
     stmts that use the defs of the current stmt.  The example below
     illustrates the vectorization process when VF=16 and nunits=4 (i.e.,
     we need to create 4 vectorized stmts):

     before vectorization:
                                RELATED_STMT    VEC_STMT
        S1:     x = memref      -               -
        S2:     z = x + 1       -               -

     step 1: vectorize stmt S1 (done in vectorizable_load. See more details
             there):
                                RELATED_STMT    VEC_STMT
        VS1_0:  vx0 = memref0   VS1_1           -
        VS1_1:  vx1 = memref1   VS1_2           -
        VS1_2:  vx2 = memref2   VS1_3           -
        VS1_3:  vx3 = memref3   -               -
        S1:     x = load        -               VS1_0
        S2:     z = x + 1       -               -

     step2: vectorize stmt S2 (done here):
        To vectorize stmt S2 we first need to find the relevant vector
        def for the first operand 'x'.  This is, as usual, obtained from
        the vector stmt recorded in the STMT_VINFO_VEC_STMT of the stmt
        that defines 'x' (S1).  This way we find the stmt VS1_0, and the
        relevant vector def 'vx0'.  Having found 'vx0' we can generate
        the vector stmt VS2_0, and as usual, record it in the
        STMT_VINFO_VEC_STMT of stmt S2.
        When creating the second copy (VS2_1), we obtain the relevant vector
        def from the vector stmt recorded in the STMT_VINFO_RELATED_STMT of
        stmt VS1_0.  This way we find the stmt VS1_1 and the relevant
        vector def 'vx1'.  Using 'vx1' we create stmt VS2_1 and record a
        pointer to it in the STMT_VINFO_RELATED_STMT of the vector stmt VS2_0.
        Similarly when creating stmts VS2_2 and VS2_3.  This is the resulting
        chain of stmts and pointers:
                                RELATED_STMT    VEC_STMT
        VS1_0:  vx0 = memref0   VS1_1           -
        VS1_1:  vx1 = memref1   VS1_2           -
        VS1_2:  vx2 = memref2   VS1_3           -
        VS1_3:  vx3 = memref3   -               -
        S1:     x = load        -               VS1_0
        VS2_0:  vz0 = vx0 + v1  VS2_1           -
        VS2_1:  vz1 = vx1 + v1  VS2_2           -
        VS2_2:  vz2 = vx2 + v1  VS2_3           -
        VS2_3:  vz3 = vx3 + v1  -               -
        S2:     z = x + 1       -               VS2_0  */
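
  /* Illustrative sketch (not from the original sources): assuming a scalar
     loop like

	 for (i = 0; i < n; i++)
	   z[i] = x[i] + 1;

     with VF == 16 and 4-element vectors, the chain above corresponds to
     emitting four copies of the vector add, one per copy of the load:

	 vx0 = MEM[&x[i]];      vz0 = vx0 + { 1, 1, 1, 1 };
	 vx1 = MEM[&x[i+4]];    vz1 = vx1 + { 1, 1, 1, 1 };
	 vx2 = MEM[&x[i+8]];    vz2 = vx2 + { 1, 1, 1, 1 };
	 vx3 = MEM[&x[i+12]];   vz3 = vx3 + { 1, 1, 1, 1 };  */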
6052 vect_get_vec_defs (vinfo
, stmt_info
, slp_node
, ncopies
,
6053 op0
, &vec_oprnds0
, op1
, &vec_oprnds1
, op2
, &vec_oprnds2
);
6054 /* Arguments are ready. Create the new vector stmt. */
6055 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vop0
)
6057 gimple
*new_stmt
= NULL
;
6058 vop1
= ((op_type
== binary_op
|| op_type
== ternary_op
)
6059 ? vec_oprnds1
[i
] : NULL_TREE
);
6060 vop2
= ((op_type
== ternary_op
) ? vec_oprnds2
[i
] : NULL_TREE
);
6061 if (masked_loop_p
&& reduc_idx
>= 0)
6063 /* Perform the operation on active elements only and take
6064 inactive elements from the reduction chain input. */
6066 vop2
= reduc_idx
== 1 ? vop1
: vop0
;
6067 tree mask
= vect_get_loop_mask (gsi
, masks
, vec_num
* ncopies
,
6069 gcall
*call
= gimple_build_call_internal (cond_fn
, 4, mask
,
6071 new_temp
= make_ssa_name (vec_dest
, call
);
6072 gimple_call_set_lhs (call
, new_temp
);
6073 gimple_call_set_nothrow (call
, true);
6074 vect_finish_stmt_generation (vinfo
, stmt_info
, call
, gsi
);
6079 new_stmt
= gimple_build_assign (vec_dest
, code
, vop0
, vop1
, vop2
);
6080 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
6081 gimple_assign_set_lhs (new_stmt
, new_temp
);
6082 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
6085 new_temp
= build1 (VIEW_CONVERT_EXPR
, vectype_out
, new_temp
);
6086 new_stmt
= gimple_build_assign (vec_cvt_dest
, VIEW_CONVERT_EXPR
,
6088 new_temp
= make_ssa_name (vec_cvt_dest
, new_stmt
);
6089 gimple_assign_set_lhs (new_stmt
, new_temp
);
6090 vect_finish_stmt_generation (vinfo
, stmt_info
,
6095 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
6097 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (new_stmt
);
  if (!slp_node)
    *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];

  vec_oprnds0.release ();
  vec_oprnds1.release ();
  vec_oprnds2.release ();

  return true;
}
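
/* Illustrative sketch (not from the original sources): in the fully-masked
   reduction path above, the conditional internal function keeps inactive
   lanes equal to the reduction chain input.  Assuming CODE is PLUS_EXPR and
   the reduction operand is VOP0, the generated call has the form

       new_temp = .COND_ADD (loop_mask, vop0, vop1, vop0);

   i.e. lanes where LOOP_MASK is false pass VOP0 through unchanged, which is
   what gimple_build_call_internal (cond_fn, 4, mask, ...) with the last
   argument set to the reduction chain input produces.  */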
/* A helper function to ensure data reference DR_INFO's base alignment.  */

static void
ensure_base_align (dr_vec_info *dr_info)
{
  if (dr_info->misalignment == DR_MISALIGNMENT_UNINITIALIZED)
    return;

  if (dr_info->base_misaligned)
    {
      tree base_decl = dr_info->base_decl;

      // We should only be able to increase the alignment of a base object if
      // we know what its new alignment should be at compile time.
      unsigned HOST_WIDE_INT align_base_to =
	DR_TARGET_ALIGNMENT (dr_info).to_constant () * BITS_PER_UNIT;

      if (decl_in_symtab_p (base_decl))
	symtab_node::get (base_decl)->increase_alignment (align_base_to);
      else if (DECL_ALIGN (base_decl) < align_base_to)
	{
	  SET_DECL_ALIGN (base_decl, align_base_to);
	  DECL_USER_ALIGN (base_decl) = 1;
	}
      dr_info->base_misaligned = false;
    }
}
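
/* Illustrative sketch (not from the original sources): if a data reference
   is based on a file-scope array such as

       static int a[1024];   // default alignment, say 16 bytes

   and DR_TARGET_ALIGNMENT (dr_info) is 32 bytes, the code above raises
   DECL_ALIGN of 'a' to 32 * BITS_PER_UNIT (through its symtab node when 'a'
   is in the symbol table), so that the vectorizer can emit aligned vector
   loads and stores for it.  */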
/* Function get_group_alias_ptr_type.

   Return the alias type for the group starting at FIRST_STMT_INFO.  */

static tree
get_group_alias_ptr_type (stmt_vec_info first_stmt_info)
{
  struct data_reference *first_dr, *next_dr;

  first_dr = STMT_VINFO_DATA_REF (first_stmt_info);
  stmt_vec_info next_stmt_info = DR_GROUP_NEXT_ELEMENT (first_stmt_info);
  while (next_stmt_info)
    {
      next_dr = STMT_VINFO_DATA_REF (next_stmt_info);
      if (get_alias_set (DR_REF (first_dr))
	  != get_alias_set (DR_REF (next_dr)))
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_NOTE, vect_location,
			     "conflicting alias set types.\n");
	  return ptr_type_node;
	}
      next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
    }
  return reference_alias_ptr_type (DR_REF (first_dr));
}
/* Function scan_operand_equal_p.

   Helper function for check_scan_store.  Compare two references
   with .GOMP_SIMD_LANE bases.  */

static bool
scan_operand_equal_p (tree ref1, tree ref2)
{
  tree ref[2] = { ref1, ref2 };
  poly_int64 bitsize[2], bitpos[2];
  tree offset[2], base[2];
6178 for (int i
= 0; i
< 2; ++i
)
6181 int unsignedp
, reversep
, volatilep
= 0;
6182 base
[i
] = get_inner_reference (ref
[i
], &bitsize
[i
], &bitpos
[i
],
6183 &offset
[i
], &mode
, &unsignedp
,
6184 &reversep
, &volatilep
);
6185 if (reversep
|| volatilep
|| maybe_ne (bitpos
[i
], 0))
6187 if (TREE_CODE (base
[i
]) == MEM_REF
6188 && offset
[i
] == NULL_TREE
6189 && TREE_CODE (TREE_OPERAND (base
[i
], 0)) == SSA_NAME
)
6191 gimple
*def_stmt
= SSA_NAME_DEF_STMT (TREE_OPERAND (base
[i
], 0));
6192 if (is_gimple_assign (def_stmt
)
6193 && gimple_assign_rhs_code (def_stmt
) == POINTER_PLUS_EXPR
6194 && TREE_CODE (gimple_assign_rhs1 (def_stmt
)) == ADDR_EXPR
6195 && TREE_CODE (gimple_assign_rhs2 (def_stmt
)) == SSA_NAME
)
6197 if (maybe_ne (mem_ref_offset (base
[i
]), 0))
6199 base
[i
] = TREE_OPERAND (gimple_assign_rhs1 (def_stmt
), 0);
6200 offset
[i
] = gimple_assign_rhs2 (def_stmt
);
6205 if (!operand_equal_p (base
[0], base
[1], 0))
6207 if (maybe_ne (bitsize
[0], bitsize
[1]))
6209 if (offset
[0] != offset
[1])
6211 if (!offset
[0] || !offset
[1])
6213 if (!operand_equal_p (offset
[0], offset
[1], 0))
6216 for (int i
= 0; i
< 2; ++i
)
6218 step
[i
] = integer_one_node
;
6219 if (TREE_CODE (offset
[i
]) == SSA_NAME
)
6221 gimple
*def_stmt
= SSA_NAME_DEF_STMT (offset
[i
]);
6222 if (is_gimple_assign (def_stmt
)
6223 && gimple_assign_rhs_code (def_stmt
) == MULT_EXPR
6224 && (TREE_CODE (gimple_assign_rhs2 (def_stmt
))
6227 step
[i
] = gimple_assign_rhs2 (def_stmt
);
6228 offset
[i
] = gimple_assign_rhs1 (def_stmt
);
6231 else if (TREE_CODE (offset
[i
]) == MULT_EXPR
)
6233 step
[i
] = TREE_OPERAND (offset
[i
], 1);
6234 offset
[i
] = TREE_OPERAND (offset
[i
], 0);
6236 tree rhs1
= NULL_TREE
;
6237 if (TREE_CODE (offset
[i
]) == SSA_NAME
)
6239 gimple
*def_stmt
= SSA_NAME_DEF_STMT (offset
[i
]);
6240 if (gimple_assign_cast_p (def_stmt
))
6241 rhs1
= gimple_assign_rhs1 (def_stmt
);
6243 else if (CONVERT_EXPR_P (offset
[i
]))
6244 rhs1
= TREE_OPERAND (offset
[i
], 0);
6246 && INTEGRAL_TYPE_P (TREE_TYPE (rhs1
))
6247 && INTEGRAL_TYPE_P (TREE_TYPE (offset
[i
]))
6248 && (TYPE_PRECISION (TREE_TYPE (offset
[i
]))
6249 >= TYPE_PRECISION (TREE_TYPE (rhs1
))))
6252 if (!operand_equal_p (offset
[0], offset
[1], 0)
6253 || !operand_equal_p (step
[0], step
[1], 0))
enum scan_store_kind {
  /* Normal permutation.  */
  scan_store_kind_perm,

  /* Whole vector left shift permutation with zero init.  */
  scan_store_kind_lshift_zero,

  /* Whole vector left shift permutation and VEC_COND_EXPR.  */
  scan_store_kind_lshift_cond
};

/* Function scan_store_can_perm_p.

   Verify if we can perform the needed permutations or whole vector shifts.
   Return -1 on failure, otherwise exact log2 of vectype's nunits.
   USE_WHOLE_VECTOR is a vector of enum scan_store_kind which operation
   to do at each step.  */

static int
scan_store_can_perm_p (tree vectype, tree init,
		       vec<enum scan_store_kind> *use_whole_vector = NULL)
{
  enum machine_mode vec_mode = TYPE_MODE (vectype);
  unsigned HOST_WIDE_INT nunits;
  if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant (&nunits))
    return -1;
  int units_log2 = exact_log2 (nunits);
  if (units_log2 <= 0)
    return -1;
6292 enum scan_store_kind whole_vector_shift_kind
= scan_store_kind_perm
;
6293 for (i
= 0; i
<= units_log2
; ++i
)
6295 unsigned HOST_WIDE_INT j
, k
;
6296 enum scan_store_kind kind
= scan_store_kind_perm
;
6297 vec_perm_builder
sel (nunits
, nunits
, 1);
6298 sel
.quick_grow (nunits
);
6299 if (i
== units_log2
)
6301 for (j
= 0; j
< nunits
; ++j
)
6302 sel
[j
] = nunits
- 1;
6306 for (j
= 0; j
< (HOST_WIDE_INT_1U
<< i
); ++j
)
6308 for (k
= 0; j
< nunits
; ++j
, ++k
)
6309 sel
[j
] = nunits
+ k
;
6311 vec_perm_indices
indices (sel
, i
== units_log2
? 1 : 2, nunits
);
6312 if (!can_vec_perm_const_p (vec_mode
, indices
))
6314 if (i
== units_log2
)
6317 if (whole_vector_shift_kind
== scan_store_kind_perm
)
6319 if (optab_handler (vec_shl_optab
, vec_mode
) == CODE_FOR_nothing
)
6321 whole_vector_shift_kind
= scan_store_kind_lshift_zero
;
6322 /* Whole vector shifts shift in zeros, so if init is all zero
6323 constant, there is no need to do anything further. */
6324 if ((TREE_CODE (init
) != INTEGER_CST
6325 && TREE_CODE (init
) != REAL_CST
)
6326 || !initializer_zerop (init
))
6328 tree masktype
= truth_type_for (vectype
);
6329 if (!expand_vec_cond_expr_p (vectype
, masktype
, VECTOR_CST
))
6331 whole_vector_shift_kind
= scan_store_kind_lshift_cond
;
6334 kind
= whole_vector_shift_kind
;
6336 if (use_whole_vector
)
6338 if (kind
!= scan_store_kind_perm
&& use_whole_vector
->is_empty ())
6339 use_whole_vector
->safe_grow_cleared (i
, true);
6340 if (kind
!= scan_store_kind_perm
|| !use_whole_vector
->is_empty ())
6341 use_whole_vector
->safe_push (kind
);
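
  /* Illustrative sketch (not from the original sources): for an 8-element
     vector the loop above builds units_log2 == 3 two-operand selectors plus
     a final broadcast selector, i.e.

	 i == 0:  { 0, 8,  9, 10, 11, 12, 13, 14 }
	 i == 1:  { 0, 1,  8,  9, 10, 11, 12, 13 }
	 i == 2:  { 0, 1,  2,  3,  8,  9, 10, 11 }
	 i == 3:  { 7, 7,  7,  7,  7,  7,  7,  7 }

     which are exactly the VEC_PERM_EXPR masks used in the scan expansion
     documented in check_scan_store below.  */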
/* Function check_scan_store.

   Check magic stores for #pragma omp scan {in,ex}clusive reductions.  */

static bool
check_scan_store (vec_info *vinfo, stmt_vec_info stmt_info, tree vectype,
		  enum vect_def_type rhs_dt, bool slp, tree mask,
		  vect_memory_access_type memory_access_type)
{
6358 loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
);
6359 dr_vec_info
*dr_info
= STMT_VINFO_DR_INFO (stmt_info
);
6362 gcc_assert (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) > 1);
6365 || memory_access_type
!= VMAT_CONTIGUOUS
6366 || TREE_CODE (DR_BASE_ADDRESS (dr_info
->dr
)) != ADDR_EXPR
6367 || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (dr_info
->dr
), 0))
6368 || loop_vinfo
== NULL
6369 || LOOP_VINFO_FULLY_MASKED_P (loop_vinfo
)
6370 || STMT_VINFO_GROUPED_ACCESS (stmt_info
)
6371 || !integer_zerop (get_dr_vinfo_offset (vinfo
, dr_info
))
6372 || !integer_zerop (DR_INIT (dr_info
->dr
))
6373 || !(ref_type
= reference_alias_ptr_type (DR_REF (dr_info
->dr
)))
6374 || !alias_sets_conflict_p (get_alias_set (vectype
),
6375 get_alias_set (TREE_TYPE (ref_type
))))
6377 if (dump_enabled_p ())
6378 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6379 "unsupported OpenMP scan store.\n");
  /* We need to pattern match code built by OpenMP lowering and simplified
     by following optimizations into something we can handle.
     #pragma omp simd reduction(inscan,+:r)
       ...
	 #pragma omp scan inclusive (r)
       ...
     shall have body with:
       // Initialization for input phase, store the reduction initializer:
       _20 = .GOMP_SIMD_LANE (simduid.3_14(D), 0);
       _21 = .GOMP_SIMD_LANE (simduid.3_14(D), 1);
       ...
       // Actual input phase:
       ...
       r.0_5 = D.2042[_20];
       ...
       // Initialization for scan phase:
       _25 = .GOMP_SIMD_LANE (simduid.3_14(D), 2);
       ...
       // Actual scan phase:
       ...
       r.1_8 = D.2042[_20];
       ...
     The "omp simd array" variable D.2042 holds the privatized copy used
     inside of the loop and D.2043 is another one that holds copies of
     the current original list item.  The separate GOMP_SIMD_LANE ifn
     kinds are there in order to allow optimizing the initializer store
     and combiner sequence, e.g. if it is originally some C++ish user
     defined reduction, but allow the vectorizer to pattern recognize it
     and turn into the appropriate vectorized scan.

     For exclusive scan, this is slightly different:
     #pragma omp simd reduction(inscan,+:r)
       ...
	 #pragma omp scan exclusive (r)
       ...
     shall have body with:
       // Initialization for input phase, store the reduction initializer:
       _20 = .GOMP_SIMD_LANE (simduid.3_14(D), 0);
       _21 = .GOMP_SIMD_LANE (simduid.3_14(D), 1);
       ...
       // Actual input phase:
       ...
       r.0_5 = D.2042[_20];
       ...
       // Initialization for scan phase:
       _25 = .GOMP_SIMD_LANE (simduid.3_14(D), 3);
       ...
       // Actual scan phase:
       ...
       r.1_8 = D.2044[_20];
       ...  */
6451 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) == 2)
6453 /* Match the D.2042[_21] = 0; store above. Just require that
6454 it is a constant or external definition store. */
6455 if (rhs_dt
!= vect_constant_def
&& rhs_dt
!= vect_external_def
)
6458 if (dump_enabled_p ())
6459 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6460 "unsupported OpenMP scan initializer store.\n");
6464 if (! loop_vinfo
->scan_map
)
6465 loop_vinfo
->scan_map
= new hash_map
<tree
, tree
>;
6466 tree var
= TREE_OPERAND (DR_BASE_ADDRESS (dr_info
->dr
), 0);
6467 tree
&cached
= loop_vinfo
->scan_map
->get_or_insert (var
);
6470 cached
= gimple_assign_rhs1 (STMT_VINFO_STMT (stmt_info
));
6472 /* These stores can be vectorized normally. */
6476 if (rhs_dt
!= vect_internal_def
)
6479 if (dump_enabled_p ())
6480 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6481 "unsupported OpenMP scan combiner pattern.\n");
6485 gimple
*stmt
= STMT_VINFO_STMT (stmt_info
);
6486 tree rhs
= gimple_assign_rhs1 (stmt
);
6487 if (TREE_CODE (rhs
) != SSA_NAME
)
6490 gimple
*other_store_stmt
= NULL
;
6491 tree var
= TREE_OPERAND (DR_BASE_ADDRESS (dr_info
->dr
), 0);
6492 bool inscan_var_store
6493 = lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var
)) != NULL
;
6495 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) == 4)
6497 if (!inscan_var_store
)
6499 use_operand_p use_p
;
6500 imm_use_iterator iter
;
6501 FOR_EACH_IMM_USE_FAST (use_p
, iter
, rhs
)
6503 gimple
*use_stmt
= USE_STMT (use_p
);
6504 if (use_stmt
== stmt
|| is_gimple_debug (use_stmt
))
6506 if (gimple_bb (use_stmt
) != gimple_bb (stmt
)
6507 || !is_gimple_assign (use_stmt
)
6508 || gimple_assign_rhs_class (use_stmt
) != GIMPLE_BINARY_RHS
6510 || TREE_CODE (gimple_assign_lhs (use_stmt
)) != SSA_NAME
)
6512 other_store_stmt
= use_stmt
;
6514 if (other_store_stmt
== NULL
)
6516 rhs
= gimple_assign_lhs (other_store_stmt
);
6517 if (!single_imm_use (rhs
, &use_p
, &other_store_stmt
))
6521 else if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) == 3)
6523 use_operand_p use_p
;
6524 imm_use_iterator iter
;
6525 FOR_EACH_IMM_USE_FAST (use_p
, iter
, rhs
)
6527 gimple
*use_stmt
= USE_STMT (use_p
);
6528 if (use_stmt
== stmt
|| is_gimple_debug (use_stmt
))
6530 if (other_store_stmt
)
6532 other_store_stmt
= use_stmt
;
6538 gimple
*def_stmt
= SSA_NAME_DEF_STMT (rhs
);
6539 if (gimple_bb (def_stmt
) != gimple_bb (stmt
)
6540 || !is_gimple_assign (def_stmt
)
6541 || gimple_assign_rhs_class (def_stmt
) != GIMPLE_BINARY_RHS
)
6544 enum tree_code code
= gimple_assign_rhs_code (def_stmt
);
6545 /* For pointer addition, we should use the normal plus for the vector
6549 case POINTER_PLUS_EXPR
:
6552 case MULT_HIGHPART_EXPR
:
6557 if (TREE_CODE_LENGTH (code
) != binary_op
|| !commutative_tree_code (code
))
6560 tree rhs1
= gimple_assign_rhs1 (def_stmt
);
6561 tree rhs2
= gimple_assign_rhs2 (def_stmt
);
6562 if (TREE_CODE (rhs1
) != SSA_NAME
|| TREE_CODE (rhs2
) != SSA_NAME
)
6565 gimple
*load1_stmt
= SSA_NAME_DEF_STMT (rhs1
);
6566 gimple
*load2_stmt
= SSA_NAME_DEF_STMT (rhs2
);
6567 if (gimple_bb (load1_stmt
) != gimple_bb (stmt
)
6568 || !gimple_assign_load_p (load1_stmt
)
6569 || gimple_bb (load2_stmt
) != gimple_bb (stmt
)
6570 || !gimple_assign_load_p (load2_stmt
))
6573 stmt_vec_info load1_stmt_info
= loop_vinfo
->lookup_stmt (load1_stmt
);
6574 stmt_vec_info load2_stmt_info
= loop_vinfo
->lookup_stmt (load2_stmt
);
6575 if (load1_stmt_info
== NULL
6576 || load2_stmt_info
== NULL
6577 || (STMT_VINFO_SIMD_LANE_ACCESS_P (load1_stmt_info
)
6578 != STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
))
6579 || (STMT_VINFO_SIMD_LANE_ACCESS_P (load2_stmt_info
)
6580 != STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
)))
6583 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) == 4 && inscan_var_store
)
6585 dr_vec_info
*load1_dr_info
= STMT_VINFO_DR_INFO (load1_stmt_info
);
6586 if (TREE_CODE (DR_BASE_ADDRESS (load1_dr_info
->dr
)) != ADDR_EXPR
6587 || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (load1_dr_info
->dr
), 0)))
6589 tree var1
= TREE_OPERAND (DR_BASE_ADDRESS (load1_dr_info
->dr
), 0);
6591 if (lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1
)))
6595 use_operand_p use_p
;
6596 imm_use_iterator iter
;
6597 FOR_EACH_IMM_USE_FAST (use_p
, iter
, lrhs
)
6599 gimple
*use_stmt
= USE_STMT (use_p
);
6600 if (use_stmt
== def_stmt
|| is_gimple_debug (use_stmt
))
6602 if (other_store_stmt
)
6604 other_store_stmt
= use_stmt
;
6608 if (other_store_stmt
== NULL
)
6610 if (gimple_bb (other_store_stmt
) != gimple_bb (stmt
)
6611 || !gimple_store_p (other_store_stmt
))
6614 stmt_vec_info other_store_stmt_info
6615 = loop_vinfo
->lookup_stmt (other_store_stmt
);
6616 if (other_store_stmt_info
== NULL
6617 || (STMT_VINFO_SIMD_LANE_ACCESS_P (other_store_stmt_info
)
6618 != STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
)))
6621 gimple
*stmt1
= stmt
;
6622 gimple
*stmt2
= other_store_stmt
;
6623 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) == 4 && !inscan_var_store
)
6624 std::swap (stmt1
, stmt2
);
6625 if (scan_operand_equal_p (gimple_assign_lhs (stmt1
),
6626 gimple_assign_rhs1 (load2_stmt
)))
6628 std::swap (rhs1
, rhs2
);
6629 std::swap (load1_stmt
, load2_stmt
);
6630 std::swap (load1_stmt_info
, load2_stmt_info
);
6632 if (!scan_operand_equal_p (gimple_assign_lhs (stmt1
),
6633 gimple_assign_rhs1 (load1_stmt
)))
6636 tree var3
= NULL_TREE
;
6637 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) == 3
6638 && !scan_operand_equal_p (gimple_assign_lhs (stmt2
),
6639 gimple_assign_rhs1 (load2_stmt
)))
6641 else if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) == 4)
6643 dr_vec_info
*load2_dr_info
= STMT_VINFO_DR_INFO (load2_stmt_info
);
6644 if (TREE_CODE (DR_BASE_ADDRESS (load2_dr_info
->dr
)) != ADDR_EXPR
6645 || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (load2_dr_info
->dr
), 0)))
6647 var3
= TREE_OPERAND (DR_BASE_ADDRESS (load2_dr_info
->dr
), 0);
6648 if (!lookup_attribute ("omp simd array", DECL_ATTRIBUTES (var3
))
6649 || lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var3
))
6650 || lookup_attribute ("omp simd inscan exclusive",
6651 DECL_ATTRIBUTES (var3
)))
6655 dr_vec_info
*other_dr_info
= STMT_VINFO_DR_INFO (other_store_stmt_info
);
6656 if (TREE_CODE (DR_BASE_ADDRESS (other_dr_info
->dr
)) != ADDR_EXPR
6657 || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (other_dr_info
->dr
), 0)))
6660 tree var1
= TREE_OPERAND (DR_BASE_ADDRESS (dr_info
->dr
), 0);
6661 tree var2
= TREE_OPERAND (DR_BASE_ADDRESS (other_dr_info
->dr
), 0);
6662 if (!lookup_attribute ("omp simd array", DECL_ATTRIBUTES (var1
))
6663 || !lookup_attribute ("omp simd array", DECL_ATTRIBUTES (var2
))
6664 || (!lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1
)))
6665 == (!lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var2
))))
6668 if (lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1
)))
6669 std::swap (var1
, var2
);
6671 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) == 4)
6673 if (!lookup_attribute ("omp simd inscan exclusive",
6674 DECL_ATTRIBUTES (var1
)))
6679 if (loop_vinfo
->scan_map
== NULL
)
6681 tree
*init
= loop_vinfo
->scan_map
->get (var1
);
  /* The IL is as expected, now check if we can actually vectorize it.
       ...
     should be vectorized as (where _40 is the vectorized rhs
     from the D.2042[_21] = 0; store):
       _30 = MEM <vector(8) int> [(int *)&D.2043];
       _31 = MEM <vector(8) int> [(int *)&D.2042];
       _32 = VEC_PERM_EXPR <_40, _31, { 0, 8, 9, 10, 11, 12, 13, 14 }>;
       _33 = _31 + _32;
       // _33 = { _31[0], _31[0]+_31[1], _31[1]+_31[2], ..., _31[6]+_31[7] };
       _34 = VEC_PERM_EXPR <_40, _33, { 0, 1, 8, 9, 10, 11, 12, 13 }>;
       _35 = _33 + _34;
       // _35 = { _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
       //         _31[1]+.._31[4], ... _31[4]+.._31[7] };
       _36 = VEC_PERM_EXPR <_40, _35, { 0, 1, 2, 3, 8, 9, 10, 11 }>;
       _37 = _35 + _36;
       // _37 = { _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
       //         _31[0]+.._31[4], ... _31[0]+.._31[7] };
       _38 = _30 + _37;
       _39 = VEC_PERM_EXPR <_38, _38, { 7, 7, 7, 7, 7, 7, 7, 7 }>;
       MEM <vector(8) int> [(int *)&D.2043] = _39;
       MEM <vector(8) int> [(int *)&D.2042] = _38;
       ...
     should be vectorized as (where _40 is the vectorized rhs
     from the D.2042[_21] = 0; store):
       _30 = MEM <vector(8) int> [(int *)&D.2043];
       _31 = MEM <vector(8) int> [(int *)&D.2042];
       _32 = VEC_PERM_EXPR <_40, _31, { 0, 8, 9, 10, 11, 12, 13, 14 }>;
       _33 = VEC_PERM_EXPR <_40, _32, { 0, 8, 9, 10, 11, 12, 13, 14 }>;
       _34 = _32 + _33;
       // _34 = { 0, _31[0], _31[0]+_31[1], _31[1]+_31[2], _31[2]+_31[3],
       //         _31[3]+_31[4], ... _31[5]+.._31[6] };
       _35 = VEC_PERM_EXPR <_40, _34, { 0, 1, 8, 9, 10, 11, 12, 13 }>;
       _36 = _34 + _35;
       // _36 = { 0, _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
       //         _31[1]+.._31[4], ... _31[3]+.._31[6] };
       _37 = VEC_PERM_EXPR <_40, _36, { 0, 1, 2, 3, 8, 9, 10, 11 }>;
       _38 = _36 + _37;
       // _38 = { 0, _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
       //         _31[0]+.._31[4], ... _31[0]+.._31[6] };
       ...
       _51 = VEC_PERM_EXPR <_50, _50, { 7, 7, 7, 7, 7, 7, 7, 7 }>;
       MEM <vector(8) int> [(int *)&D.2044] = _39;
       MEM <vector(8) int> [(int *)&D.2042] = _51;  */
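
  /* Illustrative sketch (not from the original sources): with the inclusive
     scheme above, a vector _31 = { 1, 2, 3, 4, 5, 6, 7, 8 } (and a zero _40)
     is turned into prefix sums in log2(8) == 3 shift-and-add steps:

	 after step 1:  { 1, 3,  5,  7,  9, 11, 13, 15 }
	 after step 2:  { 1, 3,  6, 10, 14, 18, 22, 26 }
	 after step 3:  { 1, 3,  6, 10, 15, 21, 28, 36 }

     i.e. element j of the result is _31[0] + ... + _31[j].  */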
6739 enum machine_mode vec_mode
= TYPE_MODE (vectype
);
6740 optab optab
= optab_for_tree_code (code
, vectype
, optab_default
);
6741 if (!optab
|| optab_handler (optab
, vec_mode
) == CODE_FOR_nothing
)
6744 int units_log2
= scan_store_can_perm_p (vectype
, *init
);
6745 if (units_log2
== -1)
/* Function vectorizable_scan_store.

   Helper of vectorizable_store, arguments like on vectorizable_store.
   Handle only the transformation, checking is done in check_scan_store.  */

static bool
vectorizable_scan_store (vec_info *vinfo,
			 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
			 gimple **vec_stmt, int ncopies)
{
  loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
  dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
  tree ref_type = reference_alias_ptr_type (DR_REF (dr_info->dr));
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
6767 if (dump_enabled_p ())
6768 dump_printf_loc (MSG_NOTE
, vect_location
,
6769 "transform scan store. ncopies = %d\n", ncopies
);
6771 gimple
*stmt
= STMT_VINFO_STMT (stmt_info
);
6772 tree rhs
= gimple_assign_rhs1 (stmt
);
6773 gcc_assert (TREE_CODE (rhs
) == SSA_NAME
);
6775 tree var
= TREE_OPERAND (DR_BASE_ADDRESS (dr_info
->dr
), 0);
6776 bool inscan_var_store
6777 = lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var
)) != NULL
;
6779 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) == 4 && !inscan_var_store
)
6781 use_operand_p use_p
;
6782 imm_use_iterator iter
;
6783 FOR_EACH_IMM_USE_FAST (use_p
, iter
, rhs
)
6785 gimple
*use_stmt
= USE_STMT (use_p
);
6786 if (use_stmt
== stmt
|| is_gimple_debug (use_stmt
))
6788 rhs
= gimple_assign_lhs (use_stmt
);
6793 gimple
*def_stmt
= SSA_NAME_DEF_STMT (rhs
);
6794 enum tree_code code
= gimple_assign_rhs_code (def_stmt
);
6795 if (code
== POINTER_PLUS_EXPR
)
6797 gcc_assert (TREE_CODE_LENGTH (code
) == binary_op
6798 && commutative_tree_code (code
));
6799 tree rhs1
= gimple_assign_rhs1 (def_stmt
);
6800 tree rhs2
= gimple_assign_rhs2 (def_stmt
);
6801 gcc_assert (TREE_CODE (rhs1
) == SSA_NAME
&& TREE_CODE (rhs2
) == SSA_NAME
);
6802 gimple
*load1_stmt
= SSA_NAME_DEF_STMT (rhs1
);
6803 gimple
*load2_stmt
= SSA_NAME_DEF_STMT (rhs2
);
6804 stmt_vec_info load1_stmt_info
= loop_vinfo
->lookup_stmt (load1_stmt
);
6805 stmt_vec_info load2_stmt_info
= loop_vinfo
->lookup_stmt (load2_stmt
);
6806 dr_vec_info
*load1_dr_info
= STMT_VINFO_DR_INFO (load1_stmt_info
);
6807 dr_vec_info
*load2_dr_info
= STMT_VINFO_DR_INFO (load2_stmt_info
);
6808 tree var1
= TREE_OPERAND (DR_BASE_ADDRESS (load1_dr_info
->dr
), 0);
6809 tree var2
= TREE_OPERAND (DR_BASE_ADDRESS (load2_dr_info
->dr
), 0);
6811 if (lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1
)))
6813 std::swap (rhs1
, rhs2
);
6814 std::swap (var1
, var2
);
6815 std::swap (load1_dr_info
, load2_dr_info
);
6818 tree
*init
= loop_vinfo
->scan_map
->get (var1
);
6821 unsigned HOST_WIDE_INT nunits
;
6822 if (!TYPE_VECTOR_SUBPARTS (vectype
).is_constant (&nunits
))
6824 auto_vec
<enum scan_store_kind
, 16> use_whole_vector
;
6825 int units_log2
= scan_store_can_perm_p (vectype
, *init
, &use_whole_vector
);
6826 gcc_assert (units_log2
> 0);
6827 auto_vec
<tree
, 16> perms
;
6828 perms
.quick_grow (units_log2
+ 1);
6829 tree zero_vec
= NULL_TREE
, masktype
= NULL_TREE
;
6830 for (int i
= 0; i
<= units_log2
; ++i
)
6832 unsigned HOST_WIDE_INT j
, k
;
6833 vec_perm_builder
sel (nunits
, nunits
, 1);
6834 sel
.quick_grow (nunits
);
6835 if (i
== units_log2
)
6836 for (j
= 0; j
< nunits
; ++j
)
6837 sel
[j
] = nunits
- 1;
6840 for (j
= 0; j
< (HOST_WIDE_INT_1U
<< i
); ++j
)
6842 for (k
= 0; j
< nunits
; ++j
, ++k
)
6843 sel
[j
] = nunits
+ k
;
6845 vec_perm_indices
indices (sel
, i
== units_log2
? 1 : 2, nunits
);
6846 if (!use_whole_vector
.is_empty ()
6847 && use_whole_vector
[i
] != scan_store_kind_perm
)
6849 if (zero_vec
== NULL_TREE
)
6850 zero_vec
= build_zero_cst (vectype
);
6851 if (masktype
== NULL_TREE
6852 && use_whole_vector
[i
] == scan_store_kind_lshift_cond
)
6853 masktype
= truth_type_for (vectype
);
6854 perms
[i
] = vect_gen_perm_mask_any (vectype
, indices
);
6857 perms
[i
] = vect_gen_perm_mask_checked (vectype
, indices
);
6860 tree vec_oprnd1
= NULL_TREE
;
6861 tree vec_oprnd2
= NULL_TREE
;
6862 tree vec_oprnd3
= NULL_TREE
;
6863 tree dataref_ptr
= DR_BASE_ADDRESS (dr_info
->dr
);
6864 tree dataref_offset
= build_int_cst (ref_type
, 0);
6865 tree bump
= vect_get_data_ptr_increment (vinfo
, dr_info
,
6866 vectype
, VMAT_CONTIGUOUS
);
6867 tree ldataref_ptr
= NULL_TREE
;
6868 tree orig
= NULL_TREE
;
6869 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) == 4 && !inscan_var_store
)
6870 ldataref_ptr
= DR_BASE_ADDRESS (load1_dr_info
->dr
);
6871 auto_vec
<tree
> vec_oprnds1
;
6872 auto_vec
<tree
> vec_oprnds2
;
6873 auto_vec
<tree
> vec_oprnds3
;
6874 vect_get_vec_defs (vinfo
, stmt_info
, NULL
, ncopies
,
6875 *init
, &vec_oprnds1
,
6876 ldataref_ptr
== NULL
? rhs1
: NULL
, &vec_oprnds2
,
6877 rhs2
, &vec_oprnds3
);
6878 for (int j
= 0; j
< ncopies
; j
++)
6880 vec_oprnd1
= vec_oprnds1
[j
];
6881 if (ldataref_ptr
== NULL
)
6882 vec_oprnd2
= vec_oprnds2
[j
];
6883 vec_oprnd3
= vec_oprnds3
[j
];
6886 else if (!inscan_var_store
)
6887 dataref_offset
= int_const_binop (PLUS_EXPR
, dataref_offset
, bump
);
6891 vec_oprnd2
= make_ssa_name (vectype
);
6892 tree data_ref
= fold_build2 (MEM_REF
, vectype
,
6893 unshare_expr (ldataref_ptr
),
6895 vect_copy_ref_info (data_ref
, DR_REF (load1_dr_info
->dr
));
6896 gimple
*g
= gimple_build_assign (vec_oprnd2
, data_ref
);
6897 vect_finish_stmt_generation (vinfo
, stmt_info
, g
, gsi
);
6898 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (g
);
6899 *vec_stmt
= STMT_VINFO_VEC_STMTS (stmt_info
)[0];
6902 tree v
= vec_oprnd2
;
6903 for (int i
= 0; i
< units_log2
; ++i
)
6905 tree new_temp
= make_ssa_name (vectype
);
6906 gimple
*g
= gimple_build_assign (new_temp
, VEC_PERM_EXPR
,
6908 && (use_whole_vector
[i
]
6909 != scan_store_kind_perm
))
6910 ? zero_vec
: vec_oprnd1
, v
,
6912 vect_finish_stmt_generation (vinfo
, stmt_info
, g
, gsi
);
6913 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (g
);
6914 *vec_stmt
= STMT_VINFO_VEC_STMTS (stmt_info
)[0];
6916 if (zero_vec
&& use_whole_vector
[i
] == scan_store_kind_lshift_cond
)
6918 /* Whole vector shift shifted in zero bits, but if *init
6919 is not initializer_zerop, we need to replace those elements
6920 with elements from vec_oprnd1. */
6921 tree_vector_builder
vb (masktype
, nunits
, 1);
6922 for (unsigned HOST_WIDE_INT k
= 0; k
< nunits
; ++k
)
6923 vb
.quick_push (k
< (HOST_WIDE_INT_1U
<< i
)
6924 ? boolean_false_node
: boolean_true_node
);
6926 tree new_temp2
= make_ssa_name (vectype
);
6927 g
= gimple_build_assign (new_temp2
, VEC_COND_EXPR
, vb
.build (),
6928 new_temp
, vec_oprnd1
);
6929 vect_finish_stmt_generation (vinfo
, stmt_info
,
6931 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (g
);
6932 new_temp
= new_temp2
;
6935 /* For exclusive scan, perform the perms[i] permutation once
6938 && STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) == 4
6946 tree new_temp2
= make_ssa_name (vectype
);
6947 g
= gimple_build_assign (new_temp2
, code
, v
, new_temp
);
6948 vect_finish_stmt_generation (vinfo
, stmt_info
, g
, gsi
);
6949 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (g
);
6954 tree new_temp
= make_ssa_name (vectype
);
6955 gimple
*g
= gimple_build_assign (new_temp
, code
, orig
, v
);
6956 vect_finish_stmt_generation (vinfo
, stmt_info
, g
, gsi
);
6957 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (g
);
6959 tree last_perm_arg
= new_temp
;
6960 /* For exclusive scan, new_temp computed above is the exclusive scan
6961 prefix sum. Turn it into inclusive prefix sum for the broadcast
6962 of the last element into orig. */
6963 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) == 4)
6965 last_perm_arg
= make_ssa_name (vectype
);
6966 g
= gimple_build_assign (last_perm_arg
, code
, new_temp
, vec_oprnd2
);
6967 vect_finish_stmt_generation (vinfo
, stmt_info
, g
, gsi
);
6968 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (g
);
6971 orig
= make_ssa_name (vectype
);
6972 g
= gimple_build_assign (orig
, VEC_PERM_EXPR
, last_perm_arg
,
6973 last_perm_arg
, perms
[units_log2
]);
6974 vect_finish_stmt_generation (vinfo
, stmt_info
, g
, gsi
);
6975 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (g
);
6977 if (!inscan_var_store
)
6979 tree data_ref
= fold_build2 (MEM_REF
, vectype
,
6980 unshare_expr (dataref_ptr
),
6982 vect_copy_ref_info (data_ref
, DR_REF (dr_info
->dr
));
6983 g
= gimple_build_assign (data_ref
, new_temp
);
6984 vect_finish_stmt_generation (vinfo
, stmt_info
, g
, gsi
);
6985 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (g
);
6989 if (inscan_var_store
)
6990 for (int j
= 0; j
< ncopies
; j
++)
6993 dataref_offset
= int_const_binop (PLUS_EXPR
, dataref_offset
, bump
);
6995 tree data_ref
= fold_build2 (MEM_REF
, vectype
,
6996 unshare_expr (dataref_ptr
),
6998 vect_copy_ref_info (data_ref
, DR_REF (dr_info
->dr
));
6999 gimple
*g
= gimple_build_assign (data_ref
, orig
);
7000 vect_finish_stmt_generation (vinfo
, stmt_info
, g
, gsi
);
7001 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (g
);
/* Function vectorizable_store.

   Check if STMT_INFO defines a non scalar data-ref (array/pointer/structure)
   that can be vectorized.
   If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at GSI.
   Return true if STMT_INFO is vectorizable in this way.  */

static bool
vectorizable_store (vec_info *vinfo,
		    stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
		    gimple **vec_stmt, slp_tree slp_node,
		    stmt_vector_for_cost *cost_vec)
{
7023 tree vec_oprnd
= NULL_TREE
;
7025 loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
);
7026 class loop
*loop
= NULL
;
7027 machine_mode vec_mode
;
7029 enum vect_def_type rhs_dt
= vect_unknown_def_type
;
7030 enum vect_def_type mask_dt
= vect_unknown_def_type
;
7031 tree dataref_ptr
= NULL_TREE
;
7032 tree dataref_offset
= NULL_TREE
;
7033 gimple
*ptr_incr
= NULL
;
7036 stmt_vec_info first_stmt_info
;
7038 unsigned int group_size
, i
;
7039 vec
<tree
> oprnds
= vNULL
;
7040 vec
<tree
> result_chain
= vNULL
;
7041 tree offset
= NULL_TREE
;
7042 vec
<tree
> vec_oprnds
= vNULL
;
7043 bool slp
= (slp_node
!= NULL
);
7044 unsigned int vec_num
;
7045 bb_vec_info bb_vinfo
= dyn_cast
<bb_vec_info
> (vinfo
);
7047 gather_scatter_info gs_info
;
7049 vec_load_store_type vls_type
;
7052 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
7055 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
7059 /* Is vectorizable store? */
7061 tree mask
= NULL_TREE
, mask_vectype
= NULL_TREE
;
7062 if (gassign
*assign
= dyn_cast
<gassign
*> (stmt_info
->stmt
))
7064 tree scalar_dest
= gimple_assign_lhs (assign
);
7065 if (TREE_CODE (scalar_dest
) == VIEW_CONVERT_EXPR
7066 && is_pattern_stmt_p (stmt_info
))
7067 scalar_dest
= TREE_OPERAND (scalar_dest
, 0);
7068 if (TREE_CODE (scalar_dest
) != ARRAY_REF
7069 && TREE_CODE (scalar_dest
) != BIT_FIELD_REF
7070 && TREE_CODE (scalar_dest
) != INDIRECT_REF
7071 && TREE_CODE (scalar_dest
) != COMPONENT_REF
7072 && TREE_CODE (scalar_dest
) != IMAGPART_EXPR
7073 && TREE_CODE (scalar_dest
) != REALPART_EXPR
7074 && TREE_CODE (scalar_dest
) != MEM_REF
)
7079 gcall
*call
= dyn_cast
<gcall
*> (stmt_info
->stmt
);
7080 if (!call
|| !gimple_call_internal_p (call
))
7083 internal_fn ifn
= gimple_call_internal_fn (call
);
7084 if (!internal_store_fn_p (ifn
))
7087 if (slp_node
!= NULL
)
7089 if (dump_enabled_p ())
7090 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
7091 "SLP of masked stores not supported.\n");
7095 int mask_index
= internal_fn_mask_index (ifn
);
7096 if (mask_index
>= 0)
7098 mask
= gimple_call_arg (call
, mask_index
);
7099 if (!vect_check_scalar_mask (vinfo
, stmt_info
, mask
, &mask_dt
,
7105 op
= vect_get_store_rhs (stmt_info
);
7107 /* Cannot have hybrid store SLP -- that would mean storing to the
7108 same location twice. */
7109 gcc_assert (slp
== PURE_SLP_STMT (stmt_info
));
7111 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
), rhs_vectype
= NULL_TREE
;
7112 poly_uint64 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
7116 loop
= LOOP_VINFO_LOOP (loop_vinfo
);
7117 vf
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
);
7122 /* Multiple types in SLP are handled by creating the appropriate number of
7123 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
7128 ncopies
= vect_get_num_copies (loop_vinfo
, vectype
);
7130 gcc_assert (ncopies
>= 1);
7132 /* FORNOW. This restriction should be relaxed. */
7133 if (loop
&& nested_in_vect_loop_p (loop
, stmt_info
) && ncopies
> 1)
7135 if (dump_enabled_p ())
7136 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
7137 "multiple types in nested loop.\n");
7141 if (!vect_check_store_rhs (vinfo
, stmt_info
, slp_node
,
7142 op
, &rhs_dt
, &rhs_vectype
, &vls_type
))
7145 elem_type
= TREE_TYPE (vectype
);
7146 vec_mode
= TYPE_MODE (vectype
);
7148 if (!STMT_VINFO_DATA_REF (stmt_info
))
7151 vect_memory_access_type memory_access_type
;
7152 enum dr_alignment_support alignment_support_scheme
;
7153 if (!get_load_store_type (vinfo
, stmt_info
, vectype
, slp_node
, mask
, vls_type
,
7154 ncopies
, &memory_access_type
,
7155 &alignment_support_scheme
, &gs_info
))
7160 if (memory_access_type
== VMAT_CONTIGUOUS
)
7162 if (!VECTOR_MODE_P (vec_mode
)
7163 || !can_vec_mask_load_store_p (vec_mode
,
7164 TYPE_MODE (mask_vectype
), false))
7167 else if (memory_access_type
!= VMAT_LOAD_STORE_LANES
7168 && (memory_access_type
!= VMAT_GATHER_SCATTER
7169 || (gs_info
.decl
&& !VECTOR_BOOLEAN_TYPE_P (mask_vectype
))))
7171 if (dump_enabled_p ())
7172 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
7173 "unsupported access type for masked store.\n");
7179 /* FORNOW. In some cases can vectorize even if data-type not supported
7180 (e.g. - array initialization with 0). */
7181 if (optab_handler (mov_optab
, vec_mode
) == CODE_FOR_nothing
)
7185 dr_vec_info
*dr_info
= STMT_VINFO_DR_INFO (stmt_info
), *first_dr_info
= NULL
;
7186 grouped_store
= (STMT_VINFO_GROUPED_ACCESS (stmt_info
)
7187 && memory_access_type
!= VMAT_GATHER_SCATTER
7188 && (slp
|| memory_access_type
!= VMAT_CONTIGUOUS
));
7191 first_stmt_info
= DR_GROUP_FIRST_ELEMENT (stmt_info
);
7192 first_dr_info
= STMT_VINFO_DR_INFO (first_stmt_info
);
7193 group_size
= DR_GROUP_SIZE (first_stmt_info
);
7197 first_stmt_info
= stmt_info
;
7198 first_dr_info
= dr_info
;
7199 group_size
= vec_num
= 1;
7202 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) > 1 && !vec_stmt
)
7204 if (!check_scan_store (vinfo
, stmt_info
, vectype
, rhs_dt
, slp
, mask
,
7205 memory_access_type
))
7209 if (!vec_stmt
) /* transformation not required. */
7211 STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info
) = memory_access_type
;
7214 && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo
))
7215 check_load_store_for_partial_vectors (loop_vinfo
, vectype
, vls_type
,
7216 group_size
, memory_access_type
,
7220 && !vect_maybe_update_slp_op_vectype (SLP_TREE_CHILDREN (slp_node
)[0],
7223 if (dump_enabled_p ())
7224 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
7225 "incompatible vector types for invariants\n");
7229 if (dump_enabled_p ()
7230 && memory_access_type
!= VMAT_ELEMENTWISE
7231 && memory_access_type
!= VMAT_GATHER_SCATTER
7232 && alignment_support_scheme
!= dr_aligned
)
7233 dump_printf_loc (MSG_NOTE
, vect_location
,
7234 "Vectorizing an unaligned access.\n");
7236 STMT_VINFO_TYPE (stmt_info
) = store_vec_info_type
;
7237 vect_model_store_cost (vinfo
, stmt_info
, ncopies
,
7238 memory_access_type
, vls_type
, slp_node
, cost_vec
);
7241 gcc_assert (memory_access_type
== STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info
));
7245 ensure_base_align (dr_info
);
7247 if (memory_access_type
== VMAT_GATHER_SCATTER
&& gs_info
.decl
)
7249 tree vec_oprnd0
= NULL_TREE
, vec_oprnd1
= NULL_TREE
, src
;
7250 tree arglist
= TYPE_ARG_TYPES (TREE_TYPE (gs_info
.decl
));
7251 tree rettype
, srctype
, ptrtype
, idxtype
, masktype
, scaletype
;
7252 tree ptr
, var
, scale
, vec_mask
;
7253 tree mask_arg
= NULL_TREE
, mask_op
= NULL_TREE
, perm_mask
= NULL_TREE
;
7254 tree mask_halfvectype
= mask_vectype
;
7255 edge pe
= loop_preheader_edge (loop
);
7258 enum { NARROW
, NONE
, WIDEN
} modifier
;
7259 poly_uint64 scatter_off_nunits
7260 = TYPE_VECTOR_SUBPARTS (gs_info
.offset_vectype
);
7262 if (known_eq (nunits
, scatter_off_nunits
))
7264 else if (known_eq (nunits
* 2, scatter_off_nunits
))
7268 /* Currently gathers and scatters are only supported for
7269 fixed-length vectors. */
7270 unsigned int count
= scatter_off_nunits
.to_constant ();
7271 vec_perm_builder
sel (count
, count
, 1);
7272 for (i
= 0; i
< (unsigned int) count
; ++i
)
7273 sel
.quick_push (i
| (count
/ 2));
7275 vec_perm_indices
indices (sel
, 1, count
);
7276 perm_mask
= vect_gen_perm_mask_checked (gs_info
.offset_vectype
,
7278 gcc_assert (perm_mask
!= NULL_TREE
);
7280 else if (known_eq (nunits
, scatter_off_nunits
* 2))
7284 /* Currently gathers and scatters are only supported for
7285 fixed-length vectors. */
7286 unsigned int count
= nunits
.to_constant ();
7287 vec_perm_builder
sel (count
, count
, 1);
7288 for (i
= 0; i
< (unsigned int) count
; ++i
)
7289 sel
.quick_push (i
| (count
/ 2));
7291 vec_perm_indices
indices (sel
, 2, count
);
7292 perm_mask
= vect_gen_perm_mask_checked (vectype
, indices
);
7293 gcc_assert (perm_mask
!= NULL_TREE
);
7297 mask_halfvectype
= truth_type_for (gs_info
.offset_vectype
);
7302 rettype
= TREE_TYPE (TREE_TYPE (gs_info
.decl
));
7303 ptrtype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
7304 masktype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
7305 idxtype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
7306 srctype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
7307 scaletype
= TREE_VALUE (arglist
);
7309 gcc_checking_assert (TREE_CODE (masktype
) == INTEGER_TYPE
7310 && TREE_CODE (rettype
) == VOID_TYPE
);
7312 ptr
= fold_convert (ptrtype
, gs_info
.base
);
7313 if (!is_gimple_min_invariant (ptr
))
7315 ptr
= force_gimple_operand (ptr
, &seq
, true, NULL_TREE
);
7316 new_bb
= gsi_insert_seq_on_edge_immediate (pe
, seq
);
7317 gcc_assert (!new_bb
);
7320 if (mask
== NULL_TREE
)
7322 mask_arg
= build_int_cst (masktype
, -1);
7323 mask_arg
= vect_init_vector (vinfo
, stmt_info
,
7324 mask_arg
, masktype
, NULL
);
7327 scale
= build_int_cst (scaletype
, gs_info
.scale
);
7329 auto_vec
<tree
> vec_oprnds0
;
7330 auto_vec
<tree
> vec_oprnds1
;
7331 auto_vec
<tree
> vec_masks
;
7334 tree mask_vectype
= truth_type_for (vectype
);
7335 vect_get_vec_defs_for_operand (vinfo
, stmt_info
,
7337 ? ncopies
/ 2 : ncopies
,
7338 mask
, &vec_masks
, mask_vectype
);
7340 vect_get_vec_defs_for_operand (vinfo
, stmt_info
,
7342 ? ncopies
/ 2 : ncopies
,
7343 gs_info
.offset
, &vec_oprnds0
);
7344 vect_get_vec_defs_for_operand (vinfo
, stmt_info
,
7346 ? ncopies
/ 2 : ncopies
,
7348 for (j
= 0; j
< ncopies
; ++j
)
7350 if (modifier
== WIDEN
)
7353 op
= permute_vec_elements (vinfo
, vec_oprnd0
, vec_oprnd0
,
7354 perm_mask
, stmt_info
, gsi
);
7356 op
= vec_oprnd0
= vec_oprnds0
[j
/ 2];
7357 src
= vec_oprnd1
= vec_oprnds1
[j
];
7359 mask_op
= vec_mask
= vec_masks
[j
];
7361 else if (modifier
== NARROW
)
7364 src
= permute_vec_elements (vinfo
, vec_oprnd1
, vec_oprnd1
,
7365 perm_mask
, stmt_info
, gsi
);
7367 src
= vec_oprnd1
= vec_oprnds1
[j
/ 2];
7368 op
= vec_oprnd0
= vec_oprnds0
[j
];
7370 mask_op
= vec_mask
= vec_masks
[j
/ 2];
7374 op
= vec_oprnd0
= vec_oprnds0
[j
];
7375 src
= vec_oprnd1
= vec_oprnds1
[j
];
7377 mask_op
= vec_mask
= vec_masks
[j
];
7380 if (!useless_type_conversion_p (srctype
, TREE_TYPE (src
)))
7382 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (src
)),
7383 TYPE_VECTOR_SUBPARTS (srctype
)));
7384 var
= vect_get_new_ssa_name (srctype
, vect_simple_var
);
7385 src
= build1 (VIEW_CONVERT_EXPR
, srctype
, src
);
7387 = gimple_build_assign (var
, VIEW_CONVERT_EXPR
, src
);
7388 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
7392 if (!useless_type_conversion_p (idxtype
, TREE_TYPE (op
)))
7394 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op
)),
7395 TYPE_VECTOR_SUBPARTS (idxtype
)));
7396 var
= vect_get_new_ssa_name (idxtype
, vect_simple_var
);
7397 op
= build1 (VIEW_CONVERT_EXPR
, idxtype
, op
);
7399 = gimple_build_assign (var
, VIEW_CONVERT_EXPR
, op
);
7400 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
7408 if (modifier
== NARROW
)
7410 var
= vect_get_new_ssa_name (mask_halfvectype
,
7413 = gimple_build_assign (var
, (j
& 1) ? VEC_UNPACK_HI_EXPR
7414 : VEC_UNPACK_LO_EXPR
,
7416 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
7419 tree optype
= TREE_TYPE (mask_arg
);
7420 if (TYPE_MODE (masktype
) == TYPE_MODE (optype
))
7423 utype
= lang_hooks
.types
.type_for_mode (TYPE_MODE (optype
), 1);
7424 var
= vect_get_new_ssa_name (utype
, vect_scalar_var
);
7425 mask_arg
= build1 (VIEW_CONVERT_EXPR
, utype
, mask_arg
);
7427 = gimple_build_assign (var
, VIEW_CONVERT_EXPR
, mask_arg
);
7428 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
7430 if (!useless_type_conversion_p (masktype
, utype
))
7432 gcc_assert (TYPE_PRECISION (utype
)
7433 <= TYPE_PRECISION (masktype
));
7434 var
= vect_get_new_ssa_name (masktype
, vect_scalar_var
);
7435 new_stmt
= gimple_build_assign (var
, NOP_EXPR
, mask_arg
);
7436 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
7442 = gimple_build_call (gs_info
.decl
, 5, ptr
, mask_arg
, op
, src
, scale
);
7443 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
7445 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (new_stmt
);
7447 *vec_stmt
= STMT_VINFO_VEC_STMTS (stmt_info
)[0];
7450 else if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) >= 3)
7451 return vectorizable_scan_store (vinfo
, stmt_info
, gsi
, vec_stmt
, ncopies
);
7453 if (STMT_VINFO_GROUPED_ACCESS (stmt_info
))
7454 DR_GROUP_STORE_COUNT (DR_GROUP_FIRST_ELEMENT (stmt_info
))++;
7459 gcc_assert (!loop
|| !nested_in_vect_loop_p (loop
, stmt_info
));
7461 /* We vectorize all the stmts of the interleaving group when we
7462 reach the last stmt in the group. */
7463 if (DR_GROUP_STORE_COUNT (first_stmt_info
)
7464 < DR_GROUP_SIZE (first_stmt_info
)
7473 grouped_store
= false;
7474 /* VEC_NUM is the number of vect stmts to be created for this
7476 vec_num
= SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
);
7477 first_stmt_info
= SLP_TREE_SCALAR_STMTS (slp_node
)[0];
7478 gcc_assert (DR_GROUP_FIRST_ELEMENT (first_stmt_info
)
7479 == first_stmt_info
);
7480 first_dr_info
= STMT_VINFO_DR_INFO (first_stmt_info
);
7481 op
= vect_get_store_rhs (first_stmt_info
);
7484 /* VEC_NUM is the number of vect stmts to be created for this
7486 vec_num
= group_size
;
7488 ref_type
= get_group_alias_ptr_type (first_stmt_info
);
7491 ref_type
= reference_alias_ptr_type (DR_REF (first_dr_info
->dr
));
7493 if (dump_enabled_p ())
7494 dump_printf_loc (MSG_NOTE
, vect_location
,
7495 "transform store. ncopies = %d\n", ncopies
);
7497 if (memory_access_type
== VMAT_ELEMENTWISE
7498 || memory_access_type
== VMAT_STRIDED_SLP
)
7500 gimple_stmt_iterator incr_gsi
;
7506 tree stride_base
, stride_step
, alias_off
;
7510 /* Checked by get_load_store_type. */
7511 unsigned int const_nunits
= nunits
.to_constant ();
7513 gcc_assert (!LOOP_VINFO_FULLY_MASKED_P (loop_vinfo
));
7514 gcc_assert (!nested_in_vect_loop_p (loop
, stmt_info
));
7516 dr_offset
= get_dr_vinfo_offset (vinfo
, first_dr_info
);
7518 = fold_build_pointer_plus
7519 (DR_BASE_ADDRESS (first_dr_info
->dr
),
7520 size_binop (PLUS_EXPR
,
7521 convert_to_ptrofftype (dr_offset
),
7522 convert_to_ptrofftype (DR_INIT (first_dr_info
->dr
))));
7523 stride_step
= fold_convert (sizetype
, DR_STEP (first_dr_info
->dr
));
      /* For a store with loop-invariant (but other than power-of-2)
	 stride (i.e. not a grouped access) like so:

	   for (i = 0; i < n; i += stride)
	     array[i] = ...;

	 we generate a new induction variable and new stores from
	 the components of the (vectorized) rhs:

	   for (j = 0; ; j += VF*stride)
	     vectemp = ...;
	     tmp1 = vectemp[0];
	     array[j] = tmp1;
	     tmp2 = vectemp[1];
	     array[j + stride] = tmp2;
	     ...
       */
7543 unsigned nstores
= const_nunits
;
7545 tree ltype
= elem_type
;
7546 tree lvectype
= vectype
;
7549 if (group_size
< const_nunits
7550 && const_nunits
% group_size
== 0)
7552 nstores
= const_nunits
/ group_size
;
7554 ltype
= build_vector_type (elem_type
, group_size
);
7557 /* First check if vec_extract optab doesn't support extraction
7558 of vector elts directly. */
7559 scalar_mode elmode
= SCALAR_TYPE_MODE (elem_type
);
7561 if (!VECTOR_MODE_P (TYPE_MODE (vectype
))
7562 || !related_vector_mode (TYPE_MODE (vectype
), elmode
,
7563 group_size
).exists (&vmode
)
7564 || (convert_optab_handler (vec_extract_optab
,
7565 TYPE_MODE (vectype
), vmode
)
7566 == CODE_FOR_nothing
))
7568 /* Try to avoid emitting an extract of vector elements
7569 by performing the extracts using an integer type of the
7570 same size, extracting from a vector of those and then
7571 re-interpreting it as the original vector type if
7574 = group_size
* GET_MODE_BITSIZE (elmode
);
7575 unsigned int lnunits
= const_nunits
/ group_size
;
7576 /* If we can't construct such a vector fall back to
7577 element extracts from the original vector type and
7578 element size stores. */
7579 if (int_mode_for_size (lsize
, 0).exists (&elmode
)
7580 && VECTOR_MODE_P (TYPE_MODE (vectype
))
7581 && related_vector_mode (TYPE_MODE (vectype
), elmode
,
7582 lnunits
).exists (&vmode
)
7583 && (convert_optab_handler (vec_extract_optab
,
7585 != CODE_FOR_nothing
))
7589 ltype
= build_nonstandard_integer_type (lsize
, 1);
7590 lvectype
= build_vector_type (ltype
, nstores
);
7592 /* Else fall back to vector extraction anyway.
7593 Fewer stores are more important than avoiding spilling
7594 of the vector we extract from. Compared to the
7595 construction case in vectorizable_load no store-forwarding
7596 issue exists here for reasonable archs. */
7599 else if (group_size
>= const_nunits
7600 && group_size
% const_nunits
== 0)
7603 lnel
= const_nunits
;
7607 ltype
= build_aligned_type (ltype
, TYPE_ALIGN (elem_type
));
7608 ncopies
= SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
);
7611 ivstep
= stride_step
;
7612 ivstep
= fold_build2 (MULT_EXPR
, TREE_TYPE (ivstep
), ivstep
,
7613 build_int_cst (TREE_TYPE (ivstep
), vf
));
7615 standard_iv_increment_position (loop
, &incr_gsi
, &insert_after
);
7617 stride_base
= cse_and_gimplify_to_preheader (loop_vinfo
, stride_base
);
7618 ivstep
= cse_and_gimplify_to_preheader (loop_vinfo
, ivstep
);
7619 create_iv (stride_base
, ivstep
, NULL
,
7620 loop
, &incr_gsi
, insert_after
,
7622 incr
= gsi_stmt (incr_gsi
);
7624 stride_step
= cse_and_gimplify_to_preheader (loop_vinfo
, stride_step
);
      alias_off = build_int_cst (ref_type, 0);
      stmt_vec_info next_stmt_info = first_stmt_info;
      for (g = 0; g < group_size; g++)
	{
	  running_off = offvar;
	  if (g)
	    {
	      tree size = TYPE_SIZE_UNIT (ltype);
	      tree pos = fold_build2 (MULT_EXPR, sizetype, size_int (g),
				      size);
	      tree newoff = copy_ssa_name (running_off, NULL);
	      incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
					  running_off, pos);
	      vect_finish_stmt_generation (vinfo, stmt_info, incr, gsi);
	      running_off = newoff;
	    }
	  if (!slp)
	    op = vect_get_store_rhs (next_stmt_info);
	  vect_get_vec_defs (vinfo, next_stmt_info, slp_node, ncopies,
			     op, &vec_oprnds);
	  unsigned int group_el = 0;
	  unsigned HOST_WIDE_INT
	    elsz = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
	  for (j = 0; j < ncopies; j++)
	    {
	      vec_oprnd = vec_oprnds[j];
	      /* Pun the vector to extract from if necessary.  */
	      if (lvectype != vectype)
		{
		  tree tem = make_ssa_name (lvectype);
		  gimple *pun
		    = gimple_build_assign (tem, build1 (VIEW_CONVERT_EXPR,
							lvectype, vec_oprnd));
		  vect_finish_stmt_generation (vinfo, stmt_info, pun, gsi);
		  vec_oprnd = tem;
		}
	      for (i = 0; i < nstores; i++)
		{
		  tree newref, newoff;
		  gimple *incr, *assign;
		  tree size = TYPE_SIZE (ltype);
		  /* Extract the i'th component.  */
		  tree pos = fold_build2 (MULT_EXPR, bitsizetype,
					  bitsize_int (i), size);
		  tree elem = fold_build3 (BIT_FIELD_REF, ltype, vec_oprnd,
					   size, pos);

		  elem = force_gimple_operand_gsi (gsi, elem, true,
						   NULL_TREE, true,
						   GSI_SAME_STMT);

		  tree this_off = build_int_cst (TREE_TYPE (alias_off),
						 group_el * elsz);
		  newref = build2 (MEM_REF, ltype,
				   running_off, this_off);
		  vect_copy_ref_info (newref, DR_REF (first_dr_info->dr));

		  /* And store it to *running_off.  */
		  assign = gimple_build_assign (newref, elem);
		  vect_finish_stmt_generation (vinfo, stmt_info, assign, gsi);

		  group_el += lnel;
		  if (! slp
		      || group_el == group_size)
		    {
		      newoff = copy_ssa_name (running_off, NULL);
		      incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
						  running_off, stride_step);
		      vect_finish_stmt_generation (vinfo, stmt_info, incr,
						   gsi);

		      running_off = newoff;
		      group_el = 0;
		    }
		  if (g == group_size - 1
		      && !slp)
		    {
		      if (j == 0 && i == 0)
			*vec_stmt = assign;
		      STMT_VINFO_VEC_STMTS (stmt_info).safe_push (assign);
		    }
		}
	    }
	  next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
	  if (slp)
	    break;
	}

      vec_oprnds.release ();
      return true;
    }
  auto_vec<tree> dr_chain (group_size);
  oprnds.create (group_size);

  /* Gather-scatter accesses perform only component accesses, alignment
     is irrelevant for them.  */
  if (memory_access_type == VMAT_GATHER_SCATTER)
    alignment_support_scheme = dr_unaligned_supported;
  else
    alignment_support_scheme
      = vect_supportable_dr_alignment (vinfo, first_dr_info, false);

  gcc_assert (alignment_support_scheme);
  vec_loop_masks *loop_masks
    = (loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)
       ? &LOOP_VINFO_MASKS (loop_vinfo)
       : NULL);
  vec_loop_lens *loop_lens
    = (loop_vinfo && LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo)
       ? &LOOP_VINFO_LENS (loop_vinfo)
       : NULL);

  /* Shouldn't go with length-based approach if fully masked.  */
  gcc_assert (!loop_lens || !loop_masks);
  /* Targets with store-lane instructions must not require explicit
     realignment.  vect_supportable_dr_alignment always returns either
     dr_aligned or dr_unaligned_supported for masked operations.  */
  gcc_assert ((memory_access_type != VMAT_LOAD_STORE_LANES
	       && !mask
	       && !loop_masks)
	      || alignment_support_scheme == dr_aligned
	      || alignment_support_scheme == dr_unaligned_supported);

  if (memory_access_type == VMAT_CONTIGUOUS_DOWN
      || memory_access_type == VMAT_CONTIGUOUS_REVERSE)
    offset = size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1);
  tree bump;
  tree vec_offset = NULL_TREE;
  if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
    {
      aggr_type = NULL_TREE;
      bump = NULL_TREE;
    }
  else if (memory_access_type == VMAT_GATHER_SCATTER)
    {
      aggr_type = elem_type;
      vect_get_strided_load_store_ops (stmt_info, loop_vinfo, &gs_info,
				       &bump, &vec_offset);
    }
  else
    {
      if (memory_access_type == VMAT_LOAD_STORE_LANES)
	aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
      else
	aggr_type = vectype;
      bump = vect_get_data_ptr_increment (vinfo, dr_info, aggr_type,
					  memory_access_type);
    }

  if (mask)
    LOOP_VINFO_HAS_MASK_STORE (loop_vinfo) = true;
  /* In case the vectorization factor (VF) is bigger than the number
     of elements that we can fit in a vectype (nunits), we have to generate
     more than one vector stmt - i.e - we need to "unroll" the
     vector stmt by a factor VF/nunits.  */

  /* In case of interleaving (non-unit grouped access):

        S1:  &base + 2 = x2
        S2:  &base = x0
        S3:  &base + 1 = x1
        S4:  &base + 3 = x3

     We create vectorized stores starting from base address (the access of the
     first stmt in the chain (S2 in the above example), when the last store
     stmt of the chain (S4) is reached:

        VS1: &base = vx2
	VS2: &base + vec_size*1 = vx0
	VS3: &base + vec_size*2 = vx1
	VS4: &base + vec_size*3 = vx3

     Then permutation statements are generated:

	VS5: vx5 = VEC_PERM_EXPR < vx0, vx3, {0, 8, 1, 9, 2, 10, 3, 11} >
	VS6: vx6 = VEC_PERM_EXPR < vx0, vx3, {4, 12, 5, 13, 6, 14, 7, 15} >
	...

     And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
     (the order of the data-refs in the output of vect_permute_store_chain
     corresponds to the order of scalar stmts in the interleaving chain - see
     the documentation of vect_permute_store_chain()).

     In case of both multiple types and interleaving, above vector stores and
     permutation stmts are created for every copy.  The result vector stmts
     are put in STMT_VINFO_VEC_STMT for the first copy and in the
     corresponding STMT_VINFO_RELATED_STMT for the next copies.
  */
  auto_vec<tree> vec_masks;
  tree vec_mask = NULL;
  auto_vec<tree> vec_offsets;
  auto_vec<vec<tree> > gvec_oprnds;
  gvec_oprnds.safe_grow_cleared (group_size, true);
  for (j = 0; j < ncopies; j++)
    {
      gimple *new_stmt;
      if (j == 0)
	{
	  if (slp)
	    {
	      /* Get vectorized arguments for SLP_NODE.  */
	      vect_get_vec_defs (vinfo, stmt_info, slp_node, 1,
				 op, &vec_oprnds);
	      vec_oprnd = vec_oprnds[0];
	    }
	  else
	    {
	      /* For interleaved stores we collect vectorized defs for all the
		 stores in the group in DR_CHAIN and OPRNDS.  DR_CHAIN is then
		 used as an input to vect_permute_store_chain().

		 If the store is not grouped, DR_GROUP_SIZE is 1, and DR_CHAIN
		 and OPRNDS are of size 1.  */
	      stmt_vec_info next_stmt_info = first_stmt_info;
	      for (i = 0; i < group_size; i++)
		{
		  /* Since gaps are not supported for interleaved stores,
		     DR_GROUP_SIZE is the exact number of stmts in the chain.
		     Therefore, NEXT_STMT_INFO can't be NULL_TREE.  In case
		     that there is no interleaving, DR_GROUP_SIZE is 1,
		     and only one iteration of the loop will be executed.  */
		  op = vect_get_store_rhs (next_stmt_info);
		  vect_get_vec_defs_for_operand (vinfo, next_stmt_info,
						 ncopies, op, &gvec_oprnds[i]);
		  vec_oprnd = gvec_oprnds[i][0];
		  dr_chain.quick_push (gvec_oprnds[i][0]);
		  oprnds.quick_push (gvec_oprnds[i][0]);
		  next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
		}
	      if (mask)
		{
		  vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies,
						 mask, &vec_masks,
						 mask_vectype);
		  vec_mask = vec_masks[0];
		}
	    }
	  /* We should have caught mismatched types earlier.  */
	  gcc_assert (useless_type_conversion_p (vectype,
						 TREE_TYPE (vec_oprnd)));
	  bool simd_lane_access_p
	    = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) != 0;
	  if (simd_lane_access_p
	      && !loop_masks
	      && TREE_CODE (DR_BASE_ADDRESS (first_dr_info->dr)) == ADDR_EXPR
	      && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr_info->dr), 0))
	      && integer_zerop (get_dr_vinfo_offset (vinfo, first_dr_info))
	      && integer_zerop (DR_INIT (first_dr_info->dr))
	      && alias_sets_conflict_p (get_alias_set (aggr_type),
					get_alias_set (TREE_TYPE (ref_type))))
	    {
	      dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr_info->dr));
	      dataref_offset = build_int_cst (ref_type, 0);
	    }
	  else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
	    {
	      vect_get_gather_scatter_ops (vinfo, loop, stmt_info, &gs_info,
					   &dataref_ptr, &vec_offsets,
					   ncopies);
	      vec_offset = vec_offsets[0];
	    }
	  else
	    dataref_ptr
	      = vect_create_data_ref_ptr (vinfo, first_stmt_info, aggr_type,
					  simd_lane_access_p ? loop : NULL,
					  offset, &dummy, gsi, &ptr_incr,
					  simd_lane_access_p, NULL_TREE, bump);
	}
      else
	{
	  /* For interleaved stores we created vectorized defs for all the
	     defs stored in OPRNDS in the previous iteration (previous copy).
	     DR_CHAIN is then used as an input to vect_permute_store_chain().
	     If the store is not grouped, DR_GROUP_SIZE is 1, and DR_CHAIN and
	     OPRNDS are of size 1.  */
	  for (i = 0; i < group_size; i++)
	    {
	      vec_oprnd = gvec_oprnds[i][j];
	      dr_chain[i] = gvec_oprnds[i][j];
	      oprnds[i] = gvec_oprnds[i][j];
	    }
	  if (mask)
	    vec_mask = vec_masks[j];
	  if (dataref_offset)
	    dataref_offset
	      = int_const_binop (PLUS_EXPR, dataref_offset, bump);
	  else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
	    vec_offset = vec_offsets[j];
	  else
	    dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr, gsi,
					   stmt_info, bump);
	}

      if (memory_access_type == VMAT_LOAD_STORE_LANES)
	{
	  tree vec_array;

	  /* Get an array into which we can store the individual vectors.  */
	  vec_array = create_vector_array (vectype, vec_num);

	  /* Invalidate the current contents of VEC_ARRAY.  This should
	     become an RTL clobber too, which prevents the vector registers
	     from being upward-exposed.  */
	  vect_clobber_variable (vinfo, stmt_info, gsi, vec_array);

	  /* Store the individual vectors into the array.  */
	  for (i = 0; i < vec_num; i++)
	    {
	      vec_oprnd = dr_chain[i];
	      write_vector_array (vinfo, stmt_info,
				  gsi, vec_oprnd, vec_array, i);
	    }

	  tree final_mask = NULL;
	  if (loop_masks)
	    final_mask = vect_get_loop_mask (gsi, loop_masks, ncopies,
					     vectype, j);
	  if (vec_mask)
	    final_mask = prepare_load_store_mask (mask_vectype, final_mask,
						  vec_mask, gsi);

	  gcall *call;
	  if (final_mask)
	    {
	      /* Emit:
		   MASK_STORE_LANES (DATAREF_PTR, ALIAS_PTR, VEC_MASK,
				     VEC_ARRAY).  */
	      unsigned int align = TYPE_ALIGN_UNIT (TREE_TYPE (vectype));
	      tree alias_ptr = build_int_cst (ref_type, align);
	      call = gimple_build_call_internal (IFN_MASK_STORE_LANES, 4,
						 dataref_ptr, alias_ptr,
						 final_mask, vec_array);
	    }
	  else
	    {
	      /* Emit:
		   MEM_REF[...all elements...] = STORE_LANES (VEC_ARRAY).  */
	      data_ref = create_array_ref (aggr_type, dataref_ptr, ref_type);
	      call = gimple_build_call_internal (IFN_STORE_LANES, 1,
						 vec_array);
	      gimple_call_set_lhs (call, data_ref);
	    }
	  gimple_call_set_nothrow (call, true);
	  vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
	  new_stmt = call;

	  /* Record that VEC_ARRAY is now dead.  */
	  vect_clobber_variable (vinfo, stmt_info, gsi, vec_array);
	}
      else
	{
	  new_stmt = NULL;
	  if (grouped_store)
	    {
	      if (j == 0)
		result_chain.create (group_size);
	      /* Permute.  */
	      vect_permute_store_chain (vinfo, dr_chain, group_size, stmt_info,
					gsi, &result_chain);
	    }
	  stmt_vec_info next_stmt_info = first_stmt_info;
	  for (i = 0; i < vec_num; i++)
	    {
	      unsigned misalign;
	      unsigned HOST_WIDE_INT align;

	      tree final_mask = NULL_TREE;
	      if (loop_masks)
		final_mask = vect_get_loop_mask (gsi, loop_masks,
						 vec_num * ncopies,
						 vectype, vec_num * j + i);
	      if (vec_mask)
		final_mask = prepare_load_store_mask (mask_vectype, final_mask,
						      vec_mask, gsi);

	      if (memory_access_type == VMAT_GATHER_SCATTER)
		{
		  tree scale = size_int (gs_info.scale);
		  gcall *call;
		  if (final_mask)
		    call = gimple_build_call_internal
		      (IFN_MASK_SCATTER_STORE, 5, dataref_ptr, vec_offset,
		       scale, vec_oprnd, final_mask);
		  else
		    call = gimple_build_call_internal
		      (IFN_SCATTER_STORE, 4, dataref_ptr, vec_offset,
		       scale, vec_oprnd);
		  gimple_call_set_nothrow (call, true);
		  vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
		  new_stmt = call;
		  break;
		}

	      if (i > 0)
		/* Bump the vector pointer.  */
		dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr,
					       gsi, stmt_info, bump);
	      if (slp)
		vec_oprnd = vec_oprnds[i];
	      else if (grouped_store)
		/* For grouped stores vectorized defs are interleaved in
		   vect_permute_store_chain().  */
		vec_oprnd = result_chain[i];

	      align = known_alignment (DR_TARGET_ALIGNMENT (first_dr_info));
	      if (aligned_access_p (first_dr_info))
		misalign = 0;
	      else if (DR_MISALIGNMENT (first_dr_info) == -1)
		{
		  align = dr_alignment (vect_dr_behavior (vinfo,
							  first_dr_info));
		  misalign = 0;
		}
	      else
		misalign = DR_MISALIGNMENT (first_dr_info);
	      if (dataref_offset == NULL_TREE
		  && TREE_CODE (dataref_ptr) == SSA_NAME)
		set_ptr_info_alignment (get_ptr_info (dataref_ptr), align,
					misalign);
== VMAT_CONTIGUOUS_REVERSE
)
8052 tree perm_mask
= perm_mask_for_reverse (vectype
);
8053 tree perm_dest
= vect_create_destination_var
8054 (vect_get_store_rhs (stmt_info
), vectype
);
8055 tree new_temp
= make_ssa_name (perm_dest
);
8057 /* Generate the permute statement. */
8059 = gimple_build_assign (new_temp
, VEC_PERM_EXPR
, vec_oprnd
,
8060 vec_oprnd
, perm_mask
);
8061 vect_finish_stmt_generation (vinfo
, stmt_info
, perm_stmt
, gsi
);
8063 perm_stmt
= SSA_NAME_DEF_STMT (new_temp
);
8064 vec_oprnd
= new_temp
;
	      /* Arguments are ready.  Create the new vector stmt.  */
	      if (final_mask)
		{
		  align = least_bit_hwi (misalign | align);
		  tree ptr = build_int_cst (ref_type, align);
		  gcall *call
		    = gimple_build_call_internal (IFN_MASK_STORE, 4,
						  dataref_ptr, ptr,
						  final_mask, vec_oprnd);
		  gimple_call_set_nothrow (call, true);
		  vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
		  new_stmt = call;
		}
	      else if (loop_lens)
		{
		  tree final_len
		    = vect_get_loop_len (loop_vinfo, loop_lens,
					 vec_num * ncopies, vec_num * j + i);
		  align = least_bit_hwi (misalign | align);
		  tree ptr = build_int_cst (ref_type, align);
		  machine_mode vmode = TYPE_MODE (vectype);
		  opt_machine_mode new_ovmode
		    = get_len_load_store_mode (vmode, false);
		  machine_mode new_vmode = new_ovmode.require ();
		  /* Need conversion if it's wrapped with VnQI.  */
		  if (vmode != new_vmode)
		    {
		      tree new_vtype
			= build_vector_type_for_mode (unsigned_intQI_type_node,
						      new_vmode);
		      tree var
			= vect_get_new_ssa_name (new_vtype, vect_simple_var);
		      vec_oprnd
			= build1 (VIEW_CONVERT_EXPR, new_vtype, vec_oprnd);
		      gassign *new_stmt
			= gimple_build_assign (var, VIEW_CONVERT_EXPR,
					       vec_oprnd);
		      vect_finish_stmt_generation (vinfo, stmt_info, new_stmt,
						   gsi);
		      vec_oprnd = var;
		    }
		  gcall *call
		    = gimple_build_call_internal (IFN_LEN_STORE, 4, dataref_ptr,
						  ptr, final_len, vec_oprnd);
		  gimple_call_set_nothrow (call, true);
		  vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
		  new_stmt = call;
		}
	      else
		{
		  data_ref = fold_build2 (MEM_REF, vectype,
					  dataref_ptr,
					  dataref_offset
					  ? dataref_offset
					  : build_int_cst (ref_type, 0));
		  if (aligned_access_p (first_dr_info))
		    ;
		  else if (DR_MISALIGNMENT (first_dr_info) == -1)
		    TREE_TYPE (data_ref)
		      = build_aligned_type (TREE_TYPE (data_ref),
					    align * BITS_PER_UNIT);
		  else
		    TREE_TYPE (data_ref)
		      = build_aligned_type (TREE_TYPE (data_ref),
					    TYPE_ALIGN (elem_type));
		  vect_copy_ref_info (data_ref, DR_REF (first_dr_info->dr));
		  new_stmt = gimple_build_assign (data_ref, vec_oprnd);
		  vect_finish_stmt_generation (vinfo, stmt_info, new_stmt,
					       gsi);
		}

	      if (slp)
		continue;

	      next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
	      if (!next_stmt_info)
		break;
	    }
	}
      if (!slp)
	{
	  if (j == 0)
	    *vec_stmt = new_stmt;
	  STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
	}
    }
  for (i = 0; i < group_size; ++i)
    {
      vec<tree> oprndsi = gvec_oprnds[i];
      oprndsi.release ();
    }
  oprnds.release ();
  result_chain.release ();
  vec_oprnds.release ();

  return true;
}
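/* To illustrate the store forms emitted above (GIMPLE sketches only; the
   exact operands depend on the target and on the loop being transformed):

     .MASK_STORE (ptr, align, mask, vec);		fully-masked loops
     .LEN_STORE (ptr, align, len, vec);			length-controlled loops
     .SCATTER_STORE (ptr, offsets, scale, vec);		gather/scatter targets
     .MASK_STORE_LANES (ptr, align, mask, vec_array);	store-lanes targets

   Plain contiguous stores come out as ordinary vector MEM_REF
   assignments.  */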
/* Given a vector type VECTYPE, turns permutation SEL into the equivalent
   VECTOR_CST mask.  No checks are made that the target platform supports the
   mask, so callers may wish to test can_vec_perm_const_p separately, or use
   vect_gen_perm_mask_checked.  */

tree
vect_gen_perm_mask_any (tree vectype, const vec_perm_indices &sel)
{
  tree mask_type;

  poly_uint64 nunits = sel.length ();
  gcc_assert (known_eq (nunits, TYPE_VECTOR_SUBPARTS (vectype)));

  mask_type = build_vector_type (ssizetype, nunits);
  return vec_perm_indices_to_tree (mask_type, sel);
}

/* Checked version of vect_gen_perm_mask_any.  Asserts can_vec_perm_const_p,
   i.e. that the target supports the pattern _for arbitrary input vectors_.  */

tree
vect_gen_perm_mask_checked (tree vectype, const vec_perm_indices &sel)
{
  gcc_assert (can_vec_perm_const_p (TYPE_MODE (vectype), sel));
  return vect_gen_perm_mask_any (vectype, sel);
}
/* Given vector variables X and Y, that were generated for the scalar
   STMT_INFO, generate instructions to permute the vector elements of X and Y
   using permutation mask MASK_VEC, insert them at *GSI and return the
   permuted vector variable.  */

static tree
permute_vec_elements (vec_info *vinfo,
		      tree x, tree y, tree mask_vec, stmt_vec_info stmt_info,
		      gimple_stmt_iterator *gsi)
{
  tree vectype = TREE_TYPE (x);
  tree perm_dest, data_ref;
  gimple *perm_stmt;

  tree scalar_dest = gimple_get_lhs (stmt_info->stmt);
  if (scalar_dest && TREE_CODE (scalar_dest) == SSA_NAME)
    perm_dest = vect_create_destination_var (scalar_dest, vectype);
  else
    perm_dest = vect_get_new_vect_var (vectype, vect_simple_var, NULL);
  data_ref = make_ssa_name (perm_dest);

  /* Generate the permute statement.  */
  perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR, x, y, mask_vec);
  vect_finish_stmt_generation (vinfo, stmt_info, perm_stmt, gsi);

  return data_ref;
}
/* Hoist the definitions of all SSA uses on STMT_INFO out of the loop LOOP,
   inserting them on the loop's preheader edge.  Returns true if we
   were successful in doing so (and thus STMT_INFO can be moved then),
   otherwise returns false.  */

static bool
hoist_defs_of_uses (stmt_vec_info stmt_info, class loop *loop)
{
  ssa_op_iter i;
  tree op;
  bool any = false;

  FOR_EACH_SSA_TREE_OPERAND (op, stmt_info->stmt, i, SSA_OP_USE)
    {
      gimple *def_stmt = SSA_NAME_DEF_STMT (op);
      if (!gimple_nop_p (def_stmt)
	  && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
	{
	  /* Make sure we don't need to recurse.  While we could do
	     so in simple cases when there are more complex use webs
	     we don't have an easy way to preserve stmt order to fulfil
	     dependencies within them.  */
	  tree op2;
	  ssa_op_iter i2;
	  if (gimple_code (def_stmt) == GIMPLE_PHI)
	    return false;
	  FOR_EACH_SSA_TREE_OPERAND (op2, def_stmt, i2, SSA_OP_USE)
	    {
	      gimple *def_stmt2 = SSA_NAME_DEF_STMT (op2);
	      if (!gimple_nop_p (def_stmt2)
		  && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt2)))
		return false;
	    }
	  any = true;
	}
    }

  if (!any)
    return true;

  FOR_EACH_SSA_TREE_OPERAND (op, stmt_info->stmt, i, SSA_OP_USE)
    {
      gimple *def_stmt = SSA_NAME_DEF_STMT (op);
      if (!gimple_nop_p (def_stmt)
	  && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
	{
	  gimple_stmt_iterator gsi = gsi_for_stmt (def_stmt);
	  gsi_remove (&gsi, false);
	  gsi_insert_on_edge_immediate (loop_preheader_edge (loop), def_stmt);
	}
    }

  return true;
}
/* vectorizable_load.

   Check if STMT_INFO reads a non scalar data-ref (array/pointer/structure)
   that can be vectorized.
   If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at GSI.
   Return true if STMT_INFO is vectorizable in this way.  */

static bool
vectorizable_load (vec_info *vinfo,
		   stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
		   gimple **vec_stmt, slp_tree slp_node,
		   stmt_vector_for_cost *cost_vec)
{
  tree scalar_dest;
  tree vec_dest = NULL;
  tree data_ref = NULL;
  loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
  class loop *loop = NULL;
  class loop *containing_loop = gimple_bb (stmt_info->stmt)->loop_father;
  bool nested_in_vect_loop = false;
  tree elem_type;
  tree new_temp;
  machine_mode mode;
  tree dummy;
  tree dataref_ptr = NULL_TREE;
  tree dataref_offset = NULL_TREE;
  gimple *ptr_incr = NULL;
  int ncopies;
  int i, j;
  unsigned int group_size;
  poly_uint64 group_gap_adj;
  tree msq = NULL_TREE, lsq;
  tree offset = NULL_TREE;
  tree byte_offset = NULL_TREE;
  tree realignment_token = NULL_TREE;
  gphi *phi = NULL;
  vec<tree> dr_chain = vNULL;
  bool grouped_load = false;
  stmt_vec_info first_stmt_info;
  stmt_vec_info first_stmt_info_for_drptr = NULL;
  bool compute_in_loop = false;
  class loop *at_loop;
  int vec_num;
  bool slp = (slp_node != NULL);
  bool slp_perm = false;
  bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
  poly_uint64 vf;
  tree aggr_type;
  gather_scatter_info gs_info;
  tree ref_type;
  enum vect_def_type mask_dt = vect_unknown_def_type;

  if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
    return false;

  if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
      && ! vec_stmt)
    return false;

  if (!STMT_VINFO_DATA_REF (stmt_info))
    return false;
  /* ??? Alignment analysis for SLP looks at SLP_TREE_SCALAR_STMTS[0]
     for unpermuted loads but we get passed SLP_TREE_REPRESENTATIVE
     which can be different when reduction chains were re-ordered.
     Now that we figured we're a dataref reset stmt_info back to
     SLP_TREE_SCALAR_STMTS[0].  When we're SLP only things should be
     refactored in a way to maintain the dr_vec_info pointer for the
     relevant access explicitly.  */
  stmt_vec_info orig_stmt_info = stmt_info;
  if (slp_node)
    stmt_info = SLP_TREE_SCALAR_STMTS (slp_node)[0];
  tree mask = NULL_TREE, mask_vectype = NULL_TREE;
  if (gassign *assign = dyn_cast <gassign *> (stmt_info->stmt))
    {
      scalar_dest = gimple_assign_lhs (assign);
      if (TREE_CODE (scalar_dest) != SSA_NAME)
	return false;

      tree_code code = gimple_assign_rhs_code (assign);
      if (code != ARRAY_REF
	  && code != BIT_FIELD_REF
	  && code != INDIRECT_REF
	  && code != COMPONENT_REF
	  && code != IMAGPART_EXPR
	  && code != REALPART_EXPR
	  && code != MEM_REF
	  && TREE_CODE_CLASS (code) != tcc_declaration)
	return false;
    }
  else
    {
      gcall *call = dyn_cast <gcall *> (stmt_info->stmt);
      if (!call || !gimple_call_internal_p (call))
	return false;

      internal_fn ifn = gimple_call_internal_fn (call);
      if (!internal_load_fn_p (ifn))
	return false;

      scalar_dest = gimple_call_lhs (call);
      if (!scalar_dest)
	return false;

      int mask_index = internal_fn_mask_index (ifn);
      if (mask_index >= 0)
	{
	  mask = gimple_call_arg (call, mask_index);
	  if (!vect_check_scalar_mask (vinfo, stmt_info, mask, &mask_dt,
				       &mask_vectype))
	    return false;
	}
    }
= STMT_VINFO_VECTYPE (stmt_info
);
8392 poly_uint64 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
8396 loop
= LOOP_VINFO_LOOP (loop_vinfo
);
8397 nested_in_vect_loop
= nested_in_vect_loop_p (loop
, stmt_info
);
8398 vf
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
);
8403 /* Multiple types in SLP are handled by creating the appropriate number of
8404 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
8409 ncopies
= vect_get_num_copies (loop_vinfo
, vectype
);
8411 gcc_assert (ncopies
>= 1);
8413 /* FORNOW. This restriction should be relaxed. */
8414 if (nested_in_vect_loop
&& ncopies
> 1)
8416 if (dump_enabled_p ())
8417 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
8418 "multiple types in nested loop.\n");
  /* Invalidate assumptions made by dependence analysis when vectorization
     on the unrolled body effectively re-orders stmts.  */
  if (ncopies > 1
      && STMT_VINFO_MIN_NEG_DIST (stmt_info) != 0
      && maybe_gt (LOOP_VINFO_VECT_FACTOR (loop_vinfo),
		   STMT_VINFO_MIN_NEG_DIST (stmt_info)))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "cannot perform implicit CSE when unrolling "
			 "with negative dependence distance\n");
      return false;
    }

  elem_type = TREE_TYPE (vectype);
  mode = TYPE_MODE (vectype);

  /* FORNOW. In some cases can vectorize even if data-type not supported
     (e.g. - data copies).  */
  if (optab_handler (mov_optab, mode) == CODE_FOR_nothing)
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "Aligned load, but unsupported type.\n");
      return false;
    }
  /* Check if the load is a part of an interleaving chain.  */
  if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
    {
      grouped_load = true;
      /* FORNOW */
      gcc_assert (!nested_in_vect_loop);
      gcc_assert (!STMT_VINFO_GATHER_SCATTER_P (stmt_info));

      first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
      group_size = DR_GROUP_SIZE (first_stmt_info);

      /* Refuse non-SLP vectorization of SLP-only groups.  */
      if (!slp && STMT_VINFO_SLP_VECT_ONLY (first_stmt_info))
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			     "cannot vectorize load in non-SLP mode.\n");
	  return false;
	}

      if (slp && SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
	{
	  slp_perm = true;

	  if (!loop_vinfo)
	    {
	      /* In BB vectorization we may not actually use a loaded vector
		 accessing elements in excess of DR_GROUP_SIZE.  */
	      stmt_vec_info group_info = SLP_TREE_SCALAR_STMTS (slp_node)[0];
	      group_info = DR_GROUP_FIRST_ELEMENT (group_info);
	      unsigned HOST_WIDE_INT nunits;
	      unsigned j, k, maxk = 0;
	      FOR_EACH_VEC_ELT (SLP_TREE_LOAD_PERMUTATION (slp_node), j, k)
		if (k > maxk)
		  maxk = k;
	      tree vectype = STMT_VINFO_VECTYPE (group_info);
	      if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant (&nunits)
		  || maxk >= (DR_GROUP_SIZE (group_info) & ~(nunits - 1)))
		{
		  if (dump_enabled_p ())
		    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				     "BB vectorization with gaps at the end of "
				     "a load is not supported\n");
		  return false;
		}
	    }

	  auto_vec<tree> tem;
	  unsigned n_perms;
	  if (!vect_transform_slp_perm_load (vinfo, slp_node, tem, NULL, vf,
					     true, &n_perms))
	    {
	      if (dump_enabled_p ())
		dump_printf_loc (MSG_MISSED_OPTIMIZATION,
				 vect_location,
				 "unsupported load permutation\n");
	      return false;
	    }
	}

      /* Invalidate assumptions made by dependence analysis when vectorization
	 on the unrolled body effectively re-orders stmts.  */
      if (!PURE_SLP_STMT (stmt_info)
	  && STMT_VINFO_MIN_NEG_DIST (stmt_info) != 0
	  && maybe_gt (LOOP_VINFO_VECT_FACTOR (loop_vinfo),
		       STMT_VINFO_MIN_NEG_DIST (stmt_info)))
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			     "cannot perform implicit CSE when performing "
			     "group loads with negative dependence "
			     "distance\n");
	  return false;
	}
    }
  else
    group_size = 1;
  vect_memory_access_type memory_access_type;
  enum dr_alignment_support alignment_support_scheme;
  if (!get_load_store_type (vinfo, stmt_info, vectype, slp_node, mask,
			    VLS_LOAD, ncopies, &memory_access_type,
			    &alignment_support_scheme, &gs_info))
    return false;

  if (mask)
    {
      if (memory_access_type == VMAT_CONTIGUOUS)
	{
	  machine_mode vec_mode = TYPE_MODE (vectype);
	  if (!VECTOR_MODE_P (vec_mode)
	      || !can_vec_mask_load_store_p (vec_mode,
					     TYPE_MODE (mask_vectype), true))
	    return false;
	}
      else if (memory_access_type != VMAT_LOAD_STORE_LANES
	       && memory_access_type != VMAT_GATHER_SCATTER)
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			     "unsupported access type for masked load.\n");
	  return false;
	}
    }
8553 if (!vec_stmt
) /* transformation not required. */
8556 STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info
) = memory_access_type
;
8559 && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo
))
8560 check_load_store_for_partial_vectors (loop_vinfo
, vectype
, VLS_LOAD
,
8561 group_size
, memory_access_type
,
8564 if (dump_enabled_p ()
8565 && memory_access_type
!= VMAT_ELEMENTWISE
8566 && memory_access_type
!= VMAT_GATHER_SCATTER
8567 && alignment_support_scheme
!= dr_aligned
)
8568 dump_printf_loc (MSG_NOTE
, vect_location
,
8569 "Vectorizing an unaligned access.\n");
8571 STMT_VINFO_TYPE (orig_stmt_info
) = load_vec_info_type
;
8572 vect_model_load_cost (vinfo
, stmt_info
, ncopies
, vf
, memory_access_type
,
8573 slp_node
, cost_vec
);
  if (!slp)
    gcc_assert (memory_access_type
		== STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info));

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "transform load. ncopies = %d\n", ncopies);

  /* Transform.  */

  dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info), *first_dr_info = NULL;
  ensure_base_align (dr_info);

  if (memory_access_type == VMAT_GATHER_SCATTER && gs_info.decl)
    {
      vect_build_gather_load_calls (vinfo,
				    stmt_info, gsi, vec_stmt, &gs_info, mask);
      return true;
    }
  if (memory_access_type == VMAT_INVARIANT)
    {
      gcc_assert (!grouped_load && !mask && !bb_vinfo);
      /* If we have versioned for aliasing or the loop doesn't
	 have any data dependencies that would preclude this,
	 then we are sure this is a loop invariant load and
	 thus we can insert it on the preheader edge.  */
      bool hoist_p = (LOOP_VINFO_NO_DATA_DEPENDENCIES (loop_vinfo)
		      && !nested_in_vect_loop
		      && hoist_defs_of_uses (stmt_info, loop));
      if (hoist_p)
	{
	  gassign *stmt = as_a <gassign *> (stmt_info->stmt);
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_NOTE, vect_location,
			     "hoisting out of the vectorized loop: %G", stmt);
	  scalar_dest = copy_ssa_name (scalar_dest);
	  tree rhs = unshare_expr (gimple_assign_rhs1 (stmt));
	  gsi_insert_on_edge_immediate
	    (loop_preheader_edge (loop),
	     gimple_build_assign (scalar_dest, rhs));
	}
      /* These copies are all equivalent, but currently the representation
	 requires a separate STMT_VINFO_VEC_STMT for each one.  */
      gimple_stmt_iterator gsi2 = *gsi;
      gsi_next (&gsi2);
      for (j = 0; j < ncopies; j++)
	{
	  if (hoist_p)
	    new_temp = vect_init_vector (vinfo, stmt_info, scalar_dest,
					 vectype, NULL);
	  else
	    new_temp = vect_init_vector (vinfo, stmt_info, scalar_dest,
					 vectype, &gsi2);
	  gimple *new_stmt = SSA_NAME_DEF_STMT (new_temp);
	  if (slp)
	    SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
	  else
	    {
	      if (j == 0)
		*vec_stmt = new_stmt;
	      STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
	    }
	}
      return true;
    }
  if (memory_access_type == VMAT_ELEMENTWISE
      || memory_access_type == VMAT_STRIDED_SLP)
    {
      gimple_stmt_iterator incr_gsi;
      bool insert_after;
      tree offvar;
      tree ivstep;
      tree running_off;
      vec<constructor_elt, va_gc> *v = NULL;
      tree stride_base, stride_step, alias_off;
      /* Checked by get_load_store_type.  */
      unsigned int const_nunits = nunits.to_constant ();
      unsigned HOST_WIDE_INT cst_offset = 0;
      tree dr_offset;

      gcc_assert (!LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo));
      gcc_assert (!nested_in_vect_loop);

      if (grouped_load)
	{
	  first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
	  first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
	}
      else
	{
	  first_stmt_info = stmt_info;
	  first_dr_info = dr_info;
	}
      if (slp && grouped_load)
	{
	  group_size = DR_GROUP_SIZE (first_stmt_info);
	  ref_type = get_group_alias_ptr_type (first_stmt_info);
	}
      else
	{
	  if (grouped_load)
	    cst_offset
	      = (tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype)))
		 * vect_get_place_in_interleaving_chain (stmt_info,
							 first_stmt_info));
	  group_size = 1;
	  ref_type = reference_alias_ptr_type (DR_REF (dr_info->dr));
	}

      dr_offset = get_dr_vinfo_offset (vinfo, first_dr_info);
      stride_base
	= fold_build_pointer_plus
	    (DR_BASE_ADDRESS (first_dr_info->dr),
	     size_binop (PLUS_EXPR,
			 convert_to_ptrofftype (dr_offset),
			 convert_to_ptrofftype (DR_INIT (first_dr_info->dr))));
      stride_step = fold_convert (sizetype, DR_STEP (first_dr_info->dr));

      /* For a load with loop-invariant (but other than power-of-2)
	 stride (i.e. not a grouped access) like so:

	   for (i = 0; i < n; i += stride)
	     ... = array[i];

	 we generate a new induction variable and new accesses to
	 form a new vector (or vectors, depending on ncopies):

	   for (j = 0; ; j += VF*stride)
	     tmp1 = array[j];
	     tmp2 = array[j + stride];
	     ...
	     vectemp = {tmp1, tmp2, ...}
	 */

      ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (stride_step), stride_step,
			    build_int_cst (TREE_TYPE (stride_step), vf));

      standard_iv_increment_position (loop, &incr_gsi, &insert_after);

      stride_base = cse_and_gimplify_to_preheader (loop_vinfo, stride_base);
      ivstep = cse_and_gimplify_to_preheader (loop_vinfo, ivstep);
      create_iv (stride_base, ivstep, NULL,
		 loop, &incr_gsi, insert_after,
		 &offvar, NULL);

      stride_step = cse_and_gimplify_to_preheader (loop_vinfo, stride_step);

      running_off = offvar;
      alias_off = build_int_cst (ref_type, 0);
      int nloads = const_nunits;
      int lnel = 1;
      tree ltype = TREE_TYPE (vectype);
      tree lvectype = vectype;
      auto_vec<tree> dr_chain;
      if (memory_access_type == VMAT_STRIDED_SLP)
	{
	  if (group_size < const_nunits)
	    {
	      /* First check if vec_init optab supports construction from
		 vector elts directly.  Otherwise avoid emitting a constructor
		 of vector elements by performing the loads using an integer
		 type of the same size, constructing a vector of those and
		 then re-interpreting it as the original vector type.  This
		 avoids a huge runtime penalty due to the general inability
		 to perform store forwarding from smaller stores to a larger
		 load.  */
	      tree ptype;
	      tree vtype
		= vector_vector_composition_type (vectype,
						  const_nunits / group_size,
						  &ptype);
	      if (vtype != NULL_TREE)
		{
		  nloads = const_nunits / group_size;
		  lnel = group_size;
		  lvectype = vtype;
		  ltype = ptype;
		}
	    }
	  else
	    {
	      nloads = 1;
	      lnel = const_nunits;
	      ltype = vectype;
	    }
	  ltype = build_aligned_type (ltype, TYPE_ALIGN (TREE_TYPE (vectype)));
	}
      /* Load vector(1) scalar_type if it's 1 element-wise vectype.  */
      else if (nloads == 1)
	ltype = vectype;

      if (slp)
	{
	  /* For SLP permutation support we need to load the whole group,
	     not only the number of vector stmts the permutation result
	     fits in.  */
	  if (slp_perm)
	    {
	      /* We don't yet generate SLP_TREE_LOAD_PERMUTATIONs for
		 variable VF.  */
	      unsigned int const_vf = vf.to_constant ();
	      ncopies = CEIL (group_size * const_vf, const_nunits);
	      dr_chain.create (ncopies);
	    }
	  else
	    ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
	}
      unsigned int group_el = 0;
      unsigned HOST_WIDE_INT
	elsz = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
      for (j = 0; j < ncopies; j++)
	{
	  if (nloads > 1)
	    vec_alloc (v, nloads);
	  gimple *new_stmt = NULL;
	  for (i = 0; i < nloads; i++)
	    {
	      tree this_off = build_int_cst (TREE_TYPE (alias_off),
					     group_el * elsz + cst_offset);
	      tree data_ref = build2 (MEM_REF, ltype, running_off, this_off);
	      vect_copy_ref_info (data_ref, DR_REF (first_dr_info->dr));
	      new_stmt = gimple_build_assign (make_ssa_name (ltype), data_ref);
	      vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
	      if (nloads > 1)
		CONSTRUCTOR_APPEND_ELT (v, NULL_TREE,
					gimple_assign_lhs (new_stmt));

	      group_el += lnel;
	      if (! slp
		  || group_el == group_size)
		{
		  tree newoff = copy_ssa_name (running_off);
		  gimple *incr = gimple_build_assign (newoff,
						      POINTER_PLUS_EXPR,
						      running_off, stride_step);
		  vect_finish_stmt_generation (vinfo, stmt_info, incr, gsi);

		  running_off = newoff;
		  group_el = 0;
		}
	    }
	  if (nloads > 1)
	    {
	      tree vec_inv = build_constructor (lvectype, v);
	      new_temp = vect_init_vector (vinfo, stmt_info,
					   vec_inv, lvectype, gsi);
	      new_stmt = SSA_NAME_DEF_STMT (new_temp);
	      if (lvectype != vectype)
		{
		  new_stmt = gimple_build_assign (make_ssa_name (vectype),
						  VIEW_CONVERT_EXPR,
						  build1 (VIEW_CONVERT_EXPR,
							  vectype, new_temp));
		  vect_finish_stmt_generation (vinfo, stmt_info, new_stmt,
					       gsi);
		}
	    }

	  if (slp)
	    {
	      if (slp_perm)
		dr_chain.quick_push (gimple_assign_lhs (new_stmt));
	      else
		SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
	    }
	  else
	    {
	      if (j == 0)
		*vec_stmt = new_stmt;
	      STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
	    }
	}
      if (slp_perm)
	{
	  unsigned n_perms;
	  vect_transform_slp_perm_load (vinfo, slp_node, dr_chain, gsi, vf,
					false, &n_perms);
	}
      return true;
    }
  if (memory_access_type == VMAT_GATHER_SCATTER
      || (!slp && memory_access_type == VMAT_CONTIGUOUS))
    grouped_load = false;

  if (grouped_load)
    {
      first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
      group_size = DR_GROUP_SIZE (first_stmt_info);
      /* For SLP vectorization we directly vectorize a subchain
	 without permutation.  */
      if (slp && ! SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
	first_stmt_info = SLP_TREE_SCALAR_STMTS (slp_node)[0];
      /* For BB vectorization always use the first stmt to base
	 the data ref pointer on.  */
      if (bb_vinfo)
	first_stmt_info_for_drptr
	  = vect_find_first_scalar_stmt_in_slp (slp_node);

      /* Check if the chain of loads is already vectorized.  */
      if (STMT_VINFO_VEC_STMTS (first_stmt_info).exists ()
	  /* For SLP we would need to copy over SLP_TREE_VEC_STMTS.
	     ??? But we can only do so if there is exactly one
	     as we have no way to get at the rest.  Leave the CSE
	     opportunity alone.
	     ??? With the group load eventually participating
	     in multiple different permutations (having multiple
	     slp nodes which refer to the same group) the CSE
	     is even wrong code.  See PR56270.  */
	  && !slp)
	{
	  *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
	  return true;
	}
      first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
      group_gap_adj = 0;

      /* VEC_NUM is the number of vect stmts to be created for this group.  */
      if (slp)
	{
	  grouped_load = false;
	  /* If an SLP permutation is from N elements to N elements,
	     and if one vector holds a whole number of N, we can load
	     the inputs to the permutation in the same way as an
	     unpermuted sequence.  In other cases we need to load the
	     whole group, not only the number of vector stmts the
	     permutation result fits in.  */
	  unsigned scalar_lanes = SLP_TREE_LANES (slp_node);
	  if (slp_perm
	      && (group_size != scalar_lanes
		  || !multiple_p (nunits, group_size)))
	    {
	      /* We don't yet generate such SLP_TREE_LOAD_PERMUTATIONs for
		 variable VF; see vect_transform_slp_perm_load.  */
	      unsigned int const_vf = vf.to_constant ();
	      unsigned int const_nunits = nunits.to_constant ();
	      vec_num = CEIL (group_size * const_vf, const_nunits);
	      group_gap_adj = vf * group_size - nunits * vec_num;
	    }
	  else
	    {
	      vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
	      group_gap_adj
		= group_size - scalar_lanes;
	    }
	}
      else
	vec_num = group_size;

      ref_type = get_group_alias_ptr_type (first_stmt_info);
    }
  else
    {
      first_stmt_info = stmt_info;
      first_dr_info = dr_info;
      group_size = vec_num = 1;
      group_gap_adj = 0;
      ref_type = reference_alias_ptr_type (DR_REF (first_dr_info->dr));
    }
  gcc_assert (alignment_support_scheme);
  vec_loop_masks *loop_masks
    = (loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)
       ? &LOOP_VINFO_MASKS (loop_vinfo)
       : NULL);
  vec_loop_lens *loop_lens
    = (loop_vinfo && LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo)
       ? &LOOP_VINFO_LENS (loop_vinfo)
       : NULL);

  /* Shouldn't go with length-based approach if fully masked.  */
  gcc_assert (!loop_lens || !loop_masks);

  /* Targets with store-lane instructions must not require explicit
     realignment.  vect_supportable_dr_alignment always returns either
     dr_aligned or dr_unaligned_supported for masked operations.  */
  gcc_assert ((memory_access_type != VMAT_LOAD_STORE_LANES
	       && !mask
	       && !loop_masks)
	      || alignment_support_scheme == dr_aligned
	      || alignment_support_scheme == dr_unaligned_supported);
  /* In case the vectorization factor (VF) is bigger than the number
     of elements that we can fit in a vectype (nunits), we have to generate
     more than one vector stmt - i.e - we need to "unroll" the
     vector stmt by a factor VF/nunits.  In doing so, we record a pointer
     from one copy of the vector stmt to the next, in the field
     STMT_VINFO_RELATED_STMT.  This is necessary in order to allow following
     stages to find the correct vector defs to be used when vectorizing
     stmts that use the defs of the current stmt.  The example below
     illustrates the vectorization process when VF=16 and nunits=4 (i.e., we
     need to create 4 vectorized stmts):

     before vectorization:
                                RELATED_STMT    VEC_STMT
        S1:     x = memref      -               -
        S2:     z = x + 1       -               -

     step 1: vectorize stmt S1:
        We first create the vector stmt VS1_0, and, as usual, record a
        pointer to it in the STMT_VINFO_VEC_STMT of the scalar stmt S1.
        Next, we create the vector stmt VS1_1, and record a pointer to
        it in the STMT_VINFO_RELATED_STMT of the vector stmt VS1_0.
        Similarly, for VS1_2 and VS1_3.  This is the resulting chain of
        stmts and pointers:
                                RELATED_STMT    VEC_STMT
        VS1_0:  vx0 = memref0   VS1_1           -
        VS1_1:  vx1 = memref1   VS1_2           -
        VS1_2:  vx2 = memref2   VS1_3           -
        VS1_3:  vx3 = memref3   -               -
        S1:     x = load        -               VS1_0
        S2:     z = x + 1       -               -
  */

  /* In case of interleaving (non-unit grouped access):

     S1:  x2 = &base + 2
     S2:  x0 = &base
     S3:  x1 = &base + 1
     S4:  x3 = &base + 3

     Vectorized loads are created in the order of memory accesses
     starting from the access of the first stmt of the chain:

     VS1: vx0 = &base
     VS2: vx1 = &base + vec_size*1
     VS3: vx3 = &base + vec_size*2
     VS4: vx4 = &base + vec_size*3

     Then permutation statements are generated:

     VS5: vx5 = VEC_PERM_EXPR < vx0, vx1, { 0, 2, ..., i*2 } >
     VS6: vx6 = VEC_PERM_EXPR < vx0, vx1, { 1, 3, ..., i*2+1 } >
       ...

     And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
     (the order of the data-refs in the output of vect_permute_load_chain
     corresponds to the order of scalar stmts in the interleaving chain - see
     the documentation of vect_permute_load_chain()).
     The generation of permutation stmts and recording them in
     STMT_VINFO_VEC_STMT is done in vect_transform_grouped_load().

     In case of both multiple types and interleaving, the vector loads and
     permutation stmts above are created for every copy.  The result vector
     stmts are put in STMT_VINFO_VEC_STMT for the first copy and in the
     corresponding STMT_VINFO_RELATED_STMT for the next copies.  */
  /* If the data reference is aligned (dr_aligned) or potentially unaligned
     on a target that supports unaligned accesses (dr_unaligned_supported)
     we generate the following code:

       p = initial_addr;
       indx = 0;
       loop {
	 p = p + indx * vectype_size;
	 vec_dest = *(p);
	 indx = indx + 1;
       }

     Otherwise, the data reference is potentially unaligned on a target that
     does not support unaligned accesses (dr_explicit_realign_optimized) -
     then generate the following code, in which the data in each iteration is
     obtained by two vector loads, one from the previous iteration, and one
     from the current iteration:

       msq_init = *(floor(p1))
       p2 = initial_addr + VS - 1;
       realignment_token = call target_builtin;
       indx = 0;
       loop {
	 p2 = p2 + indx * vectype_size
	 lsq = *(floor(p2))
	 vec_dest = realign_load (msq, lsq, realignment_token)
	 indx = indx + 1;
	 msq = lsq;
       }   */

  /* If the misalignment remains the same throughout the execution of the
     loop, we can create the init_addr and permutation mask at the loop
     preheader.  Otherwise, it needs to be created inside the loop.
     This can only occur when vectorizing memory accesses in the inner-loop
     nested within an outer-loop that is being vectorized.  */

  if (nested_in_vect_loop
      && !multiple_p (DR_STEP_ALIGNMENT (dr_info->dr),
		      GET_MODE_SIZE (TYPE_MODE (vectype))))
    {
      gcc_assert (alignment_support_scheme != dr_explicit_realign_optimized);
      compute_in_loop = true;
    }
  bool diff_first_stmt_info
    = first_stmt_info_for_drptr
      && first_stmt_info != first_stmt_info_for_drptr;

  if ((alignment_support_scheme == dr_explicit_realign_optimized
       || alignment_support_scheme == dr_explicit_realign)
      && !compute_in_loop)
    {
      /* If we have different first_stmt_info, we can't set up realignment
	 here, since we can't guarantee first_stmt_info DR has been
	 initialized yet, use first_stmt_info_for_drptr DR by bumping the
	 distance from first_stmt_info DR instead as below.  */
      if (!diff_first_stmt_info)
	msq = vect_setup_realignment (vinfo,
				      first_stmt_info, gsi, &realignment_token,
				      alignment_support_scheme, NULL_TREE,
				      &at_loop);
      if (alignment_support_scheme == dr_explicit_realign_optimized)
	{
	  phi = as_a <gphi *> (SSA_NAME_DEF_STMT (msq));
	  byte_offset = size_binop (MINUS_EXPR, TYPE_SIZE_UNIT (vectype),
				    size_one_node);
	  gcc_assert (!first_stmt_info_for_drptr);
	}
    }
  else
    at_loop = loop;
  if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
    offset = size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1);

  tree bump;
  tree vec_offset = NULL_TREE;
  if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
    {
      aggr_type = NULL_TREE;
      bump = NULL_TREE;
    }
  else if (memory_access_type == VMAT_GATHER_SCATTER)
    {
      aggr_type = elem_type;
      vect_get_strided_load_store_ops (stmt_info, loop_vinfo, &gs_info,
				       &bump, &vec_offset);
    }
  else
    {
      if (memory_access_type == VMAT_LOAD_STORE_LANES)
	aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
      else
	aggr_type = vectype;
      bump = vect_get_data_ptr_increment (vinfo, dr_info, aggr_type,
					  memory_access_type);
    }
  vec<tree> vec_offsets = vNULL;
  auto_vec<tree> vec_masks;
  if (mask)
    vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
		       mask, &vec_masks, mask_vectype, NULL_TREE);
  tree vec_mask = NULL_TREE;
  poly_uint64 group_elt = 0;
  for (j = 0; j < ncopies; j++)
    {
      /* 1. Create the vector or array pointer update chain.  */
      if (j == 0)
	{
	  bool simd_lane_access_p
	    = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) != 0;
	  if (simd_lane_access_p
	      && TREE_CODE (DR_BASE_ADDRESS (first_dr_info->dr)) == ADDR_EXPR
	      && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr_info->dr), 0))
	      && integer_zerop (get_dr_vinfo_offset (vinfo, first_dr_info))
	      && integer_zerop (DR_INIT (first_dr_info->dr))
	      && alias_sets_conflict_p (get_alias_set (aggr_type),
					get_alias_set (TREE_TYPE (ref_type)))
	      && (alignment_support_scheme == dr_aligned
		  || alignment_support_scheme == dr_unaligned_supported))
	    {
	      dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr_info->dr));
	      dataref_offset = build_int_cst (ref_type, 0);
	    }
	  else if (diff_first_stmt_info)
	    {
	      dataref_ptr
		= vect_create_data_ref_ptr (vinfo, first_stmt_info_for_drptr,
					    aggr_type, at_loop, offset, &dummy,
					    gsi, &ptr_incr, simd_lane_access_p,
					    byte_offset, bump);
	      /* Adjust the pointer by the difference to first_stmt.  */
	      data_reference_p ptrdr
		= STMT_VINFO_DATA_REF (first_stmt_info_for_drptr);
	      tree diff
		= fold_convert (sizetype,
				size_binop (MINUS_EXPR,
					    DR_INIT (first_dr_info->dr),
					    DR_INIT (ptrdr)));
	      dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr, gsi,
					     stmt_info, diff);
	      if (alignment_support_scheme == dr_explicit_realign)
		{
		  msq = vect_setup_realignment (vinfo,
						first_stmt_info_for_drptr, gsi,
						&realignment_token,
						alignment_support_scheme,
						dataref_ptr, &at_loop);
		  gcc_assert (!compute_in_loop);
		}
	    }
	  else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
	    {
	      vect_get_gather_scatter_ops (vinfo, loop, stmt_info, &gs_info,
					   &dataref_ptr, &vec_offsets,
					   ncopies);
	      vec_offset = vec_offsets[0];
	    }
	  else
	    dataref_ptr
	      = vect_create_data_ref_ptr (vinfo, first_stmt_info, aggr_type,
					  at_loop,
					  offset, &dummy, gsi, &ptr_incr,
					  simd_lane_access_p,
					  byte_offset, bump);
	  if (mask)
	    vec_mask = vec_masks[0];
	}
      else
	{
	  if (dataref_offset)
	    dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset,
					      bump);
	  else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
	    vec_offset = vec_offsets[j];
	  else
	    dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr, gsi,
					   stmt_info, bump);
	  if (mask)
	    vec_mask = vec_masks[j];
	}
      if (grouped_load || slp_perm)
	dr_chain.create (vec_num);

      gimple *new_stmt = NULL;
      if (memory_access_type == VMAT_LOAD_STORE_LANES)
	{
	  tree vec_array;

	  vec_array = create_vector_array (vectype, vec_num);

	  tree final_mask = NULL_TREE;
	  if (loop_masks)
	    final_mask = vect_get_loop_mask (gsi, loop_masks, ncopies,
					     vectype, j);
	  if (vec_mask)
	    final_mask = prepare_load_store_mask (mask_vectype, final_mask,
						  vec_mask, gsi);

	  gcall *call;
	  if (final_mask)
	    {
	      /* Emit:
		   VEC_ARRAY = MASK_LOAD_LANES (DATAREF_PTR, ALIAS_PTR,
						VEC_MASK).  */
	      unsigned int align = TYPE_ALIGN_UNIT (TREE_TYPE (vectype));
	      tree alias_ptr = build_int_cst (ref_type, align);
	      call = gimple_build_call_internal (IFN_MASK_LOAD_LANES, 3,
						 dataref_ptr, alias_ptr,
						 final_mask);
	    }
	  else
	    {
	      /* Emit:
		   VEC_ARRAY = LOAD_LANES (MEM_REF[...all elements...]).  */
	      data_ref = create_array_ref (aggr_type, dataref_ptr, ref_type);
	      call = gimple_build_call_internal (IFN_LOAD_LANES, 1, data_ref);
	    }
	  gimple_call_set_lhs (call, vec_array);
	  gimple_call_set_nothrow (call, true);
	  vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
	  new_stmt = call;

	  /* Extract each vector into an SSA_NAME.  */
	  for (i = 0; i < vec_num; i++)
	    {
	      new_temp = read_vector_array (vinfo, stmt_info, gsi, scalar_dest,
					    vec_array, i);
	      dr_chain.quick_push (new_temp);
	    }

	  /* Record the mapping between SSA_NAMEs and statements.  */
	  vect_record_grouped_load_vectors (vinfo, stmt_info, dr_chain);

	  /* Record that VEC_ARRAY is now dead.  */
	  vect_clobber_variable (vinfo, stmt_info, gsi, vec_array);
	}
      else
	{
	  for (i = 0; i < vec_num; i++)
	    {
	      tree final_mask = NULL_TREE;
	      if (loop_masks
		  && memory_access_type != VMAT_INVARIANT)
		final_mask = vect_get_loop_mask (gsi, loop_masks,
						 vec_num * ncopies,
						 vectype, vec_num * j + i);
	      if (vec_mask)
		final_mask = prepare_load_store_mask (mask_vectype, final_mask,
						      vec_mask, gsi);

	      if (i > 0)
		dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr,
					       gsi, stmt_info, bump);
	      /* 2. Create the vector-load in the loop.  */
	      switch (alignment_support_scheme)
		{
		case dr_aligned:
		case dr_unaligned_supported:
		  {
		    unsigned int misalign;
		    unsigned HOST_WIDE_INT align;

		    if (memory_access_type == VMAT_GATHER_SCATTER)
		      {
			tree zero = build_zero_cst (vectype);
			tree scale = size_int (gs_info.scale);
			gcall *call;
			if (final_mask)
			  call = gimple_build_call_internal
			    (IFN_MASK_GATHER_LOAD, 5, dataref_ptr,
			     vec_offset, scale, zero, final_mask);
			else
			  call = gimple_build_call_internal
			    (IFN_GATHER_LOAD, 4, dataref_ptr,
			     vec_offset, scale, zero);
, true);
9301 data_ref
= NULL_TREE
;
9306 known_alignment (DR_TARGET_ALIGNMENT (first_dr_info
));
9307 if (alignment_support_scheme
== dr_aligned
)
9309 gcc_assert (aligned_access_p (first_dr_info
));
9312 else if (DR_MISALIGNMENT (first_dr_info
) == -1)
9314 align
= dr_alignment
9315 (vect_dr_behavior (vinfo
, first_dr_info
));
9319 misalign
= DR_MISALIGNMENT (first_dr_info
);
9320 if (dataref_offset
== NULL_TREE
9321 && TREE_CODE (dataref_ptr
) == SSA_NAME
)
9322 set_ptr_info_alignment (get_ptr_info (dataref_ptr
),
9327 align
= least_bit_hwi (misalign
| align
);
9328 tree ptr
= build_int_cst (ref_type
, align
);
9330 = gimple_build_call_internal (IFN_MASK_LOAD
, 3,
9333 gimple_call_set_nothrow (call
, true);
9335 data_ref
= NULL_TREE
;
9337 else if (loop_lens
&& memory_access_type
!= VMAT_INVARIANT
)
9340 = vect_get_loop_len (loop_vinfo
, loop_lens
,
9343 align
= least_bit_hwi (misalign
| align
);
9344 tree ptr
= build_int_cst (ref_type
, align
);
9346 = gimple_build_call_internal (IFN_LEN_LOAD
, 3,
9349 gimple_call_set_nothrow (call
, true);
9351 data_ref
= NULL_TREE
;
			/* Need conversion if it's wrapped with VnQI.  */
			machine_mode vmode = TYPE_MODE (vectype);
			opt_machine_mode new_ovmode
			  = get_len_load_store_mode (vmode, true);
			machine_mode new_vmode = new_ovmode.require ();
			if (vmode != new_vmode)
			  {
			    tree qi_type = unsigned_intQI_type_node;
			    tree new_vtype
			      = build_vector_type_for_mode (qi_type, new_vmode);
			    tree var = vect_get_new_ssa_name (new_vtype,
							      vect_simple_var);
			    gimple_set_lhs (call, var);
			    vect_finish_stmt_generation (vinfo, stmt_info, call,
							 gsi);
			    tree op = build1 (VIEW_CONVERT_EXPR, vectype, var);
			    new_stmt
			      = gimple_build_assign (vec_dest,
						     VIEW_CONVERT_EXPR, op);
			  }
		      }
		    else
		      {
			tree ltype = vectype;
			tree new_vtype = NULL_TREE;
			unsigned HOST_WIDE_INT gap
			  = DR_GROUP_GAP (first_stmt_info);
			unsigned int vect_align
			  = vect_known_alignment_in_bytes (first_dr_info);
			unsigned int scalar_dr_size
			  = vect_get_scalar_dr_size (first_dr_info);
			/* If there's no peeling for gaps but we have a gap
			   with slp loads then load the lower half of the
			   vector only.  See get_group_load_store_type for
			   when we apply this optimization.  */
			if (slp
			    && loop_vinfo
			    && !LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo)
			    && gap != 0
			    && known_eq (nunits, (group_size - gap) * 2)
			    && known_eq (nunits, group_size)
			    && gap >= (vect_align / scalar_dr_size))
			  {
			    tree half_vtype;
			    new_vtype
			      = vector_vector_composition_type (vectype, 2,
								&half_vtype);
			    if (new_vtype != NULL_TREE)
			      ltype = half_vtype;
			  }
			tree offset
			  = (dataref_offset ? dataref_offset
			     : build_int_cst (ref_type, 0));
			if (ltype != vectype
			    && memory_access_type == VMAT_CONTIGUOUS_REVERSE)
			  {
			    unsigned HOST_WIDE_INT gap_offset
			      = gap * tree_to_uhwi (TYPE_SIZE_UNIT (elem_type));
			    tree gapcst = build_int_cst (ref_type, gap_offset);
			    offset = size_binop (PLUS_EXPR, offset, gapcst);
			  }
			data_ref
			  = fold_build2 (MEM_REF, ltype, dataref_ptr, offset);
			if (alignment_support_scheme == dr_aligned)
			  ;
			else if (DR_MISALIGNMENT (first_dr_info) == -1)
			  TREE_TYPE (data_ref)
			    = build_aligned_type (TREE_TYPE (data_ref),
						  align * BITS_PER_UNIT);
			else
			  TREE_TYPE (data_ref)
			    = build_aligned_type (TREE_TYPE (data_ref),
						  TYPE_ALIGN (elem_type));
			if (ltype != vectype)
			  {
			    vect_copy_ref_info (data_ref,
						DR_REF (first_dr_info->dr));
			    tree tem = make_ssa_name (ltype);
			    new_stmt = gimple_build_assign (tem, data_ref);
			    vect_finish_stmt_generation (vinfo, stmt_info,
							 new_stmt, gsi);
			    data_ref = NULL;
			    vec<constructor_elt, va_gc> *v;
			    vec_alloc (v, 2);
			    if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
			      {
				CONSTRUCTOR_APPEND_ELT (v, NULL_TREE,
							build_zero_cst (ltype));
				CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, tem);
			      }
			    else
			      {
				CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, tem);
				CONSTRUCTOR_APPEND_ELT (v, NULL_TREE,
							build_zero_cst (ltype));
			      }
			    gcc_assert (new_vtype != NULL_TREE);
			    if (new_vtype == vectype)
			      new_stmt = gimple_build_assign (
				vec_dest, build_constructor (vectype, v));
			    else
			      {
				tree new_vname = make_ssa_name (new_vtype);
				new_stmt = gimple_build_assign (
				  new_vname, build_constructor (new_vtype, v));
				vect_finish_stmt_generation (vinfo, stmt_info,
							     new_stmt, gsi);
				new_stmt = gimple_build_assign (
				  vec_dest, build1 (VIEW_CONVERT_EXPR, vectype,
						    new_vname));
			      }
			  }
		      }
		    break;
		  }
		case dr_explicit_realign:
		  {
		    tree ptr, bump;

		    tree vs = size_int (TYPE_VECTOR_SUBPARTS (vectype));

		    if (compute_in_loop)
		      msq = vect_setup_realignment (vinfo, first_stmt_info, gsi,
						    &realignment_token,
						    dr_explicit_realign,
						    dataref_ptr, NULL);

		    if (TREE_CODE (dataref_ptr) == SSA_NAME)
		      ptr = copy_ssa_name (dataref_ptr);
		    else
		      ptr = make_ssa_name (TREE_TYPE (dataref_ptr));
		    // For explicit realign the target alignment should be
		    // known at compile time.
		    unsigned HOST_WIDE_INT align =
		      DR_TARGET_ALIGNMENT (first_dr_info).to_constant ();
		    new_stmt = gimple_build_assign
		      (ptr, BIT_AND_EXPR, dataref_ptr,
		       build_int_cst
		       (TREE_TYPE (dataref_ptr),
			-(HOST_WIDE_INT) align));
		    vect_finish_stmt_generation (vinfo, stmt_info,
						 new_stmt, gsi);
		    data_ref
		      = build2 (MEM_REF, vectype, ptr,
				build_int_cst (ref_type, 0));
		    vect_copy_ref_info (data_ref, DR_REF (first_dr_info->dr));
		    vec_dest = vect_create_destination_var (scalar_dest,
							    vectype);
		    new_stmt = gimple_build_assign (vec_dest, data_ref);
		    new_temp = make_ssa_name (vec_dest, new_stmt);
		    gimple_assign_set_lhs (new_stmt, new_temp);
		    gimple_move_vops (new_stmt, stmt_info->stmt);
		    vect_finish_stmt_generation (vinfo, stmt_info,
						 new_stmt, gsi);
		    msq = new_temp;

		    bump = size_binop (MULT_EXPR, vs,
				       TYPE_SIZE_UNIT (elem_type));
		    bump = size_binop (MINUS_EXPR, bump, size_one_node);
		    ptr = bump_vector_ptr (vinfo, dataref_ptr, NULL, gsi,
					   stmt_info, bump);
		    new_stmt = gimple_build_assign
		      (NULL_TREE, BIT_AND_EXPR, ptr,
		       build_int_cst
		       (TREE_TYPE (ptr), -(HOST_WIDE_INT) align));
		    ptr = copy_ssa_name (ptr, new_stmt);
		    gimple_assign_set_lhs (new_stmt, ptr);
		    vect_finish_stmt_generation (vinfo, stmt_info,
						 new_stmt, gsi);
		    data_ref
		      = build2 (MEM_REF, vectype, ptr,
				build_int_cst (ref_type, 0));
		    break;
		  }
		case dr_explicit_realign_optimized:
		  {
		    if (TREE_CODE (dataref_ptr) == SSA_NAME)
		      new_temp = copy_ssa_name (dataref_ptr);
		    else
		      new_temp = make_ssa_name (TREE_TYPE (dataref_ptr));
		    // We should only be doing this if we know the target
		    // alignment at compile time.
		    unsigned HOST_WIDE_INT align =
		      DR_TARGET_ALIGNMENT (first_dr_info).to_constant ();
		    new_stmt = gimple_build_assign
		      (new_temp, BIT_AND_EXPR, dataref_ptr,
		       build_int_cst (TREE_TYPE (dataref_ptr),
				      -(HOST_WIDE_INT) align));
		    vect_finish_stmt_generation (vinfo, stmt_info,
						 new_stmt, gsi);
		    data_ref
		      = build2 (MEM_REF, vectype, new_temp,
				build_int_cst (ref_type, 0));
		    break;
		  }
		default:
		  gcc_unreachable ();
		}
	      vec_dest = vect_create_destination_var (scalar_dest, vectype);
	      /* DATA_REF is null if we've already built the statement.  */
	      if (data_ref)
		{
		  vect_copy_ref_info (data_ref, DR_REF (first_dr_info->dr));
		  new_stmt = gimple_build_assign (vec_dest, data_ref);
		}
	      new_temp = make_ssa_name (vec_dest, new_stmt);
	      gimple_set_lhs (new_stmt, new_temp);
	      vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
	      /* 3. Handle explicit realignment if necessary/supported.
		 Create in loop:
		   vec_dest = realign_load (msq, lsq, realignment_token)  */
	      if (alignment_support_scheme == dr_explicit_realign_optimized
		  || alignment_support_scheme == dr_explicit_realign)
		{
		  lsq = gimple_assign_lhs (new_stmt);
		  if (!realignment_token)
		    realignment_token = dataref_ptr;
		  vec_dest = vect_create_destination_var (scalar_dest,
							  vectype);
		  new_stmt = gimple_build_assign (vec_dest, REALIGN_LOAD_EXPR,
						  msq, lsq, realignment_token);
		  new_temp = make_ssa_name (vec_dest, new_stmt);
		  gimple_assign_set_lhs (new_stmt, new_temp);
		  vect_finish_stmt_generation (vinfo, stmt_info, new_stmt,
					       gsi);

		  if (alignment_support_scheme
		      == dr_explicit_realign_optimized)
		    {
		      gcc_assert (phi);
		      if (i == vec_num - 1 && j == ncopies - 1)
			add_phi_arg (phi, lsq,
				     loop_latch_edge (containing_loop),
				     UNKNOWN_LOCATION);
		      msq = lsq;
		    }
		}
== VMAT_CONTIGUOUS_REVERSE
)
9591 tree perm_mask
= perm_mask_for_reverse (vectype
);
9592 new_temp
= permute_vec_elements (vinfo
, new_temp
, new_temp
,
9593 perm_mask
, stmt_info
, gsi
);
9594 new_stmt
= SSA_NAME_DEF_STMT (new_temp
);
9597 /* Collect vector loads and later create their permutation in
9598 vect_transform_grouped_load (). */
9599 if (grouped_load
|| slp_perm
)
9600 dr_chain
.quick_push (new_temp
);
9602 /* Store vector loads in the corresponding SLP_NODE. */
9603 if (slp
&& !slp_perm
)
9604 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
9606 /* With SLP permutation we load the gaps as well, without
9607 we need to skip the gaps after we manage to fully load
9608 all elements. group_gap_adj is DR_GROUP_SIZE here. */
9609 group_elt
+= nunits
;
9610 if (maybe_ne (group_gap_adj
, 0U)
9612 && known_eq (group_elt
, group_size
- group_gap_adj
))
9614 poly_wide_int bump_val
9615 = (wi::to_wide (TYPE_SIZE_UNIT (elem_type
))
9617 tree bump
= wide_int_to_tree (sizetype
, bump_val
);
9618 dataref_ptr
= bump_vector_ptr (vinfo
, dataref_ptr
, ptr_incr
,
9619 gsi
, stmt_info
, bump
);
	  /* Bump the vector pointer to account for a gap or for excess
	     elements loaded for a permuted SLP load.  */
	  if (maybe_ne (group_gap_adj, 0U) && slp_perm)
	    {
	      poly_wide_int bump_val
		= (wi::to_wide (TYPE_SIZE_UNIT (elem_type))
		   * group_gap_adj);
	      tree bump = wide_int_to_tree (sizetype, bump_val);
	      dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr,
					     gsi, stmt_info, bump);
	    }
	}

      if (slp && !slp_perm)
	continue;

      if (slp_perm)
	{
	  unsigned n_perms;
	  bool ok = vect_transform_slp_perm_load (vinfo, slp_node, dr_chain,
						  gsi, vf, false, &n_perms);
	  gcc_assert (ok);
	}
      else
	{
	  if (grouped_load)
	    {
	      if (memory_access_type != VMAT_LOAD_STORE_LANES)
		vect_transform_grouped_load (vinfo, stmt_info, dr_chain,
					     group_size, gsi);
	      *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
	    }
	  else
	    {
	      if (j == 0)
		*vec_stmt = new_stmt;
	      STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
	    }
	}
      dr_chain.release ();
    }
  if (!slp)
    *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];

  return true;
}
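/* For gather loads the transform above ends up emitting internal calls such
   as (illustrative GIMPLE only; operands depend on the target):

     vect_5 = .MASK_GATHER_LOAD (base_1, vec_offsets_2, scale, zero, mask_3);

   or .GATHER_LOAD (base, offsets, scale, zero) when no mask is needed.  */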
9668 /* Function vect_is_simple_cond.
9671 LOOP - the loop that is being vectorized.
9672 COND - Condition that is checked for simple use.
9675 *COMP_VECTYPE - the vector type for the comparison.
9676 *DTS - The def types for the arguments of the comparison
9678 Returns whether a COND can be vectorized. Checks whether
9679 condition operands are supportable using vec_is_simple_use. */
static bool
vect_is_simple_cond (tree cond, vec_info *vinfo, stmt_vec_info stmt_info,
		     slp_tree slp_node, tree *comp_vectype,
		     enum vect_def_type *dts, tree vectype)
{
  tree lhs, rhs;
  tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
  slp_tree slp_op;

  /* Mask case.  */
  if (TREE_CODE (cond) == SSA_NAME
      && VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (cond)))
    {
      if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 0, &cond,
			       &slp_op, &dts[0], comp_vectype)
	  || !VECTOR_BOOLEAN_TYPE_P (*comp_vectype))
	return false;
      return true;
    }
  if (!COMPARISON_CLASS_P (cond))
    return false;

  lhs = TREE_OPERAND (cond, 0);
  rhs = TREE_OPERAND (cond, 1);

  if (TREE_CODE (lhs) == SSA_NAME)
    {
      if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 0,
			       &lhs, &slp_op, &dts[0], &vectype1))
	return false;
    }
  else if (TREE_CODE (lhs) == INTEGER_CST || TREE_CODE (lhs) == REAL_CST
	   || TREE_CODE (lhs) == FIXED_CST)
    dts[0] = vect_constant_def;
  else
    return false;

  if (TREE_CODE (rhs) == SSA_NAME)
    {
      if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 1,
			       &rhs, &slp_op, &dts[1], &vectype2))
	return false;
    }
  else if (TREE_CODE (rhs) == INTEGER_CST || TREE_CODE (rhs) == REAL_CST
	   || TREE_CODE (rhs) == FIXED_CST)
    dts[1] = vect_constant_def;
  else
    return false;
  if (vectype1 && vectype2
      && maybe_ne (TYPE_VECTOR_SUBPARTS (vectype1),
		   TYPE_VECTOR_SUBPARTS (vectype2)))
    return false;

  *comp_vectype = vectype1 ? vectype1 : vectype2;
  /* Invariant comparison.  */
  if (! *comp_vectype)
    {
      tree scalar_type = TREE_TYPE (lhs);
      if (VECT_SCALAR_BOOLEAN_TYPE_P (scalar_type))
	*comp_vectype = truth_type_for (vectype);
      else
	{
	  /* If we can widen the comparison to match vectype do so.  */
	  if (INTEGRAL_TYPE_P (scalar_type)
	      && tree_int_cst_lt (TYPE_SIZE (scalar_type),
				  TYPE_SIZE (TREE_TYPE (vectype))))
	    scalar_type = build_nonstandard_integer_type
	      (vector_element_bits (vectype), TYPE_UNSIGNED (scalar_type));
	  *comp_vectype = get_vectype_for_scalar_type (vinfo, scalar_type,
						       slp_node);
	}
    }

  return true;
}
/* vectorizable_condition.

   Check if STMT_INFO is a conditional modify expression that can be
   vectorized.  If VEC_STMT is also passed, vectorize STMT_INFO: create
   a vectorized stmt using VEC_COND_EXPR to replace it, put it in
   VEC_STMT, and insert it at GSI.

   When STMT_INFO is vectorized as a nested cycle, for_reduction is true.

   Return true if STMT_INFO is vectorizable in this way.  */
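/* Illustrative sketch (not part of the original sources): a scalar
   statement such as

     x_5 = a_1 < b_2 ? c_3 : d_4;

   is conceptually replaced by a vector comparison feeding a
   VEC_COND_EXPR:

     mask_9 = vect_a_6 < vect_b_7;
     vect_x_8 = VEC_COND_EXPR <mask_9, vect_c_10, vect_d_11>;

   The SSA names above are invented for the example; the real names are
   produced by make_ssa_name/vect_create_destination_var below.  */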
static bool
vectorizable_condition (vec_info *vinfo,
			stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
			gimple **vec_stmt,
			slp_tree slp_node, stmt_vector_for_cost *cost_vec)
{
  tree scalar_dest = NULL_TREE;
  tree vec_dest = NULL_TREE;
  tree cond_expr, cond_expr0 = NULL_TREE, cond_expr1 = NULL_TREE;
  tree then_clause, else_clause;
  tree comp_vectype = NULL_TREE;
  tree vec_cond_lhs = NULL_TREE, vec_cond_rhs = NULL_TREE;
  tree vec_then_clause = NULL_TREE, vec_else_clause = NULL_TREE;
  tree vec_compare;
  tree new_temp;
  loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
  enum vect_def_type dts[4]
    = {vect_unknown_def_type, vect_unknown_def_type,
       vect_unknown_def_type, vect_unknown_def_type};
  int ndts = 4;
  int ncopies;
  int vec_num;
  enum tree_code code, cond_code, bitop1 = NOP_EXPR, bitop2 = NOP_EXPR;
  int i;
  bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
  vec<tree> vec_oprnds0 = vNULL;
  vec<tree> vec_oprnds1 = vNULL;
  vec<tree> vec_oprnds2 = vNULL;
  vec<tree> vec_oprnds3 = vNULL;
  tree vec_cmp_type;
  bool masked = false;
  if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
    return false;

  /* Is vectorizable conditional operation?  */
  gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
  if (!stmt)
    return false;

  code = gimple_assign_rhs_code (stmt);
  if (code != COND_EXPR)
    return false;

  stmt_vec_info reduc_info = NULL;
  int reduc_index = -1;
  vect_reduction_type reduction_type = TREE_CODE_REDUCTION;
  bool for_reduction
    = STMT_VINFO_REDUC_DEF (vect_orig_stmt (stmt_info)) != NULL;
  if (for_reduction)
    {
      if (STMT_SLP_TYPE (stmt_info))
	return false;
      reduc_info = info_for_reduction (vinfo, stmt_info);
      reduction_type = STMT_VINFO_REDUC_TYPE (reduc_info);
      reduc_index = STMT_VINFO_REDUC_IDX (stmt_info);
      gcc_assert (reduction_type != EXTRACT_LAST_REDUCTION
		  || reduc_index != -1);
    }

  if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
    return false;

  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;

  if (slp_node)
    {
      ncopies = 1;
      vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
    }
  else
    {
      ncopies = vect_get_num_copies (loop_vinfo, vectype);
      vec_num = 1;
    }

  gcc_assert (ncopies >= 1);
  if (for_reduction && ncopies > 1)
    return false; /* FORNOW */
  cond_expr = gimple_assign_rhs1 (stmt);

  if (!vect_is_simple_cond (cond_expr, vinfo, stmt_info, slp_node,
			    &comp_vectype, &dts[0], vectype)
      || !comp_vectype)
    return false;

  unsigned op_adjust = COMPARISON_CLASS_P (cond_expr) ? 1 : 0;
  slp_tree then_slp_node, else_slp_node;
  if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 1 + op_adjust,
			   &then_clause, &then_slp_node, &dts[2], &vectype1))
    return false;
  if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 2 + op_adjust,
			   &else_clause, &else_slp_node, &dts[3], &vectype2))
    return false;

  if (vectype1 && !useless_type_conversion_p (vectype, vectype1))
    return false;

  if (vectype2 && !useless_type_conversion_p (vectype, vectype2))
    return false;

  masked = !COMPARISON_CLASS_P (cond_expr);
  vec_cmp_type = truth_type_for (comp_vectype);
  if (vec_cmp_type == NULL_TREE)
    return false;

  cond_code = TREE_CODE (cond_expr);
  if (!masked)
    {
      cond_expr0 = TREE_OPERAND (cond_expr, 0);
      cond_expr1 = TREE_OPERAND (cond_expr, 1);
    }
  /* For conditional reductions, the "then" value needs to be the candidate
     value calculated by this iteration while the "else" value needs to be
     the result carried over from previous iterations.  If the COND_EXPR
     is the other way around, we need to swap it.  */
  bool must_invert_cmp_result = false;
  if (reduction_type == EXTRACT_LAST_REDUCTION && reduc_index == 1)
    {
      if (masked)
	must_invert_cmp_result = true;
      else
	{
	  bool honor_nans = HONOR_NANS (TREE_TYPE (cond_expr0));
	  tree_code new_code = invert_tree_comparison (cond_code, honor_nans);
	  if (new_code == ERROR_MARK)
	    must_invert_cmp_result = true;
	  else
	    {
	      cond_code = new_code;
	      /* Make sure we don't accidentally use the old condition.  */
	      cond_expr = NULL_TREE;
	    }
	}
      std::swap (then_clause, else_clause);
    }
  if (!masked && VECTOR_BOOLEAN_TYPE_P (comp_vectype))
    {
      /* Boolean values may have another representation in vectors
	 and therefore we prefer bit operations over comparison for
	 them (which also works for scalar masks).  We store opcodes
	 to use in bitop1 and bitop2.  Statement is vectorized as
	 BITOP2 (rhs1 BITOP1 rhs2) or rhs1 BITOP2 (BITOP1 rhs2)
	 depending on bitop1 and bitop2 arity.  */
      switch (cond_code)
	{
	case GT_EXPR:
	  bitop1 = BIT_NOT_EXPR;
	  bitop2 = BIT_AND_EXPR;
	  break;
	case GE_EXPR:
	  bitop1 = BIT_NOT_EXPR;
	  bitop2 = BIT_IOR_EXPR;
	  break;
	case LT_EXPR:
	  bitop1 = BIT_NOT_EXPR;
	  bitop2 = BIT_AND_EXPR;
	  std::swap (cond_expr0, cond_expr1);
	  break;
	case LE_EXPR:
	  bitop1 = BIT_NOT_EXPR;
	  bitop2 = BIT_IOR_EXPR;
	  std::swap (cond_expr0, cond_expr1);
	  break;
	case NE_EXPR:
	  bitop1 = BIT_XOR_EXPR;
	  break;
	case EQ_EXPR:
	  bitop1 = BIT_XOR_EXPR;
	  bitop2 = BIT_NOT_EXPR;
	  break;
	default:
	  return false;
	}
      cond_code = SSA_NAME;
    }
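  /* Illustrative note (not part of the original sources): for vector
     booleans the comparison a < b is rewritten with bit operations as
     b & ~a (bitop1 = BIT_NOT_EXPR applied to the second operand after
     the swap, bitop2 = BIT_AND_EXPR), while a != b simply becomes
     a ^ b (bitop1 = BIT_XOR_EXPR, no bitop2).  */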
  if (TREE_CODE_CLASS (cond_code) == tcc_comparison
      && reduction_type == EXTRACT_LAST_REDUCTION
      && !expand_vec_cmp_expr_p (comp_vectype, vec_cmp_type, cond_code))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "reduction comparison operation not supported.\n");
      return false;
    }
9968 if (bitop1
!= NOP_EXPR
)
9970 machine_mode mode
= TYPE_MODE (comp_vectype
);
9973 optab
= optab_for_tree_code (bitop1
, comp_vectype
, optab_default
);
9974 if (!optab
|| optab_handler (optab
, mode
) == CODE_FOR_nothing
)
9977 if (bitop2
!= NOP_EXPR
)
9979 optab
= optab_for_tree_code (bitop2
, comp_vectype
,
9981 if (!optab
|| optab_handler (optab
, mode
) == CODE_FOR_nothing
)
9986 vect_cost_for_stmt kind
= vector_stmt
;
9987 if (reduction_type
== EXTRACT_LAST_REDUCTION
)
9988 /* Count one reduction-like operation per vector. */
9989 kind
= vec_to_scalar
;
9990 else if (!expand_vec_cond_expr_p (vectype
, comp_vectype
, cond_code
))
9994 && (!vect_maybe_update_slp_op_vectype
9995 (SLP_TREE_CHILDREN (slp_node
)[0], comp_vectype
)
9997 && !vect_maybe_update_slp_op_vectype
9998 (SLP_TREE_CHILDREN (slp_node
)[1], comp_vectype
))
9999 || !vect_maybe_update_slp_op_vectype (then_slp_node
, vectype
)
10000 || !vect_maybe_update_slp_op_vectype (else_slp_node
, vectype
)))
10002 if (dump_enabled_p ())
10003 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
10004 "incompatible vector types for invariants\n");
      if (loop_vinfo && for_reduction
	  && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo))
	{
	  if (reduction_type == EXTRACT_LAST_REDUCTION)
	    vect_record_loop_mask (loop_vinfo, &LOOP_VINFO_MASKS (loop_vinfo),
				   ncopies * vec_num, vectype, NULL);
	  /* Extra inactive lanes should be safe for vect_nested_cycle.  */
	  else if (STMT_VINFO_DEF_TYPE (reduc_info) != vect_nested_cycle)
	    {
	      if (dump_enabled_p ())
		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				 "conditional reduction prevents the use"
				 " of partial vectors.\n");
	      LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
	    }
	}

      STMT_VINFO_TYPE (stmt_info) = condition_vec_info_type;
      vect_model_simple_cost (vinfo, stmt_info, ncopies, dts, ndts, slp_node,
			      cost_vec);
      return true;
    }
  vec_oprnds0.create (1);
  vec_oprnds1.create (1);
  vec_oprnds2.create (1);
  vec_oprnds3.create (1);

  scalar_dest = gimple_assign_lhs (stmt);
  if (reduction_type != EXTRACT_LAST_REDUCTION)
    vec_dest = vect_create_destination_var (scalar_dest, vectype);

  bool swap_cond_operands = false;
10048 /* See whether another part of the vectorized code applies a loop
10049 mask to the condition, or to its inverse. */
10051 vec_loop_masks
*masks
= NULL
;
10052 if (loop_vinfo
&& LOOP_VINFO_FULLY_MASKED_P (loop_vinfo
))
10054 if (reduction_type
== EXTRACT_LAST_REDUCTION
)
10055 masks
= &LOOP_VINFO_MASKS (loop_vinfo
);
10058 scalar_cond_masked_key
cond (cond_expr
, ncopies
);
10059 if (loop_vinfo
->scalar_cond_masked_set
.contains (cond
))
10060 masks
= &LOOP_VINFO_MASKS (loop_vinfo
);
10063 bool honor_nans
= HONOR_NANS (TREE_TYPE (cond
.op0
));
10064 cond
.code
= invert_tree_comparison (cond
.code
, honor_nans
);
10065 if (loop_vinfo
->scalar_cond_masked_set
.contains (cond
))
10067 masks
= &LOOP_VINFO_MASKS (loop_vinfo
);
10068 cond_code
= cond
.code
;
10069 swap_cond_operands
= true;
  /* Handle cond expr.  */
  if (masked)
    vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
		       cond_expr, &vec_oprnds0, comp_vectype,
		       then_clause, &vec_oprnds2, vectype,
		       reduction_type != EXTRACT_LAST_REDUCTION
		       ? else_clause : NULL, &vec_oprnds3, vectype);
  else
    vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
		       cond_expr0, &vec_oprnds0, comp_vectype,
		       cond_expr1, &vec_oprnds1, comp_vectype,
		       then_clause, &vec_oprnds2, vectype,
		       reduction_type != EXTRACT_LAST_REDUCTION
		       ? else_clause : NULL, &vec_oprnds3, vectype);
10090 /* Arguments are ready. Create the new vector stmt. */
10091 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vec_cond_lhs
)
10093 vec_then_clause
= vec_oprnds2
[i
];
10094 if (reduction_type
!= EXTRACT_LAST_REDUCTION
)
10095 vec_else_clause
= vec_oprnds3
[i
];
10097 if (swap_cond_operands
)
10098 std::swap (vec_then_clause
, vec_else_clause
);
10101 vec_compare
= vec_cond_lhs
;
10104 vec_cond_rhs
= vec_oprnds1
[i
];
10105 if (bitop1
== NOP_EXPR
)
10107 gimple_seq stmts
= NULL
;
10108 vec_compare
= gimple_build (&stmts
, cond_code
, vec_cmp_type
,
10109 vec_cond_lhs
, vec_cond_rhs
);
10110 gsi_insert_before (gsi
, stmts
, GSI_SAME_STMT
);
10114 new_temp
= make_ssa_name (vec_cmp_type
);
10116 if (bitop1
== BIT_NOT_EXPR
)
10117 new_stmt
= gimple_build_assign (new_temp
, bitop1
,
10121 = gimple_build_assign (new_temp
, bitop1
, vec_cond_lhs
,
10123 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
10124 if (bitop2
== NOP_EXPR
)
10125 vec_compare
= new_temp
;
10126 else if (bitop2
== BIT_NOT_EXPR
)
10128 /* Instead of doing ~x ? y : z do x ? z : y. */
10129 vec_compare
= new_temp
;
10130 std::swap (vec_then_clause
, vec_else_clause
);
10134 vec_compare
= make_ssa_name (vec_cmp_type
);
10136 = gimple_build_assign (vec_compare
, bitop2
,
10137 vec_cond_lhs
, new_temp
);
10138 vect_finish_stmt_generation (vinfo
, stmt_info
,
      /* If we decided to apply a loop mask to the result of the vector
	 comparison, AND the comparison with the mask now.  Later passes
	 should then be able to reuse the AND results between multiple
	 vector statements.

	 For example:
	 for (int i = 0; i < 100; ++i)
	   x[i] = y[i] ? z[i] : 10;

	 results in following optimized GIMPLE:

	 mask__35.8_43 = vect__4.7_41 != { 0, ... };
	 vec_mask_and_46 = loop_mask_40 & mask__35.8_43;
	 _19 = &MEM[base: z_12(D), index: ivtmp_56, step: 4, offset: 0B];
	 vect_iftmp.11_47 = .MASK_LOAD (_19, 4B, vec_mask_and_46);
	 vect_iftmp.12_52 = VEC_COND_EXPR <vec_mask_and_46,
					   vect_iftmp.11_47, { 10, ... }>;

	 instead of using masked and unmasked forms of
	 vec != { 0, ... } (masked in the MASK_LOAD,
	 unmasked in the VEC_COND_EXPR).  */
10166 /* Force vec_compare to be an SSA_NAME rather than a comparison,
10167 in cases where that's necessary. */
10169 if (masks
|| reduction_type
== EXTRACT_LAST_REDUCTION
)
10171 if (!is_gimple_val (vec_compare
))
10173 tree vec_compare_name
= make_ssa_name (vec_cmp_type
);
10174 gassign
*new_stmt
= gimple_build_assign (vec_compare_name
,
10176 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
10177 vec_compare
= vec_compare_name
;
10180 if (must_invert_cmp_result
)
10182 tree vec_compare_name
= make_ssa_name (vec_cmp_type
);
10183 gassign
*new_stmt
= gimple_build_assign (vec_compare_name
,
10186 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
10187 vec_compare
= vec_compare_name
;
10192 unsigned vec_num
= vec_oprnds0
.length ();
10194 = vect_get_loop_mask (gsi
, masks
, vec_num
* ncopies
,
10196 tree tmp2
= make_ssa_name (vec_cmp_type
);
10198 = gimple_build_assign (tmp2
, BIT_AND_EXPR
, vec_compare
,
10200 vect_finish_stmt_generation (vinfo
, stmt_info
, g
, gsi
);
10201 vec_compare
= tmp2
;
      gimple *new_stmt;
      if (reduction_type == EXTRACT_LAST_REDUCTION)
	{
	  gimple *old_stmt = vect_orig_stmt (stmt_info)->stmt;
	  tree lhs = gimple_get_lhs (old_stmt);
	  new_stmt = gimple_build_call_internal
	      (IFN_FOLD_EXTRACT_LAST, 3, else_clause, vec_compare,
	       vec_then_clause);
	  gimple_call_set_lhs (new_stmt, lhs);
	  SSA_NAME_DEF_STMT (lhs) = new_stmt;
	  if (old_stmt == gsi_stmt (*gsi))
	    vect_finish_replace_stmt (vinfo, stmt_info, new_stmt);
	  else
	    {
	      /* In this case we're moving the definition to later in the
		 block.  That doesn't matter because the only uses of the
		 lhs are in phi statements.  */
	      gimple_stmt_iterator old_gsi = gsi_for_stmt (old_stmt);
	      gsi_remove (&old_gsi, true);
	      vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
	    }
	}
      else
	{
	  new_temp = make_ssa_name (vec_dest);
	  new_stmt = gimple_build_assign (new_temp, VEC_COND_EXPR, vec_compare,
					  vec_then_clause, vec_else_clause);
	  vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
	}
      if (slp_node)
	SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
      else
	STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
    }

  if (!slp_node)
    *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];

  vec_oprnds0.release ();
  vec_oprnds1.release ();
  vec_oprnds2.release ();
  vec_oprnds3.release ();

  return true;
}
/* vectorizable_comparison.

   Check if STMT_INFO is a comparison expression that can be vectorized.
   If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
   comparison, put it in VEC_STMT, and insert it at GSI.

   Return true if STMT_INFO is vectorizable in this way.  */
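/* Illustrative sketch (not part of the original sources): a scalar mask
   definition such as

     mask_4 = a_1 > b_2;

   becomes a comparison producing a vector boolean (the mask type of the
   statement's vectype), e.g.

     vect_mask_7 = vect_a_5 > vect_b_6;

   which later consumers such as VEC_COND_EXPR or .MASK_LOAD can use
   directly.  The names are invented for the example.  */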
10260 vectorizable_comparison (vec_info
*vinfo
,
10261 stmt_vec_info stmt_info
, gimple_stmt_iterator
*gsi
,
10263 slp_tree slp_node
, stmt_vector_for_cost
*cost_vec
)
10265 tree lhs
, rhs1
, rhs2
;
10266 tree vectype1
= NULL_TREE
, vectype2
= NULL_TREE
;
10267 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
10268 tree vec_rhs1
= NULL_TREE
, vec_rhs2
= NULL_TREE
;
10270 loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
);
10271 enum vect_def_type dts
[2] = {vect_unknown_def_type
, vect_unknown_def_type
};
10273 poly_uint64 nunits
;
10275 enum tree_code code
, bitop1
= NOP_EXPR
, bitop2
= NOP_EXPR
;
10277 bb_vec_info bb_vinfo
= dyn_cast
<bb_vec_info
> (vinfo
);
10278 vec
<tree
> vec_oprnds0
= vNULL
;
10279 vec
<tree
> vec_oprnds1
= vNULL
;
10283 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
10286 if (!vectype
|| !VECTOR_BOOLEAN_TYPE_P (vectype
))
10289 mask_type
= vectype
;
10290 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
10295 ncopies
= vect_get_num_copies (loop_vinfo
, vectype
);
10297 gcc_assert (ncopies
>= 1);
10298 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
)
10301 gassign
*stmt
= dyn_cast
<gassign
*> (stmt_info
->stmt
);
10305 code
= gimple_assign_rhs_code (stmt
);
10307 if (TREE_CODE_CLASS (code
) != tcc_comparison
)
10310 slp_tree slp_rhs1
, slp_rhs2
;
10311 if (!vect_is_simple_use (vinfo
, stmt_info
, slp_node
,
10312 0, &rhs1
, &slp_rhs1
, &dts
[0], &vectype1
))
10315 if (!vect_is_simple_use (vinfo
, stmt_info
, slp_node
,
10316 1, &rhs2
, &slp_rhs2
, &dts
[1], &vectype2
))
10319 if (vectype1
&& vectype2
10320 && maybe_ne (TYPE_VECTOR_SUBPARTS (vectype1
),
10321 TYPE_VECTOR_SUBPARTS (vectype2
)))
10324 vectype
= vectype1
? vectype1
: vectype2
;
10326 /* Invariant comparison. */
10329 if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (rhs1
)))
10330 vectype
= mask_type
;
10332 vectype
= get_vectype_for_scalar_type (vinfo
, TREE_TYPE (rhs1
),
10334 if (!vectype
|| maybe_ne (TYPE_VECTOR_SUBPARTS (vectype
), nunits
))
10337 else if (maybe_ne (nunits
, TYPE_VECTOR_SUBPARTS (vectype
)))
10340 /* Can't compare mask and non-mask types. */
10341 if (vectype1
&& vectype2
10342 && (VECTOR_BOOLEAN_TYPE_P (vectype1
) ^ VECTOR_BOOLEAN_TYPE_P (vectype2
)))
  /* Boolean values may have another representation in vectors
     and therefore we prefer bit operations over comparison for
     them (which also works for scalar masks).  We store opcodes
     to use in bitop1 and bitop2.  Statement is vectorized as
       BITOP2 (rhs1 BITOP1 rhs2) or
       rhs1 BITOP2 (BITOP1 rhs2)
     depending on bitop1 and bitop2 arity.  */
10352 bool swap_p
= false;
10353 if (VECTOR_BOOLEAN_TYPE_P (vectype
))
10355 if (code
== GT_EXPR
)
10357 bitop1
= BIT_NOT_EXPR
;
10358 bitop2
= BIT_AND_EXPR
;
10360 else if (code
== GE_EXPR
)
10362 bitop1
= BIT_NOT_EXPR
;
10363 bitop2
= BIT_IOR_EXPR
;
10365 else if (code
== LT_EXPR
)
10367 bitop1
= BIT_NOT_EXPR
;
10368 bitop2
= BIT_AND_EXPR
;
10371 else if (code
== LE_EXPR
)
10373 bitop1
= BIT_NOT_EXPR
;
10374 bitop2
= BIT_IOR_EXPR
;
10379 bitop1
= BIT_XOR_EXPR
;
10380 if (code
== EQ_EXPR
)
10381 bitop2
= BIT_NOT_EXPR
;
10387 if (bitop1
== NOP_EXPR
)
10389 if (!expand_vec_cmp_expr_p (vectype
, mask_type
, code
))
10394 machine_mode mode
= TYPE_MODE (vectype
);
10397 optab
= optab_for_tree_code (bitop1
, vectype
, optab_default
);
10398 if (!optab
|| optab_handler (optab
, mode
) == CODE_FOR_nothing
)
10401 if (bitop2
!= NOP_EXPR
)
10403 optab
= optab_for_tree_code (bitop2
, vectype
, optab_default
);
10404 if (!optab
|| optab_handler (optab
, mode
) == CODE_FOR_nothing
)
10409 /* Put types on constant and invariant SLP children. */
10411 && (!vect_maybe_update_slp_op_vectype (slp_rhs1
, vectype
)
10412 || !vect_maybe_update_slp_op_vectype (slp_rhs2
, vectype
)))
10414 if (dump_enabled_p ())
10415 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
10416 "incompatible vector types for invariants\n");
10420 STMT_VINFO_TYPE (stmt_info
) = comparison_vec_info_type
;
10421 vect_model_simple_cost (vinfo
, stmt_info
,
10422 ncopies
* (1 + (bitop2
!= NOP_EXPR
)),
10423 dts
, ndts
, slp_node
, cost_vec
);
10430 vec_oprnds0
.create (1);
10431 vec_oprnds1
.create (1);
10435 lhs
= gimple_assign_lhs (stmt
);
10436 mask
= vect_create_destination_var (lhs
, mask_type
);
10438 vect_get_vec_defs (vinfo
, stmt_info
, slp_node
, ncopies
,
10439 rhs1
, &vec_oprnds0
, vectype
,
10440 rhs2
, &vec_oprnds1
, vectype
);
10442 std::swap (vec_oprnds0
, vec_oprnds1
);
10444 /* Arguments are ready. Create the new vector stmt. */
10445 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vec_rhs1
)
10448 vec_rhs2
= vec_oprnds1
[i
];
10450 new_temp
= make_ssa_name (mask
);
10451 if (bitop1
== NOP_EXPR
)
10453 new_stmt
= gimple_build_assign (new_temp
, code
,
10454 vec_rhs1
, vec_rhs2
);
10455 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
10459 if (bitop1
== BIT_NOT_EXPR
)
10460 new_stmt
= gimple_build_assign (new_temp
, bitop1
, vec_rhs2
);
10462 new_stmt
= gimple_build_assign (new_temp
, bitop1
, vec_rhs1
,
10464 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
10465 if (bitop2
!= NOP_EXPR
)
10467 tree res
= make_ssa_name (mask
);
10468 if (bitop2
== BIT_NOT_EXPR
)
10469 new_stmt
= gimple_build_assign (res
, bitop2
, new_temp
);
10471 new_stmt
= gimple_build_assign (res
, bitop2
, vec_rhs1
,
10473 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
10477 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
10479 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (new_stmt
);
10483 *vec_stmt
= STMT_VINFO_VEC_STMTS (stmt_info
)[0];
10485 vec_oprnds0
.release ();
10486 vec_oprnds1
.release ();
/* If SLP_NODE is nonnull, return true if vectorizable_live_operation
   can handle all live statements in the node.  Otherwise return true
   if STMT_INFO is not live or if vectorizable_live_operation can handle it.
   GSI and VEC_STMT_P are as for vectorizable_live_operation.  */
10497 can_vectorize_live_stmts (vec_info
*vinfo
,
10498 stmt_vec_info stmt_info
, gimple_stmt_iterator
*gsi
,
10499 slp_tree slp_node
, slp_instance slp_node_instance
,
10501 stmt_vector_for_cost
*cost_vec
)
10505 stmt_vec_info slp_stmt_info
;
10507 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (slp_node
), i
, slp_stmt_info
)
10509 if (STMT_VINFO_LIVE_P (slp_stmt_info
)
10510 && !vectorizable_live_operation (vinfo
,
10511 slp_stmt_info
, gsi
, slp_node
,
10512 slp_node_instance
, i
,
10513 vec_stmt_p
, cost_vec
))
10517 else if (STMT_VINFO_LIVE_P (stmt_info
)
10518 && !vectorizable_live_operation (vinfo
, stmt_info
, gsi
,
10519 slp_node
, slp_node_instance
, -1,
10520 vec_stmt_p
, cost_vec
))
/* Make sure the statement is vectorizable.  */

opt_result
vect_analyze_stmt (vec_info *vinfo,
		   stmt_vec_info stmt_info, bool *need_to_vectorize,
		   slp_tree node, slp_instance node_instance,
		   stmt_vector_for_cost *cost_vec)
{
  bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
  enum vect_relevant relevance = STMT_VINFO_RELEVANT (stmt_info);
  gimple_seq pattern_def_seq;

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location, "==> examining statement: %G",
		     stmt_info->stmt);

  if (gimple_has_volatile_ops (stmt_info->stmt))
    return opt_result::failure_at (stmt_info->stmt,
				   "not vectorized:"
				   " stmt has volatile operands: %G\n",
				   stmt_info->stmt);
10549 if (STMT_VINFO_IN_PATTERN_P (stmt_info
)
10551 && (pattern_def_seq
= STMT_VINFO_PATTERN_DEF_SEQ (stmt_info
)))
10553 gimple_stmt_iterator si
;
10555 for (si
= gsi_start (pattern_def_seq
); !gsi_end_p (si
); gsi_next (&si
))
10557 stmt_vec_info pattern_def_stmt_info
10558 = vinfo
->lookup_stmt (gsi_stmt (si
));
10559 if (STMT_VINFO_RELEVANT_P (pattern_def_stmt_info
)
10560 || STMT_VINFO_LIVE_P (pattern_def_stmt_info
))
10562 /* Analyze def stmt of STMT if it's a pattern stmt. */
10563 if (dump_enabled_p ())
10564 dump_printf_loc (MSG_NOTE
, vect_location
,
10565 "==> examining pattern def statement: %G",
10566 pattern_def_stmt_info
->stmt
);
10569 = vect_analyze_stmt (vinfo
, pattern_def_stmt_info
,
10570 need_to_vectorize
, node
, node_instance
,
  /* Skip stmts that do not need to be vectorized.  In loops this is expected
     to include:
     - the COND_EXPR which is the loop exit condition
     - any LABEL_EXPRs in the loop
     - computations that are used only for array indexing or loop control.
     In basic blocks we only analyze statements that are a part of some SLP
     instance, therefore, all the statements are relevant.

     Pattern statement needs to be analyzed instead of the original statement
     if the original statement is not relevant.  Otherwise, we analyze both
     statements.  In basic blocks we are called from some SLP instance
     traversal, don't analyze pattern stmts instead, the pattern stmts
     already will be part of SLP instance.  */
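  /* Illustrative note (not part of the original sources): a typical
     pattern statement is a widening multiply recognized by the pattern
     matcher, e.g. the original

       t_3 = (int) a_1 * (int) b_2;

     may be represented by a pattern statement using WIDEN_MULT_EXPR;
     when only the pattern statement is relevant, it is the one analyzed
     here instead of the original statement.  */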
10592 stmt_vec_info pattern_stmt_info
= STMT_VINFO_RELATED_STMT (stmt_info
);
10593 if (!STMT_VINFO_RELEVANT_P (stmt_info
)
10594 && !STMT_VINFO_LIVE_P (stmt_info
))
10596 if (STMT_VINFO_IN_PATTERN_P (stmt_info
)
10597 && pattern_stmt_info
10598 && (STMT_VINFO_RELEVANT_P (pattern_stmt_info
)
10599 || STMT_VINFO_LIVE_P (pattern_stmt_info
)))
10601 /* Analyze PATTERN_STMT instead of the original stmt. */
10602 stmt_info
= pattern_stmt_info
;
10603 if (dump_enabled_p ())
10604 dump_printf_loc (MSG_NOTE
, vect_location
,
10605 "==> examining pattern statement: %G",
10610 if (dump_enabled_p ())
10611 dump_printf_loc (MSG_NOTE
, vect_location
, "irrelevant.\n");
10613 return opt_result::success ();
10616 else if (STMT_VINFO_IN_PATTERN_P (stmt_info
)
10618 && pattern_stmt_info
10619 && (STMT_VINFO_RELEVANT_P (pattern_stmt_info
)
10620 || STMT_VINFO_LIVE_P (pattern_stmt_info
)))
10622 /* Analyze PATTERN_STMT too. */
10623 if (dump_enabled_p ())
10624 dump_printf_loc (MSG_NOTE
, vect_location
,
10625 "==> examining pattern statement: %G",
10626 pattern_stmt_info
->stmt
);
10629 = vect_analyze_stmt (vinfo
, pattern_stmt_info
, need_to_vectorize
, node
,
10630 node_instance
, cost_vec
);
10635 switch (STMT_VINFO_DEF_TYPE (stmt_info
))
10637 case vect_internal_def
:
10640 case vect_reduction_def
:
10641 case vect_nested_cycle
:
10642 gcc_assert (!bb_vinfo
10643 && (relevance
== vect_used_in_outer
10644 || relevance
== vect_used_in_outer_by_reduction
10645 || relevance
== vect_used_by_reduction
10646 || relevance
== vect_unused_in_scope
10647 || relevance
== vect_used_only_live
));
10650 case vect_induction_def
:
10651 gcc_assert (!bb_vinfo
);
10654 case vect_constant_def
:
10655 case vect_external_def
:
10656 case vect_unknown_def_type
:
10658 gcc_unreachable ();
  if (STMT_VINFO_RELEVANT_P (stmt_info))
    {
      tree type = gimple_expr_type (stmt_info->stmt);
      gcc_assert (!VECTOR_MODE_P (TYPE_MODE (type)));
      gcall *call = dyn_cast <gcall *> (stmt_info->stmt);
      gcc_assert (STMT_VINFO_VECTYPE (stmt_info)
		  || (call && gimple_call_lhs (call) == NULL_TREE));
      *need_to_vectorize = true;
    }

  if (PURE_SLP_STMT (stmt_info) && !node)
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "handled only by SLP analysis\n");
      return opt_result::success ();
    }
10681 && (STMT_VINFO_RELEVANT_P (stmt_info
)
10682 || STMT_VINFO_DEF_TYPE (stmt_info
) == vect_reduction_def
))
10683 /* Prefer vectorizable_call over vectorizable_simd_clone_call so
10684 -mveclibabi= takes preference over library functions with
10685 the simd attribute. */
10686 ok
= (vectorizable_call (vinfo
, stmt_info
, NULL
, NULL
, node
, cost_vec
)
10687 || vectorizable_simd_clone_call (vinfo
, stmt_info
, NULL
, NULL
, node
,
10689 || vectorizable_conversion (vinfo
, stmt_info
,
10690 NULL
, NULL
, node
, cost_vec
)
10691 || vectorizable_operation (vinfo
, stmt_info
,
10692 NULL
, NULL
, node
, cost_vec
)
10693 || vectorizable_assignment (vinfo
, stmt_info
,
10694 NULL
, NULL
, node
, cost_vec
)
10695 || vectorizable_load (vinfo
, stmt_info
, NULL
, NULL
, node
, cost_vec
)
10696 || vectorizable_store (vinfo
, stmt_info
, NULL
, NULL
, node
, cost_vec
)
10697 || vectorizable_reduction (as_a
<loop_vec_info
> (vinfo
), stmt_info
,
10698 node
, node_instance
, cost_vec
)
10699 || vectorizable_induction (as_a
<loop_vec_info
> (vinfo
), stmt_info
,
10700 NULL
, node
, cost_vec
)
10701 || vectorizable_shift (vinfo
, stmt_info
, NULL
, NULL
, node
, cost_vec
)
10702 || vectorizable_condition (vinfo
, stmt_info
,
10703 NULL
, NULL
, node
, cost_vec
)
10704 || vectorizable_comparison (vinfo
, stmt_info
, NULL
, NULL
, node
,
10706 || vectorizable_lc_phi (as_a
<loop_vec_info
> (vinfo
),
10707 stmt_info
, NULL
, node
));
10711 ok
= (vectorizable_call (vinfo
, stmt_info
, NULL
, NULL
, node
, cost_vec
)
10712 || vectorizable_simd_clone_call (vinfo
, stmt_info
,
10713 NULL
, NULL
, node
, cost_vec
)
10714 || vectorizable_conversion (vinfo
, stmt_info
, NULL
, NULL
, node
,
10716 || vectorizable_shift (vinfo
, stmt_info
,
10717 NULL
, NULL
, node
, cost_vec
)
10718 || vectorizable_operation (vinfo
, stmt_info
,
10719 NULL
, NULL
, node
, cost_vec
)
10720 || vectorizable_assignment (vinfo
, stmt_info
, NULL
, NULL
, node
,
10722 || vectorizable_load (vinfo
, stmt_info
,
10723 NULL
, NULL
, node
, cost_vec
)
10724 || vectorizable_store (vinfo
, stmt_info
,
10725 NULL
, NULL
, node
, cost_vec
)
10726 || vectorizable_condition (vinfo
, stmt_info
,
10727 NULL
, NULL
, node
, cost_vec
)
10728 || vectorizable_comparison (vinfo
, stmt_info
, NULL
, NULL
, node
,
10730 || vectorizable_phi (vinfo
, stmt_info
, NULL
, node
, cost_vec
));
  if (!ok)
    return opt_result::failure_at (stmt_info->stmt,
				   "not vectorized:"
				   " relevant stmt not supported: %G",
				   stmt_info->stmt);

  /* Stmts that are (also) "live" (i.e. - that are used out of the loop)
     need extra handling, except for vectorizable reductions.  */
  if (!bb_vinfo
      && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type
      && STMT_VINFO_TYPE (stmt_info) != lc_phi_info_type
      && !can_vectorize_live_stmts (as_a <loop_vec_info> (vinfo),
				    stmt_info, NULL, node, node_instance,
				    false, cost_vec))
    return opt_result::failure_at (stmt_info->stmt,
				   "not vectorized:"
				   " live stmt not supported: %G",
				   stmt_info->stmt);

  return opt_result::success ();
}
/* Function vect_transform_stmt.

   Create a vectorized stmt to replace STMT_INFO, and insert it at GSI.  */
bool
vect_transform_stmt (vec_info *vinfo,
		     stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
		     slp_tree slp_node, slp_instance slp_node_instance)
{
  bool is_store = false;
  gimple *vec_stmt = NULL;
  bool done;

  gcc_assert (slp_node || !PURE_SLP_STMT (stmt_info));
10771 switch (STMT_VINFO_TYPE (stmt_info
))
10773 case type_demotion_vec_info_type
:
10774 case type_promotion_vec_info_type
:
10775 case type_conversion_vec_info_type
:
10776 done
= vectorizable_conversion (vinfo
, stmt_info
,
10777 gsi
, &vec_stmt
, slp_node
, NULL
);
10781 case induc_vec_info_type
:
10782 done
= vectorizable_induction (as_a
<loop_vec_info
> (vinfo
),
10783 stmt_info
, &vec_stmt
, slp_node
,
10788 case shift_vec_info_type
:
10789 done
= vectorizable_shift (vinfo
, stmt_info
,
10790 gsi
, &vec_stmt
, slp_node
, NULL
);
10794 case op_vec_info_type
:
10795 done
= vectorizable_operation (vinfo
, stmt_info
, gsi
, &vec_stmt
, slp_node
,
10800 case assignment_vec_info_type
:
10801 done
= vectorizable_assignment (vinfo
, stmt_info
,
10802 gsi
, &vec_stmt
, slp_node
, NULL
);
10806 case load_vec_info_type
:
10807 done
= vectorizable_load (vinfo
, stmt_info
, gsi
, &vec_stmt
, slp_node
,
10812 case store_vec_info_type
:
10813 done
= vectorizable_store (vinfo
, stmt_info
,
10814 gsi
, &vec_stmt
, slp_node
, NULL
);
10816 if (STMT_VINFO_GROUPED_ACCESS (stmt_info
) && !slp_node
)
10818 /* In case of interleaving, the whole chain is vectorized when the
10819 last store in the chain is reached. Store stmts before the last
10820 one are skipped, and there vec_stmt_info shouldn't be freed
10822 stmt_vec_info group_info
= DR_GROUP_FIRST_ELEMENT (stmt_info
);
10823 if (DR_GROUP_STORE_COUNT (group_info
) == DR_GROUP_SIZE (group_info
))
10830 case condition_vec_info_type
:
10831 done
= vectorizable_condition (vinfo
, stmt_info
,
10832 gsi
, &vec_stmt
, slp_node
, NULL
);
10836 case comparison_vec_info_type
:
10837 done
= vectorizable_comparison (vinfo
, stmt_info
, gsi
, &vec_stmt
,
10842 case call_vec_info_type
:
10843 done
= vectorizable_call (vinfo
, stmt_info
,
10844 gsi
, &vec_stmt
, slp_node
, NULL
);
10847 case call_simd_clone_vec_info_type
:
10848 done
= vectorizable_simd_clone_call (vinfo
, stmt_info
, gsi
, &vec_stmt
,
10852 case reduc_vec_info_type
:
10853 done
= vect_transform_reduction (as_a
<loop_vec_info
> (vinfo
), stmt_info
,
10854 gsi
, &vec_stmt
, slp_node
);
10858 case cycle_phi_info_type
:
10859 done
= vect_transform_cycle_phi (as_a
<loop_vec_info
> (vinfo
), stmt_info
,
10860 &vec_stmt
, slp_node
, slp_node_instance
);
10864 case lc_phi_info_type
:
10865 done
= vectorizable_lc_phi (as_a
<loop_vec_info
> (vinfo
),
10866 stmt_info
, &vec_stmt
, slp_node
);
10870 case phi_info_type
:
10871 done
= vectorizable_phi (vinfo
, stmt_info
, &vec_stmt
, slp_node
, NULL
);
10876 if (!STMT_VINFO_LIVE_P (stmt_info
))
10878 if (dump_enabled_p ())
10879 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
10880 "stmt not supported.\n");
10881 gcc_unreachable ();
10886 if (!slp_node
&& vec_stmt
)
10887 gcc_assert (STMT_VINFO_VEC_STMTS (stmt_info
).exists ());
10889 if (STMT_VINFO_TYPE (stmt_info
) == store_vec_info_type
)
10892 /* Handle stmts whose DEF is used outside the loop-nest that is
10893 being vectorized. */
10894 done
= can_vectorize_live_stmts (vinfo
, stmt_info
, gsi
, slp_node
,
10895 slp_node_instance
, true, NULL
);
/* Remove a group of stores (for SLP or interleaving), free their
   stmt_vec_info.  */

void
vect_remove_stores (vec_info *vinfo, stmt_vec_info first_stmt_info)
{
  stmt_vec_info next_stmt_info = first_stmt_info;

  while (next_stmt_info)
    {
      stmt_vec_info tmp = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
      next_stmt_info = vect_orig_stmt (next_stmt_info);
      /* Free the attached stmt_vec_info and remove the stmt.  */
      vinfo->remove_stmt (next_stmt_info);
      next_stmt_info = tmp;
    }
}
/* If NUNITS is nonzero, return a vector type that contains NUNITS
   elements of type SCALAR_TYPE, or null if the target doesn't support
   such a type.

   If NUNITS is zero, return a vector type that contains elements of
   type SCALAR_TYPE, choosing whichever vector size the target prefers.

   If PREVAILING_MODE is VOIDmode, we have not yet chosen a vector mode
   for this vectorization region and want to "autodetect" the best choice.
   Otherwise, PREVAILING_MODE is a previously-chosen vector TYPE_MODE
   and we want the new type to be interoperable with it.  PREVAILING_MODE
   in this case can be a scalar integer mode or a vector mode; when it
   is a vector mode, the function acts like a tree-level version of
   related_vector_mode.  */
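/* Illustrative examples (not part of the original sources); the modes
   below are hypothetical and depend on the target hooks queried in the
   function:

     get_related_vectype_for_scalar_type (VOIDmode, int_type, 0)
       -> the target's preferred int vector, e.g. vector(8) int
     get_related_vectype_for_scalar_type (V8SImode, short_type, 0)
       -> a short vector chosen to be interoperable with V8SImode
     get_related_vectype_for_scalar_type (V8SImode, int_type, 4)
       -> vector(4) int, or NULL_TREE if no such mode exists.  */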
10936 get_related_vectype_for_scalar_type (machine_mode prevailing_mode
,
10937 tree scalar_type
, poly_uint64 nunits
)
10939 tree orig_scalar_type
= scalar_type
;
10940 scalar_mode inner_mode
;
10941 machine_mode simd_mode
;
10944 if (!is_int_mode (TYPE_MODE (scalar_type
), &inner_mode
)
10945 && !is_float_mode (TYPE_MODE (scalar_type
), &inner_mode
))
10948 unsigned int nbytes
= GET_MODE_SIZE (inner_mode
);
10950 /* For vector types of elements whose mode precision doesn't
10951 match their types precision we use a element type of mode
10952 precision. The vectorization routines will have to make sure
10953 they support the proper result truncation/extension.
10954 We also make sure to build vector types with INTEGER_TYPE
10955 component type only. */
10956 if (INTEGRAL_TYPE_P (scalar_type
)
10957 && (GET_MODE_BITSIZE (inner_mode
) != TYPE_PRECISION (scalar_type
)
10958 || TREE_CODE (scalar_type
) != INTEGER_TYPE
))
10959 scalar_type
= build_nonstandard_integer_type (GET_MODE_BITSIZE (inner_mode
),
10960 TYPE_UNSIGNED (scalar_type
));
10962 /* We shouldn't end up building VECTOR_TYPEs of non-scalar components.
10963 When the component mode passes the above test simply use a type
10964 corresponding to that mode. The theory is that any use that
10965 would cause problems with this will disable vectorization anyway. */
10966 else if (!SCALAR_FLOAT_TYPE_P (scalar_type
)
10967 && !INTEGRAL_TYPE_P (scalar_type
))
10968 scalar_type
= lang_hooks
.types
.type_for_mode (inner_mode
, 1);
10970 /* We can't build a vector type of elements with alignment bigger than
10972 else if (nbytes
< TYPE_ALIGN_UNIT (scalar_type
))
10973 scalar_type
= lang_hooks
.types
.type_for_mode (inner_mode
,
10974 TYPE_UNSIGNED (scalar_type
));
10976 /* If we felt back to using the mode fail if there was
10977 no scalar type for it. */
10978 if (scalar_type
== NULL_TREE
)
10981 /* If no prevailing mode was supplied, use the mode the target prefers.
10982 Otherwise lookup a vector mode based on the prevailing mode. */
10983 if (prevailing_mode
== VOIDmode
)
10985 gcc_assert (known_eq (nunits
, 0U));
10986 simd_mode
= targetm
.vectorize
.preferred_simd_mode (inner_mode
);
10987 if (SCALAR_INT_MODE_P (simd_mode
))
10989 /* Traditional behavior is not to take the integer mode
10990 literally, but simply to use it as a way of determining
10991 the vector size. It is up to mode_for_vector to decide
10992 what the TYPE_MODE should be.
10994 Note that nunits == 1 is allowed in order to support single
10995 element vector types. */
10996 if (!multiple_p (GET_MODE_SIZE (simd_mode
), nbytes
, &nunits
)
10997 || !mode_for_vector (inner_mode
, nunits
).exists (&simd_mode
))
11001 else if (SCALAR_INT_MODE_P (prevailing_mode
)
11002 || !related_vector_mode (prevailing_mode
,
11003 inner_mode
, nunits
).exists (&simd_mode
))
11005 /* Fall back to using mode_for_vector, mostly in the hope of being
11006 able to use an integer mode. */
11007 if (known_eq (nunits
, 0U)
11008 && !multiple_p (GET_MODE_SIZE (prevailing_mode
), nbytes
, &nunits
))
11011 if (!mode_for_vector (inner_mode
, nunits
).exists (&simd_mode
))
11015 vectype
= build_vector_type_for_mode (scalar_type
, simd_mode
);
11017 /* In cases where the mode was chosen by mode_for_vector, check that
11018 the target actually supports the chosen mode, or that it at least
11019 allows the vector mode to be replaced by a like-sized integer. */
11020 if (!VECTOR_MODE_P (TYPE_MODE (vectype
))
11021 && !INTEGRAL_MODE_P (TYPE_MODE (vectype
)))
11024 /* Re-attach the address-space qualifier if we canonicalized the scalar
11026 if (TYPE_ADDR_SPACE (orig_scalar_type
) != TYPE_ADDR_SPACE (vectype
))
11027 return build_qualified_type
11028 (vectype
, KEEP_QUAL_ADDR_SPACE (TYPE_QUALS (orig_scalar_type
)));
/* Function get_vectype_for_scalar_type.

   Returns the vector type corresponding to SCALAR_TYPE as supported
   by the target.  If GROUP_SIZE is nonzero and we're performing BB
   vectorization, make sure that the number of elements in the vector
   is no bigger than GROUP_SIZE.  */
static tree
get_vectype_for_scalar_type (vec_info *vinfo, tree scalar_type,
			     unsigned int group_size)
{
  /* For BB vectorization, we should always have a group size once we've
     constructed the SLP tree; the only valid uses of zero GROUP_SIZEs
     are tentative requests during things like early data reference
     analysis and pattern recognition.  */
  if (is_a <bb_vec_info> (vinfo))
    gcc_assert (vinfo->slp_instances.is_empty () || group_size != 0);
  else
    group_size = 0;

  tree vectype = get_related_vectype_for_scalar_type (vinfo->vector_mode,
						      scalar_type);
  if (vectype && vinfo->vector_mode == VOIDmode)
    vinfo->vector_mode = TYPE_MODE (vectype);

  /* Register the natural choice of vector type, before the group size
     has been applied.  */
  if (vectype)
    vinfo->used_vector_modes.add (TYPE_MODE (vectype));

  /* If the natural choice of vector type doesn't satisfy GROUP_SIZE,
     try again with an explicit number of elements.  */
  if (vectype
      && group_size
      && maybe_ge (TYPE_VECTOR_SUBPARTS (vectype), group_size))
    {
      /* Start with the biggest number of units that fits within
	 GROUP_SIZE and halve it until we find a valid vector type.
	 Usually either the first attempt will succeed or all will
	 fail (in the latter case because GROUP_SIZE is too small
	 for the target), but it's possible that a target could have
	 a hole between supported vector types.

	 If GROUP_SIZE is not a power of 2, this has the effect of
	 trying the largest power of 2 that fits within the group,
	 even though the group is not a multiple of that vector size.
	 The BB vectorizer will then try to carve up the group into
	 smaller pieces.  */
      unsigned int nunits = 1 << floor_log2 (group_size);
      do
	{
	  vectype = get_related_vectype_for_scalar_type (vinfo->vector_mode,
							 scalar_type, nunits);
	  nunits /= 2;
	}
      while (nunits > 1 && !vectype);
    }

  return vectype;
}
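/* Worked example (not part of the original sources): for BB
   vectorization with GROUP_SIZE == 6 on a hypothetical target whose
   natural int vector is vector(4) int, the natural choice already has
   fewer subparts than the group, so it is kept.  With GROUP_SIZE == 3
   the loop above starts from nunits = 1 << floor_log2 (3) == 2 and
   tries a 2-element vector; if the target has no such type, nunits
   drops to 1 and the search stops with a null result.  */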
/* Return the vector type corresponding to SCALAR_TYPE as supported
   by the target.  NODE, if nonnull, is the SLP tree node that will
   use the returned vector type.  */

tree
get_vectype_for_scalar_type (vec_info *vinfo, tree scalar_type, slp_tree node)
{
  unsigned int group_size = 0;
  if (node)
    group_size = SLP_TREE_LANES (node);
  return get_vectype_for_scalar_type (vinfo, scalar_type, group_size);
}
/* Function get_mask_type_for_scalar_type.

   Returns the mask type corresponding to a result of comparison
   of vectors of specified SCALAR_TYPE as supported by target.
   If GROUP_SIZE is nonzero and we're performing BB vectorization,
   make sure that the number of elements in the vector is no bigger
   than GROUP_SIZE.  */

tree
get_mask_type_for_scalar_type (vec_info *vinfo, tree scalar_type,
			       unsigned int group_size)
{
  tree vectype = get_vectype_for_scalar_type (vinfo, scalar_type, group_size);

  if (!vectype)
    return NULL;

  return truth_type_for (vectype);
}
/* Function get_same_sized_vectype

   Returns a vector type corresponding to SCALAR_TYPE of size
   VECTOR_TYPE if supported by the target.  */

tree
get_same_sized_vectype (tree scalar_type, tree vector_type)
{
  if (VECT_SCALAR_BOOLEAN_TYPE_P (scalar_type))
    return truth_type_for (vector_type);

  poly_uint64 nunits;
  if (!multiple_p (GET_MODE_SIZE (TYPE_MODE (vector_type)),
		   GET_MODE_SIZE (TYPE_MODE (scalar_type)), &nunits))
    return NULL_TREE;

  return get_related_vectype_for_scalar_type (TYPE_MODE (vector_type),
					      scalar_type, nunits);
}
/* Return true if replacing LOOP_VINFO->vector_mode with VECTOR_MODE
   would not change the chosen vector modes.  */

bool
vect_chooses_same_modes_p (vec_info *vinfo, machine_mode vector_mode)
{
  for (vec_info::mode_set::iterator i = vinfo->used_vector_modes.begin ();
       i != vinfo->used_vector_modes.end (); ++i)
    if (!VECTOR_MODE_P (*i)
	|| related_vector_mode (vector_mode, GET_MODE_INNER (*i), 0) != *i)
      return false;
  return true;
}
/* Function vect_is_simple_use.

   VINFO - the vect info of the loop or basic block that is being vectorized.
   OPERAND - operand in the loop or bb.

   DEF_STMT_INFO_OUT (optional) - information about the defining stmt in
     case OPERAND is an SSA_NAME that is defined in the vectorizable region
   DEF_STMT_OUT (optional) - the defining stmt in case OPERAND is an SSA_NAME;
     the definition could be anywhere in the function
   DT - the type of definition

   Returns whether a stmt with OPERAND can be vectorized.
   For loops, supportable operands are constants, loop invariants, and operands
   that are defined by the current iteration of the loop.  Unsupportable
   operands are those that are defined by a previous iteration of the loop (as
   is the case in reduction/induction computations).
   For basic blocks, supportable operands are constants and bb invariants.
   For now, operands defined outside the basic block are not supported.  */
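/* Illustrative note (not part of the original sources): in a loop such as

     for (i = 0; i < n; ++i)
       s_3 = s_2 + a[i] * 4;

   the literal 4 is classified vect_constant_def, a value computed before
   the loop would be vect_external_def, the load a[i] (computed by the
   current iteration) is vect_internal_def, while s_2, carried over from
   the previous iteration, belongs to the reduction cycle and is the kind
   of operand the comment above calls unsupportable here.  */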
11182 vect_is_simple_use (tree operand
, vec_info
*vinfo
, enum vect_def_type
*dt
,
11183 stmt_vec_info
*def_stmt_info_out
, gimple
**def_stmt_out
)
11185 if (def_stmt_info_out
)
11186 *def_stmt_info_out
= NULL
;
11188 *def_stmt_out
= NULL
;
11189 *dt
= vect_unknown_def_type
;
11191 if (dump_enabled_p ())
11193 dump_printf_loc (MSG_NOTE
, vect_location
,
11194 "vect_is_simple_use: operand ");
11195 if (TREE_CODE (operand
) == SSA_NAME
11196 && !SSA_NAME_IS_DEFAULT_DEF (operand
))
11197 dump_gimple_expr (MSG_NOTE
, TDF_SLIM
, SSA_NAME_DEF_STMT (operand
), 0);
11199 dump_generic_expr (MSG_NOTE
, TDF_SLIM
, operand
);
11202 if (CONSTANT_CLASS_P (operand
))
11203 *dt
= vect_constant_def
;
11204 else if (is_gimple_min_invariant (operand
))
11205 *dt
= vect_external_def
;
11206 else if (TREE_CODE (operand
) != SSA_NAME
)
11207 *dt
= vect_unknown_def_type
;
11208 else if (SSA_NAME_IS_DEFAULT_DEF (operand
))
11209 *dt
= vect_external_def
;
11212 gimple
*def_stmt
= SSA_NAME_DEF_STMT (operand
);
11213 stmt_vec_info stmt_vinfo
= vinfo
->lookup_def (operand
);
11215 *dt
= vect_external_def
;
11218 stmt_vinfo
= vect_stmt_to_vectorize (stmt_vinfo
);
11219 def_stmt
= stmt_vinfo
->stmt
;
11220 switch (gimple_code (def_stmt
))
11223 case GIMPLE_ASSIGN
:
11225 *dt
= STMT_VINFO_DEF_TYPE (stmt_vinfo
);
11228 *dt
= vect_unknown_def_type
;
11231 if (def_stmt_info_out
)
11232 *def_stmt_info_out
= stmt_vinfo
;
11235 *def_stmt_out
= def_stmt
;
11238 if (dump_enabled_p ())
11240 dump_printf (MSG_NOTE
, ", type of def: ");
11243 case vect_uninitialized_def
:
11244 dump_printf (MSG_NOTE
, "uninitialized\n");
11246 case vect_constant_def
:
11247 dump_printf (MSG_NOTE
, "constant\n");
11249 case vect_external_def
:
11250 dump_printf (MSG_NOTE
, "external\n");
11252 case vect_internal_def
:
11253 dump_printf (MSG_NOTE
, "internal\n");
11255 case vect_induction_def
:
11256 dump_printf (MSG_NOTE
, "induction\n");
11258 case vect_reduction_def
:
11259 dump_printf (MSG_NOTE
, "reduction\n");
11261 case vect_double_reduction_def
:
11262 dump_printf (MSG_NOTE
, "double reduction\n");
11264 case vect_nested_cycle
:
11265 dump_printf (MSG_NOTE
, "nested cycle\n");
11267 case vect_unknown_def_type
:
11268 dump_printf (MSG_NOTE
, "unknown\n");
11273 if (*dt
== vect_unknown_def_type
)
11275 if (dump_enabled_p ())
11276 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
11277 "Unsupported pattern.\n");
11284 /* Function vect_is_simple_use.
11286 Same as vect_is_simple_use but also determines the vector operand
11287 type of OPERAND and stores it to *VECTYPE. If the definition of
11288 OPERAND is vect_uninitialized_def, vect_constant_def or
11289 vect_external_def *VECTYPE will be set to NULL_TREE and the caller
11290 is responsible to compute the best suited vector type for the
11294 vect_is_simple_use (tree operand
, vec_info
*vinfo
, enum vect_def_type
*dt
,
11295 tree
*vectype
, stmt_vec_info
*def_stmt_info_out
,
11296 gimple
**def_stmt_out
)
11298 stmt_vec_info def_stmt_info
;
11300 if (!vect_is_simple_use (operand
, vinfo
, dt
, &def_stmt_info
, &def_stmt
))
11304 *def_stmt_out
= def_stmt
;
11305 if (def_stmt_info_out
)
11306 *def_stmt_info_out
= def_stmt_info
;
11308 /* Now get a vector type if the def is internal, otherwise supply
11309 NULL_TREE and leave it up to the caller to figure out a proper
11310 type for the use stmt. */
11311 if (*dt
== vect_internal_def
11312 || *dt
== vect_induction_def
11313 || *dt
== vect_reduction_def
11314 || *dt
== vect_double_reduction_def
11315 || *dt
== vect_nested_cycle
)
11317 *vectype
= STMT_VINFO_VECTYPE (def_stmt_info
);
11318 gcc_assert (*vectype
!= NULL_TREE
);
11319 if (dump_enabled_p ())
11320 dump_printf_loc (MSG_NOTE
, vect_location
,
11321 "vect_is_simple_use: vectype %T\n", *vectype
);
11323 else if (*dt
== vect_uninitialized_def
11324 || *dt
== vect_constant_def
11325 || *dt
== vect_external_def
)
11326 *vectype
= NULL_TREE
;
11328 gcc_unreachable ();
11333 /* Function vect_is_simple_use.
11335 Same as vect_is_simple_use but determines the operand by operand
11336 position OPERAND from either STMT or SLP_NODE, filling in *OP
11337 and *SLP_DEF (when SLP_NODE is not NULL). */
11340 vect_is_simple_use (vec_info
*vinfo
, stmt_vec_info stmt
, slp_tree slp_node
,
11341 unsigned operand
, tree
*op
, slp_tree
*slp_def
,
11342 enum vect_def_type
*dt
,
11343 tree
*vectype
, stmt_vec_info
*def_stmt_info_out
)
11347 slp_tree child
= SLP_TREE_CHILDREN (slp_node
)[operand
];
11349 *vectype
= SLP_TREE_VECTYPE (child
);
11350 if (SLP_TREE_DEF_TYPE (child
) == vect_internal_def
)
11352 *op
= gimple_get_lhs (SLP_TREE_REPRESENTATIVE (child
)->stmt
);
11353 return vect_is_simple_use (*op
, vinfo
, dt
, def_stmt_info_out
);
11357 if (def_stmt_info_out
)
11358 *def_stmt_info_out
= NULL
;
11359 *op
= SLP_TREE_SCALAR_OPS (child
)[0];
11360 *dt
= SLP_TREE_DEF_TYPE (child
);
11367 if (gassign
*ass
= dyn_cast
<gassign
*> (stmt
->stmt
))
11369 if (gimple_assign_rhs_code (ass
) == COND_EXPR
11370 && COMPARISON_CLASS_P (gimple_assign_rhs1 (ass
)))
11373 *op
= TREE_OPERAND (gimple_assign_rhs1 (ass
), operand
);
11375 *op
= gimple_op (ass
, operand
);
11377 else if (gimple_assign_rhs_code (ass
) == VIEW_CONVERT_EXPR
)
11378 *op
= TREE_OPERAND (gimple_assign_rhs1 (ass
), 0);
11380 *op
= gimple_op (ass
, operand
+ 1);
11382 else if (gcall
*call
= dyn_cast
<gcall
*> (stmt
->stmt
))
11384 if (gimple_call_internal_p (call
)
11385 && internal_store_fn_p (gimple_call_internal_fn (call
)))
11386 operand
= internal_fn_stored_value_index (gimple_call_internal_fn
11388 *op
= gimple_call_arg (call
, operand
);
11391 gcc_unreachable ();
11392 return vect_is_simple_use (*op
, vinfo
, dt
, vectype
, def_stmt_info_out
);
11396 /* If OP is not NULL and is external or constant update its vector
11397 type with VECTYPE. Returns true if successful or false if not,
11398 for example when conflicting vector types are present. */
11401 vect_maybe_update_slp_op_vectype (slp_tree op
, tree vectype
)
11403 if (!op
|| SLP_TREE_DEF_TYPE (op
) == vect_internal_def
)
11405 if (SLP_TREE_VECTYPE (op
))
11406 return types_compatible_p (SLP_TREE_VECTYPE (op
), vectype
);
11407 SLP_TREE_VECTYPE (op
) = vectype
;
/* Function supportable_widening_operation

   Check whether an operation represented by the code CODE is a
   widening operation that is supported by the target platform in
   vector form (i.e., when operating on arguments of type VECTYPE_IN
   producing a result of type VECTYPE_OUT).

   Widening operations we currently support are NOP (CONVERT), FLOAT,
   FIX_TRUNC and WIDEN_MULT.  This function checks if these operations
   are supported by the target platform either directly (via vector
   tree-codes), or via target builtins.

   - CODE1 and CODE2 are codes of vector operations to be used when
     vectorizing the operation, if available.
   - MULTI_STEP_CVT determines the number of required intermediate steps in
     case of multi-step conversion (like char->short->int - in that case
     MULTI_STEP_CVT will be 1).
   - INTERM_TYPES contains the intermediate type required to perform the
     widening operation (short in the above example).  */
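/* Worked example (not part of the original sources): widening a char
   operand all the way to int usually cannot be done in one step, so for
   a conversion the function would return the unpacking codes for the
   first char -> short step in *CODE1/*CODE2, record the short vector
   type in INTERM_TYPES and set *MULTI_STEP_CVT to 1, meaning one
   intermediate step (char -> short -> int) is required.  The exact
   codes depend on the target's optabs.  */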
11433 supportable_widening_operation (vec_info
*vinfo
,
11434 enum tree_code code
, stmt_vec_info stmt_info
,
11435 tree vectype_out
, tree vectype_in
,
11436 enum tree_code
*code1
, enum tree_code
*code2
,
11437 int *multi_step_cvt
,
11438 vec
<tree
> *interm_types
)
11440 loop_vec_info loop_info
= dyn_cast
<loop_vec_info
> (vinfo
);
11441 class loop
*vect_loop
= NULL
;
11442 machine_mode vec_mode
;
11443 enum insn_code icode1
, icode2
;
11444 optab optab1
, optab2
;
11445 tree vectype
= vectype_in
;
11446 tree wide_vectype
= vectype_out
;
11447 enum tree_code c1
, c2
;
11449 tree prev_type
, intermediate_type
;
11450 machine_mode intermediate_mode
, prev_mode
;
11451 optab optab3
, optab4
;
11453 *multi_step_cvt
= 0;
11455 vect_loop
= LOOP_VINFO_LOOP (loop_info
);
11459 case WIDEN_MULT_EXPR
:
      /* The result of a vectorized widening operation usually requires
	 two vectors (because the widened results do not fit into one vector).
	 The generated vector results would normally be expected to be
	 generated in the same order as in the original scalar computation,
	 i.e. if 8 results are generated in each vector iteration, they are
	 to be organized as follows:
		vect1: [res1,res2,res3,res4],
		vect2: [res5,res6,res7,res8].

	 However, in the special case that the result of the widening
	 operation is used in a reduction computation only, the order doesn't
	 matter (because when vectorizing a reduction we change the order of
	 the computation).  Some targets can take advantage of this and
	 generate more efficient code.  For example, targets like Altivec,
	 that support widen_mult using a sequence of {mult_even,mult_odd}
	 generate the following vectors:
		vect1: [res1,res3,res5,res7],
		vect2: [res2,res4,res6,res8].

	 When vectorizing outer-loops, we execute the inner-loop sequentially
	 (each vectorized inner-loop iteration contributes to VF outer-loop
	 iterations in parallel).  We therefore don't allow changing the
	 order of the computation in the inner-loop during outer-loop
	 vectorization.  */
      /* TODO: Another case in which order doesn't *really* matter is when we
	 widen and then contract again, e.g. (short)((int)x * y >> 8).
	 Normally, pack_trunc performs an even/odd permute, whereas the
	 repack from an even/odd expansion would be an interleave, which
	 would be significantly simpler for e.g. AVX2.  */
      /* In any case, in order to avoid duplicating the code below, recurse
	 on VEC_WIDEN_MULT_EVEN_EXPR.  If it succeeds, all the return values
	 are properly set up for the caller.  If we fail, we'll continue with
	 a VEC_WIDEN_MULT_LO/HI_EXPR check.  */
11494 && STMT_VINFO_RELEVANT (stmt_info
) == vect_used_by_reduction
11495 && !nested_in_vect_loop_p (vect_loop
, stmt_info
)
11496 && supportable_widening_operation (vinfo
, VEC_WIDEN_MULT_EVEN_EXPR
,
11497 stmt_info
, vectype_out
,
11498 vectype_in
, code1
, code2
,
11499 multi_step_cvt
, interm_types
))
11501 /* Elements in a vector with vect_used_by_reduction property cannot
11502 be reordered if the use chain with this property does not have the
11503 same operation. One such an example is s += a * b, where elements
11504 in a and b cannot be reordered. Here we check if the vector defined
11505 by STMT is only directly used in the reduction statement. */
11506 tree lhs
= gimple_assign_lhs (stmt_info
->stmt
);
11507 stmt_vec_info use_stmt_info
= loop_info
->lookup_single_use (lhs
);
11509 && STMT_VINFO_DEF_TYPE (use_stmt_info
) == vect_reduction_def
)
11512 c1
= VEC_WIDEN_MULT_LO_EXPR
;
11513 c2
= VEC_WIDEN_MULT_HI_EXPR
;
11516 case DOT_PROD_EXPR
:
11517 c1
= DOT_PROD_EXPR
;
11518 c2
= DOT_PROD_EXPR
;
11526 case VEC_WIDEN_MULT_EVEN_EXPR
:
11527 /* Support the recursion induced just above. */
11528 c1
= VEC_WIDEN_MULT_EVEN_EXPR
;
11529 c2
= VEC_WIDEN_MULT_ODD_EXPR
;
11532 case WIDEN_LSHIFT_EXPR
:
11533 c1
= VEC_WIDEN_LSHIFT_LO_EXPR
;
11534 c2
= VEC_WIDEN_LSHIFT_HI_EXPR
;
11538 c1
= VEC_UNPACK_LO_EXPR
;
11539 c2
= VEC_UNPACK_HI_EXPR
;
11543 c1
= VEC_UNPACK_FLOAT_LO_EXPR
;
11544 c2
= VEC_UNPACK_FLOAT_HI_EXPR
;
11547 case FIX_TRUNC_EXPR
:
11548 c1
= VEC_UNPACK_FIX_TRUNC_LO_EXPR
;
11549 c2
= VEC_UNPACK_FIX_TRUNC_HI_EXPR
;
11553 gcc_unreachable ();
11556 if (BYTES_BIG_ENDIAN
&& c1
!= VEC_WIDEN_MULT_EVEN_EXPR
)
11557 std::swap (c1
, c2
);
11559 if (code
== FIX_TRUNC_EXPR
)
11561 /* The signedness is determined from output operand. */
11562 optab1
= optab_for_tree_code (c1
, vectype_out
, optab_default
);
11563 optab2
= optab_for_tree_code (c2
, vectype_out
, optab_default
);
11565 else if (CONVERT_EXPR_CODE_P (code
)
11566 && VECTOR_BOOLEAN_TYPE_P (wide_vectype
)
11567 && VECTOR_BOOLEAN_TYPE_P (vectype
)
11568 && TYPE_MODE (wide_vectype
) == TYPE_MODE (vectype
)
11569 && SCALAR_INT_MODE_P (TYPE_MODE (vectype
)))
11571 /* If the input and result modes are the same, a different optab
11572 is needed where we pass in the number of units in vectype. */
11573 optab1
= vec_unpacks_sbool_lo_optab
;
11574 optab2
= vec_unpacks_sbool_hi_optab
;
11578 optab1
= optab_for_tree_code (c1
, vectype
, optab_default
);
11579 optab2
= optab_for_tree_code (c2
, vectype
, optab_default
);
11582 if (!optab1
|| !optab2
)
11585 vec_mode
= TYPE_MODE (vectype
);
11586 if ((icode1
= optab_handler (optab1
, vec_mode
)) == CODE_FOR_nothing
11587 || (icode2
= optab_handler (optab2
, vec_mode
)) == CODE_FOR_nothing
)
11593 if (insn_data
[icode1
].operand
[0].mode
== TYPE_MODE (wide_vectype
)
11594 && insn_data
[icode2
].operand
[0].mode
== TYPE_MODE (wide_vectype
))
11596 if (!VECTOR_BOOLEAN_TYPE_P (vectype
))
11598 /* For scalar masks we may have different boolean
11599 vector types having the same QImode. Thus we
11600 add additional check for elements number. */
11601 if (known_eq (TYPE_VECTOR_SUBPARTS (vectype
),
11602 TYPE_VECTOR_SUBPARTS (wide_vectype
) * 2))
  /* Check if it's a multi-step conversion that can be done using intermediate
     types.  */

  prev_type = vectype;
  prev_mode = vec_mode;

  if (!CONVERT_EXPR_CODE_P (code))
    return false;

  /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
     intermediate steps in the promotion sequence.  We try
     MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do
     not.  */
  interm_types->create (MAX_INTERM_CVT_STEPS);
  for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
    {
      intermediate_mode = insn_data[icode1].operand[0].mode;
      if (VECTOR_BOOLEAN_TYPE_P (prev_type))
	intermediate_type
	  = vect_halve_mask_nunits (prev_type, intermediate_mode);
      else
	intermediate_type
	  = lang_hooks.types.type_for_mode (intermediate_mode,
					    TYPE_UNSIGNED (prev_type));

      if (VECTOR_BOOLEAN_TYPE_P (intermediate_type)
	  && VECTOR_BOOLEAN_TYPE_P (prev_type)
	  && intermediate_mode == prev_mode
	  && SCALAR_INT_MODE_P (prev_mode))
	{
	  /* If the input and result modes are the same, a different optab
	     is needed where we pass in the number of units in vectype.  */
	  optab3 = vec_unpacks_sbool_lo_optab;
	  optab4 = vec_unpacks_sbool_hi_optab;
	}
      else
	{
	  optab3 = optab_for_tree_code (c1, intermediate_type, optab_default);
	  optab4 = optab_for_tree_code (c2, intermediate_type, optab_default);
	}
      if (!optab3 || !optab4
	  || (icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing
	  || insn_data[icode1].operand[0].mode != intermediate_mode
	  || (icode2 = optab_handler (optab2, prev_mode)) == CODE_FOR_nothing
	  || insn_data[icode2].operand[0].mode != intermediate_mode
	  || ((icode1 = optab_handler (optab3, intermediate_mode))
	      == CODE_FOR_nothing)
	  || ((icode2 = optab_handler (optab4, intermediate_mode))
	      == CODE_FOR_nothing))
	break;

      interm_types->quick_push (intermediate_type);
      (*multi_step_cvt)++;
      if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
	  && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
	{
	  if (!VECTOR_BOOLEAN_TYPE_P (vectype))
	    return true;
	  if (known_eq (TYPE_VECTOR_SUBPARTS (intermediate_type),
			TYPE_VECTOR_SUBPARTS (wide_vectype) * 2))
	    return true;
	}

      prev_type = intermediate_type;
      prev_mode = intermediate_mode;
    }

  interm_types->release ();
  return false;
}
/* Function supportable_narrowing_operation

   Check whether an operation represented by the code CODE is a
   narrowing operation that is supported by the target platform in
   vector form (i.e., when operating on arguments of type VECTYPE_IN
   and producing a result of type VECTYPE_OUT).

   Narrowing operations we currently support are NOP (CONVERT), FIX_TRUNC
   and FLOAT.  This function checks if these operations are supported by
   the target platform directly via vector tree-codes.

   Output:
   - CODE1 is the code of a vector operation to be used when
     vectorizing the operation, if available.
   - MULTI_STEP_CVT determines the number of required intermediate steps in
     case of multi-step conversion (like int->short->char - in that case
     MULTI_STEP_CVT will be 1).
   - INTERM_TYPES contains the intermediate type required to perform the
     narrowing operation (short in the above example).  */
bool
supportable_narrowing_operation (enum tree_code code,
				 tree vectype_out, tree vectype_in,
				 enum tree_code *code1, int *multi_step_cvt,
				 vec<tree> *interm_types)
{
  machine_mode vec_mode;
  enum insn_code icode1;
  optab optab1, interm_optab;
  tree vectype = vectype_in;
  tree narrow_vectype = vectype_out;
  enum tree_code c1;
  tree intermediate_type, prev_type;
  machine_mode intermediate_mode, prev_mode;
  int i;
  bool uns;

  *multi_step_cvt = 0;
  switch (code)
    {
    CASE_CONVERT:
      c1 = VEC_PACK_TRUNC_EXPR;
      if (VECTOR_BOOLEAN_TYPE_P (narrow_vectype)
	  && VECTOR_BOOLEAN_TYPE_P (vectype)
	  && TYPE_MODE (narrow_vectype) == TYPE_MODE (vectype)
	  && SCALAR_INT_MODE_P (TYPE_MODE (vectype)))
	optab1 = vec_pack_sbool_trunc_optab;
      else
	optab1 = optab_for_tree_code (c1, vectype, optab_default);
      break;

    case FIX_TRUNC_EXPR:
      c1 = VEC_PACK_FIX_TRUNC_EXPR;
      /* The signedness is determined from the output operand.  */
      optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
      break;

    case FLOAT_EXPR:
      c1 = VEC_PACK_FLOAT_EXPR;
      optab1 = optab_for_tree_code (c1, vectype, optab_default);
      break;

    default:
      gcc_unreachable ();
    }
  if (!optab1)
    return false;

  vec_mode = TYPE_MODE (vectype);
  if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing)
    return false;

  *code1 = c1;

  if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
    {
      if (!VECTOR_BOOLEAN_TYPE_P (vectype))
	return true;
      /* For scalar masks we may have different boolean
	 vector types having the same QImode.  Thus we
	 add an additional check for the number of elements.  */
      if (known_eq (TYPE_VECTOR_SUBPARTS (vectype) * 2,
		    TYPE_VECTOR_SUBPARTS (narrow_vectype)))
	return true;
    }

  if (code == FLOAT_EXPR)
    return false;
  /* Check if it's a multi-step conversion that can be done using intermediate
     types.  */
  prev_mode = vec_mode;
  prev_type = vectype;
  if (code == FIX_TRUNC_EXPR)
    uns = TYPE_UNSIGNED (vectype_out);
  else
    uns = TYPE_UNSIGNED (vectype);

  /* For multi-step FIX_TRUNC_EXPR prefer signed floating to integer
     conversion over unsigned, as unsigned FIX_TRUNC_EXPR is often more
     costly than signed.  */
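  /* Illustrative sketch (added commentary, not from the original source):
     for something like double -> unsigned char, the first step would
     nominally be an unsigned double -> unsigned int conversion.  If the
     signed variant of that conversion has a handler with the same result
     mode, the code below switches to it and carries signed intermediate
     types from then on; the remaining pack steps only truncate bits.  */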
  if (code == FIX_TRUNC_EXPR && uns)
    {
      enum insn_code icode2;

      intermediate_type
	= lang_hooks.types.type_for_mode (TYPE_MODE (vectype_out), 0);
      interm_optab
	= optab_for_tree_code (c1, intermediate_type, optab_default);
      if (interm_optab != unknown_optab
	  && (icode2 = optab_handler (optab1, vec_mode)) != CODE_FOR_nothing
	  && insn_data[icode1].operand[0].mode
	     == insn_data[icode2].operand[0].mode)
	{
	  uns = false;
	  optab1 = interm_optab;
	  icode1 = icode2;
	}
    }
  /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
     intermediate steps in the narrowing sequence.  We try
     MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do not.  */
  interm_types->create (MAX_INTERM_CVT_STEPS);
  for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
    {
      intermediate_mode = insn_data[icode1].operand[0].mode;
      if (VECTOR_BOOLEAN_TYPE_P (prev_type))
	intermediate_type
	  = vect_double_mask_nunits (prev_type, intermediate_mode);
      else
	intermediate_type
	  = lang_hooks.types.type_for_mode (intermediate_mode, uns);
      if (VECTOR_BOOLEAN_TYPE_P (intermediate_type)
	  && VECTOR_BOOLEAN_TYPE_P (prev_type)
	  && intermediate_mode == prev_mode
	  && SCALAR_INT_MODE_P (prev_mode))
	interm_optab = vec_pack_sbool_trunc_optab;
      else
	interm_optab
	  = optab_for_tree_code (VEC_PACK_TRUNC_EXPR, intermediate_type,
				 optab_default);
      if (!interm_optab
	  || ((icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing)
	  || insn_data[icode1].operand[0].mode != intermediate_mode
	  || ((icode1 = optab_handler (interm_optab, intermediate_mode))
	      == CODE_FOR_nothing))
	break;

      interm_types->quick_push (intermediate_type);
      (*multi_step_cvt)++;
      if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
	{
	  if (!VECTOR_BOOLEAN_TYPE_P (vectype))
	    return true;
	  if (known_eq (TYPE_VECTOR_SUBPARTS (intermediate_type) * 2,
			TYPE_VECTOR_SUBPARTS (narrow_vectype)))
	    return true;
	}

      prev_mode = intermediate_mode;
      prev_type = intermediate_type;
      optab1 = interm_optab;
    }

  interm_types->release ();
  return false;
}
/* Generate and return a statement that sets vector mask MASK such that
   MASK[I] is true iff J + START_INDEX < END_INDEX for all J <= I.  */
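/* For example (illustrative only): with START_INDEX = 3, END_INDEX = 6 and
   an 8-element mask, only the first three indices satisfy J + 3 < 6 for
   all J <= I, so the result is MASK = { 1, 1, 1, 0, 0, 0, 0, 0 }.  */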
gcall *
vect_gen_while (tree mask, tree start_index, tree end_index)
{
  tree cmp_type = TREE_TYPE (start_index);
  tree mask_type = TREE_TYPE (mask);
  gcc_checking_assert (direct_internal_fn_supported_p (IFN_WHILE_ULT,
						       cmp_type, mask_type,
						       OPTIMIZE_FOR_SPEED));
  gcall *call = gimple_build_call_internal (IFN_WHILE_ULT, 3,
					    start_index, end_index,
					    build_zero_cst (mask_type));
  gimple_call_set_lhs (call, mask);
  return call;
}
/* Generate a vector mask of type MASK_TYPE for which index I is false iff
   J + START_INDEX < END_INDEX for all J <= I.  Add the statements to SEQ.  */

tree
vect_gen_while_not (gimple_seq *seq, tree mask_type, tree start_index,
		    tree end_index)
{
  tree tmp = make_ssa_name (mask_type);
  gcall *call = vect_gen_while (tmp, start_index, end_index);
  gimple_seq_add_stmt (seq, call);
  return gimple_build (seq, BIT_NOT_EXPR, mask_type, tmp);
}
/* Try to compute the vector types required to vectorize STMT_INFO,
   returning true on success and false if vectorization isn't possible.
   If GROUP_SIZE is nonzero and we're performing BB vectorization,
   make sure that the number of elements in the vectors is no bigger
   than GROUP_SIZE.

   On success:

   - Set *STMT_VECTYPE_OUT to:
     - NULL_TREE if the statement doesn't need to be vectorized;
     - the equivalent of STMT_VINFO_VECTYPE otherwise.

   - Set *NUNITS_VECTYPE_OUT to the vector type that contains the maximum
     number of units needed to vectorize STMT_INFO, or NULL_TREE if the
     statement does not help to determine the overall number of units.  */
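/* Illustrative sketch (added commentary, not from the original source):
   for a widening statement such as int_x = (int) short_y, the statement
   vectype is based on the int result (say vector(4) int), while the
   nunits vectype is derived from the smallest scalar type in the
   statement (short), e.g. vector(8) short int, and so it is the type
   that constrains the overall number of units.  */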
opt_result
vect_get_vector_types_for_stmt (vec_info *vinfo, stmt_vec_info stmt_info,
				tree *stmt_vectype_out,
				tree *nunits_vectype_out,
				unsigned int group_size)
{
  gimple *stmt = stmt_info->stmt;

  /* For BB vectorization, we should always have a group size once we've
     constructed the SLP tree; the only valid uses of zero GROUP_SIZEs
     are tentative requests during things like early data reference
     analysis and pattern recognition.  */
  if (is_a <bb_vec_info> (vinfo))
    gcc_assert (vinfo->slp_instances.is_empty () || group_size != 0);

  *stmt_vectype_out = NULL_TREE;
  *nunits_vectype_out = NULL_TREE;
  if (gimple_get_lhs (stmt) == NULL_TREE
      /* MASK_STORE has no lhs, but is ok.  */
      && !gimple_call_internal_p (stmt, IFN_MASK_STORE))
    {
      if (is_a <gcall *> (stmt))
	{
	  /* Ignore calls with no lhs.  These must be calls to
	     #pragma omp simd functions, and what vectorization factor
	     it really needs can't be determined until
	     vectorizable_simd_clone_call.  */
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_NOTE, vect_location,
			     "defer to SIMD clone analysis.\n");
	  return opt_result::success ();
	}

      return opt_result::failure_at (stmt,
				     "not vectorized: irregular stmt.%G",
				     stmt);
    }

  if (VECTOR_MODE_P (TYPE_MODE (gimple_expr_type (stmt))))
    return opt_result::failure_at (stmt,
				   "not vectorized: vector stmt in loop:%G",
				   stmt);
  tree vectype;
  tree scalar_type = NULL_TREE;
  if (group_size == 0 && STMT_VINFO_VECTYPE (stmt_info))
    {
      vectype = STMT_VINFO_VECTYPE (stmt_info);
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "precomputed vectype: %T\n", vectype);
    }
  else if (vect_use_mask_type_p (stmt_info))
    {
      unsigned int precision = stmt_info->mask_precision;
      scalar_type = build_nonstandard_integer_type (precision, 1);
      vectype = get_mask_type_for_scalar_type (vinfo, scalar_type, group_size);
      if (!vectype)
	return opt_result::failure_at (stmt, "not vectorized: unsupported"
				       " data-type %T\n", scalar_type);
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location, "vectype: %T\n", vectype);
    }
  else
    {
      if (data_reference *dr = STMT_VINFO_DATA_REF (stmt_info))
	scalar_type = TREE_TYPE (DR_REF (dr));
      else if (gimple_call_internal_p (stmt, IFN_MASK_STORE))
	scalar_type = TREE_TYPE (gimple_call_arg (stmt, 3));
      else
	scalar_type = TREE_TYPE (gimple_get_lhs (stmt));

      if (dump_enabled_p ())
	{
	  if (group_size)
	    dump_printf_loc (MSG_NOTE, vect_location,
			     "get vectype for scalar type (group size %d):"
			     " %T\n", group_size, scalar_type);
	  else
	    dump_printf_loc (MSG_NOTE, vect_location,
			     "get vectype for scalar type: %T\n", scalar_type);
	}
      vectype = get_vectype_for_scalar_type (vinfo, scalar_type, group_size);
      if (!vectype)
	return opt_result::failure_at (stmt,
				       "not vectorized:"
				       " unsupported data-type %T\n",
				       scalar_type);

      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location, "vectype: %T\n", vectype);
    }

  *stmt_vectype_out = vectype;
  /* Don't try to compute scalar types if the stmt produces a boolean
     vector; use the existing vector type instead.  */
  tree nunits_vectype = vectype;
  if (!VECTOR_BOOLEAN_TYPE_P (vectype))
    {
      /* The number of units is set according to the smallest scalar
	 type (or the largest vector size, but we only support one
	 vector size per vectorization).  */
      HOST_WIDE_INT dummy;
      scalar_type = vect_get_smallest_scalar_type (stmt_info, &dummy, &dummy);
      if (scalar_type != TREE_TYPE (vectype))
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_NOTE, vect_location,
			     "get vectype for smallest scalar type: %T\n",
			     scalar_type);
	  nunits_vectype = get_vectype_for_scalar_type (vinfo, scalar_type,
							group_size);
	  if (!nunits_vectype)
	    return opt_result::failure_at
	      (stmt, "not vectorized: unsupported data-type %T\n",
	       scalar_type);
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_NOTE, vect_location, "nunits vectype: %T\n",
			     nunits_vectype);
	}
    }

  gcc_assert (multiple_p (TYPE_VECTOR_SUBPARTS (nunits_vectype),
			  TYPE_VECTOR_SUBPARTS (*stmt_vectype_out)));

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location, "nunits = ");
      dump_dec (MSG_NOTE, TYPE_VECTOR_SUBPARTS (nunits_vectype));
      dump_printf (MSG_NOTE, "\n");
    }

  *nunits_vectype_out = nunits_vectype;
  return opt_result::success ();
}
/* Generate and return statement sequence that sets vector length LEN that is:

   min_of_start_and_end = min (START_INDEX, END_INDEX);
   left_len = END_INDEX - min_of_start_and_end;
   rhs = min (left_len, LEN_LIMIT);
   LEN = rhs;

   Note: the cost of the code generated by this function is modeled
   by vect_estimate_min_profitable_iters, so changes here may need
   corresponding changes there.  */
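/* Worked example (added commentary, not from the original source): with
   START_INDEX = 12, END_INDEX = 16 and LEN_LIMIT = 8, the sequence
   computes min (12, 16) = 12, left_len = 16 - 12 = 4 and
   LEN = min (4, 8) = 4; with START_INDEX = 0 the result would instead be
   capped at LEN_LIMIT = 8.  */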
gimple_seq
vect_gen_len (tree len, tree start_index, tree end_index, tree len_limit)
{
  gimple_seq stmts = NULL;
  tree len_type = TREE_TYPE (len);
  gcc_assert (TREE_TYPE (start_index) == len_type);

  tree min = gimple_build (&stmts, MIN_EXPR, len_type, start_index, end_index);
  tree left_len = gimple_build (&stmts, MINUS_EXPR, len_type, end_index, min);
  tree rhs = gimple_build (&stmts, MIN_EXPR, len_type, left_len, len_limit);
  gimple *stmt = gimple_build_assign (len, rhs);
  gimple_seq_add_stmt (&stmts, stmt);

  return stmts;
}