1 /* Statement Analysis and Transformation for Vectorization
2 Copyright (C) 2003-2020 Free Software Foundation, Inc.
3 Contributed by Dorit Naishlos <dorit@il.ibm.com>
4 and Ira Rosen <irar@il.ibm.com>
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
24 #include "coretypes.h"
31 #include "optabs-tree.h"
32 #include "insn-config.h"
33 #include "recog.h" /* FIXME: for insn_data */
37 #include "fold-const.h"
38 #include "stor-layout.h"
41 #include "gimple-iterator.h"
42 #include "gimplify-me.h"
44 #include "tree-ssa-loop-manip.h"
47 #include "tree-ssa-loop.h"
48 #include "tree-scalar-evolution.h"
49 #include "tree-vectorizer.h"
51 #include "internal-fn.h"
52 #include "tree-vector-builder.h"
53 #include "vec-perm-indices.h"
54 #include "tree-ssa-loop-niter.h"
55 #include "gimple-fold.h"
59 /* For lang_hooks.types.type_for_mode. */
60 #include "langhooks.h"
62 /* Return the vectorized type for the given statement. */
65 stmt_vectype (class _stmt_vec_info
*stmt_info
)
67 return STMT_VINFO_VECTYPE (stmt_info
);
70 /* Return TRUE iff the given statement is in an inner loop relative to
71 the loop being vectorized. */
73 stmt_in_inner_loop_p (vec_info
*vinfo
, class _stmt_vec_info
*stmt_info
)
75 gimple
*stmt
= STMT_VINFO_STMT (stmt_info
);
76 basic_block bb
= gimple_bb (stmt
);
77 loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
);
83 loop
= LOOP_VINFO_LOOP (loop_vinfo
);
85 return (bb
->loop_father
== loop
->inner
);
88 /* Record the cost of a statement, either by directly informing the
89 target model or by saving it in a vector for later processing.
90 Return a preliminary estimate of the statement's cost. */
93 record_stmt_cost (stmt_vector_for_cost
*body_cost_vec
, int count
,
94 enum vect_cost_for_stmt kind
, stmt_vec_info stmt_info
,
95 tree vectype
, int misalign
,
96 enum vect_cost_model_location where
)
98 if ((kind
== vector_load
|| kind
== unaligned_load
)
99 && (stmt_info
&& STMT_VINFO_GATHER_SCATTER_P (stmt_info
)))
100 kind
= vector_gather_load
;
101 if ((kind
== vector_store
|| kind
== unaligned_store
)
102 && (stmt_info
&& STMT_VINFO_GATHER_SCATTER_P (stmt_info
)))
103 kind
= vector_scatter_store
;
105 stmt_info_for_cost si
= { count
, kind
, where
, stmt_info
, vectype
, misalign
};
106 body_cost_vec
->safe_push (si
);
109 (builtin_vectorization_cost (kind
, vectype
, misalign
) * count
);
112 /* Return a variable of type ELEM_TYPE[NELEMS]. */
115 create_vector_array (tree elem_type
, unsigned HOST_WIDE_INT nelems
)
117 return create_tmp_var (build_array_type_nelts (elem_type
, nelems
),
121 /* ARRAY is an array of vectors created by create_vector_array.
122 Return an SSA_NAME for the vector in index N. The reference
123 is part of the vectorization of STMT_INFO and the vector is associated
124 with scalar destination SCALAR_DEST. */
127 read_vector_array (vec_info
*vinfo
,
128 stmt_vec_info stmt_info
, gimple_stmt_iterator
*gsi
,
129 tree scalar_dest
, tree array
, unsigned HOST_WIDE_INT n
)
131 tree vect_type
, vect
, vect_name
, array_ref
;
134 gcc_assert (TREE_CODE (TREE_TYPE (array
)) == ARRAY_TYPE
);
135 vect_type
= TREE_TYPE (TREE_TYPE (array
));
136 vect
= vect_create_destination_var (scalar_dest
, vect_type
);
137 array_ref
= build4 (ARRAY_REF
, vect_type
, array
,
138 build_int_cst (size_type_node
, n
),
139 NULL_TREE
, NULL_TREE
);
141 new_stmt
= gimple_build_assign (vect
, array_ref
);
142 vect_name
= make_ssa_name (vect
, new_stmt
);
143 gimple_assign_set_lhs (new_stmt
, vect_name
);
144 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
149 /* ARRAY is an array of vectors created by create_vector_array.
150 Emit code to store SSA_NAME VECT in index N of the array.
151 The store is part of the vectorization of STMT_INFO. */
154 write_vector_array (vec_info
*vinfo
,
155 stmt_vec_info stmt_info
, gimple_stmt_iterator
*gsi
,
156 tree vect
, tree array
, unsigned HOST_WIDE_INT n
)
161 array_ref
= build4 (ARRAY_REF
, TREE_TYPE (vect
), array
,
162 build_int_cst (size_type_node
, n
),
163 NULL_TREE
, NULL_TREE
);
165 new_stmt
= gimple_build_assign (array_ref
, vect
);
166 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
169 /* PTR is a pointer to an array of type TYPE. Return a representation
170 of *PTR. The memory reference replaces those in FIRST_DR
174 create_array_ref (tree type
, tree ptr
, tree alias_ptr_type
)
178 mem_ref
= build2 (MEM_REF
, type
, ptr
, build_int_cst (alias_ptr_type
, 0));
179 /* Arrays have the same alignment as their type. */
180 set_ptr_info_alignment (get_ptr_info (ptr
), TYPE_ALIGN_UNIT (type
), 0);
184 /* Add a clobber of variable VAR to the vectorization of STMT_INFO.
185 Emit the clobber before *GSI. */
188 vect_clobber_variable (vec_info
*vinfo
, stmt_vec_info stmt_info
,
189 gimple_stmt_iterator
*gsi
, tree var
)
191 tree clobber
= build_clobber (TREE_TYPE (var
));
192 gimple
*new_stmt
= gimple_build_assign (var
, clobber
);
193 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
196 /* Utility functions used by vect_mark_stmts_to_be_vectorized. */
198 /* Function vect_mark_relevant.
200 Mark STMT_INFO as "relevant for vectorization" and add it to WORKLIST. */
203 vect_mark_relevant (vec
<stmt_vec_info
> *worklist
, stmt_vec_info stmt_info
,
204 enum vect_relevant relevant
, bool live_p
)
206 enum vect_relevant save_relevant
= STMT_VINFO_RELEVANT (stmt_info
);
207 bool save_live_p
= STMT_VINFO_LIVE_P (stmt_info
);
209 if (dump_enabled_p ())
210 dump_printf_loc (MSG_NOTE
, vect_location
,
211 "mark relevant %d, live %d: %G", relevant
, live_p
,
214 /* If this stmt is an original stmt in a pattern, we might need to mark its
215 related pattern stmt instead of the original stmt. However, such stmts
216 may have their own uses that are not in any pattern, in such cases the
217 stmt itself should be marked. */
218 if (STMT_VINFO_IN_PATTERN_P (stmt_info
))
220 /* This is the last stmt in a sequence that was detected as a
221 pattern that can potentially be vectorized. Don't mark the stmt
222 as relevant/live because it's not going to be vectorized.
223 Instead mark the pattern-stmt that replaces it. */
225 if (dump_enabled_p ())
226 dump_printf_loc (MSG_NOTE
, vect_location
,
227 "last stmt in pattern. don't mark"
228 " relevant/live.\n");
229 stmt_vec_info old_stmt_info
= stmt_info
;
230 stmt_info
= STMT_VINFO_RELATED_STMT (stmt_info
);
231 gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info
) == old_stmt_info
);
232 save_relevant
= STMT_VINFO_RELEVANT (stmt_info
);
233 save_live_p
= STMT_VINFO_LIVE_P (stmt_info
);
236 STMT_VINFO_LIVE_P (stmt_info
) |= live_p
;
237 if (relevant
> STMT_VINFO_RELEVANT (stmt_info
))
238 STMT_VINFO_RELEVANT (stmt_info
) = relevant
;
240 if (STMT_VINFO_RELEVANT (stmt_info
) == save_relevant
241 && STMT_VINFO_LIVE_P (stmt_info
) == save_live_p
)
243 if (dump_enabled_p ())
244 dump_printf_loc (MSG_NOTE
, vect_location
,
245 "already marked relevant/live.\n");
249 worklist
->safe_push (stmt_info
);
253 /* Function is_simple_and_all_uses_invariant
255 Return true if STMT_INFO is simple and all uses of it are invariant. */
258 is_simple_and_all_uses_invariant (stmt_vec_info stmt_info
,
259 loop_vec_info loop_vinfo
)
264 gassign
*stmt
= dyn_cast
<gassign
*> (stmt_info
->stmt
);
268 FOR_EACH_SSA_TREE_OPERAND (op
, stmt
, iter
, SSA_OP_USE
)
270 enum vect_def_type dt
= vect_uninitialized_def
;
272 if (!vect_is_simple_use (op
, loop_vinfo
, &dt
))
274 if (dump_enabled_p ())
275 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
276 "use not simple.\n");
280 if (dt
!= vect_external_def
&& dt
!= vect_constant_def
)
286 /* Function vect_stmt_relevant_p.
288 Return true if STMT_INFO, in the loop that is represented by LOOP_VINFO,
289 is "relevant for vectorization".
291 A stmt is considered "relevant for vectorization" if:
292 - it has uses outside the loop.
293 - it has vdefs (it alters memory).
294 - control stmts in the loop (except for the exit condition).
296 CHECKME: what other side effects would the vectorizer allow? */
299 vect_stmt_relevant_p (stmt_vec_info stmt_info
, loop_vec_info loop_vinfo
,
300 enum vect_relevant
*relevant
, bool *live_p
)
302 class loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
304 imm_use_iterator imm_iter
;
308 *relevant
= vect_unused_in_scope
;
311 /* cond stmt other than loop exit cond. */
312 if (is_ctrl_stmt (stmt_info
->stmt
)
313 && STMT_VINFO_TYPE (stmt_info
) != loop_exit_ctrl_vec_info_type
)
314 *relevant
= vect_used_in_scope
;
316 /* changing memory. */
317 if (gimple_code (stmt_info
->stmt
) != GIMPLE_PHI
)
318 if (gimple_vdef (stmt_info
->stmt
)
319 && !gimple_clobber_p (stmt_info
->stmt
))
321 if (dump_enabled_p ())
322 dump_printf_loc (MSG_NOTE
, vect_location
,
323 "vec_stmt_relevant_p: stmt has vdefs.\n");
324 *relevant
= vect_used_in_scope
;
327 /* uses outside the loop. */
328 FOR_EACH_PHI_OR_STMT_DEF (def_p
, stmt_info
->stmt
, op_iter
, SSA_OP_DEF
)
330 FOR_EACH_IMM_USE_FAST (use_p
, imm_iter
, DEF_FROM_PTR (def_p
))
332 basic_block bb
= gimple_bb (USE_STMT (use_p
));
333 if (!flow_bb_inside_loop_p (loop
, bb
))
335 if (is_gimple_debug (USE_STMT (use_p
)))
338 if (dump_enabled_p ())
339 dump_printf_loc (MSG_NOTE
, vect_location
,
340 "vec_stmt_relevant_p: used out of loop.\n");
342 /* We expect all such uses to be in the loop exit phis
343 (because of loop closed form) */
344 gcc_assert (gimple_code (USE_STMT (use_p
)) == GIMPLE_PHI
);
345 gcc_assert (bb
== single_exit (loop
)->dest
);
352 if (*live_p
&& *relevant
== vect_unused_in_scope
353 && !is_simple_and_all_uses_invariant (stmt_info
, loop_vinfo
))
355 if (dump_enabled_p ())
356 dump_printf_loc (MSG_NOTE
, vect_location
,
357 "vec_stmt_relevant_p: stmt live but not relevant.\n");
358 *relevant
= vect_used_only_live
;
361 return (*live_p
|| *relevant
);
365 /* Function exist_non_indexing_operands_for_use_p
367 USE is one of the uses attached to STMT_INFO. Check if USE is
368 used in STMT_INFO for anything other than indexing an array. */
371 exist_non_indexing_operands_for_use_p (tree use
, stmt_vec_info stmt_info
)
375 /* USE corresponds to some operand in STMT. If there is no data
376 reference in STMT, then any operand that corresponds to USE
377 is not indexing an array. */
378 if (!STMT_VINFO_DATA_REF (stmt_info
))
381 /* STMT has a data_ref. FORNOW this means that its of one of
385 (This should have been verified in analyze_data_refs).
387 'var' in the second case corresponds to a def, not a use,
388 so USE cannot correspond to any operands that are not used
391 Therefore, all we need to check is if STMT falls into the
392 first case, and whether var corresponds to USE. */
394 gassign
*assign
= dyn_cast
<gassign
*> (stmt_info
->stmt
);
395 if (!assign
|| !gimple_assign_copy_p (assign
))
397 gcall
*call
= dyn_cast
<gcall
*> (stmt_info
->stmt
);
398 if (call
&& gimple_call_internal_p (call
))
400 internal_fn ifn
= gimple_call_internal_fn (call
);
401 int mask_index
= internal_fn_mask_index (ifn
);
403 && use
== gimple_call_arg (call
, mask_index
))
405 int stored_value_index
= internal_fn_stored_value_index (ifn
);
406 if (stored_value_index
>= 0
407 && use
== gimple_call_arg (call
, stored_value_index
))
409 if (internal_gather_scatter_fn_p (ifn
)
410 && use
== gimple_call_arg (call
, 1))
416 if (TREE_CODE (gimple_assign_lhs (assign
)) == SSA_NAME
)
418 operand
= gimple_assign_rhs1 (assign
);
419 if (TREE_CODE (operand
) != SSA_NAME
)
430 Function process_use.
433 - a USE in STMT_VINFO in a loop represented by LOOP_VINFO
434 - RELEVANT - enum value to be set in the STMT_VINFO of the stmt
435 that defined USE. This is done by calling mark_relevant and passing it
436 the WORKLIST (to add DEF_STMT to the WORKLIST in case it is relevant).
437 - FORCE is true if exist_non_indexing_operands_for_use_p check shouldn't
441 Generally, LIVE_P and RELEVANT are used to define the liveness and
442 relevance info of the DEF_STMT of this USE:
443 STMT_VINFO_LIVE_P (DEF_stmt_vinfo) <-- live_p
444 STMT_VINFO_RELEVANT (DEF_stmt_vinfo) <-- relevant
446 - case 1: If USE is used only for address computations (e.g. array indexing),
447 which does not need to be directly vectorized, then the liveness/relevance
448 of the respective DEF_STMT is left unchanged.
449 - case 2: If STMT_VINFO is a reduction phi and DEF_STMT is a reduction stmt,
450 we skip DEF_STMT cause it had already been processed.
451 - case 3: If DEF_STMT and STMT_VINFO are in different nests, then
452 "relevant" will be modified accordingly.
454 Return true if everything is as expected. Return false otherwise. */
457 process_use (stmt_vec_info stmt_vinfo
, tree use
, loop_vec_info loop_vinfo
,
458 enum vect_relevant relevant
, vec
<stmt_vec_info
> *worklist
,
461 stmt_vec_info dstmt_vinfo
;
462 enum vect_def_type dt
;
464 /* case 1: we are only interested in uses that need to be vectorized. Uses
465 that are used for address computation are not considered relevant. */
466 if (!force
&& !exist_non_indexing_operands_for_use_p (use
, stmt_vinfo
))
467 return opt_result::success ();
469 if (!vect_is_simple_use (use
, loop_vinfo
, &dt
, &dstmt_vinfo
))
470 return opt_result::failure_at (stmt_vinfo
->stmt
,
472 " unsupported use in stmt.\n");
475 return opt_result::success ();
477 basic_block def_bb
= gimple_bb (dstmt_vinfo
->stmt
);
478 basic_block bb
= gimple_bb (stmt_vinfo
->stmt
);
480 /* case 2: A reduction phi (STMT) defined by a reduction stmt (DSTMT_VINFO).
481 We have to force the stmt live since the epilogue loop needs it to
482 continue computing the reduction. */
483 if (gimple_code (stmt_vinfo
->stmt
) == GIMPLE_PHI
484 && STMT_VINFO_DEF_TYPE (stmt_vinfo
) == vect_reduction_def
485 && gimple_code (dstmt_vinfo
->stmt
) != GIMPLE_PHI
486 && STMT_VINFO_DEF_TYPE (dstmt_vinfo
) == vect_reduction_def
487 && bb
->loop_father
== def_bb
->loop_father
)
489 if (dump_enabled_p ())
490 dump_printf_loc (MSG_NOTE
, vect_location
,
491 "reduc-stmt defining reduc-phi in the same nest.\n");
492 vect_mark_relevant (worklist
, dstmt_vinfo
, relevant
, true);
493 return opt_result::success ();
496 /* case 3a: outer-loop stmt defining an inner-loop stmt:
497 outer-loop-header-bb:
503 if (flow_loop_nested_p (def_bb
->loop_father
, bb
->loop_father
))
505 if (dump_enabled_p ())
506 dump_printf_loc (MSG_NOTE
, vect_location
,
507 "outer-loop def-stmt defining inner-loop stmt.\n");
511 case vect_unused_in_scope
:
512 relevant
= (STMT_VINFO_DEF_TYPE (stmt_vinfo
) == vect_nested_cycle
) ?
513 vect_used_in_scope
: vect_unused_in_scope
;
516 case vect_used_in_outer_by_reduction
:
517 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo
) != vect_reduction_def
);
518 relevant
= vect_used_by_reduction
;
521 case vect_used_in_outer
:
522 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo
) != vect_reduction_def
);
523 relevant
= vect_used_in_scope
;
526 case vect_used_in_scope
:
534 /* case 3b: inner-loop stmt defining an outer-loop stmt:
535 outer-loop-header-bb:
539 outer-loop-tail-bb (or outer-loop-exit-bb in double reduction):
541 else if (flow_loop_nested_p (bb
->loop_father
, def_bb
->loop_father
))
543 if (dump_enabled_p ())
544 dump_printf_loc (MSG_NOTE
, vect_location
,
545 "inner-loop def-stmt defining outer-loop stmt.\n");
549 case vect_unused_in_scope
:
550 relevant
= (STMT_VINFO_DEF_TYPE (stmt_vinfo
) == vect_reduction_def
551 || STMT_VINFO_DEF_TYPE (stmt_vinfo
) == vect_double_reduction_def
) ?
552 vect_used_in_outer_by_reduction
: vect_unused_in_scope
;
555 case vect_used_by_reduction
:
556 case vect_used_only_live
:
557 relevant
= vect_used_in_outer_by_reduction
;
560 case vect_used_in_scope
:
561 relevant
= vect_used_in_outer
;
568 /* We are also not interested in uses on loop PHI backedges that are
569 inductions. Otherwise we'll needlessly vectorize the IV increment
570 and cause hybrid SLP for SLP inductions. Unless the PHI is live
572 else if (gimple_code (stmt_vinfo
->stmt
) == GIMPLE_PHI
573 && STMT_VINFO_DEF_TYPE (stmt_vinfo
) == vect_induction_def
574 && ! STMT_VINFO_LIVE_P (stmt_vinfo
)
575 && (PHI_ARG_DEF_FROM_EDGE (stmt_vinfo
->stmt
,
576 loop_latch_edge (bb
->loop_father
))
579 if (dump_enabled_p ())
580 dump_printf_loc (MSG_NOTE
, vect_location
,
581 "induction value on backedge.\n");
582 return opt_result::success ();
586 vect_mark_relevant (worklist
, dstmt_vinfo
, relevant
, false);
587 return opt_result::success ();
591 /* Function vect_mark_stmts_to_be_vectorized.
593 Not all stmts in the loop need to be vectorized. For example:
602 Stmt 1 and 3 do not need to be vectorized, because loop control and
603 addressing of vectorized data-refs are handled differently.
605 This pass detects such stmts. */
608 vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo
, bool *fatal
)
610 class loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
611 basic_block
*bbs
= LOOP_VINFO_BBS (loop_vinfo
);
612 unsigned int nbbs
= loop
->num_nodes
;
613 gimple_stmt_iterator si
;
617 enum vect_relevant relevant
;
619 DUMP_VECT_SCOPE ("vect_mark_stmts_to_be_vectorized");
621 auto_vec
<stmt_vec_info
, 64> worklist
;
623 /* 1. Init worklist. */
624 for (i
= 0; i
< nbbs
; i
++)
627 for (si
= gsi_start_phis (bb
); !gsi_end_p (si
); gsi_next (&si
))
629 stmt_vec_info phi_info
= loop_vinfo
->lookup_stmt (gsi_stmt (si
));
630 if (dump_enabled_p ())
631 dump_printf_loc (MSG_NOTE
, vect_location
, "init: phi relevant? %G",
634 if (vect_stmt_relevant_p (phi_info
, loop_vinfo
, &relevant
, &live_p
))
635 vect_mark_relevant (&worklist
, phi_info
, relevant
, live_p
);
637 for (si
= gsi_start_bb (bb
); !gsi_end_p (si
); gsi_next (&si
))
639 if (is_gimple_debug (gsi_stmt (si
)))
641 stmt_vec_info stmt_info
= loop_vinfo
->lookup_stmt (gsi_stmt (si
));
642 if (dump_enabled_p ())
643 dump_printf_loc (MSG_NOTE
, vect_location
,
644 "init: stmt relevant? %G", stmt_info
->stmt
);
646 if (vect_stmt_relevant_p (stmt_info
, loop_vinfo
, &relevant
, &live_p
))
647 vect_mark_relevant (&worklist
, stmt_info
, relevant
, live_p
);
651 /* 2. Process_worklist */
652 while (worklist
.length () > 0)
657 stmt_vec_info stmt_vinfo
= worklist
.pop ();
658 if (dump_enabled_p ())
659 dump_printf_loc (MSG_NOTE
, vect_location
,
660 "worklist: examine stmt: %G", stmt_vinfo
->stmt
);
662 /* Examine the USEs of STMT. For each USE, mark the stmt that defines it
663 (DEF_STMT) as relevant/irrelevant according to the relevance property
665 relevant
= STMT_VINFO_RELEVANT (stmt_vinfo
);
667 /* Generally, the relevance property of STMT (in STMT_VINFO_RELEVANT) is
668 propagated as is to the DEF_STMTs of its USEs.
670 One exception is when STMT has been identified as defining a reduction
671 variable; in this case we set the relevance to vect_used_by_reduction.
672 This is because we distinguish between two kinds of relevant stmts -
673 those that are used by a reduction computation, and those that are
674 (also) used by a regular computation. This allows us later on to
675 identify stmts that are used solely by a reduction, and therefore the
676 order of the results that they produce does not have to be kept. */
678 switch (STMT_VINFO_DEF_TYPE (stmt_vinfo
))
680 case vect_reduction_def
:
681 gcc_assert (relevant
!= vect_unused_in_scope
);
682 if (relevant
!= vect_unused_in_scope
683 && relevant
!= vect_used_in_scope
684 && relevant
!= vect_used_by_reduction
685 && relevant
!= vect_used_only_live
)
686 return opt_result::failure_at
687 (stmt_vinfo
->stmt
, "unsupported use of reduction.\n");
690 case vect_nested_cycle
:
691 if (relevant
!= vect_unused_in_scope
692 && relevant
!= vect_used_in_outer_by_reduction
693 && relevant
!= vect_used_in_outer
)
694 return opt_result::failure_at
695 (stmt_vinfo
->stmt
, "unsupported use of nested cycle.\n");
698 case vect_double_reduction_def
:
699 if (relevant
!= vect_unused_in_scope
700 && relevant
!= vect_used_by_reduction
701 && relevant
!= vect_used_only_live
)
702 return opt_result::failure_at
703 (stmt_vinfo
->stmt
, "unsupported use of double reduction.\n");
710 if (is_pattern_stmt_p (stmt_vinfo
))
712 /* Pattern statements are not inserted into the code, so
713 FOR_EACH_PHI_OR_STMT_USE optimizes their operands out, and we
714 have to scan the RHS or function arguments instead. */
715 if (gassign
*assign
= dyn_cast
<gassign
*> (stmt_vinfo
->stmt
))
717 enum tree_code rhs_code
= gimple_assign_rhs_code (assign
);
718 tree op
= gimple_assign_rhs1 (assign
);
721 if (rhs_code
== COND_EXPR
&& COMPARISON_CLASS_P (op
))
724 = process_use (stmt_vinfo
, TREE_OPERAND (op
, 0),
725 loop_vinfo
, relevant
, &worklist
, false);
728 res
= process_use (stmt_vinfo
, TREE_OPERAND (op
, 1),
729 loop_vinfo
, relevant
, &worklist
, false);
734 for (; i
< gimple_num_ops (assign
); i
++)
736 op
= gimple_op (assign
, i
);
737 if (TREE_CODE (op
) == SSA_NAME
)
740 = process_use (stmt_vinfo
, op
, loop_vinfo
, relevant
,
747 else if (gcall
*call
= dyn_cast
<gcall
*> (stmt_vinfo
->stmt
))
749 for (i
= 0; i
< gimple_call_num_args (call
); i
++)
751 tree arg
= gimple_call_arg (call
, i
);
753 = process_use (stmt_vinfo
, arg
, loop_vinfo
, relevant
,
761 FOR_EACH_PHI_OR_STMT_USE (use_p
, stmt_vinfo
->stmt
, iter
, SSA_OP_USE
)
763 tree op
= USE_FROM_PTR (use_p
);
765 = process_use (stmt_vinfo
, op
, loop_vinfo
, relevant
,
771 if (STMT_VINFO_GATHER_SCATTER_P (stmt_vinfo
))
773 gather_scatter_info gs_info
;
774 if (!vect_check_gather_scatter (stmt_vinfo
, loop_vinfo
, &gs_info
))
777 = process_use (stmt_vinfo
, gs_info
.offset
, loop_vinfo
, relevant
,
786 } /* while worklist */
788 return opt_result::success ();
791 /* Function vect_model_simple_cost.
793 Models cost for simple operations, i.e. those that only emit ncopies of a
794 single op. Right now, this does not account for multiple insns that could
795 be generated for the single vector op. We will handle that shortly. */
798 vect_model_simple_cost (vec_info
*,
799 stmt_vec_info stmt_info
, int ncopies
,
800 enum vect_def_type
*dt
,
803 stmt_vector_for_cost
*cost_vec
,
804 vect_cost_for_stmt kind
= vector_stmt
)
806 int inside_cost
= 0, prologue_cost
= 0;
808 gcc_assert (cost_vec
!= NULL
);
810 /* ??? Somehow we need to fix this at the callers. */
812 ncopies
= SLP_TREE_NUMBER_OF_VEC_STMTS (node
);
815 /* Cost the "broadcast" of a scalar operand in to a vector operand.
816 Use scalar_to_vec to cost the broadcast, as elsewhere in the vector
818 for (int i
= 0; i
< ndts
; i
++)
819 if (dt
[i
] == vect_constant_def
|| dt
[i
] == vect_external_def
)
820 prologue_cost
+= record_stmt_cost (cost_vec
, 1, scalar_to_vec
,
821 stmt_info
, 0, vect_prologue
);
823 /* Pass the inside-of-loop statements to the target-specific cost model. */
824 inside_cost
+= record_stmt_cost (cost_vec
, ncopies
, kind
,
825 stmt_info
, 0, vect_body
);
827 if (dump_enabled_p ())
828 dump_printf_loc (MSG_NOTE
, vect_location
,
829 "vect_model_simple_cost: inside_cost = %d, "
830 "prologue_cost = %d .\n", inside_cost
, prologue_cost
);
834 /* Model cost for type demotion and promotion operations. PWR is
835 normally zero for single-step promotions and demotions. It will be
836 one if two-step promotion/demotion is required, and so on. NCOPIES
837 is the number of vector results (and thus number of instructions)
838 for the narrowest end of the operation chain. Each additional
839 step doubles the number of instructions required. */
842 vect_model_promotion_demotion_cost (stmt_vec_info stmt_info
,
843 enum vect_def_type
*dt
,
844 unsigned int ncopies
, int pwr
,
845 stmt_vector_for_cost
*cost_vec
)
848 int inside_cost
= 0, prologue_cost
= 0;
850 for (i
= 0; i
< pwr
+ 1; i
++)
852 inside_cost
+= record_stmt_cost (cost_vec
, ncopies
, vec_promote_demote
,
853 stmt_info
, 0, vect_body
);
857 /* FORNOW: Assuming maximum 2 args per stmts. */
858 for (i
= 0; i
< 2; i
++)
859 if (dt
[i
] == vect_constant_def
|| dt
[i
] == vect_external_def
)
860 prologue_cost
+= record_stmt_cost (cost_vec
, 1, vector_stmt
,
861 stmt_info
, 0, vect_prologue
);
863 if (dump_enabled_p ())
864 dump_printf_loc (MSG_NOTE
, vect_location
,
865 "vect_model_promotion_demotion_cost: inside_cost = %d, "
866 "prologue_cost = %d .\n", inside_cost
, prologue_cost
);
869 /* Returns true if the current function returns DECL. */
872 cfun_returns (tree decl
)
876 FOR_EACH_EDGE (e
, ei
, EXIT_BLOCK_PTR_FOR_FN (cfun
)->preds
)
878 greturn
*ret
= safe_dyn_cast
<greturn
*> (last_stmt (e
->src
));
881 if (gimple_return_retval (ret
) == decl
)
883 /* We often end up with an aggregate copy to the result decl,
884 handle that case as well. First skip intermediate clobbers
889 def
= SSA_NAME_DEF_STMT (gimple_vuse (def
));
891 while (gimple_clobber_p (def
));
892 if (is_a
<gassign
*> (def
)
893 && gimple_assign_lhs (def
) == gimple_return_retval (ret
)
894 && gimple_assign_rhs1 (def
) == decl
)
900 /* Function vect_model_store_cost
902 Models cost for stores. In the case of grouped accesses, one access
903 has the overhead of the grouped access attributed to it. */
906 vect_model_store_cost (vec_info
*vinfo
, stmt_vec_info stmt_info
, int ncopies
,
907 vect_memory_access_type memory_access_type
,
908 vec_load_store_type vls_type
, slp_tree slp_node
,
909 stmt_vector_for_cost
*cost_vec
)
911 unsigned int inside_cost
= 0, prologue_cost
= 0;
912 stmt_vec_info first_stmt_info
= stmt_info
;
913 bool grouped_access_p
= STMT_VINFO_GROUPED_ACCESS (stmt_info
);
915 /* ??? Somehow we need to fix this at the callers. */
917 ncopies
= SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
);
919 if (vls_type
== VLS_STORE_INVARIANT
)
922 prologue_cost
+= record_stmt_cost (cost_vec
, 1, scalar_to_vec
,
923 stmt_info
, 0, vect_prologue
);
926 /* Grouped stores update all elements in the group at once,
927 so we want the DR for the first statement. */
928 if (!slp_node
&& grouped_access_p
)
929 first_stmt_info
= DR_GROUP_FIRST_ELEMENT (stmt_info
);
931 /* True if we should include any once-per-group costs as well as
932 the cost of the statement itself. For SLP we only get called
933 once per group anyhow. */
934 bool first_stmt_p
= (first_stmt_info
== stmt_info
);
936 /* We assume that the cost of a single store-lanes instruction is
937 equivalent to the cost of DR_GROUP_SIZE separate stores. If a grouped
938 access is instead being provided by a permute-and-store operation,
939 include the cost of the permutes. */
941 && memory_access_type
== VMAT_CONTIGUOUS_PERMUTE
)
943 /* Uses a high and low interleave or shuffle operations for each
945 int group_size
= DR_GROUP_SIZE (first_stmt_info
);
946 int nstmts
= ncopies
* ceil_log2 (group_size
) * group_size
;
947 inside_cost
= record_stmt_cost (cost_vec
, nstmts
, vec_perm
,
948 stmt_info
, 0, vect_body
);
950 if (dump_enabled_p ())
951 dump_printf_loc (MSG_NOTE
, vect_location
,
952 "vect_model_store_cost: strided group_size = %d .\n",
956 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
957 /* Costs of the stores. */
958 if (memory_access_type
== VMAT_ELEMENTWISE
959 || memory_access_type
== VMAT_GATHER_SCATTER
)
961 /* N scalar stores plus extracting the elements. */
962 unsigned int assumed_nunits
= vect_nunits_for_cost (vectype
);
963 inside_cost
+= record_stmt_cost (cost_vec
,
964 ncopies
* assumed_nunits
,
965 scalar_store
, stmt_info
, 0, vect_body
);
968 vect_get_store_cost (vinfo
, stmt_info
, ncopies
, &inside_cost
, cost_vec
);
970 if (memory_access_type
== VMAT_ELEMENTWISE
971 || memory_access_type
== VMAT_STRIDED_SLP
)
973 /* N scalar stores plus extracting the elements. */
974 unsigned int assumed_nunits
= vect_nunits_for_cost (vectype
);
975 inside_cost
+= record_stmt_cost (cost_vec
,
976 ncopies
* assumed_nunits
,
977 vec_to_scalar
, stmt_info
, 0, vect_body
);
980 /* When vectorizing a store into the function result assign
981 a penalty if the function returns in a multi-register location.
982 In this case we assume we'll end up with having to spill the
983 vector result and do piecewise loads as a conservative estimate. */
984 tree base
= get_base_address (STMT_VINFO_DATA_REF (stmt_info
)->ref
);
986 && (TREE_CODE (base
) == RESULT_DECL
987 || (DECL_P (base
) && cfun_returns (base
)))
988 && !aggregate_value_p (base
, cfun
->decl
))
990 rtx reg
= hard_function_value (TREE_TYPE (base
), cfun
->decl
, 0, 1);
991 /* ??? Handle PARALLEL in some way. */
994 int nregs
= hard_regno_nregs (REGNO (reg
), GET_MODE (reg
));
995 /* Assume that a single reg-reg move is possible and cheap,
996 do not account for vector to gp register move cost. */
1000 prologue_cost
+= record_stmt_cost (cost_vec
, ncopies
,
1002 stmt_info
, 0, vect_epilogue
);
1004 prologue_cost
+= record_stmt_cost (cost_vec
, ncopies
* nregs
,
1006 stmt_info
, 0, vect_epilogue
);
1011 if (dump_enabled_p ())
1012 dump_printf_loc (MSG_NOTE
, vect_location
,
1013 "vect_model_store_cost: inside_cost = %d, "
1014 "prologue_cost = %d .\n", inside_cost
, prologue_cost
);
1018 /* Calculate cost of DR's memory access. */
1020 vect_get_store_cost (vec_info
*vinfo
, stmt_vec_info stmt_info
, int ncopies
,
1021 unsigned int *inside_cost
,
1022 stmt_vector_for_cost
*body_cost_vec
)
1024 dr_vec_info
*dr_info
= STMT_VINFO_DR_INFO (stmt_info
);
1025 int alignment_support_scheme
1026 = vect_supportable_dr_alignment (vinfo
, dr_info
, false);
1028 switch (alignment_support_scheme
)
1032 *inside_cost
+= record_stmt_cost (body_cost_vec
, ncopies
,
1033 vector_store
, stmt_info
, 0,
1036 if (dump_enabled_p ())
1037 dump_printf_loc (MSG_NOTE
, vect_location
,
1038 "vect_model_store_cost: aligned.\n");
1042 case dr_unaligned_supported
:
1044 /* Here, we assign an additional cost for the unaligned store. */
1045 *inside_cost
+= record_stmt_cost (body_cost_vec
, ncopies
,
1046 unaligned_store
, stmt_info
,
1047 DR_MISALIGNMENT (dr_info
),
1049 if (dump_enabled_p ())
1050 dump_printf_loc (MSG_NOTE
, vect_location
,
1051 "vect_model_store_cost: unaligned supported by "
1056 case dr_unaligned_unsupported
:
1058 *inside_cost
= VECT_MAX_COST
;
1060 if (dump_enabled_p ())
1061 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1062 "vect_model_store_cost: unsupported access.\n");
1072 /* Function vect_model_load_cost
1074 Models cost for loads. In the case of grouped accesses, one access has
1075 the overhead of the grouped access attributed to it. Since unaligned
1076 accesses are supported for loads, we also account for the costs of the
1077 access scheme chosen. */
1080 vect_model_load_cost (vec_info
*vinfo
,
1081 stmt_vec_info stmt_info
, unsigned ncopies
, poly_uint64 vf
,
1082 vect_memory_access_type memory_access_type
,
1084 stmt_vector_for_cost
*cost_vec
)
1086 unsigned int inside_cost
= 0, prologue_cost
= 0;
1087 bool grouped_access_p
= STMT_VINFO_GROUPED_ACCESS (stmt_info
);
1089 gcc_assert (cost_vec
);
1091 /* ??? Somehow we need to fix this at the callers. */
1093 ncopies
= SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
);
1095 if (slp_node
&& SLP_TREE_LOAD_PERMUTATION (slp_node
).exists ())
1097 /* If the load is permuted then the alignment is determined by
1098 the first group element not by the first scalar stmt DR. */
1099 stmt_vec_info first_stmt_info
= DR_GROUP_FIRST_ELEMENT (stmt_info
);
1100 /* Record the cost for the permutation. */
1101 unsigned n_perms
, n_loads
;
1102 vect_transform_slp_perm_load (vinfo
, slp_node
, vNULL
, NULL
,
1103 vf
, true, &n_perms
, &n_loads
);
1104 inside_cost
+= record_stmt_cost (cost_vec
, n_perms
, vec_perm
,
1105 first_stmt_info
, 0, vect_body
);
1107 /* And adjust the number of loads performed. This handles
1108 redundancies as well as loads that are later dead. */
1112 /* Grouped loads read all elements in the group at once,
1113 so we want the DR for the first statement. */
1114 stmt_vec_info first_stmt_info
= stmt_info
;
1115 if (!slp_node
&& grouped_access_p
)
1116 first_stmt_info
= DR_GROUP_FIRST_ELEMENT (stmt_info
);
1118 /* True if we should include any once-per-group costs as well as
1119 the cost of the statement itself. For SLP we only get called
1120 once per group anyhow. */
1121 bool first_stmt_p
= (first_stmt_info
== stmt_info
);
1123 /* We assume that the cost of a single load-lanes instruction is
1124 equivalent to the cost of DR_GROUP_SIZE separate loads. If a grouped
1125 access is instead being provided by a load-and-permute operation,
1126 include the cost of the permutes. */
1128 && memory_access_type
== VMAT_CONTIGUOUS_PERMUTE
)
1130 /* Uses an even and odd extract operations or shuffle operations
1131 for each needed permute. */
1132 int group_size
= DR_GROUP_SIZE (first_stmt_info
);
1133 int nstmts
= ncopies
* ceil_log2 (group_size
) * group_size
;
1134 inside_cost
+= record_stmt_cost (cost_vec
, nstmts
, vec_perm
,
1135 stmt_info
, 0, vect_body
);
1137 if (dump_enabled_p ())
1138 dump_printf_loc (MSG_NOTE
, vect_location
,
1139 "vect_model_load_cost: strided group_size = %d .\n",
1143 /* The loads themselves. */
1144 if (memory_access_type
== VMAT_ELEMENTWISE
1145 || memory_access_type
== VMAT_GATHER_SCATTER
)
1147 /* N scalar loads plus gathering them into a vector. */
1148 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
1149 unsigned int assumed_nunits
= vect_nunits_for_cost (vectype
);
1150 inside_cost
+= record_stmt_cost (cost_vec
,
1151 ncopies
* assumed_nunits
,
1152 scalar_load
, stmt_info
, 0, vect_body
);
1155 vect_get_load_cost (vinfo
, stmt_info
, ncopies
, first_stmt_p
,
1156 &inside_cost
, &prologue_cost
,
1157 cost_vec
, cost_vec
, true);
1158 if (memory_access_type
== VMAT_ELEMENTWISE
1159 || memory_access_type
== VMAT_STRIDED_SLP
)
1160 inside_cost
+= record_stmt_cost (cost_vec
, ncopies
, vec_construct
,
1161 stmt_info
, 0, vect_body
);
1163 if (dump_enabled_p ())
1164 dump_printf_loc (MSG_NOTE
, vect_location
,
1165 "vect_model_load_cost: inside_cost = %d, "
1166 "prologue_cost = %d .\n", inside_cost
, prologue_cost
);
1170 /* Calculate cost of DR's memory access. */
1172 vect_get_load_cost (vec_info
*vinfo
, stmt_vec_info stmt_info
, int ncopies
,
1173 bool add_realign_cost
, unsigned int *inside_cost
,
1174 unsigned int *prologue_cost
,
1175 stmt_vector_for_cost
*prologue_cost_vec
,
1176 stmt_vector_for_cost
*body_cost_vec
,
1177 bool record_prologue_costs
)
1179 dr_vec_info
*dr_info
= STMT_VINFO_DR_INFO (stmt_info
);
1180 int alignment_support_scheme
1181 = vect_supportable_dr_alignment (vinfo
, dr_info
, false);
1183 switch (alignment_support_scheme
)
1187 *inside_cost
+= record_stmt_cost (body_cost_vec
, ncopies
, vector_load
,
1188 stmt_info
, 0, vect_body
);
1190 if (dump_enabled_p ())
1191 dump_printf_loc (MSG_NOTE
, vect_location
,
1192 "vect_model_load_cost: aligned.\n");
1196 case dr_unaligned_supported
:
1198 /* Here, we assign an additional cost for the unaligned load. */
1199 *inside_cost
+= record_stmt_cost (body_cost_vec
, ncopies
,
1200 unaligned_load
, stmt_info
,
1201 DR_MISALIGNMENT (dr_info
),
1204 if (dump_enabled_p ())
1205 dump_printf_loc (MSG_NOTE
, vect_location
,
1206 "vect_model_load_cost: unaligned supported by "
1211 case dr_explicit_realign
:
1213 *inside_cost
+= record_stmt_cost (body_cost_vec
, ncopies
* 2,
1214 vector_load
, stmt_info
, 0, vect_body
);
1215 *inside_cost
+= record_stmt_cost (body_cost_vec
, ncopies
,
1216 vec_perm
, stmt_info
, 0, vect_body
);
1218 /* FIXME: If the misalignment remains fixed across the iterations of
1219 the containing loop, the following cost should be added to the
1221 if (targetm
.vectorize
.builtin_mask_for_load
)
1222 *inside_cost
+= record_stmt_cost (body_cost_vec
, 1, vector_stmt
,
1223 stmt_info
, 0, vect_body
);
1225 if (dump_enabled_p ())
1226 dump_printf_loc (MSG_NOTE
, vect_location
,
1227 "vect_model_load_cost: explicit realign\n");
1231 case dr_explicit_realign_optimized
:
1233 if (dump_enabled_p ())
1234 dump_printf_loc (MSG_NOTE
, vect_location
,
1235 "vect_model_load_cost: unaligned software "
1238 /* Unaligned software pipeline has a load of an address, an initial
1239 load, and possibly a mask operation to "prime" the loop. However,
1240 if this is an access in a group of loads, which provide grouped
1241 access, then the above cost should only be considered for one
1242 access in the group. Inside the loop, there is a load op
1243 and a realignment op. */
1245 if (add_realign_cost
&& record_prologue_costs
)
1247 *prologue_cost
+= record_stmt_cost (prologue_cost_vec
, 2,
1248 vector_stmt
, stmt_info
,
1250 if (targetm
.vectorize
.builtin_mask_for_load
)
1251 *prologue_cost
+= record_stmt_cost (prologue_cost_vec
, 1,
1252 vector_stmt
, stmt_info
,
1256 *inside_cost
+= record_stmt_cost (body_cost_vec
, ncopies
, vector_load
,
1257 stmt_info
, 0, vect_body
);
1258 *inside_cost
+= record_stmt_cost (body_cost_vec
, ncopies
, vec_perm
,
1259 stmt_info
, 0, vect_body
);
1261 if (dump_enabled_p ())
1262 dump_printf_loc (MSG_NOTE
, vect_location
,
1263 "vect_model_load_cost: explicit realign optimized"
1269 case dr_unaligned_unsupported
:
1271 *inside_cost
= VECT_MAX_COST
;
1273 if (dump_enabled_p ())
1274 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1275 "vect_model_load_cost: unsupported access.\n");
1284 /* Insert the new stmt NEW_STMT at *GSI or at the appropriate place in
1285 the loop preheader for the vectorized stmt STMT_VINFO. */
1288 vect_init_vector_1 (vec_info
*vinfo
, stmt_vec_info stmt_vinfo
, gimple
*new_stmt
,
1289 gimple_stmt_iterator
*gsi
)
1292 vect_finish_stmt_generation (vinfo
, stmt_vinfo
, new_stmt
, gsi
);
1294 vinfo
->insert_on_entry (stmt_vinfo
, new_stmt
);
1296 if (dump_enabled_p ())
1297 dump_printf_loc (MSG_NOTE
, vect_location
,
1298 "created new init_stmt: %G", new_stmt
);
1301 /* Function vect_init_vector.
1303 Insert a new stmt (INIT_STMT) that initializes a new variable of type
1304 TYPE with the value VAL. If TYPE is a vector type and VAL does not have
1305 vector type a vector with all elements equal to VAL is created first.
1306 Place the initialization at GSI if it is not NULL. Otherwise, place the
1307 initialization at the loop preheader.
1308 Return the DEF of INIT_STMT.
1309 It will be used in the vectorization of STMT_INFO. */
1312 vect_init_vector (vec_info
*vinfo
, stmt_vec_info stmt_info
, tree val
, tree type
,
1313 gimple_stmt_iterator
*gsi
)
1318 /* We abuse this function to push sth to a SSA name with initial 'val'. */
1319 if (! useless_type_conversion_p (type
, TREE_TYPE (val
)))
1321 gcc_assert (TREE_CODE (type
) == VECTOR_TYPE
);
1322 if (! types_compatible_p (TREE_TYPE (type
), TREE_TYPE (val
)))
1324 /* Scalar boolean value should be transformed into
1325 all zeros or all ones value before building a vector. */
1326 if (VECTOR_BOOLEAN_TYPE_P (type
))
1328 tree true_val
= build_all_ones_cst (TREE_TYPE (type
));
1329 tree false_val
= build_zero_cst (TREE_TYPE (type
));
1331 if (CONSTANT_CLASS_P (val
))
1332 val
= integer_zerop (val
) ? false_val
: true_val
;
1335 new_temp
= make_ssa_name (TREE_TYPE (type
));
1336 init_stmt
= gimple_build_assign (new_temp
, COND_EXPR
,
1337 val
, true_val
, false_val
);
1338 vect_init_vector_1 (vinfo
, stmt_info
, init_stmt
, gsi
);
1344 gimple_seq stmts
= NULL
;
1345 if (! INTEGRAL_TYPE_P (TREE_TYPE (val
)))
1346 val
= gimple_build (&stmts
, VIEW_CONVERT_EXPR
,
1347 TREE_TYPE (type
), val
);
1349 /* ??? Condition vectorization expects us to do
1350 promotion of invariant/external defs. */
1351 val
= gimple_convert (&stmts
, TREE_TYPE (type
), val
);
1352 for (gimple_stmt_iterator gsi2
= gsi_start (stmts
);
1353 !gsi_end_p (gsi2
); )
1355 init_stmt
= gsi_stmt (gsi2
);
1356 gsi_remove (&gsi2
, false);
1357 vect_init_vector_1 (vinfo
, stmt_info
, init_stmt
, gsi
);
1361 val
= build_vector_from_val (type
, val
);
1364 new_temp
= vect_get_new_ssa_name (type
, vect_simple_var
, "cst_");
1365 init_stmt
= gimple_build_assign (new_temp
, val
);
1366 vect_init_vector_1 (vinfo
, stmt_info
, init_stmt
, gsi
);
1371 /* Function vect_get_vec_defs_for_operand.
1373 OP is an operand in STMT_VINFO. This function returns a vector of
1374 NCOPIES defs that will be used in the vectorized stmts for STMT_VINFO.
1376 In the case that OP is an SSA_NAME which is defined in the loop, then
1377 STMT_VINFO_VEC_STMTS of the defining stmt holds the relevant defs.
1379 In case OP is an invariant or constant, a new stmt that creates a vector def
1380 needs to be introduced. VECTYPE may be used to specify a required type for
1381 vector invariant. */
1384 vect_get_vec_defs_for_operand (vec_info
*vinfo
, stmt_vec_info stmt_vinfo
,
1386 tree op
, vec
<tree
> *vec_oprnds
, tree vectype
)
1389 enum vect_def_type dt
;
1391 loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
);
1393 if (dump_enabled_p ())
1394 dump_printf_loc (MSG_NOTE
, vect_location
,
1395 "vect_get_vec_defs_for_operand: %T\n", op
);
1397 stmt_vec_info def_stmt_info
;
1398 is_simple_use
= vect_is_simple_use (op
, loop_vinfo
, &dt
,
1399 &def_stmt_info
, &def_stmt
);
1400 gcc_assert (is_simple_use
);
1401 if (def_stmt
&& dump_enabled_p ())
1402 dump_printf_loc (MSG_NOTE
, vect_location
, " def_stmt = %G", def_stmt
);
1404 vec_oprnds
->create (ncopies
);
1405 if (dt
== vect_constant_def
|| dt
== vect_external_def
)
1407 tree stmt_vectype
= STMT_VINFO_VECTYPE (stmt_vinfo
);
1411 vector_type
= vectype
;
1412 else if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op
))
1413 && VECTOR_BOOLEAN_TYPE_P (stmt_vectype
))
1414 vector_type
= truth_type_for (stmt_vectype
);
1416 vector_type
= get_vectype_for_scalar_type (loop_vinfo
, TREE_TYPE (op
));
1418 gcc_assert (vector_type
);
1419 tree vop
= vect_init_vector (vinfo
, stmt_vinfo
, op
, vector_type
, NULL
);
1421 vec_oprnds
->quick_push (vop
);
1425 def_stmt_info
= vect_stmt_to_vectorize (def_stmt_info
);
1426 gcc_assert (STMT_VINFO_VEC_STMTS (def_stmt_info
).length () == ncopies
);
1427 for (unsigned i
= 0; i
< ncopies
; ++i
)
1428 vec_oprnds
->quick_push (gimple_get_lhs
1429 (STMT_VINFO_VEC_STMTS (def_stmt_info
)[i
]));
1434 /* Get vectorized definitions for OP0 and OP1. */
1437 vect_get_vec_defs (vec_info
*vinfo
, stmt_vec_info stmt_info
, slp_tree slp_node
,
1439 tree op0
, vec
<tree
> *vec_oprnds0
, tree vectype0
,
1440 tree op1
, vec
<tree
> *vec_oprnds1
, tree vectype1
,
1441 tree op2
, vec
<tree
> *vec_oprnds2
, tree vectype2
,
1442 tree op3
, vec
<tree
> *vec_oprnds3
, tree vectype3
)
1447 vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node
)[0], vec_oprnds0
);
1449 vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node
)[1], vec_oprnds1
);
1451 vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node
)[2], vec_oprnds2
);
1453 vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node
)[3], vec_oprnds3
);
1458 vect_get_vec_defs_for_operand (vinfo
, stmt_info
, ncopies
,
1459 op0
, vec_oprnds0
, vectype0
);
1461 vect_get_vec_defs_for_operand (vinfo
, stmt_info
, ncopies
,
1462 op1
, vec_oprnds1
, vectype1
);
1464 vect_get_vec_defs_for_operand (vinfo
, stmt_info
, ncopies
,
1465 op2
, vec_oprnds2
, vectype2
);
1467 vect_get_vec_defs_for_operand (vinfo
, stmt_info
, ncopies
,
1468 op3
, vec_oprnds3
, vectype3
);
1473 vect_get_vec_defs (vec_info
*vinfo
, stmt_vec_info stmt_info
, slp_tree slp_node
,
1475 tree op0
, vec
<tree
> *vec_oprnds0
,
1476 tree op1
, vec
<tree
> *vec_oprnds1
,
1477 tree op2
, vec
<tree
> *vec_oprnds2
,
1478 tree op3
, vec
<tree
> *vec_oprnds3
)
1480 vect_get_vec_defs (vinfo
, stmt_info
, slp_node
, ncopies
,
1481 op0
, vec_oprnds0
, NULL_TREE
,
1482 op1
, vec_oprnds1
, NULL_TREE
,
1483 op2
, vec_oprnds2
, NULL_TREE
,
1484 op3
, vec_oprnds3
, NULL_TREE
);
1487 /* Helper function called by vect_finish_replace_stmt and
1488 vect_finish_stmt_generation. Set the location of the new
1489 statement and create and return a stmt_vec_info for it. */
1492 vect_finish_stmt_generation_1 (vec_info
*,
1493 stmt_vec_info stmt_info
, gimple
*vec_stmt
)
1495 if (dump_enabled_p ())
1496 dump_printf_loc (MSG_NOTE
, vect_location
, "add new stmt: %G", vec_stmt
);
1500 gimple_set_location (vec_stmt
, gimple_location (stmt_info
->stmt
));
1502 /* While EH edges will generally prevent vectorization, stmt might
1503 e.g. be in a must-not-throw region. Ensure newly created stmts
1504 that could throw are part of the same region. */
1505 int lp_nr
= lookup_stmt_eh_lp (stmt_info
->stmt
);
1506 if (lp_nr
!= 0 && stmt_could_throw_p (cfun
, vec_stmt
))
1507 add_stmt_to_eh_lp (vec_stmt
, lp_nr
);
1510 gcc_assert (!stmt_could_throw_p (cfun
, vec_stmt
));
1513 /* Replace the scalar statement STMT_INFO with a new vector statement VEC_STMT,
1514 which sets the same scalar result as STMT_INFO did. Create and return a
1515 stmt_vec_info for VEC_STMT. */
1518 vect_finish_replace_stmt (vec_info
*vinfo
,
1519 stmt_vec_info stmt_info
, gimple
*vec_stmt
)
1521 gimple
*scalar_stmt
= vect_orig_stmt (stmt_info
)->stmt
;
1522 gcc_assert (gimple_get_lhs (scalar_stmt
) == gimple_get_lhs (vec_stmt
));
1524 gimple_stmt_iterator gsi
= gsi_for_stmt (scalar_stmt
);
1525 gsi_replace (&gsi
, vec_stmt
, true);
1527 vect_finish_stmt_generation_1 (vinfo
, stmt_info
, vec_stmt
);
1530 /* Add VEC_STMT to the vectorized implementation of STMT_INFO and insert it
1531 before *GSI. Create and return a stmt_vec_info for VEC_STMT. */
1534 vect_finish_stmt_generation (vec_info
*vinfo
,
1535 stmt_vec_info stmt_info
, gimple
*vec_stmt
,
1536 gimple_stmt_iterator
*gsi
)
1538 gcc_assert (!stmt_info
|| gimple_code (stmt_info
->stmt
) != GIMPLE_LABEL
);
1540 if (!gsi_end_p (*gsi
)
1541 && gimple_has_mem_ops (vec_stmt
))
1543 gimple
*at_stmt
= gsi_stmt (*gsi
);
1544 tree vuse
= gimple_vuse (at_stmt
);
1545 if (vuse
&& TREE_CODE (vuse
) == SSA_NAME
)
1547 tree vdef
= gimple_vdef (at_stmt
);
1548 gimple_set_vuse (vec_stmt
, gimple_vuse (at_stmt
));
1549 gimple_set_modified (vec_stmt
, true);
1550 /* If we have an SSA vuse and insert a store, update virtual
1551 SSA form to avoid triggering the renamer. Do so only
1552 if we can easily see all uses - which is what almost always
1553 happens with the way vectorized stmts are inserted. */
1554 if ((vdef
&& TREE_CODE (vdef
) == SSA_NAME
)
1555 && ((is_gimple_assign (vec_stmt
)
1556 && !is_gimple_reg (gimple_assign_lhs (vec_stmt
)))
1557 || (is_gimple_call (vec_stmt
)
1558 && !(gimple_call_flags (vec_stmt
)
1559 & (ECF_CONST
|ECF_PURE
|ECF_NOVOPS
)))))
1561 tree new_vdef
= copy_ssa_name (vuse
, vec_stmt
);
1562 gimple_set_vdef (vec_stmt
, new_vdef
);
1563 SET_USE (gimple_vuse_op (at_stmt
), new_vdef
);
1567 gsi_insert_before (gsi
, vec_stmt
, GSI_SAME_STMT
);
1568 vect_finish_stmt_generation_1 (vinfo
, stmt_info
, vec_stmt
);
1571 /* We want to vectorize a call to combined function CFN with function
1572 decl FNDECL, using VECTYPE_OUT as the type of the output and VECTYPE_IN
1573 as the types of all inputs. Check whether this is possible using
1574 an internal function, returning its code if so or IFN_LAST if not. */
1577 vectorizable_internal_function (combined_fn cfn
, tree fndecl
,
1578 tree vectype_out
, tree vectype_in
)
1581 if (internal_fn_p (cfn
))
1582 ifn
= as_internal_fn (cfn
);
1584 ifn
= associated_internal_fn (fndecl
);
1585 if (ifn
!= IFN_LAST
&& direct_internal_fn_p (ifn
))
1587 const direct_internal_fn_info
&info
= direct_internal_fn (ifn
);
1588 if (info
.vectorizable
)
1590 tree type0
= (info
.type0
< 0 ? vectype_out
: vectype_in
);
1591 tree type1
= (info
.type1
< 0 ? vectype_out
: vectype_in
);
1592 if (direct_internal_fn_supported_p (ifn
, tree_pair (type0
, type1
),
1593 OPTIMIZE_FOR_SPEED
))
1601 static tree
permute_vec_elements (vec_info
*, tree
, tree
, tree
, stmt_vec_info
,
1602 gimple_stmt_iterator
*);
1604 /* Check whether a load or store statement in the loop described by
1605 LOOP_VINFO is possible in a loop using partial vectors. This is
1606 testing whether the vectorizer pass has the appropriate support,
1607 as well as whether the target does.
1609 VLS_TYPE says whether the statement is a load or store and VECTYPE
1610 is the type of the vector being loaded or stored. MEMORY_ACCESS_TYPE
1611 says how the load or store is going to be implemented and GROUP_SIZE
1612 is the number of load or store statements in the containing group.
1613 If the access is a gather load or scatter store, GS_INFO describes
1614 its arguments. If the load or store is conditional, SCALAR_MASK is the
1615 condition under which it occurs.
1617 Clear LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P if a loop using partial
1618 vectors is not supported, otherwise record the required rgroup control
1622 check_load_store_for_partial_vectors (loop_vec_info loop_vinfo
, tree vectype
,
1623 vec_load_store_type vls_type
,
1625 vect_memory_access_type
1627 gather_scatter_info
*gs_info
,
1630 /* Invariant loads need no special support. */
1631 if (memory_access_type
== VMAT_INVARIANT
)
1634 vec_loop_masks
*masks
= &LOOP_VINFO_MASKS (loop_vinfo
);
1635 machine_mode vecmode
= TYPE_MODE (vectype
);
1636 bool is_load
= (vls_type
== VLS_LOAD
);
1637 if (memory_access_type
== VMAT_LOAD_STORE_LANES
)
1640 ? !vect_load_lanes_supported (vectype
, group_size
, true)
1641 : !vect_store_lanes_supported (vectype
, group_size
, true))
1643 if (dump_enabled_p ())
1644 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1645 "can't operate on partial vectors because"
1646 " the target doesn't have an appropriate"
1647 " load/store-lanes instruction.\n");
1648 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo
) = false;
1651 unsigned int ncopies
= vect_get_num_copies (loop_vinfo
, vectype
);
1652 vect_record_loop_mask (loop_vinfo
, masks
, ncopies
, vectype
, scalar_mask
);
1656 if (memory_access_type
== VMAT_GATHER_SCATTER
)
1658 internal_fn ifn
= (is_load
1659 ? IFN_MASK_GATHER_LOAD
1660 : IFN_MASK_SCATTER_STORE
);
1661 if (!internal_gather_scatter_fn_supported_p (ifn
, vectype
,
1662 gs_info
->memory_type
,
1663 gs_info
->offset_vectype
,
1666 if (dump_enabled_p ())
1667 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1668 "can't operate on partial vectors because"
1669 " the target doesn't have an appropriate"
1670 " gather load or scatter store instruction.\n");
1671 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo
) = false;
1674 unsigned int ncopies
= vect_get_num_copies (loop_vinfo
, vectype
);
1675 vect_record_loop_mask (loop_vinfo
, masks
, ncopies
, vectype
, scalar_mask
);
1679 if (memory_access_type
!= VMAT_CONTIGUOUS
1680 && memory_access_type
!= VMAT_CONTIGUOUS_PERMUTE
)
1682 /* Element X of the data must come from iteration i * VF + X of the
1683 scalar loop. We need more work to support other mappings. */
1684 if (dump_enabled_p ())
1685 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1686 "can't operate on partial vectors because an"
1687 " access isn't contiguous.\n");
1688 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo
) = false;
1692 if (!VECTOR_MODE_P (vecmode
))
1694 if (dump_enabled_p ())
1695 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1696 "can't operate on partial vectors when emulating"
1697 " vector operations.\n");
1698 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo
) = false;
1702 /* We might load more scalars than we need for permuting SLP loads.
1703 We checked in get_group_load_store_type that the extra elements
1704 don't leak into a new vector. */
1705 auto get_valid_nvectors
= [] (poly_uint64 size
, poly_uint64 nunits
)
1707 unsigned int nvectors
;
1708 if (can_div_away_from_zero_p (size
, nunits
, &nvectors
))
1713 poly_uint64 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
1714 poly_uint64 vf
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
);
1715 machine_mode mask_mode
;
1716 bool using_partial_vectors_p
= false;
1717 if (targetm
.vectorize
.get_mask_mode (vecmode
).exists (&mask_mode
)
1718 && can_vec_mask_load_store_p (vecmode
, mask_mode
, is_load
))
1720 unsigned int nvectors
= get_valid_nvectors (group_size
* vf
, nunits
);
1721 vect_record_loop_mask (loop_vinfo
, masks
, nvectors
, vectype
, scalar_mask
);
1722 using_partial_vectors_p
= true;
1726 if (get_len_load_store_mode (vecmode
, is_load
).exists (&vmode
))
1728 unsigned int nvectors
= get_valid_nvectors (group_size
* vf
, nunits
);
1729 vec_loop_lens
*lens
= &LOOP_VINFO_LENS (loop_vinfo
);
1730 unsigned factor
= (vecmode
== vmode
) ? 1 : GET_MODE_UNIT_SIZE (vecmode
);
1731 vect_record_loop_len (loop_vinfo
, lens
, nvectors
, vectype
, factor
);
1732 using_partial_vectors_p
= true;
1735 if (!using_partial_vectors_p
)
1737 if (dump_enabled_p ())
1738 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1739 "can't operate on partial vectors because the"
1740 " target doesn't have the appropriate partial"
1741 " vectorization load or store.\n");
1742 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo
) = false;
1746 /* Return the mask input to a masked load or store. VEC_MASK is the vectorized
1747 form of the scalar mask condition and LOOP_MASK, if nonnull, is the mask
1748 that needs to be applied to all loads and stores in a vectorized loop.
1749 Return VEC_MASK if LOOP_MASK is null, otherwise return VEC_MASK & LOOP_MASK.
1751 MASK_TYPE is the type of both masks. If new statements are needed,
1752 insert them before GSI. */
1755 prepare_load_store_mask (tree mask_type
, tree loop_mask
, tree vec_mask
,
1756 gimple_stmt_iterator
*gsi
)
1758 gcc_assert (useless_type_conversion_p (mask_type
, TREE_TYPE (vec_mask
)));
1762 gcc_assert (TREE_TYPE (loop_mask
) == mask_type
);
1763 tree and_res
= make_temp_ssa_name (mask_type
, NULL
, "vec_mask_and");
1764 gimple
*and_stmt
= gimple_build_assign (and_res
, BIT_AND_EXPR
,
1765 vec_mask
, loop_mask
);
1766 gsi_insert_before (gsi
, and_stmt
, GSI_SAME_STMT
);
1770 /* Determine whether we can use a gather load or scatter store to vectorize
1771 strided load or store STMT_INFO by truncating the current offset to a
1772 smaller width. We need to be able to construct an offset vector:
1774 { 0, X, X*2, X*3, ... }
1776 without loss of precision, where X is STMT_INFO's DR_STEP.
1778 Return true if this is possible, describing the gather load or scatter
1779 store in GS_INFO. MASKED_P is true if the load or store is conditional. */
static bool
vect_truncate_gather_scatter_offset (stmt_vec_info stmt_info,
				     loop_vec_info loop_vinfo, bool masked_p,
				     gather_scatter_info *gs_info)
{
  dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
  data_reference *dr = dr_info->dr;
  tree step = DR_STEP (dr);
  if (TREE_CODE (step) != INTEGER_CST)
    {
      /* ??? Perhaps we could use range information here?  */
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "cannot truncate variable step.\n");
      return false;
    }

  /* Get the number of bits in an element.  */
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  scalar_mode element_mode = SCALAR_TYPE_MODE (TREE_TYPE (vectype));
  unsigned int element_bits = GET_MODE_BITSIZE (element_mode);

  /* Set COUNT to the upper limit on the number of elements - 1.
     Start with the maximum vectorization factor.  */
  unsigned HOST_WIDE_INT count = vect_max_vf (loop_vinfo) - 1;

  /* Try lowering COUNT to the number of scalar latch iterations.  */
  class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  widest_int max_iters;
  if (max_loop_iterations (loop, &max_iters)
      && max_iters < count)
    count = max_iters.to_shwi ();

  /* Try scales of 1 and the element size.  */
  int scales[] = { 1, vect_get_scalar_dr_size (dr_info) };
  wi::overflow_type overflow = wi::OVF_NONE;
  for (int i = 0; i < 2; ++i)
    {
      int scale = scales[i];
      widest_int factor;
      if (!wi::multiple_of_p (wi::to_widest (step), scale, SIGNED, &factor))
	continue;

      /* Determine the minimum precision of (COUNT - 1) * STEP / SCALE.  */
      widest_int range = wi::mul (count, factor, SIGNED, &overflow);
      if (overflow)
	continue;
      signop sign = range >= 0 ? UNSIGNED : SIGNED;
      unsigned int min_offset_bits = wi::min_precision (range, sign);

      /* Find the narrowest viable offset type.  */
      unsigned int offset_bits = 1U << ceil_log2 (min_offset_bits);
      tree offset_type = build_nonstandard_integer_type (offset_bits,
							  sign == UNSIGNED);

      /* See whether the target supports the operation with an offset
	 no narrower than OFFSET_TYPE.  */
      tree memory_type = TREE_TYPE (DR_REF (dr));
      if (!vect_gather_scatter_fn_p (loop_vinfo, DR_IS_READ (dr), masked_p,
				     vectype, memory_type, offset_type, scale,
				     &gs_info->ifn, &gs_info->offset_vectype))
	continue;

      gs_info->decl = NULL_TREE;
      /* Logically the sum of DR_BASE_ADDRESS, DR_INIT and DR_OFFSET,
	 but we don't need to store that here.  */
      gs_info->base = NULL_TREE;
      gs_info->element_type = TREE_TYPE (vectype);
      gs_info->offset = fold_convert (offset_type, step);
      gs_info->offset_dt = vect_constant_def;
      gs_info->scale = scale;
      gs_info->memory_type = memory_type;
      return true;
    }

  if (overflow && dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "truncating gather/scatter offset to %d bits"
		     " might change its value.\n", element_bits);

  return false;
}
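/* Illustrative sketch (not part of the vectorizer): the widest value in the
   offset vector { 0, X, X*2, ... } is COUNT * X / SCALE, so the narrowest
   usable offset type needs the minimum precision of that value, rounded up
   to a power of two.  The hypothetical helper below mirrors that arithmetic
   with plain unsigned integers; the signed case and overflow checks done by
   the real code are omitted here.  */

static inline unsigned int
example_min_offset_bits (unsigned long long count, unsigned long long step,
			 unsigned long long scale)
{
  unsigned long long range = count * (step / scale);
  unsigned int bits = 1;
  while ((range >> bits) != 0)	/* Minimum precision of RANGE.  */
    ++bits;
  unsigned int pow2 = 1;
  while (pow2 < bits)		/* Round up to the next power of two.  */
    pow2 *= 2;
  return pow2;
}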
/* Return true if we can use gather/scatter internal functions to
   vectorize STMT_INFO, which is a grouped or strided load or store.
   MASKED_P is true if load or store is conditional.  When returning
   true, fill in GS_INFO with the information required to perform the
   operation.  */

static bool
vect_use_strided_gather_scatters_p (stmt_vec_info stmt_info,
				    loop_vec_info loop_vinfo, bool masked_p,
				    gather_scatter_info *gs_info)
{
  if (!vect_check_gather_scatter (stmt_info, loop_vinfo, gs_info)
      || gs_info->decl)
    return vect_truncate_gather_scatter_offset (stmt_info, loop_vinfo,
						masked_p, gs_info);

  tree old_offset_type = TREE_TYPE (gs_info->offset);
  tree new_offset_type = TREE_TYPE (gs_info->offset_vectype);

  gcc_assert (TYPE_PRECISION (new_offset_type)
	      >= TYPE_PRECISION (old_offset_type));
  gs_info->offset = fold_convert (new_offset_type, gs_info->offset);

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "using gather/scatter for strided/grouped access,"
		     " scale = %d\n", gs_info->scale);

  return true;
}
/* STMT_INFO is a non-strided load or store, meaning that it accesses
   elements with a known constant step.  Return -1 if that step
   is negative, 0 if it is zero, and 1 if it is greater than zero.  */

static int
compare_step_with_zero (vec_info *vinfo, stmt_vec_info stmt_info)
{
  dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
  return tree_int_cst_compare (vect_dr_behavior (vinfo, dr_info)->step,
			       size_zero_node);
}
/* If the target supports a permute mask that reverses the elements in
   a vector of type VECTYPE, return that mask, otherwise return null.  */

static tree
perm_mask_for_reverse (tree vectype)
{
  poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);

  /* The encoding has a single stepped pattern.  */
  vec_perm_builder sel (nunits, 1, 3);
  for (int i = 0; i < 3; ++i)
    sel.quick_push (nunits - 1 - i);

  vec_perm_indices indices (sel, 1, nunits);
  if (!can_vec_perm_const_p (TYPE_MODE (vectype), indices))
    return NULL_TREE;
  return vect_gen_perm_mask_checked (vectype, indices);
}
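/* Illustrative sketch (not part of the vectorizer): the reversing permute
   selects element N-1 first, then N-2, and so on.  The stepped encoding
   above only records the first three indices of that single pattern; the
   rest follow by subtracting 1 each time.  The hypothetical helper below
   writes out the full index list for a fixed-length vector.  */

static inline void
example_reverse_perm_indices (unsigned int nunits, unsigned int *sel)
{
  for (unsigned int i = 0; i < nunits; ++i)
    sel[i] = nunits - 1 - i;	/* { N-1, N-2, ..., 1, 0 }  */
}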
/* A subroutine of get_load_store_type, with a subset of the same
   arguments.  Handle the case where STMT_INFO is a load or store that
   accesses consecutive elements with a negative step.  */

static vect_memory_access_type
get_negative_load_store_type (vec_info *vinfo,
			      stmt_vec_info stmt_info, tree vectype,
			      vec_load_store_type vls_type,
			      unsigned int ncopies)
{
  dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
  dr_alignment_support alignment_support_scheme;

  if (ncopies > 1)
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "multiple types with negative step.\n");
      return VMAT_ELEMENTWISE;
    }

  alignment_support_scheme = vect_supportable_dr_alignment (vinfo,
							     dr_info, false);
  if (alignment_support_scheme != dr_aligned
      && alignment_support_scheme != dr_unaligned_supported)
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "negative step but alignment required.\n");
      return VMAT_ELEMENTWISE;
    }

  if (vls_type == VLS_STORE_INVARIANT)
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "negative step with invariant source;"
			 " no permute needed.\n");
      return VMAT_CONTIGUOUS_DOWN;
    }

  if (!perm_mask_for_reverse (vectype))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "negative step and reversing not supported.\n");
      return VMAT_ELEMENTWISE;
    }

  return VMAT_CONTIGUOUS_REVERSE;
}
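/* Illustrative sketch (not part of the vectorizer): with a negative step
   the access still reads one contiguous block per copy, and the elements
   are then reversed with the permute mask so that lane 0 holds the value
   the first scalar iteration would have seen (VMAT_CONTIGUOUS_REVERSE).
   The hypothetical helper below shows the reversal applied after a
   contiguous load.  */

static inline void
example_reverse_after_load (const int *block, unsigned int nunits, int *lanes)
{
  for (unsigned int i = 0; i < nunits; ++i)
    lanes[i] = block[nunits - 1 - i];
}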
/* STMT_INFO is either a masked or unconditional store.  Return the value
   being stored.  */

tree
vect_get_store_rhs (stmt_vec_info stmt_info)
{
  if (gassign *assign = dyn_cast <gassign *> (stmt_info->stmt))
    {
      gcc_assert (gimple_assign_single_p (assign));
      return gimple_assign_rhs1 (assign);
    }
  if (gcall *call = dyn_cast <gcall *> (stmt_info->stmt))
    {
      internal_fn ifn = gimple_call_internal_fn (call);
      int index = internal_fn_stored_value_index (ifn);
      gcc_assert (index >= 0);
      return gimple_call_arg (call, index);
    }
  gcc_unreachable ();
}
/* Function VECTOR_VECTOR_COMPOSITION_TYPE

   This function returns a vector type which can be composed from NELTS
   pieces, whose type is recorded in PTYPE.  VTYPE should be a vector type
   with the same vector size as the returned vector.  It first checks
   whether the target supports a pieces-sized vector mode for the
   construction; if not, it checks for a pieces-sized scalar mode instead.
   It returns NULL_TREE if no usable composition can be found.

   For example, for (vtype=V16QI, nelts=4), we can probably get:
     - V16QI with PTYPE V4QI.
     - V4SI with PTYPE SI.
     - NULL_TREE.  */

static tree
vector_vector_composition_type (tree vtype, poly_uint64 nelts, tree *ptype)
{
  gcc_assert (VECTOR_TYPE_P (vtype));
  gcc_assert (known_gt (nelts, 0U));

  machine_mode vmode = TYPE_MODE (vtype);
  if (!VECTOR_MODE_P (vmode))
    return NULL_TREE;

  poly_uint64 vbsize = GET_MODE_BITSIZE (vmode);
  unsigned int pbsize;
  if (constant_multiple_p (vbsize, nelts, &pbsize))
    {
      /* First check if vec_init optab supports construction from
	 vector pieces directly.  */
      scalar_mode elmode = SCALAR_TYPE_MODE (TREE_TYPE (vtype));
      poly_uint64 inelts = pbsize / GET_MODE_BITSIZE (elmode);
      machine_mode rmode;
      if (related_vector_mode (vmode, elmode, inelts).exists (&rmode)
	  && (convert_optab_handler (vec_init_optab, vmode, rmode)
	      != CODE_FOR_nothing))
	{
	  *ptype = build_vector_type (TREE_TYPE (vtype), inelts);
	  return vtype;
	}

      /* Otherwise check if there exists an integer type of the same piece
	 size and if vec_init optab supports construction from it
	 directly.  */
      if (int_mode_for_size (pbsize, 0).exists (&elmode)
	  && related_vector_mode (vmode, elmode, nelts).exists (&rmode)
	  && (convert_optab_handler (vec_init_optab, rmode, elmode)
	      != CODE_FOR_nothing))
	{
	  *ptype = build_nonstandard_integer_type (pbsize, 1);
	  return build_vector_type (*ptype, nelts);
	}
    }

  return NULL_TREE;
}
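/* Illustrative sketch (not part of the vectorizer): splitting a 128-bit
   vector into NELTS pieces gives pieces of 128/NELTS bits each, e.g. for
   (V16QI, nelts=4) each piece is 32 bits and can be viewed either as a
   V4QI sub-vector or as an SI scalar.  The hypothetical helper below just
   computes that piece size.  */

static inline unsigned int
example_piece_bits (unsigned int vector_bits, unsigned int nelts)
{
  return vector_bits / nelts;	/* e.g. 128 / 4 == 32 bits per piece.  */
}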
/* A subroutine of get_load_store_type, with a subset of the same
   arguments.  Handle the case where STMT_INFO is part of a grouped load
   or store.

   For stores, the statements in the group are all consecutive
   and there is no gap at the end.  For loads, the statements in the
   group might not be consecutive; there can be gaps between statements
   as well as at the end.  */
2065 get_group_load_store_type (vec_info
*vinfo
, stmt_vec_info stmt_info
,
2066 tree vectype
, slp_tree slp_node
,
2067 bool masked_p
, vec_load_store_type vls_type
,
2068 vect_memory_access_type
*memory_access_type
,
2069 dr_alignment_support
*alignment_support_scheme
,
2070 gather_scatter_info
*gs_info
)
2072 loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
);
2073 class loop
*loop
= loop_vinfo
? LOOP_VINFO_LOOP (loop_vinfo
) : NULL
;
2074 stmt_vec_info first_stmt_info
= DR_GROUP_FIRST_ELEMENT (stmt_info
);
2075 dr_vec_info
*first_dr_info
= STMT_VINFO_DR_INFO (first_stmt_info
);
2076 unsigned int group_size
= DR_GROUP_SIZE (first_stmt_info
);
2077 bool single_element_p
= (stmt_info
== first_stmt_info
2078 && !DR_GROUP_NEXT_ELEMENT (stmt_info
));
2079 unsigned HOST_WIDE_INT gap
= DR_GROUP_GAP (first_stmt_info
);
2080 poly_uint64 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
2082 /* True if the vectorized statements would access beyond the last
2083 statement in the group. */
2084 bool overrun_p
= false;
2086 /* True if we can cope with such overrun by peeling for gaps, so that
2087 there is at least one final scalar iteration after the vector loop. */
2088 bool can_overrun_p
= (!masked_p
2089 && vls_type
== VLS_LOAD
2093 /* There can only be a gap at the end of the group if the stride is
2094 known at compile time. */
2095 gcc_assert (!STMT_VINFO_STRIDED_P (first_stmt_info
) || gap
== 0);
2097 /* Stores can't yet have gaps. */
2098 gcc_assert (slp_node
|| vls_type
== VLS_LOAD
|| gap
== 0);
2102 /* For SLP vectorization we directly vectorize a subchain
2103 without permutation. */
2104 if (! SLP_TREE_LOAD_PERMUTATION (slp_node
).exists ())
2106 = STMT_VINFO_DR_INFO (SLP_TREE_SCALAR_STMTS (slp_node
)[0]);
2107 if (STMT_VINFO_STRIDED_P (first_stmt_info
))
2109 /* Try to use consecutive accesses of DR_GROUP_SIZE elements,
2110 separated by the stride, until we have a complete vector.
2111 Fall back to scalar accesses if that isn't possible. */
2112 if (multiple_p (nunits
, group_size
))
2113 *memory_access_type
= VMAT_STRIDED_SLP
;
2115 *memory_access_type
= VMAT_ELEMENTWISE
;
2119 overrun_p
= loop_vinfo
&& gap
!= 0;
2120 if (overrun_p
&& vls_type
!= VLS_LOAD
)
2122 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2123 "Grouped store with gaps requires"
2124 " non-consecutive accesses\n");
2127 /* An overrun is fine if the trailing elements are smaller
2128 than the alignment boundary B. Every vector access will
2129 be a multiple of B and so we are guaranteed to access a
2130 non-gap element in the same B-sized block. */
2132 && gap
< (vect_known_alignment_in_bytes (first_dr_info
)
2133 / vect_get_scalar_dr_size (first_dr_info
)))
2136 /* If the gap splits the vector in half and the target
2137 can do half-vector operations avoid the epilogue peeling
2138 by simply loading half of the vector only. Usually
2139 the construction with an upper zero half will be elided. */
2140 dr_alignment_support alignment_support_scheme
;
2144 && (((alignment_support_scheme
2145 = vect_supportable_dr_alignment (vinfo
,
2146 first_dr_info
, false)))
2148 || alignment_support_scheme
== dr_unaligned_supported
)
2149 && known_eq (nunits
, (group_size
- gap
) * 2)
2150 && known_eq (nunits
, group_size
)
2151 && (vector_vector_composition_type (vectype
, 2, &half_vtype
)
2155 if (overrun_p
&& !can_overrun_p
)
2157 if (dump_enabled_p ())
2158 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2159 "Peeling for outer loop is not supported\n");
2162 int cmp
= compare_step_with_zero (vinfo
, stmt_info
);
2165 if (single_element_p
)
2166 /* ??? The VMAT_CONTIGUOUS_REVERSE code generation is
2167 only correct for single element "interleaving" SLP. */
2168 *memory_access_type
= get_negative_load_store_type
2169 (vinfo
, stmt_info
, vectype
, vls_type
, 1);
2172 /* Try to use consecutive accesses of DR_GROUP_SIZE elements,
2173 separated by the stride, until we have a complete vector.
2174 Fall back to scalar accesses if that isn't possible. */
2175 if (multiple_p (nunits
, group_size
))
2176 *memory_access_type
= VMAT_STRIDED_SLP
;
2178 *memory_access_type
= VMAT_ELEMENTWISE
;
2183 gcc_assert (!loop_vinfo
|| cmp
> 0);
2184 *memory_access_type
= VMAT_CONTIGUOUS
;
2190 /* We can always handle this case using elementwise accesses,
2191 but see if something more efficient is available. */
2192 *memory_access_type
= VMAT_ELEMENTWISE
;
2194 /* If there is a gap at the end of the group then these optimizations
2195 would access excess elements in the last iteration. */
2196 bool would_overrun_p
= (gap
!= 0);
2197 /* An overrun is fine if the trailing elements are smaller than the
2198 alignment boundary B. Every vector access will be a multiple of B
2199 and so we are guaranteed to access a non-gap element in the
2200 same B-sized block. */
2203 && gap
< (vect_known_alignment_in_bytes (first_dr_info
)
2204 / vect_get_scalar_dr_size (first_dr_info
)))
2205 would_overrun_p
= false;
2207 if (!STMT_VINFO_STRIDED_P (first_stmt_info
)
2208 && (can_overrun_p
|| !would_overrun_p
)
2209 && compare_step_with_zero (vinfo
, stmt_info
) > 0)
2211 /* First cope with the degenerate case of a single-element
2213 if (known_eq (TYPE_VECTOR_SUBPARTS (vectype
), 1U))
2216 /* Otherwise try using LOAD/STORE_LANES. */
2217 else if (vls_type
== VLS_LOAD
2218 ? vect_load_lanes_supported (vectype
, group_size
, masked_p
)
2219 : vect_store_lanes_supported (vectype
, group_size
,
2222 *memory_access_type
= VMAT_LOAD_STORE_LANES
;
2223 overrun_p
= would_overrun_p
;
2226 /* If that fails, try using permuting loads. */
2227 else if (vls_type
== VLS_LOAD
2228 ? vect_grouped_load_supported (vectype
, single_element_p
,
2230 : vect_grouped_store_supported (vectype
, group_size
))
2232 *memory_access_type
= VMAT_CONTIGUOUS_PERMUTE
;
2233 overrun_p
= would_overrun_p
;
2237 /* As a last resort, trying using a gather load or scatter store.
2239 ??? Although the code can handle all group sizes correctly,
2240 it probably isn't a win to use separate strided accesses based
2241 on nearby locations. Or, even if it's a win over scalar code,
2242 it might not be a win over vectorizing at a lower VF, if that
2243 allows us to use contiguous accesses. */
2244 if (*memory_access_type
== VMAT_ELEMENTWISE
2247 && vect_use_strided_gather_scatters_p (stmt_info
, loop_vinfo
,
2249 *memory_access_type
= VMAT_GATHER_SCATTER
;
2252 if (*memory_access_type
== VMAT_GATHER_SCATTER
2253 || *memory_access_type
== VMAT_ELEMENTWISE
)
2254 *alignment_support_scheme
= dr_unaligned_supported
;
2256 *alignment_support_scheme
2257 = vect_supportable_dr_alignment (vinfo
, first_dr_info
, false);
2259 if (vls_type
!= VLS_LOAD
&& first_stmt_info
== stmt_info
)
2261 /* STMT is the leader of the group. Check the operands of all the
2262 stmts of the group. */
2263 stmt_vec_info next_stmt_info
= DR_GROUP_NEXT_ELEMENT (stmt_info
);
2264 while (next_stmt_info
)
2266 tree op
= vect_get_store_rhs (next_stmt_info
);
2267 enum vect_def_type dt
;
2268 if (!vect_is_simple_use (op
, vinfo
, &dt
))
2270 if (dump_enabled_p ())
2271 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2272 "use not simple.\n");
2275 next_stmt_info
= DR_GROUP_NEXT_ELEMENT (next_stmt_info
);
2281 gcc_assert (can_overrun_p
);
2282 if (dump_enabled_p ())
2283 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2284 "Data access with gaps requires scalar "
2286 LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo
) = true;
/* Analyze load or store statement STMT_INFO of type VLS_TYPE.  Return true
   if there is a memory access type that the vectorized form can use,
   storing it in *MEMORY_ACCESS_TYPE if so.  If we decide to use gathers
   or scatters, fill in GS_INFO accordingly.  In addition
   *ALIGNMENT_SUPPORT_SCHEME is filled out and false is returned if
   the target does not support the alignment scheme.

   SLP says whether we're performing SLP rather than loop vectorization.
   MASKED_P is true if the statement is conditional on a vectorized mask.
   VECTYPE is the vector type that the vectorized statements will use.
   NCOPIES is the number of vector statements that will be needed.  */
2305 get_load_store_type (vec_info
*vinfo
, stmt_vec_info stmt_info
,
2306 tree vectype
, slp_tree slp_node
,
2307 bool masked_p
, vec_load_store_type vls_type
,
2308 unsigned int ncopies
,
2309 vect_memory_access_type
*memory_access_type
,
2310 dr_alignment_support
*alignment_support_scheme
,
2311 gather_scatter_info
*gs_info
)
2313 loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
);
2314 poly_uint64 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
2315 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info
))
2317 *memory_access_type
= VMAT_GATHER_SCATTER
;
2318 if (!vect_check_gather_scatter (stmt_info
, loop_vinfo
, gs_info
))
2320 else if (!vect_is_simple_use (gs_info
->offset
, vinfo
,
2321 &gs_info
->offset_dt
,
2322 &gs_info
->offset_vectype
))
2324 if (dump_enabled_p ())
2325 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2326 "%s index use not simple.\n",
2327 vls_type
== VLS_LOAD
? "gather" : "scatter");
2330 /* Gather-scatter accesses perform only component accesses, alignment
2331 is irrelevant for them. */
2332 *alignment_support_scheme
= dr_unaligned_supported
;
2334 else if (STMT_VINFO_GROUPED_ACCESS (stmt_info
))
2336 if (!get_group_load_store_type (vinfo
, stmt_info
, vectype
, slp_node
,
2338 vls_type
, memory_access_type
,
2339 alignment_support_scheme
, gs_info
))
2342 else if (STMT_VINFO_STRIDED_P (stmt_info
))
2344 gcc_assert (!slp_node
);
2346 && vect_use_strided_gather_scatters_p (stmt_info
, loop_vinfo
,
2348 *memory_access_type
= VMAT_GATHER_SCATTER
;
2350 *memory_access_type
= VMAT_ELEMENTWISE
;
2351 /* Alignment is irrelevant here. */
2352 *alignment_support_scheme
= dr_unaligned_supported
;
2356 int cmp
= compare_step_with_zero (vinfo
, stmt_info
);
2358 *memory_access_type
= get_negative_load_store_type
2359 (vinfo
, stmt_info
, vectype
, vls_type
, ncopies
);
2362 gcc_assert (vls_type
== VLS_LOAD
);
2363 *memory_access_type
= VMAT_INVARIANT
;
2366 *memory_access_type
= VMAT_CONTIGUOUS
;
2367 *alignment_support_scheme
2368 = vect_supportable_dr_alignment (vinfo
,
2369 STMT_VINFO_DR_INFO (stmt_info
), false);
2372 if ((*memory_access_type
== VMAT_ELEMENTWISE
2373 || *memory_access_type
== VMAT_STRIDED_SLP
)
2374 && !nunits
.is_constant ())
2376 if (dump_enabled_p ())
2377 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2378 "Not using elementwise accesses due to variable "
2379 "vectorization factor.\n");
2383 if (*alignment_support_scheme
== dr_unaligned_unsupported
)
2385 if (dump_enabled_p ())
2386 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2387 "unsupported unaligned access\n");
2391 /* FIXME: At the moment the cost model seems to underestimate the
2392 cost of using elementwise accesses. This check preserves the
2393 traditional behavior until that can be fixed. */
2394 stmt_vec_info first_stmt_info
= DR_GROUP_FIRST_ELEMENT (stmt_info
);
2395 if (!first_stmt_info
)
2396 first_stmt_info
= stmt_info
;
2397 if (*memory_access_type
== VMAT_ELEMENTWISE
2398 && !STMT_VINFO_STRIDED_P (first_stmt_info
)
2399 && !(stmt_info
== DR_GROUP_FIRST_ELEMENT (stmt_info
)
2400 && !DR_GROUP_NEXT_ELEMENT (stmt_info
)
2401 && !pow2p_hwi (DR_GROUP_SIZE (stmt_info
))))
2403 if (dump_enabled_p ())
2404 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2405 "not falling back to elementwise accesses\n");
/* Return true if boolean argument MASK is suitable for vectorizing
   conditional operation STMT_INFO.  When returning true, store the type
   of the definition in *MASK_DT_OUT and the type of the vectorized mask
   in *MASK_VECTYPE_OUT.  */

static bool
vect_check_scalar_mask (vec_info *vinfo, stmt_vec_info stmt_info, tree mask,
			vect_def_type *mask_dt_out,
			tree *mask_vectype_out)
{
  if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (mask)))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "mask argument is not a boolean.\n");
      return false;
    }

  if (TREE_CODE (mask) != SSA_NAME)
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "mask argument is not an SSA name.\n");
      return false;
    }

  enum vect_def_type mask_dt;
  tree mask_vectype;
  if (!vect_is_simple_use (mask, vinfo, &mask_dt, &mask_vectype))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "mask use not simple.\n");
      return false;
    }

  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  if (!mask_vectype)
    mask_vectype = get_mask_type_for_scalar_type (vinfo, TREE_TYPE (vectype));

  if (!mask_vectype || !VECTOR_BOOLEAN_TYPE_P (mask_vectype))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "could not find an appropriate vector mask type.\n");
      return false;
    }

  if (maybe_ne (TYPE_VECTOR_SUBPARTS (mask_vectype),
		TYPE_VECTOR_SUBPARTS (vectype)))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "vector mask type %T"
			 " does not match vector data type %T.\n",
			 mask_vectype, vectype);
      return false;
    }

  *mask_dt_out = mask_dt;
  *mask_vectype_out = mask_vectype;
  return true;
}
/* Return true if stored value RHS is suitable for vectorizing store
   statement STMT_INFO.  When returning true, store the type of the
   definition in *RHS_DT_OUT, the type of the vectorized store value in
   *RHS_VECTYPE_OUT and the type of the store in *VLS_TYPE_OUT.  */

static bool
vect_check_store_rhs (vec_info *vinfo, stmt_vec_info stmt_info,
		      slp_tree slp_node, tree rhs,
		      vect_def_type *rhs_dt_out, tree *rhs_vectype_out,
		      vec_load_store_type *vls_type_out)
{
  /* In the case this is a store from a constant make sure
     native_encode_expr can handle it.  */
  if (CONSTANT_CLASS_P (rhs) && native_encode_expr (rhs, NULL, 64) == 0)
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "cannot encode constant as a byte sequence.\n");
      return false;
    }

  enum vect_def_type rhs_dt;
  tree rhs_vectype;
  slp_tree slp_op;
  if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 0,
			   &rhs, &slp_op, &rhs_dt, &rhs_vectype))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "use not simple.\n");
      return false;
    }

  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  if (rhs_vectype && !useless_type_conversion_p (vectype, rhs_vectype))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "incompatible vector types.\n");
      return false;
    }

  *rhs_dt_out = rhs_dt;
  *rhs_vectype_out = rhs_vectype;
  if (rhs_dt == vect_constant_def || rhs_dt == vect_external_def)
    *vls_type_out = VLS_STORE_INVARIANT;
  else
    *vls_type_out = VLS_STORE;
  return true;
}
/* Build an all-ones vector mask of type MASKTYPE while vectorizing STMT_INFO.
   Note that we support masks with floating-point type, in which case the
   floats are interpreted as a bitmask.  */

static tree
vect_build_all_ones_mask (vec_info *vinfo,
			  stmt_vec_info stmt_info, tree masktype)
{
  if (TREE_CODE (masktype) == INTEGER_TYPE)
    return build_int_cst (masktype, -1);
  else if (TREE_CODE (TREE_TYPE (masktype)) == INTEGER_TYPE)
    {
      tree mask = build_int_cst (TREE_TYPE (masktype), -1);
      mask = build_vector_from_val (masktype, mask);
      return vect_init_vector (vinfo, stmt_info, mask, masktype, NULL);
    }
  else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (masktype)))
    {
      REAL_VALUE_TYPE r;
      long tmp[6];
      for (int j = 0; j < 6; ++j)
	tmp[j] = -1;
      real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (masktype)));
      tree mask = build_real (TREE_TYPE (masktype), r);
      mask = build_vector_from_val (masktype, mask);
      return vect_init_vector (vinfo, stmt_info, mask, masktype, NULL);
    }
  gcc_unreachable ();
}
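/* Illustrative sketch (not part of the vectorizer): for a floating-point
   mask type the "all ones" value is a bit pattern of all 1s reinterpreted
   as a float, i.e. the float's bits are the mask, not its numeric value
   (an all-ones pattern is numerically a NaN).  The hypothetical helper
   below shows that reinterpretation, assuming 32-bit unsigned int and
   float.  */

static inline float
example_all_ones_float_mask (void)
{
  union { unsigned int bits; float value; } u;
  u.bits = 0xffffffffu;		/* every bit set */
  return u.value;		/* interpreted as a bitmask */
}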
/* Build an all-zero merge value of type VECTYPE while vectorizing
   STMT_INFO as a gather load.  */

static tree
vect_build_zero_merge_argument (vec_info *vinfo,
				stmt_vec_info stmt_info, tree vectype)
{
  tree merge;
  if (TREE_CODE (TREE_TYPE (vectype)) == INTEGER_TYPE)
    merge = build_int_cst (TREE_TYPE (vectype), 0);
  else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (vectype)))
    {
      REAL_VALUE_TYPE r;
      long tmp[6];
      for (int j = 0; j < 6; ++j)
	tmp[j] = 0;
      real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (vectype)));
      merge = build_real (TREE_TYPE (vectype), r);
    }
  else
    gcc_unreachable ();
  merge = build_vector_from_val (vectype, merge);
  return vect_init_vector (vinfo, stmt_info, merge, vectype, NULL);
}
/* Build a gather load call while vectorizing STMT_INFO.  Insert new
   instructions before GSI and add them to VEC_STMT.  GS_INFO describes
   the gather load operation.  If the load is conditional, MASK is the
   unvectorized condition and MASK_DT is its definition type, otherwise
   MASK is null.  */
2589 vect_build_gather_load_calls (vec_info
*vinfo
, stmt_vec_info stmt_info
,
2590 gimple_stmt_iterator
*gsi
,
2592 gather_scatter_info
*gs_info
,
2595 loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
);
2596 class loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
2597 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
2598 poly_uint64 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
2599 int ncopies
= vect_get_num_copies (loop_vinfo
, vectype
);
2600 edge pe
= loop_preheader_edge (loop
);
2601 enum { NARROW
, NONE
, WIDEN
} modifier
;
2602 poly_uint64 gather_off_nunits
2603 = TYPE_VECTOR_SUBPARTS (gs_info
->offset_vectype
);
2605 tree arglist
= TYPE_ARG_TYPES (TREE_TYPE (gs_info
->decl
));
2606 tree rettype
= TREE_TYPE (TREE_TYPE (gs_info
->decl
));
2607 tree srctype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
2608 tree ptrtype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
2609 tree idxtype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
2610 tree masktype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
2611 tree scaletype
= TREE_VALUE (arglist
);
2612 tree real_masktype
= masktype
;
2613 gcc_checking_assert (types_compatible_p (srctype
, rettype
)
2615 || TREE_CODE (masktype
) == INTEGER_TYPE
2616 || types_compatible_p (srctype
, masktype
)));
2617 if (mask
&& TREE_CODE (masktype
) == INTEGER_TYPE
)
2618 masktype
= truth_type_for (srctype
);
2620 tree mask_halftype
= masktype
;
2621 tree perm_mask
= NULL_TREE
;
2622 tree mask_perm_mask
= NULL_TREE
;
2623 if (known_eq (nunits
, gather_off_nunits
))
2625 else if (known_eq (nunits
* 2, gather_off_nunits
))
2629 /* Currently widening gathers and scatters are only supported for
2630 fixed-length vectors. */
2631 int count
= gather_off_nunits
.to_constant ();
2632 vec_perm_builder
sel (count
, count
, 1);
2633 for (int i
= 0; i
< count
; ++i
)
2634 sel
.quick_push (i
| (count
/ 2));
2636 vec_perm_indices
indices (sel
, 1, count
);
2637 perm_mask
= vect_gen_perm_mask_checked (gs_info
->offset_vectype
,
2640 else if (known_eq (nunits
, gather_off_nunits
* 2))
2644 /* Currently narrowing gathers and scatters are only supported for
2645 fixed-length vectors. */
2646 int count
= nunits
.to_constant ();
2647 vec_perm_builder
sel (count
, count
, 1);
2648 sel
.quick_grow (count
);
2649 for (int i
= 0; i
< count
; ++i
)
2650 sel
[i
] = i
< count
/ 2 ? i
: i
+ count
/ 2;
2651 vec_perm_indices
indices (sel
, 2, count
);
2652 perm_mask
= vect_gen_perm_mask_checked (vectype
, indices
);
2656 if (mask
&& masktype
== real_masktype
)
2658 for (int i
= 0; i
< count
; ++i
)
2659 sel
[i
] = i
| (count
/ 2);
2660 indices
.new_vector (sel
, 2, count
);
2661 mask_perm_mask
= vect_gen_perm_mask_checked (masktype
, indices
);
2664 mask_halftype
= truth_type_for (gs_info
->offset_vectype
);
2669 tree scalar_dest
= gimple_get_lhs (stmt_info
->stmt
);
2670 tree vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
2672 tree ptr
= fold_convert (ptrtype
, gs_info
->base
);
2673 if (!is_gimple_min_invariant (ptr
))
2676 ptr
= force_gimple_operand (ptr
, &seq
, true, NULL_TREE
);
2677 basic_block new_bb
= gsi_insert_seq_on_edge_immediate (pe
, seq
);
2678 gcc_assert (!new_bb
);
2681 tree scale
= build_int_cst (scaletype
, gs_info
->scale
);
2683 tree vec_oprnd0
= NULL_TREE
;
2684 tree vec_mask
= NULL_TREE
;
2685 tree src_op
= NULL_TREE
;
2686 tree mask_op
= NULL_TREE
;
2687 tree prev_res
= NULL_TREE
;
2691 src_op
= vect_build_zero_merge_argument (vinfo
, stmt_info
, rettype
);
2692 mask_op
= vect_build_all_ones_mask (vinfo
, stmt_info
, masktype
);
2695 auto_vec
<tree
> vec_oprnds0
;
2696 auto_vec
<tree
> vec_masks
;
2697 vect_get_vec_defs_for_operand (vinfo
, stmt_info
,
2698 modifier
== WIDEN
? ncopies
/ 2 : ncopies
,
2699 gs_info
->offset
, &vec_oprnds0
);
2701 vect_get_vec_defs_for_operand (vinfo
, stmt_info
,
2702 modifier
== NARROW
? ncopies
/ 2 : ncopies
,
2704 for (int j
= 0; j
< ncopies
; ++j
)
2707 if (modifier
== WIDEN
&& (j
& 1))
2708 op
= permute_vec_elements (vinfo
, vec_oprnd0
, vec_oprnd0
,
2709 perm_mask
, stmt_info
, gsi
);
2711 op
= vec_oprnd0
= vec_oprnds0
[modifier
== WIDEN
? j
/ 2 : j
];
2713 if (!useless_type_conversion_p (idxtype
, TREE_TYPE (op
)))
2715 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op
)),
2716 TYPE_VECTOR_SUBPARTS (idxtype
)));
2717 var
= vect_get_new_ssa_name (idxtype
, vect_simple_var
);
2718 op
= build1 (VIEW_CONVERT_EXPR
, idxtype
, op
);
2719 gassign
*new_stmt
= gimple_build_assign (var
, VIEW_CONVERT_EXPR
, op
);
2720 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
2726 if (mask_perm_mask
&& (j
& 1))
2727 mask_op
= permute_vec_elements (vinfo
, mask_op
, mask_op
,
2728 mask_perm_mask
, stmt_info
, gsi
);
2731 if (modifier
== NARROW
)
2734 vec_mask
= vec_masks
[j
/ 2];
2737 vec_mask
= vec_masks
[j
];
2740 if (!useless_type_conversion_p (masktype
, TREE_TYPE (vec_mask
)))
2742 poly_uint64 sub1
= TYPE_VECTOR_SUBPARTS (TREE_TYPE (mask_op
));
2743 poly_uint64 sub2
= TYPE_VECTOR_SUBPARTS (masktype
);
2744 gcc_assert (known_eq (sub1
, sub2
));
2745 var
= vect_get_new_ssa_name (masktype
, vect_simple_var
);
2746 mask_op
= build1 (VIEW_CONVERT_EXPR
, masktype
, mask_op
);
2748 = gimple_build_assign (var
, VIEW_CONVERT_EXPR
, mask_op
);
2749 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
2753 if (modifier
== NARROW
&& masktype
!= real_masktype
)
2755 var
= vect_get_new_ssa_name (mask_halftype
, vect_simple_var
);
2757 = gimple_build_assign (var
, (j
& 1) ? VEC_UNPACK_HI_EXPR
2758 : VEC_UNPACK_LO_EXPR
,
2760 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
2766 tree mask_arg
= mask_op
;
2767 if (masktype
!= real_masktype
)
2769 tree utype
, optype
= TREE_TYPE (mask_op
);
2770 if (TYPE_MODE (real_masktype
) == TYPE_MODE (optype
))
2771 utype
= real_masktype
;
2773 utype
= lang_hooks
.types
.type_for_mode (TYPE_MODE (optype
), 1);
2774 var
= vect_get_new_ssa_name (utype
, vect_scalar_var
);
2775 mask_arg
= build1 (VIEW_CONVERT_EXPR
, utype
, mask_op
);
2777 = gimple_build_assign (var
, VIEW_CONVERT_EXPR
, mask_arg
);
2778 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
2780 if (!useless_type_conversion_p (real_masktype
, utype
))
2782 gcc_assert (TYPE_PRECISION (utype
)
2783 <= TYPE_PRECISION (real_masktype
));
2784 var
= vect_get_new_ssa_name (real_masktype
, vect_scalar_var
);
2785 new_stmt
= gimple_build_assign (var
, NOP_EXPR
, mask_arg
);
2786 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
2789 src_op
= build_zero_cst (srctype
);
2791 gimple
*new_stmt
= gimple_build_call (gs_info
->decl
, 5, src_op
, ptr
, op
,
2794 if (!useless_type_conversion_p (vectype
, rettype
))
2796 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (vectype
),
2797 TYPE_VECTOR_SUBPARTS (rettype
)));
2798 op
= vect_get_new_ssa_name (rettype
, vect_simple_var
);
2799 gimple_call_set_lhs (new_stmt
, op
);
2800 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
2801 var
= make_ssa_name (vec_dest
);
2802 op
= build1 (VIEW_CONVERT_EXPR
, vectype
, op
);
2803 new_stmt
= gimple_build_assign (var
, VIEW_CONVERT_EXPR
, op
);
2804 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
2808 var
= make_ssa_name (vec_dest
, new_stmt
);
2809 gimple_call_set_lhs (new_stmt
, var
);
2810 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
2813 if (modifier
== NARROW
)
2820 var
= permute_vec_elements (vinfo
, prev_res
, var
, perm_mask
,
2822 new_stmt
= SSA_NAME_DEF_STMT (var
);
2825 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (new_stmt
);
2827 *vec_stmt
= STMT_VINFO_VEC_STMTS (stmt_info
)[0];
/* Prepare the base and offset in GS_INFO for vectorization.
   Set *DATAREF_PTR to the loop-invariant base address and *VEC_OFFSET
   to the vectorized offset argument for the first copy of STMT_INFO.
   STMT_INFO is the statement described by GS_INFO and LOOP is the
   containing loop.  */

static void
vect_get_gather_scatter_ops (vec_info *vinfo,
			     class loop *loop, stmt_vec_info stmt_info,
			     gather_scatter_info *gs_info,
			     tree *dataref_ptr, vec<tree> *vec_offset,
			     unsigned ncopies)
{
  gimple_seq stmts = NULL;
  *dataref_ptr = force_gimple_operand (gs_info->base, &stmts, true, NULL_TREE);
  if (stmts != NULL)
    {
      basic_block new_bb;
      edge pe = loop_preheader_edge (loop);
      new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
      gcc_assert (!new_bb);
    }

  vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies, gs_info->offset,
				 vec_offset, gs_info->offset_vectype);
}
/* Prepare to implement a grouped or strided load or store using
   the gather load or scatter store operation described by GS_INFO.
   STMT_INFO is the load or store statement.

   Set *DATAREF_BUMP to the amount that should be added to the base
   address after each copy of the vectorized statement.  Set *VEC_OFFSET
   to an invariant offset vector in which element I has the value
   I * DR_STEP / SCALE.  */

static void
vect_get_strided_load_store_ops (stmt_vec_info stmt_info,
				 loop_vec_info loop_vinfo,
				 gather_scatter_info *gs_info,
				 tree *dataref_bump, tree *vec_offset)
{
  struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);

  tree bump = size_binop (MULT_EXPR,
			  fold_convert (sizetype, unshare_expr (DR_STEP (dr))),
			  size_int (TYPE_VECTOR_SUBPARTS (vectype)));
  *dataref_bump = cse_and_gimplify_to_preheader (loop_vinfo, bump);

  /* The offset given in GS_INFO can have pointer type, so use the element
     type of the vector instead.  */
  tree offset_type = TREE_TYPE (gs_info->offset_vectype);

  /* Calculate X = DR_STEP / SCALE and convert it to the appropriate type.  */
  tree step = size_binop (EXACT_DIV_EXPR, unshare_expr (DR_STEP (dr)),
			  ssize_int (gs_info->scale));
  step = fold_convert (offset_type, step);

  /* Create {0, X, X*2, X*3, ...}.  */
  tree offset = fold_build2 (VEC_SERIES_EXPR, gs_info->offset_vectype,
			     build_zero_cst (offset_type), step);
  *vec_offset = cse_and_gimplify_to_preheader (loop_vinfo, offset);
}
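/* Illustrative sketch (not part of the vectorizer): the VEC_SERIES above
   materialises the invariant offset vector { 0, X, X*2, X*3, ... } with
   X = DR_STEP / SCALE.  The hypothetical helper below builds the same
   series for a fixed-length vector using plain integers.  */

static inline void
example_strided_offsets (long long step, long long scale,
			 unsigned int nunits, long long *offsets)
{
  long long x = step / scale;		/* X = DR_STEP / SCALE.  */
  for (unsigned int i = 0; i < nunits; ++i)
    offsets[i] = (long long) i * x;	/* Element I gets I * X.  */
}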
/* Return the amount that should be added to a vector pointer to move
   to the next or previous copy of AGGR_TYPE.  DR_INFO is the data reference
   being vectorized and MEMORY_ACCESS_TYPE describes the type of
   vectorization.  */

static tree
vect_get_data_ptr_increment (vec_info *vinfo,
			     dr_vec_info *dr_info, tree aggr_type,
			     vect_memory_access_type memory_access_type)
{
  if (memory_access_type == VMAT_INVARIANT)
    return size_zero_node;

  tree iv_step = TYPE_SIZE_UNIT (aggr_type);
  tree step = vect_dr_behavior (vinfo, dr_info)->step;
  if (tree_int_cst_sgn (step) == -1)
    iv_step = fold_build1 (NEGATE_EXPR, TREE_TYPE (iv_step), iv_step);
  return iv_step;
}
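/* Illustrative sketch (not part of the vectorizer): the pointer bump is
   simply the aggregate size, negated when the scalar step is negative so
   that successive copies walk the accesses in the same direction as the
   original loop.  Names below are hypothetical.  */

static inline long long
example_data_ptr_increment (long long aggr_size_bytes, int step_sign)
{
  return step_sign < 0 ? -aggr_size_bytes : aggr_size_bytes;
}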
/* Check and perform vectorization of BUILT_IN_BSWAP{16,32,64,128}.  */
2917 vectorizable_bswap (vec_info
*vinfo
,
2918 stmt_vec_info stmt_info
, gimple_stmt_iterator
*gsi
,
2919 gimple
**vec_stmt
, slp_tree slp_node
,
2921 tree vectype_in
, stmt_vector_for_cost
*cost_vec
)
2924 gcall
*stmt
= as_a
<gcall
*> (stmt_info
->stmt
);
2925 loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
);
2928 op
= gimple_call_arg (stmt
, 0);
2929 vectype
= STMT_VINFO_VECTYPE (stmt_info
);
2930 poly_uint64 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
2932 /* Multiple types in SLP are handled by creating the appropriate number of
2933 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
2938 ncopies
= vect_get_num_copies (loop_vinfo
, vectype
);
2940 gcc_assert (ncopies
>= 1);
2942 tree char_vectype
= get_same_sized_vectype (char_type_node
, vectype_in
);
2946 poly_uint64 num_bytes
= TYPE_VECTOR_SUBPARTS (char_vectype
);
2947 unsigned word_bytes
;
2948 if (!constant_multiple_p (num_bytes
, nunits
, &word_bytes
))
2951 /* The encoding uses one stepped pattern for each byte in the word. */
2952 vec_perm_builder
elts (num_bytes
, word_bytes
, 3);
2953 for (unsigned i
= 0; i
< 3; ++i
)
2954 for (unsigned j
= 0; j
< word_bytes
; ++j
)
2955 elts
.quick_push ((i
+ 1) * word_bytes
- j
- 1);
2957 vec_perm_indices
indices (elts
, 1, num_bytes
);
2958 if (!can_vec_perm_const_p (TYPE_MODE (char_vectype
), indices
))
2964 && !vect_maybe_update_slp_op_vectype (slp_op
[0], vectype_in
))
2966 if (dump_enabled_p ())
2967 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2968 "incompatible vector types for invariants\n");
2972 STMT_VINFO_TYPE (stmt_info
) = call_vec_info_type
;
2973 DUMP_VECT_SCOPE ("vectorizable_bswap");
2976 record_stmt_cost (cost_vec
,
2977 1, vector_stmt
, stmt_info
, 0, vect_prologue
);
2978 record_stmt_cost (cost_vec
,
2979 ncopies
, vec_perm
, stmt_info
, 0, vect_body
);
2984 tree bswap_vconst
= vec_perm_indices_to_tree (char_vectype
, indices
);
2987 vec
<tree
> vec_oprnds
= vNULL
;
2988 vect_get_vec_defs (vinfo
, stmt_info
, slp_node
, ncopies
,
2990 /* Arguments are ready. create the new vector stmt. */
2993 FOR_EACH_VEC_ELT (vec_oprnds
, i
, vop
)
2996 tree tem
= make_ssa_name (char_vectype
);
2997 new_stmt
= gimple_build_assign (tem
, build1 (VIEW_CONVERT_EXPR
,
2998 char_vectype
, vop
));
2999 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
3000 tree tem2
= make_ssa_name (char_vectype
);
3001 new_stmt
= gimple_build_assign (tem2
, VEC_PERM_EXPR
,
3002 tem
, tem
, bswap_vconst
);
3003 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
3004 tem
= make_ssa_name (vectype
);
3005 new_stmt
= gimple_build_assign (tem
, build1 (VIEW_CONVERT_EXPR
,
3007 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
3009 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
3011 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (new_stmt
);
3015 *vec_stmt
= STMT_VINFO_VEC_STMTS (stmt_info
)[0];
3017 vec_oprnds
.release ();
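/* Illustrative sketch (not part of the vectorizer): vectorizable_bswap
   reinterprets the input as a vector of bytes and applies a permutation
   that reverses the bytes within each WORD_BYTES-sized element, e.g. for
   4-byte elements the byte indices become { 3,2,1,0, 7,6,5,4, ... }.  The
   hypothetical helper below writes out that index pattern.  */

static inline void
example_bswap_perm (unsigned int num_bytes, unsigned int word_bytes,
		    unsigned char *elts)
{
  for (unsigned int i = 0; i < num_bytes / word_bytes; ++i)
    for (unsigned int j = 0; j < word_bytes; ++j)
      elts[i * word_bytes + j] = (i + 1) * word_bytes - j - 1;
}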
/* Return true if vector types VECTYPE_IN and VECTYPE_OUT have
   integer elements and if we can narrow VECTYPE_IN to VECTYPE_OUT
   in a single step.  On success, store the binary pack code in
   *CONVERT_CODE.  */

static bool
simple_integer_narrowing (tree vectype_out, tree vectype_in,
			  tree_code *convert_code)
{
  if (!INTEGRAL_TYPE_P (TREE_TYPE (vectype_out))
      || !INTEGRAL_TYPE_P (TREE_TYPE (vectype_in)))
    return false;

  tree_code code;
  int multi_step_cvt = 0;
  auto_vec <tree, 8> interm_types;
  if (!supportable_narrowing_operation (NOP_EXPR, vectype_out, vectype_in,
					&code, &multi_step_cvt, &interm_types)
      || multi_step_cvt)
    return false;

  *convert_code = code;
  return true;
}
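/* Illustrative sketch (not part of the vectorizer): a single-step integer
   narrowing packs two vectors with wider elements into one vector with
   elements half as wide, truncating each value, e.g. two V4SI inputs become
   one V8HI result.  The hypothetical helper below does the scalar
   equivalent; the ordering of the two inputs in the result is illustrative
   only.  */

static inline void
example_pack_trunc (const int *in0, const int *in1, short *out,
		    unsigned int half)
{
  for (unsigned int i = 0; i < half; ++i)
    {
      out[i] = (short) in0[i];		/* truncate first input vector */
      out[half + i] = (short) in1[i];	/* truncate second input vector */
    }
}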
/* Function vectorizable_call.

   Check if STMT_INFO performs a function call that can be vectorized.
   If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at GSI.
   Return true if STMT_INFO is vectorizable in this way.  */
3054 vectorizable_call (vec_info
*vinfo
,
3055 stmt_vec_info stmt_info
, gimple_stmt_iterator
*gsi
,
3056 gimple
**vec_stmt
, slp_tree slp_node
,
3057 stmt_vector_for_cost
*cost_vec
)
3063 tree vec_oprnd0
= NULL_TREE
, vec_oprnd1
= NULL_TREE
;
3064 tree vectype_out
, vectype_in
;
3065 poly_uint64 nunits_in
;
3066 poly_uint64 nunits_out
;
3067 loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
);
3068 bb_vec_info bb_vinfo
= dyn_cast
<bb_vec_info
> (vinfo
);
3069 tree fndecl
, new_temp
, rhs_type
;
3070 enum vect_def_type dt
[4]
3071 = { vect_unknown_def_type
, vect_unknown_def_type
, vect_unknown_def_type
,
3072 vect_unknown_def_type
};
3073 tree vectypes
[ARRAY_SIZE (dt
)] = {};
3074 slp_tree slp_op
[ARRAY_SIZE (dt
)] = {};
3075 int ndts
= ARRAY_SIZE (dt
);
3077 auto_vec
<tree
, 8> vargs
;
3078 auto_vec
<tree
, 8> orig_vargs
;
3079 enum { NARROW
, NONE
, WIDEN
} modifier
;
3083 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
3086 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
3090 /* Is STMT_INFO a vectorizable call? */
3091 stmt
= dyn_cast
<gcall
*> (stmt_info
->stmt
);
3095 if (gimple_call_internal_p (stmt
)
3096 && (internal_load_fn_p (gimple_call_internal_fn (stmt
))
3097 || internal_store_fn_p (gimple_call_internal_fn (stmt
))))
3098 /* Handled by vectorizable_load and vectorizable_store. */
3101 if (gimple_call_lhs (stmt
) == NULL_TREE
3102 || TREE_CODE (gimple_call_lhs (stmt
)) != SSA_NAME
)
3105 gcc_checking_assert (!stmt_can_throw_internal (cfun
, stmt
));
3107 vectype_out
= STMT_VINFO_VECTYPE (stmt_info
);
3109 /* Process function arguments. */
3110 rhs_type
= NULL_TREE
;
3111 vectype_in
= NULL_TREE
;
3112 nargs
= gimple_call_num_args (stmt
);
3114 /* Bail out if the function has more than four arguments, we do not have
3115 interesting builtin functions to vectorize with more than two arguments
3116 except for fma. No arguments is also not good. */
3117 if (nargs
== 0 || nargs
> 4)
3120 /* Ignore the arguments of IFN_GOMP_SIMD_LANE, they are magic. */
3121 combined_fn cfn
= gimple_call_combined_fn (stmt
);
3122 if (cfn
== CFN_GOMP_SIMD_LANE
)
3125 rhs_type
= unsigned_type_node
;
3129 if (internal_fn_p (cfn
))
3130 mask_opno
= internal_fn_mask_index (as_internal_fn (cfn
));
3132 for (i
= 0; i
< nargs
; i
++)
3134 if ((int) i
== mask_opno
)
3136 op
= gimple_call_arg (stmt
, i
);
3137 if (!vect_check_scalar_mask (vinfo
,
3138 stmt_info
, op
, &dt
[i
], &vectypes
[i
]))
3143 if (!vect_is_simple_use (vinfo
, stmt_info
, slp_node
,
3144 i
, &op
, &slp_op
[i
], &dt
[i
], &vectypes
[i
]))
3146 if (dump_enabled_p ())
3147 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3148 "use not simple.\n");
3152 /* We can only handle calls with arguments of the same type. */
3154 && !types_compatible_p (rhs_type
, TREE_TYPE (op
)))
3156 if (dump_enabled_p ())
3157 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3158 "argument types differ.\n");
3162 rhs_type
= TREE_TYPE (op
);
3165 vectype_in
= vectypes
[i
];
3166 else if (vectypes
[i
]
3167 && !types_compatible_p (vectypes
[i
], vectype_in
))
3169 if (dump_enabled_p ())
3170 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3171 "argument vector types differ.\n");
3175 /* If all arguments are external or constant defs, infer the vector type
3176 from the scalar type. */
3178 vectype_in
= get_vectype_for_scalar_type (vinfo
, rhs_type
, slp_node
);
3180 gcc_assert (vectype_in
);
3183 if (dump_enabled_p ())
3184 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3185 "no vectype for scalar type %T\n", rhs_type
);
3189 /* FORNOW: we don't yet support mixtures of vector sizes for calls,
3190 just mixtures of nunits. E.g. DI->SI versions of __builtin_ctz*
3191 are traditionally vectorized as two VnDI->VnDI IFN_CTZs followed
3192 by a pack of the two vectors into an SI vector. We would need
3193 separate code to handle direct VnDI->VnSI IFN_CTZs. */
3194 if (TYPE_SIZE (vectype_in
) != TYPE_SIZE (vectype_out
))
3196 if (dump_enabled_p ())
3197 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3198 "mismatched vector sizes %T and %T\n",
3199 vectype_in
, vectype_out
);
3203 if (VECTOR_BOOLEAN_TYPE_P (vectype_out
)
3204 != VECTOR_BOOLEAN_TYPE_P (vectype_in
))
3206 if (dump_enabled_p ())
3207 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3208 "mixed mask and nonmask vector types\n");
3213 nunits_in
= TYPE_VECTOR_SUBPARTS (vectype_in
);
3214 nunits_out
= TYPE_VECTOR_SUBPARTS (vectype_out
);
3215 if (known_eq (nunits_in
* 2, nunits_out
))
3217 else if (known_eq (nunits_out
, nunits_in
))
3219 else if (known_eq (nunits_out
* 2, nunits_in
))
3224 /* We only handle functions that do not read or clobber memory. */
3225 if (gimple_vuse (stmt
))
3227 if (dump_enabled_p ())
3228 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3229 "function reads from or writes to memory.\n");
3233 /* For now, we only vectorize functions if a target specific builtin
3234 is available. TODO -- in some cases, it might be profitable to
3235 insert the calls for pieces of the vector, in order to be able
3236 to vectorize other operations in the loop. */
3238 internal_fn ifn
= IFN_LAST
;
3239 tree callee
= gimple_call_fndecl (stmt
);
3241 /* First try using an internal function. */
3242 tree_code convert_code
= ERROR_MARK
;
3244 && (modifier
== NONE
3245 || (modifier
== NARROW
3246 && simple_integer_narrowing (vectype_out
, vectype_in
,
3248 ifn
= vectorizable_internal_function (cfn
, callee
, vectype_out
,
3251 /* If that fails, try asking for a target-specific built-in function. */
3252 if (ifn
== IFN_LAST
)
3254 if (cfn
!= CFN_LAST
)
3255 fndecl
= targetm
.vectorize
.builtin_vectorized_function
3256 (cfn
, vectype_out
, vectype_in
);
3257 else if (callee
&& fndecl_built_in_p (callee
, BUILT_IN_MD
))
3258 fndecl
= targetm
.vectorize
.builtin_md_vectorized_function
3259 (callee
, vectype_out
, vectype_in
);
3262 if (ifn
== IFN_LAST
&& !fndecl
)
3264 if (cfn
== CFN_GOMP_SIMD_LANE
3267 && LOOP_VINFO_LOOP (loop_vinfo
)->simduid
3268 && TREE_CODE (gimple_call_arg (stmt
, 0)) == SSA_NAME
3269 && LOOP_VINFO_LOOP (loop_vinfo
)->simduid
3270 == SSA_NAME_VAR (gimple_call_arg (stmt
, 0)))
3272 /* We can handle IFN_GOMP_SIMD_LANE by returning a
3273 { 0, 1, 2, ... vf - 1 } vector. */
3274 gcc_assert (nargs
== 0);
3276 else if (modifier
== NONE
3277 && (gimple_call_builtin_p (stmt
, BUILT_IN_BSWAP16
)
3278 || gimple_call_builtin_p (stmt
, BUILT_IN_BSWAP32
)
3279 || gimple_call_builtin_p (stmt
, BUILT_IN_BSWAP64
)
3280 || gimple_call_builtin_p (stmt
, BUILT_IN_BSWAP128
)))
3281 return vectorizable_bswap (vinfo
, stmt_info
, gsi
, vec_stmt
, slp_node
,
3282 slp_op
, vectype_in
, cost_vec
);
3285 if (dump_enabled_p ())
3286 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3287 "function is not vectorizable.\n");
3294 else if (modifier
== NARROW
&& ifn
== IFN_LAST
)
3295 ncopies
= vect_get_num_copies (loop_vinfo
, vectype_out
);
3297 ncopies
= vect_get_num_copies (loop_vinfo
, vectype_in
);
3299 /* Sanity check: make sure that at least one copy of the vectorized stmt
3300 needs to be generated. */
3301 gcc_assert (ncopies
>= 1);
3303 vec_loop_masks
*masks
= (loop_vinfo
? &LOOP_VINFO_MASKS (loop_vinfo
) : NULL
);
3304 if (!vec_stmt
) /* transformation not required. */
3307 for (i
= 0; i
< nargs
; ++i
)
3308 if (!vect_maybe_update_slp_op_vectype (slp_op
[i
], vectype_in
))
3310 if (dump_enabled_p ())
3311 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3312 "incompatible vector types for invariants\n");
3315 STMT_VINFO_TYPE (stmt_info
) = call_vec_info_type
;
3316 DUMP_VECT_SCOPE ("vectorizable_call");
3317 vect_model_simple_cost (vinfo
, stmt_info
,
3318 ncopies
, dt
, ndts
, slp_node
, cost_vec
);
3319 if (ifn
!= IFN_LAST
&& modifier
== NARROW
&& !slp_node
)
3320 record_stmt_cost (cost_vec
, ncopies
/ 2,
3321 vec_promote_demote
, stmt_info
, 0, vect_body
);
3323 if (loop_vinfo
&& mask_opno
>= 0)
3325 unsigned int nvectors
= (slp_node
3326 ? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
)
3328 tree scalar_mask
= gimple_call_arg (stmt_info
->stmt
, mask_opno
);
3329 vect_record_loop_mask (loop_vinfo
, masks
, nvectors
,
3330 vectype_out
, scalar_mask
);
3337 if (dump_enabled_p ())
3338 dump_printf_loc (MSG_NOTE
, vect_location
, "transform call.\n");
3341 scalar_dest
= gimple_call_lhs (stmt
);
3342 vec_dest
= vect_create_destination_var (scalar_dest
, vectype_out
);
3344 bool masked_loop_p
= loop_vinfo
&& LOOP_VINFO_FULLY_MASKED_P (loop_vinfo
);
3346 if (modifier
== NONE
|| ifn
!= IFN_LAST
)
3348 tree prev_res
= NULL_TREE
;
3349 vargs
.safe_grow (nargs
, true);
3350 orig_vargs
.safe_grow (nargs
, true);
3351 auto_vec
<vec
<tree
> > vec_defs (nargs
);
3352 for (j
= 0; j
< ncopies
; ++j
)
3354 /* Build argument list for the vectorized call. */
3357 vec
<tree
> vec_oprnds0
;
3359 vect_get_slp_defs (vinfo
, slp_node
, &vec_defs
);
3360 vec_oprnds0
= vec_defs
[0];
3362 /* Arguments are ready. Create the new vector stmt. */
3363 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vec_oprnd0
)
3366 for (k
= 0; k
< nargs
; k
++)
3368 vec
<tree
> vec_oprndsk
= vec_defs
[k
];
3369 vargs
[k
] = vec_oprndsk
[i
];
3372 if (modifier
== NARROW
)
3374 /* We don't define any narrowing conditional functions
3376 gcc_assert (mask_opno
< 0);
3377 tree half_res
= make_ssa_name (vectype_in
);
3379 = gimple_build_call_internal_vec (ifn
, vargs
);
3380 gimple_call_set_lhs (call
, half_res
);
3381 gimple_call_set_nothrow (call
, true);
3382 vect_finish_stmt_generation (vinfo
, stmt_info
, call
, gsi
);
3385 prev_res
= half_res
;
3388 new_temp
= make_ssa_name (vec_dest
);
3389 new_stmt
= gimple_build_assign (new_temp
, convert_code
,
3390 prev_res
, half_res
);
3391 vect_finish_stmt_generation (vinfo
, stmt_info
,
3396 if (mask_opno
>= 0 && masked_loop_p
)
3398 unsigned int vec_num
= vec_oprnds0
.length ();
3399 /* Always true for SLP. */
3400 gcc_assert (ncopies
== 1);
3401 tree mask
= vect_get_loop_mask (gsi
, masks
, vec_num
,
3403 vargs
[mask_opno
] = prepare_load_store_mask
3404 (TREE_TYPE (mask
), mask
, vargs
[mask_opno
], gsi
);
3408 if (ifn
!= IFN_LAST
)
3409 call
= gimple_build_call_internal_vec (ifn
, vargs
);
3411 call
= gimple_build_call_vec (fndecl
, vargs
);
3412 new_temp
= make_ssa_name (vec_dest
, call
);
3413 gimple_call_set_lhs (call
, new_temp
);
3414 gimple_call_set_nothrow (call
, true);
3415 vect_finish_stmt_generation (vinfo
, stmt_info
, call
, gsi
);
3418 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
3423 for (i
= 0; i
< nargs
; i
++)
3425 op
= gimple_call_arg (stmt
, i
);
3428 vec_defs
.quick_push (vNULL
);
3429 vect_get_vec_defs_for_operand (vinfo
, stmt_info
, ncopies
,
3433 orig_vargs
[i
] = vargs
[i
] = vec_defs
[i
][j
];
3436 if (mask_opno
>= 0 && masked_loop_p
)
3438 tree mask
= vect_get_loop_mask (gsi
, masks
, ncopies
,
3441 = prepare_load_store_mask (TREE_TYPE (mask
), mask
,
3442 vargs
[mask_opno
], gsi
);
3446 if (cfn
== CFN_GOMP_SIMD_LANE
)
3448 tree cst
= build_index_vector (vectype_out
, j
* nunits_out
, 1);
3450 = vect_get_new_ssa_name (vectype_out
, vect_simple_var
, "cst_");
3451 gimple
*init_stmt
= gimple_build_assign (new_var
, cst
);
3452 vect_init_vector_1 (vinfo
, stmt_info
, init_stmt
, NULL
);
3453 new_temp
= make_ssa_name (vec_dest
);
3454 new_stmt
= gimple_build_assign (new_temp
, new_var
);
3455 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
3457 else if (modifier
== NARROW
)
3459 /* We don't define any narrowing conditional functions at
3461 gcc_assert (mask_opno
< 0);
3462 tree half_res
= make_ssa_name (vectype_in
);
3463 gcall
*call
= gimple_build_call_internal_vec (ifn
, vargs
);
3464 gimple_call_set_lhs (call
, half_res
);
3465 gimple_call_set_nothrow (call
, true);
3466 vect_finish_stmt_generation (vinfo
, stmt_info
, call
, gsi
);
3469 prev_res
= half_res
;
3472 new_temp
= make_ssa_name (vec_dest
);
3473 new_stmt
= gimple_build_assign (new_temp
, convert_code
,
3474 prev_res
, half_res
);
3475 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
3480 if (ifn
!= IFN_LAST
)
3481 call
= gimple_build_call_internal_vec (ifn
, vargs
);
3483 call
= gimple_build_call_vec (fndecl
, vargs
);
3484 new_temp
= make_ssa_name (vec_dest
, call
);
3485 gimple_call_set_lhs (call
, new_temp
);
3486 gimple_call_set_nothrow (call
, true);
3487 vect_finish_stmt_generation (vinfo
, stmt_info
, call
, gsi
);
3491 if (j
== (modifier
== NARROW
? 1 : 0))
3492 *vec_stmt
= new_stmt
;
3493 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (new_stmt
);
3495 for (i
= 0; i
< nargs
; i
++)
3497 vec
<tree
> vec_oprndsi
= vec_defs
[i
];
3498 vec_oprndsi
.release ();
3501 else if (modifier
== NARROW
)
3503 auto_vec
<vec
<tree
> > vec_defs (nargs
);
3504 /* We don't define any narrowing conditional functions at present. */
3505 gcc_assert (mask_opno
< 0);
3506 for (j
= 0; j
< ncopies
; ++j
)
3508 /* Build argument list for the vectorized call. */
3510 vargs
.create (nargs
* 2);
3516 vec
<tree
> vec_oprnds0
;
3518 vect_get_slp_defs (vinfo
, slp_node
, &vec_defs
);
3519 vec_oprnds0
= vec_defs
[0];
3521 /* Arguments are ready. Create the new vector stmt. */
3522 for (i
= 0; vec_oprnds0
.iterate (i
, &vec_oprnd0
); i
+= 2)
3526 for (k
= 0; k
< nargs
; k
++)
3528 vec
<tree
> vec_oprndsk
= vec_defs
[k
];
3529 vargs
.quick_push (vec_oprndsk
[i
]);
3530 vargs
.quick_push (vec_oprndsk
[i
+ 1]);
3533 if (ifn
!= IFN_LAST
)
3534 call
= gimple_build_call_internal_vec (ifn
, vargs
);
3536 call
= gimple_build_call_vec (fndecl
, vargs
);
3537 new_temp
= make_ssa_name (vec_dest
, call
);
3538 gimple_call_set_lhs (call
, new_temp
);
3539 gimple_call_set_nothrow (call
, true);
3540 vect_finish_stmt_generation (vinfo
, stmt_info
, call
, gsi
);
3541 SLP_TREE_VEC_STMTS (slp_node
).quick_push (call
);
3546 for (i
= 0; i
< nargs
; i
++)
3548 op
= gimple_call_arg (stmt
, i
);
3551 vec_defs
.quick_push (vNULL
);
3552 vect_get_vec_defs_for_operand (vinfo
, stmt_info
, 2 * ncopies
,
3553 op
, &vec_defs
[i
], vectypes
[i
]);
3555 vec_oprnd0
= vec_defs
[i
][2*j
];
3556 vec_oprnd1
= vec_defs
[i
][2*j
+1];
3558 vargs
.quick_push (vec_oprnd0
);
3559 vargs
.quick_push (vec_oprnd1
);
3562 gcall
*new_stmt
= gimple_build_call_vec (fndecl
, vargs
);
3563 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
3564 gimple_call_set_lhs (new_stmt
, new_temp
);
3565 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
3567 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (new_stmt
);
3571 *vec_stmt
= STMT_VINFO_VEC_STMTS (stmt_info
)[0];
3573 for (i
= 0; i
< nargs
; i
++)
3575 vec
<tree
> vec_oprndsi
= vec_defs
[i
];
3576 vec_oprndsi
.release ();
3580 /* No current target implements this case. */
3585 /* The call in STMT might prevent it from being removed in dce.
3586 We however cannot remove it here, due to the way the ssa name
3587 it defines is mapped to the new definition. So just replace
3588 rhs of the statement with something harmless. */
3593 stmt_info
= vect_orig_stmt (stmt_info
);
3594 lhs
= gimple_get_lhs (stmt_info
->stmt
);
3597 = gimple_build_assign (lhs
, build_zero_cst (TREE_TYPE (lhs
)));
3598 vinfo
->replace_stmt (gsi
, stmt_info
, new_stmt
);
3604 struct simd_call_arg_info
3608 HOST_WIDE_INT linear_step
;
3609 enum vect_def_type dt
;
3611 bool simd_lane_linear
;
3614 /* Helper function of vectorizable_simd_clone_call. If OP, an SSA_NAME,
3615 is linear within simd lane (but not within whole loop), note it in
3619 vect_simd_lane_linear (tree op
, class loop
*loop
,
3620 struct simd_call_arg_info
*arginfo
)
3622 gimple
*def_stmt
= SSA_NAME_DEF_STMT (op
);
3624 if (!is_gimple_assign (def_stmt
)
3625 || gimple_assign_rhs_code (def_stmt
) != POINTER_PLUS_EXPR
3626 || !is_gimple_min_invariant (gimple_assign_rhs1 (def_stmt
)))
3629 tree base
= gimple_assign_rhs1 (def_stmt
);
3630 HOST_WIDE_INT linear_step
= 0;
3631 tree v
= gimple_assign_rhs2 (def_stmt
);
3632 while (TREE_CODE (v
) == SSA_NAME
)
3635 def_stmt
= SSA_NAME_DEF_STMT (v
);
3636 if (is_gimple_assign (def_stmt
))
3637 switch (gimple_assign_rhs_code (def_stmt
))
3640 t
= gimple_assign_rhs2 (def_stmt
);
3641 if (linear_step
|| TREE_CODE (t
) != INTEGER_CST
)
3643 base
= fold_build2 (POINTER_PLUS_EXPR
, TREE_TYPE (base
), base
, t
);
3644 v
= gimple_assign_rhs1 (def_stmt
);
3647 t
= gimple_assign_rhs2 (def_stmt
);
3648 if (linear_step
|| !tree_fits_shwi_p (t
) || integer_zerop (t
))
3650 linear_step
= tree_to_shwi (t
);
3651 v
= gimple_assign_rhs1 (def_stmt
);
3654 t
= gimple_assign_rhs1 (def_stmt
);
3655 if (TREE_CODE (TREE_TYPE (t
)) != INTEGER_TYPE
3656 || (TYPE_PRECISION (TREE_TYPE (v
))
3657 < TYPE_PRECISION (TREE_TYPE (t
))))
3666 else if (gimple_call_internal_p (def_stmt
, IFN_GOMP_SIMD_LANE
)
3668 && TREE_CODE (gimple_call_arg (def_stmt
, 0)) == SSA_NAME
3669 && (SSA_NAME_VAR (gimple_call_arg (def_stmt
, 0))
3674 arginfo
->linear_step
= linear_step
;
3676 arginfo
->simd_lane_linear
= true;
/* Return the number of elements in vector type VECTYPE, which is associated
   with a SIMD clone.  At present these vectors always have a constant
   length.  */

static unsigned HOST_WIDE_INT
simd_clone_subparts (tree vectype)
{
  return TYPE_VECTOR_SUBPARTS (vectype).to_constant ();
}
/* Function vectorizable_simd_clone_call.

   Check if STMT_INFO performs a function call that can be vectorized
   by calling a simd clone of the function.
   If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at GSI.
   Return true if STMT_INFO is vectorizable in this way.  */
3701 vectorizable_simd_clone_call (vec_info
*vinfo
, stmt_vec_info stmt_info
,
3702 gimple_stmt_iterator
*gsi
,
3703 gimple
**vec_stmt
, slp_tree slp_node
,
3704 stmt_vector_for_cost
*)
3709 tree vec_oprnd0
= NULL_TREE
;
3712 loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
);
3713 bb_vec_info bb_vinfo
= dyn_cast
<bb_vec_info
> (vinfo
);
3714 class loop
*loop
= loop_vinfo
? LOOP_VINFO_LOOP (loop_vinfo
) : NULL
;
3715 tree fndecl
, new_temp
;
3717 auto_vec
<simd_call_arg_info
> arginfo
;
3718 vec
<tree
> vargs
= vNULL
;
3720 tree lhs
, rtype
, ratype
;
3721 vec
<constructor_elt
, va_gc
> *ret_ctor_elts
= NULL
;
3723 /* Is STMT a vectorizable call? */
3724 gcall
*stmt
= dyn_cast
<gcall
*> (stmt_info
->stmt
);
3728 fndecl
= gimple_call_fndecl (stmt
);
3729 if (fndecl
== NULL_TREE
)
3732 struct cgraph_node
*node
= cgraph_node::get (fndecl
);
3733 if (node
== NULL
|| node
->simd_clones
== NULL
)
3736 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
3739 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
3743 if (gimple_call_lhs (stmt
)
3744 && TREE_CODE (gimple_call_lhs (stmt
)) != SSA_NAME
)
3747 gcc_checking_assert (!stmt_can_throw_internal (cfun
, stmt
));
3749 vectype
= STMT_VINFO_VECTYPE (stmt_info
);
3751 if (loop_vinfo
&& nested_in_vect_loop_p (loop
, stmt_info
))
3758 /* Process function arguments. */
3759 nargs
= gimple_call_num_args (stmt
);
3761 /* Bail out if the function has zero arguments. */
3765 arginfo
.reserve (nargs
, true);
3767 for (i
= 0; i
< nargs
; i
++)
3769 simd_call_arg_info thisarginfo
;
3772 thisarginfo
.linear_step
= 0;
3773 thisarginfo
.align
= 0;
3774 thisarginfo
.op
= NULL_TREE
;
3775 thisarginfo
.simd_lane_linear
= false;
3777 op
= gimple_call_arg (stmt
, i
);
3778 if (!vect_is_simple_use (op
, vinfo
, &thisarginfo
.dt
,
3779 &thisarginfo
.vectype
)
3780 || thisarginfo
.dt
== vect_uninitialized_def
)
3782 if (dump_enabled_p ())
3783 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3784 "use not simple.\n");
3788 if (thisarginfo
.dt
== vect_constant_def
3789 || thisarginfo
.dt
== vect_external_def
)
3790 gcc_assert (thisarginfo
.vectype
== NULL_TREE
);
3793 gcc_assert (thisarginfo
.vectype
!= NULL_TREE
);
3794 if (VECTOR_BOOLEAN_TYPE_P (thisarginfo
.vectype
))
3796 if (dump_enabled_p ())
3797 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3798 "vector mask arguments are not supported\n");
3803 /* For linear arguments, the analyze phase should have saved
3804 the base and step in STMT_VINFO_SIMD_CLONE_INFO. */
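          /* Added note, inferred from the accesses below rather than taken
             from the original comments: for argument I three extra slots are
             recorded, so entry I*3 + 1 holds the saved base, I*3 + 2 the
             linear step as an INTEGER_CST, and I*3 + 3 a boolean_true_node /
             boolean_false_node flag meaning "linear within the simd lane
             only".  */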
3805 if (i
* 3 + 4 <= STMT_VINFO_SIMD_CLONE_INFO (stmt_info
).length ()
3806 && STMT_VINFO_SIMD_CLONE_INFO (stmt_info
)[i
* 3 + 2])
3808 gcc_assert (vec_stmt
);
3809 thisarginfo
.linear_step
3810 = tree_to_shwi (STMT_VINFO_SIMD_CLONE_INFO (stmt_info
)[i
* 3 + 2]);
3812 = STMT_VINFO_SIMD_CLONE_INFO (stmt_info
)[i
* 3 + 1];
3813 thisarginfo
.simd_lane_linear
3814 = (STMT_VINFO_SIMD_CLONE_INFO (stmt_info
)[i
* 3 + 3]
3815 == boolean_true_node
);
3816 /* If loop has been peeled for alignment, we need to adjust it. */
3817 tree n1
= LOOP_VINFO_NITERS_UNCHANGED (loop_vinfo
);
3818 tree n2
= LOOP_VINFO_NITERS (loop_vinfo
);
3819 if (n1
!= n2
&& !thisarginfo
.simd_lane_linear
)
3821 tree bias
= fold_build2 (MINUS_EXPR
, TREE_TYPE (n1
), n1
, n2
);
3822 tree step
= STMT_VINFO_SIMD_CLONE_INFO (stmt_info
)[i
* 3 + 2];
3823 tree opt
= TREE_TYPE (thisarginfo
.op
);
3824 bias
= fold_convert (TREE_TYPE (step
), bias
);
3825 bias
= fold_build2 (MULT_EXPR
, TREE_TYPE (step
), bias
, step
);
3827 = fold_build2 (POINTER_TYPE_P (opt
)
3828 ? POINTER_PLUS_EXPR
: PLUS_EXPR
, opt
,
3829 thisarginfo
.op
, bias
);
3833 && thisarginfo
.dt
!= vect_constant_def
3834 && thisarginfo
.dt
!= vect_external_def
3836 && TREE_CODE (op
) == SSA_NAME
3837 && simple_iv (loop
, loop_containing_stmt (stmt
), op
,
3839 && tree_fits_shwi_p (iv
.step
))
3841 thisarginfo
.linear_step
= tree_to_shwi (iv
.step
);
3842 thisarginfo
.op
= iv
.base
;
3844 else if ((thisarginfo
.dt
== vect_constant_def
3845 || thisarginfo
.dt
== vect_external_def
)
3846 && POINTER_TYPE_P (TREE_TYPE (op
)))
3847 thisarginfo
.align
= get_pointer_alignment (op
) / BITS_PER_UNIT
;
      /* Addresses of array elements indexed by GOMP_SIMD_LANE are
	 linear too.  */
3850 if (POINTER_TYPE_P (TREE_TYPE (op
))
3851 && !thisarginfo
.linear_step
3853 && thisarginfo
.dt
!= vect_constant_def
3854 && thisarginfo
.dt
!= vect_external_def
3857 && TREE_CODE (op
) == SSA_NAME
)
3858 vect_simd_lane_linear (op
, loop
, &thisarginfo
);
3860 arginfo
.quick_push (thisarginfo
);
3863 poly_uint64 vf
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
);
3864 if (!vf
.is_constant ())
3866 if (dump_enabled_p ())
3867 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3868 "not considering SIMD clones; not yet supported"
3869 " for variable-width vectors.\n");
3873 unsigned int badness
= 0;
3874 struct cgraph_node
*bestn
= NULL
;
3875 if (STMT_VINFO_SIMD_CLONE_INFO (stmt_info
).exists ())
3876 bestn
= cgraph_node::get (STMT_VINFO_SIMD_CLONE_INFO (stmt_info
)[0]);
3878 for (struct cgraph_node
*n
= node
->simd_clones
; n
!= NULL
;
3879 n
= n
->simdclone
->next_clone
)
3881 unsigned int this_badness
= 0;
3882 unsigned int num_calls
;
3883 if (!constant_multiple_p (vf
, n
->simdclone
->simdlen
, &num_calls
)
3884 || n
->simdclone
->nargs
!= nargs
)
3887 this_badness
+= exact_log2 (num_calls
) * 1024;
3888 if (n
->simdclone
->inbranch
)
3889 this_badness
+= 2048;
3890 int target_badness
= targetm
.simd_clone
.usable (n
);
3891 if (target_badness
< 0)
3893 this_badness
+= target_badness
* 512;
3894 /* FORNOW: Have to add code to add the mask argument. */
3895 if (n
->simdclone
->inbranch
)
3897 for (i
= 0; i
< nargs
; i
++)
3899 switch (n
->simdclone
->args
[i
].arg_type
)
3901 case SIMD_CLONE_ARG_TYPE_VECTOR
:
3902 if (!useless_type_conversion_p
3903 (n
->simdclone
->args
[i
].orig_type
,
3904 TREE_TYPE (gimple_call_arg (stmt
, i
))))
3906 else if (arginfo
[i
].dt
== vect_constant_def
3907 || arginfo
[i
].dt
== vect_external_def
3908 || arginfo
[i
].linear_step
)
3911 case SIMD_CLONE_ARG_TYPE_UNIFORM
:
3912 if (arginfo
[i
].dt
!= vect_constant_def
3913 && arginfo
[i
].dt
!= vect_external_def
)
3916 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP
:
3917 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP
:
3918 if (arginfo
[i
].dt
== vect_constant_def
3919 || arginfo
[i
].dt
== vect_external_def
3920 || (arginfo
[i
].linear_step
3921 != n
->simdclone
->args
[i
].linear_step
))
3924 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP
:
3925 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP
:
3926 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP
:
3927 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP
:
3928 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP
:
3929 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP
:
3933 case SIMD_CLONE_ARG_TYPE_MASK
:
3936 if (i
== (size_t) -1)
3938 if (n
->simdclone
->args
[i
].alignment
> arginfo
[i
].align
)
3943 if (arginfo
[i
].align
)
3944 this_badness
+= (exact_log2 (arginfo
[i
].align
)
3945 - exact_log2 (n
->simdclone
->args
[i
].alignment
));
3947 if (i
== (size_t) -1)
3949 if (bestn
== NULL
|| this_badness
< badness
)
3952 badness
= this_badness
;
3959 for (i
= 0; i
< nargs
; i
++)
3960 if ((arginfo
[i
].dt
== vect_constant_def
3961 || arginfo
[i
].dt
== vect_external_def
)
3962 && bestn
->simdclone
->args
[i
].arg_type
== SIMD_CLONE_ARG_TYPE_VECTOR
)
3964 tree arg_type
= TREE_TYPE (gimple_call_arg (stmt
, i
));
3965 arginfo
[i
].vectype
= get_vectype_for_scalar_type (vinfo
, arg_type
,
3967 if (arginfo
[i
].vectype
== NULL
3968 || !constant_multiple_p (bestn
->simdclone
->simdlen
,
3969 simd_clone_subparts (arginfo
[i
].vectype
)))
3973 fndecl
= bestn
->decl
;
3974 nunits
= bestn
->simdclone
->simdlen
;
3975 ncopies
= vector_unroll_factor (vf
, nunits
);
3977 /* If the function isn't const, only allow it in simd loops where user
3978 has asserted that at least nunits consecutive iterations can be
3979 performed using SIMD instructions. */
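  /* Added example (not from the original sources): for

	 #pragma omp simd safelen(8)
	 for (int i = 0; i < n; i++)
	   a[i] = foo (b[i]);

     loop->safelen is 8, so an 8-lane clone of a non-const foo may still be
     used even though the call has a virtual use: the user has asserted that
     8 consecutive iterations can run as one SIMD step.  */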
3980 if ((loop
== NULL
|| maybe_lt ((unsigned) loop
->safelen
, nunits
))
3981 && gimple_vuse (stmt
))
3984 /* Sanity check: make sure that at least one copy of the vectorized stmt
3985 needs to be generated. */
3986 gcc_assert (ncopies
>= 1);
3988 if (!vec_stmt
) /* transformation not required. */
3990 STMT_VINFO_SIMD_CLONE_INFO (stmt_info
).safe_push (bestn
->decl
);
3991 for (i
= 0; i
< nargs
; i
++)
3992 if ((bestn
->simdclone
->args
[i
].arg_type
3993 == SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP
)
3994 || (bestn
->simdclone
->args
[i
].arg_type
3995 == SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP
))
3997 STMT_VINFO_SIMD_CLONE_INFO (stmt_info
).safe_grow_cleared (i
* 3
4000 STMT_VINFO_SIMD_CLONE_INFO (stmt_info
).safe_push (arginfo
[i
].op
);
4001 tree lst
= POINTER_TYPE_P (TREE_TYPE (arginfo
[i
].op
))
4002 ? size_type_node
: TREE_TYPE (arginfo
[i
].op
);
4003 tree ls
= build_int_cst (lst
, arginfo
[i
].linear_step
);
4004 STMT_VINFO_SIMD_CLONE_INFO (stmt_info
).safe_push (ls
);
4005 tree sll
= arginfo
[i
].simd_lane_linear
4006 ? boolean_true_node
: boolean_false_node
;
4007 STMT_VINFO_SIMD_CLONE_INFO (stmt_info
).safe_push (sll
);
4009 STMT_VINFO_TYPE (stmt_info
) = call_simd_clone_vec_info_type
;
4010 DUMP_VECT_SCOPE ("vectorizable_simd_clone_call");
4011 /* vect_model_simple_cost (vinfo, stmt_info, ncopies,
4012 dt, slp_node, cost_vec); */
4018 if (dump_enabled_p ())
4019 dump_printf_loc (MSG_NOTE
, vect_location
, "transform call.\n");
4022 scalar_dest
= gimple_call_lhs (stmt
);
4023 vec_dest
= NULL_TREE
;
4028 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
4029 rtype
= TREE_TYPE (TREE_TYPE (fndecl
));
4030 if (TREE_CODE (rtype
) == ARRAY_TYPE
)
4033 rtype
= TREE_TYPE (ratype
);
4037 auto_vec
<vec
<tree
> > vec_oprnds
;
4038 auto_vec
<unsigned> vec_oprnds_i
;
4039 vec_oprnds
.safe_grow_cleared (nargs
, true);
4040 vec_oprnds_i
.safe_grow_cleared (nargs
, true);
4041 for (j
= 0; j
< ncopies
; ++j
)
4043 /* Build argument list for the vectorized call. */
4045 vargs
.create (nargs
);
4049 for (i
= 0; i
< nargs
; i
++)
4051 unsigned int k
, l
, m
, o
;
4053 op
= gimple_call_arg (stmt
, i
);
4054 switch (bestn
->simdclone
->args
[i
].arg_type
)
4056 case SIMD_CLONE_ARG_TYPE_VECTOR
:
4057 atype
= bestn
->simdclone
->args
[i
].vector_type
;
4058 o
= vector_unroll_factor (nunits
,
4059 simd_clone_subparts (atype
));
4060 for (m
= j
* o
; m
< (j
+ 1) * o
; m
++)
4062 if (simd_clone_subparts (atype
)
4063 < simd_clone_subparts (arginfo
[i
].vectype
))
4065 poly_uint64 prec
= GET_MODE_BITSIZE (TYPE_MODE (atype
));
4066 k
= (simd_clone_subparts (arginfo
[i
].vectype
)
4067 / simd_clone_subparts (atype
));
4068 gcc_assert ((k
& (k
- 1)) == 0);
4071 vect_get_vec_defs_for_operand (vinfo
, stmt_info
,
4072 ncopies
* o
/ k
, op
,
4074 vec_oprnds_i
[i
] = 0;
4075 vec_oprnd0
= vec_oprnds
[i
][vec_oprnds_i
[i
]++];
4079 vec_oprnd0
= arginfo
[i
].op
;
4080 if ((m
& (k
- 1)) == 0)
4081 vec_oprnd0
= vec_oprnds
[i
][vec_oprnds_i
[i
]++];
4083 arginfo
[i
].op
= vec_oprnd0
;
4085 = build3 (BIT_FIELD_REF
, atype
, vec_oprnd0
,
4087 bitsize_int ((m
& (k
- 1)) * prec
));
4089 = gimple_build_assign (make_ssa_name (atype
),
4091 vect_finish_stmt_generation (vinfo
, stmt_info
,
4093 vargs
.safe_push (gimple_assign_lhs (new_stmt
));
4097 k
= (simd_clone_subparts (atype
)
4098 / simd_clone_subparts (arginfo
[i
].vectype
));
4099 gcc_assert ((k
& (k
- 1)) == 0);
4100 vec
<constructor_elt
, va_gc
> *ctor_elts
;
4102 vec_alloc (ctor_elts
, k
);
4105 for (l
= 0; l
< k
; l
++)
4107 if (m
== 0 && l
== 0)
4109 vect_get_vec_defs_for_operand (vinfo
, stmt_info
,
4113 vec_oprnds_i
[i
] = 0;
4114 vec_oprnd0
= vec_oprnds
[i
][vec_oprnds_i
[i
]++];
4117 vec_oprnd0
= vec_oprnds
[i
][vec_oprnds_i
[i
]++];
4118 arginfo
[i
].op
= vec_oprnd0
;
4121 CONSTRUCTOR_APPEND_ELT (ctor_elts
, NULL_TREE
,
4125 if (!useless_type_conversion_p (TREE_TYPE (vec_oprnd0
),
4129 = build1 (VIEW_CONVERT_EXPR
, atype
, vec_oprnd0
);
4131 = gimple_build_assign (make_ssa_name (atype
),
4133 vect_finish_stmt_generation (vinfo
, stmt_info
,
4135 vargs
.safe_push (gimple_assign_lhs (new_stmt
));
4138 vargs
.safe_push (vec_oprnd0
);
4141 vec_oprnd0
= build_constructor (atype
, ctor_elts
);
4143 = gimple_build_assign (make_ssa_name (atype
),
4145 vect_finish_stmt_generation (vinfo
, stmt_info
,
4147 vargs
.safe_push (gimple_assign_lhs (new_stmt
));
4152 case SIMD_CLONE_ARG_TYPE_UNIFORM
:
4153 vargs
.safe_push (op
);
4155 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP
:
4156 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP
:
4161 = force_gimple_operand (unshare_expr (arginfo
[i
].op
),
4162 &stmts
, true, NULL_TREE
);
4166 edge pe
= loop_preheader_edge (loop
);
4167 new_bb
= gsi_insert_seq_on_edge_immediate (pe
, stmts
);
4168 gcc_assert (!new_bb
);
4170 if (arginfo
[i
].simd_lane_linear
)
4172 vargs
.safe_push (arginfo
[i
].op
);
4175 tree phi_res
= copy_ssa_name (op
);
4176 gphi
*new_phi
= create_phi_node (phi_res
, loop
->header
);
4177 add_phi_arg (new_phi
, arginfo
[i
].op
,
4178 loop_preheader_edge (loop
), UNKNOWN_LOCATION
);
4180 = POINTER_TYPE_P (TREE_TYPE (op
))
4181 ? POINTER_PLUS_EXPR
: PLUS_EXPR
;
4182 tree type
= POINTER_TYPE_P (TREE_TYPE (op
))
4183 ? sizetype
: TREE_TYPE (op
);
4185 = wi::mul (bestn
->simdclone
->args
[i
].linear_step
,
4187 tree tcst
= wide_int_to_tree (type
, cst
);
4188 tree phi_arg
= copy_ssa_name (op
);
4190 = gimple_build_assign (phi_arg
, code
, phi_res
, tcst
);
4191 gimple_stmt_iterator si
= gsi_after_labels (loop
->header
);
4192 gsi_insert_after (&si
, new_stmt
, GSI_NEW_STMT
);
4193 add_phi_arg (new_phi
, phi_arg
, loop_latch_edge (loop
),
4195 arginfo
[i
].op
= phi_res
;
4196 vargs
.safe_push (phi_res
);
4201 = POINTER_TYPE_P (TREE_TYPE (op
))
4202 ? POINTER_PLUS_EXPR
: PLUS_EXPR
;
4203 tree type
= POINTER_TYPE_P (TREE_TYPE (op
))
4204 ? sizetype
: TREE_TYPE (op
);
4206 = wi::mul (bestn
->simdclone
->args
[i
].linear_step
,
4208 tree tcst
= wide_int_to_tree (type
, cst
);
4209 new_temp
= make_ssa_name (TREE_TYPE (op
));
4211 = gimple_build_assign (new_temp
, code
,
4212 arginfo
[i
].op
, tcst
);
4213 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
4214 vargs
.safe_push (new_temp
);
4217 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP
:
4218 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP
:
4219 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP
:
4220 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP
:
4221 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP
:
4222 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP
:
4228 gcall
*new_call
= gimple_build_call_vec (fndecl
, vargs
);
4232 || known_eq (simd_clone_subparts (rtype
), nunits
));
4234 new_temp
= create_tmp_var (ratype
);
4235 else if (useless_type_conversion_p (vectype
, rtype
))
4236 new_temp
= make_ssa_name (vec_dest
, new_call
);
4238 new_temp
= make_ssa_name (rtype
, new_call
);
4239 gimple_call_set_lhs (new_call
, new_temp
);
4241 vect_finish_stmt_generation (vinfo
, stmt_info
, new_call
, gsi
);
4242 gimple
*new_stmt
= new_call
;
4246 if (!multiple_p (simd_clone_subparts (vectype
), nunits
))
4249 poly_uint64 prec
= GET_MODE_BITSIZE (TYPE_MODE (vectype
));
4250 poly_uint64 bytes
= GET_MODE_SIZE (TYPE_MODE (vectype
));
4251 k
= vector_unroll_factor (nunits
,
4252 simd_clone_subparts (vectype
));
4253 gcc_assert ((k
& (k
- 1)) == 0);
4254 for (l
= 0; l
< k
; l
++)
4259 t
= build_fold_addr_expr (new_temp
);
4260 t
= build2 (MEM_REF
, vectype
, t
,
4261 build_int_cst (TREE_TYPE (t
), l
* bytes
));
4264 t
= build3 (BIT_FIELD_REF
, vectype
, new_temp
,
4265 bitsize_int (prec
), bitsize_int (l
* prec
));
4266 new_stmt
= gimple_build_assign (make_ssa_name (vectype
), t
);
4267 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
4269 if (j
== 0 && l
== 0)
4270 *vec_stmt
= new_stmt
;
4271 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (new_stmt
);
4275 vect_clobber_variable (vinfo
, stmt_info
, gsi
, new_temp
);
4278 else if (!multiple_p (nunits
, simd_clone_subparts (vectype
)))
4280 unsigned int k
= (simd_clone_subparts (vectype
)
4281 / simd_clone_subparts (rtype
));
4282 gcc_assert ((k
& (k
- 1)) == 0);
4283 if ((j
& (k
- 1)) == 0)
4284 vec_alloc (ret_ctor_elts
, k
);
4288 o
= vector_unroll_factor (nunits
,
4289 simd_clone_subparts (rtype
));
4290 for (m
= 0; m
< o
; m
++)
4292 tree tem
= build4 (ARRAY_REF
, rtype
, new_temp
,
4293 size_int (m
), NULL_TREE
, NULL_TREE
);
4294 new_stmt
= gimple_build_assign (make_ssa_name (rtype
),
4296 vect_finish_stmt_generation (vinfo
, stmt_info
,
4298 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts
, NULL_TREE
,
4299 gimple_assign_lhs (new_stmt
));
4301 vect_clobber_variable (vinfo
, stmt_info
, gsi
, new_temp
);
4304 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts
, NULL_TREE
, new_temp
);
4305 if ((j
& (k
- 1)) != k
- 1)
4307 vec_oprnd0
= build_constructor (vectype
, ret_ctor_elts
);
4309 = gimple_build_assign (make_ssa_name (vec_dest
), vec_oprnd0
);
4310 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
4312 if ((unsigned) j
== k
- 1)
4313 *vec_stmt
= new_stmt
;
4314 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (new_stmt
);
4319 tree t
= build_fold_addr_expr (new_temp
);
4320 t
= build2 (MEM_REF
, vectype
, t
,
4321 build_int_cst (TREE_TYPE (t
), 0));
4322 new_stmt
= gimple_build_assign (make_ssa_name (vec_dest
), t
);
4323 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
4324 vect_clobber_variable (vinfo
, stmt_info
, gsi
, new_temp
);
4326 else if (!useless_type_conversion_p (vectype
, rtype
))
4328 vec_oprnd0
= build1 (VIEW_CONVERT_EXPR
, vectype
, new_temp
);
4330 = gimple_build_assign (make_ssa_name (vec_dest
), vec_oprnd0
);
4331 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
4336 *vec_stmt
= new_stmt
;
4337 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (new_stmt
);
4340 for (i
= 0; i
< nargs
; ++i
)
4342 vec
<tree
> oprndsi
= vec_oprnds
[i
];
4347 /* The call in STMT might prevent it from being removed in dce.
4348 We however cannot remove it here, due to the way the ssa name
4349 it defines is mapped to the new definition. So just replace
4350 rhs of the statement with something harmless. */
4358 type
= TREE_TYPE (scalar_dest
);
4359 lhs
= gimple_call_lhs (vect_orig_stmt (stmt_info
)->stmt
);
4360 new_stmt
= gimple_build_assign (lhs
, build_zero_cst (type
));
4363 new_stmt
= gimple_build_nop ();
4364 vinfo
->replace_stmt (gsi
, vect_orig_stmt (stmt_info
), new_stmt
);
4365 unlink_stmt_vdef (stmt
);
/* Function vect_gen_widened_results_half

   Create a vector stmt whose code, type, number of arguments, and result
   variable are CODE, OP_TYPE, and VEC_DEST, and its arguments are
   VEC_OPRND0 and VEC_OPRND1.  The new vector stmt is to be inserted at GSI.
   In the case that CODE is a CALL_EXPR, this means that a call to DECL
   needs to be created (DECL is a function-decl of a target-builtin).
   STMT_INFO is the original scalar stmt that we are vectorizing.  */
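/* Added illustration (not from the original sources): when widening, say,
   V8HI operands into V4SI results, the caller invokes this helper twice for
   each input vector, e.g. once with CODE == VEC_UNPACK_LO_EXPR and once
   with VEC_UNPACK_HI_EXPR, so the low and high four lanes each end up in
   their own V4SI vector.  */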
static gimple *
vect_gen_widened_results_half (vec_info *vinfo, enum tree_code code,
                               tree vec_oprnd0, tree vec_oprnd1, int op_type,
                               tree vec_dest, gimple_stmt_iterator *gsi,
                               stmt_vec_info stmt_info)
{
  gimple *new_stmt;
  tree new_temp;

  /* Generate half of the widened result:  */
  gcc_assert (op_type == TREE_CODE_LENGTH (code));
  if (op_type != binary_op)
    vec_oprnd1 = NULL;
  new_stmt = gimple_build_assign (vec_dest, code, vec_oprnd0, vec_oprnd1);
  new_temp = make_ssa_name (vec_dest, new_stmt);
  gimple_assign_set_lhs (new_stmt, new_temp);
  vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);

  return new_stmt;
}
/* Create vectorized demotion statements for vector operands from VEC_OPRNDS.
   For multi-step conversions store the resulting vectors and call the function
   recursively.  */
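/* Added illustration (not from the original sources): demoting V4SI
   operands to V16QI results is a two-step narrowing.  Pairs of V4SI vectors
   are first packed into V8HI intermediates, then pairs of those are packed
   into the final V16QI vectors; each recursive call below handles one such
   level using VEC_PACK_TRUNC_EXPR.  */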
static void
vect_create_vectorized_demotion_stmts (vec_info *vinfo, vec<tree> *vec_oprnds,
                                       int multi_step_cvt,
                                       stmt_vec_info stmt_info,
                                       vec<tree> &vec_dsts,
                                       gimple_stmt_iterator *gsi,
                                       slp_tree slp_node, enum tree_code code)
{
  unsigned int i;
  tree vop0, vop1, new_tmp, vec_dest;

  vec_dest = vec_dsts.pop ();

  for (i = 0; i < vec_oprnds->length (); i += 2)
    {
      /* Create demotion operation.  */
      vop0 = (*vec_oprnds)[i];
      vop1 = (*vec_oprnds)[i + 1];
      gassign *new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1);
      new_tmp = make_ssa_name (vec_dest, new_stmt);
      gimple_assign_set_lhs (new_stmt, new_tmp);
      vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);

      if (multi_step_cvt)
        /* Store the resulting vector for next recursive call.  */
        (*vec_oprnds)[i/2] = new_tmp;
      else
        {
          /* This is the last step of the conversion sequence.  Store the
             vectors in SLP_NODE or in vector info of the scalar statement
             (or in STMT_VINFO_RELATED_STMT chain).  */
          if (slp_node)
            SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
          else
            STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
        }
    }

  /* For multi-step demotion operations we first generate demotion operations
     from the source type to the intermediate types, and then combine the
     results (stored in VEC_OPRNDS) in demotion operation to the destination
     type.  */
  if (multi_step_cvt)
    {
      /* At each level of recursion we have half of the operands we had at the
         previous level.  */
      vec_oprnds->truncate ((i+1)/2);
      vect_create_vectorized_demotion_stmts (vinfo, vec_oprnds,
                                             multi_step_cvt - 1,
                                             stmt_info, vec_dsts, gsi,
                                             slp_node, VEC_PACK_TRUNC_EXPR);
    }

  vec_dsts.quick_push (vec_dest);
}
/* Create vectorized promotion statements for vector operands from VEC_OPRNDS0
   and VEC_OPRNDS1, for a binary operation associated with scalar statement
   STMT_INFO.  For multi-step conversions store the resulting vectors and
   call the function recursively.  */
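/* Added illustration (not from the original sources): promoting V16QI
   operands to V4SI results works the other way around.  Each V16QI input is
   first unpacked into two V8HI halves (lo/hi), and at the next level each
   V8HI is unpacked into two V4SI vectors, so one input vector ultimately
   yields four result vectors.  */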
static void
vect_create_vectorized_promotion_stmts (vec_info *vinfo,
                                        vec<tree> *vec_oprnds0,
                                        vec<tree> *vec_oprnds1,
                                        stmt_vec_info stmt_info, tree vec_dest,
                                        gimple_stmt_iterator *gsi,
                                        enum tree_code code1,
                                        enum tree_code code2, int op_type)
{
  int i;
  tree vop0, vop1, new_tmp1, new_tmp2;
  gimple *new_stmt1, *new_stmt2;
  vec<tree> vec_tmp = vNULL;

  vec_tmp.create (vec_oprnds0->length () * 2);
  FOR_EACH_VEC_ELT (*vec_oprnds0, i, vop0)
    {
      if (op_type == binary_op)
        vop1 = (*vec_oprnds1)[i];
      else
        vop1 = NULL_TREE;

      /* Generate the two halves of promotion operation.  */
      new_stmt1 = vect_gen_widened_results_half (vinfo, code1, vop0, vop1,
                                                 op_type, vec_dest, gsi,
                                                 stmt_info);
      new_stmt2 = vect_gen_widened_results_half (vinfo, code2, vop0, vop1,
                                                 op_type, vec_dest, gsi,
                                                 stmt_info);
      if (is_gimple_call (new_stmt1))
        {
          new_tmp1 = gimple_call_lhs (new_stmt1);
          new_tmp2 = gimple_call_lhs (new_stmt2);
        }
      else
        {
          new_tmp1 = gimple_assign_lhs (new_stmt1);
          new_tmp2 = gimple_assign_lhs (new_stmt2);
        }

      /* Store the results for the next step.  */
      vec_tmp.quick_push (new_tmp1);
      vec_tmp.quick_push (new_tmp2);
    }

  vec_oprnds0->release ();
  *vec_oprnds0 = vec_tmp;
}
/* Check if STMT_INFO performs a conversion operation that can be vectorized.
   If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at GSI.
   Return true if STMT_INFO is vectorizable in this way.  */
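/* Added note (not from the original sources): the conversions handled here
   include same-width cases such as int -> float (FLOAT_EXPR) and
   float -> int (FIX_TRUNC_EXPR), widening cases such as short -> int or
   WIDEN_MULT_EXPR, and narrowing cases such as int -> short; the
   NONE/WIDEN/NARROW modifier computed below records which of the three
   shapes applies.  */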
4524 vectorizable_conversion (vec_info
*vinfo
,
4525 stmt_vec_info stmt_info
, gimple_stmt_iterator
*gsi
,
4526 gimple
**vec_stmt
, slp_tree slp_node
,
4527 stmt_vector_for_cost
*cost_vec
)
4531 tree op0
, op1
= NULL_TREE
;
4532 loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
);
4533 enum tree_code code
, code1
= ERROR_MARK
, code2
= ERROR_MARK
;
4534 enum tree_code codecvt1
= ERROR_MARK
, codecvt2
= ERROR_MARK
;
4536 enum vect_def_type dt
[2] = {vect_unknown_def_type
, vect_unknown_def_type
};
4538 poly_uint64 nunits_in
;
4539 poly_uint64 nunits_out
;
4540 tree vectype_out
, vectype_in
;
4542 tree lhs_type
, rhs_type
;
4543 enum { NARROW
, NONE
, WIDEN
} modifier
;
4544 vec
<tree
> vec_oprnds0
= vNULL
;
4545 vec
<tree
> vec_oprnds1
= vNULL
;
4547 bb_vec_info bb_vinfo
= dyn_cast
<bb_vec_info
> (vinfo
);
4548 int multi_step_cvt
= 0;
4549 vec
<tree
> interm_types
= vNULL
;
4550 tree intermediate_type
, cvt_type
= NULL_TREE
;
4552 unsigned short fltsz
;
4554 /* Is STMT a vectorizable conversion? */
4556 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
4559 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
4563 gassign
*stmt
= dyn_cast
<gassign
*> (stmt_info
->stmt
);
4567 if (TREE_CODE (gimple_assign_lhs (stmt
)) != SSA_NAME
)
4570 code
= gimple_assign_rhs_code (stmt
);
4571 if (!CONVERT_EXPR_CODE_P (code
)
4572 && code
!= FIX_TRUNC_EXPR
4573 && code
!= FLOAT_EXPR
4574 && code
!= WIDEN_PLUS_EXPR
4575 && code
!= WIDEN_MINUS_EXPR
4576 && code
!= WIDEN_MULT_EXPR
4577 && code
!= WIDEN_LSHIFT_EXPR
)
4580 op_type
= TREE_CODE_LENGTH (code
);
4582 /* Check types of lhs and rhs. */
4583 scalar_dest
= gimple_assign_lhs (stmt
);
4584 lhs_type
= TREE_TYPE (scalar_dest
);
4585 vectype_out
= STMT_VINFO_VECTYPE (stmt_info
);
4587 /* Check the operands of the operation. */
4588 slp_tree slp_op0
, slp_op1
= NULL
;
4589 if (!vect_is_simple_use (vinfo
, stmt_info
, slp_node
,
4590 0, &op0
, &slp_op0
, &dt
[0], &vectype_in
))
4592 if (dump_enabled_p ())
4593 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4594 "use not simple.\n");
4598 rhs_type
= TREE_TYPE (op0
);
4599 if ((code
!= FIX_TRUNC_EXPR
&& code
!= FLOAT_EXPR
)
4600 && !((INTEGRAL_TYPE_P (lhs_type
)
4601 && INTEGRAL_TYPE_P (rhs_type
))
4602 || (SCALAR_FLOAT_TYPE_P (lhs_type
)
4603 && SCALAR_FLOAT_TYPE_P (rhs_type
))))
4606 if (!VECTOR_BOOLEAN_TYPE_P (vectype_out
)
4607 && ((INTEGRAL_TYPE_P (lhs_type
)
4608 && !type_has_mode_precision_p (lhs_type
))
4609 || (INTEGRAL_TYPE_P (rhs_type
)
4610 && !type_has_mode_precision_p (rhs_type
))))
4612 if (dump_enabled_p ())
4613 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4614 "type conversion to/from bit-precision unsupported."
4619 if (op_type
== binary_op
)
4621 gcc_assert (code
== WIDEN_MULT_EXPR
|| code
== WIDEN_LSHIFT_EXPR
4622 || code
== WIDEN_PLUS_EXPR
|| code
== WIDEN_MINUS_EXPR
);
4624 op1
= gimple_assign_rhs2 (stmt
);
4626 if (!vect_is_simple_use (vinfo
, stmt_info
, slp_node
, 1,
4627 &op1
, &slp_op1
, &dt
[1], &vectype1_in
))
4629 if (dump_enabled_p ())
4630 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4631 "use not simple.\n");
      /* For WIDEN_MULT_EXPR, if OP0 is a constant, use the type of
	 OP1.  */
      if (CONSTANT_CLASS_P (op0))
	vectype_in = vectype1_in;
4640 /* If op0 is an external or constant def, infer the vector type
4641 from the scalar type. */
4643 vectype_in
= get_vectype_for_scalar_type (vinfo
, rhs_type
, slp_node
);
4645 gcc_assert (vectype_in
);
4648 if (dump_enabled_p ())
4649 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4650 "no vectype for scalar type %T\n", rhs_type
);
4655 if (VECTOR_BOOLEAN_TYPE_P (vectype_out
)
4656 && !VECTOR_BOOLEAN_TYPE_P (vectype_in
))
4658 if (dump_enabled_p ())
4659 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4660 "can't convert between boolean and non "
4661 "boolean vectors %T\n", rhs_type
);
4666 nunits_in
= TYPE_VECTOR_SUBPARTS (vectype_in
);
4667 nunits_out
= TYPE_VECTOR_SUBPARTS (vectype_out
);
4668 if (known_eq (nunits_out
, nunits_in
))
4670 else if (multiple_p (nunits_out
, nunits_in
))
4674 gcc_checking_assert (multiple_p (nunits_in
, nunits_out
));
  /* Multiple types in SLP are handled by creating the appropriate number of
     vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
     case of SLP.  */
  if (slp_node)
    ncopies = 1;
  else if (modifier == NARROW)
    ncopies = vect_get_num_copies (loop_vinfo, vectype_out);
  else
    ncopies = vect_get_num_copies (loop_vinfo, vectype_in);
4688 /* Sanity check: make sure that at least one copy of the vectorized stmt
4689 needs to be generated. */
4690 gcc_assert (ncopies
>= 1);
4692 bool found_mode
= false;
4693 scalar_mode lhs_mode
= SCALAR_TYPE_MODE (lhs_type
);
4694 scalar_mode rhs_mode
= SCALAR_TYPE_MODE (rhs_type
);
4695 opt_scalar_mode rhs_mode_iter
;
4697 /* Supportable by target? */
4701 if (code
!= FIX_TRUNC_EXPR
4702 && code
!= FLOAT_EXPR
4703 && !CONVERT_EXPR_CODE_P (code
))
4705 if (supportable_convert_operation (code
, vectype_out
, vectype_in
, &code1
))
4709 if (dump_enabled_p ())
4710 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4711 "conversion not supported by target.\n");
4715 if (supportable_widening_operation (vinfo
, code
, stmt_info
, vectype_out
,
4716 vectype_in
, &code1
, &code2
,
4717 &multi_step_cvt
, &interm_types
))
	  /* Binary widening operation can only be supported directly by the
	     architecture.  */
4721 gcc_assert (!(multi_step_cvt
&& op_type
== binary_op
));
4725 if (code
!= FLOAT_EXPR
4726 || GET_MODE_SIZE (lhs_mode
) <= GET_MODE_SIZE (rhs_mode
))
4729 fltsz
= GET_MODE_SIZE (lhs_mode
);
4730 FOR_EACH_2XWIDER_MODE (rhs_mode_iter
, rhs_mode
)
4732 rhs_mode
= rhs_mode_iter
.require ();
4733 if (GET_MODE_SIZE (rhs_mode
) > fltsz
)
4737 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode
), 0);
4738 cvt_type
= get_same_sized_vectype (cvt_type
, vectype_in
);
4739 if (cvt_type
== NULL_TREE
)
4742 if (GET_MODE_SIZE (rhs_mode
) == fltsz
)
4744 if (!supportable_convert_operation (code
, vectype_out
,
4745 cvt_type
, &codecvt1
))
4748 else if (!supportable_widening_operation (vinfo
, code
, stmt_info
,
4749 vectype_out
, cvt_type
,
4750 &codecvt1
, &codecvt2
,
4755 gcc_assert (multi_step_cvt
== 0);
4757 if (supportable_widening_operation (vinfo
, NOP_EXPR
, stmt_info
,
4759 vectype_in
, &code1
, &code2
,
4760 &multi_step_cvt
, &interm_types
))
4770 if (GET_MODE_SIZE (rhs_mode
) == fltsz
)
4771 codecvt2
= ERROR_MARK
;
4775 interm_types
.safe_push (cvt_type
);
4776 cvt_type
= NULL_TREE
;
4781 gcc_assert (op_type
== unary_op
);
4782 if (supportable_narrowing_operation (code
, vectype_out
, vectype_in
,
4783 &code1
, &multi_step_cvt
,
4787 if (code
!= FIX_TRUNC_EXPR
4788 || GET_MODE_SIZE (lhs_mode
) >= GET_MODE_SIZE (rhs_mode
))
4792 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode
), 0);
4793 cvt_type
= get_same_sized_vectype (cvt_type
, vectype_in
);
4794 if (cvt_type
== NULL_TREE
)
4796 if (!supportable_convert_operation (code
, cvt_type
, vectype_in
,
4799 if (supportable_narrowing_operation (NOP_EXPR
, vectype_out
, cvt_type
,
4800 &code1
, &multi_step_cvt
,
4809 if (!vec_stmt
) /* transformation not required. */
4812 && (!vect_maybe_update_slp_op_vectype (slp_op0
, vectype_in
)
4813 || !vect_maybe_update_slp_op_vectype (slp_op1
, vectype_in
)))
4815 if (dump_enabled_p ())
4816 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4817 "incompatible vector types for invariants\n");
4820 DUMP_VECT_SCOPE ("vectorizable_conversion");
4821 if (modifier
== NONE
)
4823 STMT_VINFO_TYPE (stmt_info
) = type_conversion_vec_info_type
;
4824 vect_model_simple_cost (vinfo
, stmt_info
, ncopies
, dt
, ndts
, slp_node
,
4827 else if (modifier
== NARROW
)
4829 STMT_VINFO_TYPE (stmt_info
) = type_demotion_vec_info_type
;
4830 /* The final packing step produces one vector result per copy. */
4831 unsigned int nvectors
4832 = (slp_node
? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
) : ncopies
);
4833 vect_model_promotion_demotion_cost (stmt_info
, dt
, nvectors
,
4834 multi_step_cvt
, cost_vec
);
4838 STMT_VINFO_TYPE (stmt_info
) = type_promotion_vec_info_type
;
4839 /* The initial unpacking step produces two vector results
4840 per copy. MULTI_STEP_CVT is 0 for a single conversion,
4841 so >> MULTI_STEP_CVT divides by 2^(number of steps - 1). */
4842 unsigned int nvectors
4844 ? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
) >> multi_step_cvt
4846 vect_model_promotion_demotion_cost (stmt_info
, dt
, nvectors
,
4847 multi_step_cvt
, cost_vec
);
4849 interm_types
.release ();
4854 if (dump_enabled_p ())
4855 dump_printf_loc (MSG_NOTE
, vect_location
,
4856 "transform conversion. ncopies = %d.\n", ncopies
);
4858 if (op_type
== binary_op
)
4860 if (CONSTANT_CLASS_P (op0
))
4861 op0
= fold_convert (TREE_TYPE (op1
), op0
);
4862 else if (CONSTANT_CLASS_P (op1
))
4863 op1
= fold_convert (TREE_TYPE (op0
), op1
);
4866 /* In case of multi-step conversion, we first generate conversion operations
4867 to the intermediate types, and then from that types to the final one.
4868 We create vector destinations for the intermediate type (TYPES) received
4869 from supportable_*_operation, and store them in the correct order
4870 for future use in vect_create_vectorized_*_stmts (). */
4871 auto_vec
<tree
> vec_dsts (multi_step_cvt
+ 1);
4872 vec_dest
= vect_create_destination_var (scalar_dest
,
4873 (cvt_type
&& modifier
== WIDEN
)
4874 ? cvt_type
: vectype_out
);
4875 vec_dsts
.quick_push (vec_dest
);
4879 for (i
= interm_types
.length () - 1;
4880 interm_types
.iterate (i
, &intermediate_type
); i
--)
4882 vec_dest
= vect_create_destination_var (scalar_dest
,
4884 vec_dsts
.quick_push (vec_dest
);
4889 vec_dest
= vect_create_destination_var (scalar_dest
,
4891 ? vectype_out
: cvt_type
);
4896 if (modifier
== WIDEN
)
4898 else if (modifier
== NARROW
)
4901 ninputs
= vect_pow2 (multi_step_cvt
);
4909 vect_get_vec_defs (vinfo
, stmt_info
, slp_node
, ncopies
,
4911 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vop0
)
4913 /* Arguments are ready, create the new vector stmt. */
4914 gcc_assert (TREE_CODE_LENGTH (code1
) == unary_op
);
4915 gassign
*new_stmt
= gimple_build_assign (vec_dest
, code1
, vop0
);
4916 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
4917 gimple_assign_set_lhs (new_stmt
, new_temp
);
4918 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
4921 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
4923 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (new_stmt
);
4928 /* In case the vectorization factor (VF) is bigger than the number
4929 of elements that we can fit in a vectype (nunits), we have to
4930 generate more than one vector stmt - i.e - we need to "unroll"
4931 the vector stmt by a factor VF/nunits. */
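	  /* Added example (not from the original sources): if VF is 16 and a
	     vector of the input type holds 4 elements, ncopies is 4 and the
	     code below fetches and converts 4 input vectors instead of one,
	     i.e. the vector statement is "unrolled" 4 times.  */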
4932 vect_get_vec_defs (vinfo
, stmt_info
, slp_node
, ncopies
* ninputs
,
4934 code
== WIDEN_LSHIFT_EXPR
? NULL_TREE
: op1
,
4936 if (code
== WIDEN_LSHIFT_EXPR
)
4938 int oprnds_size
= vec_oprnds0
.length ();
4939 vec_oprnds1
.create (oprnds_size
);
4940 for (i
= 0; i
< oprnds_size
; ++i
)
4941 vec_oprnds1
.quick_push (op1
);
4943 /* Arguments are ready. Create the new vector stmts. */
4944 for (i
= multi_step_cvt
; i
>= 0; i
--)
4946 tree this_dest
= vec_dsts
[i
];
4947 enum tree_code c1
= code1
, c2
= code2
;
4948 if (i
== 0 && codecvt2
!= ERROR_MARK
)
4953 vect_create_vectorized_promotion_stmts (vinfo
, &vec_oprnds0
,
4954 &vec_oprnds1
, stmt_info
,
4959 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vop0
)
4964 gcc_assert (TREE_CODE_LENGTH (codecvt1
) == unary_op
);
4965 new_temp
= make_ssa_name (vec_dest
);
4966 new_stmt
= gimple_build_assign (new_temp
, codecvt1
, vop0
);
4967 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
4970 new_stmt
= SSA_NAME_DEF_STMT (vop0
);
4973 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
4975 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (new_stmt
);
4980 /* In case the vectorization factor (VF) is bigger than the number
4981 of elements that we can fit in a vectype (nunits), we have to
4982 generate more than one vector stmt - i.e - we need to "unroll"
4983 the vector stmt by a factor VF/nunits. */
4984 vect_get_vec_defs (vinfo
, stmt_info
, slp_node
, ncopies
* ninputs
,
4986 /* Arguments are ready. Create the new vector stmts. */
4988 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vop0
)
4990 gcc_assert (TREE_CODE_LENGTH (codecvt1
) == unary_op
);
4991 new_temp
= make_ssa_name (vec_dest
);
4993 = gimple_build_assign (new_temp
, codecvt1
, vop0
);
4994 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
4995 vec_oprnds0
[i
] = new_temp
;
4998 vect_create_vectorized_demotion_stmts (vinfo
, &vec_oprnds0
,
5000 stmt_info
, vec_dsts
, gsi
,
5005 *vec_stmt
= STMT_VINFO_VEC_STMTS (stmt_info
)[0];
5007 vec_oprnds0
.release ();
5008 vec_oprnds1
.release ();
5009 interm_types
.release ();
/* Return true if we can assume from the scalar form of STMT_INFO that
   neither the scalar nor the vector forms will generate code.  STMT_INFO
   is known not to involve a data reference.  */
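/* Added example (not from the original sources): a statement such as
   "u_2 = (unsigned int) i_1" where i_1 is a signed int is a nop conversion
   in this sense: both types have the same precision and representation, so
   neither the scalar nor the vectorized form needs any instruction.  A
   float -> int conversion, by contrast, is not.  */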
bool
vect_nop_conversion_p (stmt_vec_info stmt_info)
{
  gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
  if (!stmt)
    return false;

  tree lhs = gimple_assign_lhs (stmt);
  tree_code code = gimple_assign_rhs_code (stmt);
  tree rhs = gimple_assign_rhs1 (stmt);

  if (code == SSA_NAME || code == VIEW_CONVERT_EXPR)
    return true;

  if (CONVERT_EXPR_CODE_P (code))
    return tree_nop_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs));

  return false;
}
/* Function vectorizable_assignment.

   Check if STMT_INFO performs an assignment (copy) that can be vectorized.
   If VEC_STMT is also passed, vectorize the STMT_INFO: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at GSI.
   Return true if STMT_INFO is vectorizable in this way.  */
5046 vectorizable_assignment (vec_info
*vinfo
,
5047 stmt_vec_info stmt_info
, gimple_stmt_iterator
*gsi
,
5048 gimple
**vec_stmt
, slp_tree slp_node
,
5049 stmt_vector_for_cost
*cost_vec
)
5054 loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
);
5056 enum vect_def_type dt
[1] = {vect_unknown_def_type
};
5060 vec
<tree
> vec_oprnds
= vNULL
;
5062 bb_vec_info bb_vinfo
= dyn_cast
<bb_vec_info
> (vinfo
);
5063 enum tree_code code
;
5066 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
5069 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
5073 /* Is vectorizable assignment? */
5074 gassign
*stmt
= dyn_cast
<gassign
*> (stmt_info
->stmt
);
5078 scalar_dest
= gimple_assign_lhs (stmt
);
5079 if (TREE_CODE (scalar_dest
) != SSA_NAME
)
5082 if (STMT_VINFO_DATA_REF (stmt_info
))
5085 code
= gimple_assign_rhs_code (stmt
);
5086 if (!(gimple_assign_single_p (stmt
)
5087 || code
== PAREN_EXPR
5088 || CONVERT_EXPR_CODE_P (code
)))
5091 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
5092 poly_uint64 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
  /* Multiple types in SLP are handled by creating the appropriate number of
     vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
     case of SLP.  */
  if (slp_node)
    ncopies = 1;
  else
    ncopies = vect_get_num_copies (loop_vinfo, vectype);
5102 gcc_assert (ncopies
>= 1);
5105 if (!vect_is_simple_use (vinfo
, stmt_info
, slp_node
, 0, &op
, &slp_op
,
5106 &dt
[0], &vectype_in
))
5108 if (dump_enabled_p ())
5109 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5110 "use not simple.\n");
5114 vectype_in
= get_vectype_for_scalar_type (vinfo
, TREE_TYPE (op
), slp_node
);
5116 /* We can handle NOP_EXPR conversions that do not change the number
5117 of elements or the vector size. */
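  /* Added example (not from the original sources): an assignment like
     "u_2 = (unsigned int) i_1" is fine here because a vector of four
     "unsigned int" and a vector of four "int" have the same number of
     elements and the same size; "s_2 = (short) i_1" is rejected because it
     would change both.  */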
5118 if ((CONVERT_EXPR_CODE_P (code
)
5119 || code
== VIEW_CONVERT_EXPR
)
5121 || maybe_ne (TYPE_VECTOR_SUBPARTS (vectype_in
), nunits
)
5122 || maybe_ne (GET_MODE_SIZE (TYPE_MODE (vectype
)),
5123 GET_MODE_SIZE (TYPE_MODE (vectype_in
)))))
5126 if (VECTOR_BOOLEAN_TYPE_P (vectype
)
5127 && !VECTOR_BOOLEAN_TYPE_P (vectype_in
))
5129 if (dump_enabled_p ())
5130 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5131 "can't convert between boolean and non "
5132 "boolean vectors %T\n", TREE_TYPE (op
));
5137 /* We do not handle bit-precision changes. */
5138 if ((CONVERT_EXPR_CODE_P (code
)
5139 || code
== VIEW_CONVERT_EXPR
)
5140 && INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest
))
5141 && (!type_has_mode_precision_p (TREE_TYPE (scalar_dest
))
5142 || !type_has_mode_precision_p (TREE_TYPE (op
)))
5143 /* But a conversion that does not change the bit-pattern is ok. */
5144 && !((TYPE_PRECISION (TREE_TYPE (scalar_dest
))
5145 > TYPE_PRECISION (TREE_TYPE (op
)))
5146 && TYPE_UNSIGNED (TREE_TYPE (op
))))
5148 if (dump_enabled_p ())
5149 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5150 "type conversion to/from bit-precision "
5155 if (!vec_stmt
) /* transformation not required. */
5158 && !vect_maybe_update_slp_op_vectype (slp_op
, vectype_in
))
5160 if (dump_enabled_p ())
5161 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5162 "incompatible vector types for invariants\n");
5165 STMT_VINFO_TYPE (stmt_info
) = assignment_vec_info_type
;
5166 DUMP_VECT_SCOPE ("vectorizable_assignment");
5167 if (!vect_nop_conversion_p (stmt_info
))
5168 vect_model_simple_cost (vinfo
, stmt_info
, ncopies
, dt
, ndts
, slp_node
,
5174 if (dump_enabled_p ())
5175 dump_printf_loc (MSG_NOTE
, vect_location
, "transform assignment.\n");
5178 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
5181 vect_get_vec_defs (vinfo
, stmt_info
, slp_node
, ncopies
, op
, &vec_oprnds
);
5183 /* Arguments are ready. create the new vector stmt. */
5184 FOR_EACH_VEC_ELT (vec_oprnds
, i
, vop
)
5186 if (CONVERT_EXPR_CODE_P (code
)
5187 || code
== VIEW_CONVERT_EXPR
)
5188 vop
= build1 (VIEW_CONVERT_EXPR
, vectype
, vop
);
5189 gassign
*new_stmt
= gimple_build_assign (vec_dest
, vop
);
5190 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
5191 gimple_assign_set_lhs (new_stmt
, new_temp
);
5192 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
5194 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
5196 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (new_stmt
);
5199 *vec_stmt
= STMT_VINFO_VEC_STMTS (stmt_info
)[0];
5201 vec_oprnds
.release ();
/* Return TRUE if CODE (a shift operation) is supported for SCALAR_TYPE
   either as shift by a scalar or by a vector.  */
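/* Added note (not from the original sources): callers, e.g. some of the
   pattern recognizers, use this predicate to ask whether something like
   "x >> 3" on an "int" element type can be vectorized at all, i.e. whether
   the target provides either a vector-shift-by-scalar or a
   vector-shift-by-vector optab for RSHIFT_EXPR on the corresponding vector
   mode.  */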
bool
vect_supportable_shift (vec_info *vinfo, enum tree_code code, tree scalar_type)
{
  machine_mode vec_mode;
  optab optab;
  int icode;
  tree vectype;

  vectype = get_vectype_for_scalar_type (vinfo, scalar_type);
  if (!vectype)
    return false;

  optab = optab_for_tree_code (code, vectype, optab_scalar);
  if (!optab
      || optab_handler (optab, TYPE_MODE (vectype)) == CODE_FOR_nothing)
    {
      optab = optab_for_tree_code (code, vectype, optab_vector);
      if (!optab
          || (optab_handler (optab, TYPE_MODE (vectype))
              == CODE_FOR_nothing))
        return false;
    }

  vec_mode = TYPE_MODE (vectype);
  icode = (int) optab_handler (optab, vec_mode);
  if (icode == CODE_FOR_nothing)
    return false;

  return true;
}
/* Function vectorizable_shift.

   Check if STMT_INFO performs a shift operation that can be vectorized.
   If VEC_STMT is also passed, vectorize the STMT_INFO: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at GSI.
   Return true if STMT_INFO is vectorizable in this way.  */
5250 vectorizable_shift (vec_info
*vinfo
,
5251 stmt_vec_info stmt_info
, gimple_stmt_iterator
*gsi
,
5252 gimple
**vec_stmt
, slp_tree slp_node
,
5253 stmt_vector_for_cost
*cost_vec
)
5257 tree op0
, op1
= NULL
;
5258 tree vec_oprnd1
= NULL_TREE
;
5260 loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
);
5261 enum tree_code code
;
5262 machine_mode vec_mode
;
5266 machine_mode optab_op2_mode
;
5267 enum vect_def_type dt
[2] = {vect_unknown_def_type
, vect_unknown_def_type
};
5269 poly_uint64 nunits_in
;
5270 poly_uint64 nunits_out
;
5275 vec
<tree
> vec_oprnds0
= vNULL
;
5276 vec
<tree
> vec_oprnds1
= vNULL
;
5279 bool scalar_shift_arg
= true;
5280 bb_vec_info bb_vinfo
= dyn_cast
<bb_vec_info
> (vinfo
);
5281 bool incompatible_op1_vectype_p
= false;
5283 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
5286 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
5287 && STMT_VINFO_DEF_TYPE (stmt_info
) != vect_nested_cycle
5291 /* Is STMT a vectorizable binary/unary operation? */
5292 gassign
*stmt
= dyn_cast
<gassign
*> (stmt_info
->stmt
);
5296 if (TREE_CODE (gimple_assign_lhs (stmt
)) != SSA_NAME
)
5299 code
= gimple_assign_rhs_code (stmt
);
5301 if (!(code
== LSHIFT_EXPR
|| code
== RSHIFT_EXPR
|| code
== LROTATE_EXPR
5302 || code
== RROTATE_EXPR
))
5305 scalar_dest
= gimple_assign_lhs (stmt
);
5306 vectype_out
= STMT_VINFO_VECTYPE (stmt_info
);
5307 if (!type_has_mode_precision_p (TREE_TYPE (scalar_dest
)))
5309 if (dump_enabled_p ())
5310 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5311 "bit-precision shifts not supported.\n");
5316 if (!vect_is_simple_use (vinfo
, stmt_info
, slp_node
,
5317 0, &op0
, &slp_op0
, &dt
[0], &vectype
))
5319 if (dump_enabled_p ())
5320 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5321 "use not simple.\n");
5324 /* If op0 is an external or constant def, infer the vector type
5325 from the scalar type. */
5327 vectype
= get_vectype_for_scalar_type (vinfo
, TREE_TYPE (op0
), slp_node
);
5329 gcc_assert (vectype
);
5332 if (dump_enabled_p ())
5333 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5334 "no vectype for scalar type\n");
5338 nunits_out
= TYPE_VECTOR_SUBPARTS (vectype_out
);
5339 nunits_in
= TYPE_VECTOR_SUBPARTS (vectype
);
5340 if (maybe_ne (nunits_out
, nunits_in
))
5343 stmt_vec_info op1_def_stmt_info
;
5345 if (!vect_is_simple_use (vinfo
, stmt_info
, slp_node
, 1, &op1
, &slp_op1
,
5346 &dt
[1], &op1_vectype
, &op1_def_stmt_info
))
5348 if (dump_enabled_p ())
5349 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5350 "use not simple.\n");
  /* Multiple types in SLP are handled by creating the appropriate number of
     vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
     case of SLP.  */
  if (slp_node)
    ncopies = 1;
  else
    ncopies = vect_get_num_copies (loop_vinfo, vectype);
5362 gcc_assert (ncopies
>= 1);
5364 /* Determine whether the shift amount is a vector, or scalar. If the
5365 shift/rotate amount is a vector, use the vector/vector shift optabs. */
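  /* Added example (not from the original sources): in "a[i] << b[i]" the
     shift amount is itself a vectorized value, so the vector/vector optab
     (each lane shifted by its own count) is needed, whereas in "a[i] << s"
     with loop-invariant S the cheaper vector/scalar form can be used.  */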
5367 if ((dt
[1] == vect_internal_def
5368 || dt
[1] == vect_induction_def
5369 || dt
[1] == vect_nested_cycle
)
5371 scalar_shift_arg
= false;
5372 else if (dt
[1] == vect_constant_def
5373 || dt
[1] == vect_external_def
5374 || dt
[1] == vect_internal_def
)
      /* In SLP, need to check whether the shift count is the same,
	 in loops if it is a constant or invariant, it is always
	 a scalar shift.  */
5381 vec
<stmt_vec_info
> stmts
= SLP_TREE_SCALAR_STMTS (slp_node
);
5382 stmt_vec_info slpstmt_info
;
5384 FOR_EACH_VEC_ELT (stmts
, k
, slpstmt_info
)
5386 gassign
*slpstmt
= as_a
<gassign
*> (slpstmt_info
->stmt
);
5387 if (!operand_equal_p (gimple_assign_rhs2 (slpstmt
), op1
, 0))
5388 scalar_shift_arg
= false;
	  /* For internal SLP defs we have to make sure we see scalar stmts
	     for all vector elements.
	     ???  For different vectors we could resort to a different
	     scalar shift operand but code-generation below simply always
	     takes the first.  */
5396 if (dt
[1] == vect_internal_def
5397 && maybe_ne (nunits_out
* SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
),
5399 scalar_shift_arg
= false;
      /* If the shift amount is computed by a pattern stmt we cannot
	 use the scalar amount directly thus give up and use a vector
	 shift.  */
5405 if (op1_def_stmt_info
&& is_pattern_stmt_p (op1_def_stmt_info
))
5406 scalar_shift_arg
= false;
5410 if (dump_enabled_p ())
5411 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5412 "operand mode requires invariant argument.\n");
5416 /* Vector shifted by vector. */
5417 bool was_scalar_shift_arg
= scalar_shift_arg
;
5418 if (!scalar_shift_arg
)
5420 optab
= optab_for_tree_code (code
, vectype
, optab_vector
);
5421 if (dump_enabled_p ())
5422 dump_printf_loc (MSG_NOTE
, vect_location
,
5423 "vector/vector shift/rotate found.\n");
5426 op1_vectype
= get_vectype_for_scalar_type (vinfo
, TREE_TYPE (op1
),
5428 incompatible_op1_vectype_p
5429 = (op1_vectype
== NULL_TREE
5430 || maybe_ne (TYPE_VECTOR_SUBPARTS (op1_vectype
),
5431 TYPE_VECTOR_SUBPARTS (vectype
))
5432 || TYPE_MODE (op1_vectype
) != TYPE_MODE (vectype
));
5433 if (incompatible_op1_vectype_p
5435 || SLP_TREE_DEF_TYPE (slp_op1
) != vect_constant_def
5436 || slp_op1
->refcnt
!= 1))
5438 if (dump_enabled_p ())
5439 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5440 "unusable type for last operand in"
5441 " vector/vector shift/rotate.\n");
5445 /* See if the machine has a vector shifted by scalar insn and if not
5446 then see if it has a vector shifted by vector insn. */
5449 optab
= optab_for_tree_code (code
, vectype
, optab_scalar
);
5451 && optab_handler (optab
, TYPE_MODE (vectype
)) != CODE_FOR_nothing
)
5453 if (dump_enabled_p ())
5454 dump_printf_loc (MSG_NOTE
, vect_location
,
5455 "vector/scalar shift/rotate found.\n");
5459 optab
= optab_for_tree_code (code
, vectype
, optab_vector
);
5461 && (optab_handler (optab
, TYPE_MODE (vectype
))
5462 != CODE_FOR_nothing
))
5464 scalar_shift_arg
= false;
5466 if (dump_enabled_p ())
5467 dump_printf_loc (MSG_NOTE
, vect_location
,
5468 "vector/vector shift/rotate found.\n");
5471 op1_vectype
= get_vectype_for_scalar_type (vinfo
,
5475 /* Unlike the other binary operators, shifts/rotates have
5476 the rhs being int, instead of the same type as the lhs,
5477 so make sure the scalar is the right type if we are
5478 dealing with vectors of long long/long/short/char. */
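	  /* Added example (not from the original sources): for
	     "long long x = v[i] << n" the scalar count N is an "int".
	     Before it can serve as the second operand of a vector-by-vector
	     shift on, say, V2DI, it has to be converted to the vector's
	     element type (long long), which is what the checks below
	     guard.  */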
5479 incompatible_op1_vectype_p
5481 || !tree_nop_conversion_p (TREE_TYPE (vectype
),
5483 if (incompatible_op1_vectype_p
5484 && dt
[1] == vect_internal_def
)
5486 if (dump_enabled_p ())
5487 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5488 "unusable type for last operand in"
5489 " vector/vector shift/rotate.\n");
5496 /* Supportable by target? */
5499 if (dump_enabled_p ())
5500 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5504 vec_mode
= TYPE_MODE (vectype
);
5505 icode
= (int) optab_handler (optab
, vec_mode
);
5506 if (icode
== CODE_FOR_nothing
)
5508 if (dump_enabled_p ())
5509 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5510 "op not supported by target.\n");
5511 /* Check only during analysis. */
5512 if (maybe_ne (GET_MODE_SIZE (vec_mode
), UNITS_PER_WORD
)
5514 && !vect_worthwhile_without_simd_p (vinfo
, code
)))
5516 if (dump_enabled_p ())
5517 dump_printf_loc (MSG_NOTE
, vect_location
,
5518 "proceeding using word mode.\n");
5521 /* Worthwhile without SIMD support? Check only during analysis. */
5523 && !VECTOR_MODE_P (TYPE_MODE (vectype
))
5524 && !vect_worthwhile_without_simd_p (vinfo
, code
))
5526 if (dump_enabled_p ())
5527 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5528 "not worthwhile without SIMD support.\n");
5532 if (!vec_stmt
) /* transformation not required. */
5535 && (!vect_maybe_update_slp_op_vectype (slp_op0
, vectype
)
5536 || ((!scalar_shift_arg
|| dt
[1] == vect_internal_def
)
5537 && (!incompatible_op1_vectype_p
5538 || dt
[1] == vect_constant_def
)
5539 && !vect_maybe_update_slp_op_vectype
5541 incompatible_op1_vectype_p
? vectype
: op1_vectype
))))
5543 if (dump_enabled_p ())
5544 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5545 "incompatible vector types for invariants\n");
5548 /* Now adjust the constant shift amount in place. */
5550 && incompatible_op1_vectype_p
5551 && dt
[1] == vect_constant_def
)
5553 for (unsigned i
= 0;
5554 i
< SLP_TREE_SCALAR_OPS (slp_op1
).length (); ++i
)
5556 SLP_TREE_SCALAR_OPS (slp_op1
)[i
]
5557 = fold_convert (TREE_TYPE (vectype
),
5558 SLP_TREE_SCALAR_OPS (slp_op1
)[i
]);
5559 gcc_assert ((TREE_CODE (SLP_TREE_SCALAR_OPS (slp_op1
)[i
])
5563 STMT_VINFO_TYPE (stmt_info
) = shift_vec_info_type
;
5564 DUMP_VECT_SCOPE ("vectorizable_shift");
5565 vect_model_simple_cost (vinfo
, stmt_info
, ncopies
, dt
,
5566 scalar_shift_arg
? 1 : ndts
, slp_node
, cost_vec
);
5572 if (dump_enabled_p ())
5573 dump_printf_loc (MSG_NOTE
, vect_location
,
5574 "transform binary/unary operation.\n");
5576 if (incompatible_op1_vectype_p
&& !slp_node
)
5578 gcc_assert (!scalar_shift_arg
&& was_scalar_shift_arg
);
5579 op1
= fold_convert (TREE_TYPE (vectype
), op1
);
5580 if (dt
[1] != vect_constant_def
)
5581 op1
= vect_init_vector (vinfo
, stmt_info
, op1
,
5582 TREE_TYPE (vectype
), NULL
);
5586 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
5588 if (scalar_shift_arg
&& dt
[1] != vect_internal_def
)
	  /* Vector shl and shr insn patterns can be defined with scalar
	     operand 2 (shift operand).  In this case, use constant or loop
	     invariant op1 directly, without extending it to vector mode
	     first.  */
5594 optab_op2_mode
= insn_data
[icode
].operand
[2].mode
;
5595 if (!VECTOR_MODE_P (optab_op2_mode
))
5597 if (dump_enabled_p ())
5598 dump_printf_loc (MSG_NOTE
, vect_location
,
5599 "operand 1 using scalar mode.\n");
5601 vec_oprnds1
.create (slp_node
? slp_node
->vec_stmts_size
: ncopies
);
5602 vec_oprnds1
.quick_push (vec_oprnd1
);
	      /* Store vec_oprnd1 for every vector stmt to be created.
		 We check during the analysis that all the shift arguments
		 are the same.
		 TODO: Allow different constants for different vector
		 stmts generated for an SLP instance.  */
5609 k
< (slp_node
? slp_node
->vec_stmts_size
- 1 : ncopies
- 1); k
++)
5610 vec_oprnds1
.quick_push (vec_oprnd1
);
5613 else if (!scalar_shift_arg
&& slp_node
&& incompatible_op1_vectype_p
)
5615 if (was_scalar_shift_arg
)
5617 /* If the argument was the same in all lanes create
5618 the correctly typed vector shift amount directly. */
5619 op1
= fold_convert (TREE_TYPE (vectype
), op1
);
5620 op1
= vect_init_vector (vinfo
, stmt_info
, op1
, TREE_TYPE (vectype
),
5621 !loop_vinfo
? gsi
: NULL
);
5622 vec_oprnd1
= vect_init_vector (vinfo
, stmt_info
, op1
, vectype
,
5623 !loop_vinfo
? gsi
: NULL
);
5624 vec_oprnds1
.create (slp_node
->vec_stmts_size
);
5625 for (k
= 0; k
< slp_node
->vec_stmts_size
; k
++)
5626 vec_oprnds1
.quick_push (vec_oprnd1
);
5628 else if (dt
[1] == vect_constant_def
)
5629 /* The constant shift amount has been adjusted in place. */
5632 gcc_assert (TYPE_MODE (op1_vectype
) == TYPE_MODE (vectype
));
5635 /* vec_oprnd1 is available if operand 1 should be of a scalar-type
5636 (a special case for certain kind of vector shifts); otherwise,
5637 operand 1 should be of a vector type (the usual case). */
5638 vect_get_vec_defs (vinfo
, stmt_info
, slp_node
, ncopies
,
5640 vec_oprnd1
? NULL_TREE
: op1
, &vec_oprnds1
);
5642 /* Arguments are ready. Create the new vector stmt. */
5643 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vop0
)
5645 /* For internal defs where we need to use a scalar shift arg
5646 extract the first lane. */
5647 if (scalar_shift_arg
&& dt
[1] == vect_internal_def
)
5649 vop1
= vec_oprnds1
[0];
5650 new_temp
= make_ssa_name (TREE_TYPE (TREE_TYPE (vop1
)));
5652 = gimple_build_assign (new_temp
,
5653 build3 (BIT_FIELD_REF
, TREE_TYPE (new_temp
),
5655 TYPE_SIZE (TREE_TYPE (new_temp
)),
5656 bitsize_zero_node
));
5657 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
5661 vop1
= vec_oprnds1
[i
];
5662 gassign
*new_stmt
= gimple_build_assign (vec_dest
, code
, vop0
, vop1
);
5663 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
5664 gimple_assign_set_lhs (new_stmt
, new_temp
);
5665 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
5667 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
5669 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (new_stmt
);
5673 *vec_stmt
= STMT_VINFO_VEC_STMTS (stmt_info
)[0];
5675 vec_oprnds0
.release ();
5676 vec_oprnds1
.release ();
/* Function vectorizable_operation.

   Check if STMT_INFO performs a binary, unary or ternary operation that can
   be vectorized.
   If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at GSI.
   Return true if STMT_INFO is vectorizable in this way.  */
5691 vectorizable_operation (vec_info
*vinfo
,
5692 stmt_vec_info stmt_info
, gimple_stmt_iterator
*gsi
,
5693 gimple
**vec_stmt
, slp_tree slp_node
,
5694 stmt_vector_for_cost
*cost_vec
)
5698 tree op0
, op1
= NULL_TREE
, op2
= NULL_TREE
;
5700 loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
);
5701 enum tree_code code
, orig_code
;
5702 machine_mode vec_mode
;
5706 bool target_support_p
;
5707 enum vect_def_type dt
[3]
5708 = {vect_unknown_def_type
, vect_unknown_def_type
, vect_unknown_def_type
};
5710 poly_uint64 nunits_in
;
5711 poly_uint64 nunits_out
;
5713 int ncopies
, vec_num
;
5715 vec
<tree
> vec_oprnds0
= vNULL
;
5716 vec
<tree
> vec_oprnds1
= vNULL
;
5717 vec
<tree
> vec_oprnds2
= vNULL
;
5718 tree vop0
, vop1
, vop2
;
5719 bb_vec_info bb_vinfo
= dyn_cast
<bb_vec_info
> (vinfo
);
5721 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
5724 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
5728 /* Is STMT a vectorizable binary/unary operation? */
5729 gassign
*stmt
= dyn_cast
<gassign
*> (stmt_info
->stmt
);
5733 /* Loads and stores are handled in vectorizable_{load,store}. */
5734 if (STMT_VINFO_DATA_REF (stmt_info
))
5737 orig_code
= code
= gimple_assign_rhs_code (stmt
);
5739 /* Shifts are handled in vectorizable_shift. */
5740 if (code
== LSHIFT_EXPR
5741 || code
== RSHIFT_EXPR
5742 || code
== LROTATE_EXPR
5743 || code
== RROTATE_EXPR
)
5746 /* Comparisons are handled in vectorizable_comparison. */
5747 if (TREE_CODE_CLASS (code
) == tcc_comparison
)
5750 /* Conditions are handled in vectorizable_condition. */
5751 if (code
== COND_EXPR
)
5754 /* For pointer addition and subtraction, we should use the normal
5755 plus and minus for the vector operation. */
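  /* Added example (not from the original sources): "q_2 = p_1 + 4" on
     pointers appears in GIMPLE as a POINTER_PLUS_EXPR, but on vectors of
     pointer-sized integers it is carried out as an ordinary PLUS_EXPR;
     likewise a POINTER_DIFF_EXPR is vectorized as a MINUS_EXPR.  */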
5756 if (code
== POINTER_PLUS_EXPR
)
5758 if (code
== POINTER_DIFF_EXPR
)
5761 /* Support only unary or binary operations. */
5762 op_type
= TREE_CODE_LENGTH (code
);
5763 if (op_type
!= unary_op
&& op_type
!= binary_op
&& op_type
!= ternary_op
)
5765 if (dump_enabled_p ())
5766 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5767 "num. args = %d (not unary/binary/ternary op).\n",
5772 scalar_dest
= gimple_assign_lhs (stmt
);
5773 vectype_out
= STMT_VINFO_VECTYPE (stmt_info
);
  /* Most operations cannot handle bit-precision types without extra
     truncations.  */
5777 bool mask_op_p
= VECTOR_BOOLEAN_TYPE_P (vectype_out
);
5779 && !type_has_mode_precision_p (TREE_TYPE (scalar_dest
))
5780 /* Exception are bitwise binary operations. */
5781 && code
!= BIT_IOR_EXPR
5782 && code
!= BIT_XOR_EXPR
5783 && code
!= BIT_AND_EXPR
)
5785 if (dump_enabled_p ())
5786 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5787 "bit-precision arithmetic not supported.\n");
5792 if (!vect_is_simple_use (vinfo
, stmt_info
, slp_node
,
5793 0, &op0
, &slp_op0
, &dt
[0], &vectype
))
5795 if (dump_enabled_p ())
5796 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5797 "use not simple.\n");
5800 /* If op0 is an external or constant def, infer the vector type
5801 from the scalar type. */
      /* For boolean type we cannot determine vectype by
	 invariant value (don't know whether it is a vector
	 of booleans or vector of integers).  We use output
	 vectype because operations on boolean don't change
	 type.  */
5809 if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op0
)))
5811 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (scalar_dest
)))
5813 if (dump_enabled_p ())
5814 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5815 "not supported operation on bool value.\n");
5818 vectype
= vectype_out
;
5821 vectype
= get_vectype_for_scalar_type (vinfo
, TREE_TYPE (op0
),
5825 gcc_assert (vectype
);
5828       if (dump_enabled_p ())
5829	 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5830			  "no vectype for scalar type %T\n",
5836   nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
5837   nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
5838   if (maybe_ne (nunits_out, nunits_in))
5841   tree vectype2 = NULL_TREE, vectype3 = NULL_TREE;
5842   slp_tree slp_op1 = NULL, slp_op2 = NULL;
5843   if (op_type == binary_op || op_type == ternary_op)
5845       if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
5846				1, &op1, &slp_op1, &dt[1], &vectype2))
5848	   if (dump_enabled_p ())
5849	     dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5850			      "use not simple.\n");
5854   if (op_type == ternary_op)
5856       if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
5857				2, &op2, &slp_op2, &dt[2], &vectype3))
5859	   if (dump_enabled_p ())
5860	     dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5861			      "use not simple.\n");
5866   /* Multiple types in SLP are handled by creating the appropriate number of
5867      vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
        case of SLP.  */
5872     vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
5876     ncopies = vect_get_num_copies (loop_vinfo, vectype);
5880   gcc_assert (ncopies >= 1);
5882 /* Reject attempts to combine mask types with nonmask types, e.g. if
5883 we have an AND between a (nonmask) boolean loaded from memory and
5884 a (mask) boolean result of a comparison.
5886 TODO: We could easily fix these cases up using pattern statements. */
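   /* Editor's illustration (not from the original source): given

	_Bool b_1 = p[i];	<-- boolean loaded from memory, nonmask vectype
	_2 = x[i] < y[i];	<-- comparison, mask (VECTOR_BOOLEAN_TYPE_P) vectype
	_3 = b_1 & _2;		<-- BIT_AND_EXPR mixing the two kinds

      the two operands use different boolean representations, so the AND is
      rejected here rather than silently mixing mask and nonmask vectors.  */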
5887   if (VECTOR_BOOLEAN_TYPE_P (vectype) != mask_op_p
5888       || (vectype2 && VECTOR_BOOLEAN_TYPE_P (vectype2) != mask_op_p)
5889       || (vectype3 && VECTOR_BOOLEAN_TYPE_P (vectype3) != mask_op_p))
5891       if (dump_enabled_p ())
5892	 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5893			  "mixed mask and nonmask vector types\n");
5897   /* Supportable by target?  */
5899   vec_mode = TYPE_MODE (vectype);
5900   if (code == MULT_HIGHPART_EXPR)
5901     target_support_p = can_mult_highpart_p (vec_mode, TYPE_UNSIGNED (vectype));
5904       optab = optab_for_tree_code (code, vectype, optab_default);
5907	   if (dump_enabled_p ())
5908	     dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5912       target_support_p = (optab_handler (optab, vec_mode)
5913			   != CODE_FOR_nothing);
5916   if (!target_support_p)
5918       if (dump_enabled_p ())
5919	 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5920			  "op not supported by target.\n");
5921       /* Check only during analysis.  */
5922       if (maybe_ne (GET_MODE_SIZE (vec_mode), UNITS_PER_WORD)
5923	   || (!vec_stmt && !vect_worthwhile_without_simd_p (vinfo, code)))
5925       if (dump_enabled_p ())
5926	 dump_printf_loc (MSG_NOTE, vect_location,
5927			  "proceeding using word mode.\n");
5930   /* Worthwhile without SIMD support?  Check only during analysis.  */
5931   if (!VECTOR_MODE_P (vec_mode)
5933       && !vect_worthwhile_without_simd_p (vinfo, code))
5935       if (dump_enabled_p ())
5936	 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5937			  "not worthwhile without SIMD support.\n");
5941   int reduc_idx = STMT_VINFO_REDUC_IDX (stmt_info);
5942   vec_loop_masks *masks = (loop_vinfo ? &LOOP_VINFO_MASKS (loop_vinfo) : NULL);
5943   internal_fn cond_fn = get_conditional_internal_fn (code);
5945   if (!vec_stmt) /* transformation not required.  */
5947       /* If this operation is part of a reduction, a fully-masked loop
5948	  should only change the active lanes of the reduction chain,
5949	  keeping the inactive lanes as-is.  */
5951	   && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo)
5954	   if (cond_fn == IFN_LAST
5955	       || !direct_internal_fn_supported_p (cond_fn, vectype,
5956						   OPTIMIZE_FOR_SPEED))
5958	       if (dump_enabled_p ())
5959		 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5960				  "can't use a fully-masked loop because no"
5961				  " conditional operation is available.\n");
5962	       LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
5965	     vect_record_loop_mask (loop_vinfo, masks, ncopies * vec_num,
5969       /* Put types on constant and invariant SLP children.  */
5971	   && (!vect_maybe_update_slp_op_vectype (slp_op0, vectype)
5972	       || !vect_maybe_update_slp_op_vectype (slp_op1, vectype)
5973	       || !vect_maybe_update_slp_op_vectype (slp_op2, vectype)))
5975	   if (dump_enabled_p ())
5976	     dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5977			      "incompatible vector types for invariants\n");
5981       STMT_VINFO_TYPE (stmt_info) = op_vec_info_type;
5982       DUMP_VECT_SCOPE ("vectorizable_operation");
5983       vect_model_simple_cost (vinfo, stmt_info,
5984			       ncopies, dt, ndts, slp_node, cost_vec);
5990   if (dump_enabled_p ())
5991     dump_printf_loc (MSG_NOTE, vect_location,
5992		      "transform binary/unary operation.\n");
5994   bool masked_loop_p = loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo);
5996 /* POINTER_DIFF_EXPR has pointer arguments which are vectorized as
5997 vectors with unsigned elements, but the result is signed. So, we
5998 need to compute the MINUS_EXPR into vectype temporary and
5999 VIEW_CONVERT_EXPR it into the final vectype_out result. */
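   /* Editor's illustration (not from the original source): for

	ptrdiff_t d_1 = p_2 - q_3;	<-- POINTER_DIFF_EXPR

      the pointer operands are vectorized as vectors of unsigned integers,
      the subtraction is carried out in that unsigned vectype, and the
      result is then VIEW_CONVERT_EXPRed to the signed vectype_out.  */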
6000   tree vec_cvt_dest = NULL_TREE;
6001   if (orig_code == POINTER_DIFF_EXPR)
6003       vec_dest = vect_create_destination_var (scalar_dest, vectype);
6004       vec_cvt_dest = vect_create_destination_var (scalar_dest, vectype_out);
6008     vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
6010 /* In case the vectorization factor (VF) is bigger than the number
6011 of elements that we can fit in a vectype (nunits), we have to generate
6012 more than one vector stmt - i.e - we need to "unroll" the
6013 vector stmt by a factor VF/nunits. In doing so, we record a pointer
6014 from one copy of the vector stmt to the next, in the field
6015 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
6016 stages to find the correct vector defs to be used when vectorizing
6017 stmts that use the defs of the current stmt. The example below
6018 illustrates the vectorization process when VF=16 and nunits=4 (i.e.,
6019 we need to create 4 vectorized stmts):
6021 before vectorization:
6022 RELATED_STMT VEC_STMT
6026 step 1: vectorize stmt S1 (done in vectorizable_load. See more details
6028 RELATED_STMT VEC_STMT
6029 VS1_0: vx0 = memref0 VS1_1 -
6030 VS1_1: vx1 = memref1 VS1_2 -
6031 VS1_2: vx2 = memref2 VS1_3 -
6032 VS1_3: vx3 = memref3 - -
6033 S1: x = load - VS1_0
6036 step2: vectorize stmt S2 (done here):
6037 To vectorize stmt S2 we first need to find the relevant vector
6038 def for the first operand 'x'. This is, as usual, obtained from
6039 the vector stmt recorded in the STMT_VINFO_VEC_STMT of the stmt
6040 that defines 'x' (S1). This way we find the stmt VS1_0, and the
6041 relevant vector def 'vx0'. Having found 'vx0' we can generate
6042 the vector stmt VS2_0, and as usual, record it in the
6043 STMT_VINFO_VEC_STMT of stmt S2.
6044 When creating the second copy (VS2_1), we obtain the relevant vector
6045 def from the vector stmt recorded in the STMT_VINFO_RELATED_STMT of
6046 stmt VS1_0. This way we find the stmt VS1_1 and the relevant
6047 vector def 'vx1'. Using 'vx1' we create stmt VS2_1 and record a
6048 pointer to it in the STMT_VINFO_RELATED_STMT of the vector stmt VS2_0.
6049 Similarly when creating stmts VS2_2 and VS2_3. This is the resulting
6050 chain of stmts and pointers:
6051 RELATED_STMT VEC_STMT
6052 VS1_0: vx0 = memref0 VS1_1 -
6053 VS1_1: vx1 = memref1 VS1_2 -
6054 VS1_2: vx2 = memref2 VS1_3 -
6055 VS1_3: vx3 = memref3 - -
6056 S1: x = load - VS1_0
6057 VS2_0: vz0 = vx0 + v1 VS2_1 -
6058 VS2_1: vz1 = vx1 + v1 VS2_2 -
6059 VS2_2: vz2 = vx2 + v1 VS2_3 -
6060 VS2_3: vz3 = vx3 + v1 - -
6061 S2: z = x + 1 - VS2_0 */
6063   vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
6064		      op0, &vec_oprnds0, op1, &vec_oprnds1, op2, &vec_oprnds2);
6065   /* Arguments are ready.  Create the new vector stmt.  */
6066   FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
6068       gimple *new_stmt = NULL;
6069       vop1 = ((op_type == binary_op || op_type == ternary_op)
6070	       ? vec_oprnds1[i] : NULL_TREE);
6071       vop2 = ((op_type == ternary_op) ? vec_oprnds2[i] : NULL_TREE);
6072       if (masked_loop_p && reduc_idx >= 0)
6074	   /* Perform the operation on active elements only and take
6075	      inactive elements from the reduction chain input.  */
6077	   vop2 = reduc_idx == 1 ? vop1 : vop0;
6078	   tree mask = vect_get_loop_mask (gsi, masks, vec_num * ncopies,
6080	   gcall *call = gimple_build_call_internal (cond_fn, 4, mask,
6082	   new_temp = make_ssa_name (vec_dest, call);
6083	   gimple_call_set_lhs (call, new_temp);
6084	   gimple_call_set_nothrow (call, true);
6085	   vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
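	   /* Editor's illustration (not from the original source): for a
	      reduction chain res = res + a[i] in a fully-masked loop this
	      emits something like

		vect_res_1 = .COND_ADD (loop_mask_5, vect_res_0, vect_a, vect_res_0);

	      i.e. inactive lanes simply copy the reduction input through.  */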
6090	   new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1, vop2);
6091	   new_temp = make_ssa_name (vec_dest, new_stmt);
6092	   gimple_assign_set_lhs (new_stmt, new_temp);
6093	   vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
6096	       new_temp = build1 (VIEW_CONVERT_EXPR, vectype_out, new_temp);
6097	       new_stmt = gimple_build_assign (vec_cvt_dest, VIEW_CONVERT_EXPR,
6099	       new_temp = make_ssa_name (vec_cvt_dest, new_stmt);
6100	       gimple_assign_set_lhs (new_stmt, new_temp);
6101	       vect_finish_stmt_generation (vinfo, stmt_info,
6106	 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
6108	 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
6112     *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
6114   vec_oprnds0.release ();
6115   vec_oprnds1.release ();
6116   vec_oprnds2.release ();
6121 /* A helper function to ensure data reference DR_INFO's base alignment.  */

6124 ensure_base_align (dr_vec_info *dr_info)
6126   if (dr_info->misalignment == DR_MISALIGNMENT_UNINITIALIZED)
6129   if (dr_info->base_misaligned)
6131       tree base_decl = dr_info->base_decl;
6133       // We should only be able to increase the alignment of a base object if
6134       // we know what its new alignment should be at compile time.
6135       unsigned HOST_WIDE_INT align_base_to =
6136	 DR_TARGET_ALIGNMENT (dr_info).to_constant () * BITS_PER_UNIT;
6138       if (decl_in_symtab_p (base_decl))
6139	 symtab_node::get (base_decl)->increase_alignment (align_base_to);
6140       else if (DECL_ALIGN (base_decl) < align_base_to)
6142	   SET_DECL_ALIGN (base_decl, align_base_to);
6143	   DECL_USER_ALIGN (base_decl) = 1;
6145       dr_info->base_misaligned = false;
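/* Editor's illustration (not from the original source): if the data
   reference's base object is, say,

     static int a[256];		// DECL_ALIGN currently 32 bits

   and the target asks for 256-bit aligned vector accesses, the code above
   raises DECL_ALIGN (a) to 256 and sets DECL_USER_ALIGN so the alignment is
   not reduced again later; decls visible in the symbol table are instead
   updated through their symtab_node.  */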
6150 /* Function get_group_alias_ptr_type.

6152    Return the alias type for the group starting at FIRST_STMT_INFO.  */

6155 get_group_alias_ptr_type (stmt_vec_info first_stmt_info)
6157   struct data_reference *first_dr, *next_dr;
6159   first_dr = STMT_VINFO_DATA_REF (first_stmt_info);
6160   stmt_vec_info next_stmt_info = DR_GROUP_NEXT_ELEMENT (first_stmt_info);
6161   while (next_stmt_info)
6163       next_dr = STMT_VINFO_DATA_REF (next_stmt_info);
6164       if (get_alias_set (DR_REF (first_dr))
6165	   != get_alias_set (DR_REF (next_dr)))
6167	   if (dump_enabled_p ())
6168	     dump_printf_loc (MSG_NOTE, vect_location,
6169			      "conflicting alias set types.\n");
6170	   return ptr_type_node;
6172       next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
6174   return reference_alias_ptr_type (DR_REF (first_dr));

6178 /* Function scan_operand_equal_p.

6180    Helper function for check_scan_store.  Compare two references
6181    with .GOMP_SIMD_LANE bases.  */

6184 scan_operand_equal_p (tree ref1, tree ref2)
6186   tree ref[2] = { ref1, ref2 };
6187   poly_int64 bitsize[2], bitpos[2];
6188   tree offset[2], base[2];
6189   for (int i = 0; i < 2; ++i)
6192       int unsignedp, reversep, volatilep = 0;
6193       base[i] = get_inner_reference (ref[i], &bitsize[i], &bitpos[i],
6194				     &offset[i], &mode, &unsignedp,
6195				     &reversep, &volatilep);
6196       if (reversep || volatilep || maybe_ne (bitpos[i], 0))
6198       if (TREE_CODE (base[i]) == MEM_REF
6199	   && offset[i] == NULL_TREE
6200	   && TREE_CODE (TREE_OPERAND (base[i], 0)) == SSA_NAME)
6202	   gimple *def_stmt = SSA_NAME_DEF_STMT (TREE_OPERAND (base[i], 0));
6203	   if (is_gimple_assign (def_stmt)
6204	       && gimple_assign_rhs_code (def_stmt) == POINTER_PLUS_EXPR
6205	       && TREE_CODE (gimple_assign_rhs1 (def_stmt)) == ADDR_EXPR
6206	       && TREE_CODE (gimple_assign_rhs2 (def_stmt)) == SSA_NAME)
6208	       if (maybe_ne (mem_ref_offset (base[i]), 0))
6210	       base[i] = TREE_OPERAND (gimple_assign_rhs1 (def_stmt), 0);
6211	       offset[i] = gimple_assign_rhs2 (def_stmt);
6216   if (!operand_equal_p (base[0], base[1], 0))
6218   if (maybe_ne (bitsize[0], bitsize[1]))
6220   if (offset[0] != offset[1])
6222       if (!offset[0] || !offset[1])
6224       if (!operand_equal_p (offset[0], offset[1], 0))
6227       for (int i = 0; i < 2; ++i)
6229	   step[i] = integer_one_node;
6230	   if (TREE_CODE (offset[i]) == SSA_NAME)
6232	       gimple *def_stmt = SSA_NAME_DEF_STMT (offset[i]);
6233	       if (is_gimple_assign (def_stmt)
6234		   && gimple_assign_rhs_code (def_stmt) == MULT_EXPR
6235		   && (TREE_CODE (gimple_assign_rhs2 (def_stmt))
6238		   step[i] = gimple_assign_rhs2 (def_stmt);
6239		   offset[i] = gimple_assign_rhs1 (def_stmt);
6242	   else if (TREE_CODE (offset[i]) == MULT_EXPR)
6244	       step[i] = TREE_OPERAND (offset[i], 1);
6245	       offset[i] = TREE_OPERAND (offset[i], 0);
6247	   tree rhs1 = NULL_TREE;
6248	   if (TREE_CODE (offset[i]) == SSA_NAME)
6250	       gimple *def_stmt = SSA_NAME_DEF_STMT (offset[i]);
6251	       if (gimple_assign_cast_p (def_stmt))
6252		 rhs1 = gimple_assign_rhs1 (def_stmt);
6254	   else if (CONVERT_EXPR_P (offset[i]))
6255	     rhs1 = TREE_OPERAND (offset[i], 0);
6257	       && INTEGRAL_TYPE_P (TREE_TYPE (rhs1))
6258	       && INTEGRAL_TYPE_P (TREE_TYPE (offset[i]))
6259	       && (TYPE_PRECISION (TREE_TYPE (offset[i]))
6260		   >= TYPE_PRECISION (TREE_TYPE (rhs1))))
6263       if (!operand_equal_p (offset[0], offset[1], 0)
6264	   || !operand_equal_p (step[0], step[1], 0))
6272 enum scan_store_kind {
6273   /* Normal permutation.  */
6274   scan_store_kind_perm,

6276   /* Whole vector left shift permutation with zero init.  */
6277   scan_store_kind_lshift_zero,

6279   /* Whole vector left shift permutation and VEC_COND_EXPR.  */
6280   scan_store_kind_lshift_cond
};

6283 /* Function scan_store_can_perm_p.

6285    Verify if we can perform the needed permutations or whole vector shifts.
6286    Return -1 on failure, otherwise exact log2 of vectype's nunits.
6287    USE_WHOLE_VECTOR is a vector of enum scan_store_kind specifying which
6288    operation to do at each step.  */
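/* Editor's illustration (not from the original source): for a V8SI vectype
   (nunits == 8, units_log2 == 3) the loop below checks these selectors:

     i == 0:  { 0, 8, 9, 10, 11, 12, 13, 14 }
     i == 1:  { 0, 1, 8, 9, 10, 11, 12, 13 }
     i == 2:  { 0, 1, 2, 3, 8, 9, 10, 11 }
     i == 3:  { 7, 7, 7, 7, 7, 7, 7, 7 }

   Steps 0..2 shift the running vector up by 2**i lanes, with the low lanes
   taken from the initializer vector -- the usual log2(nunits)-step inclusive
   prefix-sum schedule -- and the last selector broadcasts the final lane.
   A step whose permutation the target cannot do may instead fall back to a
   whole-vector shift, which is what USE_WHOLE_VECTOR records.  */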
6291 scan_store_can_perm_p (tree vectype, tree init,
6292		       vec<enum scan_store_kind> *use_whole_vector = NULL)
6294   enum machine_mode vec_mode = TYPE_MODE (vectype);
6295   unsigned HOST_WIDE_INT nunits;
6296   if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant (&nunits))
6298   int units_log2 = exact_log2 (nunits);
6299   if (units_log2 <= 0)
6303   enum scan_store_kind whole_vector_shift_kind = scan_store_kind_perm;
6304   for (i = 0; i <= units_log2; ++i)
6306       unsigned HOST_WIDE_INT j, k;
6307       enum scan_store_kind kind = scan_store_kind_perm;
6308       vec_perm_builder sel (nunits, nunits, 1);
6309       sel.quick_grow (nunits);
6310       if (i == units_log2)
6312	   for (j = 0; j < nunits; ++j)
6313	     sel[j] = nunits - 1;
6317	   for (j = 0; j < (HOST_WIDE_INT_1U << i); ++j)
6319	   for (k = 0; j < nunits; ++j, ++k)
6320	     sel[j] = nunits + k;
6322       vec_perm_indices indices (sel, i == units_log2 ? 1 : 2, nunits);
6323       if (!can_vec_perm_const_p (vec_mode, indices))
6325	   if (i == units_log2)
6328	   if (whole_vector_shift_kind == scan_store_kind_perm)
6330	       if (optab_handler (vec_shl_optab, vec_mode) == CODE_FOR_nothing)
6332	       whole_vector_shift_kind = scan_store_kind_lshift_zero;
6333	       /* Whole vector shifts shift in zeros, so if init is all zero
6334		  constant, there is no need to do anything further.  */
6335	       if ((TREE_CODE (init) != INTEGER_CST
6336		    && TREE_CODE (init) != REAL_CST)
6337		   || !initializer_zerop (init))
6339		   tree masktype = truth_type_for (vectype);
6340		   if (!expand_vec_cond_expr_p (vectype, masktype, VECTOR_CST))
6342		   whole_vector_shift_kind = scan_store_kind_lshift_cond;
6345	   kind = whole_vector_shift_kind;
6347       if (use_whole_vector)
6349	   if (kind != scan_store_kind_perm && use_whole_vector->is_empty ())
6350	     use_whole_vector->safe_grow_cleared (i, true);
6351	   if (kind != scan_store_kind_perm || !use_whole_vector->is_empty ())
6352	     use_whole_vector->safe_push (kind);
6360 /* Function check_scan_store.

6362    Check magic stores for #pragma omp scan {in,ex}clusive reductions.  */

6365 check_scan_store (vec_info *vinfo, stmt_vec_info stmt_info, tree vectype,
6366		  enum vect_def_type rhs_dt, bool slp, tree mask,
6367		  vect_memory_access_type memory_access_type)
6369   loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
6370   dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
6373   gcc_assert (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) > 1);
6376       || memory_access_type != VMAT_CONTIGUOUS
6377       || TREE_CODE (DR_BASE_ADDRESS (dr_info->dr)) != ADDR_EXPR
6378       || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0))
6379       || loop_vinfo == NULL
6380       || LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)
6381       || STMT_VINFO_GROUPED_ACCESS (stmt_info)
6382       || !integer_zerop (get_dr_vinfo_offset (vinfo, dr_info))
6383       || !integer_zerop (DR_INIT (dr_info->dr))
6384       || !(ref_type = reference_alias_ptr_type (DR_REF (dr_info->dr)))
6385       || !alias_sets_conflict_p (get_alias_set (vectype),
6386				  get_alias_set (TREE_TYPE (ref_type))))
6388       if (dump_enabled_p ())
6389	 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6390			  "unsupported OpenMP scan store.\n");
6394 /* We need to pattern match code built by OpenMP lowering and simplified
6395 by following optimizations into something we can handle.
6396 #pragma omp simd reduction(inscan,+:r)
6400 #pragma omp scan inclusive (r)
6403 shall have body with:
6404 // Initialization for input phase, store the reduction initializer:
6405 _20 = .GOMP_SIMD_LANE (simduid.3_14(D), 0);
6406 _21 = .GOMP_SIMD_LANE (simduid.3_14(D), 1);
6408 // Actual input phase:
6410 r.0_5 = D.2042[_20];
6413 // Initialization for scan phase:
6414 _25 = .GOMP_SIMD_LANE (simduid.3_14(D), 2);
6420 // Actual scan phase:
6422 r.1_8 = D.2042[_20];
6424 The "omp simd array" variable D.2042 holds the privatized copy used
6425 inside of the loop and D.2043 is another one that holds copies of
6426 the current original list item. The separate GOMP_SIMD_LANE ifn
6427 kinds are there in order to allow optimizing the initializer store
6428 and combiner sequence, e.g. if it is originally some C++ish user
6429 defined reduction, but allow the vectorizer to pattern recognize it
6430 and turn into the appropriate vectorized scan.
6432 For exclusive scan, this is slightly different:
6433 #pragma omp simd reduction(inscan,+:r)
6437 #pragma omp scan exclusive (r)
6440 shall have body with:
6441 // Initialization for input phase, store the reduction initializer:
6442 _20 = .GOMP_SIMD_LANE (simduid.3_14(D), 0);
6443 _21 = .GOMP_SIMD_LANE (simduid.3_14(D), 1);
6445 // Actual input phase:
6447 r.0_5 = D.2042[_20];
6450 // Initialization for scan phase:
6451 _25 = .GOMP_SIMD_LANE (simduid.3_14(D), 3);
6457 // Actual scan phase:
6459        r.1_8 = D.2044[_20];
	    ...  */
6462   if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 2)
6464       /* Match the D.2042[_21] = 0; store above.  Just require that
6465	  it is a constant or external definition store.  */
6466       if (rhs_dt != vect_constant_def && rhs_dt != vect_external_def)
6469	   if (dump_enabled_p ())
6470	     dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6471			      "unsupported OpenMP scan initializer store.\n");
6475       if (! loop_vinfo->scan_map)
6476	 loop_vinfo->scan_map = new hash_map<tree, tree>;
6477       tree var = TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0);
6478       tree &cached = loop_vinfo->scan_map->get_or_insert (var);
6481	 cached = gimple_assign_rhs1 (STMT_VINFO_STMT (stmt_info));
6483       /* These stores can be vectorized normally.  */
6487   if (rhs_dt != vect_internal_def)
6490       if (dump_enabled_p ())
6491	 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6492			  "unsupported OpenMP scan combiner pattern.\n");
6496   gimple *stmt = STMT_VINFO_STMT (stmt_info);
6497   tree rhs = gimple_assign_rhs1 (stmt);
6498   if (TREE_CODE (rhs) != SSA_NAME)
6501   gimple *other_store_stmt = NULL;
6502   tree var = TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0);
6503   bool inscan_var_store
6504     = lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var)) != NULL;
6506   if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4)
6508       if (!inscan_var_store)
6510	   use_operand_p use_p;
6511	   imm_use_iterator iter;
6512	   FOR_EACH_IMM_USE_FAST (use_p, iter, rhs)
6514	       gimple *use_stmt = USE_STMT (use_p);
6515	       if (use_stmt == stmt || is_gimple_debug (use_stmt))
6517	       if (gimple_bb (use_stmt) != gimple_bb (stmt)
6518		   || !is_gimple_assign (use_stmt)
6519		   || gimple_assign_rhs_class (use_stmt) != GIMPLE_BINARY_RHS
6521		   || TREE_CODE (gimple_assign_lhs (use_stmt)) != SSA_NAME)
6523	       other_store_stmt = use_stmt;
6525	   if (other_store_stmt == NULL)
6527	   rhs = gimple_assign_lhs (other_store_stmt);
6528	   if (!single_imm_use (rhs, &use_p, &other_store_stmt))
6532   else if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 3)
6534       use_operand_p use_p;
6535       imm_use_iterator iter;
6536       FOR_EACH_IMM_USE_FAST (use_p, iter, rhs)
6538	   gimple *use_stmt = USE_STMT (use_p);
6539	   if (use_stmt == stmt || is_gimple_debug (use_stmt))
6541	   if (other_store_stmt)
6543	   other_store_stmt = use_stmt;
6549   gimple *def_stmt = SSA_NAME_DEF_STMT (rhs);
6550   if (gimple_bb (def_stmt) != gimple_bb (stmt)
6551       || !is_gimple_assign (def_stmt)
6552       || gimple_assign_rhs_class (def_stmt) != GIMPLE_BINARY_RHS)
6555   enum tree_code code = gimple_assign_rhs_code (def_stmt);
6556   /* For pointer addition, we should use the normal plus for the vector ...  */
6560     case POINTER_PLUS_EXPR:
6563     case MULT_HIGHPART_EXPR:
6568   if (TREE_CODE_LENGTH (code) != binary_op || !commutative_tree_code (code))
6571   tree rhs1 = gimple_assign_rhs1 (def_stmt);
6572   tree rhs2 = gimple_assign_rhs2 (def_stmt);
6573   if (TREE_CODE (rhs1) != SSA_NAME || TREE_CODE (rhs2) != SSA_NAME)
6576   gimple *load1_stmt = SSA_NAME_DEF_STMT (rhs1);
6577   gimple *load2_stmt = SSA_NAME_DEF_STMT (rhs2);
6578   if (gimple_bb (load1_stmt) != gimple_bb (stmt)
6579       || !gimple_assign_load_p (load1_stmt)
6580       || gimple_bb (load2_stmt) != gimple_bb (stmt)
6581       || !gimple_assign_load_p (load2_stmt))
6584   stmt_vec_info load1_stmt_info = loop_vinfo->lookup_stmt (load1_stmt);
6585   stmt_vec_info load2_stmt_info = loop_vinfo->lookup_stmt (load2_stmt);
6586   if (load1_stmt_info == NULL
6587       || load2_stmt_info == NULL
6588       || (STMT_VINFO_SIMD_LANE_ACCESS_P (load1_stmt_info)
6589	   != STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info))
6590       || (STMT_VINFO_SIMD_LANE_ACCESS_P (load2_stmt_info)
6591	   != STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info)))
6594   if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4 && inscan_var_store)
6596       dr_vec_info *load1_dr_info = STMT_VINFO_DR_INFO (load1_stmt_info);
6597       if (TREE_CODE (DR_BASE_ADDRESS (load1_dr_info->dr)) != ADDR_EXPR
6598	   || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (load1_dr_info->dr), 0)))
6600       tree var1 = TREE_OPERAND (DR_BASE_ADDRESS (load1_dr_info->dr), 0);
6602       if (lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1)))
6606       use_operand_p use_p;
6607       imm_use_iterator iter;
6608       FOR_EACH_IMM_USE_FAST (use_p, iter, lrhs)
6610	   gimple *use_stmt = USE_STMT (use_p);
6611	   if (use_stmt == def_stmt || is_gimple_debug (use_stmt))
6613	   if (other_store_stmt)
6615	   other_store_stmt = use_stmt;
6619   if (other_store_stmt == NULL)
6621   if (gimple_bb (other_store_stmt) != gimple_bb (stmt)
6622       || !gimple_store_p (other_store_stmt))
6625   stmt_vec_info other_store_stmt_info
6626     = loop_vinfo->lookup_stmt (other_store_stmt);
6627   if (other_store_stmt_info == NULL
6628       || (STMT_VINFO_SIMD_LANE_ACCESS_P (other_store_stmt_info)
6629	   != STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info)))
6632   gimple *stmt1 = stmt;
6633   gimple *stmt2 = other_store_stmt;
6634   if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4 && !inscan_var_store)
6635     std::swap (stmt1, stmt2);
6636   if (scan_operand_equal_p (gimple_assign_lhs (stmt1),
6637			     gimple_assign_rhs1 (load2_stmt)))
6639       std::swap (rhs1, rhs2);
6640       std::swap (load1_stmt, load2_stmt);
6641       std::swap (load1_stmt_info, load2_stmt_info);
6643   if (!scan_operand_equal_p (gimple_assign_lhs (stmt1),
6644			      gimple_assign_rhs1 (load1_stmt)))
6647   tree var3 = NULL_TREE;
6648   if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 3
6649       && !scan_operand_equal_p (gimple_assign_lhs (stmt2),
6650				 gimple_assign_rhs1 (load2_stmt)))
6652   else if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4)
6654       dr_vec_info *load2_dr_info = STMT_VINFO_DR_INFO (load2_stmt_info);
6655       if (TREE_CODE (DR_BASE_ADDRESS (load2_dr_info->dr)) != ADDR_EXPR
6656	   || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (load2_dr_info->dr), 0)))
6658       var3 = TREE_OPERAND (DR_BASE_ADDRESS (load2_dr_info->dr), 0);
6659       if (!lookup_attribute ("omp simd array", DECL_ATTRIBUTES (var3))
6660	   || lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var3))
6661	   || lookup_attribute ("omp simd inscan exclusive",
6662				DECL_ATTRIBUTES (var3)))
6666   dr_vec_info *other_dr_info = STMT_VINFO_DR_INFO (other_store_stmt_info);
6667   if (TREE_CODE (DR_BASE_ADDRESS (other_dr_info->dr)) != ADDR_EXPR
6668       || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (other_dr_info->dr), 0)))
6671   tree var1 = TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0);
6672   tree var2 = TREE_OPERAND (DR_BASE_ADDRESS (other_dr_info->dr), 0);
6673   if (!lookup_attribute ("omp simd array", DECL_ATTRIBUTES (var1))
6674       || !lookup_attribute ("omp simd array", DECL_ATTRIBUTES (var2))
6675       || (!lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1)))
6676	  == (!lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var2))))
6679   if (lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1)))
6680     std::swap (var1, var2);
6682   if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4)
6684       if (!lookup_attribute ("omp simd inscan exclusive",
6685			      DECL_ATTRIBUTES (var1)))
6690   if (loop_vinfo->scan_map == NULL)
6692   tree *init = loop_vinfo->scan_map->get (var1);
6696 /* The IL is as expected, now check if we can actually vectorize it.
6703 should be vectorized as (where _40 is the vectorized rhs
6704 from the D.2042[_21] = 0; store):
6705 _30 = MEM <vector(8) int> [(int *)&D.2043];
6706 _31 = MEM <vector(8) int> [(int *)&D.2042];
6707 _32 = VEC_PERM_EXPR <_40, _31, { 0, 8, 9, 10, 11, 12, 13, 14 }>;
6709 // _33 = { _31[0], _31[0]+_31[1], _31[1]+_31[2], ..., _31[6]+_31[7] };
6710 _34 = VEC_PERM_EXPR <_40, _33, { 0, 1, 8, 9, 10, 11, 12, 13 }>;
6712 // _35 = { _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
6713 // _31[1]+.._31[4], ... _31[4]+.._31[7] };
6714 _36 = VEC_PERM_EXPR <_40, _35, { 0, 1, 2, 3, 8, 9, 10, 11 }>;
6716 // _37 = { _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
6717 // _31[0]+.._31[4], ... _31[0]+.._31[7] };
6719 _39 = VEC_PERM_EXPR <_38, _38, { 7, 7, 7, 7, 7, 7, 7, 7 }>;
6720 MEM <vector(8) int> [(int *)&D.2043] = _39;
6721 MEM <vector(8) int> [(int *)&D.2042] = _38;
6728 should be vectorized as (where _40 is the vectorized rhs
6729 from the D.2042[_21] = 0; store):
6730 _30 = MEM <vector(8) int> [(int *)&D.2043];
6731 _31 = MEM <vector(8) int> [(int *)&D.2042];
6732 _32 = VEC_PERM_EXPR <_40, _31, { 0, 8, 9, 10, 11, 12, 13, 14 }>;
6733 _33 = VEC_PERM_EXPR <_40, _32, { 0, 8, 9, 10, 11, 12, 13, 14 }>;
6735 // _34 = { 0, _31[0], _31[0]+_31[1], _31[1]+_31[2], _31[2]+_31[3],
6736 // _31[3]+_31[4], ... _31[5]+.._31[6] };
6737 _35 = VEC_PERM_EXPR <_40, _34, { 0, 1, 8, 9, 10, 11, 12, 13 }>;
6739 // _36 = { 0, _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
6740 // _31[1]+.._31[4], ... _31[3]+.._31[6] };
6741 _37 = VEC_PERM_EXPR <_40, _36, { 0, 1, 2, 3, 8, 9, 10, 11 }>;
6743 // _38 = { 0, _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
6744 // _31[0]+.._31[4], ... _31[0]+.._31[6] };
6747 _51 = VEC_PERM_EXPR <_50, _50, { 7, 7, 7, 7, 7, 7, 7, 7 }>;
6748 MEM <vector(8) int> [(int *)&D.2044] = _39;
6749 MEM <vector(8) int> [(int *)&D.2042] = _51; */
6750   enum machine_mode vec_mode = TYPE_MODE (vectype);
6751   optab optab = optab_for_tree_code (code, vectype, optab_default);
6752   if (!optab || optab_handler (optab, vec_mode) == CODE_FOR_nothing)
6755   int units_log2 = scan_store_can_perm_p (vectype, *init);
6756   if (units_log2 == -1)
6763 /* Function vectorizable_scan_store.

6765    Helper of vectorizable_store, arguments like on vectorizable_store.
6766    Handle only the transformation, checking is done in check_scan_store.  */

6769 vectorizable_scan_store (vec_info *vinfo,
6770			 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
6771			 gimple **vec_stmt, int ncopies)
6773   loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
6774   dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
6775   tree ref_type = reference_alias_ptr_type (DR_REF (dr_info->dr));
6776   tree vectype = STMT_VINFO_VECTYPE (stmt_info);

6778   if (dump_enabled_p ())
6779     dump_printf_loc (MSG_NOTE, vect_location,
6780		      "transform scan store. ncopies = %d\n", ncopies);

6782   gimple *stmt = STMT_VINFO_STMT (stmt_info);
6783   tree rhs = gimple_assign_rhs1 (stmt);
6784   gcc_assert (TREE_CODE (rhs) == SSA_NAME);

6786   tree var = TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0);
6787   bool inscan_var_store
6788     = lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var)) != NULL;

6790   if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4 && !inscan_var_store)
6792       use_operand_p use_p;
6793       imm_use_iterator iter;
6794       FOR_EACH_IMM_USE_FAST (use_p, iter, rhs)
6796	   gimple *use_stmt = USE_STMT (use_p);
6797	   if (use_stmt == stmt || is_gimple_debug (use_stmt))
6799	   rhs = gimple_assign_lhs (use_stmt);
6804   gimple *def_stmt = SSA_NAME_DEF_STMT (rhs);
6805   enum tree_code code = gimple_assign_rhs_code (def_stmt);
6806   if (code == POINTER_PLUS_EXPR)
6808   gcc_assert (TREE_CODE_LENGTH (code) == binary_op
6809	      && commutative_tree_code (code));
6810   tree rhs1 = gimple_assign_rhs1 (def_stmt);
6811   tree rhs2 = gimple_assign_rhs2 (def_stmt);
6812   gcc_assert (TREE_CODE (rhs1) == SSA_NAME && TREE_CODE (rhs2) == SSA_NAME);
6813   gimple *load1_stmt = SSA_NAME_DEF_STMT (rhs1);
6814   gimple *load2_stmt = SSA_NAME_DEF_STMT (rhs2);
6815   stmt_vec_info load1_stmt_info = loop_vinfo->lookup_stmt (load1_stmt);
6816   stmt_vec_info load2_stmt_info = loop_vinfo->lookup_stmt (load2_stmt);
6817   dr_vec_info *load1_dr_info = STMT_VINFO_DR_INFO (load1_stmt_info);
6818   dr_vec_info *load2_dr_info = STMT_VINFO_DR_INFO (load2_stmt_info);
6819   tree var1 = TREE_OPERAND (DR_BASE_ADDRESS (load1_dr_info->dr), 0);
6820   tree var2 = TREE_OPERAND (DR_BASE_ADDRESS (load2_dr_info->dr), 0);

6822   if (lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1)))
6824       std::swap (rhs1, rhs2);
6825       std::swap (var1, var2);
6826       std::swap (load1_dr_info, load2_dr_info);

6829   tree *init = loop_vinfo->scan_map->get (var1);

6832   unsigned HOST_WIDE_INT nunits;
6833   if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant (&nunits))
6835   auto_vec<enum scan_store_kind, 16> use_whole_vector;
6836   int units_log2 = scan_store_can_perm_p (vectype, *init, &use_whole_vector);
6837   gcc_assert (units_log2 > 0);
6838   auto_vec<tree, 16> perms;
6839   perms.quick_grow (units_log2 + 1);
6840   tree zero_vec = NULL_TREE, masktype = NULL_TREE;
6841   for (int i = 0; i <= units_log2; ++i)
6843       unsigned HOST_WIDE_INT j, k;
6844       vec_perm_builder sel (nunits, nunits, 1);
6845       sel.quick_grow (nunits);
6846       if (i == units_log2)
6847	 for (j = 0; j < nunits; ++j)
6848	   sel[j] = nunits - 1;
6851	   for (j = 0; j < (HOST_WIDE_INT_1U << i); ++j)
6853	   for (k = 0; j < nunits; ++j, ++k)
6854	     sel[j] = nunits + k;
6856       vec_perm_indices indices (sel, i == units_log2 ? 1 : 2, nunits);
6857       if (!use_whole_vector.is_empty ()
6858	   && use_whole_vector[i] != scan_store_kind_perm)
6860	   if (zero_vec == NULL_TREE)
6861	     zero_vec = build_zero_cst (vectype);
6862	   if (masktype == NULL_TREE
6863	       && use_whole_vector[i] == scan_store_kind_lshift_cond)
6864	     masktype = truth_type_for (vectype);
6865	   perms[i] = vect_gen_perm_mask_any (vectype, indices);
6868	 perms[i] = vect_gen_perm_mask_checked (vectype, indices);
6871   tree vec_oprnd1 = NULL_TREE;
6872   tree vec_oprnd2 = NULL_TREE;
6873   tree vec_oprnd3 = NULL_TREE;
6874   tree dataref_ptr = DR_BASE_ADDRESS (dr_info->dr);
6875   tree dataref_offset = build_int_cst (ref_type, 0);
6876   tree bump = vect_get_data_ptr_increment (vinfo, dr_info,
6877					   vectype, VMAT_CONTIGUOUS);
6878   tree ldataref_ptr = NULL_TREE;
6879   tree orig = NULL_TREE;
6880   if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4 && !inscan_var_store)
6881     ldataref_ptr = DR_BASE_ADDRESS (load1_dr_info->dr);
6882   auto_vec<tree> vec_oprnds1;
6883   auto_vec<tree> vec_oprnds2;
6884   auto_vec<tree> vec_oprnds3;
6885   vect_get_vec_defs (vinfo, stmt_info, NULL, ncopies,
6886		      *init, &vec_oprnds1,
6887		      ldataref_ptr == NULL ? rhs1 : NULL, &vec_oprnds2,
6888		      rhs2, &vec_oprnds3);
6889   for (int j = 0; j < ncopies; j++)
6891       vec_oprnd1 = vec_oprnds1[j];
6892       if (ldataref_ptr == NULL)
6893	 vec_oprnd2 = vec_oprnds2[j];
6894       vec_oprnd3 = vec_oprnds3[j];
6897       else if (!inscan_var_store)
6898	 dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset, bump);

6902	   vec_oprnd2 = make_ssa_name (vectype);
6903	   tree data_ref = fold_build2 (MEM_REF, vectype,
6904				       unshare_expr (ldataref_ptr),
6906	   vect_copy_ref_info (data_ref, DR_REF (load1_dr_info->dr));
6907	   gimple *g = gimple_build_assign (vec_oprnd2, data_ref);
6908	   vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
6909	   STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
6910	   *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];

6913       tree v = vec_oprnd2;
6914       for (int i = 0; i < units_log2; ++i)
6916	   tree new_temp = make_ssa_name (vectype);
6917	   gimple *g = gimple_build_assign (new_temp, VEC_PERM_EXPR,
6919					   && (use_whole_vector[i]
6920					       != scan_store_kind_perm))
6921					  ? zero_vec : vec_oprnd1, v,
6923	   vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
6924	   STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
6925	   *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
6927	   if (zero_vec && use_whole_vector[i] == scan_store_kind_lshift_cond)
6929	       /* Whole vector shift shifted in zero bits, but if *init
6930		  is not initializer_zerop, we need to replace those elements
6931		  with elements from vec_oprnd1.  */
6932	       tree_vector_builder vb (masktype, nunits, 1);
6933	       for (unsigned HOST_WIDE_INT k = 0; k < nunits; ++k)
6934		 vb.quick_push (k < (HOST_WIDE_INT_1U << i)
6935				? boolean_false_node : boolean_true_node);

6937	       tree new_temp2 = make_ssa_name (vectype);
6938	       g = gimple_build_assign (new_temp2, VEC_COND_EXPR, vb.build (),
6939					new_temp, vec_oprnd1);
6940	       vect_finish_stmt_generation (vinfo, stmt_info,
6942	       STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
6943	       new_temp = new_temp2;

6946	   /* For exclusive scan, perform the perms[i] permutation once ...  */
6949	       && STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4

6957	   tree new_temp2 = make_ssa_name (vectype);
6958	   g = gimple_build_assign (new_temp2, code, v, new_temp);
6959	   vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
6960	   STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
6965       tree new_temp = make_ssa_name (vectype);
6966       gimple *g = gimple_build_assign (new_temp, code, orig, v);
6967       vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
6968       STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);

6970       tree last_perm_arg = new_temp;
6971 /* For exclusive scan, new_temp computed above is the exclusive scan
6972 prefix sum. Turn it into inclusive prefix sum for the broadcast
6973 of the last element into orig. */
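       /* Editor's illustration (not from the original source): with inputs
	  { a0, a1, ..., a7 } and a zero initializer, the exclusive prefix in
	  NEW_TEMP is { 0, a0, a0+a1, ..., a0+..+a6 }; adding the original
	  rhs vector (VEC_OPRND2) lane-wise below yields the inclusive prefix
	  { a0, a0+a1, ..., a0+..+a7 }, whose last lane is what the final
	  permutation broadcasts into ORIG.  */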
6974       if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4)
6976	   last_perm_arg = make_ssa_name (vectype);
6977	   g = gimple_build_assign (last_perm_arg, code, new_temp, vec_oprnd2);
6978	   vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
6979	   STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);

6982       orig = make_ssa_name (vectype);
6983       g = gimple_build_assign (orig, VEC_PERM_EXPR, last_perm_arg,
6984			       last_perm_arg, perms[units_log2]);
6985       vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
6986       STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);

6988       if (!inscan_var_store)
6990	   tree data_ref = fold_build2 (MEM_REF, vectype,
6991				       unshare_expr (dataref_ptr),
6993	   vect_copy_ref_info (data_ref, DR_REF (dr_info->dr));
6994	   g = gimple_build_assign (data_ref, new_temp);
6995	   vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
6996	   STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);

7000   if (inscan_var_store)
7001     for (int j = 0; j < ncopies; j++)
7004	   dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset, bump);

7006	 tree data_ref = fold_build2 (MEM_REF, vectype,
7007				     unshare_expr (dataref_ptr),
7009	 vect_copy_ref_info (data_ref, DR_REF (dr_info->dr));
7010	 gimple *g = gimple_build_assign (data_ref, orig);
7011	 vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
7012	 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
7018 /* Function vectorizable_store.

7020    Check if STMT_INFO defines a non scalar data-ref (array/pointer/structure)
7021    that can be vectorized.
7022    If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
7023    stmt to replace it, put it in VEC_STMT, and insert it at GSI.
7024    Return true if STMT_INFO is vectorizable in this way.  */

7027 vectorizable_store (vec_info *vinfo,
7028		    stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
7029		    gimple **vec_stmt, slp_tree slp_node,
7030		    stmt_vector_for_cost *cost_vec)
7034   tree vec_oprnd = NULL_TREE;
7036   loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
7037   class loop *loop = NULL;
7038   machine_mode vec_mode;
7040   enum vect_def_type rhs_dt = vect_unknown_def_type;
7041   enum vect_def_type mask_dt = vect_unknown_def_type;
7042   tree dataref_ptr = NULL_TREE;
7043   tree dataref_offset = NULL_TREE;
7044   gimple *ptr_incr = NULL;
7047   stmt_vec_info first_stmt_info;
7049   unsigned int group_size, i;
7050   vec<tree> oprnds = vNULL;
7051   vec<tree> result_chain = vNULL;
7052   tree offset = NULL_TREE;
7053   vec<tree> vec_oprnds = vNULL;
7054   bool slp = (slp_node != NULL);
7055   unsigned int vec_num;
7056   bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
7058   gather_scatter_info gs_info;
7060   vec_load_store_type vls_type;

7063   if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
7066   if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
7070   /* Is vectorizable store? */

7072   tree mask = NULL_TREE, mask_vectype = NULL_TREE;
7073   if (gassign *assign = dyn_cast <gassign *> (stmt_info->stmt))
7075       tree scalar_dest = gimple_assign_lhs (assign);
7076       if (TREE_CODE (scalar_dest) == VIEW_CONVERT_EXPR
7077	   && is_pattern_stmt_p (stmt_info))
7078	 scalar_dest = TREE_OPERAND (scalar_dest, 0);
7079       if (TREE_CODE (scalar_dest) != ARRAY_REF
7080	   && TREE_CODE (scalar_dest) != BIT_FIELD_REF
7081	   && TREE_CODE (scalar_dest) != INDIRECT_REF
7082	   && TREE_CODE (scalar_dest) != COMPONENT_REF
7083	   && TREE_CODE (scalar_dest) != IMAGPART_EXPR
7084	   && TREE_CODE (scalar_dest) != REALPART_EXPR
7085	   && TREE_CODE (scalar_dest) != MEM_REF)
7090       gcall *call = dyn_cast <gcall *> (stmt_info->stmt);
7091       if (!call || !gimple_call_internal_p (call))
7094       internal_fn ifn = gimple_call_internal_fn (call);
7095       if (!internal_store_fn_p (ifn))
7098       if (slp_node != NULL)
7100	   if (dump_enabled_p ())
7101	     dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7102			      "SLP of masked stores not supported.\n");
7106       int mask_index = internal_fn_mask_index (ifn);
7107       if (mask_index >= 0)
7109	   mask = gimple_call_arg (call, mask_index);
7110	   if (!vect_check_scalar_mask (vinfo, stmt_info, mask, &mask_dt,
7116   op = vect_get_store_rhs (stmt_info);
7118   /* Cannot have hybrid store SLP -- that would mean storing to the
7119      same location twice.  */
7120   gcc_assert (slp == PURE_SLP_STMT (stmt_info));

7122   tree vectype = STMT_VINFO_VECTYPE (stmt_info), rhs_vectype = NULL_TREE;
7123   poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);

7127       loop = LOOP_VINFO_LOOP (loop_vinfo);
7128       vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);

7133   /* Multiple types in SLP are handled by creating the appropriate number of
7134      vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
        case of SLP.  */
7139     ncopies = vect_get_num_copies (loop_vinfo, vectype);

7141   gcc_assert (ncopies >= 1);

7143   /* FORNOW. This restriction should be relaxed.  */
7144   if (loop && nested_in_vect_loop_p (loop, stmt_info) && ncopies > 1)
7146       if (dump_enabled_p ())
7147	 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7148			  "multiple types in nested loop.\n");
7152   if (!vect_check_store_rhs (vinfo, stmt_info, slp_node,
7153			      op, &rhs_dt, &rhs_vectype, &vls_type))
7156   elem_type = TREE_TYPE (vectype);
7157   vec_mode = TYPE_MODE (vectype);

7159   if (!STMT_VINFO_DATA_REF (stmt_info))

7162   vect_memory_access_type memory_access_type;
7163   enum dr_alignment_support alignment_support_scheme;
7164   if (!get_load_store_type (vinfo, stmt_info, vectype, slp_node, mask, vls_type,
7165			     ncopies, &memory_access_type,
7166			     &alignment_support_scheme, &gs_info))

7171       if (memory_access_type == VMAT_CONTIGUOUS)
7173	   if (!VECTOR_MODE_P (vec_mode)
7174	       || !can_vec_mask_load_store_p (vec_mode,
7175					      TYPE_MODE (mask_vectype), false))
7178       else if (memory_access_type != VMAT_LOAD_STORE_LANES
7179	       && (memory_access_type != VMAT_GATHER_SCATTER
7180		   || (gs_info.decl && !VECTOR_BOOLEAN_TYPE_P (mask_vectype))))
7182	   if (dump_enabled_p ())
7183	     dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7184			      "unsupported access type for masked store.\n");

7190   /* FORNOW. In some cases can vectorize even if data-type not supported
7191      (e.g. - array initialization with 0).  */
7192   if (optab_handler (mov_optab, vec_mode) == CODE_FOR_nothing)
7196   dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info), *first_dr_info = NULL;
7197   grouped_store = (STMT_VINFO_GROUPED_ACCESS (stmt_info)
7198		    && memory_access_type != VMAT_GATHER_SCATTER
7199		    && (slp || memory_access_type != VMAT_CONTIGUOUS));
7202       first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
7203       first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
7204       group_size = DR_GROUP_SIZE (first_stmt_info);
7208       first_stmt_info = stmt_info;
7209       first_dr_info = dr_info;
7210       group_size = vec_num = 1;

7213   if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) > 1 && !vec_stmt)
7215       if (!check_scan_store (vinfo, stmt_info, vectype, rhs_dt, slp, mask,
7216			      memory_access_type))

7220   if (!vec_stmt) /* transformation not required.  */
7222       STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) = memory_access_type;
7225	   && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo))
7226	 check_load_store_for_partial_vectors (loop_vinfo, vectype, vls_type,
7227					       group_size, memory_access_type,
7231	   && !vect_maybe_update_slp_op_vectype (SLP_TREE_CHILDREN (slp_node)[0],
7234	   if (dump_enabled_p ())
7235	     dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7236			      "incompatible vector types for invariants\n");

7240       if (dump_enabled_p ()
7241	   && memory_access_type != VMAT_ELEMENTWISE
7242	   && memory_access_type != VMAT_GATHER_SCATTER
7243	   && alignment_support_scheme != dr_aligned)
7244	 dump_printf_loc (MSG_NOTE, vect_location,
7245			  "Vectorizing an unaligned access.\n");

7247       STMT_VINFO_TYPE (stmt_info) = store_vec_info_type;
7248       vect_model_store_cost (vinfo, stmt_info, ncopies,
7249			      memory_access_type, vls_type, slp_node, cost_vec);
7252   gcc_assert (memory_access_type == STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info));

7256   ensure_base_align (dr_info);

7258   if (memory_access_type == VMAT_GATHER_SCATTER && gs_info.decl)
7260 tree vec_oprnd0
= NULL_TREE
, vec_oprnd1
= NULL_TREE
, src
;
7261 tree arglist
= TYPE_ARG_TYPES (TREE_TYPE (gs_info
.decl
));
7262 tree rettype
, srctype
, ptrtype
, idxtype
, masktype
, scaletype
;
7263 tree ptr
, var
, scale
, vec_mask
;
7264 tree mask_arg
= NULL_TREE
, mask_op
= NULL_TREE
, perm_mask
= NULL_TREE
;
7265 tree mask_halfvectype
= mask_vectype
;
7266 edge pe
= loop_preheader_edge (loop
);
7269 enum { NARROW
, NONE
, WIDEN
} modifier
;
7270 poly_uint64 scatter_off_nunits
7271 = TYPE_VECTOR_SUBPARTS (gs_info
.offset_vectype
);
7273 if (known_eq (nunits
, scatter_off_nunits
))
7275 else if (known_eq (nunits
* 2, scatter_off_nunits
))
7279 /* Currently gathers and scatters are only supported for
7280 fixed-length vectors. */
7281 unsigned int count
= scatter_off_nunits
.to_constant ();
7282 vec_perm_builder
sel (count
, count
, 1);
7283 for (i
= 0; i
< (unsigned int) count
; ++i
)
7284 sel
.quick_push (i
| (count
/ 2));
7286 vec_perm_indices
indices (sel
, 1, count
);
7287 perm_mask
= vect_gen_perm_mask_checked (gs_info
.offset_vectype
,
7289 gcc_assert (perm_mask
!= NULL_TREE
);
7291 else if (known_eq (nunits
, scatter_off_nunits
* 2))
7295 /* Currently gathers and scatters are only supported for
7296 fixed-length vectors. */
7297 unsigned int count
= nunits
.to_constant ();
7298 vec_perm_builder
sel (count
, count
, 1);
7299 for (i
= 0; i
< (unsigned int) count
; ++i
)
7300 sel
.quick_push (i
| (count
/ 2));
7302 vec_perm_indices
indices (sel
, 2, count
);
7303 perm_mask
= vect_gen_perm_mask_checked (vectype
, indices
);
7304 gcc_assert (perm_mask
!= NULL_TREE
);
7308 mask_halfvectype
= truth_type_for (gs_info
.offset_vectype
);
7313 rettype
= TREE_TYPE (TREE_TYPE (gs_info
.decl
));
7314 ptrtype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
7315 masktype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
7316 idxtype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
7317 srctype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
7318 scaletype
= TREE_VALUE (arglist
);
7320 gcc_checking_assert (TREE_CODE (masktype
) == INTEGER_TYPE
7321 && TREE_CODE (rettype
) == VOID_TYPE
);
7323 ptr
= fold_convert (ptrtype
, gs_info
.base
);
7324 if (!is_gimple_min_invariant (ptr
))
7326 ptr
= force_gimple_operand (ptr
, &seq
, true, NULL_TREE
);
7327 new_bb
= gsi_insert_seq_on_edge_immediate (pe
, seq
);
7328 gcc_assert (!new_bb
);
7331 if (mask
== NULL_TREE
)
7333 mask_arg
= build_int_cst (masktype
, -1);
7334 mask_arg
= vect_init_vector (vinfo
, stmt_info
,
7335 mask_arg
, masktype
, NULL
);
7338 scale
= build_int_cst (scaletype
, gs_info
.scale
);
7340 auto_vec
<tree
> vec_oprnds0
;
7341 auto_vec
<tree
> vec_oprnds1
;
7342 auto_vec
<tree
> vec_masks
;
7345 tree mask_vectype
= truth_type_for (vectype
);
7346 vect_get_vec_defs_for_operand (vinfo
, stmt_info
,
7348 ? ncopies
/ 2 : ncopies
,
7349 mask
, &vec_masks
, mask_vectype
);
7351 vect_get_vec_defs_for_operand (vinfo
, stmt_info
,
7353 ? ncopies
/ 2 : ncopies
,
7354 gs_info
.offset
, &vec_oprnds0
);
7355 vect_get_vec_defs_for_operand (vinfo
, stmt_info
,
7357 ? ncopies
/ 2 : ncopies
,
7359 for (j
= 0; j
< ncopies
; ++j
)
7361 if (modifier
== WIDEN
)
7364 op
= permute_vec_elements (vinfo
, vec_oprnd0
, vec_oprnd0
,
7365 perm_mask
, stmt_info
, gsi
);
7367 op
= vec_oprnd0
= vec_oprnds0
[j
/ 2];
7368 src
= vec_oprnd1
= vec_oprnds1
[j
];
7370 mask_op
= vec_mask
= vec_masks
[j
];
7372 else if (modifier
== NARROW
)
7375 src
= permute_vec_elements (vinfo
, vec_oprnd1
, vec_oprnd1
,
7376 perm_mask
, stmt_info
, gsi
);
7378 src
= vec_oprnd1
= vec_oprnds1
[j
/ 2];
7379 op
= vec_oprnd0
= vec_oprnds0
[j
];
7381 mask_op
= vec_mask
= vec_masks
[j
/ 2];
7385 op
= vec_oprnd0
= vec_oprnds0
[j
];
7386 src
= vec_oprnd1
= vec_oprnds1
[j
];
7388 mask_op
= vec_mask
= vec_masks
[j
];
7391 if (!useless_type_conversion_p (srctype
, TREE_TYPE (src
)))
7393 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (src
)),
7394 TYPE_VECTOR_SUBPARTS (srctype
)));
7395 var
= vect_get_new_ssa_name (srctype
, vect_simple_var
);
7396 src
= build1 (VIEW_CONVERT_EXPR
, srctype
, src
);
7398 = gimple_build_assign (var
, VIEW_CONVERT_EXPR
, src
);
7399 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
7403 if (!useless_type_conversion_p (idxtype
, TREE_TYPE (op
)))
7405 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op
)),
7406 TYPE_VECTOR_SUBPARTS (idxtype
)));
7407 var
= vect_get_new_ssa_name (idxtype
, vect_simple_var
);
7408 op
= build1 (VIEW_CONVERT_EXPR
, idxtype
, op
);
7410 = gimple_build_assign (var
, VIEW_CONVERT_EXPR
, op
);
7411 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
7419 if (modifier
== NARROW
)
7421 var
= vect_get_new_ssa_name (mask_halfvectype
,
7424 = gimple_build_assign (var
, (j
& 1) ? VEC_UNPACK_HI_EXPR
7425 : VEC_UNPACK_LO_EXPR
,
7427 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
7430 tree optype
= TREE_TYPE (mask_arg
);
7431 if (TYPE_MODE (masktype
) == TYPE_MODE (optype
))
7434 utype
= lang_hooks
.types
.type_for_mode (TYPE_MODE (optype
), 1);
7435 var
= vect_get_new_ssa_name (utype
, vect_scalar_var
);
7436 mask_arg
= build1 (VIEW_CONVERT_EXPR
, utype
, mask_arg
);
7438 = gimple_build_assign (var
, VIEW_CONVERT_EXPR
, mask_arg
);
7439 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
7441 if (!useless_type_conversion_p (masktype
, utype
))
7443 gcc_assert (TYPE_PRECISION (utype
)
7444 <= TYPE_PRECISION (masktype
));
7445 var
= vect_get_new_ssa_name (masktype
, vect_scalar_var
);
7446 new_stmt
= gimple_build_assign (var
, NOP_EXPR
, mask_arg
);
7447 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
7453 = gimple_build_call (gs_info
.decl
, 5, ptr
, mask_arg
, op
, src
, scale
);
7454 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
7456 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (new_stmt
);
7458 *vec_stmt
= STMT_VINFO_VEC_STMTS (stmt_info
)[0];
7461 else if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) >= 3)
7462 return vectorizable_scan_store (vinfo
, stmt_info
, gsi
, vec_stmt
, ncopies
);
7464 if (STMT_VINFO_GROUPED_ACCESS (stmt_info
))
7465 DR_GROUP_STORE_COUNT (DR_GROUP_FIRST_ELEMENT (stmt_info
))++;
7470   gcc_assert (!loop || !nested_in_vect_loop_p (loop, stmt_info));

7472       /* We vectorize all the stmts of the interleaving group when we
7473	  reach the last stmt in the group.  */
7474       if (DR_GROUP_STORE_COUNT (first_stmt_info)
7475	   < DR_GROUP_SIZE (first_stmt_info)
7484	   grouped_store = false;
7485	   /* VEC_NUM is the number of vect stmts to be created for this
	      group.  */
7487	   vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
7488	   first_stmt_info = SLP_TREE_SCALAR_STMTS (slp_node)[0];
7489	   gcc_assert (DR_GROUP_FIRST_ELEMENT (first_stmt_info)
7490		       == first_stmt_info);
7491	   first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
7492	   op = vect_get_store_rhs (first_stmt_info);
7495	 /* VEC_NUM is the number of vect stmts to be created for this
	    group.  */
7497	 vec_num = group_size;

7499       ref_type = get_group_alias_ptr_type (first_stmt_info);
7502     ref_type = reference_alias_ptr_type (DR_REF (first_dr_info->dr));

7504   if (dump_enabled_p ())
7505     dump_printf_loc (MSG_NOTE, vect_location,
7506		      "transform store. ncopies = %d\n", ncopies);
7508 if (memory_access_type
== VMAT_ELEMENTWISE
7509 || memory_access_type
== VMAT_STRIDED_SLP
)
7511 gimple_stmt_iterator incr_gsi
;
7517 tree stride_base
, stride_step
, alias_off
;
7521 /* Checked by get_load_store_type. */
7522 unsigned int const_nunits
= nunits
.to_constant ();
7524 gcc_assert (!LOOP_VINFO_FULLY_MASKED_P (loop_vinfo
));
7525 gcc_assert (!nested_in_vect_loop_p (loop
, stmt_info
));
7527 dr_offset
= get_dr_vinfo_offset (vinfo
, first_dr_info
);
7529 = fold_build_pointer_plus
7530 (DR_BASE_ADDRESS (first_dr_info
->dr
),
7531 size_binop (PLUS_EXPR
,
7532 convert_to_ptrofftype (dr_offset
),
7533 convert_to_ptrofftype (DR_INIT (first_dr_info
->dr
))));
7534 stride_step
= fold_convert (sizetype
, DR_STEP (first_dr_info
->dr
));
7536 /* For a store with loop-invariant (but other than power-of-2)
7537 stride (i.e. not a grouped access) like so:
7539 for (i = 0; i < n; i += stride)
7542 we generate a new induction variable and new stores from
7543 the components of the (vectorized) rhs:
7545 for (j = 0; ; j += VF*stride)
7550	      array[j + stride] = tmp2;
	      ...  */

7554       unsigned nstores = const_nunits;
7556       tree ltype = elem_type;
7557       tree lvectype = vectype;
7560 if (group_size
< const_nunits
7561 && const_nunits
% group_size
== 0)
7563 nstores
= const_nunits
/ group_size
;
7565 ltype
= build_vector_type (elem_type
, group_size
);
7568 /* First check if vec_extract optab doesn't support extraction
7569 of vector elts directly. */
7570 scalar_mode elmode
= SCALAR_TYPE_MODE (elem_type
);
7572 if (!VECTOR_MODE_P (TYPE_MODE (vectype
))
7573 || !related_vector_mode (TYPE_MODE (vectype
), elmode
,
7574 group_size
).exists (&vmode
)
7575 || (convert_optab_handler (vec_extract_optab
,
7576 TYPE_MODE (vectype
), vmode
)
7577 == CODE_FOR_nothing
))
7579 /* Try to avoid emitting an extract of vector elements
7580 by performing the extracts using an integer type of the
7581 same size, extracting from a vector of those and then
7582 re-interpreting it as the original vector type if
7585 = group_size
* GET_MODE_BITSIZE (elmode
);
7586 unsigned int lnunits
= const_nunits
/ group_size
;
7587 /* If we can't construct such a vector fall back to
7588 element extracts from the original vector type and
7589 element size stores. */
7590 if (int_mode_for_size (lsize
, 0).exists (&elmode
)
7591 && VECTOR_MODE_P (TYPE_MODE (vectype
))
7592 && related_vector_mode (TYPE_MODE (vectype
), elmode
,
7593 lnunits
).exists (&vmode
)
7594 && (convert_optab_handler (vec_extract_optab
,
7596 != CODE_FOR_nothing
))
7600 ltype
= build_nonstandard_integer_type (lsize
, 1);
7601 lvectype
= build_vector_type (ltype
, nstores
);
7603 /* Else fall back to vector extraction anyway.
7604 Fewer stores are more important than avoiding spilling
7605 of the vector we extract from. Compared to the
7606 construction case in vectorizable_load no store-forwarding
7607 issue exists here for reasonable archs. */
7610 else if (group_size
>= const_nunits
7611 && group_size
% const_nunits
== 0)
7614 lnel
= const_nunits
;
7618 ltype
= build_aligned_type (ltype
, TYPE_ALIGN (elem_type
));
7619 ncopies
= SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
);
7622 ivstep
= stride_step
;
7623 ivstep
= fold_build2 (MULT_EXPR
, TREE_TYPE (ivstep
), ivstep
,
7624 build_int_cst (TREE_TYPE (ivstep
), vf
));
7626 standard_iv_increment_position (loop
, &incr_gsi
, &insert_after
);
7628 stride_base
= cse_and_gimplify_to_preheader (loop_vinfo
, stride_base
);
7629 ivstep
= cse_and_gimplify_to_preheader (loop_vinfo
, ivstep
);
7630 create_iv (stride_base
, ivstep
, NULL
,
7631 loop
, &incr_gsi
, insert_after
,
7633 incr
= gsi_stmt (incr_gsi
);
7635 stride_step
= cse_and_gimplify_to_preheader (loop_vinfo
, stride_step
);
7637 alias_off
= build_int_cst (ref_type
, 0);
7638 stmt_vec_info next_stmt_info
= first_stmt_info
;
7639 for (g
= 0; g
< group_size
; g
++)
7641 running_off
= offvar
;
7644 tree size
= TYPE_SIZE_UNIT (ltype
);
7645 tree pos
= fold_build2 (MULT_EXPR
, sizetype
, size_int (g
),
7647 tree newoff
= copy_ssa_name (running_off
, NULL
);
7648 incr
= gimple_build_assign (newoff
, POINTER_PLUS_EXPR
,
7650 vect_finish_stmt_generation (vinfo
, stmt_info
, incr
, gsi
);
7651 running_off
= newoff
;
7654 op
= vect_get_store_rhs (next_stmt_info
);
7655 vect_get_vec_defs (vinfo
, next_stmt_info
, slp_node
, ncopies
,
7657 unsigned int group_el
= 0;
7658 unsigned HOST_WIDE_INT
7659 elsz
= tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype
)));
7660 for (j
= 0; j
< ncopies
; j
++)
7662 vec_oprnd
= vec_oprnds
[j
];
7663 /* Pun the vector to extract from if necessary. */
7664 if (lvectype
!= vectype
)
7666 tree tem
= make_ssa_name (lvectype
);
7668 = gimple_build_assign (tem
, build1 (VIEW_CONVERT_EXPR
,
7669 lvectype
, vec_oprnd
));
7670 vect_finish_stmt_generation (vinfo
, stmt_info
, pun
, gsi
);
7673 for (i
= 0; i
< nstores
; i
++)
7675 tree newref
, newoff
;
7676 gimple
*incr
, *assign
;
7677 tree size
= TYPE_SIZE (ltype
);
7678 /* Extract the i'th component. */
7679 tree pos
= fold_build2 (MULT_EXPR
, bitsizetype
,
7680 bitsize_int (i
), size
);
7681 tree elem
= fold_build3 (BIT_FIELD_REF
, ltype
, vec_oprnd
,
7684 elem
= force_gimple_operand_gsi (gsi
, elem
, true,
7688 tree this_off
= build_int_cst (TREE_TYPE (alias_off
),
7690 newref
= build2 (MEM_REF
, ltype
,
7691 running_off
, this_off
);
7692 vect_copy_ref_info (newref
, DR_REF (first_dr_info
->dr
));
7694 /* And store it to *running_off. */
7695 assign
= gimple_build_assign (newref
, elem
);
7696 vect_finish_stmt_generation (vinfo
, stmt_info
, assign
, gsi
);
7700 || group_el
== group_size
)
7702 newoff
= copy_ssa_name (running_off
, NULL
);
7703 incr
= gimple_build_assign (newoff
, POINTER_PLUS_EXPR
,
7704 running_off
, stride_step
);
7705 vect_finish_stmt_generation (vinfo
, stmt_info
, incr
, gsi
);
7707 running_off
= newoff
;
7710 if (g
== group_size
- 1
7713 if (j
== 0 && i
== 0)
7715 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (assign
);
7719 next_stmt_info
= DR_GROUP_NEXT_ELEMENT (next_stmt_info
);
7724 vec_oprnds
.release ();
7728 auto_vec
<tree
> dr_chain (group_size
);
7729 oprnds
.create (group_size
);
/* Gather-scatter accesses perform only component accesses; alignment
   is irrelevant for them.  */
7733 if (memory_access_type
== VMAT_GATHER_SCATTER
)
7734 alignment_support_scheme
= dr_unaligned_supported
;
7736 alignment_support_scheme
7737 = vect_supportable_dr_alignment (vinfo
, first_dr_info
, false);
7739 gcc_assert (alignment_support_scheme
);
7740 vec_loop_masks
*loop_masks
7741 = (loop_vinfo
&& LOOP_VINFO_FULLY_MASKED_P (loop_vinfo
)
7742 ? &LOOP_VINFO_MASKS (loop_vinfo
)
7744 vec_loop_lens
*loop_lens
7745 = (loop_vinfo
&& LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo
)
7746 ? &LOOP_VINFO_LENS (loop_vinfo
)
7749 /* Shouldn't go with length-based approach if fully masked. */
7750 gcc_assert (!loop_lens
|| !loop_masks
);
7752 /* Targets with store-lane instructions must not require explicit
7753 realignment. vect_supportable_dr_alignment always returns either
7754 dr_aligned or dr_unaligned_supported for masked operations. */
7755 gcc_assert ((memory_access_type
!= VMAT_LOAD_STORE_LANES
7758 || alignment_support_scheme
== dr_aligned
7759 || alignment_support_scheme
== dr_unaligned_supported
);
7761 if (memory_access_type
== VMAT_CONTIGUOUS_DOWN
7762 || memory_access_type
== VMAT_CONTIGUOUS_REVERSE
)
7763 offset
= size_int (-TYPE_VECTOR_SUBPARTS (vectype
) + 1);
7766 tree vec_offset
= NULL_TREE
;
7767 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info
))
7769 aggr_type
= NULL_TREE
;
7772 else if (memory_access_type
== VMAT_GATHER_SCATTER
)
7774 aggr_type
= elem_type
;
7775 vect_get_strided_load_store_ops (stmt_info
, loop_vinfo
, &gs_info
,
7776 &bump
, &vec_offset
);
7780 if (memory_access_type
== VMAT_LOAD_STORE_LANES
)
7781 aggr_type
= build_array_type_nelts (elem_type
, vec_num
* nunits
);
7783 aggr_type
= vectype
;
7784 bump
= vect_get_data_ptr_increment (vinfo
, dr_info
, aggr_type
,
7785 memory_access_type
);
7789 LOOP_VINFO_HAS_MASK_STORE (loop_vinfo
) = true;
/* In case the vectorization factor (VF) is bigger than the number
   of elements that we can fit in a vectype (nunits), we have to generate
   more than one vector stmt - i.e., we need to "unroll" the
   vector stmt by a factor of VF/nunits.  */
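/* For example (numbers purely illustrative): with VF == 8 and a 4-element
   vectype, ncopies == VF/nunits == 2, so each scalar store in the loop is
   replaced by two vector stores per vectorized iteration.  */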
/* In case of interleaving (non-unit grouped access):

     S1:  &base + 2 = x2
     S2:  &base = x0
     S3:  &base + 1 = x1
     S4:  &base + 3 = x3

   We create vectorized stores starting from base address (the access of the
   first stmt in the chain (S2 in the above example), when the last store stmt
   of the chain (S4) is reached:

     VS1: &base = vx2
     VS2: &base + vec_size*1 = vx0
     VS3: &base + vec_size*2 = vx1
     VS4: &base + vec_size*3 = vx3

   Then permutation statements are generated:

     VS5: vx5 = VEC_PERM_EXPR < vx0, vx3, {0, 8, 1, 9, 2, 10, 3, 11} >
     VS6: vx6 = VEC_PERM_EXPR < vx0, vx3, {4, 12, 5, 13, 6, 14, 7, 15} >
     ...

   And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
   (the order of the data-refs in the output of vect_permute_store_chain
   corresponds to the order of scalar stmts in the interleaving chain - see
   the documentation of vect_permute_store_chain()).

   In case of both multiple types and interleaving, above vector stores and
   permutation stmts are created for every copy.  The result vector stmts are
   put in STMT_VINFO_VEC_STMT for the first copy and in the corresponding
   STMT_VINFO_RELATED_STMT for the next copies.  */
7829 auto_vec
<tree
> vec_masks
;
7830 tree vec_mask
= NULL
;
7831 auto_vec
<tree
> vec_offsets
;
7832 auto_vec
<vec
<tree
> > gvec_oprnds
;
7833 gvec_oprnds
.safe_grow_cleared (group_size
, true);
7834 for (j
= 0; j
< ncopies
; j
++)
7841 /* Get vectorized arguments for SLP_NODE. */
7842 vect_get_vec_defs (vinfo
, stmt_info
, slp_node
, 1,
7844 vec_oprnd
= vec_oprnds
[0];
7848 /* For interleaved stores we collect vectorized defs for all the
7849 stores in the group in DR_CHAIN and OPRNDS. DR_CHAIN is then
7850 used as an input to vect_permute_store_chain().
7852 If the store is not grouped, DR_GROUP_SIZE is 1, and DR_CHAIN
7853 and OPRNDS are of size 1. */
7854 stmt_vec_info next_stmt_info
= first_stmt_info
;
7855 for (i
= 0; i
< group_size
; i
++)
/* Since gaps are not supported for interleaved stores,
   DR_GROUP_SIZE is the exact number of stmts in the chain.
   Therefore, NEXT_STMT_INFO can't be NULL_TREE.  In case
   there is no interleaving, DR_GROUP_SIZE is 1,
   and only one iteration of the loop will be executed.  */
7862 op
= vect_get_store_rhs (next_stmt_info
);
7863 vect_get_vec_defs_for_operand (vinfo
, next_stmt_info
,
7864 ncopies
, op
, &gvec_oprnds
[i
]);
7865 vec_oprnd
= gvec_oprnds
[i
][0];
7866 dr_chain
.quick_push (gvec_oprnds
[i
][0]);
7867 oprnds
.quick_push (gvec_oprnds
[i
][0]);
7868 next_stmt_info
= DR_GROUP_NEXT_ELEMENT (next_stmt_info
);
7872 vect_get_vec_defs_for_operand (vinfo
, stmt_info
, ncopies
,
7873 mask
, &vec_masks
, mask_vectype
);
7874 vec_mask
= vec_masks
[0];
/* We should have caught mismatched types earlier.  */
7879 gcc_assert (useless_type_conversion_p (vectype
,
7880 TREE_TYPE (vec_oprnd
)));
7881 bool simd_lane_access_p
7882 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) != 0;
7883 if (simd_lane_access_p
7885 && TREE_CODE (DR_BASE_ADDRESS (first_dr_info
->dr
)) == ADDR_EXPR
7886 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr_info
->dr
), 0))
7887 && integer_zerop (get_dr_vinfo_offset (vinfo
, first_dr_info
))
7888 && integer_zerop (DR_INIT (first_dr_info
->dr
))
7889 && alias_sets_conflict_p (get_alias_set (aggr_type
),
7890 get_alias_set (TREE_TYPE (ref_type
))))
7892 dataref_ptr
= unshare_expr (DR_BASE_ADDRESS (first_dr_info
->dr
));
7893 dataref_offset
= build_int_cst (ref_type
, 0);
7895 else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info
))
7897 vect_get_gather_scatter_ops (vinfo
, loop
, stmt_info
, &gs_info
,
7898 &dataref_ptr
, &vec_offsets
, ncopies
);
7899 vec_offset
= vec_offsets
[0];
7903 = vect_create_data_ref_ptr (vinfo
, first_stmt_info
, aggr_type
,
7904 simd_lane_access_p
? loop
: NULL
,
7905 offset
, &dummy
, gsi
, &ptr_incr
,
7906 simd_lane_access_p
, NULL_TREE
, bump
);
7910 /* For interleaved stores we created vectorized defs for all the
7911 defs stored in OPRNDS in the previous iteration (previous copy).
7912 DR_CHAIN is then used as an input to vect_permute_store_chain().
7913 If the store is not grouped, DR_GROUP_SIZE is 1, and DR_CHAIN and
7914 OPRNDS are of size 1. */
7915 for (i
= 0; i
< group_size
; i
++)
7917 vec_oprnd
= gvec_oprnds
[i
][j
];
7918 dr_chain
[i
] = gvec_oprnds
[i
][j
];
7919 oprnds
[i
] = gvec_oprnds
[i
][j
];
7922 vec_mask
= vec_masks
[j
];
7925 = int_const_binop (PLUS_EXPR
, dataref_offset
, bump
);
7926 else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info
))
7927 vec_offset
= vec_offsets
[j
];
7929 dataref_ptr
= bump_vector_ptr (vinfo
, dataref_ptr
, ptr_incr
, gsi
,
7933 if (memory_access_type
== VMAT_LOAD_STORE_LANES
)
7937 /* Get an array into which we can store the individual vectors. */
7938 vec_array
= create_vector_array (vectype
, vec_num
);
7940 /* Invalidate the current contents of VEC_ARRAY. This should
7941 become an RTL clobber too, which prevents the vector registers
7942 from being upward-exposed. */
7943 vect_clobber_variable (vinfo
, stmt_info
, gsi
, vec_array
);
7945 /* Store the individual vectors into the array. */
7946 for (i
= 0; i
< vec_num
; i
++)
7948 vec_oprnd
= dr_chain
[i
];
7949 write_vector_array (vinfo
, stmt_info
,
7950 gsi
, vec_oprnd
, vec_array
, i
);
7953 tree final_mask
= NULL
;
7955 final_mask
= vect_get_loop_mask (gsi
, loop_masks
, ncopies
,
7958 final_mask
= prepare_load_store_mask (mask_vectype
, final_mask
,
/* Emit:
     MASK_STORE_LANES (DATAREF_PTR, ALIAS_PTR, VEC_MASK, VEC_ARRAY).  */
7967 unsigned int align
= TYPE_ALIGN_UNIT (TREE_TYPE (vectype
));
7968 tree alias_ptr
= build_int_cst (ref_type
, align
);
7969 call
= gimple_build_call_internal (IFN_MASK_STORE_LANES
, 4,
7970 dataref_ptr
, alias_ptr
,
7971 final_mask
, vec_array
);
/* Emit:
     MEM_REF[...all elements...] = STORE_LANES (VEC_ARRAY).  */
7977 data_ref
= create_array_ref (aggr_type
, dataref_ptr
, ref_type
);
7978 call
= gimple_build_call_internal (IFN_STORE_LANES
, 1,
7980 gimple_call_set_lhs (call
, data_ref
);
7982 gimple_call_set_nothrow (call
, true);
7983 vect_finish_stmt_generation (vinfo
, stmt_info
, call
, gsi
);
7986 /* Record that VEC_ARRAY is now dead. */
7987 vect_clobber_variable (vinfo
, stmt_info
, gsi
, vec_array
);
7995 result_chain
.create (group_size
);
7997 vect_permute_store_chain (vinfo
, dr_chain
, group_size
, stmt_info
,
7998 gsi
, &result_chain
);
8001 stmt_vec_info next_stmt_info
= first_stmt_info
;
8002 for (i
= 0; i
< vec_num
; i
++)
8005 unsigned HOST_WIDE_INT align
;
8007 tree final_mask
= NULL_TREE
;
8009 final_mask
= vect_get_loop_mask (gsi
, loop_masks
,
8011 vectype
, vec_num
* j
+ i
);
8013 final_mask
= prepare_load_store_mask (mask_vectype
, final_mask
,
8016 if (memory_access_type
== VMAT_GATHER_SCATTER
)
8018 tree scale
= size_int (gs_info
.scale
);
8021 call
= gimple_build_call_internal
8022 (IFN_MASK_SCATTER_STORE
, 5, dataref_ptr
, vec_offset
,
8023 scale
, vec_oprnd
, final_mask
);
8025 call
= gimple_build_call_internal
8026 (IFN_SCATTER_STORE
, 4, dataref_ptr
, vec_offset
,
8028 gimple_call_set_nothrow (call
, true);
8029 vect_finish_stmt_generation (vinfo
, stmt_info
, call
, gsi
);
8035 /* Bump the vector pointer. */
8036 dataref_ptr
= bump_vector_ptr (vinfo
, dataref_ptr
, ptr_incr
,
8037 gsi
, stmt_info
, bump
);
8040 vec_oprnd
= vec_oprnds
[i
];
8041 else if (grouped_store
)
8042 /* For grouped stores vectorized defs are interleaved in
8043 vect_permute_store_chain(). */
8044 vec_oprnd
= result_chain
[i
];
8046 align
= known_alignment (DR_TARGET_ALIGNMENT (first_dr_info
));
8047 if (aligned_access_p (first_dr_info
))
8049 else if (DR_MISALIGNMENT (first_dr_info
) == -1)
8051 align
= dr_alignment (vect_dr_behavior (vinfo
, first_dr_info
));
8055 misalign
= DR_MISALIGNMENT (first_dr_info
);
8056 if (dataref_offset
== NULL_TREE
8057 && TREE_CODE (dataref_ptr
) == SSA_NAME
)
8058 set_ptr_info_alignment (get_ptr_info (dataref_ptr
), align
,
8061 if (memory_access_type
== VMAT_CONTIGUOUS_REVERSE
)
8063 tree perm_mask
= perm_mask_for_reverse (vectype
);
8064 tree perm_dest
= vect_create_destination_var
8065 (vect_get_store_rhs (stmt_info
), vectype
);
8066 tree new_temp
= make_ssa_name (perm_dest
);
8068 /* Generate the permute statement. */
8070 = gimple_build_assign (new_temp
, VEC_PERM_EXPR
, vec_oprnd
,
8071 vec_oprnd
, perm_mask
);
8072 vect_finish_stmt_generation (vinfo
, stmt_info
, perm_stmt
, gsi
);
8074 perm_stmt
= SSA_NAME_DEF_STMT (new_temp
);
8075 vec_oprnd
= new_temp
;
8078 /* Arguments are ready. Create the new vector stmt. */
8081 align
= least_bit_hwi (misalign
| align
);
8082 tree ptr
= build_int_cst (ref_type
, align
);
8084 = gimple_build_call_internal (IFN_MASK_STORE
, 4,
8086 final_mask
, vec_oprnd
);
8087 gimple_call_set_nothrow (call
, true);
8088 vect_finish_stmt_generation (vinfo
, stmt_info
, call
, gsi
);
8094 = vect_get_loop_len (loop_vinfo
, loop_lens
,
8095 vec_num
* ncopies
, vec_num
* j
+ i
);
8096 align
= least_bit_hwi (misalign
| align
);
8097 tree ptr
= build_int_cst (ref_type
, align
);
8098 machine_mode vmode
= TYPE_MODE (vectype
);
8099 opt_machine_mode new_ovmode
8100 = get_len_load_store_mode (vmode
, false);
8101 machine_mode new_vmode
= new_ovmode
.require ();
8102 /* Need conversion if it's wrapped with VnQI. */
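/* Illustrative sketch only; the vector modes and SSA names below are
   assumptions.  If the target provides LEN_STORE for the V16QI form but
   not for V4SI, the operand is re-punned before the call:

     vect_qi_1 = VIEW_CONVERT_EXPR<vector(16) unsigned char>(vect_x_2);
     .LEN_STORE (dataref_ptr_3, 16B, len_4, vect_qi_1);  */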
8103 if (vmode
!= new_vmode
)
8106 = build_vector_type_for_mode (unsigned_intQI_type_node
,
8109 = vect_get_new_ssa_name (new_vtype
, vect_simple_var
);
8111 = build1 (VIEW_CONVERT_EXPR
, new_vtype
, vec_oprnd
);
8113 = gimple_build_assign (var
, VIEW_CONVERT_EXPR
,
8115 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
,
8120 = gimple_build_call_internal (IFN_LEN_STORE
, 4, dataref_ptr
,
8121 ptr
, final_len
, vec_oprnd
);
8122 gimple_call_set_nothrow (call
, true);
8123 vect_finish_stmt_generation (vinfo
, stmt_info
, call
, gsi
);
8128 data_ref
= fold_build2 (MEM_REF
, vectype
,
8132 : build_int_cst (ref_type
, 0));
8133 if (aligned_access_p (first_dr_info
))
8135 else if (DR_MISALIGNMENT (first_dr_info
) == -1)
8136 TREE_TYPE (data_ref
)
8137 = build_aligned_type (TREE_TYPE (data_ref
),
8138 align
* BITS_PER_UNIT
);
8140 TREE_TYPE (data_ref
)
8141 = build_aligned_type (TREE_TYPE (data_ref
),
8142 TYPE_ALIGN (elem_type
));
8143 vect_copy_ref_info (data_ref
, DR_REF (first_dr_info
->dr
));
8144 new_stmt
= gimple_build_assign (data_ref
, vec_oprnd
);
8145 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
8151 next_stmt_info
= DR_GROUP_NEXT_ELEMENT (next_stmt_info
);
8152 if (!next_stmt_info
)
8159 *vec_stmt
= new_stmt
;
8160 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (new_stmt
);
8164 for (i
= 0; i
< group_size
; ++i
)
8166 vec
<tree
> oprndsi
= gvec_oprnds
[i
];
8170 result_chain
.release ();
8171 vec_oprnds
.release ();
/* Given a vector type VECTYPE, turn the permutation SEL into the equivalent
   VECTOR_CST mask.  No checks are made that the target platform supports the
   mask, so callers may wish to test can_vec_perm_const_p separately, or use
   vect_gen_perm_mask_checked.  */
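/* A usage sketch, modeled on existing callers such as perm_mask_for_reverse
   (the 3-element series relies on the single stepped pattern encoding):

     vec_perm_builder sel (nunits, 1, 3);
     for (int i = 0; i < 3; ++i)
       sel.quick_push (nunits - 1 - i);
     vec_perm_indices indices (sel, 1, nunits);
     if (can_vec_perm_const_p (TYPE_MODE (vectype), indices))
       mask = vect_gen_perm_mask_checked (vectype, indices);

   The result is a VECTOR_CST suitable as the selector of a VEC_PERM_EXPR.  */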
8182 vect_gen_perm_mask_any (tree vectype
, const vec_perm_indices
&sel
)
8186 poly_uint64 nunits
= sel
.length ();
8187 gcc_assert (known_eq (nunits
, TYPE_VECTOR_SUBPARTS (vectype
)));
8189 mask_type
= build_vector_type (ssizetype
, nunits
);
8190 return vec_perm_indices_to_tree (mask_type
, sel
);
8193 /* Checked version of vect_gen_perm_mask_any. Asserts can_vec_perm_const_p,
8194 i.e. that the target supports the pattern _for arbitrary input vectors_. */
8197 vect_gen_perm_mask_checked (tree vectype
, const vec_perm_indices
&sel
)
8199 gcc_assert (can_vec_perm_const_p (TYPE_MODE (vectype
), sel
));
8200 return vect_gen_perm_mask_any (vectype
, sel
);
/* Given vector variables X and Y that were generated for the scalar
   STMT_INFO, generate instructions to permute the vector elements of X and Y
   using permutation mask MASK_VEC, insert them at *GSI and return the
   permuted vector variable.  */
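/* The statement generated below has the shape (names illustrative):

     perm_dest_1 = VEC_PERM_EXPR <x_2, y_3, mask_vec_4>;

   with MASK_VEC typically built by vect_gen_perm_mask_checked.  */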
8209 permute_vec_elements (vec_info
*vinfo
,
8210 tree x
, tree y
, tree mask_vec
, stmt_vec_info stmt_info
,
8211 gimple_stmt_iterator
*gsi
)
8213 tree vectype
= TREE_TYPE (x
);
8214 tree perm_dest
, data_ref
;
8217 tree scalar_dest
= gimple_get_lhs (stmt_info
->stmt
);
8218 if (scalar_dest
&& TREE_CODE (scalar_dest
) == SSA_NAME
)
8219 perm_dest
= vect_create_destination_var (scalar_dest
, vectype
);
8221 perm_dest
= vect_get_new_vect_var (vectype
, vect_simple_var
, NULL
);
8222 data_ref
= make_ssa_name (perm_dest
);
8224 /* Generate the permute statement. */
8225 perm_stmt
= gimple_build_assign (data_ref
, VEC_PERM_EXPR
, x
, y
, mask_vec
);
8226 vect_finish_stmt_generation (vinfo
, stmt_info
, perm_stmt
, gsi
);
/* Hoist the definitions of all SSA uses on STMT_INFO out of the loop LOOP,
   inserting them on the loop's preheader edge.  Returns true if we
   were successful in doing so (and thus STMT_INFO can then be moved),
   otherwise returns false.  */
8237 hoist_defs_of_uses (stmt_vec_info stmt_info
, class loop
*loop
)
8243 FOR_EACH_SSA_TREE_OPERAND (op
, stmt_info
->stmt
, i
, SSA_OP_USE
)
8245 gimple
*def_stmt
= SSA_NAME_DEF_STMT (op
);
8246 if (!gimple_nop_p (def_stmt
)
8247 && flow_bb_inside_loop_p (loop
, gimple_bb (def_stmt
)))
/* Make sure we don't need to recurse.  While we could do
   so in simple cases, when there are more complex use webs
   we don't have an easy way to preserve stmt order to fulfil
   dependencies within them.  */
8255 if (gimple_code (def_stmt
) == GIMPLE_PHI
)
8257 FOR_EACH_SSA_TREE_OPERAND (op2
, def_stmt
, i2
, SSA_OP_USE
)
8259 gimple
*def_stmt2
= SSA_NAME_DEF_STMT (op2
);
8260 if (!gimple_nop_p (def_stmt2
)
8261 && flow_bb_inside_loop_p (loop
, gimple_bb (def_stmt2
)))
8271 FOR_EACH_SSA_TREE_OPERAND (op
, stmt_info
->stmt
, i
, SSA_OP_USE
)
8273 gimple
*def_stmt
= SSA_NAME_DEF_STMT (op
);
8274 if (!gimple_nop_p (def_stmt
)
8275 && flow_bb_inside_loop_p (loop
, gimple_bb (def_stmt
)))
8277 gimple_stmt_iterator gsi
= gsi_for_stmt (def_stmt
);
8278 gsi_remove (&gsi
, false);
8279 gsi_insert_on_edge_immediate (loop_preheader_edge (loop
), def_stmt
);
/* vectorizable_load.

   Check if STMT_INFO reads a non-scalar data-ref (array/pointer/structure)
   that can be vectorized.
   If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at GSI.
   Return true if STMT_INFO is vectorizable in this way.  */
8295 vectorizable_load (vec_info
*vinfo
,
8296 stmt_vec_info stmt_info
, gimple_stmt_iterator
*gsi
,
8297 gimple
**vec_stmt
, slp_tree slp_node
,
8298 stmt_vector_for_cost
*cost_vec
)
8301 tree vec_dest
= NULL
;
8302 tree data_ref
= NULL
;
8303 loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
);
8304 class loop
*loop
= NULL
;
8305 class loop
*containing_loop
= gimple_bb (stmt_info
->stmt
)->loop_father
;
8306 bool nested_in_vect_loop
= false;
8311 tree dataref_ptr
= NULL_TREE
;
8312 tree dataref_offset
= NULL_TREE
;
8313 gimple
*ptr_incr
= NULL
;
8316 unsigned int group_size
;
8317 poly_uint64 group_gap_adj
;
8318 tree msq
= NULL_TREE
, lsq
;
8319 tree offset
= NULL_TREE
;
8320 tree byte_offset
= NULL_TREE
;
8321 tree realignment_token
= NULL_TREE
;
8323 vec
<tree
> dr_chain
= vNULL
;
8324 bool grouped_load
= false;
8325 stmt_vec_info first_stmt_info
;
8326 stmt_vec_info first_stmt_info_for_drptr
= NULL
;
8327 bool compute_in_loop
= false;
8328 class loop
*at_loop
;
8330 bool slp
= (slp_node
!= NULL
);
8331 bool slp_perm
= false;
8332 bb_vec_info bb_vinfo
= dyn_cast
<bb_vec_info
> (vinfo
);
8335 gather_scatter_info gs_info
;
8337 enum vect_def_type mask_dt
= vect_unknown_def_type
;
8339 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
8342 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
8346 if (!STMT_VINFO_DATA_REF (stmt_info
))
/* ???  Alignment analysis for SLP looks at SLP_TREE_SCALAR_STMTS[0]
   for unpermuted loads but we get passed SLP_TREE_REPRESENTATIVE
   which can be different when reduction chains were re-ordered.
   Now that we figured out we're a dataref, reset stmt_info back to
   SLP_TREE_SCALAR_STMTS[0].  When we're SLP only, things should be
   refactored in a way to maintain the dr_vec_info pointer for the
   relevant access explicitly.  */
8356 stmt_vec_info orig_stmt_info
= stmt_info
;
8358 stmt_info
= SLP_TREE_SCALAR_STMTS (slp_node
)[0];
8360 tree mask
= NULL_TREE
, mask_vectype
= NULL_TREE
;
8361 if (gassign
*assign
= dyn_cast
<gassign
*> (stmt_info
->stmt
))
8363 scalar_dest
= gimple_assign_lhs (assign
);
8364 if (TREE_CODE (scalar_dest
) != SSA_NAME
)
8367 tree_code code
= gimple_assign_rhs_code (assign
);
8368 if (code
!= ARRAY_REF
8369 && code
!= BIT_FIELD_REF
8370 && code
!= INDIRECT_REF
8371 && code
!= COMPONENT_REF
8372 && code
!= IMAGPART_EXPR
8373 && code
!= REALPART_EXPR
8375 && TREE_CODE_CLASS (code
) != tcc_declaration
)
8380 gcall
*call
= dyn_cast
<gcall
*> (stmt_info
->stmt
);
8381 if (!call
|| !gimple_call_internal_p (call
))
8384 internal_fn ifn
= gimple_call_internal_fn (call
);
8385 if (!internal_load_fn_p (ifn
))
8388 scalar_dest
= gimple_call_lhs (call
);
8392 int mask_index
= internal_fn_mask_index (ifn
);
8393 if (mask_index
>= 0)
8395 mask
= gimple_call_arg (call
, mask_index
);
8396 if (!vect_check_scalar_mask (vinfo
, stmt_info
, mask
, &mask_dt
,
8402 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
8403 poly_uint64 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
8407 loop
= LOOP_VINFO_LOOP (loop_vinfo
);
8408 nested_in_vect_loop
= nested_in_vect_loop_p (loop
, stmt_info
);
8409 vf
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
);
/* Multiple types in SLP are handled by creating the appropriate number of
   vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
   case of SLP.  */
8420 ncopies
= vect_get_num_copies (loop_vinfo
, vectype
);
8422 gcc_assert (ncopies
>= 1);
8424 /* FORNOW. This restriction should be relaxed. */
8425 if (nested_in_vect_loop
&& ncopies
> 1)
8427 if (dump_enabled_p ())
8428 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
8429 "multiple types in nested loop.\n");
8433 /* Invalidate assumptions made by dependence analysis when vectorization
8434 on the unrolled body effectively re-orders stmts. */
8436 && STMT_VINFO_MIN_NEG_DIST (stmt_info
) != 0
8437 && maybe_gt (LOOP_VINFO_VECT_FACTOR (loop_vinfo
),
8438 STMT_VINFO_MIN_NEG_DIST (stmt_info
)))
8440 if (dump_enabled_p ())
8441 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
8442 "cannot perform implicit CSE when unrolling "
8443 "with negative dependence distance\n");
8447 elem_type
= TREE_TYPE (vectype
);
8448 mode
= TYPE_MODE (vectype
);
/* FORNOW.  In some cases we can vectorize even if the data type is not
   supported (e.g. for data copies).  */
8452 if (optab_handler (mov_optab
, mode
) == CODE_FOR_nothing
)
8454 if (dump_enabled_p ())
8455 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
8456 "Aligned load, but unsupported type.\n");
8460 /* Check if the load is a part of an interleaving chain. */
8461 if (STMT_VINFO_GROUPED_ACCESS (stmt_info
))
8463 grouped_load
= true;
8465 gcc_assert (!nested_in_vect_loop
);
8466 gcc_assert (!STMT_VINFO_GATHER_SCATTER_P (stmt_info
));
8468 first_stmt_info
= DR_GROUP_FIRST_ELEMENT (stmt_info
);
8469 group_size
= DR_GROUP_SIZE (first_stmt_info
);
8471 /* Refuse non-SLP vectorization of SLP-only groups. */
8472 if (!slp
&& STMT_VINFO_SLP_VECT_ONLY (first_stmt_info
))
8474 if (dump_enabled_p ())
8475 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
8476 "cannot vectorize load in non-SLP mode.\n");
8480 if (slp
&& SLP_TREE_LOAD_PERMUTATION (slp_node
).exists ())
8486 /* In BB vectorization we may not actually use a loaded vector
8487 accessing elements in excess of DR_GROUP_SIZE. */
8488 stmt_vec_info group_info
= SLP_TREE_SCALAR_STMTS (slp_node
)[0];
8489 group_info
= DR_GROUP_FIRST_ELEMENT (group_info
);
8490 unsigned HOST_WIDE_INT nunits
;
8491 unsigned j
, k
, maxk
= 0;
8492 FOR_EACH_VEC_ELT (SLP_TREE_LOAD_PERMUTATION (slp_node
), j
, k
)
8495 tree vectype
= STMT_VINFO_VECTYPE (group_info
);
8496 if (!TYPE_VECTOR_SUBPARTS (vectype
).is_constant (&nunits
)
8497 || maxk
>= (DR_GROUP_SIZE (group_info
) & ~(nunits
- 1)))
8499 if (dump_enabled_p ())
8500 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
8501 "BB vectorization with gaps at the end of "
8502 "a load is not supported\n");
8509 if (!vect_transform_slp_perm_load (vinfo
, slp_node
, tem
, NULL
, vf
,
8512 if (dump_enabled_p ())
8513 dump_printf_loc (MSG_MISSED_OPTIMIZATION
,
8515 "unsupported load permutation\n");
8520 /* Invalidate assumptions made by dependence analysis when vectorization
8521 on the unrolled body effectively re-orders stmts. */
8522 if (!PURE_SLP_STMT (stmt_info
)
8523 && STMT_VINFO_MIN_NEG_DIST (stmt_info
) != 0
8524 && maybe_gt (LOOP_VINFO_VECT_FACTOR (loop_vinfo
),
8525 STMT_VINFO_MIN_NEG_DIST (stmt_info
)))
8527 if (dump_enabled_p ())
8528 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
8529 "cannot perform implicit CSE when performing "
8530 "group loads with negative dependence distance\n");
8537 vect_memory_access_type memory_access_type
;
8538 enum dr_alignment_support alignment_support_scheme
;
8539 if (!get_load_store_type (vinfo
, stmt_info
, vectype
, slp_node
, mask
, VLS_LOAD
,
8540 ncopies
, &memory_access_type
,
8541 &alignment_support_scheme
, &gs_info
))
8546 if (memory_access_type
== VMAT_CONTIGUOUS
)
8548 machine_mode vec_mode
= TYPE_MODE (vectype
);
8549 if (!VECTOR_MODE_P (vec_mode
)
8550 || !can_vec_mask_load_store_p (vec_mode
,
8551 TYPE_MODE (mask_vectype
), true))
8554 else if (memory_access_type
!= VMAT_LOAD_STORE_LANES
8555 && memory_access_type
!= VMAT_GATHER_SCATTER
)
8557 if (dump_enabled_p ())
8558 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
8559 "unsupported access type for masked load.\n");
8564 if (!vec_stmt
) /* transformation not required. */
8567 STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info
) = memory_access_type
;
8570 && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo
))
8571 check_load_store_for_partial_vectors (loop_vinfo
, vectype
, VLS_LOAD
,
8572 group_size
, memory_access_type
,
8575 if (dump_enabled_p ()
8576 && memory_access_type
!= VMAT_ELEMENTWISE
8577 && memory_access_type
!= VMAT_GATHER_SCATTER
8578 && alignment_support_scheme
!= dr_aligned
)
8579 dump_printf_loc (MSG_NOTE
, vect_location
,
8580 "Vectorizing an unaligned access.\n");
8582 STMT_VINFO_TYPE (orig_stmt_info
) = load_vec_info_type
;
8583 vect_model_load_cost (vinfo
, stmt_info
, ncopies
, vf
, memory_access_type
,
8584 slp_node
, cost_vec
);
8589 gcc_assert (memory_access_type
8590 == STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info
));
8592 if (dump_enabled_p ())
8593 dump_printf_loc (MSG_NOTE
, vect_location
,
8594 "transform load. ncopies = %d\n", ncopies
);
8598 dr_vec_info
*dr_info
= STMT_VINFO_DR_INFO (stmt_info
), *first_dr_info
= NULL
;
8599 ensure_base_align (dr_info
);
8601 if (memory_access_type
== VMAT_GATHER_SCATTER
&& gs_info
.decl
)
8603 vect_build_gather_load_calls (vinfo
,
8604 stmt_info
, gsi
, vec_stmt
, &gs_info
, mask
);
8608 if (memory_access_type
== VMAT_INVARIANT
)
8610 gcc_assert (!grouped_load
&& !mask
&& !bb_vinfo
);
8611 /* If we have versioned for aliasing or the loop doesn't
8612 have any data dependencies that would preclude this,
8613 then we are sure this is a loop invariant load and
8614 thus we can insert it on the preheader edge. */
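/* For instance (source purely illustrative), in

     for (i = 0; i < n; ++i)
       a[i] = *p + 1;

   the load *p is loop invariant; when dependence analysis allows it the
   scalar load is emitted once on the preheader edge below and the loaded
   value is then splat into a vector via vect_init_vector.  */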
8615 bool hoist_p
= (LOOP_VINFO_NO_DATA_DEPENDENCIES (loop_vinfo
)
8616 && !nested_in_vect_loop
8617 && hoist_defs_of_uses (stmt_info
, loop
));
8620 gassign
*stmt
= as_a
<gassign
*> (stmt_info
->stmt
);
8621 if (dump_enabled_p ())
8622 dump_printf_loc (MSG_NOTE
, vect_location
,
8623 "hoisting out of the vectorized loop: %G", stmt
);
8624 scalar_dest
= copy_ssa_name (scalar_dest
);
8625 tree rhs
= unshare_expr (gimple_assign_rhs1 (stmt
));
8626 gsi_insert_on_edge_immediate
8627 (loop_preheader_edge (loop
),
8628 gimple_build_assign (scalar_dest
, rhs
));
8630 /* These copies are all equivalent, but currently the representation
8631 requires a separate STMT_VINFO_VEC_STMT for each one. */
8632 gimple_stmt_iterator gsi2
= *gsi
;
8634 for (j
= 0; j
< ncopies
; j
++)
8637 new_temp
= vect_init_vector (vinfo
, stmt_info
, scalar_dest
,
8640 new_temp
= vect_init_vector (vinfo
, stmt_info
, scalar_dest
,
8642 gimple
*new_stmt
= SSA_NAME_DEF_STMT (new_temp
);
8644 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
8648 *vec_stmt
= new_stmt
;
8649 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (new_stmt
);
8655 if (memory_access_type
== VMAT_ELEMENTWISE
8656 || memory_access_type
== VMAT_STRIDED_SLP
)
8658 gimple_stmt_iterator incr_gsi
;
8663 vec
<constructor_elt
, va_gc
> *v
= NULL
;
8664 tree stride_base
, stride_step
, alias_off
;
8665 /* Checked by get_load_store_type. */
8666 unsigned int const_nunits
= nunits
.to_constant ();
8667 unsigned HOST_WIDE_INT cst_offset
= 0;
8670 gcc_assert (!LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo
));
8671 gcc_assert (!nested_in_vect_loop
);
8675 first_stmt_info
= DR_GROUP_FIRST_ELEMENT (stmt_info
);
8676 first_dr_info
= STMT_VINFO_DR_INFO (first_stmt_info
);
8680 first_stmt_info
= stmt_info
;
8681 first_dr_info
= dr_info
;
8683 if (slp
&& grouped_load
)
8685 group_size
= DR_GROUP_SIZE (first_stmt_info
);
8686 ref_type
= get_group_alias_ptr_type (first_stmt_info
);
8692 = (tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype
)))
8693 * vect_get_place_in_interleaving_chain (stmt_info
,
8696 ref_type
= reference_alias_ptr_type (DR_REF (dr_info
->dr
));
8699 dr_offset
= get_dr_vinfo_offset (vinfo
, first_dr_info
);
8701 = fold_build_pointer_plus
8702 (DR_BASE_ADDRESS (first_dr_info
->dr
),
8703 size_binop (PLUS_EXPR
,
8704 convert_to_ptrofftype (dr_offset
),
8705 convert_to_ptrofftype (DR_INIT (first_dr_info
->dr
))));
8706 stride_step
= fold_convert (sizetype
, DR_STEP (first_dr_info
->dr
));
/* For a load with loop-invariant (but other than power-of-2)
   stride (i.e. not a grouped access) like so:

     for (i = 0; i < n; i += stride)
       ... = array[i];

   we generate a new induction variable and new accesses to
   form a new vector (or vectors, depending on ncopies):

     for (j = 0; ; j += VF*stride)
       tmp1 = array[j];
       tmp2 = array[j + stride];
       ...
       vectemp = {tmp1, tmp2, ...}
       ...  */
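/* Concretely (counts are only an example): with a 4-element vectype and
   nloads == 4 the code below emits, per vectorized iteration,

     tmp_1 = array[j];
     tmp_2 = array[j + stride];
     tmp_3 = array[j + 2*stride];
     tmp_4 = array[j + 3*stride];
     vectemp_5 = {tmp_1, tmp_2, tmp_3, tmp_4};

   where the vector is built from a CONSTRUCTOR via vect_init_vector and
   the induction variable advances by VF*stride.  */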
8724 ivstep
= fold_build2 (MULT_EXPR
, TREE_TYPE (stride_step
), stride_step
,
8725 build_int_cst (TREE_TYPE (stride_step
), vf
));
8727 standard_iv_increment_position (loop
, &incr_gsi
, &insert_after
);
8729 stride_base
= cse_and_gimplify_to_preheader (loop_vinfo
, stride_base
);
8730 ivstep
= cse_and_gimplify_to_preheader (loop_vinfo
, ivstep
);
8731 create_iv (stride_base
, ivstep
, NULL
,
8732 loop
, &incr_gsi
, insert_after
,
8735 stride_step
= cse_and_gimplify_to_preheader (loop_vinfo
, stride_step
);
8737 running_off
= offvar
;
8738 alias_off
= build_int_cst (ref_type
, 0);
8739 int nloads
= const_nunits
;
8741 tree ltype
= TREE_TYPE (vectype
);
8742 tree lvectype
= vectype
;
8743 auto_vec
<tree
> dr_chain
;
8744 if (memory_access_type
== VMAT_STRIDED_SLP
)
8746 if (group_size
< const_nunits
)
8748 /* First check if vec_init optab supports construction from vector
8749 elts directly. Otherwise avoid emitting a constructor of
8750 vector elements by performing the loads using an integer type
8751 of the same size, constructing a vector of those and then
8752 re-interpreting it as the original vector type. This avoids a
8753 huge runtime penalty due to the general inability to perform
8754 store forwarding from smaller stores to a larger load. */
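/* Sketch of the fallback (the modes here are assumptions): for
   group_size == 2 and a V8SI vectype, when V8SI cannot be built from
   V2SI pieces the elements are instead loaded as four DImode scalars,
   a V4DI vector is assembled from them and punned back:

     di_1 = MEM[(long unsigned int *)off_2];
     ...
     vect_di_3 = {di_1, di_4, di_5, di_6};
     vect_x_7  = VIEW_CONVERT_EXPR<vector(8) int>(vect_di_3);

   avoiding a CONSTRUCTOR made of eight separate SImode element loads.  */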
8757 = vector_vector_composition_type (vectype
,
8758 const_nunits
/ group_size
,
8760 if (vtype
!= NULL_TREE
)
8762 nloads
= const_nunits
/ group_size
;
8771 lnel
= const_nunits
;
8774 ltype
= build_aligned_type (ltype
, TYPE_ALIGN (TREE_TYPE (vectype
)));
8776 /* Load vector(1) scalar_type if it's 1 element-wise vectype. */
8777 else if (nloads
== 1)
/* For SLP permutation support we need to load the whole group,
   not only the number of vector stmts the permutation result
   fits in.  */
/* We don't yet generate SLP_TREE_LOAD_PERMUTATIONs for
   variable VF.  */
8789 unsigned int const_vf
= vf
.to_constant ();
8790 ncopies
= CEIL (group_size
* const_vf
, const_nunits
);
8791 dr_chain
.create (ncopies
);
8794 ncopies
= SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
);
8796 unsigned int group_el
= 0;
8797 unsigned HOST_WIDE_INT
8798 elsz
= tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype
)));
8799 for (j
= 0; j
< ncopies
; j
++)
8802 vec_alloc (v
, nloads
);
8803 gimple
*new_stmt
= NULL
;
8804 for (i
= 0; i
< nloads
; i
++)
8806 tree this_off
= build_int_cst (TREE_TYPE (alias_off
),
8807 group_el
* elsz
+ cst_offset
);
8808 tree data_ref
= build2 (MEM_REF
, ltype
, running_off
, this_off
);
8809 vect_copy_ref_info (data_ref
, DR_REF (first_dr_info
->dr
));
8810 new_stmt
= gimple_build_assign (make_ssa_name (ltype
), data_ref
);
8811 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
8813 CONSTRUCTOR_APPEND_ELT (v
, NULL_TREE
,
8814 gimple_assign_lhs (new_stmt
));
8818 || group_el
== group_size
)
8820 tree newoff
= copy_ssa_name (running_off
);
8821 gimple
*incr
= gimple_build_assign (newoff
, POINTER_PLUS_EXPR
,
8822 running_off
, stride_step
);
8823 vect_finish_stmt_generation (vinfo
, stmt_info
, incr
, gsi
);
8825 running_off
= newoff
;
8831 tree vec_inv
= build_constructor (lvectype
, v
);
8832 new_temp
= vect_init_vector (vinfo
, stmt_info
,
8833 vec_inv
, lvectype
, gsi
);
8834 new_stmt
= SSA_NAME_DEF_STMT (new_temp
);
8835 if (lvectype
!= vectype
)
8837 new_stmt
= gimple_build_assign (make_ssa_name (vectype
),
8839 build1 (VIEW_CONVERT_EXPR
,
8840 vectype
, new_temp
));
8841 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
8848 dr_chain
.quick_push (gimple_assign_lhs (new_stmt
));
8850 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
8855 *vec_stmt
= new_stmt
;
8856 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (new_stmt
);
8862 vect_transform_slp_perm_load (vinfo
, slp_node
, dr_chain
, gsi
, vf
,
8868 if (memory_access_type
== VMAT_GATHER_SCATTER
8869 || (!slp
&& memory_access_type
== VMAT_CONTIGUOUS
))
8870 grouped_load
= false;
8874 first_stmt_info
= DR_GROUP_FIRST_ELEMENT (stmt_info
);
8875 group_size
= DR_GROUP_SIZE (first_stmt_info
);
8876 /* For SLP vectorization we directly vectorize a subchain
8877 without permutation. */
8878 if (slp
&& ! SLP_TREE_LOAD_PERMUTATION (slp_node
).exists ())
8879 first_stmt_info
= SLP_TREE_SCALAR_STMTS (slp_node
)[0];
8880 /* For BB vectorization always use the first stmt to base
8881 the data ref pointer on. */
8883 first_stmt_info_for_drptr
8884 = vect_find_first_scalar_stmt_in_slp (slp_node
);
8886 /* Check if the chain of loads is already vectorized. */
8887 if (STMT_VINFO_VEC_STMTS (first_stmt_info
).exists ()
/* For SLP we would need to copy over SLP_TREE_VEC_STMTS.
   ???  But we can only do so if there is exactly one
   as we have no way to get at the rest.  Leave the CSE
   opportunity alone.
   ???  With the group load eventually participating
   in multiple different permutations (having multiple
   slp nodes which refer to the same group) the CSE
   is even wrong code.  See PR56270.  */
8898 *vec_stmt
= STMT_VINFO_VEC_STMTS (stmt_info
)[0];
8901 first_dr_info
= STMT_VINFO_DR_INFO (first_stmt_info
);
8904 /* VEC_NUM is the number of vect stmts to be created for this group. */
8907 grouped_load
= false;
8908 /* If an SLP permutation is from N elements to N elements,
8909 and if one vector holds a whole number of N, we can load
8910 the inputs to the permutation in the same way as an
8911 unpermuted sequence. In other cases we need to load the
8912 whole group, not only the number of vector stmts the
8913 permutation result fits in. */
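/* Example (counts are illustrative): a permutation over a group of 4
   lanes with an 8-element vectype satisfies multiple_p (nunits,
   group_size), so the inputs can be loaded exactly as for an unpermuted
   access; with a group of 3 lanes the whole group has to be loaded and
   vec_num is derived from group_size * vf instead.  */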
8914 unsigned scalar_lanes
= SLP_TREE_LANES (slp_node
);
8916 && (group_size
!= scalar_lanes
8917 || !multiple_p (nunits
, group_size
)))
8919 /* We don't yet generate such SLP_TREE_LOAD_PERMUTATIONs for
8920 variable VF; see vect_transform_slp_perm_load. */
8921 unsigned int const_vf
= vf
.to_constant ();
8922 unsigned int const_nunits
= nunits
.to_constant ();
8923 vec_num
= CEIL (group_size
* const_vf
, const_nunits
);
8924 group_gap_adj
= vf
* group_size
- nunits
* vec_num
;
8928 vec_num
= SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
);
8930 = group_size
- scalar_lanes
;
8934 vec_num
= group_size
;
8936 ref_type
= get_group_alias_ptr_type (first_stmt_info
);
8940 first_stmt_info
= stmt_info
;
8941 first_dr_info
= dr_info
;
8942 group_size
= vec_num
= 1;
8944 ref_type
= reference_alias_ptr_type (DR_REF (first_dr_info
->dr
));
8947 gcc_assert (alignment_support_scheme
);
8948 vec_loop_masks
*loop_masks
8949 = (loop_vinfo
&& LOOP_VINFO_FULLY_MASKED_P (loop_vinfo
)
8950 ? &LOOP_VINFO_MASKS (loop_vinfo
)
8952 vec_loop_lens
*loop_lens
8953 = (loop_vinfo
&& LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo
)
8954 ? &LOOP_VINFO_LENS (loop_vinfo
)
8957 /* Shouldn't go with length-based approach if fully masked. */
8958 gcc_assert (!loop_lens
|| !loop_masks
);
8960 /* Targets with store-lane instructions must not require explicit
8961 realignment. vect_supportable_dr_alignment always returns either
8962 dr_aligned or dr_unaligned_supported for masked operations. */
8963 gcc_assert ((memory_access_type
!= VMAT_LOAD_STORE_LANES
8966 || alignment_support_scheme
== dr_aligned
8967 || alignment_support_scheme
== dr_unaligned_supported
);
/* In case the vectorization factor (VF) is bigger than the number
   of elements that we can fit in a vectype (nunits), we have to generate
   more than one vector stmt - i.e., we need to "unroll" the
   vector stmt by a factor of VF/nunits.  In doing so, we record a pointer
   from one copy of the vector stmt to the next, in the field
   STMT_VINFO_RELATED_STMT.  This is necessary in order to allow following
   stages to find the correct vector defs to be used when vectorizing
   stmts that use the defs of the current stmt.  The example below
   illustrates the vectorization process when VF=16 and nunits=4 (i.e., we
   need to create 4 vectorized stmts):

   before vectorization:
                                RELATED_STMT    VEC_STMT
        S1:     x = memref      -               -
        S2:     z = x + 1       -               -

   step 1: vectorize stmt S1:
        We first create the vector stmt VS1_0, and, as usual, record a
        pointer to it in the STMT_VINFO_VEC_STMT of the scalar stmt S1.
        Next, we create the vector stmt VS1_1, and record a pointer to
        it in the STMT_VINFO_RELATED_STMT of the vector stmt VS1_0.
        Similarly, for VS1_2 and VS1_3.  This is the resulting chain of
        stmts and pointers:
                                RELATED_STMT    VEC_STMT
        VS1_0:  vx0 = memref0   VS1_1           -
        VS1_1:  vx1 = memref1   VS1_2           -
        VS1_2:  vx2 = memref2   VS1_3           -
        VS1_3:  vx3 = memref3   -               -
        S1:     x = load        -               VS1_0
        S2:     z = x + 1       -               -  */
/* In case of interleaving (non-unit grouped access):

     S1:  x2 = &base + 2
     S2:  x0 = &base
     S3:  x1 = &base + 1
     S4:  x3 = &base + 3

   Vectorized loads are created in the order of memory accesses
   starting from the access of the first stmt of the chain:

     VS1: vx0 = &base
     VS2: vx1 = &base + vec_size*1
     VS3: vx3 = &base + vec_size*2
     VS4: vx4 = &base + vec_size*3

   Then permutation statements are generated:

     VS5: vx5 = VEC_PERM_EXPR < vx0, vx1, { 0, 2, ..., i*2 } >
     VS6: vx6 = VEC_PERM_EXPR < vx0, vx1, { 1, 3, ..., i*2+1 } >
     ...

   And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
   (the order of the data-refs in the output of vect_permute_load_chain
   corresponds to the order of scalar stmts in the interleaving chain - see
   the documentation of vect_permute_load_chain()).
   The generation of permutation stmts and recording them in
   STMT_VINFO_VEC_STMT is done in vect_transform_grouped_load().

   In case of both multiple types and interleaving, the vector loads and
   permutation stmts above are created for every copy.  The result vector
   stmts are put in STMT_VINFO_VEC_STMT for the first copy and in the
   corresponding STMT_VINFO_RELATED_STMT for the next copies.  */
/* If the data reference is aligned (dr_aligned) or potentially unaligned
   on a target that supports unaligned accesses (dr_unaligned_supported)
   we generate the following code:

     p = initial_addr;
     indx = 0;
     loop {
       p = p + indx * vectype_size;
       vec_dest = *(p);
       indx = indx + 1;
     }

   Otherwise, the data reference is potentially unaligned on a target that
   does not support unaligned accesses (dr_explicit_realign_optimized) -
   then generate the following code, in which the data in each iteration is
   obtained by two vector loads, one from the previous iteration, and one
   from the current iteration:

     msq_init = *(floor(p1))
     p2 = initial_addr + VS - 1;
     realignment_token = call target_builtin;
     indx = 0;
     loop {
       p2 = p2 + indx * vectype_size
       lsq = *(floor(p2))
       vec_dest = realign_load (msq, lsq, realignment_token)
       indx = indx + 1;
       msq = lsq;
     }  */
9063 /* If the misalignment remains the same throughout the execution of the
9064 loop, we can create the init_addr and permutation mask at the loop
9065 preheader. Otherwise, it needs to be created inside the loop.
9066 This can only occur when vectorizing memory accesses in the inner-loop
9067 nested within an outer-loop that is being vectorized. */
9069 if (nested_in_vect_loop
9070 && !multiple_p (DR_STEP_ALIGNMENT (dr_info
->dr
),
9071 GET_MODE_SIZE (TYPE_MODE (vectype
))))
9073 gcc_assert (alignment_support_scheme
!= dr_explicit_realign_optimized
);
9074 compute_in_loop
= true;
9077 bool diff_first_stmt_info
9078 = first_stmt_info_for_drptr
&& first_stmt_info
!= first_stmt_info_for_drptr
;
9080 if ((alignment_support_scheme
== dr_explicit_realign_optimized
9081 || alignment_support_scheme
== dr_explicit_realign
)
9082 && !compute_in_loop
)
/* If we have a different first_stmt_info, we can't set up realignment
   here, since we can't guarantee that the first_stmt_info DR has been
   initialized yet; use the first_stmt_info_for_drptr DR by bumping the
   distance from the first_stmt_info DR instead, as below.  */
9088 if (!diff_first_stmt_info
)
9089 msq
= vect_setup_realignment (vinfo
,
9090 first_stmt_info
, gsi
, &realignment_token
,
9091 alignment_support_scheme
, NULL_TREE
,
9093 if (alignment_support_scheme
== dr_explicit_realign_optimized
)
9095 phi
= as_a
<gphi
*> (SSA_NAME_DEF_STMT (msq
));
9096 byte_offset
= size_binop (MINUS_EXPR
, TYPE_SIZE_UNIT (vectype
),
9098 gcc_assert (!first_stmt_info_for_drptr
);
9104 if (memory_access_type
== VMAT_CONTIGUOUS_REVERSE
)
9105 offset
= size_int (-TYPE_VECTOR_SUBPARTS (vectype
) + 1);
9108 tree vec_offset
= NULL_TREE
;
9109 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info
))
9111 aggr_type
= NULL_TREE
;
9114 else if (memory_access_type
== VMAT_GATHER_SCATTER
)
9116 aggr_type
= elem_type
;
9117 vect_get_strided_load_store_ops (stmt_info
, loop_vinfo
, &gs_info
,
9118 &bump
, &vec_offset
);
9122 if (memory_access_type
== VMAT_LOAD_STORE_LANES
)
9123 aggr_type
= build_array_type_nelts (elem_type
, vec_num
* nunits
);
9125 aggr_type
= vectype
;
9126 bump
= vect_get_data_ptr_increment (vinfo
, dr_info
, aggr_type
,
9127 memory_access_type
);
9130 vec
<tree
> vec_offsets
= vNULL
;
9131 auto_vec
<tree
> vec_masks
;
9133 vect_get_vec_defs (vinfo
, stmt_info
, slp_node
, ncopies
,
9134 mask
, &vec_masks
, mask_vectype
, NULL_TREE
);
9135 tree vec_mask
= NULL_TREE
;
9136 poly_uint64 group_elt
= 0;
9137 for (j
= 0; j
< ncopies
; j
++)
9139 /* 1. Create the vector or array pointer update chain. */
9142 bool simd_lane_access_p
9143 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) != 0;
9144 if (simd_lane_access_p
9145 && TREE_CODE (DR_BASE_ADDRESS (first_dr_info
->dr
)) == ADDR_EXPR
9146 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr_info
->dr
), 0))
9147 && integer_zerop (get_dr_vinfo_offset (vinfo
, first_dr_info
))
9148 && integer_zerop (DR_INIT (first_dr_info
->dr
))
9149 && alias_sets_conflict_p (get_alias_set (aggr_type
),
9150 get_alias_set (TREE_TYPE (ref_type
)))
9151 && (alignment_support_scheme
== dr_aligned
9152 || alignment_support_scheme
== dr_unaligned_supported
))
9154 dataref_ptr
= unshare_expr (DR_BASE_ADDRESS (first_dr_info
->dr
));
9155 dataref_offset
= build_int_cst (ref_type
, 0);
9157 else if (diff_first_stmt_info
)
9160 = vect_create_data_ref_ptr (vinfo
, first_stmt_info_for_drptr
,
9161 aggr_type
, at_loop
, offset
, &dummy
,
9162 gsi
, &ptr_incr
, simd_lane_access_p
,
9164 /* Adjust the pointer by the difference to first_stmt. */
9165 data_reference_p ptrdr
9166 = STMT_VINFO_DATA_REF (first_stmt_info_for_drptr
);
9168 = fold_convert (sizetype
,
9169 size_binop (MINUS_EXPR
,
9170 DR_INIT (first_dr_info
->dr
),
9172 dataref_ptr
= bump_vector_ptr (vinfo
, dataref_ptr
, ptr_incr
, gsi
,
9174 if (alignment_support_scheme
== dr_explicit_realign
)
9176 msq
= vect_setup_realignment (vinfo
,
9177 first_stmt_info_for_drptr
, gsi
,
9179 alignment_support_scheme
,
9180 dataref_ptr
, &at_loop
);
9181 gcc_assert (!compute_in_loop
);
9184 else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info
))
9186 vect_get_gather_scatter_ops (vinfo
, loop
, stmt_info
, &gs_info
,
9187 &dataref_ptr
, &vec_offsets
, ncopies
);
9188 vec_offset
= vec_offsets
[0];
9192 = vect_create_data_ref_ptr (vinfo
, first_stmt_info
, aggr_type
,
9194 offset
, &dummy
, gsi
, &ptr_incr
,
9198 vec_mask
= vec_masks
[0];
9203 dataref_offset
= int_const_binop (PLUS_EXPR
, dataref_offset
,
9205 else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info
))
9206 vec_offset
= vec_offsets
[j
];
9208 dataref_ptr
= bump_vector_ptr (vinfo
, dataref_ptr
, ptr_incr
, gsi
,
9211 vec_mask
= vec_masks
[j
];
9214 if (grouped_load
|| slp_perm
)
9215 dr_chain
.create (vec_num
);
9217 gimple
*new_stmt
= NULL
;
9218 if (memory_access_type
== VMAT_LOAD_STORE_LANES
)
9222 vec_array
= create_vector_array (vectype
, vec_num
);
9224 tree final_mask
= NULL_TREE
;
9226 final_mask
= vect_get_loop_mask (gsi
, loop_masks
, ncopies
,
9229 final_mask
= prepare_load_store_mask (mask_vectype
, final_mask
,
/* Emit:
     VEC_ARRAY = MASK_LOAD_LANES (DATAREF_PTR, ALIAS_PTR, VEC_MASK).  */
9238 unsigned int align
= TYPE_ALIGN_UNIT (TREE_TYPE (vectype
));
9239 tree alias_ptr
= build_int_cst (ref_type
, align
);
9240 call
= gimple_build_call_internal (IFN_MASK_LOAD_LANES
, 3,
9241 dataref_ptr
, alias_ptr
,
/* Emit:
     VEC_ARRAY = LOAD_LANES (MEM_REF[...all elements...]).  */
9248 data_ref
= create_array_ref (aggr_type
, dataref_ptr
, ref_type
);
9249 call
= gimple_build_call_internal (IFN_LOAD_LANES
, 1, data_ref
);
9251 gimple_call_set_lhs (call
, vec_array
);
9252 gimple_call_set_nothrow (call
, true);
9253 vect_finish_stmt_generation (vinfo
, stmt_info
, call
, gsi
);
9256 /* Extract each vector into an SSA_NAME. */
9257 for (i
= 0; i
< vec_num
; i
++)
9259 new_temp
= read_vector_array (vinfo
, stmt_info
, gsi
, scalar_dest
,
9261 dr_chain
.quick_push (new_temp
);
9264 /* Record the mapping between SSA_NAMEs and statements. */
9265 vect_record_grouped_load_vectors (vinfo
, stmt_info
, dr_chain
);
9267 /* Record that VEC_ARRAY is now dead. */
9268 vect_clobber_variable (vinfo
, stmt_info
, gsi
, vec_array
);
9272 for (i
= 0; i
< vec_num
; i
++)
9274 tree final_mask
= NULL_TREE
;
9276 && memory_access_type
!= VMAT_INVARIANT
)
9277 final_mask
= vect_get_loop_mask (gsi
, loop_masks
,
9279 vectype
, vec_num
* j
+ i
);
9281 final_mask
= prepare_load_store_mask (mask_vectype
, final_mask
,
9285 dataref_ptr
= bump_vector_ptr (vinfo
, dataref_ptr
, ptr_incr
,
9286 gsi
, stmt_info
, bump
);
9288 /* 2. Create the vector-load in the loop. */
9289 switch (alignment_support_scheme
)
9292 case dr_unaligned_supported
:
9294 unsigned int misalign
;
9295 unsigned HOST_WIDE_INT align
;
9297 if (memory_access_type
== VMAT_GATHER_SCATTER
)
9299 tree zero
= build_zero_cst (vectype
);
9300 tree scale
= size_int (gs_info
.scale
);
9303 call
= gimple_build_call_internal
9304 (IFN_MASK_GATHER_LOAD
, 5, dataref_ptr
,
9305 vec_offset
, scale
, zero
, final_mask
);
9307 call
= gimple_build_call_internal
9308 (IFN_GATHER_LOAD
, 4, dataref_ptr
,
9309 vec_offset
, scale
, zero
);
9310 gimple_call_set_nothrow (call
, true);
9312 data_ref
= NULL_TREE
;
9317 known_alignment (DR_TARGET_ALIGNMENT (first_dr_info
));
9318 if (alignment_support_scheme
== dr_aligned
)
9320 gcc_assert (aligned_access_p (first_dr_info
));
9323 else if (DR_MISALIGNMENT (first_dr_info
) == -1)
9325 align
= dr_alignment
9326 (vect_dr_behavior (vinfo
, first_dr_info
));
9330 misalign
= DR_MISALIGNMENT (first_dr_info
);
9331 if (dataref_offset
== NULL_TREE
9332 && TREE_CODE (dataref_ptr
) == SSA_NAME
)
9333 set_ptr_info_alignment (get_ptr_info (dataref_ptr
),
9338 align
= least_bit_hwi (misalign
| align
);
9339 tree ptr
= build_int_cst (ref_type
, align
);
9341 = gimple_build_call_internal (IFN_MASK_LOAD
, 3,
9344 gimple_call_set_nothrow (call
, true);
9346 data_ref
= NULL_TREE
;
9348 else if (loop_lens
&& memory_access_type
!= VMAT_INVARIANT
)
9351 = vect_get_loop_len (loop_vinfo
, loop_lens
,
9354 align
= least_bit_hwi (misalign
| align
);
9355 tree ptr
= build_int_cst (ref_type
, align
);
9357 = gimple_build_call_internal (IFN_LEN_LOAD
, 3,
9360 gimple_call_set_nothrow (call
, true);
9362 data_ref
= NULL_TREE
;
9364 /* Need conversion if it's wrapped with VnQI. */
9365 machine_mode vmode
= TYPE_MODE (vectype
);
9366 opt_machine_mode new_ovmode
9367 = get_len_load_store_mode (vmode
, true);
9368 machine_mode new_vmode
= new_ovmode
.require ();
9369 if (vmode
!= new_vmode
)
9371 tree qi_type
= unsigned_intQI_type_node
;
9373 = build_vector_type_for_mode (qi_type
, new_vmode
);
9374 tree var
= vect_get_new_ssa_name (new_vtype
,
9376 gimple_set_lhs (call
, var
);
9377 vect_finish_stmt_generation (vinfo
, stmt_info
, call
,
9379 tree op
= build1 (VIEW_CONVERT_EXPR
, vectype
, var
);
9381 = gimple_build_assign (vec_dest
,
9382 VIEW_CONVERT_EXPR
, op
);
9387 tree ltype
= vectype
;
9388 tree new_vtype
= NULL_TREE
;
9389 unsigned HOST_WIDE_INT gap
9390 = DR_GROUP_GAP (first_stmt_info
);
9391 unsigned int vect_align
9392 = vect_known_alignment_in_bytes (first_dr_info
);
9393 unsigned int scalar_dr_size
9394 = vect_get_scalar_dr_size (first_dr_info
);
9395 /* If there's no peeling for gaps but we have a gap
9396 with slp loads then load the lower half of the
9397 vector only. See get_group_load_store_type for
9398 when we apply this optimization. */
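/* Numeric illustration (sizes assumed): with group_size == 4, a gap of 2
   trailing unused elements and a V4SI vectype, only a V2SI half is loaded
   and the vector is completed with zeros through a CONSTRUCTOR, so no
   memory past the end of the accessed elements is read.  */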
9401 && !LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo
)
9403 && known_eq (nunits
, (group_size
- gap
) * 2)
9404 && known_eq (nunits
, group_size
)
9405 && gap
>= (vect_align
/ scalar_dr_size
))
9409 = vector_vector_composition_type (vectype
, 2,
9411 if (new_vtype
!= NULL_TREE
)
9415 = (dataref_offset
? dataref_offset
9416 : build_int_cst (ref_type
, 0));
9417 if (ltype
!= vectype
9418 && memory_access_type
== VMAT_CONTIGUOUS_REVERSE
)
9420 unsigned HOST_WIDE_INT gap_offset
9421 = gap
* tree_to_uhwi (TYPE_SIZE_UNIT (elem_type
));
9422 tree gapcst
= build_int_cst (ref_type
, gap_offset
);
9423 offset
= size_binop (PLUS_EXPR
, offset
, gapcst
);
9426 = fold_build2 (MEM_REF
, ltype
, dataref_ptr
, offset
);
9427 if (alignment_support_scheme
== dr_aligned
)
9429 else if (DR_MISALIGNMENT (first_dr_info
) == -1)
9430 TREE_TYPE (data_ref
)
9431 = build_aligned_type (TREE_TYPE (data_ref
),
9432 align
* BITS_PER_UNIT
);
9434 TREE_TYPE (data_ref
)
9435 = build_aligned_type (TREE_TYPE (data_ref
),
9436 TYPE_ALIGN (elem_type
));
9437 if (ltype
!= vectype
)
9439 vect_copy_ref_info (data_ref
,
9440 DR_REF (first_dr_info
->dr
));
9441 tree tem
= make_ssa_name (ltype
);
9442 new_stmt
= gimple_build_assign (tem
, data_ref
);
9443 vect_finish_stmt_generation (vinfo
, stmt_info
,
9446 vec
<constructor_elt
, va_gc
> *v
;
9448 if (memory_access_type
== VMAT_CONTIGUOUS_REVERSE
)
9450 CONSTRUCTOR_APPEND_ELT (v
, NULL_TREE
,
9451 build_zero_cst (ltype
));
9452 CONSTRUCTOR_APPEND_ELT (v
, NULL_TREE
, tem
);
9456 CONSTRUCTOR_APPEND_ELT (v
, NULL_TREE
, tem
);
9457 CONSTRUCTOR_APPEND_ELT (v
, NULL_TREE
,
9458 build_zero_cst (ltype
));
9460 gcc_assert (new_vtype
!= NULL_TREE
);
9461 if (new_vtype
== vectype
)
9462 new_stmt
= gimple_build_assign (
9463 vec_dest
, build_constructor (vectype
, v
));
9466 tree new_vname
= make_ssa_name (new_vtype
);
9467 new_stmt
= gimple_build_assign (
9468 new_vname
, build_constructor (new_vtype
, v
));
9469 vect_finish_stmt_generation (vinfo
, stmt_info
,
9471 new_stmt
= gimple_build_assign (
9472 vec_dest
, build1 (VIEW_CONVERT_EXPR
, vectype
,
9479 case dr_explicit_realign
:
9483 tree vs
= size_int (TYPE_VECTOR_SUBPARTS (vectype
));
9485 if (compute_in_loop
)
9486 msq
= vect_setup_realignment (vinfo
, first_stmt_info
, gsi
,
9488 dr_explicit_realign
,
9491 if (TREE_CODE (dataref_ptr
) == SSA_NAME
)
9492 ptr
= copy_ssa_name (dataref_ptr
);
9494 ptr
= make_ssa_name (TREE_TYPE (dataref_ptr
));
9495 // For explicit realign the target alignment should be
9496 // known at compile time.
9497 unsigned HOST_WIDE_INT align
=
9498 DR_TARGET_ALIGNMENT (first_dr_info
).to_constant ();
9499 new_stmt
= gimple_build_assign
9500 (ptr
, BIT_AND_EXPR
, dataref_ptr
,
9502 (TREE_TYPE (dataref_ptr
),
9503 -(HOST_WIDE_INT
) align
));
9504 vect_finish_stmt_generation (vinfo
, stmt_info
,
9507 = build2 (MEM_REF
, vectype
, ptr
,
9508 build_int_cst (ref_type
, 0));
9509 vect_copy_ref_info (data_ref
, DR_REF (first_dr_info
->dr
));
9510 vec_dest
= vect_create_destination_var (scalar_dest
,
9512 new_stmt
= gimple_build_assign (vec_dest
, data_ref
);
9513 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
9514 gimple_assign_set_lhs (new_stmt
, new_temp
);
9515 gimple_move_vops (new_stmt
, stmt_info
->stmt
);
9516 vect_finish_stmt_generation (vinfo
, stmt_info
,
9520 bump
= size_binop (MULT_EXPR
, vs
,
9521 TYPE_SIZE_UNIT (elem_type
));
9522 bump
= size_binop (MINUS_EXPR
, bump
, size_one_node
);
9523 ptr
= bump_vector_ptr (vinfo
, dataref_ptr
, NULL
, gsi
,
9525 new_stmt
= gimple_build_assign
9526 (NULL_TREE
, BIT_AND_EXPR
, ptr
,
9528 (TREE_TYPE (ptr
), -(HOST_WIDE_INT
) align
));
9529 ptr
= copy_ssa_name (ptr
, new_stmt
);
9530 gimple_assign_set_lhs (new_stmt
, ptr
);
9531 vect_finish_stmt_generation (vinfo
, stmt_info
,
9534 = build2 (MEM_REF
, vectype
, ptr
,
9535 build_int_cst (ref_type
, 0));
9538 case dr_explicit_realign_optimized
:
9540 if (TREE_CODE (dataref_ptr
) == SSA_NAME
)
9541 new_temp
= copy_ssa_name (dataref_ptr
);
9543 new_temp
= make_ssa_name (TREE_TYPE (dataref_ptr
));
9544 // We should only be doing this if we know the target
9545 // alignment at compile time.
9546 unsigned HOST_WIDE_INT align
=
9547 DR_TARGET_ALIGNMENT (first_dr_info
).to_constant ();
9548 new_stmt
= gimple_build_assign
9549 (new_temp
, BIT_AND_EXPR
, dataref_ptr
,
9550 build_int_cst (TREE_TYPE (dataref_ptr
),
9551 -(HOST_WIDE_INT
) align
));
9552 vect_finish_stmt_generation (vinfo
, stmt_info
,
9555 = build2 (MEM_REF
, vectype
, new_temp
,
9556 build_int_cst (ref_type
, 0));
9562 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
9563 /* DATA_REF is null if we've already built the statement. */
9566 vect_copy_ref_info (data_ref
, DR_REF (first_dr_info
->dr
));
9567 new_stmt
= gimple_build_assign (vec_dest
, data_ref
);
9569 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
9570 gimple_set_lhs (new_stmt
, new_temp
);
9571 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
/* 3. Handle explicit realignment if necessary/supported.
   Create in loop:
     vec_dest = realign_load (msq, lsq, realignment_token)  */
9576 if (alignment_support_scheme
== dr_explicit_realign_optimized
9577 || alignment_support_scheme
== dr_explicit_realign
)
9579 lsq
= gimple_assign_lhs (new_stmt
);
9580 if (!realignment_token
)
9581 realignment_token
= dataref_ptr
;
9582 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
9583 new_stmt
= gimple_build_assign (vec_dest
, REALIGN_LOAD_EXPR
,
9584 msq
, lsq
, realignment_token
);
9585 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
9586 gimple_assign_set_lhs (new_stmt
, new_temp
);
9587 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
9589 if (alignment_support_scheme
== dr_explicit_realign_optimized
)
9592 if (i
== vec_num
- 1 && j
== ncopies
- 1)
9593 add_phi_arg (phi
, lsq
,
9594 loop_latch_edge (containing_loop
),
9600 if (memory_access_type
== VMAT_CONTIGUOUS_REVERSE
)
9602 tree perm_mask
= perm_mask_for_reverse (vectype
);
9603 new_temp
= permute_vec_elements (vinfo
, new_temp
, new_temp
,
9604 perm_mask
, stmt_info
, gsi
);
9605 new_stmt
= SSA_NAME_DEF_STMT (new_temp
);
9608 /* Collect vector loads and later create their permutation in
9609 vect_transform_grouped_load (). */
9610 if (grouped_load
|| slp_perm
)
9611 dr_chain
.quick_push (new_temp
);
9613 /* Store vector loads in the corresponding SLP_NODE. */
9614 if (slp
&& !slp_perm
)
9615 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
/* With an SLP permutation we load the gaps as well; without one
   we need to skip the gaps after we manage to fully load
   all elements.  group_gap_adj is DR_GROUP_SIZE here.  */
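/* Illustration (sizes assumed): with nunits == 2 and group_size -
   group_gap_adj == 2, a single vector load covers the used lanes, the
   known_eq test below fires and bump_vector_ptr advances the data
   pointer over the remaining gap elements.  */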
9620 group_elt
+= nunits
;
9621 if (maybe_ne (group_gap_adj
, 0U)
9623 && known_eq (group_elt
, group_size
- group_gap_adj
))
9625 poly_wide_int bump_val
9626 = (wi::to_wide (TYPE_SIZE_UNIT (elem_type
))
9628 tree bump
= wide_int_to_tree (sizetype
, bump_val
);
9629 dataref_ptr
= bump_vector_ptr (vinfo
, dataref_ptr
, ptr_incr
,
9630 gsi
, stmt_info
, bump
);
9634 /* Bump the vector pointer to account for a gap or for excess
9635 elements loaded for a permuted SLP load. */
9636 if (maybe_ne (group_gap_adj
, 0U) && slp_perm
)
9638 poly_wide_int bump_val
9639 = (wi::to_wide (TYPE_SIZE_UNIT (elem_type
))
9641 tree bump
= wide_int_to_tree (sizetype
, bump_val
);
9642 dataref_ptr
= bump_vector_ptr (vinfo
, dataref_ptr
, ptr_incr
, gsi
,
9647 if (slp
&& !slp_perm
)
9653 bool ok
= vect_transform_slp_perm_load (vinfo
, slp_node
, dr_chain
,
9654 gsi
, vf
, false, &n_perms
);
9661 if (memory_access_type
!= VMAT_LOAD_STORE_LANES
)
9662 vect_transform_grouped_load (vinfo
, stmt_info
, dr_chain
,
9664 *vec_stmt
= STMT_VINFO_VEC_STMTS (stmt_info
)[0];
9668 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (new_stmt
);
9671 dr_chain
.release ();
9674 *vec_stmt
= STMT_VINFO_VEC_STMTS (stmt_info
)[0];
/* Function vect_is_simple_cond.

   Input:
   LOOP - the loop that is being vectorized.
   COND - Condition that is checked for simple use.

   Output:
   *COMP_VECTYPE - the vector type for the comparison.
   *DTS - The def types for the arguments of the comparison.

   Returns whether a COND can be vectorized.  Checks whether
   condition operands are supportable using vect_is_simple_use.  */
9693 vect_is_simple_cond (tree cond
, vec_info
*vinfo
, stmt_vec_info stmt_info
,
9694 slp_tree slp_node
, tree
*comp_vectype
,
9695 enum vect_def_type
*dts
, tree vectype
)
9698 tree vectype1
= NULL_TREE
, vectype2
= NULL_TREE
;
9702 if (TREE_CODE (cond
) == SSA_NAME
9703 && VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (cond
)))
9705 if (!vect_is_simple_use (vinfo
, stmt_info
, slp_node
, 0, &cond
,
9706 &slp_op
, &dts
[0], comp_vectype
)
9708 || !VECTOR_BOOLEAN_TYPE_P (*comp_vectype
))
9713 if (!COMPARISON_CLASS_P (cond
))
9716 lhs
= TREE_OPERAND (cond
, 0);
9717 rhs
= TREE_OPERAND (cond
, 1);
9719 if (TREE_CODE (lhs
) == SSA_NAME
)
9721 if (!vect_is_simple_use (vinfo
, stmt_info
, slp_node
, 0,
9722 &lhs
, &slp_op
, &dts
[0], &vectype1
))
9725 else if (TREE_CODE (lhs
) == INTEGER_CST
|| TREE_CODE (lhs
) == REAL_CST
9726 || TREE_CODE (lhs
) == FIXED_CST
)
9727 dts
[0] = vect_constant_def
;
9731 if (TREE_CODE (rhs
) == SSA_NAME
)
9733 if (!vect_is_simple_use (vinfo
, stmt_info
, slp_node
, 1,
9734 &rhs
, &slp_op
, &dts
[1], &vectype2
))
9737 else if (TREE_CODE (rhs
) == INTEGER_CST
|| TREE_CODE (rhs
) == REAL_CST
9738 || TREE_CODE (rhs
) == FIXED_CST
)
9739 dts
[1] = vect_constant_def
;
9743 if (vectype1
&& vectype2
9744 && maybe_ne (TYPE_VECTOR_SUBPARTS (vectype1
),
9745 TYPE_VECTOR_SUBPARTS (vectype2
)))
9748 *comp_vectype
= vectype1
? vectype1
: vectype2
;
9749 /* Invariant comparison. */
9750 if (! *comp_vectype
)
9752 tree scalar_type
= TREE_TYPE (lhs
);
9753 if (VECT_SCALAR_BOOLEAN_TYPE_P (scalar_type
))
9754 *comp_vectype
= truth_type_for (vectype
);
9757 /* If we can widen the comparison to match vectype do so. */
9758 if (INTEGRAL_TYPE_P (scalar_type
)
9760 && tree_int_cst_lt (TYPE_SIZE (scalar_type
),
9761 TYPE_SIZE (TREE_TYPE (vectype
))))
9762 scalar_type
= build_nonstandard_integer_type
9763 (vector_element_bits (vectype
), TYPE_UNSIGNED (scalar_type
));
9764 *comp_vectype
= get_vectype_for_scalar_type (vinfo
, scalar_type
,
/* vectorizable_condition.

   Check if STMT_INFO is conditional modify expression that can be vectorized.
   If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
   stmt using VEC_COND_EXPR to replace it, put it in VEC_STMT, and insert it
   at GSI.

   When STMT_INFO is vectorized as a nested cycle, for_reduction is true.

   Return true if STMT_INFO is vectorizable in this way.  */

vectorizable_condition (vec_info *vinfo,
			stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
			slp_tree slp_node, stmt_vector_for_cost *cost_vec)
  tree scalar_dest = NULL_TREE;
  tree vec_dest = NULL_TREE;
  tree cond_expr, cond_expr0 = NULL_TREE, cond_expr1 = NULL_TREE;
  tree then_clause, else_clause;
  tree comp_vectype = NULL_TREE;
  tree vec_cond_lhs = NULL_TREE, vec_cond_rhs = NULL_TREE;
  tree vec_then_clause = NULL_TREE, vec_else_clause = NULL_TREE;
  loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
  enum vect_def_type dts[4]
    = {vect_unknown_def_type, vect_unknown_def_type,
       vect_unknown_def_type, vect_unknown_def_type};
  enum tree_code code, cond_code, bitop1 = NOP_EXPR, bitop2 = NOP_EXPR;
  bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
  vec<tree> vec_oprnds0 = vNULL;
  vec<tree> vec_oprnds1 = vNULL;
  vec<tree> vec_oprnds2 = vNULL;
  vec<tree> vec_oprnds3 = vNULL;
  bool masked = false;

  if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)

  /* Is vectorizable conditional operation?  */
  gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);

  code = gimple_assign_rhs_code (stmt);
  if (code != COND_EXPR)

  stmt_vec_info reduc_info = NULL;
  int reduc_index = -1;
  vect_reduction_type reduction_type = TREE_CODE_REDUCTION;
    = STMT_VINFO_REDUC_DEF (vect_orig_stmt (stmt_info)) != NULL;
  if (STMT_SLP_TYPE (stmt_info))
      reduc_info = info_for_reduction (vinfo, stmt_info);
      reduction_type = STMT_VINFO_REDUC_TYPE (reduc_info);
      reduc_index = STMT_VINFO_REDUC_IDX (stmt_info);
      gcc_assert (reduction_type != EXTRACT_LAST_REDUCTION
		  || reduc_index != -1);

  if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)

  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
    vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
    ncopies = vect_get_num_copies (loop_vinfo, vectype);

  gcc_assert (ncopies >= 1);
  if (for_reduction && ncopies > 1)
    return false; /* FORNOW */

  cond_expr = gimple_assign_rhs1 (stmt);

  if (!vect_is_simple_cond (cond_expr, vinfo, stmt_info, slp_node,
			    &comp_vectype, &dts[0], vectype)

  unsigned op_adjust = COMPARISON_CLASS_P (cond_expr) ? 1 : 0;
  slp_tree then_slp_node, else_slp_node;
  if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 1 + op_adjust,
			   &then_clause, &then_slp_node, &dts[2], &vectype1))
  if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 2 + op_adjust,
			   &else_clause, &else_slp_node, &dts[3], &vectype2))

  if (vectype1 && !useless_type_conversion_p (vectype, vectype1))
  if (vectype2 && !useless_type_conversion_p (vectype, vectype2))

  masked = !COMPARISON_CLASS_P (cond_expr);
  vec_cmp_type = truth_type_for (comp_vectype);
  if (vec_cmp_type == NULL_TREE)

  cond_code = TREE_CODE (cond_expr);
      cond_expr0 = TREE_OPERAND (cond_expr, 0);
      cond_expr1 = TREE_OPERAND (cond_expr, 1);

  /* For conditional reductions, the "then" value needs to be the candidate
     value calculated by this iteration while the "else" value needs to be
     the result carried over from previous iterations.  If the COND_EXPR
     is the other way around, we need to swap it.  */
  bool must_invert_cmp_result = false;
  if (reduction_type == EXTRACT_LAST_REDUCTION && reduc_index == 1)
	must_invert_cmp_result = true;
	  bool honor_nans = HONOR_NANS (TREE_TYPE (cond_expr0));
	  tree_code new_code = invert_tree_comparison (cond_code, honor_nans);
	  if (new_code == ERROR_MARK)
	    must_invert_cmp_result = true;
	      cond_code = new_code;
	      /* Make sure we don't accidentally use the old condition.  */
	      cond_expr = NULL_TREE;
      std::swap (then_clause, else_clause);

  if (!masked && VECTOR_BOOLEAN_TYPE_P (comp_vectype))
      /* Boolean values may have another representation in vectors
	 and therefore we prefer bit operations over comparison for
	 them (which also works for scalar masks).  We store opcodes
	 to use in bitop1 and bitop2.  Statement is vectorized as
	 BITOP2 (rhs1 BITOP1 rhs2) or rhs1 BITOP2 (BITOP1 rhs2)
	 depending on bitop1 and bitop2 arity.  */
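      /* Concretely, for 0/1 mask values this is expected to mean that
	 a > b becomes a & ~b, a >= b becomes a | ~b, a < b and a <= b
	 swap the operands first, a != b becomes a ^ b and a == b becomes
	 ~(a ^ b); the opcode assignments below implement that mapping.  */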
	  bitop1 = BIT_NOT_EXPR;
	  bitop2 = BIT_AND_EXPR;
	  bitop1 = BIT_NOT_EXPR;
	  bitop2 = BIT_IOR_EXPR;
	  bitop1 = BIT_NOT_EXPR;
	  bitop2 = BIT_AND_EXPR;
	  std::swap (cond_expr0, cond_expr1);
	  bitop1 = BIT_NOT_EXPR;
	  bitop2 = BIT_IOR_EXPR;
	  std::swap (cond_expr0, cond_expr1);
	  bitop1 = BIT_XOR_EXPR;
	  bitop1 = BIT_XOR_EXPR;
	  bitop2 = BIT_NOT_EXPR;
      cond_code = SSA_NAME;

  if (TREE_CODE_CLASS (cond_code) == tcc_comparison
      && reduction_type == EXTRACT_LAST_REDUCTION
      && !expand_vec_cmp_expr_p (comp_vectype, vec_cmp_type, cond_code))
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "reduction comparison operation not supported.\n");

  if (bitop1 != NOP_EXPR)
      machine_mode mode = TYPE_MODE (comp_vectype);
      optab = optab_for_tree_code (bitop1, comp_vectype, optab_default);
      if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
      if (bitop2 != NOP_EXPR)
	  optab = optab_for_tree_code (bitop2, comp_vectype,
	  if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)

  vect_cost_for_stmt kind = vector_stmt;
  if (reduction_type == EXTRACT_LAST_REDUCTION)
    /* Count one reduction-like operation per vector.  */
    kind = vec_to_scalar;
  else if (!expand_vec_cond_expr_p (vectype, comp_vectype, cond_code))
      && (!vect_maybe_update_slp_op_vectype
	    (SLP_TREE_CHILDREN (slp_node)[0], comp_vectype)
	  && !vect_maybe_update_slp_op_vectype
	       (SLP_TREE_CHILDREN (slp_node)[1], comp_vectype))
	  || !vect_maybe_update_slp_op_vectype (then_slp_node, vectype)
	  || !vect_maybe_update_slp_op_vectype (else_slp_node, vectype)))
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "incompatible vector types for invariants\n");

  if (loop_vinfo && for_reduction
      && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo))
      if (reduction_type == EXTRACT_LAST_REDUCTION)
	vect_record_loop_mask (loop_vinfo, &LOOP_VINFO_MASKS (loop_vinfo),
			       ncopies * vec_num, vectype, NULL);
      /* Extra inactive lanes should be safe for vect_nested_cycle.  */
      else if (STMT_VINFO_DEF_TYPE (reduc_info) != vect_nested_cycle)
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			     "conditional reduction prevents the use"
			     " of partial vectors.\n");
	  LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;

      STMT_VINFO_TYPE (stmt_info) = condition_vec_info_type;
      vect_model_simple_cost (vinfo, stmt_info, ncopies, dts, ndts, slp_node,

      vec_oprnds0.create (1);
      vec_oprnds1.create (1);
      vec_oprnds2.create (1);
      vec_oprnds3.create (1);

  scalar_dest = gimple_assign_lhs (stmt);
  if (reduction_type != EXTRACT_LAST_REDUCTION)
    vec_dest = vect_create_destination_var (scalar_dest, vectype);

  bool swap_cond_operands = false;

  /* See whether another part of the vectorized code applies a loop
     mask to the condition, or to its inverse.  */

  vec_loop_masks *masks = NULL;
  if (loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo))
      if (reduction_type == EXTRACT_LAST_REDUCTION)
	masks = &LOOP_VINFO_MASKS (loop_vinfo);
	  scalar_cond_masked_key cond (cond_expr, ncopies);
	  if (loop_vinfo->scalar_cond_masked_set.contains (cond))
	    masks = &LOOP_VINFO_MASKS (loop_vinfo);
	      bool honor_nans = HONOR_NANS (TREE_TYPE (cond.op0));
	      cond.code = invert_tree_comparison (cond.code, honor_nans);
	      if (loop_vinfo->scalar_cond_masked_set.contains (cond))
		  masks = &LOOP_VINFO_MASKS (loop_vinfo);
		  cond_code = cond.code;
		  swap_cond_operands = true;

  /* Handle cond expr.  */
    vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
		       cond_expr, &vec_oprnds0, comp_vectype,
		       then_clause, &vec_oprnds2, vectype,
		       reduction_type != EXTRACT_LAST_REDUCTION
		       ? else_clause : NULL, &vec_oprnds3, vectype);
    vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
		       cond_expr0, &vec_oprnds0, comp_vectype,
		       cond_expr1, &vec_oprnds1, comp_vectype,
		       then_clause, &vec_oprnds2, vectype,
		       reduction_type != EXTRACT_LAST_REDUCTION
		       ? else_clause : NULL, &vec_oprnds3, vectype);

  /* Arguments are ready.  Create the new vector stmt.  */
  FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_cond_lhs)
      vec_then_clause = vec_oprnds2[i];
      if (reduction_type != EXTRACT_LAST_REDUCTION)
	vec_else_clause = vec_oprnds3[i];

      if (swap_cond_operands)
	std::swap (vec_then_clause, vec_else_clause);

	vec_compare = vec_cond_lhs;
	  vec_cond_rhs = vec_oprnds1[i];
	  if (bitop1 == NOP_EXPR)
	      gimple_seq stmts = NULL;
	      vec_compare = gimple_build (&stmts, cond_code, vec_cmp_type,
					  vec_cond_lhs, vec_cond_rhs);
	      gsi_insert_before (gsi, stmts, GSI_SAME_STMT);
	      new_temp = make_ssa_name (vec_cmp_type);
	      if (bitop1 == BIT_NOT_EXPR)
		new_stmt = gimple_build_assign (new_temp, bitop1,
		  = gimple_build_assign (new_temp, bitop1, vec_cond_lhs,
	      vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
	      if (bitop2 == NOP_EXPR)
		vec_compare = new_temp;
	      else if (bitop2 == BIT_NOT_EXPR)
		  /* Instead of doing ~x ? y : z do x ? z : y.  */
		  vec_compare = new_temp;
		  std::swap (vec_then_clause, vec_else_clause);
		  vec_compare = make_ssa_name (vec_cmp_type);
		    = gimple_build_assign (vec_compare, bitop2,
					   vec_cond_lhs, new_temp);
		  vect_finish_stmt_generation (vinfo, stmt_info,

      /* If we decided to apply a loop mask to the result of the vector
	 comparison, AND the comparison with the mask now.  Later passes
	 should then be able to reuse the AND results between multiple
	 vector statements.

	 For example:
	   for (int i = 0; i < 100; ++i)
	     x[i] = y[i] ? z[i] : 10;

	 results in following optimized GIMPLE:

	 mask__35.8_43 = vect__4.7_41 != { 0, ... };
	 vec_mask_and_46 = loop_mask_40 & mask__35.8_43;
	 _19 = &MEM[base: z_12(D), index: ivtmp_56, step: 4, offset: 0B];
	 vect_iftmp.11_47 = .MASK_LOAD (_19, 4B, vec_mask_and_46);
	 vect_iftmp.12_52 = VEC_COND_EXPR <vec_mask_and_46,
					   vect_iftmp.11_47, { 10, ... }>;

	 instead of using a masked and unmasked forms of
	 vec != { 0, ... } (masked in the MASK_LOAD,
	 unmasked in the VEC_COND_EXPR).  */

      /* Force vec_compare to be an SSA_NAME rather than a comparison,
	 in cases where that's necessary.  */
      if (masks || reduction_type == EXTRACT_LAST_REDUCTION)
	  if (!is_gimple_val (vec_compare))
	      tree vec_compare_name = make_ssa_name (vec_cmp_type);
	      gassign *new_stmt = gimple_build_assign (vec_compare_name,
	      vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
	      vec_compare = vec_compare_name;

	  if (must_invert_cmp_result)
	      tree vec_compare_name = make_ssa_name (vec_cmp_type);
	      gassign *new_stmt = gimple_build_assign (vec_compare_name,
	      vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
	      vec_compare = vec_compare_name;

	      unsigned vec_num = vec_oprnds0.length ();
		= vect_get_loop_mask (gsi, masks, vec_num * ncopies,
	      tree tmp2 = make_ssa_name (vec_cmp_type);
		= gimple_build_assign (tmp2, BIT_AND_EXPR, vec_compare,
	      vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
	      vec_compare = tmp2;
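      /* .FOLD_EXTRACT_LAST (else, mask, values) is expected to return the
	 value of the last lane of VALUES that MASK enables, or ELSE when no
	 lane is enabled; the extract-last reduction below relies on that.  */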
      if (reduction_type == EXTRACT_LAST_REDUCTION)
	  gimple *old_stmt = vect_orig_stmt (stmt_info)->stmt;
	  tree lhs = gimple_get_lhs (old_stmt);
	  new_stmt = gimple_build_call_internal
	    (IFN_FOLD_EXTRACT_LAST, 3, else_clause, vec_compare,
	  gimple_call_set_lhs (new_stmt, lhs);
	  SSA_NAME_DEF_STMT (lhs) = new_stmt;
	  if (old_stmt == gsi_stmt (*gsi))
	    vect_finish_replace_stmt (vinfo, stmt_info, new_stmt);
	      /* In this case we're moving the definition to later in the
		 block.  That doesn't matter because the only uses of the
		 lhs are in phi statements.  */
	      gimple_stmt_iterator old_gsi = gsi_for_stmt (old_stmt);
	      gsi_remove (&old_gsi, true);
	      vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
	  new_temp = make_ssa_name (vec_dest);
	  new_stmt = gimple_build_assign (new_temp, VEC_COND_EXPR, vec_compare,
					  vec_then_clause, vec_else_clause);
	  vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
	SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
	STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);

    *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];

  vec_oprnds0.release ();
  vec_oprnds1.release ();
  vec_oprnds2.release ();
  vec_oprnds3.release ();
/* vectorizable_comparison.

   Check if STMT_INFO is comparison expression that can be vectorized.
   If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
   comparison, put it in VEC_STMT, and insert it at GSI.

   Return true if STMT_INFO is vectorizable in this way.  */

vectorizable_comparison (vec_info *vinfo,
			 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
			 slp_tree slp_node, stmt_vector_for_cost *cost_vec)
  tree lhs, rhs1, rhs2;
  tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  tree vec_rhs1 = NULL_TREE, vec_rhs2 = NULL_TREE;
  loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
  enum vect_def_type dts[2] = {vect_unknown_def_type, vect_unknown_def_type};
  poly_uint64 nunits;
  enum tree_code code, bitop1 = NOP_EXPR, bitop2 = NOP_EXPR;
  bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
  vec<tree> vec_oprnds0 = vNULL;
  vec<tree> vec_oprnds1 = vNULL;

  if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)

  if (!vectype || !VECTOR_BOOLEAN_TYPE_P (vectype))

  mask_type = vectype;
  nunits = TYPE_VECTOR_SUBPARTS (vectype);
    ncopies = vect_get_num_copies (loop_vinfo, vectype);

  gcc_assert (ncopies >= 1);
  if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)

  gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);

  code = gimple_assign_rhs_code (stmt);

  if (TREE_CODE_CLASS (code) != tcc_comparison)

  slp_tree slp_rhs1, slp_rhs2;
  if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
			   0, &rhs1, &slp_rhs1, &dts[0], &vectype1))

  if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
			   1, &rhs2, &slp_rhs2, &dts[1], &vectype2))

  if (vectype1 && vectype2
      && maybe_ne (TYPE_VECTOR_SUBPARTS (vectype1),
		   TYPE_VECTOR_SUBPARTS (vectype2)))

  vectype = vectype1 ? vectype1 : vectype2;

  /* Invariant comparison.  */
      if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (rhs1)))
	vectype = mask_type;
	vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (rhs1),
      if (!vectype || maybe_ne (TYPE_VECTOR_SUBPARTS (vectype), nunits))
  else if (maybe_ne (nunits, TYPE_VECTOR_SUBPARTS (vectype)))

  /* Can't compare mask and non-mask types.  */
  if (vectype1 && vectype2
      && (VECTOR_BOOLEAN_TYPE_P (vectype1) ^ VECTOR_BOOLEAN_TYPE_P (vectype2)))

  /* Boolean values may have another representation in vectors
     and therefore we prefer bit operations over comparison for
     them (which also works for scalar masks).  We store opcodes
     to use in bitop1 and bitop2.  Statement is vectorized as
       BITOP2 (rhs1 BITOP1 rhs2) or
       rhs1 BITOP2 (BITOP1 rhs2)
     depending on bitop1 and bitop2 arity.  */
  bool swap_p = false;
  if (VECTOR_BOOLEAN_TYPE_P (vectype))
      if (code == GT_EXPR)
	  bitop1 = BIT_NOT_EXPR;
	  bitop2 = BIT_AND_EXPR;
      else if (code == GE_EXPR)
	  bitop1 = BIT_NOT_EXPR;
	  bitop2 = BIT_IOR_EXPR;
      else if (code == LT_EXPR)
	  bitop1 = BIT_NOT_EXPR;
	  bitop2 = BIT_AND_EXPR;
      else if (code == LE_EXPR)
	  bitop1 = BIT_NOT_EXPR;
	  bitop2 = BIT_IOR_EXPR;
	  bitop1 = BIT_XOR_EXPR;
	  if (code == EQ_EXPR)
	    bitop2 = BIT_NOT_EXPR;

  if (bitop1 == NOP_EXPR)
      if (!expand_vec_cmp_expr_p (vectype, mask_type, code))
      machine_mode mode = TYPE_MODE (vectype);
      optab = optab_for_tree_code (bitop1, vectype, optab_default);
      if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)

      if (bitop2 != NOP_EXPR)
	  optab = optab_for_tree_code (bitop2, vectype, optab_default);
	  if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)

      /* Put types on constant and invariant SLP children.  */
	  && (!vect_maybe_update_slp_op_vectype (slp_rhs1, vectype)
	      || !vect_maybe_update_slp_op_vectype (slp_rhs2, vectype)))
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			     "incompatible vector types for invariants\n");

      STMT_VINFO_TYPE (stmt_info) = comparison_vec_info_type;
      vect_model_simple_cost (vinfo, stmt_info,
			      ncopies * (1 + (bitop2 != NOP_EXPR)),
			      dts, ndts, slp_node, cost_vec);

      vec_oprnds0.create (1);
      vec_oprnds1.create (1);

  lhs = gimple_assign_lhs (stmt);
  mask = vect_create_destination_var (lhs, mask_type);

  vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
		     rhs1, &vec_oprnds0, vectype,
		     rhs2, &vec_oprnds1, vectype);
    std::swap (vec_oprnds0, vec_oprnds1);

  /* Arguments are ready.  Create the new vector stmt.  */
  FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_rhs1)
      vec_rhs2 = vec_oprnds1[i];

      new_temp = make_ssa_name (mask);
      if (bitop1 == NOP_EXPR)
	  new_stmt = gimple_build_assign (new_temp, code,
					  vec_rhs1, vec_rhs2);
	  vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
	  if (bitop1 == BIT_NOT_EXPR)
	    new_stmt = gimple_build_assign (new_temp, bitop1, vec_rhs2);
	    new_stmt = gimple_build_assign (new_temp, bitop1, vec_rhs1,
	  vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
	  if (bitop2 != NOP_EXPR)
	      tree res = make_ssa_name (mask);
	      if (bitop2 == BIT_NOT_EXPR)
		new_stmt = gimple_build_assign (res, bitop2, new_temp);
		new_stmt = gimple_build_assign (res, bitop2, vec_rhs1,
	      vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
	SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
	STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);

    *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];

  vec_oprnds0.release ();
  vec_oprnds1.release ();
/* If SLP_NODE is nonnull, return true if vectorizable_live_operation
   can handle all live statements in the node.  Otherwise return true
   if STMT_INFO is not live or if vectorizable_live_operation can handle it.
   GSI and VEC_STMT_P are as for vectorizable_live_operation.  */

can_vectorize_live_stmts (vec_info *vinfo,
			  stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
			  slp_tree slp_node, slp_instance slp_node_instance,
			  stmt_vector_for_cost *cost_vec)
      stmt_vec_info slp_stmt_info;
      FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (slp_node), i, slp_stmt_info)
	  if (STMT_VINFO_LIVE_P (slp_stmt_info)
	      && !vectorizable_live_operation (vinfo,
					       slp_stmt_info, gsi, slp_node,
					       slp_node_instance, i,
					       vec_stmt_p, cost_vec))
  else if (STMT_VINFO_LIVE_P (stmt_info)
	   && !vectorizable_live_operation (vinfo, stmt_info, gsi,
					    slp_node, slp_node_instance, -1,
					    vec_stmt_p, cost_vec))
/* Make sure the statement is vectorizable.  */

vect_analyze_stmt (vec_info *vinfo,
		   stmt_vec_info stmt_info, bool *need_to_vectorize,
		   slp_tree node, slp_instance node_instance,
		   stmt_vector_for_cost *cost_vec)
  bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
  enum vect_relevant relevance = STMT_VINFO_RELEVANT (stmt_info);
  gimple_seq pattern_def_seq;

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location, "==> examining statement: %G",

  if (gimple_has_volatile_ops (stmt_info->stmt))
    return opt_result::failure_at (stmt_info->stmt,
				   " stmt has volatile operands: %G\n",

  if (STMT_VINFO_IN_PATTERN_P (stmt_info)
      && (pattern_def_seq = STMT_VINFO_PATTERN_DEF_SEQ (stmt_info)))
      gimple_stmt_iterator si;

      for (si = gsi_start (pattern_def_seq); !gsi_end_p (si); gsi_next (&si))
	  stmt_vec_info pattern_def_stmt_info
	    = vinfo->lookup_stmt (gsi_stmt (si));
	  if (STMT_VINFO_RELEVANT_P (pattern_def_stmt_info)
	      || STMT_VINFO_LIVE_P (pattern_def_stmt_info))
	      /* Analyze def stmt of STMT if it's a pattern stmt.  */
	      if (dump_enabled_p ())
		dump_printf_loc (MSG_NOTE, vect_location,
				 "==> examining pattern def statement: %G",
				 pattern_def_stmt_info->stmt);
		= vect_analyze_stmt (vinfo, pattern_def_stmt_info,
				     need_to_vectorize, node, node_instance,

  /* Skip stmts that do not need to be vectorized.  In loops this is expected
     to include:
     - the COND_EXPR which is the loop exit condition
     - any LABEL_EXPRs in the loop
     - computations that are used only for array indexing or loop control.
     In basic blocks we only analyze statements that are a part of some SLP
     instance, therefore, all the statements are relevant.

     Pattern statement needs to be analyzed instead of the original statement
     if the original statement is not relevant.  Otherwise, we analyze both
     statements.  In basic blocks we are called from some SLP instance
     traversal, don't analyze pattern stmts instead, the pattern stmts
     already will be part of SLP instance.  */

  stmt_vec_info pattern_stmt_info = STMT_VINFO_RELATED_STMT (stmt_info);
  if (!STMT_VINFO_RELEVANT_P (stmt_info)
      && !STMT_VINFO_LIVE_P (stmt_info))
      if (STMT_VINFO_IN_PATTERN_P (stmt_info)
	  && pattern_stmt_info
	  && (STMT_VINFO_RELEVANT_P (pattern_stmt_info)
	      || STMT_VINFO_LIVE_P (pattern_stmt_info)))
	  /* Analyze PATTERN_STMT instead of the original stmt.  */
	  stmt_info = pattern_stmt_info;
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_NOTE, vect_location,
			     "==> examining pattern statement: %G",
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_NOTE, vect_location, "irrelevant.\n");

	  return opt_result::success ();
  else if (STMT_VINFO_IN_PATTERN_P (stmt_info)
	   && pattern_stmt_info
	   && (STMT_VINFO_RELEVANT_P (pattern_stmt_info)
	       || STMT_VINFO_LIVE_P (pattern_stmt_info)))
      /* Analyze PATTERN_STMT too.  */
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "==> examining pattern statement: %G",
			 pattern_stmt_info->stmt);
	= vect_analyze_stmt (vinfo, pattern_stmt_info, need_to_vectorize, node,
			     node_instance, cost_vec);

  switch (STMT_VINFO_DEF_TYPE (stmt_info))
    case vect_internal_def:

    case vect_reduction_def:
    case vect_nested_cycle:
      gcc_assert (!bb_vinfo
		  && (relevance == vect_used_in_outer
		      || relevance == vect_used_in_outer_by_reduction
		      || relevance == vect_used_by_reduction
		      || relevance == vect_unused_in_scope
		      || relevance == vect_used_only_live));

    case vect_induction_def:
      gcc_assert (!bb_vinfo);

    case vect_constant_def:
    case vect_external_def:
    case vect_unknown_def_type:
      gcc_unreachable ();

  if (STMT_VINFO_RELEVANT_P (stmt_info))
      tree type = gimple_expr_type (stmt_info->stmt);
      gcc_assert (!VECTOR_MODE_P (TYPE_MODE (type)));
      gcall *call = dyn_cast <gcall *> (stmt_info->stmt);
      gcc_assert (STMT_VINFO_VECTYPE (stmt_info)
		  || (call && gimple_call_lhs (call) == NULL_TREE));
      *need_to_vectorize = true;

  if (PURE_SLP_STMT (stmt_info) && !node)
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "handled only by SLP analysis\n");
      return opt_result::success ();

      && (STMT_VINFO_RELEVANT_P (stmt_info)
	  || STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def))
    /* Prefer vectorizable_call over vectorizable_simd_clone_call so
       -mveclibabi= takes preference over library functions with
       the simd attribute.  */
    ok = (vectorizable_call (vinfo, stmt_info, NULL, NULL, node, cost_vec)
	  || vectorizable_simd_clone_call (vinfo, stmt_info, NULL, NULL, node,
	  || vectorizable_conversion (vinfo, stmt_info,
				      NULL, NULL, node, cost_vec)
	  || vectorizable_operation (vinfo, stmt_info,
				     NULL, NULL, node, cost_vec)
	  || vectorizable_assignment (vinfo, stmt_info,
				      NULL, NULL, node, cost_vec)
	  || vectorizable_load (vinfo, stmt_info, NULL, NULL, node, cost_vec)
	  || vectorizable_store (vinfo, stmt_info, NULL, NULL, node, cost_vec)
	  || vectorizable_reduction (as_a <loop_vec_info> (vinfo), stmt_info,
				     node, node_instance, cost_vec)
	  || vectorizable_induction (as_a <loop_vec_info> (vinfo), stmt_info,
				     NULL, node, cost_vec)
	  || vectorizable_shift (vinfo, stmt_info, NULL, NULL, node, cost_vec)
	  || vectorizable_condition (vinfo, stmt_info,
				     NULL, NULL, node, cost_vec)
	  || vectorizable_comparison (vinfo, stmt_info, NULL, NULL, node,
	  || vectorizable_lc_phi (as_a <loop_vec_info> (vinfo),
				  stmt_info, NULL, node));
    ok = (vectorizable_call (vinfo, stmt_info, NULL, NULL, node, cost_vec)
	  || vectorizable_simd_clone_call (vinfo, stmt_info,
					   NULL, NULL, node, cost_vec)
	  || vectorizable_conversion (vinfo, stmt_info, NULL, NULL, node,
	  || vectorizable_shift (vinfo, stmt_info,
				 NULL, NULL, node, cost_vec)
	  || vectorizable_operation (vinfo, stmt_info,
				     NULL, NULL, node, cost_vec)
	  || vectorizable_assignment (vinfo, stmt_info, NULL, NULL, node,
	  || vectorizable_load (vinfo, stmt_info,
				NULL, NULL, node, cost_vec)
	  || vectorizable_store (vinfo, stmt_info,
				 NULL, NULL, node, cost_vec)
	  || vectorizable_condition (vinfo, stmt_info,
				     NULL, NULL, node, cost_vec)
	  || vectorizable_comparison (vinfo, stmt_info, NULL, NULL, node,
	  || vectorizable_phi (vinfo, stmt_info, NULL, node, cost_vec));

    return opt_result::failure_at (stmt_info->stmt,
				   " relevant stmt not supported: %G",

  /* Stmts that are (also) "live" (i.e. - that are used out of the loop)
     need extra handling, except for vectorizable reductions.  */
      && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type
      && STMT_VINFO_TYPE (stmt_info) != lc_phi_info_type
      && !can_vectorize_live_stmts (as_a <loop_vec_info> (vinfo),
				    stmt_info, NULL, node, node_instance,
    return opt_result::failure_at (stmt_info->stmt,
				   " live stmt not supported: %G",

  return opt_result::success ();
/* Function vect_transform_stmt.

   Create a vectorized stmt to replace STMT_INFO, and insert it at GSI.  */

vect_transform_stmt (vec_info *vinfo,
		     stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
		     slp_tree slp_node, slp_instance slp_node_instance)
  bool is_store = false;
  gimple *vec_stmt = NULL;

  gcc_assert (slp_node || !PURE_SLP_STMT (stmt_info));

  switch (STMT_VINFO_TYPE (stmt_info))
    case type_demotion_vec_info_type:
    case type_promotion_vec_info_type:
    case type_conversion_vec_info_type:
      done = vectorizable_conversion (vinfo, stmt_info,
				      gsi, &vec_stmt, slp_node, NULL);

    case induc_vec_info_type:
      done = vectorizable_induction (as_a <loop_vec_info> (vinfo),
				     stmt_info, &vec_stmt, slp_node,

    case shift_vec_info_type:
      done = vectorizable_shift (vinfo, stmt_info,
				 gsi, &vec_stmt, slp_node, NULL);

    case op_vec_info_type:
      done = vectorizable_operation (vinfo, stmt_info, gsi, &vec_stmt, slp_node,

    case assignment_vec_info_type:
      done = vectorizable_assignment (vinfo, stmt_info,
				      gsi, &vec_stmt, slp_node, NULL);

    case load_vec_info_type:
      done = vectorizable_load (vinfo, stmt_info, gsi, &vec_stmt, slp_node,

    case store_vec_info_type:
      done = vectorizable_store (vinfo, stmt_info,
				 gsi, &vec_stmt, slp_node, NULL);
      if (STMT_VINFO_GROUPED_ACCESS (stmt_info) && !slp_node)
	  /* In case of interleaving, the whole chain is vectorized when the
	     last store in the chain is reached.  Store stmts before the last
	     one are skipped, and there vec_stmt_info shouldn't be freed
	  stmt_vec_info group_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
	  if (DR_GROUP_STORE_COUNT (group_info) == DR_GROUP_SIZE (group_info))

    case condition_vec_info_type:
      done = vectorizable_condition (vinfo, stmt_info,
				     gsi, &vec_stmt, slp_node, NULL);

    case comparison_vec_info_type:
      done = vectorizable_comparison (vinfo, stmt_info, gsi, &vec_stmt,

    case call_vec_info_type:
      done = vectorizable_call (vinfo, stmt_info,
				gsi, &vec_stmt, slp_node, NULL);

    case call_simd_clone_vec_info_type:
      done = vectorizable_simd_clone_call (vinfo, stmt_info, gsi, &vec_stmt,

    case reduc_vec_info_type:
      done = vect_transform_reduction (as_a <loop_vec_info> (vinfo), stmt_info,
				       gsi, &vec_stmt, slp_node);

    case cycle_phi_info_type:
      done = vect_transform_cycle_phi (as_a <loop_vec_info> (vinfo), stmt_info,
				       &vec_stmt, slp_node, slp_node_instance);

    case lc_phi_info_type:
      done = vectorizable_lc_phi (as_a <loop_vec_info> (vinfo),
				  stmt_info, &vec_stmt, slp_node);

    case phi_info_type:
      done = vectorizable_phi (vinfo, stmt_info, &vec_stmt, slp_node, NULL);

      if (!STMT_VINFO_LIVE_P (stmt_info))
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			     "stmt not supported.\n");
	  gcc_unreachable ();

  if (!slp_node && vec_stmt)
    gcc_assert (STMT_VINFO_VEC_STMTS (stmt_info).exists ());

  if (STMT_VINFO_TYPE (stmt_info) == store_vec_info_type)

  /* Handle stmts whose DEF is used outside the loop-nest that is
     being vectorized.  */
  done = can_vectorize_live_stmts (vinfo, stmt_info, gsi, slp_node,
				   slp_node_instance, true, NULL);
/* Remove a group of stores (for SLP or interleaving), free their
   stmt_vec_info.  */

vect_remove_stores (vec_info *vinfo, stmt_vec_info first_stmt_info)
  stmt_vec_info next_stmt_info = first_stmt_info;

  while (next_stmt_info)
      stmt_vec_info tmp = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
      next_stmt_info = vect_orig_stmt (next_stmt_info);
      /* Free the attached stmt_vec_info and remove the stmt.  */
      vinfo->remove_stmt (next_stmt_info);
      next_stmt_info = tmp;
/* If NUNITS is nonzero, return a vector type that contains NUNITS
   elements of type SCALAR_TYPE, or null if the target doesn't support
   such a type.

   If NUNITS is zero, return a vector type that contains elements of
   type SCALAR_TYPE, choosing whichever vector size the target prefers.

   If PREVAILING_MODE is VOIDmode, we have not yet chosen a vector mode
   for this vectorization region and want to "autodetect" the best choice.
   Otherwise, PREVAILING_MODE is a previously-chosen vector TYPE_MODE
   and we want the new type to be interoperable with it.  PREVAILING_MODE
   in this case can be a scalar integer mode or a vector mode; when it
   is a vector mode, the function acts like a tree-level version of
   related_vector_mode.  */
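/* As an illustration only: with a 128-bit prevailing mode such as V8HImode
   and a 32-bit int SCALAR_TYPE, a typical target would end up with a
   4-element vector type (V4SImode); the exact choice is target-specific.  */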
get_related_vectype_for_scalar_type (machine_mode prevailing_mode,
				     tree scalar_type, poly_uint64 nunits)
  tree orig_scalar_type = scalar_type;
  scalar_mode inner_mode;
  machine_mode simd_mode;

  if (!is_int_mode (TYPE_MODE (scalar_type), &inner_mode)
      && !is_float_mode (TYPE_MODE (scalar_type), &inner_mode))

  unsigned int nbytes = GET_MODE_SIZE (inner_mode);

  /* For vector types of elements whose mode precision doesn't
     match their type's precision we use an element type of mode
     precision.  The vectorization routines will have to make sure
     they support the proper result truncation/extension.
     We also make sure to build vector types with INTEGER_TYPE
     component type only.  */
  if (INTEGRAL_TYPE_P (scalar_type)
      && (GET_MODE_BITSIZE (inner_mode) != TYPE_PRECISION (scalar_type)
	  || TREE_CODE (scalar_type) != INTEGER_TYPE))
    scalar_type = build_nonstandard_integer_type (GET_MODE_BITSIZE (inner_mode),
						  TYPE_UNSIGNED (scalar_type));

  /* We shouldn't end up building VECTOR_TYPEs of non-scalar components.
     When the component mode passes the above test simply use a type
     corresponding to that mode.  The theory is that any use that
     would cause problems with this will disable vectorization anyway.  */
  else if (!SCALAR_FLOAT_TYPE_P (scalar_type)
	   && !INTEGRAL_TYPE_P (scalar_type))
    scalar_type = lang_hooks.types.type_for_mode (inner_mode, 1);

  /* We can't build a vector type of elements with alignment bigger than
     their size.  */
  else if (nbytes < TYPE_ALIGN_UNIT (scalar_type))
    scalar_type = lang_hooks.types.type_for_mode (inner_mode,
						  TYPE_UNSIGNED (scalar_type));

  /* If we fell back to using the mode fail if there was
     no scalar type for it.  */
  if (scalar_type == NULL_TREE)

  /* If no prevailing mode was supplied, use the mode the target prefers.
     Otherwise lookup a vector mode based on the prevailing mode.  */
  if (prevailing_mode == VOIDmode)
      gcc_assert (known_eq (nunits, 0U));
      simd_mode = targetm.vectorize.preferred_simd_mode (inner_mode);
      if (SCALAR_INT_MODE_P (simd_mode))
	  /* Traditional behavior is not to take the integer mode
	     literally, but simply to use it as a way of determining
	     the vector size.  It is up to mode_for_vector to decide
	     what the TYPE_MODE should be.

	     Note that nunits == 1 is allowed in order to support single
	     element vector types.  */
	  if (!multiple_p (GET_MODE_SIZE (simd_mode), nbytes, &nunits)
	      || !mode_for_vector (inner_mode, nunits).exists (&simd_mode))
  else if (SCALAR_INT_MODE_P (prevailing_mode)
	   || !related_vector_mode (prevailing_mode,
				    inner_mode, nunits).exists (&simd_mode))
      /* Fall back to using mode_for_vector, mostly in the hope of being
	 able to use an integer mode.  */
      if (known_eq (nunits, 0U)
	  && !multiple_p (GET_MODE_SIZE (prevailing_mode), nbytes, &nunits))

      if (!mode_for_vector (inner_mode, nunits).exists (&simd_mode))

  vectype = build_vector_type_for_mode (scalar_type, simd_mode);

  /* In cases where the mode was chosen by mode_for_vector, check that
     the target actually supports the chosen mode, or that it at least
     allows the vector mode to be replaced by a like-sized integer.  */
  if (!VECTOR_MODE_P (TYPE_MODE (vectype))
      && !INTEGRAL_MODE_P (TYPE_MODE (vectype)))

  /* Re-attach the address-space qualifier if we canonicalized the scalar
     type.  */
  if (TYPE_ADDR_SPACE (orig_scalar_type) != TYPE_ADDR_SPACE (vectype))
    return build_qualified_type
	     (vectype, KEEP_QUAL_ADDR_SPACE (TYPE_QUALS (orig_scalar_type)));
/* Function get_vectype_for_scalar_type.

   Returns the vector type corresponding to SCALAR_TYPE as supported
   by the target.  If GROUP_SIZE is nonzero and we're performing BB
   vectorization, make sure that the number of elements in the vector
   is no bigger than GROUP_SIZE.  */

get_vectype_for_scalar_type (vec_info *vinfo, tree scalar_type,
			     unsigned int group_size)
  /* For BB vectorization, we should always have a group size once we've
     constructed the SLP tree; the only valid uses of zero GROUP_SIZEs
     are tentative requests during things like early data reference
     analysis and pattern recognition.  */
  if (is_a <bb_vec_info> (vinfo))
    gcc_assert (vinfo->slp_instances.is_empty () || group_size != 0);

  tree vectype = get_related_vectype_for_scalar_type (vinfo->vector_mode,
  if (vectype && vinfo->vector_mode == VOIDmode)
    vinfo->vector_mode = TYPE_MODE (vectype);

  /* Register the natural choice of vector type, before the group size
     has been applied.  */
    vinfo->used_vector_modes.add (TYPE_MODE (vectype));

  /* If the natural choice of vector type doesn't satisfy GROUP_SIZE,
     try again with an explicit number of elements.  */
      && maybe_ge (TYPE_VECTOR_SUBPARTS (vectype), group_size))
      /* Start with the biggest number of units that fits within
	 GROUP_SIZE and halve it until we find a valid vector type.
	 Usually either the first attempt will succeed or all will
	 fail (in the latter case because GROUP_SIZE is too small
	 for the target), but it's possible that a target could have
	 a hole between supported vector types.

	 If GROUP_SIZE is not a power of 2, this has the effect of
	 trying the largest power of 2 that fits within the group,
	 even though the group is not a multiple of that vector size.
	 The BB vectorizer will then try to carve up the group into
	 smaller pieces.  */
      unsigned int nunits = 1 << floor_log2 (group_size);
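      /* As an illustration only: for a group of 6 elements this starts at
	 nunits == 4 (the largest power of 2 not exceeding the group size)
	 and halves on failure, so a target without a 4-element type may
	 still provide a 2-element one.  */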
	  vectype = get_related_vectype_for_scalar_type (vinfo->vector_mode,
							 scalar_type, nunits);
      while (nunits > 1 && !vectype);
/* Return the vector type corresponding to SCALAR_TYPE as supported
   by the target.  NODE, if nonnull, is the SLP tree node that will
   use the returned vector type.  */

get_vectype_for_scalar_type (vec_info *vinfo, tree scalar_type, slp_tree node)
  unsigned int group_size = 0;
    group_size = SLP_TREE_LANES (node);
  return get_vectype_for_scalar_type (vinfo, scalar_type, group_size);
/* Function get_mask_type_for_scalar_type.

   Returns the mask type corresponding to a result of comparison
   of vectors of specified SCALAR_TYPE as supported by target.
   If GROUP_SIZE is nonzero and we're performing BB vectorization,
   make sure that the number of elements in the vector is no bigger
   than GROUP_SIZE.  */

get_mask_type_for_scalar_type (vec_info *vinfo, tree scalar_type,
			       unsigned int group_size)
  tree vectype = get_vectype_for_scalar_type (vinfo, scalar_type, group_size);

  return truth_type_for (vectype);
/* Function get_same_sized_vectype

   Returns a vector type corresponding to SCALAR_TYPE of size
   VECTOR_TYPE if supported by the target.  */

get_same_sized_vectype (tree scalar_type, tree vector_type)
  if (VECT_SCALAR_BOOLEAN_TYPE_P (scalar_type))
    return truth_type_for (vector_type);

  poly_uint64 nunits;
  if (!multiple_p (GET_MODE_SIZE (TYPE_MODE (vector_type)),
		   GET_MODE_SIZE (TYPE_MODE (scalar_type)), &nunits))

  return get_related_vectype_for_scalar_type (TYPE_MODE (vector_type),
					      scalar_type, nunits);
/* Return true if replacing LOOP_VINFO->vector_mode with VECTOR_MODE
   would not change the chosen vector modes.  */

vect_chooses_same_modes_p (vec_info *vinfo, machine_mode vector_mode)
  for (vec_info::mode_set::iterator i = vinfo->used_vector_modes.begin ();
       i != vinfo->used_vector_modes.end (); ++i)
    if (!VECTOR_MODE_P (*i)
	|| related_vector_mode (vector_mode, GET_MODE_INNER (*i), 0) != *i)
/* Function vect_is_simple_use.

   VINFO - the vect info of the loop or basic block that is being vectorized.
   OPERAND - operand in the loop or bb.

   DEF_STMT_INFO_OUT (optional) - information about the defining stmt in
     case OPERAND is an SSA_NAME that is defined in the vectorizable region
   DEF_STMT_OUT (optional) - the defining stmt in case OPERAND is an SSA_NAME;
     the definition could be anywhere in the function
   DT - the type of definition

   Returns whether a stmt with OPERAND can be vectorized.
   For loops, supportable operands are constants, loop invariants, and operands
   that are defined by the current iteration of the loop.  Unsupportable
   operands are those that are defined by a previous iteration of the loop (as
   is the case in reduction/induction computations).
   For basic blocks, supportable operands are constants and bb invariants.
   For now, operands defined outside the basic block are not supported.  */

vect_is_simple_use (tree operand, vec_info *vinfo, enum vect_def_type *dt,
		    stmt_vec_info *def_stmt_info_out, gimple **def_stmt_out)
  if (def_stmt_info_out)
    *def_stmt_info_out = NULL;
    *def_stmt_out = NULL;
  *dt = vect_unknown_def_type;

  if (dump_enabled_p ())
      dump_printf_loc (MSG_NOTE, vect_location,
		       "vect_is_simple_use: operand ");
      if (TREE_CODE (operand) == SSA_NAME
	  && !SSA_NAME_IS_DEFAULT_DEF (operand))
	dump_gimple_expr (MSG_NOTE, TDF_SLIM, SSA_NAME_DEF_STMT (operand), 0);
	dump_generic_expr (MSG_NOTE, TDF_SLIM, operand);

  if (CONSTANT_CLASS_P (operand))
    *dt = vect_constant_def;
  else if (is_gimple_min_invariant (operand))
    *dt = vect_external_def;
  else if (TREE_CODE (operand) != SSA_NAME)
    *dt = vect_unknown_def_type;
  else if (SSA_NAME_IS_DEFAULT_DEF (operand))
    *dt = vect_external_def;
      gimple *def_stmt = SSA_NAME_DEF_STMT (operand);
      stmt_vec_info stmt_vinfo = vinfo->lookup_def (operand);
	*dt = vect_external_def;
	  stmt_vinfo = vect_stmt_to_vectorize (stmt_vinfo);
	  def_stmt = stmt_vinfo->stmt;
	  switch (gimple_code (def_stmt))
	    case GIMPLE_ASSIGN:
	      *dt = STMT_VINFO_DEF_TYPE (stmt_vinfo);
	      *dt = vect_unknown_def_type;
	  if (def_stmt_info_out)
	    *def_stmt_info_out = stmt_vinfo;
	*def_stmt_out = def_stmt;

  if (dump_enabled_p ())
      dump_printf (MSG_NOTE, ", type of def: ");
	case vect_uninitialized_def:
	  dump_printf (MSG_NOTE, "uninitialized\n");
	case vect_constant_def:
	  dump_printf (MSG_NOTE, "constant\n");
	case vect_external_def:
	  dump_printf (MSG_NOTE, "external\n");
	case vect_internal_def:
	  dump_printf (MSG_NOTE, "internal\n");
	case vect_induction_def:
	  dump_printf (MSG_NOTE, "induction\n");
	case vect_reduction_def:
	  dump_printf (MSG_NOTE, "reduction\n");
	case vect_double_reduction_def:
	  dump_printf (MSG_NOTE, "double reduction\n");
	case vect_nested_cycle:
	  dump_printf (MSG_NOTE, "nested cycle\n");
	case vect_unknown_def_type:
	  dump_printf (MSG_NOTE, "unknown\n");

  if (*dt == vect_unknown_def_type)
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "Unsupported pattern.\n");
/* Function vect_is_simple_use.

   Same as vect_is_simple_use but also determines the vector operand
   type of OPERAND and stores it to *VECTYPE.  If the definition of
   OPERAND is vect_uninitialized_def, vect_constant_def or
   vect_external_def *VECTYPE will be set to NULL_TREE and the caller
   is responsible to compute the best suited vector type for the
   use.  */

vect_is_simple_use (tree operand, vec_info *vinfo, enum vect_def_type *dt,
		    tree *vectype, stmt_vec_info *def_stmt_info_out,
		    gimple **def_stmt_out)
  stmt_vec_info def_stmt_info;
  if (!vect_is_simple_use (operand, vinfo, dt, &def_stmt_info, &def_stmt))

    *def_stmt_out = def_stmt;
  if (def_stmt_info_out)
    *def_stmt_info_out = def_stmt_info;

  /* Now get a vector type if the def is internal, otherwise supply
     NULL_TREE and leave it up to the caller to figure out a proper
     type for the use stmt.  */
  if (*dt == vect_internal_def
      || *dt == vect_induction_def
      || *dt == vect_reduction_def
      || *dt == vect_double_reduction_def
      || *dt == vect_nested_cycle)
      *vectype = STMT_VINFO_VECTYPE (def_stmt_info);
      gcc_assert (*vectype != NULL_TREE);
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "vect_is_simple_use: vectype %T\n", *vectype);
  else if (*dt == vect_uninitialized_def
	   || *dt == vect_constant_def
	   || *dt == vect_external_def)
    *vectype = NULL_TREE;
    gcc_unreachable ();
/* Function vect_is_simple_use.

   Same as vect_is_simple_use but determines the operand by operand
   position OPERAND from either STMT or SLP_NODE, filling in *OP
   and *SLP_DEF (when SLP_NODE is not NULL).  */

vect_is_simple_use (vec_info *vinfo, stmt_vec_info stmt, slp_tree slp_node,
		    unsigned operand, tree *op, slp_tree *slp_def,
		    enum vect_def_type *dt,
		    tree *vectype, stmt_vec_info *def_stmt_info_out)
      slp_tree child = SLP_TREE_CHILDREN (slp_node)[operand];
      *vectype = SLP_TREE_VECTYPE (child);
      if (SLP_TREE_DEF_TYPE (child) == vect_internal_def)
	  *op = gimple_get_lhs (SLP_TREE_REPRESENTATIVE (child)->stmt);
	  return vect_is_simple_use (*op, vinfo, dt, def_stmt_info_out);
	  if (def_stmt_info_out)
	    *def_stmt_info_out = NULL;
	  *op = SLP_TREE_SCALAR_OPS (child)[0];
	  *dt = SLP_TREE_DEF_TYPE (child);
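  /* Without an SLP node, pick the requested operand directly from the
     statement: for a COND_EXPR with an embedded comparison the comparison
     operands come first, and for internal store functions the stored value
     is located via internal_fn_stored_value_index.  */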
  if (gassign *ass = dyn_cast <gassign *> (stmt->stmt))
      if (gimple_assign_rhs_code (ass) == COND_EXPR
	  && COMPARISON_CLASS_P (gimple_assign_rhs1 (ass)))
	    *op = TREE_OPERAND (gimple_assign_rhs1 (ass), operand);
	    *op = gimple_op (ass, operand);
      else if (gimple_assign_rhs_code (ass) == VIEW_CONVERT_EXPR)
	*op = TREE_OPERAND (gimple_assign_rhs1 (ass), 0);
	*op = gimple_op (ass, operand + 1);
  else if (gcall *call = dyn_cast <gcall *> (stmt->stmt))
      if (gimple_call_internal_p (call)
	  && internal_store_fn_p (gimple_call_internal_fn (call)))
	operand = internal_fn_stored_value_index (gimple_call_internal_fn
      *op = gimple_call_arg (call, operand);
    gcc_unreachable ();
  return vect_is_simple_use (*op, vinfo, dt, vectype, def_stmt_info_out);
/* If OP is not NULL and is external or constant update its vector
   type with VECTYPE.  Returns true if successful or false if not,
   for example when conflicting vector types are present.  */

vect_maybe_update_slp_op_vectype (slp_tree op, tree vectype)
  if (!op || SLP_TREE_DEF_TYPE (op) == vect_internal_def)
  if (SLP_TREE_VECTYPE (op))
    return types_compatible_p (SLP_TREE_VECTYPE (op), vectype);
  SLP_TREE_VECTYPE (op) = vectype;
11422 /* Function supportable_widening_operation
11424 Check whether an operation represented by the code CODE is a
11425 widening operation that is supported by the target platform in
11426 vector form (i.e., when operating on arguments of type VECTYPE_IN
11427 producing a result of type VECTYPE_OUT).
11429 Widening operations we currently support are NOP (CONVERT), FLOAT,
11430 FIX_TRUNC and WIDEN_MULT. This function checks if these operations
11431 are supported by the target platform either directly (via vector
11432 tree-codes), or via target builtins.
11435 - CODE1 and CODE2 are codes of vector operations to be used when
11436 vectorizing the operation, if available.
11437 - MULTI_STEP_CVT determines the number of required intermediate steps in
11438 case of multi-step conversion (like char->short->int - in that case
11439 MULTI_STEP_CVT will be 1).
11440 - INTERM_TYPES contains the intermediate type required to perform the
11441 widening operation (short in the above example). */
11444 supportable_widening_operation (vec_info
*vinfo
,
11445 enum tree_code code
, stmt_vec_info stmt_info
,
11446 tree vectype_out
, tree vectype_in
,
11447 enum tree_code
*code1
, enum tree_code
*code2
,
11448 int *multi_step_cvt
,
11449 vec
<tree
> *interm_types
)
11451 loop_vec_info loop_info
= dyn_cast
<loop_vec_info
> (vinfo
);
11452 class loop
*vect_loop
= NULL
;
11453 machine_mode vec_mode
;
11454 enum insn_code icode1
, icode2
;
11455 optab optab1
, optab2
;
11456 tree vectype
= vectype_in
;
11457 tree wide_vectype
= vectype_out
;
11458 enum tree_code c1
, c2
;
11460 tree prev_type
, intermediate_type
;
11461 machine_mode intermediate_mode
, prev_mode
;
11462 optab optab3
, optab4
;
11464 *multi_step_cvt
= 0;
11466 vect_loop
= LOOP_VINFO_LOOP (loop_info
);
11470 case WIDEN_MULT_EXPR
:
11471 /* The result of a vectorized widening operation usually requires
11472 two vectors (because the widened results do not fit into one vector).
11473 The generated vector results would normally be expected to be
11474 generated in the same order as in the original scalar computation,
11475 i.e. if 8 results are generated in each vector iteration, they are
11476 to be organized as follows:
11477 vect1: [res1,res2,res3,res4],
11478 vect2: [res5,res6,res7,res8].
11480 However, in the special case that the result of the widening
11481 operation is used in a reduction computation only, the order doesn't
11482 matter (because when vectorizing a reduction we change the order of
11483 the computation). Some targets can take advantage of this and
11484 generate more efficient code. For example, targets like Altivec,
11485 that support widen_mult using a sequence of {mult_even,mult_odd}
11486 generate the following vectors:
11487 vect1: [res1,res3,res5,res7],
11488 vect2: [res2,res4,res6,res8].
11490 When vectorizing outer-loops, we execute the inner-loop sequentially
11491 (each vectorized inner-loop iteration contributes to VF outer-loop
11492 iterations in parallel). We therefore don't allow to change the
11493 order of the computation in the inner-loop during outer-loop
11495 /* TODO: Another case in which order doesn't *really* matter is when we
11496 widen and then contract again, e.g. (short)((int)x * y >> 8).
11497 Normally, pack_trunc performs an even/odd permute, whereas the
11498 repack from an even/odd expansion would be an interleave, which
11499 would be significantly simpler for e.g. AVX2. */
11500 /* In any case, in order to avoid duplicating the code below, recurse
11501 on VEC_WIDEN_MULT_EVEN_EXPR. If it succeeds, all the return values
11502 are properly set up for the caller. If we fail, we'll continue with
11503 a VEC_WIDEN_MULT_LO/HI_EXPR check. */
11505 && STMT_VINFO_RELEVANT (stmt_info
) == vect_used_by_reduction
11506 && !nested_in_vect_loop_p (vect_loop
, stmt_info
)
11507 && supportable_widening_operation (vinfo
, VEC_WIDEN_MULT_EVEN_EXPR
,
11508 stmt_info
, vectype_out
,
11509 vectype_in
, code1
, code2
,
11510 multi_step_cvt
, interm_types
))
11512 /* Elements in a vector with vect_used_by_reduction property cannot
11513 be reordered if the use chain with this property does not have the
11514 same operation. One such an example is s += a * b, where elements
11515 in a and b cannot be reordered. Here we check if the vector defined
11516 by STMT is only directly used in the reduction statement. */
11517 tree lhs
= gimple_assign_lhs (stmt_info
->stmt
);
11518 stmt_vec_info use_stmt_info
= loop_info
->lookup_single_use (lhs
);
11520 && STMT_VINFO_DEF_TYPE (use_stmt_info
) == vect_reduction_def
)
11523 c1
= VEC_WIDEN_MULT_LO_EXPR
;
11524 c2
= VEC_WIDEN_MULT_HI_EXPR
;
11527 case DOT_PROD_EXPR
:
11528 c1
= DOT_PROD_EXPR
;
11529 c2
= DOT_PROD_EXPR
;
11537 case VEC_WIDEN_MULT_EVEN_EXPR
:
11538 /* Support the recursion induced just above. */
11539 c1
= VEC_WIDEN_MULT_EVEN_EXPR
;
11540 c2
= VEC_WIDEN_MULT_ODD_EXPR
;
11543 case WIDEN_LSHIFT_EXPR
:
11544 c1
= VEC_WIDEN_LSHIFT_LO_EXPR
;
11545 c2
= VEC_WIDEN_LSHIFT_HI_EXPR
;
11548 case WIDEN_PLUS_EXPR
:
11549 c1
= VEC_WIDEN_PLUS_LO_EXPR
;
11550 c2
= VEC_WIDEN_PLUS_HI_EXPR
;
11553 case WIDEN_MINUS_EXPR
:
11554 c1
= VEC_WIDEN_MINUS_LO_EXPR
;
11555 c2
= VEC_WIDEN_MINUS_HI_EXPR
;
11559 c1
= VEC_UNPACK_LO_EXPR
;
11560 c2
= VEC_UNPACK_HI_EXPR
;
11564 c1
= VEC_UNPACK_FLOAT_LO_EXPR
;
11565 c2
= VEC_UNPACK_FLOAT_HI_EXPR
;
11568 case FIX_TRUNC_EXPR
:
11569 c1
= VEC_UNPACK_FIX_TRUNC_LO_EXPR
;
11570 c2
= VEC_UNPACK_FIX_TRUNC_HI_EXPR
;
11574 gcc_unreachable ();
11577 if (BYTES_BIG_ENDIAN
&& c1
!= VEC_WIDEN_MULT_EVEN_EXPR
)
11578 std::swap (c1
, c2
);
11580 if (code
== FIX_TRUNC_EXPR
)
11582 /* The signedness is determined from output operand. */
11583 optab1
= optab_for_tree_code (c1
, vectype_out
, optab_default
);
11584 optab2
= optab_for_tree_code (c2
, vectype_out
, optab_default
);
11586 else if (CONVERT_EXPR_CODE_P (code
)
11587 && VECTOR_BOOLEAN_TYPE_P (wide_vectype
)
11588 && VECTOR_BOOLEAN_TYPE_P (vectype
)
11589 && TYPE_MODE (wide_vectype
) == TYPE_MODE (vectype
)
11590 && SCALAR_INT_MODE_P (TYPE_MODE (vectype
)))
11592 /* If the input and result modes are the same, a different optab
11593 is needed where we pass in the number of units in vectype. */
11594 optab1
= vec_unpacks_sbool_lo_optab
;
11595 optab2
= vec_unpacks_sbool_hi_optab
;
11599 optab1
= optab_for_tree_code (c1
, vectype
, optab_default
);
11600 optab2
= optab_for_tree_code (c2
, vectype
, optab_default
);
11603 if (!optab1
|| !optab2
)
11606 vec_mode
= TYPE_MODE (vectype
);
11607 if ((icode1
= optab_handler (optab1
, vec_mode
)) == CODE_FOR_nothing
11608 || (icode2
= optab_handler (optab2
, vec_mode
)) == CODE_FOR_nothing
)

  if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
      && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
    {
      if (!VECTOR_BOOLEAN_TYPE_P (vectype))
	return true;
      /* For scalar masks we may have different boolean
	 vector types having the same QImode.  Thus we
	 add additional check for elements number.  */
      if (known_eq (TYPE_VECTOR_SUBPARTS (vectype),
		    TYPE_VECTOR_SUBPARTS (wide_vectype) * 2))
	return true;
    }

  /* Check if it's a multi-step conversion that can be done using intermediate
     types.  */

  prev_type = vectype;
  prev_mode = vec_mode;

  if (!CONVERT_EXPR_CODE_P (code))
    return false;

  /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
     intermediate steps in promotion sequence.  We try
     MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do
     not.  */
  interm_types->create (MAX_INTERM_CVT_STEPS);
  for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
    {
      intermediate_mode = insn_data[icode1].operand[0].mode;
      if (VECTOR_BOOLEAN_TYPE_P (prev_type))
	intermediate_type
	  = vect_halve_mask_nunits (prev_type, intermediate_mode);
      else
	intermediate_type
	  = lang_hooks.types.type_for_mode (intermediate_mode,
					    TYPE_UNSIGNED (prev_type));
      if (VECTOR_BOOLEAN_TYPE_P (intermediate_type)
	  && VECTOR_BOOLEAN_TYPE_P (prev_type)
	  && intermediate_mode == prev_mode
	  && SCALAR_INT_MODE_P (prev_mode))
	{
	  /* If the input and result modes are the same, a different optab
	     is needed where we pass in the number of units in vectype.  */
	  optab3 = vec_unpacks_sbool_lo_optab;
	  optab4 = vec_unpacks_sbool_hi_optab;
	}
      else
	{
	  optab3 = optab_for_tree_code (c1, intermediate_type, optab_default);
	  optab4 = optab_for_tree_code (c2, intermediate_type, optab_default);
	}

      if (!optab3 || !optab4
	  || (icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing
	  || insn_data[icode1].operand[0].mode != intermediate_mode
	  || (icode2 = optab_handler (optab2, prev_mode)) == CODE_FOR_nothing
	  || insn_data[icode2].operand[0].mode != intermediate_mode
	  || ((icode1 = optab_handler (optab3, intermediate_mode))
	      == CODE_FOR_nothing)
	  || ((icode2 = optab_handler (optab4, intermediate_mode))
	      == CODE_FOR_nothing))
	break;

      interm_types->quick_push (intermediate_type);
      (*multi_step_cvt)++;

      if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
	  && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
	{
	  if (!VECTOR_BOOLEAN_TYPE_P (vectype))
	    return true;
	  if (known_eq (TYPE_VECTOR_SUBPARTS (intermediate_type),
			TYPE_VECTOR_SUBPARTS (wide_vectype) * 2))
	    return true;
	}

      prev_type = intermediate_type;
      prev_mode = intermediate_mode;
    }

  interm_types->release ();
  return false;
}
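
/* Illustrative usage sketch (not part of the original file): how a caller
   could query the widening support implemented above.  WIDE_CODE, the
   statement and the two vector types are assumed to come from the caller's
   own analysis; the helper name is hypothetical.  */
#if 0
static bool
example_query_widening (vec_info *vinfo, enum tree_code wide_code,
			stmt_vec_info stmt_info,
			tree vectype_out, tree vectype_in)
{
  enum tree_code code1, code2;
  int multi_step_cvt = 0;
  auto_vec<tree> interm_types;

  if (!supportable_widening_operation (vinfo, wide_code, stmt_info,
				       vectype_out, vectype_in,
				       &code1, &code2, &multi_step_cvt,
				       &interm_types))
    return false;

  /* On success CODE1/CODE2 are the LO/HI (or EVEN/ODD) vector codes and
     INTERM_TYPES holds one vector type per extra promotion step (see the
     MULTI_STEP_CVT description in the narrowing function's comment).  */
  return true;
}
#endif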

/* Function supportable_narrowing_operation

   Check whether an operation represented by the code CODE is a
   narrowing operation that is supported by the target platform in
   vector form (i.e., when operating on arguments of type VECTYPE_IN
   and producing a result of type VECTYPE_OUT).

   Narrowing operations we currently support are NOP (CONVERT), FIX_TRUNC
   and FLOAT.  This function checks if these operations are supported by
   the target platform directly via vector tree-codes.

   Output:
   - CODE1 is the code of a vector operation to be used when
   vectorizing the operation, if available.
   - MULTI_STEP_CVT determines the number of required intermediate steps in
   case of multi-step conversion (like int->short->char - in that case
   MULTI_STEP_CVT will be 1).
   - INTERM_TYPES contains the intermediate type required to perform the
   narrowing operation (short in the above example).  */

bool
supportable_narrowing_operation (enum tree_code code,
				 tree vectype_out, tree vectype_in,
				 enum tree_code *code1, int *multi_step_cvt,
				 vec<tree> *interm_types)
{
  machine_mode vec_mode;
  enum insn_code icode1;
  optab optab1, interm_optab;
  tree vectype = vectype_in;
  tree narrow_vectype = vectype_out;
  enum tree_code c1;
  tree intermediate_type, prev_type;
  machine_mode intermediate_mode, prev_mode;
  int i;
  bool uns;

  *multi_step_cvt = 0;
  switch (code)
    {
    CASE_CONVERT:
      c1 = VEC_PACK_TRUNC_EXPR;
      if (VECTOR_BOOLEAN_TYPE_P (narrow_vectype)
	  && VECTOR_BOOLEAN_TYPE_P (vectype)
	  && TYPE_MODE (narrow_vectype) == TYPE_MODE (vectype)
	  && SCALAR_INT_MODE_P (TYPE_MODE (vectype)))
	optab1 = vec_pack_sbool_trunc_optab;
      else
	optab1 = optab_for_tree_code (c1, vectype, optab_default);
      break;

    case FIX_TRUNC_EXPR:
      c1 = VEC_PACK_FIX_TRUNC_EXPR;
      /* The signedness is determined from output operand.  */
      optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
      break;

    case FLOAT_EXPR:
      c1 = VEC_PACK_FLOAT_EXPR;
      optab1 = optab_for_tree_code (c1, vectype, optab_default);
      break;

    default:
      gcc_unreachable ();
    }

  vec_mode = TYPE_MODE (vectype);
  if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing)
    return false;

  if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
    {
      if (!VECTOR_BOOLEAN_TYPE_P (vectype))
	return true;
      /* For scalar masks we may have different boolean
	 vector types having the same QImode.  Thus we
	 add additional check for elements number.  */
      if (known_eq (TYPE_VECTOR_SUBPARTS (vectype) * 2,
		    TYPE_VECTOR_SUBPARTS (narrow_vectype)))
	return true;
    }

  if (code == FLOAT_EXPR)
    return false;

  /* Check if it's a multi-step conversion that can be done using intermediate
     types.  */
  prev_mode = vec_mode;
  prev_type = vectype;
  if (code == FIX_TRUNC_EXPR)
    uns = TYPE_UNSIGNED (vectype_out);
  else
    uns = TYPE_UNSIGNED (vectype);

  /* For multi-step FIX_TRUNC_EXPR prefer signed floating to integer
     conversion over unsigned, as unsigned FIX_TRUNC_EXPR is often more
     costly than signed.  */
  if (code == FIX_TRUNC_EXPR && uns)
    {
      enum insn_code icode2;

      intermediate_type
	= lang_hooks.types.type_for_mode (TYPE_MODE (vectype_out), 0);
      interm_optab
	= optab_for_tree_code (c1, intermediate_type, optab_default);
      if (interm_optab != unknown_optab
	  && (icode2 = optab_handler (optab1, vec_mode)) != CODE_FOR_nothing
	  && insn_data[icode1].operand[0].mode
	     == insn_data[icode2].operand[0].mode)
	{
	  uns = false;
	  optab1 = interm_optab;
	  icode1 = icode2;
	}
    }

  /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
     intermediate steps in promotion sequence.  We try
     MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do not.  */
  interm_types->create (MAX_INTERM_CVT_STEPS);
  for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
    {
      intermediate_mode = insn_data[icode1].operand[0].mode;
      if (VECTOR_BOOLEAN_TYPE_P (prev_type))
	intermediate_type
	  = vect_double_mask_nunits (prev_type, intermediate_mode);
      else
	intermediate_type
	  = lang_hooks.types.type_for_mode (intermediate_mode, uns);
      if (VECTOR_BOOLEAN_TYPE_P (intermediate_type)
	  && VECTOR_BOOLEAN_TYPE_P (prev_type)
	  && intermediate_mode == prev_mode
	  && SCALAR_INT_MODE_P (prev_mode))
	interm_optab = vec_pack_sbool_trunc_optab;
      else
	interm_optab
	  = optab_for_tree_code (VEC_PACK_TRUNC_EXPR, intermediate_type,
				 optab_default);
      if (!interm_optab
	  || ((icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing)
	  || insn_data[icode1].operand[0].mode != intermediate_mode
	  || ((icode1 = optab_handler (interm_optab, intermediate_mode))
	      == CODE_FOR_nothing))
	break;

      interm_types->quick_push (intermediate_type);
      (*multi_step_cvt)++;

      if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
	{
	  if (!VECTOR_BOOLEAN_TYPE_P (vectype))
	    return true;
	  if (known_eq (TYPE_VECTOR_SUBPARTS (intermediate_type) * 2,
			TYPE_VECTOR_SUBPARTS (narrow_vectype)))
	    return true;
	}

      prev_mode = intermediate_mode;
      prev_type = intermediate_type;
      optab1 = interm_optab;
    }

  interm_types->release ();
  return false;
}
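
/* Illustrative usage sketch (not part of the original file): querying
   narrowing support for a conversion such as int -> char.  Per the comment
   above supportable_narrowing_operation, an int->short->char sequence is
   reported with MULTI_STEP_CVT == 1 and the short vector type pushed onto
   INTERM_TYPES.  The vector types and the helper name are assumptions.  */
#if 0
static bool
example_query_narrowing (tree char_vectype_out, tree int_vectype_in)
{
  enum tree_code code1;
  int multi_step_cvt = 0;
  auto_vec<tree> interm_types;

  /* NOP_EXPR stands for the NOP (CONVERT) case named in the comment.  */
  if (!supportable_narrowing_operation (NOP_EXPR, char_vectype_out,
					int_vectype_in, &code1,
					&multi_step_cvt, &interm_types))
    return false;

  /* CODE1 is then a packing code such as VEC_PACK_TRUNC_EXPR; each element
     of INTERM_TYPES is one intermediate vector type in the chain.  */
  return true;
}
#endif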

/* Generate and return a statement that sets vector mask MASK such that
   MASK[I] is true iff J + START_INDEX < END_INDEX for all J <= I.  */

gcall *
vect_gen_while (tree mask, tree start_index, tree end_index)
{
  tree cmp_type = TREE_TYPE (start_index);
  tree mask_type = TREE_TYPE (mask);
  gcc_checking_assert (direct_internal_fn_supported_p (IFN_WHILE_ULT,
						       cmp_type, mask_type,
						       OPTIMIZE_FOR_SPEED));
  gcall *call = gimple_build_call_internal (IFN_WHILE_ULT, 3,
					    start_index, end_index,
					    build_zero_cst (mask_type));
  gimple_call_set_lhs (call, mask);
  return call;
}
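
/* Illustrative scalar model (not part of the original file) of the mask
   built by the IFN_WHILE_ULT call above: lane I is set iff
   I + START_INDEX < END_INDEX, which matches the "for all J <= I" wording
   in the comment because the predicate is monotonic in I.  NUNITS and the
   function name are assumptions for the sketch.  */
#if 0
static void
while_ult_reference (unsigned HOST_WIDE_INT start_index,
		     unsigned HOST_WIDE_INT end_index,
		     bool *mask, unsigned int nunits)
{
  for (unsigned int i = 0; i < nunits; ++i)
    mask[i] = (i + start_index < end_index);
}
#endif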

/* Generate a vector mask of type MASK_TYPE for which index I is false iff
   J + START_INDEX < END_INDEX for all J <= I.  Add the statements to SEQ.  */

tree
vect_gen_while_not (gimple_seq *seq, tree mask_type, tree start_index,
		    tree end_index)
{
  tree tmp = make_ssa_name (mask_type);
  gcall *call = vect_gen_while (tmp, start_index, end_index);
  gimple_seq_add_stmt (seq, call);
  return gimple_build (seq, BIT_NOT_EXPR, mask_type, tmp);
}

/* Try to compute the vector types required to vectorize STMT_INFO,
   returning true on success and false if vectorization isn't possible.
   If GROUP_SIZE is nonzero and we're performing BB vectorization,
   make sure that the number of elements in the vectors is no bigger
   than GROUP_SIZE.

   On success:

   - Set *STMT_VECTYPE_OUT to:
     - NULL_TREE if the statement doesn't need to be vectorized;
     - the equivalent of STMT_VINFO_VECTYPE otherwise.

   - Set *NUNITS_VECTYPE_OUT to the vector type that contains the maximum
     number of units needed to vectorize STMT_INFO, or NULL_TREE if the
     statement does not help to determine the overall number of units.  */

opt_result
vect_get_vector_types_for_stmt (vec_info *vinfo, stmt_vec_info stmt_info,
				tree *stmt_vectype_out,
				tree *nunits_vectype_out,
				unsigned int group_size)
{
  gimple *stmt = stmt_info->stmt;

  /* For BB vectorization, we should always have a group size once we've
     constructed the SLP tree; the only valid uses of zero GROUP_SIZEs
     are tentative requests during things like early data reference
     analysis and pattern recognition.  */
  if (is_a <bb_vec_info> (vinfo))
    gcc_assert (vinfo->slp_instances.is_empty () || group_size != 0);

  *stmt_vectype_out = NULL_TREE;
  *nunits_vectype_out = NULL_TREE;

  if (gimple_get_lhs (stmt) == NULL_TREE
      /* MASK_STORE has no lhs, but is ok.  */
      && !gimple_call_internal_p (stmt, IFN_MASK_STORE))
    {
      if (is_a <gcall *> (stmt))
	{
	  /* Ignore calls with no lhs.  These must be calls to
	     #pragma omp simd functions, and what vectorization factor
	     it really needs can't be determined until
	     vectorizable_simd_clone_call.  */
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_NOTE, vect_location,
			     "defer to SIMD clone analysis.\n");
	  return opt_result::success ();
	}

      return opt_result::failure_at (stmt,
				     "not vectorized: irregular stmt.%G", stmt);
    }

  if (VECTOR_MODE_P (TYPE_MODE (gimple_expr_type (stmt))))
    return opt_result::failure_at (stmt,
				   "not vectorized: vector stmt in loop:%G",
				   stmt);

  tree vectype;
  tree scalar_type = NULL_TREE;
  if (group_size == 0 && STMT_VINFO_VECTYPE (stmt_info))
    {
      vectype = STMT_VINFO_VECTYPE (stmt_info);
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "precomputed vectype: %T\n", vectype);
    }
  else if (vect_use_mask_type_p (stmt_info))
    {
      unsigned int precision = stmt_info->mask_precision;
      scalar_type = build_nonstandard_integer_type (precision, 1);
      vectype = get_mask_type_for_scalar_type (vinfo, scalar_type, group_size);
      if (!vectype)
	return opt_result::failure_at (stmt, "not vectorized: unsupported"
				       " data-type %T\n", scalar_type);
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location, "vectype: %T\n", vectype);
    }
  else
    {
      if (data_reference *dr = STMT_VINFO_DATA_REF (stmt_info))
	scalar_type = TREE_TYPE (DR_REF (dr));
      else if (gimple_call_internal_p (stmt, IFN_MASK_STORE))
	scalar_type = TREE_TYPE (gimple_call_arg (stmt, 3));
      else
	scalar_type = TREE_TYPE (gimple_get_lhs (stmt));

      if (dump_enabled_p ())
	{
	  if (group_size)
	    dump_printf_loc (MSG_NOTE, vect_location,
			     "get vectype for scalar type (group size %d):"
			     " %T\n", group_size, scalar_type);
	  else
	    dump_printf_loc (MSG_NOTE, vect_location,
			     "get vectype for scalar type: %T\n", scalar_type);
	}
      vectype = get_vectype_for_scalar_type (vinfo, scalar_type, group_size);
      if (!vectype)
	return opt_result::failure_at (stmt,
				       "not vectorized:"
				       " unsupported data-type %T\n",
				       scalar_type);

      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location, "vectype: %T\n", vectype);
    }

  *stmt_vectype_out = vectype;

  /* Don't try to compute scalar types if the stmt produces a boolean
     vector; use the existing vector type instead.  */
  tree nunits_vectype = vectype;
  if (!VECTOR_BOOLEAN_TYPE_P (vectype))
    {
      /* The number of units is set according to the smallest scalar
	 type (or the largest vector size, but we only support one
	 vector size per vectorization).  */
      HOST_WIDE_INT dummy;
      scalar_type = vect_get_smallest_scalar_type (stmt_info, &dummy, &dummy);
      if (scalar_type != TREE_TYPE (vectype))
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_NOTE, vect_location,
			     "get vectype for smallest scalar type: %T\n",
			     scalar_type);
	  nunits_vectype = get_vectype_for_scalar_type (vinfo, scalar_type,
							group_size);
	  if (!nunits_vectype)
	    return opt_result::failure_at
	      (stmt, "not vectorized: unsupported data-type %T\n",
	       scalar_type);
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_NOTE, vect_location, "nunits vectype: %T\n",
			     nunits_vectype);
	}
    }

  gcc_assert (multiple_p (TYPE_VECTOR_SUBPARTS (nunits_vectype),
			  TYPE_VECTOR_SUBPARTS (*stmt_vectype_out)));

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location, "nunits = ");
      dump_dec (MSG_NOTE, TYPE_VECTOR_SUBPARTS (nunits_vectype));
      dump_printf (MSG_NOTE, "\n");
    }

  *nunits_vectype_out = nunits_vectype;
  return opt_result::success ();
}
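
/* Illustrative example (not part of the original file) of the two types
   computed by vect_get_vector_types_for_stmt for a statement that mixes
   element widths; the concrete lane counts assume 128-bit vectors.  */
#if 0
void
example_mixed_width_loop (int *b, short *a, int n)
{
  for (int i = 0; i < n; ++i)
    /* The conversion statement's own vectype (*STMT_VECTYPE_OUT) is based
       on the int result, e.g. 4 x int, while the smallest scalar type is
       short, so *NUNITS_VECTYPE_OUT would be 8 x short; 8 is a multiple of
       4, matching the multiple_p assertion above.  */
    b[i] = (int) a[i];
}
#endif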

/* Generate and return statement sequence that sets vector length LEN that is:

   min_of_start_and_end = min (START_INDEX, END_INDEX);
   left_len = END_INDEX - min_of_start_and_end;
   rhs = min (left_len, LEN_LIMIT);
   LEN = rhs;

   Note: the cost of the code generated by this function is modeled
   by vect_estimate_min_profitable_iters, so changes here may need
   corresponding changes there.  */

gimple_seq
vect_gen_len (tree len, tree start_index, tree end_index, tree len_limit)
{
  gimple_seq stmts = NULL;
  tree len_type = TREE_TYPE (len);
  gcc_assert (TREE_TYPE (start_index) == len_type);

  tree min = gimple_build (&stmts, MIN_EXPR, len_type, start_index, end_index);
  tree left_len = gimple_build (&stmts, MINUS_EXPR, len_type, end_index, min);
  tree rhs = gimple_build (&stmts, MIN_EXPR, len_type, left_len, len_limit);
  gimple *stmt = gimple_build_assign (len, rhs);
  gimple_seq_add_stmt (&stmts, stmt);

  return stmts;
}
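
/* Illustrative scalar model (not part of the original file) of the value
   the sequence above assigns to LEN.  One worked instance: start_index = 3,
   end_index = 10, len_limit = 8 gives min = 3, left_len = 7, len = 7.  */
#if 0
static unsigned HOST_WIDE_INT
vect_gen_len_reference (unsigned HOST_WIDE_INT start_index,
			unsigned HOST_WIDE_INT end_index,
			unsigned HOST_WIDE_INT len_limit)
{
  unsigned HOST_WIDE_INT min = MIN (start_index, end_index);
  unsigned HOST_WIDE_INT left_len = end_index - min;
  return MIN (left_len, len_limit);
}
#endif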