1 /* Statement Analysis and Transformation for Vectorization
2 Copyright (C) 2003-2020 Free Software Foundation, Inc.
3 Contributed by Dorit Naishlos <dorit@il.ibm.com>
4 and Ira Rosen <irar@il.ibm.com>
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
24 #include "coretypes.h"
31 #include "optabs-tree.h"
32 #include "insn-config.h"
33 #include "recog.h" /* FIXME: for insn_data */
37 #include "fold-const.h"
38 #include "stor-layout.h"
41 #include "gimple-iterator.h"
42 #include "gimplify-me.h"
44 #include "tree-ssa-loop-manip.h"
47 #include "tree-ssa-loop.h"
48 #include "tree-scalar-evolution.h"
49 #include "tree-vectorizer.h"
51 #include "internal-fn.h"
52 #include "tree-vector-builder.h"
53 #include "vec-perm-indices.h"
54 #include "tree-ssa-loop-niter.h"
55 #include "gimple-fold.h"
59 /* For lang_hooks.types.type_for_mode. */
60 #include "langhooks.h"
62 /* Return the vectorized type for the given statement. */
65 stmt_vectype (class _stmt_vec_info
*stmt_info
)
67 return STMT_VINFO_VECTYPE (stmt_info
);
70 /* Return TRUE iff the given statement is in an inner loop relative to
71 the loop being vectorized. */
73 stmt_in_inner_loop_p (vec_info
*vinfo
, class _stmt_vec_info
*stmt_info
)
75 gimple
*stmt
= STMT_VINFO_STMT (stmt_info
);
76 basic_block bb
= gimple_bb (stmt
);
77 loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
);
83 loop
= LOOP_VINFO_LOOP (loop_vinfo
);
85 return (bb
->loop_father
== loop
->inner
);
88 /* Record the cost of a statement, either by directly informing the
89 target model or by saving it in a vector for later processing.
90 Return a preliminary estimate of the statement's cost. */
93 record_stmt_cost (stmt_vector_for_cost
*body_cost_vec
, int count
,
94 enum vect_cost_for_stmt kind
, stmt_vec_info stmt_info
,
95 tree vectype
, int misalign
,
96 enum vect_cost_model_location where
)
98 if ((kind
== vector_load
|| kind
== unaligned_load
)
99 && (stmt_info
&& STMT_VINFO_GATHER_SCATTER_P (stmt_info
)))
100 kind
= vector_gather_load
;
101 if ((kind
== vector_store
|| kind
== unaligned_store
)
102 && (stmt_info
&& STMT_VINFO_GATHER_SCATTER_P (stmt_info
)))
103 kind
= vector_scatter_store
;
105 stmt_info_for_cost si
= { count
, kind
, where
, stmt_info
, vectype
, misalign
};
106 body_cost_vec
->safe_push (si
);
109 (builtin_vectorization_cost (kind
, vectype
, misalign
) * count
);
112 /* Return a variable of type ELEM_TYPE[NELEMS]. */
115 create_vector_array (tree elem_type
, unsigned HOST_WIDE_INT nelems
)
117 return create_tmp_var (build_array_type_nelts (elem_type
, nelems
),
121 /* ARRAY is an array of vectors created by create_vector_array.
122 Return an SSA_NAME for the vector in index N. The reference
123 is part of the vectorization of STMT_INFO and the vector is associated
124 with scalar destination SCALAR_DEST. */
127 read_vector_array (vec_info
*vinfo
,
128 stmt_vec_info stmt_info
, gimple_stmt_iterator
*gsi
,
129 tree scalar_dest
, tree array
, unsigned HOST_WIDE_INT n
)
131 tree vect_type
, vect
, vect_name
, array_ref
;
134 gcc_assert (TREE_CODE (TREE_TYPE (array
)) == ARRAY_TYPE
);
135 vect_type
= TREE_TYPE (TREE_TYPE (array
));
136 vect
= vect_create_destination_var (scalar_dest
, vect_type
);
137 array_ref
= build4 (ARRAY_REF
, vect_type
, array
,
138 build_int_cst (size_type_node
, n
),
139 NULL_TREE
, NULL_TREE
);
141 new_stmt
= gimple_build_assign (vect
, array_ref
);
142 vect_name
= make_ssa_name (vect
, new_stmt
);
143 gimple_assign_set_lhs (new_stmt
, vect_name
);
144 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
149 /* ARRAY is an array of vectors created by create_vector_array.
150 Emit code to store SSA_NAME VECT in index N of the array.
151 The store is part of the vectorization of STMT_INFO. */
154 write_vector_array (vec_info
*vinfo
,
155 stmt_vec_info stmt_info
, gimple_stmt_iterator
*gsi
,
156 tree vect
, tree array
, unsigned HOST_WIDE_INT n
)
161 array_ref
= build4 (ARRAY_REF
, TREE_TYPE (vect
), array
,
162 build_int_cst (size_type_node
, n
),
163 NULL_TREE
, NULL_TREE
);
165 new_stmt
= gimple_build_assign (array_ref
, vect
);
166 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
169 /* PTR is a pointer to an array of type TYPE. Return a representation
170 of *PTR. The memory reference replaces those in FIRST_DR
174 create_array_ref (tree type
, tree ptr
, tree alias_ptr_type
)
178 mem_ref
= build2 (MEM_REF
, type
, ptr
, build_int_cst (alias_ptr_type
, 0));
179 /* Arrays have the same alignment as their type. */
180 set_ptr_info_alignment (get_ptr_info (ptr
), TYPE_ALIGN_UNIT (type
), 0);
184 /* Add a clobber of variable VAR to the vectorization of STMT_INFO.
185 Emit the clobber before *GSI. */
188 vect_clobber_variable (vec_info
*vinfo
, stmt_vec_info stmt_info
,
189 gimple_stmt_iterator
*gsi
, tree var
)
191 tree clobber
= build_clobber (TREE_TYPE (var
));
192 gimple
*new_stmt
= gimple_build_assign (var
, clobber
);
193 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
196 /* Utility functions used by vect_mark_stmts_to_be_vectorized. */
198 /* Function vect_mark_relevant.
200 Mark STMT_INFO as "relevant for vectorization" and add it to WORKLIST. */
203 vect_mark_relevant (vec
<stmt_vec_info
> *worklist
, stmt_vec_info stmt_info
,
204 enum vect_relevant relevant
, bool live_p
)
206 enum vect_relevant save_relevant
= STMT_VINFO_RELEVANT (stmt_info
);
207 bool save_live_p
= STMT_VINFO_LIVE_P (stmt_info
);
209 if (dump_enabled_p ())
210 dump_printf_loc (MSG_NOTE
, vect_location
,
211 "mark relevant %d, live %d: %G", relevant
, live_p
,
214 /* If this stmt is an original stmt in a pattern, we might need to mark its
215 related pattern stmt instead of the original stmt. However, such stmts
216 may have their own uses that are not in any pattern, in such cases the
217 stmt itself should be marked. */
218 if (STMT_VINFO_IN_PATTERN_P (stmt_info
))
220 /* This is the last stmt in a sequence that was detected as a
221 pattern that can potentially be vectorized. Don't mark the stmt
222 as relevant/live because it's not going to be vectorized.
223 Instead mark the pattern-stmt that replaces it. */
225 if (dump_enabled_p ())
226 dump_printf_loc (MSG_NOTE
, vect_location
,
227 "last stmt in pattern. don't mark"
228 " relevant/live.\n");
229 stmt_vec_info old_stmt_info
= stmt_info
;
230 stmt_info
= STMT_VINFO_RELATED_STMT (stmt_info
);
231 gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info
) == old_stmt_info
);
232 save_relevant
= STMT_VINFO_RELEVANT (stmt_info
);
233 save_live_p
= STMT_VINFO_LIVE_P (stmt_info
);
236 STMT_VINFO_LIVE_P (stmt_info
) |= live_p
;
237 if (relevant
> STMT_VINFO_RELEVANT (stmt_info
))
238 STMT_VINFO_RELEVANT (stmt_info
) = relevant
;
240 if (STMT_VINFO_RELEVANT (stmt_info
) == save_relevant
241 && STMT_VINFO_LIVE_P (stmt_info
) == save_live_p
)
243 if (dump_enabled_p ())
244 dump_printf_loc (MSG_NOTE
, vect_location
,
245 "already marked relevant/live.\n");
249 worklist
->safe_push (stmt_info
);
253 /* Function is_simple_and_all_uses_invariant
255 Return true if STMT_INFO is simple and all uses of it are invariant. */
258 is_simple_and_all_uses_invariant (stmt_vec_info stmt_info
,
259 loop_vec_info loop_vinfo
)
264 gassign
*stmt
= dyn_cast
<gassign
*> (stmt_info
->stmt
);
268 FOR_EACH_SSA_TREE_OPERAND (op
, stmt
, iter
, SSA_OP_USE
)
270 enum vect_def_type dt
= vect_uninitialized_def
;
272 if (!vect_is_simple_use (op
, loop_vinfo
, &dt
))
274 if (dump_enabled_p ())
275 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
276 "use not simple.\n");
280 if (dt
!= vect_external_def
&& dt
!= vect_constant_def
)
286 /* Function vect_stmt_relevant_p.
288 Return true if STMT_INFO, in the loop that is represented by LOOP_VINFO,
289 is "relevant for vectorization".
291 A stmt is considered "relevant for vectorization" if:
292 - it has uses outside the loop.
293 - it has vdefs (it alters memory).
294 - control stmts in the loop (except for the exit condition).
296 CHECKME: what other side effects would the vectorizer allow? */
299 vect_stmt_relevant_p (stmt_vec_info stmt_info
, loop_vec_info loop_vinfo
,
300 enum vect_relevant
*relevant
, bool *live_p
)
302 class loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
304 imm_use_iterator imm_iter
;
308 *relevant
= vect_unused_in_scope
;
311 /* cond stmt other than loop exit cond. */
312 if (is_ctrl_stmt (stmt_info
->stmt
)
313 && STMT_VINFO_TYPE (stmt_info
) != loop_exit_ctrl_vec_info_type
)
314 *relevant
= vect_used_in_scope
;
316 /* changing memory. */
317 if (gimple_code (stmt_info
->stmt
) != GIMPLE_PHI
)
318 if (gimple_vdef (stmt_info
->stmt
)
319 && !gimple_clobber_p (stmt_info
->stmt
))
321 if (dump_enabled_p ())
322 dump_printf_loc (MSG_NOTE
, vect_location
,
323 "vec_stmt_relevant_p: stmt has vdefs.\n");
324 *relevant
= vect_used_in_scope
;
327 /* uses outside the loop. */
328 FOR_EACH_PHI_OR_STMT_DEF (def_p
, stmt_info
->stmt
, op_iter
, SSA_OP_DEF
)
330 FOR_EACH_IMM_USE_FAST (use_p
, imm_iter
, DEF_FROM_PTR (def_p
))
332 basic_block bb
= gimple_bb (USE_STMT (use_p
));
333 if (!flow_bb_inside_loop_p (loop
, bb
))
335 if (is_gimple_debug (USE_STMT (use_p
)))
338 if (dump_enabled_p ())
339 dump_printf_loc (MSG_NOTE
, vect_location
,
340 "vec_stmt_relevant_p: used out of loop.\n");
342 /* We expect all such uses to be in the loop exit phis
343 (because of loop closed form) */
344 gcc_assert (gimple_code (USE_STMT (use_p
)) == GIMPLE_PHI
);
345 gcc_assert (bb
== single_exit (loop
)->dest
);
352 if (*live_p
&& *relevant
== vect_unused_in_scope
353 && !is_simple_and_all_uses_invariant (stmt_info
, loop_vinfo
))
355 if (dump_enabled_p ())
356 dump_printf_loc (MSG_NOTE
, vect_location
,
357 "vec_stmt_relevant_p: stmt live but not relevant.\n");
358 *relevant
= vect_used_only_live
;
361 return (*live_p
|| *relevant
);
365 /* Function exist_non_indexing_operands_for_use_p
367 USE is one of the uses attached to STMT_INFO. Check if USE is
368 used in STMT_INFO for anything other than indexing an array. */
371 exist_non_indexing_operands_for_use_p (tree use
, stmt_vec_info stmt_info
)
375 /* USE corresponds to some operand in STMT. If there is no data
376 reference in STMT, then any operand that corresponds to USE
377 is not indexing an array. */
378 if (!STMT_VINFO_DATA_REF (stmt_info
))
381 /* STMT has a data_ref. FORNOW this means that its of one of
385 (This should have been verified in analyze_data_refs).
387 'var' in the second case corresponds to a def, not a use,
388 so USE cannot correspond to any operands that are not used
391 Therefore, all we need to check is if STMT falls into the
392 first case, and whether var corresponds to USE. */
394 gassign
*assign
= dyn_cast
<gassign
*> (stmt_info
->stmt
);
395 if (!assign
|| !gimple_assign_copy_p (assign
))
397 gcall
*call
= dyn_cast
<gcall
*> (stmt_info
->stmt
);
398 if (call
&& gimple_call_internal_p (call
))
400 internal_fn ifn
= gimple_call_internal_fn (call
);
401 int mask_index
= internal_fn_mask_index (ifn
);
403 && use
== gimple_call_arg (call
, mask_index
))
405 int stored_value_index
= internal_fn_stored_value_index (ifn
);
406 if (stored_value_index
>= 0
407 && use
== gimple_call_arg (call
, stored_value_index
))
409 if (internal_gather_scatter_fn_p (ifn
)
410 && use
== gimple_call_arg (call
, 1))
416 if (TREE_CODE (gimple_assign_lhs (assign
)) == SSA_NAME
)
418 operand
= gimple_assign_rhs1 (assign
);
419 if (TREE_CODE (operand
) != SSA_NAME
)
430 Function process_use.
433 - a USE in STMT_VINFO in a loop represented by LOOP_VINFO
434 - RELEVANT - enum value to be set in the STMT_VINFO of the stmt
435 that defined USE. This is done by calling mark_relevant and passing it
436 the WORKLIST (to add DEF_STMT to the WORKLIST in case it is relevant).
437 - FORCE is true if exist_non_indexing_operands_for_use_p check shouldn't
441 Generally, LIVE_P and RELEVANT are used to define the liveness and
442 relevance info of the DEF_STMT of this USE:
443 STMT_VINFO_LIVE_P (DEF_stmt_vinfo) <-- live_p
444 STMT_VINFO_RELEVANT (DEF_stmt_vinfo) <-- relevant
446 - case 1: If USE is used only for address computations (e.g. array indexing),
447 which does not need to be directly vectorized, then the liveness/relevance
448 of the respective DEF_STMT is left unchanged.
449 - case 2: If STMT_VINFO is a reduction phi and DEF_STMT is a reduction stmt,
450 we skip DEF_STMT cause it had already been processed.
451 - case 3: If DEF_STMT and STMT_VINFO are in different nests, then
452 "relevant" will be modified accordingly.
454 Return true if everything is as expected. Return false otherwise. */
457 process_use (stmt_vec_info stmt_vinfo
, tree use
, loop_vec_info loop_vinfo
,
458 enum vect_relevant relevant
, vec
<stmt_vec_info
> *worklist
,
461 stmt_vec_info dstmt_vinfo
;
462 enum vect_def_type dt
;
464 /* case 1: we are only interested in uses that need to be vectorized. Uses
465 that are used for address computation are not considered relevant. */
466 if (!force
&& !exist_non_indexing_operands_for_use_p (use
, stmt_vinfo
))
467 return opt_result::success ();
469 if (!vect_is_simple_use (use
, loop_vinfo
, &dt
, &dstmt_vinfo
))
470 return opt_result::failure_at (stmt_vinfo
->stmt
,
472 " unsupported use in stmt.\n");
475 return opt_result::success ();
477 basic_block def_bb
= gimple_bb (dstmt_vinfo
->stmt
);
478 basic_block bb
= gimple_bb (stmt_vinfo
->stmt
);
480 /* case 2: A reduction phi (STMT) defined by a reduction stmt (DSTMT_VINFO).
481 We have to force the stmt live since the epilogue loop needs it to
482 continue computing the reduction. */
483 if (gimple_code (stmt_vinfo
->stmt
) == GIMPLE_PHI
484 && STMT_VINFO_DEF_TYPE (stmt_vinfo
) == vect_reduction_def
485 && gimple_code (dstmt_vinfo
->stmt
) != GIMPLE_PHI
486 && STMT_VINFO_DEF_TYPE (dstmt_vinfo
) == vect_reduction_def
487 && bb
->loop_father
== def_bb
->loop_father
)
489 if (dump_enabled_p ())
490 dump_printf_loc (MSG_NOTE
, vect_location
,
491 "reduc-stmt defining reduc-phi in the same nest.\n");
492 vect_mark_relevant (worklist
, dstmt_vinfo
, relevant
, true);
493 return opt_result::success ();
496 /* case 3a: outer-loop stmt defining an inner-loop stmt:
497 outer-loop-header-bb:
503 if (flow_loop_nested_p (def_bb
->loop_father
, bb
->loop_father
))
505 if (dump_enabled_p ())
506 dump_printf_loc (MSG_NOTE
, vect_location
,
507 "outer-loop def-stmt defining inner-loop stmt.\n");
511 case vect_unused_in_scope
:
512 relevant
= (STMT_VINFO_DEF_TYPE (stmt_vinfo
) == vect_nested_cycle
) ?
513 vect_used_in_scope
: vect_unused_in_scope
;
516 case vect_used_in_outer_by_reduction
:
517 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo
) != vect_reduction_def
);
518 relevant
= vect_used_by_reduction
;
521 case vect_used_in_outer
:
522 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo
) != vect_reduction_def
);
523 relevant
= vect_used_in_scope
;
526 case vect_used_in_scope
:
534 /* case 3b: inner-loop stmt defining an outer-loop stmt:
535 outer-loop-header-bb:
539 outer-loop-tail-bb (or outer-loop-exit-bb in double reduction):
541 else if (flow_loop_nested_p (bb
->loop_father
, def_bb
->loop_father
))
543 if (dump_enabled_p ())
544 dump_printf_loc (MSG_NOTE
, vect_location
,
545 "inner-loop def-stmt defining outer-loop stmt.\n");
549 case vect_unused_in_scope
:
550 relevant
= (STMT_VINFO_DEF_TYPE (stmt_vinfo
) == vect_reduction_def
551 || STMT_VINFO_DEF_TYPE (stmt_vinfo
) == vect_double_reduction_def
) ?
552 vect_used_in_outer_by_reduction
: vect_unused_in_scope
;
555 case vect_used_by_reduction
:
556 case vect_used_only_live
:
557 relevant
= vect_used_in_outer_by_reduction
;
560 case vect_used_in_scope
:
561 relevant
= vect_used_in_outer
;
568 /* We are also not interested in uses on loop PHI backedges that are
569 inductions. Otherwise we'll needlessly vectorize the IV increment
570 and cause hybrid SLP for SLP inductions. Unless the PHI is live
572 else if (gimple_code (stmt_vinfo
->stmt
) == GIMPLE_PHI
573 && STMT_VINFO_DEF_TYPE (stmt_vinfo
) == vect_induction_def
574 && ! STMT_VINFO_LIVE_P (stmt_vinfo
)
575 && (PHI_ARG_DEF_FROM_EDGE (stmt_vinfo
->stmt
,
576 loop_latch_edge (bb
->loop_father
))
579 if (dump_enabled_p ())
580 dump_printf_loc (MSG_NOTE
, vect_location
,
581 "induction value on backedge.\n");
582 return opt_result::success ();
586 vect_mark_relevant (worklist
, dstmt_vinfo
, relevant
, false);
587 return opt_result::success ();
591 /* Function vect_mark_stmts_to_be_vectorized.
593 Not all stmts in the loop need to be vectorized. For example:
602 Stmt 1 and 3 do not need to be vectorized, because loop control and
603 addressing of vectorized data-refs are handled differently.
605 This pass detects such stmts. */
608 vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo
, bool *fatal
)
610 class loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
611 basic_block
*bbs
= LOOP_VINFO_BBS (loop_vinfo
);
612 unsigned int nbbs
= loop
->num_nodes
;
613 gimple_stmt_iterator si
;
617 enum vect_relevant relevant
;
619 DUMP_VECT_SCOPE ("vect_mark_stmts_to_be_vectorized");
621 auto_vec
<stmt_vec_info
, 64> worklist
;
623 /* 1. Init worklist. */
624 for (i
= 0; i
< nbbs
; i
++)
627 for (si
= gsi_start_phis (bb
); !gsi_end_p (si
); gsi_next (&si
))
629 stmt_vec_info phi_info
= loop_vinfo
->lookup_stmt (gsi_stmt (si
));
630 if (dump_enabled_p ())
631 dump_printf_loc (MSG_NOTE
, vect_location
, "init: phi relevant? %G",
634 if (vect_stmt_relevant_p (phi_info
, loop_vinfo
, &relevant
, &live_p
))
635 vect_mark_relevant (&worklist
, phi_info
, relevant
, live_p
);
637 for (si
= gsi_start_bb (bb
); !gsi_end_p (si
); gsi_next (&si
))
639 stmt_vec_info stmt_info
= loop_vinfo
->lookup_stmt (gsi_stmt (si
));
640 if (dump_enabled_p ())
641 dump_printf_loc (MSG_NOTE
, vect_location
,
642 "init: stmt relevant? %G", stmt_info
->stmt
);
644 if (vect_stmt_relevant_p (stmt_info
, loop_vinfo
, &relevant
, &live_p
))
645 vect_mark_relevant (&worklist
, stmt_info
, relevant
, live_p
);
649 /* 2. Process_worklist */
650 while (worklist
.length () > 0)
655 stmt_vec_info stmt_vinfo
= worklist
.pop ();
656 if (dump_enabled_p ())
657 dump_printf_loc (MSG_NOTE
, vect_location
,
658 "worklist: examine stmt: %G", stmt_vinfo
->stmt
);
660 /* Examine the USEs of STMT. For each USE, mark the stmt that defines it
661 (DEF_STMT) as relevant/irrelevant according to the relevance property
663 relevant
= STMT_VINFO_RELEVANT (stmt_vinfo
);
665 /* Generally, the relevance property of STMT (in STMT_VINFO_RELEVANT) is
666 propagated as is to the DEF_STMTs of its USEs.
668 One exception is when STMT has been identified as defining a reduction
669 variable; in this case we set the relevance to vect_used_by_reduction.
670 This is because we distinguish between two kinds of relevant stmts -
671 those that are used by a reduction computation, and those that are
672 (also) used by a regular computation. This allows us later on to
673 identify stmts that are used solely by a reduction, and therefore the
674 order of the results that they produce does not have to be kept. */
676 switch (STMT_VINFO_DEF_TYPE (stmt_vinfo
))
678 case vect_reduction_def
:
679 gcc_assert (relevant
!= vect_unused_in_scope
);
680 if (relevant
!= vect_unused_in_scope
681 && relevant
!= vect_used_in_scope
682 && relevant
!= vect_used_by_reduction
683 && relevant
!= vect_used_only_live
)
684 return opt_result::failure_at
685 (stmt_vinfo
->stmt
, "unsupported use of reduction.\n");
688 case vect_nested_cycle
:
689 if (relevant
!= vect_unused_in_scope
690 && relevant
!= vect_used_in_outer_by_reduction
691 && relevant
!= vect_used_in_outer
)
692 return opt_result::failure_at
693 (stmt_vinfo
->stmt
, "unsupported use of nested cycle.\n");
696 case vect_double_reduction_def
:
697 if (relevant
!= vect_unused_in_scope
698 && relevant
!= vect_used_by_reduction
699 && relevant
!= vect_used_only_live
)
700 return opt_result::failure_at
701 (stmt_vinfo
->stmt
, "unsupported use of double reduction.\n");
708 if (is_pattern_stmt_p (stmt_vinfo
))
710 /* Pattern statements are not inserted into the code, so
711 FOR_EACH_PHI_OR_STMT_USE optimizes their operands out, and we
712 have to scan the RHS or function arguments instead. */
713 if (gassign
*assign
= dyn_cast
<gassign
*> (stmt_vinfo
->stmt
))
715 enum tree_code rhs_code
= gimple_assign_rhs_code (assign
);
716 tree op
= gimple_assign_rhs1 (assign
);
719 if (rhs_code
== COND_EXPR
&& COMPARISON_CLASS_P (op
))
722 = process_use (stmt_vinfo
, TREE_OPERAND (op
, 0),
723 loop_vinfo
, relevant
, &worklist
, false);
726 res
= process_use (stmt_vinfo
, TREE_OPERAND (op
, 1),
727 loop_vinfo
, relevant
, &worklist
, false);
732 for (; i
< gimple_num_ops (assign
); i
++)
734 op
= gimple_op (assign
, i
);
735 if (TREE_CODE (op
) == SSA_NAME
)
738 = process_use (stmt_vinfo
, op
, loop_vinfo
, relevant
,
745 else if (gcall
*call
= dyn_cast
<gcall
*> (stmt_vinfo
->stmt
))
747 for (i
= 0; i
< gimple_call_num_args (call
); i
++)
749 tree arg
= gimple_call_arg (call
, i
);
751 = process_use (stmt_vinfo
, arg
, loop_vinfo
, relevant
,
759 FOR_EACH_PHI_OR_STMT_USE (use_p
, stmt_vinfo
->stmt
, iter
, SSA_OP_USE
)
761 tree op
= USE_FROM_PTR (use_p
);
763 = process_use (stmt_vinfo
, op
, loop_vinfo
, relevant
,
769 if (STMT_VINFO_GATHER_SCATTER_P (stmt_vinfo
))
771 gather_scatter_info gs_info
;
772 if (!vect_check_gather_scatter (stmt_vinfo
, loop_vinfo
, &gs_info
))
775 = process_use (stmt_vinfo
, gs_info
.offset
, loop_vinfo
, relevant
,
784 } /* while worklist */
786 return opt_result::success ();
789 /* Function vect_model_simple_cost.
791 Models cost for simple operations, i.e. those that only emit ncopies of a
792 single op. Right now, this does not account for multiple insns that could
793 be generated for the single vector op. We will handle that shortly. */
796 vect_model_simple_cost (vec_info
*,
797 stmt_vec_info stmt_info
, int ncopies
,
798 enum vect_def_type
*dt
,
801 stmt_vector_for_cost
*cost_vec
,
802 vect_cost_for_stmt kind
= vector_stmt
)
804 int inside_cost
= 0, prologue_cost
= 0;
806 gcc_assert (cost_vec
!= NULL
);
808 /* ??? Somehow we need to fix this at the callers. */
810 ncopies
= SLP_TREE_NUMBER_OF_VEC_STMTS (node
);
813 /* Cost the "broadcast" of a scalar operand in to a vector operand.
814 Use scalar_to_vec to cost the broadcast, as elsewhere in the vector
816 for (int i
= 0; i
< ndts
; i
++)
817 if (dt
[i
] == vect_constant_def
|| dt
[i
] == vect_external_def
)
818 prologue_cost
+= record_stmt_cost (cost_vec
, 1, scalar_to_vec
,
819 stmt_info
, 0, vect_prologue
);
821 /* Adjust for two-operator SLP nodes. */
822 if (node
&& SLP_TREE_TWO_OPERATORS (node
))
825 inside_cost
+= record_stmt_cost (cost_vec
, ncopies
, vec_perm
,
826 stmt_info
, 0, vect_body
);
829 /* Pass the inside-of-loop statements to the target-specific cost model. */
830 inside_cost
+= record_stmt_cost (cost_vec
, ncopies
, kind
,
831 stmt_info
, 0, vect_body
);
833 if (dump_enabled_p ())
834 dump_printf_loc (MSG_NOTE
, vect_location
,
835 "vect_model_simple_cost: inside_cost = %d, "
836 "prologue_cost = %d .\n", inside_cost
, prologue_cost
);
840 /* Model cost for type demotion and promotion operations. PWR is
841 normally zero for single-step promotions and demotions. It will be
842 one if two-step promotion/demotion is required, and so on. NCOPIES
843 is the number of vector results (and thus number of instructions)
844 for the narrowest end of the operation chain. Each additional
845 step doubles the number of instructions required. */
848 vect_model_promotion_demotion_cost (stmt_vec_info stmt_info
,
849 enum vect_def_type
*dt
,
850 unsigned int ncopies
, int pwr
,
851 stmt_vector_for_cost
*cost_vec
)
854 int inside_cost
= 0, prologue_cost
= 0;
856 for (i
= 0; i
< pwr
+ 1; i
++)
858 inside_cost
+= record_stmt_cost (cost_vec
, ncopies
, vec_promote_demote
,
859 stmt_info
, 0, vect_body
);
863 /* FORNOW: Assuming maximum 2 args per stmts. */
864 for (i
= 0; i
< 2; i
++)
865 if (dt
[i
] == vect_constant_def
|| dt
[i
] == vect_external_def
)
866 prologue_cost
+= record_stmt_cost (cost_vec
, 1, vector_stmt
,
867 stmt_info
, 0, vect_prologue
);
869 if (dump_enabled_p ())
870 dump_printf_loc (MSG_NOTE
, vect_location
,
871 "vect_model_promotion_demotion_cost: inside_cost = %d, "
872 "prologue_cost = %d .\n", inside_cost
, prologue_cost
);
875 /* Returns true if the current function returns DECL. */
878 cfun_returns (tree decl
)
882 FOR_EACH_EDGE (e
, ei
, EXIT_BLOCK_PTR_FOR_FN (cfun
)->preds
)
884 greturn
*ret
= safe_dyn_cast
<greturn
*> (last_stmt (e
->src
));
887 if (gimple_return_retval (ret
) == decl
)
889 /* We often end up with an aggregate copy to the result decl,
890 handle that case as well. First skip intermediate clobbers
895 def
= SSA_NAME_DEF_STMT (gimple_vuse (def
));
897 while (gimple_clobber_p (def
));
898 if (is_a
<gassign
*> (def
)
899 && gimple_assign_lhs (def
) == gimple_return_retval (ret
)
900 && gimple_assign_rhs1 (def
) == decl
)
906 /* Function vect_model_store_cost
908 Models cost for stores. In the case of grouped accesses, one access
909 has the overhead of the grouped access attributed to it. */
912 vect_model_store_cost (vec_info
*vinfo
, stmt_vec_info stmt_info
, int ncopies
,
913 vect_memory_access_type memory_access_type
,
914 vec_load_store_type vls_type
, slp_tree slp_node
,
915 stmt_vector_for_cost
*cost_vec
)
917 unsigned int inside_cost
= 0, prologue_cost
= 0;
918 stmt_vec_info first_stmt_info
= stmt_info
;
919 bool grouped_access_p
= STMT_VINFO_GROUPED_ACCESS (stmt_info
);
921 /* ??? Somehow we need to fix this at the callers. */
923 ncopies
= SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
);
925 if (vls_type
== VLS_STORE_INVARIANT
)
928 prologue_cost
+= record_stmt_cost (cost_vec
, 1, scalar_to_vec
,
929 stmt_info
, 0, vect_prologue
);
932 /* Grouped stores update all elements in the group at once,
933 so we want the DR for the first statement. */
934 if (!slp_node
&& grouped_access_p
)
935 first_stmt_info
= DR_GROUP_FIRST_ELEMENT (stmt_info
);
937 /* True if we should include any once-per-group costs as well as
938 the cost of the statement itself. For SLP we only get called
939 once per group anyhow. */
940 bool first_stmt_p
= (first_stmt_info
== stmt_info
);
942 /* We assume that the cost of a single store-lanes instruction is
943 equivalent to the cost of DR_GROUP_SIZE separate stores. If a grouped
944 access is instead being provided by a permute-and-store operation,
945 include the cost of the permutes. */
947 && memory_access_type
== VMAT_CONTIGUOUS_PERMUTE
)
949 /* Uses a high and low interleave or shuffle operations for each
951 int group_size
= DR_GROUP_SIZE (first_stmt_info
);
952 int nstmts
= ncopies
* ceil_log2 (group_size
) * group_size
;
953 inside_cost
= record_stmt_cost (cost_vec
, nstmts
, vec_perm
,
954 stmt_info
, 0, vect_body
);
956 if (dump_enabled_p ())
957 dump_printf_loc (MSG_NOTE
, vect_location
,
958 "vect_model_store_cost: strided group_size = %d .\n",
962 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
963 /* Costs of the stores. */
964 if (memory_access_type
== VMAT_ELEMENTWISE
965 || memory_access_type
== VMAT_GATHER_SCATTER
)
967 /* N scalar stores plus extracting the elements. */
968 unsigned int assumed_nunits
= vect_nunits_for_cost (vectype
);
969 inside_cost
+= record_stmt_cost (cost_vec
,
970 ncopies
* assumed_nunits
,
971 scalar_store
, stmt_info
, 0, vect_body
);
974 vect_get_store_cost (vinfo
, stmt_info
, ncopies
, &inside_cost
, cost_vec
);
976 if (memory_access_type
== VMAT_ELEMENTWISE
977 || memory_access_type
== VMAT_STRIDED_SLP
)
979 /* N scalar stores plus extracting the elements. */
980 unsigned int assumed_nunits
= vect_nunits_for_cost (vectype
);
981 inside_cost
+= record_stmt_cost (cost_vec
,
982 ncopies
* assumed_nunits
,
983 vec_to_scalar
, stmt_info
, 0, vect_body
);
986 /* When vectorizing a store into the function result assign
987 a penalty if the function returns in a multi-register location.
988 In this case we assume we'll end up with having to spill the
989 vector result and do piecewise loads as a conservative estimate. */
990 tree base
= get_base_address (STMT_VINFO_DATA_REF (stmt_info
)->ref
);
992 && (TREE_CODE (base
) == RESULT_DECL
993 || (DECL_P (base
) && cfun_returns (base
)))
994 && !aggregate_value_p (base
, cfun
->decl
))
996 rtx reg
= hard_function_value (TREE_TYPE (base
), cfun
->decl
, 0, 1);
997 /* ??? Handle PARALLEL in some way. */
1000 int nregs
= hard_regno_nregs (REGNO (reg
), GET_MODE (reg
));
1001 /* Assume that a single reg-reg move is possible and cheap,
1002 do not account for vector to gp register move cost. */
1006 prologue_cost
+= record_stmt_cost (cost_vec
, ncopies
,
1008 stmt_info
, 0, vect_epilogue
);
1010 prologue_cost
+= record_stmt_cost (cost_vec
, ncopies
* nregs
,
1012 stmt_info
, 0, vect_epilogue
);
1017 if (dump_enabled_p ())
1018 dump_printf_loc (MSG_NOTE
, vect_location
,
1019 "vect_model_store_cost: inside_cost = %d, "
1020 "prologue_cost = %d .\n", inside_cost
, prologue_cost
);
1024 /* Calculate cost of DR's memory access. */
1026 vect_get_store_cost (vec_info
*vinfo
, stmt_vec_info stmt_info
, int ncopies
,
1027 unsigned int *inside_cost
,
1028 stmt_vector_for_cost
*body_cost_vec
)
1030 dr_vec_info
*dr_info
= STMT_VINFO_DR_INFO (stmt_info
);
1031 int alignment_support_scheme
1032 = vect_supportable_dr_alignment (vinfo
, dr_info
, false);
1034 switch (alignment_support_scheme
)
1038 *inside_cost
+= record_stmt_cost (body_cost_vec
, ncopies
,
1039 vector_store
, stmt_info
, 0,
1042 if (dump_enabled_p ())
1043 dump_printf_loc (MSG_NOTE
, vect_location
,
1044 "vect_model_store_cost: aligned.\n");
1048 case dr_unaligned_supported
:
1050 /* Here, we assign an additional cost for the unaligned store. */
1051 *inside_cost
+= record_stmt_cost (body_cost_vec
, ncopies
,
1052 unaligned_store
, stmt_info
,
1053 DR_MISALIGNMENT (dr_info
),
1055 if (dump_enabled_p ())
1056 dump_printf_loc (MSG_NOTE
, vect_location
,
1057 "vect_model_store_cost: unaligned supported by "
1062 case dr_unaligned_unsupported
:
1064 *inside_cost
= VECT_MAX_COST
;
1066 if (dump_enabled_p ())
1067 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1068 "vect_model_store_cost: unsupported access.\n");
1078 /* Function vect_model_load_cost
1080 Models cost for loads. In the case of grouped accesses, one access has
1081 the overhead of the grouped access attributed to it. Since unaligned
1082 accesses are supported for loads, we also account for the costs of the
1083 access scheme chosen. */
1086 vect_model_load_cost (vec_info
*vinfo
,
1087 stmt_vec_info stmt_info
, unsigned ncopies
, poly_uint64 vf
,
1088 vect_memory_access_type memory_access_type
,
1090 stmt_vector_for_cost
*cost_vec
)
1092 unsigned int inside_cost
= 0, prologue_cost
= 0;
1093 bool grouped_access_p
= STMT_VINFO_GROUPED_ACCESS (stmt_info
);
1095 gcc_assert (cost_vec
);
1097 /* ??? Somehow we need to fix this at the callers. */
1099 ncopies
= SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
);
1101 if (slp_node
&& SLP_TREE_LOAD_PERMUTATION (slp_node
).exists ())
1103 /* If the load is permuted then the alignment is determined by
1104 the first group element not by the first scalar stmt DR. */
1105 stmt_vec_info first_stmt_info
= DR_GROUP_FIRST_ELEMENT (stmt_info
);
1106 /* Record the cost for the permutation. */
1108 unsigned assumed_nunits
1109 = vect_nunits_for_cost (STMT_VINFO_VECTYPE (first_stmt_info
));
1110 vect_transform_slp_perm_load (vinfo
, slp_node
, vNULL
, NULL
,
1111 vf
, true, &n_perms
);
1112 inside_cost
+= record_stmt_cost (cost_vec
, n_perms
, vec_perm
,
1113 first_stmt_info
, 0, vect_body
);
1114 /* And adjust the number of loads performed. This handles
1115 redundancies as well as loads that are later dead. */
1116 auto_sbitmap
perm (DR_GROUP_SIZE (first_stmt_info
));
1117 bitmap_clear (perm
);
1118 for (unsigned i
= 0;
1119 i
< SLP_TREE_LOAD_PERMUTATION (slp_node
).length (); ++i
)
1120 bitmap_set_bit (perm
, SLP_TREE_LOAD_PERMUTATION (slp_node
)[i
]);
1122 bool load_seen
= false;
1123 for (unsigned i
= 0; i
< DR_GROUP_SIZE (first_stmt_info
); ++i
)
1125 if (i
% assumed_nunits
== 0)
1131 if (bitmap_bit_p (perm
, i
))
1137 <= (DR_GROUP_SIZE (first_stmt_info
)
1138 - DR_GROUP_GAP (first_stmt_info
)
1139 + assumed_nunits
- 1) / assumed_nunits
);
1142 /* Grouped loads read all elements in the group at once,
1143 so we want the DR for the first statement. */
1144 stmt_vec_info first_stmt_info
= stmt_info
;
1145 if (!slp_node
&& grouped_access_p
)
1146 first_stmt_info
= DR_GROUP_FIRST_ELEMENT (stmt_info
);
1148 /* True if we should include any once-per-group costs as well as
1149 the cost of the statement itself. For SLP we only get called
1150 once per group anyhow. */
1151 bool first_stmt_p
= (first_stmt_info
== stmt_info
);
1153 /* We assume that the cost of a single load-lanes instruction is
1154 equivalent to the cost of DR_GROUP_SIZE separate loads. If a grouped
1155 access is instead being provided by a load-and-permute operation,
1156 include the cost of the permutes. */
1158 && memory_access_type
== VMAT_CONTIGUOUS_PERMUTE
)
1160 /* Uses an even and odd extract operations or shuffle operations
1161 for each needed permute. */
1162 int group_size
= DR_GROUP_SIZE (first_stmt_info
);
1163 int nstmts
= ncopies
* ceil_log2 (group_size
) * group_size
;
1164 inside_cost
+= record_stmt_cost (cost_vec
, nstmts
, vec_perm
,
1165 stmt_info
, 0, vect_body
);
1167 if (dump_enabled_p ())
1168 dump_printf_loc (MSG_NOTE
, vect_location
,
1169 "vect_model_load_cost: strided group_size = %d .\n",
1173 /* The loads themselves. */
1174 if (memory_access_type
== VMAT_ELEMENTWISE
1175 || memory_access_type
== VMAT_GATHER_SCATTER
)
1177 /* N scalar loads plus gathering them into a vector. */
1178 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
1179 unsigned int assumed_nunits
= vect_nunits_for_cost (vectype
);
1180 inside_cost
+= record_stmt_cost (cost_vec
,
1181 ncopies
* assumed_nunits
,
1182 scalar_load
, stmt_info
, 0, vect_body
);
1185 vect_get_load_cost (vinfo
, stmt_info
, ncopies
, first_stmt_p
,
1186 &inside_cost
, &prologue_cost
,
1187 cost_vec
, cost_vec
, true);
1188 if (memory_access_type
== VMAT_ELEMENTWISE
1189 || memory_access_type
== VMAT_STRIDED_SLP
)
1190 inside_cost
+= record_stmt_cost (cost_vec
, ncopies
, vec_construct
,
1191 stmt_info
, 0, vect_body
);
1193 if (dump_enabled_p ())
1194 dump_printf_loc (MSG_NOTE
, vect_location
,
1195 "vect_model_load_cost: inside_cost = %d, "
1196 "prologue_cost = %d .\n", inside_cost
, prologue_cost
);
1200 /* Calculate cost of DR's memory access. */
1202 vect_get_load_cost (vec_info
*vinfo
, stmt_vec_info stmt_info
, int ncopies
,
1203 bool add_realign_cost
, unsigned int *inside_cost
,
1204 unsigned int *prologue_cost
,
1205 stmt_vector_for_cost
*prologue_cost_vec
,
1206 stmt_vector_for_cost
*body_cost_vec
,
1207 bool record_prologue_costs
)
1209 dr_vec_info
*dr_info
= STMT_VINFO_DR_INFO (stmt_info
);
1210 int alignment_support_scheme
1211 = vect_supportable_dr_alignment (vinfo
, dr_info
, false);
1213 switch (alignment_support_scheme
)
1217 *inside_cost
+= record_stmt_cost (body_cost_vec
, ncopies
, vector_load
,
1218 stmt_info
, 0, vect_body
);
1220 if (dump_enabled_p ())
1221 dump_printf_loc (MSG_NOTE
, vect_location
,
1222 "vect_model_load_cost: aligned.\n");
1226 case dr_unaligned_supported
:
1228 /* Here, we assign an additional cost for the unaligned load. */
1229 *inside_cost
+= record_stmt_cost (body_cost_vec
, ncopies
,
1230 unaligned_load
, stmt_info
,
1231 DR_MISALIGNMENT (dr_info
),
1234 if (dump_enabled_p ())
1235 dump_printf_loc (MSG_NOTE
, vect_location
,
1236 "vect_model_load_cost: unaligned supported by "
1241 case dr_explicit_realign
:
1243 *inside_cost
+= record_stmt_cost (body_cost_vec
, ncopies
* 2,
1244 vector_load
, stmt_info
, 0, vect_body
);
1245 *inside_cost
+= record_stmt_cost (body_cost_vec
, ncopies
,
1246 vec_perm
, stmt_info
, 0, vect_body
);
1248 /* FIXME: If the misalignment remains fixed across the iterations of
1249 the containing loop, the following cost should be added to the
1251 if (targetm
.vectorize
.builtin_mask_for_load
)
1252 *inside_cost
+= record_stmt_cost (body_cost_vec
, 1, vector_stmt
,
1253 stmt_info
, 0, vect_body
);
1255 if (dump_enabled_p ())
1256 dump_printf_loc (MSG_NOTE
, vect_location
,
1257 "vect_model_load_cost: explicit realign\n");
1261 case dr_explicit_realign_optimized
:
1263 if (dump_enabled_p ())
1264 dump_printf_loc (MSG_NOTE
, vect_location
,
1265 "vect_model_load_cost: unaligned software "
1268 /* Unaligned software pipeline has a load of an address, an initial
1269 load, and possibly a mask operation to "prime" the loop. However,
1270 if this is an access in a group of loads, which provide grouped
1271 access, then the above cost should only be considered for one
1272 access in the group. Inside the loop, there is a load op
1273 and a realignment op. */
1275 if (add_realign_cost
&& record_prologue_costs
)
1277 *prologue_cost
+= record_stmt_cost (prologue_cost_vec
, 2,
1278 vector_stmt
, stmt_info
,
1280 if (targetm
.vectorize
.builtin_mask_for_load
)
1281 *prologue_cost
+= record_stmt_cost (prologue_cost_vec
, 1,
1282 vector_stmt
, stmt_info
,
1286 *inside_cost
+= record_stmt_cost (body_cost_vec
, ncopies
, vector_load
,
1287 stmt_info
, 0, vect_body
);
1288 *inside_cost
+= record_stmt_cost (body_cost_vec
, ncopies
, vec_perm
,
1289 stmt_info
, 0, vect_body
);
1291 if (dump_enabled_p ())
1292 dump_printf_loc (MSG_NOTE
, vect_location
,
1293 "vect_model_load_cost: explicit realign optimized"
1299 case dr_unaligned_unsupported
:
1301 *inside_cost
= VECT_MAX_COST
;
1303 if (dump_enabled_p ())
1304 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1305 "vect_model_load_cost: unsupported access.\n");
1314 /* Insert the new stmt NEW_STMT at *GSI or at the appropriate place in
1315 the loop preheader for the vectorized stmt STMT_VINFO. */
1318 vect_init_vector_1 (vec_info
*vinfo
, stmt_vec_info stmt_vinfo
, gimple
*new_stmt
,
1319 gimple_stmt_iterator
*gsi
)
1322 vect_finish_stmt_generation (vinfo
, stmt_vinfo
, new_stmt
, gsi
);
1325 loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
);
1329 class loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
1333 if (nested_in_vect_loop_p (loop
, stmt_vinfo
))
1336 pe
= loop_preheader_edge (loop
);
1337 new_bb
= gsi_insert_on_edge_immediate (pe
, new_stmt
);
1338 gcc_assert (!new_bb
);
1342 bb_vec_info bb_vinfo
= dyn_cast
<bb_vec_info
> (vinfo
);
1343 gimple_stmt_iterator gsi_region_begin
= bb_vinfo
->region_begin
;
1344 gsi_insert_before (&gsi_region_begin
, new_stmt
, GSI_SAME_STMT
);
1348 if (dump_enabled_p ())
1349 dump_printf_loc (MSG_NOTE
, vect_location
,
1350 "created new init_stmt: %G", new_stmt
);
1353 /* Function vect_init_vector.
1355 Insert a new stmt (INIT_STMT) that initializes a new variable of type
1356 TYPE with the value VAL. If TYPE is a vector type and VAL does not have
1357 vector type a vector with all elements equal to VAL is created first.
1358 Place the initialization at GSI if it is not NULL. Otherwise, place the
1359 initialization at the loop preheader.
1360 Return the DEF of INIT_STMT.
1361 It will be used in the vectorization of STMT_INFO. */
1364 vect_init_vector (vec_info
*vinfo
, stmt_vec_info stmt_info
, tree val
, tree type
,
1365 gimple_stmt_iterator
*gsi
)
1370 /* We abuse this function to push sth to a SSA name with initial 'val'. */
1371 if (! useless_type_conversion_p (type
, TREE_TYPE (val
)))
1373 gcc_assert (TREE_CODE (type
) == VECTOR_TYPE
);
1374 if (! types_compatible_p (TREE_TYPE (type
), TREE_TYPE (val
)))
1376 /* Scalar boolean value should be transformed into
1377 all zeros or all ones value before building a vector. */
1378 if (VECTOR_BOOLEAN_TYPE_P (type
))
1380 tree true_val
= build_all_ones_cst (TREE_TYPE (type
));
1381 tree false_val
= build_zero_cst (TREE_TYPE (type
));
1383 if (CONSTANT_CLASS_P (val
))
1384 val
= integer_zerop (val
) ? false_val
: true_val
;
1387 new_temp
= make_ssa_name (TREE_TYPE (type
));
1388 init_stmt
= gimple_build_assign (new_temp
, COND_EXPR
,
1389 val
, true_val
, false_val
);
1390 vect_init_vector_1 (vinfo
, stmt_info
, init_stmt
, gsi
);
1396 gimple_seq stmts
= NULL
;
1397 if (! INTEGRAL_TYPE_P (TREE_TYPE (val
)))
1398 val
= gimple_build (&stmts
, VIEW_CONVERT_EXPR
,
1399 TREE_TYPE (type
), val
);
1401 /* ??? Condition vectorization expects us to do
1402 promotion of invariant/external defs. */
1403 val
= gimple_convert (&stmts
, TREE_TYPE (type
), val
);
1404 for (gimple_stmt_iterator gsi2
= gsi_start (stmts
);
1405 !gsi_end_p (gsi2
); )
1407 init_stmt
= gsi_stmt (gsi2
);
1408 gsi_remove (&gsi2
, false);
1409 vect_init_vector_1 (vinfo
, stmt_info
, init_stmt
, gsi
);
1413 val
= build_vector_from_val (type
, val
);
1416 new_temp
= vect_get_new_ssa_name (type
, vect_simple_var
, "cst_");
1417 init_stmt
= gimple_build_assign (new_temp
, val
);
1418 vect_init_vector_1 (vinfo
, stmt_info
, init_stmt
, gsi
);
1422 /* Function vect_get_vec_def_for_operand_1.
1424 For a defining stmt DEF_STMT_INFO of a scalar stmt, return a vector def
1425 with type DT that will be used in the vectorized stmt. */
1428 vect_get_vec_def_for_operand_1 (stmt_vec_info def_stmt_info
,
1429 enum vect_def_type dt
)
1432 stmt_vec_info vec_stmt_info
;
1436 /* operand is a constant or a loop invariant. */
1437 case vect_constant_def
:
1438 case vect_external_def
:
1439 /* Code should use vect_get_vec_def_for_operand. */
1442 /* Operand is defined by a loop header phi. In case of nested
1443 cycles we also may have uses of the backedge def. */
1444 case vect_reduction_def
:
1445 case vect_double_reduction_def
:
1446 case vect_nested_cycle
:
1447 case vect_induction_def
:
1448 gcc_assert (gimple_code (def_stmt_info
->stmt
) == GIMPLE_PHI
1449 || dt
== vect_nested_cycle
);
1452 /* operand is defined inside the loop. */
1453 case vect_internal_def
:
1455 /* Get the def from the vectorized stmt. */
1456 vec_stmt_info
= STMT_VINFO_VEC_STMT (def_stmt_info
);
1457 /* Get vectorized pattern statement. */
1459 && STMT_VINFO_IN_PATTERN_P (def_stmt_info
)
1460 && !STMT_VINFO_RELEVANT (def_stmt_info
))
1461 vec_stmt_info
= (STMT_VINFO_VEC_STMT
1462 (STMT_VINFO_RELATED_STMT (def_stmt_info
)));
1463 gcc_assert (vec_stmt_info
);
1464 if (gphi
*phi
= dyn_cast
<gphi
*> (vec_stmt_info
->stmt
))
1465 vec_oprnd
= PHI_RESULT (phi
);
1467 vec_oprnd
= gimple_get_lhs (vec_stmt_info
->stmt
);
1477 /* Function vect_get_vec_def_for_operand.
1479 OP is an operand in STMT_VINFO. This function returns a (vector) def
1480 that will be used in the vectorized stmt for STMT_VINFO.
1482 In the case that OP is an SSA_NAME which is defined in the loop, then
1483 STMT_VINFO_VEC_STMT of the defining stmt holds the relevant def.
1485 In case OP is an invariant or constant, a new stmt that creates a vector def
1486 needs to be introduced. VECTYPE may be used to specify a required type for
1487 vector invariant. */
1490 vect_get_vec_def_for_operand (vec_info
*vinfo
,
1491 tree op
, stmt_vec_info stmt_vinfo
, tree vectype
)
1494 enum vect_def_type dt
;
1496 loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
);
1498 if (dump_enabled_p ())
1499 dump_printf_loc (MSG_NOTE
, vect_location
,
1500 "vect_get_vec_def_for_operand: %T\n", op
);
1502 stmt_vec_info def_stmt_info
;
1503 is_simple_use
= vect_is_simple_use (op
, loop_vinfo
, &dt
,
1504 &def_stmt_info
, &def_stmt
);
1505 gcc_assert (is_simple_use
);
1506 if (def_stmt
&& dump_enabled_p ())
1507 dump_printf_loc (MSG_NOTE
, vect_location
, " def_stmt = %G", def_stmt
);
1509 if (dt
== vect_constant_def
|| dt
== vect_external_def
)
1511 tree stmt_vectype
= STMT_VINFO_VECTYPE (stmt_vinfo
);
1515 vector_type
= vectype
;
1516 else if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op
))
1517 && VECTOR_BOOLEAN_TYPE_P (stmt_vectype
))
1518 vector_type
= truth_type_for (stmt_vectype
);
1520 vector_type
= get_vectype_for_scalar_type (loop_vinfo
, TREE_TYPE (op
));
1522 gcc_assert (vector_type
);
1523 return vect_init_vector (vinfo
, stmt_vinfo
, op
, vector_type
, NULL
);
1526 return vect_get_vec_def_for_operand_1 (def_stmt_info
, dt
);
1530 /* Function vect_get_vec_def_for_stmt_copy
1532 Return a vector-def for an operand. This function is used when the
1533 vectorized stmt to be created (by the caller to this function) is a "copy"
1534 created in case the vectorized result cannot fit in one vector, and several
1535 copies of the vector-stmt are required. In this case the vector-def is
1536 retrieved from the vector stmt recorded in the STMT_VINFO_RELATED_STMT field
1537 of the stmt that defines VEC_OPRND. VINFO describes the vectorization.
1540 In case the vectorization factor (VF) is bigger than the number
1541 of elements that can fit in a vectype (nunits), we have to generate
1542 more than one vector stmt to vectorize the scalar stmt. This situation
1543 arises when there are multiple data-types operated upon in the loop; the
1544 smallest data-type determines the VF, and as a result, when vectorizing
1545 stmts operating on wider types we need to create 'VF/nunits' "copies" of the
1546 vector stmt (each computing a vector of 'nunits' results, and together
1547 computing 'VF' results in each iteration). This function is called when
1548 vectorizing such a stmt (e.g. vectorizing S2 in the illustration below, in
1549 which VF=16 and nunits=4, so the number of copies required is 4):
1551 scalar stmt: vectorized into: STMT_VINFO_RELATED_STMT
1553 S1: x = load VS1.0: vx.0 = memref0 VS1.1
1554 VS1.1: vx.1 = memref1 VS1.2
1555 VS1.2: vx.2 = memref2 VS1.3
1556 VS1.3: vx.3 = memref3
1558 S2: z = x + ... VSnew.0: vz0 = vx.0 + ... VSnew.1
1559 VSnew.1: vz1 = vx.1 + ... VSnew.2
1560 VSnew.2: vz2 = vx.2 + ... VSnew.3
1561 VSnew.3: vz3 = vx.3 + ...
1563 The vectorization of S1 is explained in vectorizable_load.
1564 The vectorization of S2:
1565 To create the first vector-stmt out of the 4 copies - VSnew.0 -
1566 the function 'vect_get_vec_def_for_operand' is called to
1567 get the relevant vector-def for each operand of S2. For operand x it
1568 returns the vector-def 'vx.0'.
1570 To create the remaining copies of the vector-stmt (VSnew.j), this
1571 function is called to get the relevant vector-def for each operand. It is
1572 obtained from the respective VS1.j stmt, which is recorded in the
1573 STMT_VINFO_RELATED_STMT field of the stmt that defines VEC_OPRND.
1575 For example, to obtain the vector-def 'vx.1' in order to create the
1576 vector stmt 'VSnew.1', this function is called with VEC_OPRND='vx.0'.
1577 Given 'vx0' we obtain the stmt that defines it ('VS1.0'); from the
1578 STMT_VINFO_RELATED_STMT field of 'VS1.0' we obtain the next copy - 'VS1.1',
1579 and return its def ('vx.1').
1580 Overall, to create the above sequence this function will be called 3 times:
1581 vx.1 = vect_get_vec_def_for_stmt_copy (vinfo, vx.0);
1582 vx.2 = vect_get_vec_def_for_stmt_copy (vinfo, vx.1);
1583 vx.3 = vect_get_vec_def_for_stmt_copy (vinfo, vx.2); */
1586 vect_get_vec_def_for_stmt_copy (vec_info
*vinfo
, tree vec_oprnd
)
1588 stmt_vec_info def_stmt_info
= vinfo
->lookup_def (vec_oprnd
);
1590 /* Do nothing; can reuse same def. */
1593 def_stmt_info
= STMT_VINFO_RELATED_STMT (def_stmt_info
);
1594 gcc_assert (def_stmt_info
);
1595 if (gphi
*phi
= dyn_cast
<gphi
*> (def_stmt_info
->stmt
))
1596 vec_oprnd
= PHI_RESULT (phi
);
1598 vec_oprnd
= gimple_get_lhs (def_stmt_info
->stmt
);
1603 /* Get vectorized definitions for the operands to create a copy of an original
1604 stmt. See vect_get_vec_def_for_stmt_copy () for details. */
1607 vect_get_vec_defs_for_stmt_copy (vec_info
*vinfo
,
1608 vec
<tree
> *vec_oprnds0
,
1609 vec
<tree
> *vec_oprnds1
)
1611 tree vec_oprnd
= vec_oprnds0
->pop ();
1613 vec_oprnd
= vect_get_vec_def_for_stmt_copy (vinfo
, vec_oprnd
);
1614 vec_oprnds0
->quick_push (vec_oprnd
);
1616 if (vec_oprnds1
&& vec_oprnds1
->length ())
1618 vec_oprnd
= vec_oprnds1
->pop ();
1619 vec_oprnd
= vect_get_vec_def_for_stmt_copy (vinfo
, vec_oprnd
);
1620 vec_oprnds1
->quick_push (vec_oprnd
);
1625 /* Get vectorized definitions for OP0 and OP1. */
1628 vect_get_vec_defs (vec_info
*vinfo
, tree op0
, tree op1
, stmt_vec_info stmt_info
,
1629 vec
<tree
> *vec_oprnds0
,
1630 vec
<tree
> *vec_oprnds1
,
1635 auto_vec
<vec
<tree
> > vec_defs (SLP_TREE_CHILDREN (slp_node
).length ());
1636 vect_get_slp_defs (vinfo
, slp_node
, &vec_defs
, op1
? 2 : 1);
1637 *vec_oprnds0
= vec_defs
[0];
1639 *vec_oprnds1
= vec_defs
[1];
1645 vec_oprnds0
->create (1);
1646 vec_oprnd
= vect_get_vec_def_for_operand (vinfo
, op0
, stmt_info
);
1647 vec_oprnds0
->quick_push (vec_oprnd
);
1651 vec_oprnds1
->create (1);
1652 vec_oprnd
= vect_get_vec_def_for_operand (vinfo
, op1
, stmt_info
);
1653 vec_oprnds1
->quick_push (vec_oprnd
);
1658 /* Helper function called by vect_finish_replace_stmt and
1659 vect_finish_stmt_generation. Set the location of the new
1660 statement and create and return a stmt_vec_info for it. */
1662 static stmt_vec_info
1663 vect_finish_stmt_generation_1 (vec_info
*vinfo
,
1664 stmt_vec_info stmt_info
, gimple
*vec_stmt
)
1666 stmt_vec_info vec_stmt_info
= vinfo
->add_stmt (vec_stmt
);
1668 if (dump_enabled_p ())
1669 dump_printf_loc (MSG_NOTE
, vect_location
, "add new stmt: %G", vec_stmt
);
1671 gimple_set_location (vec_stmt
, gimple_location (stmt_info
->stmt
));
1673 /* While EH edges will generally prevent vectorization, stmt might
1674 e.g. be in a must-not-throw region. Ensure newly created stmts
1675 that could throw are part of the same region. */
1676 int lp_nr
= lookup_stmt_eh_lp (stmt_info
->stmt
);
1677 if (lp_nr
!= 0 && stmt_could_throw_p (cfun
, vec_stmt
))
1678 add_stmt_to_eh_lp (vec_stmt
, lp_nr
);
1680 return vec_stmt_info
;
1683 /* Replace the scalar statement STMT_INFO with a new vector statement VEC_STMT,
1684 which sets the same scalar result as STMT_INFO did. Create and return a
1685 stmt_vec_info for VEC_STMT. */
1688 vect_finish_replace_stmt (vec_info
*vinfo
,
1689 stmt_vec_info stmt_info
, gimple
*vec_stmt
)
1691 gimple
*scalar_stmt
= vect_orig_stmt (stmt_info
)->stmt
;
1692 gcc_assert (gimple_get_lhs (scalar_stmt
) == gimple_get_lhs (vec_stmt
));
1694 gimple_stmt_iterator gsi
= gsi_for_stmt (scalar_stmt
);
1695 gsi_replace (&gsi
, vec_stmt
, true);
1697 return vect_finish_stmt_generation_1 (vinfo
, stmt_info
, vec_stmt
);
1700 /* Add VEC_STMT to the vectorized implementation of STMT_INFO and insert it
1701 before *GSI. Create and return a stmt_vec_info for VEC_STMT. */
1704 vect_finish_stmt_generation (vec_info
*vinfo
,
1705 stmt_vec_info stmt_info
, gimple
*vec_stmt
,
1706 gimple_stmt_iterator
*gsi
)
1708 gcc_assert (gimple_code (stmt_info
->stmt
) != GIMPLE_LABEL
);
1710 if (!gsi_end_p (*gsi
)
1711 && gimple_has_mem_ops (vec_stmt
))
1713 gimple
*at_stmt
= gsi_stmt (*gsi
);
1714 tree vuse
= gimple_vuse (at_stmt
);
1715 if (vuse
&& TREE_CODE (vuse
) == SSA_NAME
)
1717 tree vdef
= gimple_vdef (at_stmt
);
1718 gimple_set_vuse (vec_stmt
, gimple_vuse (at_stmt
));
1719 /* If we have an SSA vuse and insert a store, update virtual
1720 SSA form to avoid triggering the renamer. Do so only
1721 if we can easily see all uses - which is what almost always
1722 happens with the way vectorized stmts are inserted. */
1723 if ((vdef
&& TREE_CODE (vdef
) == SSA_NAME
)
1724 && ((is_gimple_assign (vec_stmt
)
1725 && !is_gimple_reg (gimple_assign_lhs (vec_stmt
)))
1726 || (is_gimple_call (vec_stmt
)
1727 && !(gimple_call_flags (vec_stmt
)
1728 & (ECF_CONST
|ECF_PURE
|ECF_NOVOPS
)))))
1730 tree new_vdef
= copy_ssa_name (vuse
, vec_stmt
);
1731 gimple_set_vdef (vec_stmt
, new_vdef
);
1732 SET_USE (gimple_vuse_op (at_stmt
), new_vdef
);
1736 gsi_insert_before (gsi
, vec_stmt
, GSI_SAME_STMT
);
1737 return vect_finish_stmt_generation_1 (vinfo
, stmt_info
, vec_stmt
);
1740 /* We want to vectorize a call to combined function CFN with function
1741 decl FNDECL, using VECTYPE_OUT as the type of the output and VECTYPE_IN
1742 as the types of all inputs. Check whether this is possible using
1743 an internal function, returning its code if so or IFN_LAST if not. */
1746 vectorizable_internal_function (combined_fn cfn
, tree fndecl
,
1747 tree vectype_out
, tree vectype_in
)
1750 if (internal_fn_p (cfn
))
1751 ifn
= as_internal_fn (cfn
);
1753 ifn
= associated_internal_fn (fndecl
);
1754 if (ifn
!= IFN_LAST
&& direct_internal_fn_p (ifn
))
1756 const direct_internal_fn_info
&info
= direct_internal_fn (ifn
);
1757 if (info
.vectorizable
)
1759 tree type0
= (info
.type0
< 0 ? vectype_out
: vectype_in
);
1760 tree type1
= (info
.type1
< 0 ? vectype_out
: vectype_in
);
1761 if (direct_internal_fn_supported_p (ifn
, tree_pair (type0
, type1
),
1762 OPTIMIZE_FOR_SPEED
))
1770 static tree
permute_vec_elements (vec_info
*, tree
, tree
, tree
, stmt_vec_info
,
1771 gimple_stmt_iterator
*);
1773 /* Check whether a load or store statement in the loop described by
1774 LOOP_VINFO is possible in a fully-masked loop. This is testing
1775 whether the vectorizer pass has the appropriate support, as well as
1776 whether the target does.
1778 VLS_TYPE says whether the statement is a load or store and VECTYPE
1779 is the type of the vector being loaded or stored. MEMORY_ACCESS_TYPE
1780 says how the load or store is going to be implemented and GROUP_SIZE
1781 is the number of load or store statements in the containing group.
1782 If the access is a gather load or scatter store, GS_INFO describes
1783 its arguments. If the load or store is conditional, SCALAR_MASK is the
1784 condition under which it occurs.
1786 Clear LOOP_VINFO_CAN_FULLY_MASK_P if a fully-masked loop is not
1787 supported, otherwise record the required mask types. */
1790 check_load_store_masking (loop_vec_info loop_vinfo
, tree vectype
,
1791 vec_load_store_type vls_type
, int group_size
,
1792 vect_memory_access_type memory_access_type
,
1793 gather_scatter_info
*gs_info
, tree scalar_mask
)
1795 /* Invariant loads need no special support. */
1796 if (memory_access_type
== VMAT_INVARIANT
)
1799 vec_loop_masks
*masks
= &LOOP_VINFO_MASKS (loop_vinfo
);
1800 machine_mode vecmode
= TYPE_MODE (vectype
);
1801 bool is_load
= (vls_type
== VLS_LOAD
);
1802 if (memory_access_type
== VMAT_LOAD_STORE_LANES
)
1805 ? !vect_load_lanes_supported (vectype
, group_size
, true)
1806 : !vect_store_lanes_supported (vectype
, group_size
, true))
1808 if (dump_enabled_p ())
1809 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1810 "can't use a fully-masked loop because the"
1811 " target doesn't have an appropriate masked"
1812 " load/store-lanes instruction.\n");
1813 LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo
) = false;
1816 unsigned int ncopies
= vect_get_num_copies (loop_vinfo
, vectype
);
1817 vect_record_loop_mask (loop_vinfo
, masks
, ncopies
, vectype
, scalar_mask
);
1821 if (memory_access_type
== VMAT_GATHER_SCATTER
)
1823 internal_fn ifn
= (is_load
1824 ? IFN_MASK_GATHER_LOAD
1825 : IFN_MASK_SCATTER_STORE
);
1826 if (!internal_gather_scatter_fn_supported_p (ifn
, vectype
,
1827 gs_info
->memory_type
,
1828 gs_info
->offset_vectype
,
1831 if (dump_enabled_p ())
1832 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1833 "can't use a fully-masked loop because the"
1834 " target doesn't have an appropriate masked"
1835 " gather load or scatter store instruction.\n");
1836 LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo
) = false;
1839 unsigned int ncopies
= vect_get_num_copies (loop_vinfo
, vectype
);
1840 vect_record_loop_mask (loop_vinfo
, masks
, ncopies
, vectype
, scalar_mask
);
1844 if (memory_access_type
!= VMAT_CONTIGUOUS
1845 && memory_access_type
!= VMAT_CONTIGUOUS_PERMUTE
)
1847 /* Element X of the data must come from iteration i * VF + X of the
1848 scalar loop. We need more work to support other mappings. */
1849 if (dump_enabled_p ())
1850 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1851 "can't use a fully-masked loop because an access"
1852 " isn't contiguous.\n");
1853 LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo
) = false;
1857 machine_mode mask_mode
;
1858 if (!VECTOR_MODE_P (vecmode
)
1859 || !targetm
.vectorize
.get_mask_mode (vecmode
).exists (&mask_mode
)
1860 || !can_vec_mask_load_store_p (vecmode
, mask_mode
, is_load
))
1862 if (dump_enabled_p ())
1863 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1864 "can't use a fully-masked loop because the target"
1865 " doesn't have the appropriate masked load or"
1867 LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo
) = false;
1870 /* We might load more scalars than we need for permuting SLP loads.
1871 We checked in get_group_load_store_type that the extra elements
1872 don't leak into a new vector. */
1873 poly_uint64 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
1874 poly_uint64 vf
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
);
1875 unsigned int nvectors
;
1876 if (can_div_away_from_zero_p (group_size
* vf
, nunits
, &nvectors
))
1877 vect_record_loop_mask (loop_vinfo
, masks
, nvectors
, vectype
, scalar_mask
);
1882 /* Return the mask input to a masked load or store. VEC_MASK is the vectorized
1883 form of the scalar mask condition and LOOP_MASK, if nonnull, is the mask
1884 that needs to be applied to all loads and stores in a vectorized loop.
1885 Return VEC_MASK if LOOP_MASK is null, otherwise return VEC_MASK & LOOP_MASK.
1887 MASK_TYPE is the type of both masks. If new statements are needed,
1888 insert them before GSI. */
1891 prepare_load_store_mask (tree mask_type
, tree loop_mask
, tree vec_mask
,
1892 gimple_stmt_iterator
*gsi
)
1894 gcc_assert (useless_type_conversion_p (mask_type
, TREE_TYPE (vec_mask
)));
1898 gcc_assert (TREE_TYPE (loop_mask
) == mask_type
);
1899 tree and_res
= make_temp_ssa_name (mask_type
, NULL
, "vec_mask_and");
1900 gimple
*and_stmt
= gimple_build_assign (and_res
, BIT_AND_EXPR
,
1901 vec_mask
, loop_mask
);
1902 gsi_insert_before (gsi
, and_stmt
, GSI_SAME_STMT
);
1906 /* Determine whether we can use a gather load or scatter store to vectorize
1907 strided load or store STMT_INFO by truncating the current offset to a
1908 smaller width. We need to be able to construct an offset vector:
1910 { 0, X, X*2, X*3, ... }
1912 without loss of precision, where X is STMT_INFO's DR_STEP.
1914 Return true if this is possible, describing the gather load or scatter
1915 store in GS_INFO. MASKED_P is true if the load or store is conditional. */
1918 vect_truncate_gather_scatter_offset (stmt_vec_info stmt_info
,
1919 loop_vec_info loop_vinfo
, bool masked_p
,
1920 gather_scatter_info
*gs_info
)
1922 dr_vec_info
*dr_info
= STMT_VINFO_DR_INFO (stmt_info
);
1923 data_reference
*dr
= dr_info
->dr
;
1924 tree step
= DR_STEP (dr
);
1925 if (TREE_CODE (step
) != INTEGER_CST
)
1927 /* ??? Perhaps we could use range information here? */
1928 if (dump_enabled_p ())
1929 dump_printf_loc (MSG_NOTE
, vect_location
,
1930 "cannot truncate variable step.\n");
1934 /* Get the number of bits in an element. */
1935 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
1936 scalar_mode element_mode
= SCALAR_TYPE_MODE (TREE_TYPE (vectype
));
1937 unsigned int element_bits
= GET_MODE_BITSIZE (element_mode
);
1939 /* Set COUNT to the upper limit on the number of elements - 1.
1940 Start with the maximum vectorization factor. */
1941 unsigned HOST_WIDE_INT count
= vect_max_vf (loop_vinfo
) - 1;
1943 /* Try lowering COUNT to the number of scalar latch iterations. */
1944 class loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
1945 widest_int max_iters
;
1946 if (max_loop_iterations (loop
, &max_iters
)
1947 && max_iters
< count
)
1948 count
= max_iters
.to_shwi ();
1950 /* Try scales of 1 and the element size. */
1951 int scales
[] = { 1, vect_get_scalar_dr_size (dr_info
) };
1952 wi::overflow_type overflow
= wi::OVF_NONE
;
1953 for (int i
= 0; i
< 2; ++i
)
1955 int scale
= scales
[i
];
1957 if (!wi::multiple_of_p (wi::to_widest (step
), scale
, SIGNED
, &factor
))
1960 /* Determine the minimum precision of (COUNT - 1) * STEP / SCALE. */
1961 widest_int range
= wi::mul (count
, factor
, SIGNED
, &overflow
);
1964 signop sign
= range
>= 0 ? UNSIGNED
: SIGNED
;
1965 unsigned int min_offset_bits
= wi::min_precision (range
, sign
);
1967 /* Find the narrowest viable offset type. */
1968 unsigned int offset_bits
= 1U << ceil_log2 (min_offset_bits
);
1969 tree offset_type
= build_nonstandard_integer_type (offset_bits
,
1972 /* See whether the target supports the operation with an offset
1973 no narrower than OFFSET_TYPE. */
1974 tree memory_type
= TREE_TYPE (DR_REF (dr
));
1975 if (!vect_gather_scatter_fn_p (loop_vinfo
, DR_IS_READ (dr
), masked_p
,
1976 vectype
, memory_type
, offset_type
, scale
,
1977 &gs_info
->ifn
, &gs_info
->offset_vectype
))
1980 gs_info
->decl
= NULL_TREE
;
1981 /* Logically the sum of DR_BASE_ADDRESS, DR_INIT and DR_OFFSET,
1982 but we don't need to store that here. */
1983 gs_info
->base
= NULL_TREE
;
1984 gs_info
->element_type
= TREE_TYPE (vectype
);
1985 gs_info
->offset
= fold_convert (offset_type
, step
);
1986 gs_info
->offset_dt
= vect_constant_def
;
1987 gs_info
->scale
= scale
;
1988 gs_info
->memory_type
= memory_type
;
1992 if (overflow
&& dump_enabled_p ())
1993 dump_printf_loc (MSG_NOTE
, vect_location
,
1994 "truncating gather/scatter offset to %d bits"
1995 " might change its value.\n", element_bits
);
2000 /* Return true if we can use gather/scatter internal functions to
2001 vectorize STMT_INFO, which is a grouped or strided load or store.
2002 MASKED_P is true if load or store is conditional. When returning
2003 true, fill in GS_INFO with the information required to perform the
2007 vect_use_strided_gather_scatters_p (stmt_vec_info stmt_info
,
2008 loop_vec_info loop_vinfo
, bool masked_p
,
2009 gather_scatter_info
*gs_info
)
2011 if (!vect_check_gather_scatter (stmt_info
, loop_vinfo
, gs_info
)
2013 return vect_truncate_gather_scatter_offset (stmt_info
, loop_vinfo
,
2016 tree old_offset_type
= TREE_TYPE (gs_info
->offset
);
2017 tree new_offset_type
= TREE_TYPE (gs_info
->offset_vectype
);
2019 gcc_assert (TYPE_PRECISION (new_offset_type
)
2020 >= TYPE_PRECISION (old_offset_type
));
2021 gs_info
->offset
= fold_convert (new_offset_type
, gs_info
->offset
);
2023 if (dump_enabled_p ())
2024 dump_printf_loc (MSG_NOTE
, vect_location
,
2025 "using gather/scatter for strided/grouped access,"
2026 " scale = %d\n", gs_info
->scale
);
2031 /* STMT_INFO is a non-strided load or store, meaning that it accesses
2032 elements with a known constant step. Return -1 if that step
2033 is negative, 0 if it is zero, and 1 if it is greater than zero. */
2036 compare_step_with_zero (vec_info
*vinfo
, stmt_vec_info stmt_info
)
2038 dr_vec_info
*dr_info
= STMT_VINFO_DR_INFO (stmt_info
);
2039 return tree_int_cst_compare (vect_dr_behavior (vinfo
, dr_info
)->step
,
2043 /* If the target supports a permute mask that reverses the elements in
2044 a vector of type VECTYPE, return that mask, otherwise return null. */
2047 perm_mask_for_reverse (tree vectype
)
2049 poly_uint64 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
2051 /* The encoding has a single stepped pattern. */
2052 vec_perm_builder
sel (nunits
, 1, 3);
2053 for (int i
= 0; i
< 3; ++i
)
2054 sel
.quick_push (nunits
- 1 - i
);
2056 vec_perm_indices
indices (sel
, 1, nunits
);
2057 if (!can_vec_perm_const_p (TYPE_MODE (vectype
), indices
))
2059 return vect_gen_perm_mask_checked (vectype
, indices
);
2062 /* A subroutine of get_load_store_type, with a subset of the same
2063 arguments. Handle the case where STMT_INFO is a load or store that
2064 accesses consecutive elements with a negative step. */
2066 static vect_memory_access_type
2067 get_negative_load_store_type (vec_info
*vinfo
,
2068 stmt_vec_info stmt_info
, tree vectype
,
2069 vec_load_store_type vls_type
,
2070 unsigned int ncopies
)
2072 dr_vec_info
*dr_info
= STMT_VINFO_DR_INFO (stmt_info
);
2073 dr_alignment_support alignment_support_scheme
;
2077 if (dump_enabled_p ())
2078 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2079 "multiple types with negative step.\n");
2080 return VMAT_ELEMENTWISE
;
2083 alignment_support_scheme
= vect_supportable_dr_alignment (vinfo
,
2085 if (alignment_support_scheme
!= dr_aligned
2086 && alignment_support_scheme
!= dr_unaligned_supported
)
2088 if (dump_enabled_p ())
2089 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2090 "negative step but alignment required.\n");
2091 return VMAT_ELEMENTWISE
;
2094 if (vls_type
== VLS_STORE_INVARIANT
)
2096 if (dump_enabled_p ())
2097 dump_printf_loc (MSG_NOTE
, vect_location
,
2098 "negative step with invariant source;"
2099 " no permute needed.\n");
2100 return VMAT_CONTIGUOUS_DOWN
;
2103 if (!perm_mask_for_reverse (vectype
))
2105 if (dump_enabled_p ())
2106 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2107 "negative step and reversing not supported.\n");
2108 return VMAT_ELEMENTWISE
;
2111 return VMAT_CONTIGUOUS_REVERSE
;
2114 /* STMT_INFO is either a masked or unconditional store. Return the value
2118 vect_get_store_rhs (stmt_vec_info stmt_info
)
2120 if (gassign
*assign
= dyn_cast
<gassign
*> (stmt_info
->stmt
))
2122 gcc_assert (gimple_assign_single_p (assign
));
2123 return gimple_assign_rhs1 (assign
);
2125 if (gcall
*call
= dyn_cast
<gcall
*> (stmt_info
->stmt
))
2127 internal_fn ifn
= gimple_call_internal_fn (call
);
2128 int index
= internal_fn_stored_value_index (ifn
);
2129 gcc_assert (index
>= 0);
2130 return gimple_call_arg (call
, index
);
2135 /* Function VECTOR_VECTOR_COMPOSITION_TYPE
2137 This function returns a vector type which can be composed with NETLS pieces,
2138 whose type is recorded in PTYPE. VTYPE should be a vector type, and has the
2139 same vector size as the return vector. It checks target whether supports
2140 pieces-size vector mode for construction firstly, if target fails to, check
2141 pieces-size scalar mode for construction further. It returns NULL_TREE if
2142 fails to find the available composition.
2144 For example, for (vtype=V16QI, nelts=4), we can probably get:
2145 - V16QI with PTYPE V4QI.
2146 - V4SI with PTYPE SI.
2150 vector_vector_composition_type (tree vtype
, poly_uint64 nelts
, tree
*ptype
)
2152 gcc_assert (VECTOR_TYPE_P (vtype
));
2153 gcc_assert (known_gt (nelts
, 0U));
2155 machine_mode vmode
= TYPE_MODE (vtype
);
2156 if (!VECTOR_MODE_P (vmode
))
2159 poly_uint64 vbsize
= GET_MODE_BITSIZE (vmode
);
2160 unsigned int pbsize
;
2161 if (constant_multiple_p (vbsize
, nelts
, &pbsize
))
2163 /* First check if vec_init optab supports construction from
2164 vector pieces directly. */
2165 scalar_mode elmode
= SCALAR_TYPE_MODE (TREE_TYPE (vtype
));
2166 poly_uint64 inelts
= pbsize
/ GET_MODE_BITSIZE (elmode
);
2168 if (related_vector_mode (vmode
, elmode
, inelts
).exists (&rmode
)
2169 && (convert_optab_handler (vec_init_optab
, vmode
, rmode
)
2170 != CODE_FOR_nothing
))
2172 *ptype
= build_vector_type (TREE_TYPE (vtype
), inelts
);
2176 /* Otherwise check if exists an integer type of the same piece size and
2177 if vec_init optab supports construction from it directly. */
2178 if (int_mode_for_size (pbsize
, 0).exists (&elmode
)
2179 && related_vector_mode (vmode
, elmode
, nelts
).exists (&rmode
)
2180 && (convert_optab_handler (vec_init_optab
, rmode
, elmode
)
2181 != CODE_FOR_nothing
))
2183 *ptype
= build_nonstandard_integer_type (pbsize
, 1);
2184 return build_vector_type (*ptype
, nelts
);
2191 /* A subroutine of get_load_store_type, with a subset of the same
2192 arguments. Handle the case where STMT_INFO is part of a grouped load
2195 For stores, the statements in the group are all consecutive
2196 and there is no gap at the end. For loads, the statements in the
2197 group might not be consecutive; there can be gaps between statements
2198 as well as at the end. */
2201 get_group_load_store_type (vec_info
*vinfo
, stmt_vec_info stmt_info
,
2202 tree vectype
, bool slp
,
2203 bool masked_p
, vec_load_store_type vls_type
,
2204 vect_memory_access_type
*memory_access_type
,
2205 gather_scatter_info
*gs_info
)
2207 loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
);
2208 class loop
*loop
= loop_vinfo
? LOOP_VINFO_LOOP (loop_vinfo
) : NULL
;
2209 stmt_vec_info first_stmt_info
= DR_GROUP_FIRST_ELEMENT (stmt_info
);
2210 dr_vec_info
*first_dr_info
= STMT_VINFO_DR_INFO (first_stmt_info
);
2211 unsigned int group_size
= DR_GROUP_SIZE (first_stmt_info
);
2212 bool single_element_p
= (stmt_info
== first_stmt_info
2213 && !DR_GROUP_NEXT_ELEMENT (stmt_info
));
2214 unsigned HOST_WIDE_INT gap
= DR_GROUP_GAP (first_stmt_info
);
2215 poly_uint64 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
2217 /* True if the vectorized statements would access beyond the last
2218 statement in the group. */
2219 bool overrun_p
= false;
2221 /* True if we can cope with such overrun by peeling for gaps, so that
2222 there is at least one final scalar iteration after the vector loop. */
2223 bool can_overrun_p
= (!masked_p
2224 && vls_type
== VLS_LOAD
2228 /* There can only be a gap at the end of the group if the stride is
2229 known at compile time. */
2230 gcc_assert (!STMT_VINFO_STRIDED_P (first_stmt_info
) || gap
== 0);
2232 /* Stores can't yet have gaps. */
2233 gcc_assert (slp
|| vls_type
== VLS_LOAD
|| gap
== 0);
2237 if (STMT_VINFO_STRIDED_P (first_stmt_info
))
2239 /* Try to use consecutive accesses of DR_GROUP_SIZE elements,
2240 separated by the stride, until we have a complete vector.
2241 Fall back to scalar accesses if that isn't possible. */
2242 if (multiple_p (nunits
, group_size
))
2243 *memory_access_type
= VMAT_STRIDED_SLP
;
2245 *memory_access_type
= VMAT_ELEMENTWISE
;
2249 overrun_p
= loop_vinfo
&& gap
!= 0;
2250 if (overrun_p
&& vls_type
!= VLS_LOAD
)
2252 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2253 "Grouped store with gaps requires"
2254 " non-consecutive accesses\n");
2257 /* An overrun is fine if the trailing elements are smaller
2258 than the alignment boundary B. Every vector access will
2259 be a multiple of B and so we are guaranteed to access a
2260 non-gap element in the same B-sized block. */
2262 && gap
< (vect_known_alignment_in_bytes (first_dr_info
)
2263 / vect_get_scalar_dr_size (first_dr_info
)))
2266 /* If the gap splits the vector in half and the target
2267 can do half-vector operations avoid the epilogue peeling
2268 by simply loading half of the vector only. Usually
2269 the construction with an upper zero half will be elided. */
2270 dr_alignment_support alignment_support_scheme
;
2274 && (((alignment_support_scheme
2275 = vect_supportable_dr_alignment (vinfo
,
2276 first_dr_info
, false)))
2278 || alignment_support_scheme
== dr_unaligned_supported
)
2279 && known_eq (nunits
, (group_size
- gap
) * 2)
2280 && known_eq (nunits
, group_size
)
2281 && (vector_vector_composition_type (vectype
, 2, &half_vtype
)
2285 if (overrun_p
&& !can_overrun_p
)
2287 if (dump_enabled_p ())
2288 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2289 "Peeling for outer loop is not supported\n");
2292 int cmp
= compare_step_with_zero (vinfo
, stmt_info
);
2294 *memory_access_type
= get_negative_load_store_type
2295 (vinfo
, stmt_info
, vectype
, vls_type
, 1);
2298 gcc_assert (!loop_vinfo
|| cmp
> 0);
2299 *memory_access_type
= VMAT_CONTIGUOUS
;
2305 /* We can always handle this case using elementwise accesses,
2306 but see if something more efficient is available. */
2307 *memory_access_type
= VMAT_ELEMENTWISE
;
2309 /* If there is a gap at the end of the group then these optimizations
2310 would access excess elements in the last iteration. */
2311 bool would_overrun_p
= (gap
!= 0);
2312 /* An overrun is fine if the trailing elements are smaller than the
2313 alignment boundary B. Every vector access will be a multiple of B
2314 and so we are guaranteed to access a non-gap element in the
2315 same B-sized block. */
2318 && gap
< (vect_known_alignment_in_bytes (first_dr_info
)
2319 / vect_get_scalar_dr_size (first_dr_info
)))
2320 would_overrun_p
= false;
2322 if (!STMT_VINFO_STRIDED_P (first_stmt_info
)
2323 && (can_overrun_p
|| !would_overrun_p
)
2324 && compare_step_with_zero (vinfo
, stmt_info
) > 0)
2326 /* First cope with the degenerate case of a single-element
2328 if (known_eq (TYPE_VECTOR_SUBPARTS (vectype
), 1U))
2329 *memory_access_type
= VMAT_CONTIGUOUS
;
2331 /* Otherwise try using LOAD/STORE_LANES. */
2332 if (*memory_access_type
== VMAT_ELEMENTWISE
2333 && (vls_type
== VLS_LOAD
2334 ? vect_load_lanes_supported (vectype
, group_size
, masked_p
)
2335 : vect_store_lanes_supported (vectype
, group_size
,
2338 *memory_access_type
= VMAT_LOAD_STORE_LANES
;
2339 overrun_p
= would_overrun_p
;
2342 /* If that fails, try using permuting loads. */
2343 if (*memory_access_type
== VMAT_ELEMENTWISE
2344 && (vls_type
== VLS_LOAD
2345 ? vect_grouped_load_supported (vectype
, single_element_p
,
2347 : vect_grouped_store_supported (vectype
, group_size
)))
2349 *memory_access_type
= VMAT_CONTIGUOUS_PERMUTE
;
2350 overrun_p
= would_overrun_p
;
2354 /* As a last resort, trying using a gather load or scatter store.
2356 ??? Although the code can handle all group sizes correctly,
2357 it probably isn't a win to use separate strided accesses based
2358 on nearby locations. Or, even if it's a win over scalar code,
2359 it might not be a win over vectorizing at a lower VF, if that
2360 allows us to use contiguous accesses. */
2361 if (*memory_access_type
== VMAT_ELEMENTWISE
2364 && vect_use_strided_gather_scatters_p (stmt_info
, loop_vinfo
,
2366 *memory_access_type
= VMAT_GATHER_SCATTER
;
2369 if (vls_type
!= VLS_LOAD
&& first_stmt_info
== stmt_info
)
2371 /* STMT is the leader of the group. Check the operands of all the
2372 stmts of the group. */
2373 stmt_vec_info next_stmt_info
= DR_GROUP_NEXT_ELEMENT (stmt_info
);
2374 while (next_stmt_info
)
2376 tree op
= vect_get_store_rhs (next_stmt_info
);
2377 enum vect_def_type dt
;
2378 if (!vect_is_simple_use (op
, vinfo
, &dt
))
2380 if (dump_enabled_p ())
2381 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2382 "use not simple.\n");
2385 next_stmt_info
= DR_GROUP_NEXT_ELEMENT (next_stmt_info
);
2391 gcc_assert (can_overrun_p
);
2392 if (dump_enabled_p ())
2393 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2394 "Data access with gaps requires scalar "
2396 LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo
) = true;
2402 /* Analyze load or store statement STMT_INFO of type VLS_TYPE. Return true
2403 if there is a memory access type that the vectorized form can use,
2404 storing it in *MEMORY_ACCESS_TYPE if so. If we decide to use gathers
2405 or scatters, fill in GS_INFO accordingly.
2407 SLP says whether we're performing SLP rather than loop vectorization.
2408 MASKED_P is true if the statement is conditional on a vectorized mask.
2409 VECTYPE is the vector type that the vectorized statements will use.
2410 NCOPIES is the number of vector statements that will be needed. */
2413 get_load_store_type (vec_info
*vinfo
, stmt_vec_info stmt_info
,
2414 tree vectype
, bool slp
,
2415 bool masked_p
, vec_load_store_type vls_type
,
2416 unsigned int ncopies
,
2417 vect_memory_access_type
*memory_access_type
,
2418 gather_scatter_info
*gs_info
)
2420 loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
);
2421 poly_uint64 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
2422 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info
))
2424 *memory_access_type
= VMAT_GATHER_SCATTER
;
2425 if (!vect_check_gather_scatter (stmt_info
, loop_vinfo
, gs_info
))
2427 else if (!vect_is_simple_use (gs_info
->offset
, vinfo
,
2428 &gs_info
->offset_dt
,
2429 &gs_info
->offset_vectype
))
2431 if (dump_enabled_p ())
2432 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2433 "%s index use not simple.\n",
2434 vls_type
== VLS_LOAD
? "gather" : "scatter");
2438 else if (STMT_VINFO_GROUPED_ACCESS (stmt_info
))
2440 if (!get_group_load_store_type (vinfo
, stmt_info
, vectype
, slp
, masked_p
,
2441 vls_type
, memory_access_type
, gs_info
))
2444 else if (STMT_VINFO_STRIDED_P (stmt_info
))
2448 && vect_use_strided_gather_scatters_p (stmt_info
, loop_vinfo
,
2450 *memory_access_type
= VMAT_GATHER_SCATTER
;
2452 *memory_access_type
= VMAT_ELEMENTWISE
;
2456 int cmp
= compare_step_with_zero (vinfo
, stmt_info
);
2458 *memory_access_type
= get_negative_load_store_type
2459 (vinfo
, stmt_info
, vectype
, vls_type
, ncopies
);
2462 gcc_assert (vls_type
== VLS_LOAD
);
2463 *memory_access_type
= VMAT_INVARIANT
;
2466 *memory_access_type
= VMAT_CONTIGUOUS
;
2469 if ((*memory_access_type
== VMAT_ELEMENTWISE
2470 || *memory_access_type
== VMAT_STRIDED_SLP
)
2471 && !nunits
.is_constant ())
2473 if (dump_enabled_p ())
2474 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2475 "Not using elementwise accesses due to variable "
2476 "vectorization factor.\n");
2480 /* FIXME: At the moment the cost model seems to underestimate the
2481 cost of using elementwise accesses. This check preserves the
2482 traditional behavior until that can be fixed. */
2483 stmt_vec_info first_stmt_info
= DR_GROUP_FIRST_ELEMENT (stmt_info
);
2484 if (!first_stmt_info
)
2485 first_stmt_info
= stmt_info
;
2486 if (*memory_access_type
== VMAT_ELEMENTWISE
2487 && !STMT_VINFO_STRIDED_P (first_stmt_info
)
2488 && !(stmt_info
== DR_GROUP_FIRST_ELEMENT (stmt_info
)
2489 && !DR_GROUP_NEXT_ELEMENT (stmt_info
)
2490 && !pow2p_hwi (DR_GROUP_SIZE (stmt_info
))))
2492 if (dump_enabled_p ())
2493 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2494 "not falling back to elementwise accesses\n");
2500 /* Return true if boolean argument MASK is suitable for vectorizing
2501 conditional operation STMT_INFO. When returning true, store the type
2502 of the definition in *MASK_DT_OUT and the type of the vectorized mask
2503 in *MASK_VECTYPE_OUT. */
2506 vect_check_scalar_mask (vec_info
*vinfo
, stmt_vec_info stmt_info
, tree mask
,
2507 vect_def_type
*mask_dt_out
,
2508 tree
*mask_vectype_out
)
2510 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (mask
)))
2512 if (dump_enabled_p ())
2513 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2514 "mask argument is not a boolean.\n");
2518 if (TREE_CODE (mask
) != SSA_NAME
)
2520 if (dump_enabled_p ())
2521 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2522 "mask argument is not an SSA name.\n");
2526 enum vect_def_type mask_dt
;
2528 if (!vect_is_simple_use (mask
, vinfo
, &mask_dt
, &mask_vectype
))
2530 if (dump_enabled_p ())
2531 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2532 "mask use not simple.\n");
2536 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
2538 mask_vectype
= get_mask_type_for_scalar_type (vinfo
, TREE_TYPE (vectype
));
2540 if (!mask_vectype
|| !VECTOR_BOOLEAN_TYPE_P (mask_vectype
))
2542 if (dump_enabled_p ())
2543 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2544 "could not find an appropriate vector mask type.\n");
2548 if (maybe_ne (TYPE_VECTOR_SUBPARTS (mask_vectype
),
2549 TYPE_VECTOR_SUBPARTS (vectype
)))
2551 if (dump_enabled_p ())
2552 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2553 "vector mask type %T"
2554 " does not match vector data type %T.\n",
2555 mask_vectype
, vectype
);
2560 *mask_dt_out
= mask_dt
;
2561 *mask_vectype_out
= mask_vectype
;
2565 /* Return true if stored value RHS is suitable for vectorizing store
2566 statement STMT_INFO. When returning true, store the type of the
2567 definition in *RHS_DT_OUT, the type of the vectorized store value in
2568 *RHS_VECTYPE_OUT and the type of the store in *VLS_TYPE_OUT. */
2571 vect_check_store_rhs (vec_info
*vinfo
, stmt_vec_info stmt_info
, tree rhs
,
2572 vect_def_type
*rhs_dt_out
, tree
*rhs_vectype_out
,
2573 vec_load_store_type
*vls_type_out
)
2575 /* In the case this is a store from a constant make sure
2576 native_encode_expr can handle it. */
2577 if (CONSTANT_CLASS_P (rhs
) && native_encode_expr (rhs
, NULL
, 64) == 0)
2579 if (dump_enabled_p ())
2580 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2581 "cannot encode constant as a byte sequence.\n");
2585 enum vect_def_type rhs_dt
;
2587 if (!vect_is_simple_use (rhs
, vinfo
, &rhs_dt
, &rhs_vectype
))
2589 if (dump_enabled_p ())
2590 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2591 "use not simple.\n");
2595 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
2596 if (rhs_vectype
&& !useless_type_conversion_p (vectype
, rhs_vectype
))
2598 if (dump_enabled_p ())
2599 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2600 "incompatible vector types.\n");
2604 *rhs_dt_out
= rhs_dt
;
2605 *rhs_vectype_out
= rhs_vectype
;
2606 if (rhs_dt
== vect_constant_def
|| rhs_dt
== vect_external_def
)
2607 *vls_type_out
= VLS_STORE_INVARIANT
;
2609 *vls_type_out
= VLS_STORE
;
2613 /* Build an all-ones vector mask of type MASKTYPE while vectorizing STMT_INFO.
2614 Note that we support masks with floating-point type, in which case the
2615 floats are interpreted as a bitmask. */
2618 vect_build_all_ones_mask (vec_info
*vinfo
,
2619 stmt_vec_info stmt_info
, tree masktype
)
2621 if (TREE_CODE (masktype
) == INTEGER_TYPE
)
2622 return build_int_cst (masktype
, -1);
2623 else if (TREE_CODE (TREE_TYPE (masktype
)) == INTEGER_TYPE
)
2625 tree mask
= build_int_cst (TREE_TYPE (masktype
), -1);
2626 mask
= build_vector_from_val (masktype
, mask
);
2627 return vect_init_vector (vinfo
, stmt_info
, mask
, masktype
, NULL
);
2629 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (masktype
)))
2633 for (int j
= 0; j
< 6; ++j
)
2635 real_from_target (&r
, tmp
, TYPE_MODE (TREE_TYPE (masktype
)));
2636 tree mask
= build_real (TREE_TYPE (masktype
), r
);
2637 mask
= build_vector_from_val (masktype
, mask
);
2638 return vect_init_vector (vinfo
, stmt_info
, mask
, masktype
, NULL
);
2643 /* Build an all-zero merge value of type VECTYPE while vectorizing
2644 STMT_INFO as a gather load. */
2647 vect_build_zero_merge_argument (vec_info
*vinfo
,
2648 stmt_vec_info stmt_info
, tree vectype
)
2651 if (TREE_CODE (TREE_TYPE (vectype
)) == INTEGER_TYPE
)
2652 merge
= build_int_cst (TREE_TYPE (vectype
), 0);
2653 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (vectype
)))
2657 for (int j
= 0; j
< 6; ++j
)
2659 real_from_target (&r
, tmp
, TYPE_MODE (TREE_TYPE (vectype
)));
2660 merge
= build_real (TREE_TYPE (vectype
), r
);
2664 merge
= build_vector_from_val (vectype
, merge
);
2665 return vect_init_vector (vinfo
, stmt_info
, merge
, vectype
, NULL
);
2668 /* Build a gather load call while vectorizing STMT_INFO. Insert new
2669 instructions before GSI and add them to VEC_STMT. GS_INFO describes
2670 the gather load operation. If the load is conditional, MASK is the
2671 unvectorized condition and MASK_DT is its definition type, otherwise
2675 vect_build_gather_load_calls (vec_info
*vinfo
, stmt_vec_info stmt_info
,
2676 gimple_stmt_iterator
*gsi
,
2677 stmt_vec_info
*vec_stmt
,
2678 gather_scatter_info
*gs_info
,
2681 loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
);
2682 class loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
2683 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
2684 poly_uint64 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
2685 int ncopies
= vect_get_num_copies (loop_vinfo
, vectype
);
2686 edge pe
= loop_preheader_edge (loop
);
2687 enum { NARROW
, NONE
, WIDEN
} modifier
;
2688 poly_uint64 gather_off_nunits
2689 = TYPE_VECTOR_SUBPARTS (gs_info
->offset_vectype
);
2691 tree arglist
= TYPE_ARG_TYPES (TREE_TYPE (gs_info
->decl
));
2692 tree rettype
= TREE_TYPE (TREE_TYPE (gs_info
->decl
));
2693 tree srctype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
2694 tree ptrtype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
2695 tree idxtype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
2696 tree masktype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
2697 tree scaletype
= TREE_VALUE (arglist
);
2698 tree real_masktype
= masktype
;
2699 gcc_checking_assert (types_compatible_p (srctype
, rettype
)
2701 || TREE_CODE (masktype
) == INTEGER_TYPE
2702 || types_compatible_p (srctype
, masktype
)));
2703 if (mask
&& TREE_CODE (masktype
) == INTEGER_TYPE
)
2704 masktype
= truth_type_for (srctype
);
2706 tree mask_halftype
= masktype
;
2707 tree perm_mask
= NULL_TREE
;
2708 tree mask_perm_mask
= NULL_TREE
;
2709 if (known_eq (nunits
, gather_off_nunits
))
2711 else if (known_eq (nunits
* 2, gather_off_nunits
))
2715 /* Currently widening gathers and scatters are only supported for
2716 fixed-length vectors. */
2717 int count
= gather_off_nunits
.to_constant ();
2718 vec_perm_builder
sel (count
, count
, 1);
2719 for (int i
= 0; i
< count
; ++i
)
2720 sel
.quick_push (i
| (count
/ 2));
2722 vec_perm_indices
indices (sel
, 1, count
);
2723 perm_mask
= vect_gen_perm_mask_checked (gs_info
->offset_vectype
,
2726 else if (known_eq (nunits
, gather_off_nunits
* 2))
2730 /* Currently narrowing gathers and scatters are only supported for
2731 fixed-length vectors. */
2732 int count
= nunits
.to_constant ();
2733 vec_perm_builder
sel (count
, count
, 1);
2734 sel
.quick_grow (count
);
2735 for (int i
= 0; i
< count
; ++i
)
2736 sel
[i
] = i
< count
/ 2 ? i
: i
+ count
/ 2;
2737 vec_perm_indices
indices (sel
, 2, count
);
2738 perm_mask
= vect_gen_perm_mask_checked (vectype
, indices
);
2742 if (mask
&& masktype
== real_masktype
)
2744 for (int i
= 0; i
< count
; ++i
)
2745 sel
[i
] = i
| (count
/ 2);
2746 indices
.new_vector (sel
, 2, count
);
2747 mask_perm_mask
= vect_gen_perm_mask_checked (masktype
, indices
);
2750 mask_halftype
= truth_type_for (gs_info
->offset_vectype
);
2755 tree scalar_dest
= gimple_get_lhs (stmt_info
->stmt
);
2756 tree vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
2758 tree ptr
= fold_convert (ptrtype
, gs_info
->base
);
2759 if (!is_gimple_min_invariant (ptr
))
2762 ptr
= force_gimple_operand (ptr
, &seq
, true, NULL_TREE
);
2763 basic_block new_bb
= gsi_insert_seq_on_edge_immediate (pe
, seq
);
2764 gcc_assert (!new_bb
);
2767 tree scale
= build_int_cst (scaletype
, gs_info
->scale
);
2769 tree vec_oprnd0
= NULL_TREE
;
2770 tree vec_mask
= NULL_TREE
;
2771 tree src_op
= NULL_TREE
;
2772 tree mask_op
= NULL_TREE
;
2773 tree prev_res
= NULL_TREE
;
2774 stmt_vec_info prev_stmt_info
= NULL
;
2778 src_op
= vect_build_zero_merge_argument (vinfo
, stmt_info
, rettype
);
2779 mask_op
= vect_build_all_ones_mask (vinfo
, stmt_info
, masktype
);
2782 for (int j
= 0; j
< ncopies
; ++j
)
2785 if (modifier
== WIDEN
&& (j
& 1))
2786 op
= permute_vec_elements (vinfo
, vec_oprnd0
, vec_oprnd0
,
2787 perm_mask
, stmt_info
, gsi
);
2790 = vect_get_vec_def_for_operand (vinfo
, gs_info
->offset
, stmt_info
);
2792 op
= vec_oprnd0
= vect_get_vec_def_for_stmt_copy (loop_vinfo
,
2795 if (!useless_type_conversion_p (idxtype
, TREE_TYPE (op
)))
2797 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op
)),
2798 TYPE_VECTOR_SUBPARTS (idxtype
)));
2799 var
= vect_get_new_ssa_name (idxtype
, vect_simple_var
);
2800 op
= build1 (VIEW_CONVERT_EXPR
, idxtype
, op
);
2801 gassign
*new_stmt
= gimple_build_assign (var
, VIEW_CONVERT_EXPR
, op
);
2802 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
2808 if (mask_perm_mask
&& (j
& 1))
2809 mask_op
= permute_vec_elements (vinfo
, mask_op
, mask_op
,
2810 mask_perm_mask
, stmt_info
, gsi
);
2814 vec_mask
= vect_get_vec_def_for_operand (vinfo
, mask
, stmt_info
);
2815 else if (modifier
!= NARROW
|| (j
& 1) == 0)
2816 vec_mask
= vect_get_vec_def_for_stmt_copy (loop_vinfo
,
2820 if (!useless_type_conversion_p (masktype
, TREE_TYPE (vec_mask
)))
2822 poly_uint64 sub1
= TYPE_VECTOR_SUBPARTS (TREE_TYPE (mask_op
));
2823 poly_uint64 sub2
= TYPE_VECTOR_SUBPARTS (masktype
);
2824 gcc_assert (known_eq (sub1
, sub2
));
2825 var
= vect_get_new_ssa_name (masktype
, vect_simple_var
);
2826 mask_op
= build1 (VIEW_CONVERT_EXPR
, masktype
, mask_op
);
2828 = gimple_build_assign (var
, VIEW_CONVERT_EXPR
, mask_op
);
2829 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
2833 if (modifier
== NARROW
&& masktype
!= real_masktype
)
2835 var
= vect_get_new_ssa_name (mask_halftype
, vect_simple_var
);
2837 = gimple_build_assign (var
, (j
& 1) ? VEC_UNPACK_HI_EXPR
2838 : VEC_UNPACK_LO_EXPR
,
2840 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
2846 tree mask_arg
= mask_op
;
2847 if (masktype
!= real_masktype
)
2849 tree utype
, optype
= TREE_TYPE (mask_op
);
2850 if (TYPE_MODE (real_masktype
) == TYPE_MODE (optype
))
2851 utype
= real_masktype
;
2853 utype
= lang_hooks
.types
.type_for_mode (TYPE_MODE (optype
), 1);
2854 var
= vect_get_new_ssa_name (utype
, vect_scalar_var
);
2855 mask_arg
= build1 (VIEW_CONVERT_EXPR
, utype
, mask_op
);
2857 = gimple_build_assign (var
, VIEW_CONVERT_EXPR
, mask_arg
);
2858 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
2860 if (!useless_type_conversion_p (real_masktype
, utype
))
2862 gcc_assert (TYPE_PRECISION (utype
)
2863 <= TYPE_PRECISION (real_masktype
));
2864 var
= vect_get_new_ssa_name (real_masktype
, vect_scalar_var
);
2865 new_stmt
= gimple_build_assign (var
, NOP_EXPR
, mask_arg
);
2866 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
2869 src_op
= build_zero_cst (srctype
);
2871 gcall
*new_call
= gimple_build_call (gs_info
->decl
, 5, src_op
, ptr
, op
,
2874 stmt_vec_info new_stmt_info
;
2875 if (!useless_type_conversion_p (vectype
, rettype
))
2877 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (vectype
),
2878 TYPE_VECTOR_SUBPARTS (rettype
)));
2879 op
= vect_get_new_ssa_name (rettype
, vect_simple_var
);
2880 gimple_call_set_lhs (new_call
, op
);
2881 vect_finish_stmt_generation (vinfo
, stmt_info
, new_call
, gsi
);
2882 var
= make_ssa_name (vec_dest
);
2883 op
= build1 (VIEW_CONVERT_EXPR
, vectype
, op
);
2884 gassign
*new_stmt
= gimple_build_assign (var
, VIEW_CONVERT_EXPR
, op
);
2886 = vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
2890 var
= make_ssa_name (vec_dest
, new_call
);
2891 gimple_call_set_lhs (new_call
, var
);
2893 = vect_finish_stmt_generation (vinfo
, stmt_info
, new_call
, gsi
);
2896 if (modifier
== NARROW
)
2903 var
= permute_vec_elements (vinfo
, prev_res
, var
, perm_mask
,
2905 new_stmt_info
= loop_vinfo
->lookup_def (var
);
2908 if (prev_stmt_info
== NULL
)
2909 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt_info
;
2911 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt_info
;
2912 prev_stmt_info
= new_stmt_info
;
2916 /* Prepare the base and offset in GS_INFO for vectorization.
2917 Set *DATAREF_PTR to the loop-invariant base address and *VEC_OFFSET
2918 to the vectorized offset argument for the first copy of STMT_INFO.
2919 STMT_INFO is the statement described by GS_INFO and LOOP is the
2923 vect_get_gather_scatter_ops (vec_info
*vinfo
,
2924 class loop
*loop
, stmt_vec_info stmt_info
,
2925 gather_scatter_info
*gs_info
,
2926 tree
*dataref_ptr
, tree
*vec_offset
)
2928 gimple_seq stmts
= NULL
;
2929 *dataref_ptr
= force_gimple_operand (gs_info
->base
, &stmts
, true, NULL_TREE
);
2933 edge pe
= loop_preheader_edge (loop
);
2934 new_bb
= gsi_insert_seq_on_edge_immediate (pe
, stmts
);
2935 gcc_assert (!new_bb
);
2937 *vec_offset
= vect_get_vec_def_for_operand (vinfo
, gs_info
->offset
, stmt_info
,
2938 gs_info
->offset_vectype
);
2941 /* Prepare to implement a grouped or strided load or store using
2942 the gather load or scatter store operation described by GS_INFO.
2943 STMT_INFO is the load or store statement.
2945 Set *DATAREF_BUMP to the amount that should be added to the base
2946 address after each copy of the vectorized statement. Set *VEC_OFFSET
2947 to an invariant offset vector in which element I has the value
2948 I * DR_STEP / SCALE. */
2951 vect_get_strided_load_store_ops (stmt_vec_info stmt_info
,
2952 loop_vec_info loop_vinfo
,
2953 gather_scatter_info
*gs_info
,
2954 tree
*dataref_bump
, tree
*vec_offset
)
2956 struct data_reference
*dr
= STMT_VINFO_DATA_REF (stmt_info
);
2957 class loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
2958 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
2961 tree bump
= size_binop (MULT_EXPR
,
2962 fold_convert (sizetype
, unshare_expr (DR_STEP (dr
))),
2963 size_int (TYPE_VECTOR_SUBPARTS (vectype
)));
2964 *dataref_bump
= force_gimple_operand (bump
, &stmts
, true, NULL_TREE
);
2966 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop
), stmts
);
2968 /* The offset given in GS_INFO can have pointer type, so use the element
2969 type of the vector instead. */
2970 tree offset_type
= TREE_TYPE (gs_info
->offset
);
2971 offset_type
= TREE_TYPE (gs_info
->offset_vectype
);
2973 /* Calculate X = DR_STEP / SCALE and convert it to the appropriate type. */
2974 tree step
= size_binop (EXACT_DIV_EXPR
, unshare_expr (DR_STEP (dr
)),
2975 ssize_int (gs_info
->scale
));
2976 step
= fold_convert (offset_type
, step
);
2977 step
= force_gimple_operand (step
, &stmts
, true, NULL_TREE
);
2979 /* Create {0, X, X*2, X*3, ...}. */
2980 *vec_offset
= gimple_build (&stmts
, VEC_SERIES_EXPR
, gs_info
->offset_vectype
,
2981 build_zero_cst (offset_type
), step
);
2983 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop
), stmts
);
2986 /* Return the amount that should be added to a vector pointer to move
2987 to the next or previous copy of AGGR_TYPE. DR_INFO is the data reference
2988 being vectorized and MEMORY_ACCESS_TYPE describes the type of
2992 vect_get_data_ptr_increment (vec_info
*vinfo
,
2993 dr_vec_info
*dr_info
, tree aggr_type
,
2994 vect_memory_access_type memory_access_type
)
2996 if (memory_access_type
== VMAT_INVARIANT
)
2997 return size_zero_node
;
2999 tree iv_step
= TYPE_SIZE_UNIT (aggr_type
);
3000 tree step
= vect_dr_behavior (vinfo
, dr_info
)->step
;
3001 if (tree_int_cst_sgn (step
) == -1)
3002 iv_step
= fold_build1 (NEGATE_EXPR
, TREE_TYPE (iv_step
), iv_step
);
3006 /* Check and perform vectorization of BUILT_IN_BSWAP{16,32,64,128}. */
3009 vectorizable_bswap (vec_info
*vinfo
,
3010 stmt_vec_info stmt_info
, gimple_stmt_iterator
*gsi
,
3011 stmt_vec_info
*vec_stmt
, slp_tree slp_node
,
3013 tree vectype_in
, stmt_vector_for_cost
*cost_vec
)
3016 gcall
*stmt
= as_a
<gcall
*> (stmt_info
->stmt
);
3017 loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
);
3020 op
= gimple_call_arg (stmt
, 0);
3021 vectype
= STMT_VINFO_VECTYPE (stmt_info
);
3022 poly_uint64 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
3024 /* Multiple types in SLP are handled by creating the appropriate number of
3025 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
3030 ncopies
= vect_get_num_copies (loop_vinfo
, vectype
);
3032 gcc_assert (ncopies
>= 1);
3034 tree char_vectype
= get_same_sized_vectype (char_type_node
, vectype_in
);
3038 poly_uint64 num_bytes
= TYPE_VECTOR_SUBPARTS (char_vectype
);
3039 unsigned word_bytes
;
3040 if (!constant_multiple_p (num_bytes
, nunits
, &word_bytes
))
3043 /* The encoding uses one stepped pattern for each byte in the word. */
3044 vec_perm_builder
elts (num_bytes
, word_bytes
, 3);
3045 for (unsigned i
= 0; i
< 3; ++i
)
3046 for (unsigned j
= 0; j
< word_bytes
; ++j
)
3047 elts
.quick_push ((i
+ 1) * word_bytes
- j
- 1);
3049 vec_perm_indices
indices (elts
, 1, num_bytes
);
3050 if (!can_vec_perm_const_p (TYPE_MODE (char_vectype
), indices
))
3056 && !vect_maybe_update_slp_op_vectype (slp_op
[0], vectype_in
))
3058 if (dump_enabled_p ())
3059 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3060 "incompatible vector types for invariants\n");
3064 STMT_VINFO_TYPE (stmt_info
) = call_vec_info_type
;
3065 DUMP_VECT_SCOPE ("vectorizable_bswap");
3068 record_stmt_cost (cost_vec
,
3069 1, vector_stmt
, stmt_info
, 0, vect_prologue
);
3070 record_stmt_cost (cost_vec
,
3071 ncopies
, vec_perm
, stmt_info
, 0, vect_body
);
3076 tree bswap_vconst
= vec_perm_indices_to_tree (char_vectype
, indices
);
3079 vec
<tree
> vec_oprnds
= vNULL
;
3080 stmt_vec_info new_stmt_info
= NULL
;
3081 stmt_vec_info prev_stmt_info
= NULL
;
3082 for (unsigned j
= 0; j
< ncopies
; j
++)
3086 vect_get_vec_defs (vinfo
, op
, NULL
, stmt_info
, &vec_oprnds
, NULL
,
3089 vect_get_vec_defs_for_stmt_copy (vinfo
, &vec_oprnds
, NULL
);
3091 /* Arguments are ready. create the new vector stmt. */
3094 FOR_EACH_VEC_ELT (vec_oprnds
, i
, vop
)
3097 tree tem
= make_ssa_name (char_vectype
);
3098 new_stmt
= gimple_build_assign (tem
, build1 (VIEW_CONVERT_EXPR
,
3099 char_vectype
, vop
));
3100 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
3101 tree tem2
= make_ssa_name (char_vectype
);
3102 new_stmt
= gimple_build_assign (tem2
, VEC_PERM_EXPR
,
3103 tem
, tem
, bswap_vconst
);
3104 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
3105 tem
= make_ssa_name (vectype
);
3106 new_stmt
= gimple_build_assign (tem
, build1 (VIEW_CONVERT_EXPR
,
3109 = vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
3111 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt_info
);
3118 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt_info
;
3120 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt_info
;
3122 prev_stmt_info
= new_stmt_info
;
3125 vec_oprnds
.release ();
3129 /* Return true if vector types VECTYPE_IN and VECTYPE_OUT have
3130 integer elements and if we can narrow VECTYPE_IN to VECTYPE_OUT
3131 in a single step. On success, store the binary pack code in
3135 simple_integer_narrowing (tree vectype_out
, tree vectype_in
,
3136 tree_code
*convert_code
)
3138 if (!INTEGRAL_TYPE_P (TREE_TYPE (vectype_out
))
3139 || !INTEGRAL_TYPE_P (TREE_TYPE (vectype_in
)))
3143 int multi_step_cvt
= 0;
3144 auto_vec
<tree
, 8> interm_types
;
3145 if (!supportable_narrowing_operation (NOP_EXPR
, vectype_out
, vectype_in
,
3146 &code
, &multi_step_cvt
, &interm_types
)
3150 *convert_code
= code
;
3154 /* Function vectorizable_call.
3156 Check if STMT_INFO performs a function call that can be vectorized.
3157 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
3158 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
3159 Return true if STMT_INFO is vectorizable in this way. */
3162 vectorizable_call (vec_info
*vinfo
,
3163 stmt_vec_info stmt_info
, gimple_stmt_iterator
*gsi
,
3164 stmt_vec_info
*vec_stmt
, slp_tree slp_node
,
3165 stmt_vector_for_cost
*cost_vec
)
3171 tree vec_oprnd0
= NULL_TREE
, vec_oprnd1
= NULL_TREE
;
3172 stmt_vec_info prev_stmt_info
;
3173 tree vectype_out
, vectype_in
;
3174 poly_uint64 nunits_in
;
3175 poly_uint64 nunits_out
;
3176 loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
);
3177 bb_vec_info bb_vinfo
= dyn_cast
<bb_vec_info
> (vinfo
);
3178 tree fndecl
, new_temp
, rhs_type
;
3179 enum vect_def_type dt
[4]
3180 = { vect_unknown_def_type
, vect_unknown_def_type
, vect_unknown_def_type
,
3181 vect_unknown_def_type
};
3182 tree vectypes
[ARRAY_SIZE (dt
)] = {};
3183 slp_tree slp_op
[ARRAY_SIZE (dt
)] = {};
3184 int ndts
= ARRAY_SIZE (dt
);
3186 auto_vec
<tree
, 8> vargs
;
3187 auto_vec
<tree
, 8> orig_vargs
;
3188 enum { NARROW
, NONE
, WIDEN
} modifier
;
3192 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
3195 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
3199 /* Is STMT_INFO a vectorizable call? */
3200 stmt
= dyn_cast
<gcall
*> (stmt_info
->stmt
);
3204 if (gimple_call_internal_p (stmt
)
3205 && (internal_load_fn_p (gimple_call_internal_fn (stmt
))
3206 || internal_store_fn_p (gimple_call_internal_fn (stmt
))))
3207 /* Handled by vectorizable_load and vectorizable_store. */
3210 if (gimple_call_lhs (stmt
) == NULL_TREE
3211 || TREE_CODE (gimple_call_lhs (stmt
)) != SSA_NAME
)
3214 gcc_checking_assert (!stmt_can_throw_internal (cfun
, stmt
));
3216 vectype_out
= STMT_VINFO_VECTYPE (stmt_info
);
3218 /* Process function arguments. */
3219 rhs_type
= NULL_TREE
;
3220 vectype_in
= NULL_TREE
;
3221 nargs
= gimple_call_num_args (stmt
);
3223 /* Bail out if the function has more than four arguments, we do not have
3224 interesting builtin functions to vectorize with more than two arguments
3225 except for fma. No arguments is also not good. */
3226 if (nargs
== 0 || nargs
> 4)
3229 /* Ignore the arguments of IFN_GOMP_SIMD_LANE, they are magic. */
3230 combined_fn cfn
= gimple_call_combined_fn (stmt
);
3231 if (cfn
== CFN_GOMP_SIMD_LANE
)
3234 rhs_type
= unsigned_type_node
;
3238 if (internal_fn_p (cfn
))
3239 mask_opno
= internal_fn_mask_index (as_internal_fn (cfn
));
3241 for (i
= 0; i
< nargs
; i
++)
3243 if ((int) i
== mask_opno
)
3245 op
= gimple_call_arg (stmt
, i
);
3246 if (!vect_check_scalar_mask (vinfo
,
3247 stmt_info
, op
, &dt
[i
], &vectypes
[i
]))
3252 if (!vect_is_simple_use (vinfo
, stmt_info
, slp_node
,
3253 i
, &op
, &slp_op
[i
], &dt
[i
], &vectypes
[i
]))
3255 if (dump_enabled_p ())
3256 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3257 "use not simple.\n");
3261 /* We can only handle calls with arguments of the same type. */
3263 && !types_compatible_p (rhs_type
, TREE_TYPE (op
)))
3265 if (dump_enabled_p ())
3266 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3267 "argument types differ.\n");
3271 rhs_type
= TREE_TYPE (op
);
3274 vectype_in
= vectypes
[i
];
3275 else if (vectypes
[i
]
3276 && !types_compatible_p (vectypes
[i
], vectype_in
))
3278 if (dump_enabled_p ())
3279 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3280 "argument vector types differ.\n");
3284 /* If all arguments are external or constant defs, infer the vector type
3285 from the scalar type. */
3287 vectype_in
= get_vectype_for_scalar_type (vinfo
, rhs_type
, slp_node
);
3289 gcc_assert (vectype_in
);
3292 if (dump_enabled_p ())
3293 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3294 "no vectype for scalar type %T\n", rhs_type
);
3298 /* FORNOW: we don't yet support mixtures of vector sizes for calls,
3299 just mixtures of nunits. E.g. DI->SI versions of __builtin_ctz*
3300 are traditionally vectorized as two VnDI->VnDI IFN_CTZs followed
3301 by a pack of the two vectors into an SI vector. We would need
3302 separate code to handle direct VnDI->VnSI IFN_CTZs. */
3303 if (TYPE_SIZE (vectype_in
) != TYPE_SIZE (vectype_out
))
3305 if (dump_enabled_p ())
3306 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3307 "mismatched vector sizes %T and %T\n",
3308 vectype_in
, vectype_out
);
3312 if (VECTOR_BOOLEAN_TYPE_P (vectype_out
)
3313 != VECTOR_BOOLEAN_TYPE_P (vectype_in
))
3315 if (dump_enabled_p ())
3316 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3317 "mixed mask and nonmask vector types\n");
3322 nunits_in
= TYPE_VECTOR_SUBPARTS (vectype_in
);
3323 nunits_out
= TYPE_VECTOR_SUBPARTS (vectype_out
);
3324 if (known_eq (nunits_in
* 2, nunits_out
))
3326 else if (known_eq (nunits_out
, nunits_in
))
3328 else if (known_eq (nunits_out
* 2, nunits_in
))
3333 /* We only handle functions that do not read or clobber memory. */
3334 if (gimple_vuse (stmt
))
3336 if (dump_enabled_p ())
3337 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3338 "function reads from or writes to memory.\n");
3342 /* For now, we only vectorize functions if a target specific builtin
3343 is available. TODO -- in some cases, it might be profitable to
3344 insert the calls for pieces of the vector, in order to be able
3345 to vectorize other operations in the loop. */
3347 internal_fn ifn
= IFN_LAST
;
3348 tree callee
= gimple_call_fndecl (stmt
);
3350 /* First try using an internal function. */
3351 tree_code convert_code
= ERROR_MARK
;
3353 && (modifier
== NONE
3354 || (modifier
== NARROW
3355 && simple_integer_narrowing (vectype_out
, vectype_in
,
3357 ifn
= vectorizable_internal_function (cfn
, callee
, vectype_out
,
3360 /* If that fails, try asking for a target-specific built-in function. */
3361 if (ifn
== IFN_LAST
)
3363 if (cfn
!= CFN_LAST
)
3364 fndecl
= targetm
.vectorize
.builtin_vectorized_function
3365 (cfn
, vectype_out
, vectype_in
);
3366 else if (callee
&& fndecl_built_in_p (callee
, BUILT_IN_MD
))
3367 fndecl
= targetm
.vectorize
.builtin_md_vectorized_function
3368 (callee
, vectype_out
, vectype_in
);
3371 if (ifn
== IFN_LAST
&& !fndecl
)
3373 if (cfn
== CFN_GOMP_SIMD_LANE
3376 && LOOP_VINFO_LOOP (loop_vinfo
)->simduid
3377 && TREE_CODE (gimple_call_arg (stmt
, 0)) == SSA_NAME
3378 && LOOP_VINFO_LOOP (loop_vinfo
)->simduid
3379 == SSA_NAME_VAR (gimple_call_arg (stmt
, 0)))
3381 /* We can handle IFN_GOMP_SIMD_LANE by returning a
3382 { 0, 1, 2, ... vf - 1 } vector. */
3383 gcc_assert (nargs
== 0);
3385 else if (modifier
== NONE
3386 && (gimple_call_builtin_p (stmt
, BUILT_IN_BSWAP16
)
3387 || gimple_call_builtin_p (stmt
, BUILT_IN_BSWAP32
)
3388 || gimple_call_builtin_p (stmt
, BUILT_IN_BSWAP64
)
3389 || gimple_call_builtin_p (stmt
, BUILT_IN_BSWAP128
)))
3390 return vectorizable_bswap (vinfo
, stmt_info
, gsi
, vec_stmt
, slp_node
,
3391 slp_op
, vectype_in
, cost_vec
);
3394 if (dump_enabled_p ())
3395 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3396 "function is not vectorizable.\n");
3403 else if (modifier
== NARROW
&& ifn
== IFN_LAST
)
3404 ncopies
= vect_get_num_copies (loop_vinfo
, vectype_out
);
3406 ncopies
= vect_get_num_copies (loop_vinfo
, vectype_in
);
3408 /* Sanity check: make sure that at least one copy of the vectorized stmt
3409 needs to be generated. */
3410 gcc_assert (ncopies
>= 1);
3412 vec_loop_masks
*masks
= (loop_vinfo
? &LOOP_VINFO_MASKS (loop_vinfo
) : NULL
);
3413 if (!vec_stmt
) /* transformation not required. */
3416 for (i
= 0; i
< nargs
; ++i
)
3417 if (!vect_maybe_update_slp_op_vectype (slp_op
[i
], vectype_in
))
3419 if (dump_enabled_p ())
3420 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3421 "incompatible vector types for invariants\n");
3424 STMT_VINFO_TYPE (stmt_info
) = call_vec_info_type
;
3425 DUMP_VECT_SCOPE ("vectorizable_call");
3426 vect_model_simple_cost (vinfo
, stmt_info
,
3427 ncopies
, dt
, ndts
, slp_node
, cost_vec
);
3428 if (ifn
!= IFN_LAST
&& modifier
== NARROW
&& !slp_node
)
3429 record_stmt_cost (cost_vec
, ncopies
/ 2,
3430 vec_promote_demote
, stmt_info
, 0, vect_body
);
3432 if (loop_vinfo
&& mask_opno
>= 0)
3434 unsigned int nvectors
= (slp_node
3435 ? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
)
3437 tree scalar_mask
= gimple_call_arg (stmt_info
->stmt
, mask_opno
);
3438 vect_record_loop_mask (loop_vinfo
, masks
, nvectors
,
3439 vectype_out
, scalar_mask
);
3446 if (dump_enabled_p ())
3447 dump_printf_loc (MSG_NOTE
, vect_location
, "transform call.\n");
3450 scalar_dest
= gimple_call_lhs (stmt
);
3451 vec_dest
= vect_create_destination_var (scalar_dest
, vectype_out
);
3453 bool masked_loop_p
= loop_vinfo
&& LOOP_VINFO_FULLY_MASKED_P (loop_vinfo
);
3455 stmt_vec_info new_stmt_info
= NULL
;
3456 prev_stmt_info
= NULL
;
3457 if (modifier
== NONE
|| ifn
!= IFN_LAST
)
3459 tree prev_res
= NULL_TREE
;
3460 vargs
.safe_grow (nargs
);
3461 orig_vargs
.safe_grow (nargs
);
3462 for (j
= 0; j
< ncopies
; ++j
)
3464 /* Build argument list for the vectorized call. */
3467 auto_vec
<vec
<tree
> > vec_defs (nargs
);
3468 vec
<tree
> vec_oprnds0
;
3470 vect_get_slp_defs (vinfo
, slp_node
, &vec_defs
);
3471 vec_oprnds0
= vec_defs
[0];
3473 /* Arguments are ready. Create the new vector stmt. */
3474 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vec_oprnd0
)
3477 for (k
= 0; k
< nargs
; k
++)
3479 vec
<tree
> vec_oprndsk
= vec_defs
[k
];
3480 vargs
[k
] = vec_oprndsk
[i
];
3482 if (modifier
== NARROW
)
3484 /* We don't define any narrowing conditional functions
3486 gcc_assert (mask_opno
< 0);
3487 tree half_res
= make_ssa_name (vectype_in
);
3489 = gimple_build_call_internal_vec (ifn
, vargs
);
3490 gimple_call_set_lhs (call
, half_res
);
3491 gimple_call_set_nothrow (call
, true);
3492 vect_finish_stmt_generation (vinfo
, stmt_info
, call
, gsi
);
3495 prev_res
= half_res
;
3498 new_temp
= make_ssa_name (vec_dest
);
3500 = gimple_build_assign (new_temp
, convert_code
,
3501 prev_res
, half_res
);
3503 = vect_finish_stmt_generation (vinfo
, stmt_info
,
3508 if (mask_opno
>= 0 && masked_loop_p
)
3510 unsigned int vec_num
= vec_oprnds0
.length ();
3511 /* Always true for SLP. */
3512 gcc_assert (ncopies
== 1);
3513 tree mask
= vect_get_loop_mask (gsi
, masks
, vec_num
,
3515 vargs
[mask_opno
] = prepare_load_store_mask
3516 (TREE_TYPE (mask
), mask
, vargs
[mask_opno
], gsi
);
3520 if (ifn
!= IFN_LAST
)
3521 call
= gimple_build_call_internal_vec (ifn
, vargs
);
3523 call
= gimple_build_call_vec (fndecl
, vargs
);
3524 new_temp
= make_ssa_name (vec_dest
, call
);
3525 gimple_call_set_lhs (call
, new_temp
);
3526 gimple_call_set_nothrow (call
, true);
3528 = vect_finish_stmt_generation (vinfo
, stmt_info
,
3531 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt_info
);
3534 for (i
= 0; i
< nargs
; i
++)
3536 vec
<tree
> vec_oprndsi
= vec_defs
[i
];
3537 vec_oprndsi
.release ();
3542 for (i
= 0; i
< nargs
; i
++)
3544 op
= gimple_call_arg (stmt
, i
);
3547 = vect_get_vec_def_for_operand (vinfo
,
3548 op
, stmt_info
, vectypes
[i
]);
3551 = vect_get_vec_def_for_stmt_copy (vinfo
, orig_vargs
[i
]);
3553 orig_vargs
[i
] = vargs
[i
] = vec_oprnd0
;
3556 if (mask_opno
>= 0 && masked_loop_p
)
3558 tree mask
= vect_get_loop_mask (gsi
, masks
, ncopies
,
3561 = prepare_load_store_mask (TREE_TYPE (mask
), mask
,
3562 vargs
[mask_opno
], gsi
);
3565 if (cfn
== CFN_GOMP_SIMD_LANE
)
3567 tree cst
= build_index_vector (vectype_out
, j
* nunits_out
, 1);
3569 = vect_get_new_ssa_name (vectype_out
, vect_simple_var
, "cst_");
3570 gimple
*init_stmt
= gimple_build_assign (new_var
, cst
);
3571 vect_init_vector_1 (vinfo
, stmt_info
, init_stmt
, NULL
);
3572 new_temp
= make_ssa_name (vec_dest
);
3573 gimple
*new_stmt
= gimple_build_assign (new_temp
, new_var
);
3575 = vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
3577 else if (modifier
== NARROW
)
3579 /* We don't define any narrowing conditional functions at
3581 gcc_assert (mask_opno
< 0);
3582 tree half_res
= make_ssa_name (vectype_in
);
3583 gcall
*call
= gimple_build_call_internal_vec (ifn
, vargs
);
3584 gimple_call_set_lhs (call
, half_res
);
3585 gimple_call_set_nothrow (call
, true);
3586 vect_finish_stmt_generation (vinfo
, stmt_info
, call
, gsi
);
3589 prev_res
= half_res
;
3592 new_temp
= make_ssa_name (vec_dest
);
3593 gassign
*new_stmt
= gimple_build_assign (new_temp
, convert_code
,
3594 prev_res
, half_res
);
3596 = vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
3601 if (ifn
!= IFN_LAST
)
3602 call
= gimple_build_call_internal_vec (ifn
, vargs
);
3604 call
= gimple_build_call_vec (fndecl
, vargs
);
3605 new_temp
= make_ssa_name (vec_dest
, call
);
3606 gimple_call_set_lhs (call
, new_temp
);
3607 gimple_call_set_nothrow (call
, true);
3609 = vect_finish_stmt_generation (vinfo
, stmt_info
, call
, gsi
);
3612 if (j
== (modifier
== NARROW
? 1 : 0))
3613 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt_info
;
3615 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt_info
;
3617 prev_stmt_info
= new_stmt_info
;
3620 else if (modifier
== NARROW
)
3622 /* We don't define any narrowing conditional functions at present. */
3623 gcc_assert (mask_opno
< 0);
3624 for (j
= 0; j
< ncopies
; ++j
)
3626 /* Build argument list for the vectorized call. */
3628 vargs
.create (nargs
* 2);
3634 auto_vec
<vec
<tree
> > vec_defs (nargs
);
3635 vec
<tree
> vec_oprnds0
;
3637 vect_get_slp_defs (vinfo
, slp_node
, &vec_defs
);
3638 vec_oprnds0
= vec_defs
[0];
3640 /* Arguments are ready. Create the new vector stmt. */
3641 for (i
= 0; vec_oprnds0
.iterate (i
, &vec_oprnd0
); i
+= 2)
3645 for (k
= 0; k
< nargs
; k
++)
3647 vec
<tree
> vec_oprndsk
= vec_defs
[k
];
3648 vargs
.quick_push (vec_oprndsk
[i
]);
3649 vargs
.quick_push (vec_oprndsk
[i
+ 1]);
3652 if (ifn
!= IFN_LAST
)
3653 call
= gimple_build_call_internal_vec (ifn
, vargs
);
3655 call
= gimple_build_call_vec (fndecl
, vargs
);
3656 new_temp
= make_ssa_name (vec_dest
, call
);
3657 gimple_call_set_lhs (call
, new_temp
);
3658 gimple_call_set_nothrow (call
, true);
3660 = vect_finish_stmt_generation (vinfo
, stmt_info
, call
, gsi
);
3661 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt_info
);
3664 for (i
= 0; i
< nargs
; i
++)
3666 vec
<tree
> vec_oprndsi
= vec_defs
[i
];
3667 vec_oprndsi
.release ();
3672 for (i
= 0; i
< nargs
; i
++)
3674 op
= gimple_call_arg (stmt
, i
);
3678 = vect_get_vec_def_for_operand (vinfo
, op
, stmt_info
,
3681 = vect_get_vec_def_for_stmt_copy (vinfo
, vec_oprnd0
);
3685 vec_oprnd1
= gimple_call_arg (new_stmt_info
->stmt
,
3688 = vect_get_vec_def_for_stmt_copy (vinfo
, vec_oprnd1
);
3690 = vect_get_vec_def_for_stmt_copy (vinfo
, vec_oprnd0
);
3693 vargs
.quick_push (vec_oprnd0
);
3694 vargs
.quick_push (vec_oprnd1
);
3697 gcall
*new_stmt
= gimple_build_call_vec (fndecl
, vargs
);
3698 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
3699 gimple_call_set_lhs (new_stmt
, new_temp
);
3701 = vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
3704 STMT_VINFO_VEC_STMT (stmt_info
) = new_stmt_info
;
3706 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt_info
;
3708 prev_stmt_info
= new_stmt_info
;
3711 *vec_stmt
= STMT_VINFO_VEC_STMT (stmt_info
);
3714 /* No current target implements this case. */
3719 /* The call in STMT might prevent it from being removed in dce.
3720 We however cannot remove it here, due to the way the ssa name
3721 it defines is mapped to the new definition. So just replace
3722 rhs of the statement with something harmless. */
3727 stmt_info
= vect_orig_stmt (stmt_info
);
3728 lhs
= gimple_get_lhs (stmt_info
->stmt
);
3731 = gimple_build_assign (lhs
, build_zero_cst (TREE_TYPE (lhs
)));
3732 vinfo
->replace_stmt (gsi
, stmt_info
, new_stmt
);
3738 struct simd_call_arg_info
3742 HOST_WIDE_INT linear_step
;
3743 enum vect_def_type dt
;
3745 bool simd_lane_linear
;
3748 /* Helper function of vectorizable_simd_clone_call. If OP, an SSA_NAME,
3749 is linear within simd lane (but not within whole loop), note it in
3753 vect_simd_lane_linear (tree op
, class loop
*loop
,
3754 struct simd_call_arg_info
*arginfo
)
3756 gimple
*def_stmt
= SSA_NAME_DEF_STMT (op
);
3758 if (!is_gimple_assign (def_stmt
)
3759 || gimple_assign_rhs_code (def_stmt
) != POINTER_PLUS_EXPR
3760 || !is_gimple_min_invariant (gimple_assign_rhs1 (def_stmt
)))
3763 tree base
= gimple_assign_rhs1 (def_stmt
);
3764 HOST_WIDE_INT linear_step
= 0;
3765 tree v
= gimple_assign_rhs2 (def_stmt
);
3766 while (TREE_CODE (v
) == SSA_NAME
)
3769 def_stmt
= SSA_NAME_DEF_STMT (v
);
3770 if (is_gimple_assign (def_stmt
))
3771 switch (gimple_assign_rhs_code (def_stmt
))
3774 t
= gimple_assign_rhs2 (def_stmt
);
3775 if (linear_step
|| TREE_CODE (t
) != INTEGER_CST
)
3777 base
= fold_build2 (POINTER_PLUS_EXPR
, TREE_TYPE (base
), base
, t
);
3778 v
= gimple_assign_rhs1 (def_stmt
);
3781 t
= gimple_assign_rhs2 (def_stmt
);
3782 if (linear_step
|| !tree_fits_shwi_p (t
) || integer_zerop (t
))
3784 linear_step
= tree_to_shwi (t
);
3785 v
= gimple_assign_rhs1 (def_stmt
);
3788 t
= gimple_assign_rhs1 (def_stmt
);
3789 if (TREE_CODE (TREE_TYPE (t
)) != INTEGER_TYPE
3790 || (TYPE_PRECISION (TREE_TYPE (v
))
3791 < TYPE_PRECISION (TREE_TYPE (t
))))
3800 else if (gimple_call_internal_p (def_stmt
, IFN_GOMP_SIMD_LANE
)
3802 && TREE_CODE (gimple_call_arg (def_stmt
, 0)) == SSA_NAME
3803 && (SSA_NAME_VAR (gimple_call_arg (def_stmt
, 0))
3808 arginfo
->linear_step
= linear_step
;
3810 arginfo
->simd_lane_linear
= true;
3816 /* Return the number of elements in vector type VECTYPE, which is associated
3817 with a SIMD clone. At present these vectors always have a constant
3820 static unsigned HOST_WIDE_INT
3821 simd_clone_subparts (tree vectype
)
3823 return TYPE_VECTOR_SUBPARTS (vectype
).to_constant ();
3826 /* Function vectorizable_simd_clone_call.
3828 Check if STMT_INFO performs a function call that can be vectorized
3829 by calling a simd clone of the function.
3830 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
3831 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
3832 Return true if STMT_INFO is vectorizable in this way. */
3835 vectorizable_simd_clone_call (vec_info
*vinfo
, stmt_vec_info stmt_info
,
3836 gimple_stmt_iterator
*gsi
,
3837 stmt_vec_info
*vec_stmt
, slp_tree slp_node
,
3838 stmt_vector_for_cost
*)
3843 tree vec_oprnd0
= NULL_TREE
;
3844 stmt_vec_info prev_stmt_info
;
3846 unsigned int nunits
;
3847 loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
);
3848 bb_vec_info bb_vinfo
= dyn_cast
<bb_vec_info
> (vinfo
);
3849 class loop
*loop
= loop_vinfo
? LOOP_VINFO_LOOP (loop_vinfo
) : NULL
;
3850 tree fndecl
, new_temp
;
3852 auto_vec
<simd_call_arg_info
> arginfo
;
3853 vec
<tree
> vargs
= vNULL
;
3855 tree lhs
, rtype
, ratype
;
3856 vec
<constructor_elt
, va_gc
> *ret_ctor_elts
= NULL
;
3858 /* Is STMT a vectorizable call? */
3859 gcall
*stmt
= dyn_cast
<gcall
*> (stmt_info
->stmt
);
3863 fndecl
= gimple_call_fndecl (stmt
);
3864 if (fndecl
== NULL_TREE
)
3867 struct cgraph_node
*node
= cgraph_node::get (fndecl
);
3868 if (node
== NULL
|| node
->simd_clones
== NULL
)
3871 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
3874 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
3878 if (gimple_call_lhs (stmt
)
3879 && TREE_CODE (gimple_call_lhs (stmt
)) != SSA_NAME
)
3882 gcc_checking_assert (!stmt_can_throw_internal (cfun
, stmt
));
3884 vectype
= STMT_VINFO_VECTYPE (stmt_info
);
3886 if (loop_vinfo
&& nested_in_vect_loop_p (loop
, stmt_info
))
3893 /* Process function arguments. */
3894 nargs
= gimple_call_num_args (stmt
);
3896 /* Bail out if the function has zero arguments. */
3900 arginfo
.reserve (nargs
, true);
3902 for (i
= 0; i
< nargs
; i
++)
3904 simd_call_arg_info thisarginfo
;
3907 thisarginfo
.linear_step
= 0;
3908 thisarginfo
.align
= 0;
3909 thisarginfo
.op
= NULL_TREE
;
3910 thisarginfo
.simd_lane_linear
= false;
3912 op
= gimple_call_arg (stmt
, i
);
3913 if (!vect_is_simple_use (op
, vinfo
, &thisarginfo
.dt
,
3914 &thisarginfo
.vectype
)
3915 || thisarginfo
.dt
== vect_uninitialized_def
)
3917 if (dump_enabled_p ())
3918 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3919 "use not simple.\n");
3923 if (thisarginfo
.dt
== vect_constant_def
3924 || thisarginfo
.dt
== vect_external_def
)
3925 gcc_assert (thisarginfo
.vectype
== NULL_TREE
);
3928 gcc_assert (thisarginfo
.vectype
!= NULL_TREE
);
3929 if (VECTOR_BOOLEAN_TYPE_P (thisarginfo
.vectype
))
3931 if (dump_enabled_p ())
3932 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3933 "vector mask arguments are not supported\n");
3938 /* For linear arguments, the analyze phase should have saved
3939 the base and step in STMT_VINFO_SIMD_CLONE_INFO. */
3940 if (i
* 3 + 4 <= STMT_VINFO_SIMD_CLONE_INFO (stmt_info
).length ()
3941 && STMT_VINFO_SIMD_CLONE_INFO (stmt_info
)[i
* 3 + 2])
3943 gcc_assert (vec_stmt
);
3944 thisarginfo
.linear_step
3945 = tree_to_shwi (STMT_VINFO_SIMD_CLONE_INFO (stmt_info
)[i
* 3 + 2]);
3947 = STMT_VINFO_SIMD_CLONE_INFO (stmt_info
)[i
* 3 + 1];
3948 thisarginfo
.simd_lane_linear
3949 = (STMT_VINFO_SIMD_CLONE_INFO (stmt_info
)[i
* 3 + 3]
3950 == boolean_true_node
);
3951 /* If loop has been peeled for alignment, we need to adjust it. */
3952 tree n1
= LOOP_VINFO_NITERS_UNCHANGED (loop_vinfo
);
3953 tree n2
= LOOP_VINFO_NITERS (loop_vinfo
);
3954 if (n1
!= n2
&& !thisarginfo
.simd_lane_linear
)
3956 tree bias
= fold_build2 (MINUS_EXPR
, TREE_TYPE (n1
), n1
, n2
);
3957 tree step
= STMT_VINFO_SIMD_CLONE_INFO (stmt_info
)[i
* 3 + 2];
3958 tree opt
= TREE_TYPE (thisarginfo
.op
);
3959 bias
= fold_convert (TREE_TYPE (step
), bias
);
3960 bias
= fold_build2 (MULT_EXPR
, TREE_TYPE (step
), bias
, step
);
3962 = fold_build2 (POINTER_TYPE_P (opt
)
3963 ? POINTER_PLUS_EXPR
: PLUS_EXPR
, opt
,
3964 thisarginfo
.op
, bias
);
3968 && thisarginfo
.dt
!= vect_constant_def
3969 && thisarginfo
.dt
!= vect_external_def
3971 && TREE_CODE (op
) == SSA_NAME
3972 && simple_iv (loop
, loop_containing_stmt (stmt
), op
,
3974 && tree_fits_shwi_p (iv
.step
))
3976 thisarginfo
.linear_step
= tree_to_shwi (iv
.step
);
3977 thisarginfo
.op
= iv
.base
;
3979 else if ((thisarginfo
.dt
== vect_constant_def
3980 || thisarginfo
.dt
== vect_external_def
)
3981 && POINTER_TYPE_P (TREE_TYPE (op
)))
3982 thisarginfo
.align
= get_pointer_alignment (op
) / BITS_PER_UNIT
;
3983 /* Addresses of array elements indexed by GOMP_SIMD_LANE are
3985 if (POINTER_TYPE_P (TREE_TYPE (op
))
3986 && !thisarginfo
.linear_step
3988 && thisarginfo
.dt
!= vect_constant_def
3989 && thisarginfo
.dt
!= vect_external_def
3992 && TREE_CODE (op
) == SSA_NAME
)
3993 vect_simd_lane_linear (op
, loop
, &thisarginfo
);
3995 arginfo
.quick_push (thisarginfo
);
3998 unsigned HOST_WIDE_INT vf
;
3999 if (!LOOP_VINFO_VECT_FACTOR (loop_vinfo
).is_constant (&vf
))
4001 if (dump_enabled_p ())
4002 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4003 "not considering SIMD clones; not yet supported"
4004 " for variable-width vectors.\n");
4008 unsigned int badness
= 0;
4009 struct cgraph_node
*bestn
= NULL
;
4010 if (STMT_VINFO_SIMD_CLONE_INFO (stmt_info
).exists ())
4011 bestn
= cgraph_node::get (STMT_VINFO_SIMD_CLONE_INFO (stmt_info
)[0]);
4013 for (struct cgraph_node
*n
= node
->simd_clones
; n
!= NULL
;
4014 n
= n
->simdclone
->next_clone
)
4016 unsigned int this_badness
= 0;
4017 if (n
->simdclone
->simdlen
> vf
4018 || n
->simdclone
->nargs
!= nargs
)
4020 if (n
->simdclone
->simdlen
< vf
)
4021 this_badness
+= (exact_log2 (vf
)
4022 - exact_log2 (n
->simdclone
->simdlen
)) * 1024;
4023 if (n
->simdclone
->inbranch
)
4024 this_badness
+= 2048;
4025 int target_badness
= targetm
.simd_clone
.usable (n
);
4026 if (target_badness
< 0)
4028 this_badness
+= target_badness
* 512;
4029 /* FORNOW: Have to add code to add the mask argument. */
4030 if (n
->simdclone
->inbranch
)
4032 for (i
= 0; i
< nargs
; i
++)
4034 switch (n
->simdclone
->args
[i
].arg_type
)
4036 case SIMD_CLONE_ARG_TYPE_VECTOR
:
4037 if (!useless_type_conversion_p
4038 (n
->simdclone
->args
[i
].orig_type
,
4039 TREE_TYPE (gimple_call_arg (stmt
, i
))))
4041 else if (arginfo
[i
].dt
== vect_constant_def
4042 || arginfo
[i
].dt
== vect_external_def
4043 || arginfo
[i
].linear_step
)
4046 case SIMD_CLONE_ARG_TYPE_UNIFORM
:
4047 if (arginfo
[i
].dt
!= vect_constant_def
4048 && arginfo
[i
].dt
!= vect_external_def
)
4051 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP
:
4052 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP
:
4053 if (arginfo
[i
].dt
== vect_constant_def
4054 || arginfo
[i
].dt
== vect_external_def
4055 || (arginfo
[i
].linear_step
4056 != n
->simdclone
->args
[i
].linear_step
))
4059 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP
:
4060 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP
:
4061 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP
:
4062 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP
:
4063 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP
:
4064 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP
:
4068 case SIMD_CLONE_ARG_TYPE_MASK
:
4071 if (i
== (size_t) -1)
4073 if (n
->simdclone
->args
[i
].alignment
> arginfo
[i
].align
)
4078 if (arginfo
[i
].align
)
4079 this_badness
+= (exact_log2 (arginfo
[i
].align
)
4080 - exact_log2 (n
->simdclone
->args
[i
].alignment
));
4082 if (i
== (size_t) -1)
4084 if (bestn
== NULL
|| this_badness
< badness
)
4087 badness
= this_badness
;
4094 for (i
= 0; i
< nargs
; i
++)
4095 if ((arginfo
[i
].dt
== vect_constant_def
4096 || arginfo
[i
].dt
== vect_external_def
)
4097 && bestn
->simdclone
->args
[i
].arg_type
== SIMD_CLONE_ARG_TYPE_VECTOR
)
4099 tree arg_type
= TREE_TYPE (gimple_call_arg (stmt
, i
));
4100 arginfo
[i
].vectype
= get_vectype_for_scalar_type (vinfo
, arg_type
,
4102 if (arginfo
[i
].vectype
== NULL
4103 || (simd_clone_subparts (arginfo
[i
].vectype
)
4104 > bestn
->simdclone
->simdlen
))
4108 fndecl
= bestn
->decl
;
4109 nunits
= bestn
->simdclone
->simdlen
;
4110 ncopies
= vf
/ nunits
;
4112 /* If the function isn't const, only allow it in simd loops where user
4113 has asserted that at least nunits consecutive iterations can be
4114 performed using SIMD instructions. */
4115 if ((loop
== NULL
|| (unsigned) loop
->safelen
< nunits
)
4116 && gimple_vuse (stmt
))
4119 /* Sanity check: make sure that at least one copy of the vectorized stmt
4120 needs to be generated. */
4121 gcc_assert (ncopies
>= 1);
4123 if (!vec_stmt
) /* transformation not required. */
4125 STMT_VINFO_SIMD_CLONE_INFO (stmt_info
).safe_push (bestn
->decl
);
4126 for (i
= 0; i
< nargs
; i
++)
4127 if ((bestn
->simdclone
->args
[i
].arg_type
4128 == SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP
)
4129 || (bestn
->simdclone
->args
[i
].arg_type
4130 == SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP
))
4132 STMT_VINFO_SIMD_CLONE_INFO (stmt_info
).safe_grow_cleared (i
* 3
4134 STMT_VINFO_SIMD_CLONE_INFO (stmt_info
).safe_push (arginfo
[i
].op
);
4135 tree lst
= POINTER_TYPE_P (TREE_TYPE (arginfo
[i
].op
))
4136 ? size_type_node
: TREE_TYPE (arginfo
[i
].op
);
4137 tree ls
= build_int_cst (lst
, arginfo
[i
].linear_step
);
4138 STMT_VINFO_SIMD_CLONE_INFO (stmt_info
).safe_push (ls
);
4139 tree sll
= arginfo
[i
].simd_lane_linear
4140 ? boolean_true_node
: boolean_false_node
;
4141 STMT_VINFO_SIMD_CLONE_INFO (stmt_info
).safe_push (sll
);
4143 STMT_VINFO_TYPE (stmt_info
) = call_simd_clone_vec_info_type
;
4144 DUMP_VECT_SCOPE ("vectorizable_simd_clone_call");
4145 /* vect_model_simple_cost (vinfo, stmt_info, ncopies,
4146 dt, slp_node, cost_vec); */
4152 if (dump_enabled_p ())
4153 dump_printf_loc (MSG_NOTE
, vect_location
, "transform call.\n");
4156 scalar_dest
= gimple_call_lhs (stmt
);
4157 vec_dest
= NULL_TREE
;
4162 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
4163 rtype
= TREE_TYPE (TREE_TYPE (fndecl
));
4164 if (TREE_CODE (rtype
) == ARRAY_TYPE
)
4167 rtype
= TREE_TYPE (ratype
);
4171 prev_stmt_info
= NULL
;
4172 for (j
= 0; j
< ncopies
; ++j
)
4174 /* Build argument list for the vectorized call. */
4176 vargs
.create (nargs
);
4180 for (i
= 0; i
< nargs
; i
++)
4182 unsigned int k
, l
, m
, o
;
4184 op
= gimple_call_arg (stmt
, i
);
4185 switch (bestn
->simdclone
->args
[i
].arg_type
)
4187 case SIMD_CLONE_ARG_TYPE_VECTOR
:
4188 atype
= bestn
->simdclone
->args
[i
].vector_type
;
4189 o
= nunits
/ simd_clone_subparts (atype
);
4190 for (m
= j
* o
; m
< (j
+ 1) * o
; m
++)
4192 if (simd_clone_subparts (atype
)
4193 < simd_clone_subparts (arginfo
[i
].vectype
))
4195 poly_uint64 prec
= GET_MODE_BITSIZE (TYPE_MODE (atype
));
4196 k
= (simd_clone_subparts (arginfo
[i
].vectype
)
4197 / simd_clone_subparts (atype
));
4198 gcc_assert ((k
& (k
- 1)) == 0);
4201 = vect_get_vec_def_for_operand (vinfo
, op
, stmt_info
);
4204 vec_oprnd0
= arginfo
[i
].op
;
4205 if ((m
& (k
- 1)) == 0)
4207 = vect_get_vec_def_for_stmt_copy (vinfo
,
4210 arginfo
[i
].op
= vec_oprnd0
;
4212 = build3 (BIT_FIELD_REF
, atype
, vec_oprnd0
,
4214 bitsize_int ((m
& (k
- 1)) * prec
));
4216 = gimple_build_assign (make_ssa_name (atype
),
4218 vect_finish_stmt_generation (vinfo
, stmt_info
,
4220 vargs
.safe_push (gimple_assign_lhs (new_stmt
));
4224 k
= (simd_clone_subparts (atype
)
4225 / simd_clone_subparts (arginfo
[i
].vectype
));
4226 gcc_assert ((k
& (k
- 1)) == 0);
4227 vec
<constructor_elt
, va_gc
> *ctor_elts
;
4229 vec_alloc (ctor_elts
, k
);
4232 for (l
= 0; l
< k
; l
++)
4234 if (m
== 0 && l
== 0)
4236 = vect_get_vec_def_for_operand (vinfo
,
4240 = vect_get_vec_def_for_stmt_copy (vinfo
,
4242 arginfo
[i
].op
= vec_oprnd0
;
4245 CONSTRUCTOR_APPEND_ELT (ctor_elts
, NULL_TREE
,
4249 vargs
.safe_push (vec_oprnd0
);
4252 vec_oprnd0
= build_constructor (atype
, ctor_elts
);
4254 = gimple_build_assign (make_ssa_name (atype
),
4256 vect_finish_stmt_generation (vinfo
, stmt_info
,
4258 vargs
.safe_push (gimple_assign_lhs (new_stmt
));
4263 case SIMD_CLONE_ARG_TYPE_UNIFORM
:
4264 vargs
.safe_push (op
);
4266 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP
:
4267 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP
:
4272 = force_gimple_operand (unshare_expr (arginfo
[i
].op
),
4273 &stmts
, true, NULL_TREE
);
4277 edge pe
= loop_preheader_edge (loop
);
4278 new_bb
= gsi_insert_seq_on_edge_immediate (pe
, stmts
);
4279 gcc_assert (!new_bb
);
4281 if (arginfo
[i
].simd_lane_linear
)
4283 vargs
.safe_push (arginfo
[i
].op
);
4286 tree phi_res
= copy_ssa_name (op
);
4287 gphi
*new_phi
= create_phi_node (phi_res
, loop
->header
);
4288 loop_vinfo
->add_stmt (new_phi
);
4289 add_phi_arg (new_phi
, arginfo
[i
].op
,
4290 loop_preheader_edge (loop
), UNKNOWN_LOCATION
);
4292 = POINTER_TYPE_P (TREE_TYPE (op
))
4293 ? POINTER_PLUS_EXPR
: PLUS_EXPR
;
4294 tree type
= POINTER_TYPE_P (TREE_TYPE (op
))
4295 ? sizetype
: TREE_TYPE (op
);
4297 = wi::mul (bestn
->simdclone
->args
[i
].linear_step
,
4299 tree tcst
= wide_int_to_tree (type
, cst
);
4300 tree phi_arg
= copy_ssa_name (op
);
4302 = gimple_build_assign (phi_arg
, code
, phi_res
, tcst
);
4303 gimple_stmt_iterator si
= gsi_after_labels (loop
->header
);
4304 gsi_insert_after (&si
, new_stmt
, GSI_NEW_STMT
);
4305 loop_vinfo
->add_stmt (new_stmt
);
4306 add_phi_arg (new_phi
, phi_arg
, loop_latch_edge (loop
),
4308 arginfo
[i
].op
= phi_res
;
4309 vargs
.safe_push (phi_res
);
4314 = POINTER_TYPE_P (TREE_TYPE (op
))
4315 ? POINTER_PLUS_EXPR
: PLUS_EXPR
;
4316 tree type
= POINTER_TYPE_P (TREE_TYPE (op
))
4317 ? sizetype
: TREE_TYPE (op
);
4319 = wi::mul (bestn
->simdclone
->args
[i
].linear_step
,
4321 tree tcst
= wide_int_to_tree (type
, cst
);
4322 new_temp
= make_ssa_name (TREE_TYPE (op
));
4324 = gimple_build_assign (new_temp
, code
,
4325 arginfo
[i
].op
, tcst
);
4326 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
4327 vargs
.safe_push (new_temp
);
4330 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP
:
4331 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP
:
4332 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP
:
4333 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP
:
4334 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP
:
4335 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP
:
4341 gcall
*new_call
= gimple_build_call_vec (fndecl
, vargs
);
4344 gcc_assert (ratype
|| simd_clone_subparts (rtype
) == nunits
);
4346 new_temp
= create_tmp_var (ratype
);
4347 else if (simd_clone_subparts (vectype
)
4348 == simd_clone_subparts (rtype
))
4349 new_temp
= make_ssa_name (vec_dest
, new_call
);
4351 new_temp
= make_ssa_name (rtype
, new_call
);
4352 gimple_call_set_lhs (new_call
, new_temp
);
4354 stmt_vec_info new_stmt_info
4355 = vect_finish_stmt_generation (vinfo
, stmt_info
, new_call
, gsi
);
4359 if (simd_clone_subparts (vectype
) < nunits
)
4362 poly_uint64 prec
= GET_MODE_BITSIZE (TYPE_MODE (vectype
));
4363 poly_uint64 bytes
= GET_MODE_SIZE (TYPE_MODE (vectype
));
4364 k
= nunits
/ simd_clone_subparts (vectype
);
4365 gcc_assert ((k
& (k
- 1)) == 0);
4366 for (l
= 0; l
< k
; l
++)
4371 t
= build_fold_addr_expr (new_temp
);
4372 t
= build2 (MEM_REF
, vectype
, t
,
4373 build_int_cst (TREE_TYPE (t
), l
* bytes
));
4376 t
= build3 (BIT_FIELD_REF
, vectype
, new_temp
,
4377 bitsize_int (prec
), bitsize_int (l
* prec
));
4379 = gimple_build_assign (make_ssa_name (vectype
), t
);
4381 = vect_finish_stmt_generation (vinfo
, stmt_info
,
4384 if (j
== 0 && l
== 0)
4385 STMT_VINFO_VEC_STMT (stmt_info
)
4386 = *vec_stmt
= new_stmt_info
;
4388 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt_info
;
4390 prev_stmt_info
= new_stmt_info
;
4394 vect_clobber_variable (vinfo
, stmt_info
, gsi
, new_temp
);
4397 else if (simd_clone_subparts (vectype
) > nunits
)
4399 unsigned int k
= (simd_clone_subparts (vectype
)
4400 / simd_clone_subparts (rtype
));
4401 gcc_assert ((k
& (k
- 1)) == 0);
4402 if ((j
& (k
- 1)) == 0)
4403 vec_alloc (ret_ctor_elts
, k
);
4406 unsigned int m
, o
= nunits
/ simd_clone_subparts (rtype
);
4407 for (m
= 0; m
< o
; m
++)
4409 tree tem
= build4 (ARRAY_REF
, rtype
, new_temp
,
4410 size_int (m
), NULL_TREE
, NULL_TREE
);
4412 = gimple_build_assign (make_ssa_name (rtype
), tem
);
4414 = vect_finish_stmt_generation (vinfo
, stmt_info
,
4416 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts
, NULL_TREE
,
4417 gimple_assign_lhs (new_stmt
));
4419 vect_clobber_variable (vinfo
, stmt_info
, gsi
, new_temp
);
4422 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts
, NULL_TREE
, new_temp
);
4423 if ((j
& (k
- 1)) != k
- 1)
4425 vec_oprnd0
= build_constructor (vectype
, ret_ctor_elts
);
4427 = gimple_build_assign (make_ssa_name (vec_dest
), vec_oprnd0
);
4429 = vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
4431 if ((unsigned) j
== k
- 1)
4432 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt_info
;
4434 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt_info
;
4436 prev_stmt_info
= new_stmt_info
;
4441 tree t
= build_fold_addr_expr (new_temp
);
4442 t
= build2 (MEM_REF
, vectype
, t
,
4443 build_int_cst (TREE_TYPE (t
), 0));
4445 = gimple_build_assign (make_ssa_name (vec_dest
), t
);
4447 = vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
4448 vect_clobber_variable (vinfo
, stmt_info
, gsi
, new_temp
);
4453 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt_info
;
4455 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt_info
;
4457 prev_stmt_info
= new_stmt_info
;
4462 /* The call in STMT might prevent it from being removed in dce.
4463 We however cannot remove it here, due to the way the ssa name
4464 it defines is mapped to the new definition. So just replace
4465 rhs of the statement with something harmless. */
4473 type
= TREE_TYPE (scalar_dest
);
4474 lhs
= gimple_call_lhs (vect_orig_stmt (stmt_info
)->stmt
);
4475 new_stmt
= gimple_build_assign (lhs
, build_zero_cst (type
));
4478 new_stmt
= gimple_build_nop ();
4479 vinfo
->replace_stmt (gsi
, vect_orig_stmt (stmt_info
), new_stmt
);
4480 unlink_stmt_vdef (stmt
);
4486 /* Function vect_gen_widened_results_half
4488 Create a vector stmt whose code, type, number of arguments, and result
4489 variable are CODE, OP_TYPE, and VEC_DEST, and its arguments are
4490 VEC_OPRND0 and VEC_OPRND1. The new vector stmt is to be inserted at GSI.
4491 In the case that CODE is a CALL_EXPR, this means that a call to DECL
4492 needs to be created (DECL is a function-decl of a target-builtin).
4493 STMT_INFO is the original scalar stmt that we are vectorizing. */
/* NOTE(review): extraction-damaged text -- identifiers were split across
   lines and several original lines (return type, local declarations, the
   body of the `if`, the final return) are missing, so this block does not
   compile as-is.  Code tokens below are kept byte-identical; only comments
   were added.  Recover the pristine definition from version control.  */
/* Builds one gimple assignment VEC_DEST = CODE (VEC_OPRND0[, VEC_OPRND1])
   producing half of a widened result, and emits it at GSI via
   vect_finish_stmt_generation.  */
4496 vect_gen_widened_results_half (vec_info
*vinfo
, enum tree_code code
,
4497 tree vec_oprnd0
, tree vec_oprnd1
, int op_type
,
4498 tree vec_dest
, gimple_stmt_iterator
*gsi
,
4499 stmt_vec_info stmt_info
)
4504 /* Generate half of the widened result:  */
/* Sanity: the arity implied by CODE must match OP_TYPE.  */
4505 gcc_assert (op_type
== TREE_CODE_LENGTH (code
));
/* NOTE(review): the statement controlled by this `if` (presumably
   clearing vec_oprnd1 for the unary case) was lost in extraction.  */
4506 if (op_type
!= binary_op
)
4508 new_stmt
= gimple_build_assign (vec_dest
, code
, vec_oprnd0
, vec_oprnd1
);
4509 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
4510 gimple_assign_set_lhs (new_stmt
, new_temp
);
4511 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
4517 /* Get vectorized definitions for loop-based vectorization of STMT_INFO.
4518 For the first operand we call vect_get_vec_def_for_operand (with OPRND
4519 containing scalar operand), and for the rest we get a copy with
4520 vect_get_vec_def_for_stmt_copy() using the previous vector definition
4521 (stored in OPRND). See vect_get_vec_def_for_stmt_copy() for details.
4522 The vectors are collected into VEC_OPRNDS. */
/* NOTE(review): extraction-damaged text -- identifiers were split across
   lines and some original lines (braces, declarations, the recursion
   guard) are missing, so this block does not compile as-is.  Code tokens
   are kept byte-identical; only comments were added/closed.  Recover the
   pristine definition from version control.  */
/* Collects vector defs for loop-based vectorization into *VEC_OPRNDS:
   first def comes from vect_get_vec_def_for_operand when *OPRND is still
   scalar, subsequent ones from vect_get_vec_def_for_stmt_copy; recurses
   MULTI_STEP_CVT more times for multi-step conversions.  */
4525 vect_get_loop_based_defs (vec_info
*vinfo
, tree
*oprnd
, stmt_vec_info stmt_info
,
4526 vec
<tree
> *vec_oprnds
, int multi_step_cvt
)
4530 /* Get first vector operand.  */
4531 /* All the vector operands except the very first one (that is scalar oprnd)
     ... (rest of comment lost in extraction)  */
4533 if (TREE_CODE (TREE_TYPE (*oprnd
)) != VECTOR_TYPE
)
4534 vec_oprnd
= vect_get_vec_def_for_operand (vinfo
, *oprnd
, stmt_info
);
/* NOTE(review): the `else` keyword between these two branches was lost.  */
4536 vec_oprnd
= vect_get_vec_def_for_stmt_copy (vinfo
, *oprnd
);
4538 vec_oprnds
->quick_push (vec_oprnd
);
4540 /* Get second vector operand.  */
4541 vec_oprnd
= vect_get_vec_def_for_stmt_copy (vinfo
, vec_oprnd
);
4542 vec_oprnds
->quick_push (vec_oprnd
);
4546 /* For conversion in multiple steps, continue to get operands
     recursively (comment truncated by extraction).  */
4549 vect_get_loop_based_defs (vinfo
, oprnd
, stmt_info
, vec_oprnds
,
4550 multi_step_cvt
- 1);
4554 /* Create vectorized demotion statements for vector operands from VEC_OPRNDS.
4555 For multi-step conversions store the resulting vectors and call the function
/* NOTE(review): extraction-damaged text -- identifiers were split across
   lines and several original lines (braces, parameters, conditionals,
   recursion arguments) are missing, so this block does not compile as-is.
   Code tokens are kept byte-identical; only comments were added/closed.
   Recover the pristine definition from version control.  */
/* Pairs up entries of *VEC_OPRNDS and emits demotion stmts
   VEC_DEST = CODE (vop0, vop1); results are stored back into the first
   half of *VEC_OPRNDS and the function recurses (with
   VEC_PACK_TRUNC_EXPR) for multi-step demotion.  Final-step results are
   recorded in SLP_NODE or chained via STMT_VINFO_RELATED_STMT.  */
4559 vect_create_vectorized_demotion_stmts (vec_info
*vinfo
, vec
<tree
> *vec_oprnds
,
4561 stmt_vec_info stmt_info
,
4563 gimple_stmt_iterator
*gsi
,
4564 slp_tree slp_node
, enum tree_code code
,
4565 stmt_vec_info
*prev_stmt_info
)
4568 tree vop0
, vop1
, new_tmp
, vec_dest
;
/* Destinations were pushed innermost-last; pop the one for this step.  */
4570 vec_dest
= vec_dsts
.pop ();
4572 for (i
= 0; i
< vec_oprnds
->length (); i
+= 2)
4574 /* Create demotion operation.  */
4575 vop0
= (*vec_oprnds
)[i
];
4576 vop1
= (*vec_oprnds
)[i
+ 1];
4577 gassign
*new_stmt
= gimple_build_assign (vec_dest
, code
, vop0
, vop1
);
4578 new_tmp
= make_ssa_name (vec_dest
, new_stmt
);
4579 gimple_assign_set_lhs (new_stmt
, new_tmp
);
4580 stmt_vec_info new_stmt_info
4581 = vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
4584 /* Store the resulting vector for next recursive call.  */
4585 (*vec_oprnds
)[i
/2] = new_tmp
;
4588 /* This is the last step of the conversion sequence. Store the
4589 vectors in SLP_NODE or in vector info of the scalar statement
4590 (or in STMT_VINFO_RELATED_STMT chain).  */
4592 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt_info
);
4595 if (!*prev_stmt_info
)
4596 STMT_VINFO_VEC_STMT (stmt_info
) = new_stmt_info
;
/* NOTE(review): the `else` before this chained-store branch was lost.  */
4598 STMT_VINFO_RELATED_STMT (*prev_stmt_info
) = new_stmt_info
;
4600 *prev_stmt_info
= new_stmt_info
;
4605 /* For multi-step demotion operations we first generate demotion operations
4606 from the source type to the intermediate types, and then combine the
4607 results (stored in VEC_OPRNDS) in demotion operation to the destination
     type (comment truncated by extraction).  */
4611 /* At each level of recursion we have half of the operands we had at the
     previous level (comment truncated by extraction).  */
4613 vec_oprnds
->truncate ((i
+1)/2);
4614 vect_create_vectorized_demotion_stmts (vinfo
, vec_oprnds
,
4616 stmt_info
, vec_dsts
, gsi
,
4617 slp_node
, VEC_PACK_TRUNC_EXPR
,
/* NOTE(review): trailing recursion argument(s) lost in extraction.  */
4621 vec_dsts
.quick_push (vec_dest
);
4625 /* Create vectorized promotion statements for vector operands from VEC_OPRNDS0
4626 and VEC_OPRNDS1, for a binary operation associated with scalar statement
4627 STMT_INFO. For multi-step conversions store the resulting vectors and
4628 call the function recursively. */
/* NOTE(review): extraction-damaged text -- identifiers were split across
   lines and several original lines (braces, some call arguments, `else`
   keywords) are missing, so this block does not compile as-is.  Code
   tokens are kept byte-identical; only comments were added.  Recover the
   pristine definition from version control.  */
/* For each element of *VEC_OPRNDS0 (paired with *VEC_OPRNDS1 when
   OP_TYPE == binary_op) emits the two half-width promotion stmts via
   vect_gen_widened_results_half (CODE1 and CODE2), collects both result
   SSA names into a temporary vec, and replaces *VEC_OPRNDS0 with it.  */
4631 vect_create_vectorized_promotion_stmts (vec_info
*vinfo
,
4632 vec
<tree
> *vec_oprnds0
,
4633 vec
<tree
> *vec_oprnds1
,
4634 stmt_vec_info stmt_info
, tree vec_dest
,
4635 gimple_stmt_iterator
*gsi
,
4636 enum tree_code code1
,
4637 enum tree_code code2
, int op_type
)
4640 tree vop0
, vop1
, new_tmp1
, new_tmp2
;
4641 gimple
*new_stmt1
, *new_stmt2
;
4642 vec
<tree
> vec_tmp
= vNULL
;
/* Two results (low/high half) per input operand.  */
4644 vec_tmp
.create (vec_oprnds0
->length () * 2);
4645 FOR_EACH_VEC_ELT (*vec_oprnds0
, i
, vop0
)
4647 if (op_type
== binary_op
)
4648 vop1
= (*vec_oprnds1
)[i
];
4652 /* Generate the two halves of promotion operation.  */
4653 new_stmt1
= vect_gen_widened_results_half (vinfo
, code1
, vop0
, vop1
,
4654 op_type
, vec_dest
, gsi
,
4656 new_stmt2
= vect_gen_widened_results_half (vinfo
, code2
, vop0
, vop1
,
4657 op_type
, vec_dest
, gsi
,
/* The produced stmt may be a call (target builtin) or an assign;
   fetch the lhs accordingly.  */
4659 if (is_gimple_call (new_stmt1
))
4661 new_tmp1
= gimple_call_lhs (new_stmt1
);
4662 new_tmp2
= gimple_call_lhs (new_stmt2
);
/* NOTE(review): the `else` before the assign-lhs branch was lost.  */
4666 new_tmp1
= gimple_assign_lhs (new_stmt1
);
4667 new_tmp2
= gimple_assign_lhs (new_stmt2
);
4670 /* Store the results for the next step.  */
4671 vec_tmp
.quick_push (new_tmp1
);
4672 vec_tmp
.quick_push (new_tmp2
);
/* Hand the doubled operand list back to the caller.  */
4675 vec_oprnds0
->release ();
4676 *vec_oprnds0
= vec_tmp
;
4680 /* Check if STMT_INFO performs a conversion operation that can be vectorized.
4681 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
4682 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
4683 Return true if STMT_INFO is vectorizable in this way. */
/* NOTE(review): extraction-damaged text -- identifiers were split across
   lines and a large number of original lines (braces, returns, `else`
   keywords, whole statements) are missing, so this block does not compile
   as-is.  Code tokens are kept byte-identical; only comments were
   added/closed.  Recover the pristine definition from version control.  */
/* Analyzes (and, when VEC_STMT is non-null, transforms) a conversion
   statement: plain CONVERT/FIX_TRUNC/FLOAT as well as the widening
   WIDEN_MULT_EXPR / WIDEN_LSHIFT_EXPR forms.  Classifies the conversion
   as NONE / WIDEN / NARROW from the subpart counts and dispatches to the
   promotion/demotion helpers, possibly through intermediate types.  */
4686 vectorizable_conversion (vec_info
*vinfo
,
4687 stmt_vec_info stmt_info
, gimple_stmt_iterator
*gsi
,
4688 stmt_vec_info
*vec_stmt
, slp_tree slp_node
,
4689 stmt_vector_for_cost
*cost_vec
)
4693 tree op0
, op1
= NULL_TREE
;
4694 tree vec_oprnd0
= NULL_TREE
, vec_oprnd1
= NULL_TREE
;
4695 loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
);
4696 enum tree_code code
, code1
= ERROR_MARK
, code2
= ERROR_MARK
;
4697 enum tree_code codecvt1
= ERROR_MARK
, codecvt2
= ERROR_MARK
;
4699 enum vect_def_type dt
[2] = {vect_unknown_def_type
, vect_unknown_def_type
};
4701 stmt_vec_info prev_stmt_info
;
4702 poly_uint64 nunits_in
;
4703 poly_uint64 nunits_out
;
4704 tree vectype_out
, vectype_in
;
4706 tree lhs_type
, rhs_type
;
4707 enum { NARROW
, NONE
, WIDEN
} modifier
;
4708 vec
<tree
> vec_oprnds0
= vNULL
;
4709 vec
<tree
> vec_oprnds1
= vNULL
;
4711 bb_vec_info bb_vinfo
= dyn_cast
<bb_vec_info
> (vinfo
);
4712 int multi_step_cvt
= 0;
4713 vec
<tree
> interm_types
= vNULL
;
4714 tree last_oprnd
, intermediate_type
, cvt_type
= NULL_TREE
;
4716 unsigned short fltsz
;
4718 /* Is STMT a vectorizable conversion?  */
4720 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
4723 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
4727 gassign
*stmt
= dyn_cast
<gassign
*> (stmt_info
->stmt
);
4731 if (TREE_CODE (gimple_assign_lhs (stmt
)) != SSA_NAME
)
4734 code
= gimple_assign_rhs_code (stmt
);
4735 if (!CONVERT_EXPR_CODE_P (code
)
4736 && code
!= FIX_TRUNC_EXPR
4737 && code
!= FLOAT_EXPR
4738 && code
!= WIDEN_MULT_EXPR
4739 && code
!= WIDEN_LSHIFT_EXPR
)
4742 op_type
= TREE_CODE_LENGTH (code
);
4744 /* Check types of lhs and rhs.  */
4745 scalar_dest
= gimple_assign_lhs (stmt
);
4746 lhs_type
= TREE_TYPE (scalar_dest
);
4747 vectype_out
= STMT_VINFO_VECTYPE (stmt_info
);
4749 /* Check the operands of the operation.  */
4750 slp_tree slp_op0
, slp_op1
= NULL
;
4751 if (!vect_is_simple_use (vinfo
, stmt_info
, slp_node
,
4752 0, &op0
, &slp_op0
, &dt
[0], &vectype_in
))
4754 if (dump_enabled_p ())
4755 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4756 "use not simple.\n");
4760 rhs_type
= TREE_TYPE (op0
);
4761 if ((code
!= FIX_TRUNC_EXPR
&& code
!= FLOAT_EXPR
)
4762 && !((INTEGRAL_TYPE_P (lhs_type
)
4763 && INTEGRAL_TYPE_P (rhs_type
))
4764 || (SCALAR_FLOAT_TYPE_P (lhs_type
)
4765 && SCALAR_FLOAT_TYPE_P (rhs_type
))))
/* Bit-precision (non-mode-precision) integer conversions are not
   supported, except into boolean vectors.  */
4768 if (!VECTOR_BOOLEAN_TYPE_P (vectype_out
)
4769 && ((INTEGRAL_TYPE_P (lhs_type
)
4770 && !type_has_mode_precision_p (lhs_type
))
4771 || (INTEGRAL_TYPE_P (rhs_type
)
4772 && !type_has_mode_precision_p (rhs_type
))))
4774 if (dump_enabled_p ())
4775 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4776 "type conversion to/from bit-precision unsupported."
4781 if (op_type
== binary_op
)
4783 gcc_assert (code
== WIDEN_MULT_EXPR
|| code
== WIDEN_LSHIFT_EXPR
);
4785 op1
= gimple_assign_rhs2 (stmt
);
4787 if (!vect_is_simple_use (vinfo
, stmt_info
, slp_node
, 1,
4788 &op1
, &slp_op1
, &dt
[1], &vectype1_in
))
4790 if (dump_enabled_p ())
4791 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4792 "use not simple.\n");
4795 /* For WIDEN_MULT_EXPR, if OP0 is a constant, use the type of
     the second operand (comment truncated by extraction).  */
4798 vectype_in
= vectype1_in
;
4801 /* If op0 is an external or constant def, infer the vector type
4802 from the scalar type.  */
4804 vectype_in
= get_vectype_for_scalar_type (vinfo
, rhs_type
, slp_node
);
4806 gcc_assert (vectype_in
);
4809 if (dump_enabled_p ())
4810 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4811 "no vectype for scalar type %T\n", rhs_type
);
/* Boolean <-> non-boolean vector conversions are rejected.  */
4816 if (VECTOR_BOOLEAN_TYPE_P (vectype_out
)
4817 && !VECTOR_BOOLEAN_TYPE_P (vectype_in
))
4819 if (dump_enabled_p ())
4820 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4821 "can't convert between boolean and non "
4822 "boolean vectors %T\n", rhs_type
);
/* Classify the conversion by comparing the subpart counts.  */
4827 nunits_in
= TYPE_VECTOR_SUBPARTS (vectype_in
);
4828 nunits_out
= TYPE_VECTOR_SUBPARTS (vectype_out
);
4829 if (known_eq (nunits_out
, nunits_in
))
4831 else if (multiple_p (nunits_out
, nunits_in
))
4835 gcc_checking_assert (multiple_p (nunits_in
, nunits_out
));
4839 /* Multiple types in SLP are handled by creating the appropriate number of
4840 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
     case of SLP (comment truncated by extraction).  */
4844 else if (modifier
== NARROW
)
4845 ncopies
= vect_get_num_copies (loop_vinfo
, vectype_out
);
4847 ncopies
= vect_get_num_copies (loop_vinfo
, vectype_in
);
4849 /* Sanity check: make sure that at least one copy of the vectorized stmt
4850 needs to be generated.  */
4851 gcc_assert (ncopies
>= 1);
4853 bool found_mode
= false;
4854 scalar_mode lhs_mode
= SCALAR_TYPE_MODE (lhs_type
);
4855 scalar_mode rhs_mode
= SCALAR_TYPE_MODE (rhs_type
);
4856 opt_scalar_mode rhs_mode_iter
;
4858 /* Supportable by target?  */
4862 if (code
!= FIX_TRUNC_EXPR
4863 && code
!= FLOAT_EXPR
4864 && !CONVERT_EXPR_CODE_P (code
))
4866 if (supportable_convert_operation (code
, vectype_out
, vectype_in
, &code1
))
4870 if (dump_enabled_p ())
4871 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4872 "conversion not supported by target.\n");
/* WIDEN case: try a direct widening operation first, then fall back
   to widening through an intermediate integer type for FLOAT_EXPR.  */
4876 if (supportable_widening_operation (vinfo
, code
, stmt_info
, vectype_out
,
4877 vectype_in
, &code1
, &code2
,
4878 &multi_step_cvt
, &interm_types
))
4880 /* Binary widening operation can only be supported directly by the
     architecture (comment truncated by extraction).  */
4882 gcc_assert (!(multi_step_cvt
&& op_type
== binary_op
))
;
4886 if (code
!= FLOAT_EXPR
4887 || GET_MODE_SIZE (lhs_mode
) <= GET_MODE_SIZE (rhs_mode
))
4890 fltsz
= GET_MODE_SIZE (lhs_mode
);
4891 FOR_EACH_2XWIDER_MODE (rhs_mode_iter
, rhs_mode
)
4893 rhs_mode
= rhs_mode_iter
.require ();
4894 if (GET_MODE_SIZE (rhs_mode
) > fltsz
)
4898 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode
), 0);
4899 cvt_type
= get_same_sized_vectype (cvt_type
, vectype_in
);
4900 if (cvt_type
== NULL_TREE
)
4903 if (GET_MODE_SIZE (rhs_mode
) == fltsz
)
4905 if (!supportable_convert_operation (code
, vectype_out
,
4906 cvt_type
, &codecvt1
))
4909 else if (!supportable_widening_operation (vinfo
, code
, stmt_info
,
4910 vectype_out
, cvt_type
,
4911 &codecvt1
, &codecvt2
,
4916 gcc_assert (multi_step_cvt
== 0);
4918 if (supportable_widening_operation (vinfo
, NOP_EXPR
, stmt_info
,
4920 vectype_in
, &code1
, &code2
,
4921 &multi_step_cvt
, &interm_types
))
4931 if (GET_MODE_SIZE (rhs_mode
) == fltsz
)
4932 codecvt2
= ERROR_MARK
;
4936 interm_types
.safe_push (cvt_type
);
4937 cvt_type
= NULL_TREE
;
/* NARROW case: only unary narrowing, possibly through cvt_type.  */
4942 gcc_assert (op_type
== unary_op
);
4943 if (supportable_narrowing_operation (code
, vectype_out
, vectype_in
,
4944 &code1
, &multi_step_cvt
,
4948 if (code
!= FIX_TRUNC_EXPR
4949 || GET_MODE_SIZE (lhs_mode
) >= GET_MODE_SIZE (rhs_mode
))
4953 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode
), 0);
4954 cvt_type
= get_same_sized_vectype (cvt_type
, vectype_in
);
4955 if (cvt_type
== NULL_TREE
)
4957 if (!supportable_convert_operation (code
, cvt_type
, vectype_in
,
4960 if (supportable_narrowing_operation (NOP_EXPR
, vectype_out
, cvt_type
,
4961 &code1
, &multi_step_cvt
,
/* Analysis-only path: record the chosen strategy and its cost.  */
4970 if (!vec_stmt
) /* transformation not required.  */
4973 && (!vect_maybe_update_slp_op_vectype (slp_op0
, vectype_in
)
4974 || !vect_maybe_update_slp_op_vectype (slp_op1
, vectype_in
)))
4976 if (dump_enabled_p ())
4977 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4978 "incompatible vector types for invariants\n");
4981 DUMP_VECT_SCOPE ("vectorizable_conversion");
4982 if (modifier
== NONE
)
4984 STMT_VINFO_TYPE (stmt_info
) = type_conversion_vec_info_type
;
4985 vect_model_simple_cost (vinfo
, stmt_info
, ncopies
, dt
, ndts
, slp_node
,
4988 else if (modifier
== NARROW
)
4990 STMT_VINFO_TYPE (stmt_info
) = type_demotion_vec_info_type
;
4991 /* The final packing step produces one vector result per copy.  */
4992 unsigned int nvectors
4993 = (slp_node
? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
) : ncopies
);
4994 vect_model_promotion_demotion_cost (stmt_info
, dt
, nvectors
,
4995 multi_step_cvt
, cost_vec
);
4999 STMT_VINFO_TYPE (stmt_info
) = type_promotion_vec_info_type
;
5000 /* The initial unpacking step produces two vector results
5001 per copy. MULTI_STEP_CVT is 0 for a single conversion,
5002 so >> MULTI_STEP_CVT divides by 2^(number of steps - 1).  */
5003 unsigned int nvectors
5005 ? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
) >> multi_step_cvt
5007 vect_model_promotion_demotion_cost (stmt_info
, dt
, nvectors
,
5008 multi_step_cvt
, cost_vec
);
5010 interm_types
.release ();
/* Transformation path starts here.  */
5015 if (dump_enabled_p ())
5016 dump_printf_loc (MSG_NOTE
, vect_location
,
5017 "transform conversion. ncopies = %d.\n", ncopies
);
5019 if (op_type
== binary_op
)
5021 if (CONSTANT_CLASS_P (op0
))
5022 op0
= fold_convert (TREE_TYPE (op1
), op0
);
5023 else if (CONSTANT_CLASS_P (op1
))
5024 op1
= fold_convert (TREE_TYPE (op0
), op1
);
5027 /* In case of multi-step conversion, we first generate conversion operations
5028 to the intermediate types, and then from that types to the final one.
5029 We create vector destinations for the intermediate type (TYPES) received
5030 from supportable_*_operation, and store them in the correct order
5031 for future use in vect_create_vectorized_*_stmts ().  */
5032 auto_vec
<tree
> vec_dsts (multi_step_cvt
+ 1);
5033 vec_dest
= vect_create_destination_var (scalar_dest
,
5034 (cvt_type
&& modifier
== WIDEN
)
5035 ? cvt_type
: vectype_out
);
5036 vec_dsts
.quick_push (vec_dest
);
5040 for (i
= interm_types
.length () - 1;
5041 interm_types
.iterate (i
, &intermediate_type
); i
--)
5043 vec_dest
= vect_create_destination_var (scalar_dest
,
5045 vec_dsts
.quick_push (vec_dest
);
5050 vec_dest
= vect_create_destination_var (scalar_dest
,
5052 ? vectype_out
: cvt_type
);
5056 if (modifier
== WIDEN
)
5058 vec_oprnds0
.create (multi_step_cvt
? vect_pow2 (multi_step_cvt
) : 1);
5059 if (op_type
== binary_op
)
5060 vec_oprnds1
.create (1);
5062 else if (modifier
== NARROW
)
5063 vec_oprnds0
.create (
5064 2 * (multi_step_cvt
? vect_pow2 (multi_step_cvt
) : 1));
5066 else if (code
== WIDEN_LSHIFT_EXPR
)
5067 vec_oprnds1
.create (slp_node
->vec_stmts_size
);
5070 prev_stmt_info
= NULL
;
/* modifier == NONE: a straight one-to-one conversion per copy.  */
5074 for (j
= 0; j
< ncopies
; j
++)
5077 vect_get_vec_defs (vinfo
, op0
, NULL
, stmt_info
, &vec_oprnds0
,
5080 vect_get_vec_defs_for_stmt_copy (vinfo
, &vec_oprnds0
, NULL
);
5082 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vop0
)
5084 stmt_vec_info new_stmt_info
;
5085 /* Arguments are ready, create the new vector stmt.  */
5086 gcc_assert (TREE_CODE_LENGTH (code1
) == unary_op
);
5087 gassign
*new_stmt
= gimple_build_assign (vec_dest
, code1
, vop0
);
5088 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
5089 gimple_assign_set_lhs (new_stmt
, new_temp
);
5091 = vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
5094 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt_info
);
5097 if (!prev_stmt_info
)
5098 STMT_VINFO_VEC_STMT (stmt_info
)
5099 = *vec_stmt
= new_stmt_info
;
5101 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt_info
;
5102 prev_stmt_info
= new_stmt_info
;
/* modifier == WIDEN branch.  */
5109 /* In case the vectorization factor (VF) is bigger than the number
5110 of elements that we can fit in a vectype (nunits), we have to
5111 generate more than one vector stmt - i.e - we need to "unroll"
5112 the vector stmt by a factor VF/nunits.  */
5113 for (j
= 0; j
< ncopies
; j
++)
5120 if (code
== WIDEN_LSHIFT_EXPR
)
5125 /* Store vec_oprnd1 for every vector stmt to be created
5126 for SLP_NODE. We check during the analysis that all
5127 the shift arguments are the same.  */
5128 for (k
= 0; k
< slp_node
->vec_stmts_size
- 1; k
++)
5129 vec_oprnds1
.quick_push (vec_oprnd1
);
5131 vect_get_vec_defs (vinfo
, op0
, NULL_TREE
, stmt_info
,
5132 &vec_oprnds0
, NULL
, slp_node
);
5135 vect_get_vec_defs (vinfo
, op0
, op1
, stmt_info
, &vec_oprnds0
,
5136 &vec_oprnds1
, slp_node
);
5140 vec_oprnd0
= vect_get_vec_def_for_operand (vinfo
,
5142 vec_oprnds0
.quick_push (vec_oprnd0
);
5143 if (op_type
== binary_op
)
5145 if (code
== WIDEN_LSHIFT_EXPR
)
5149 = vect_get_vec_def_for_operand (vinfo
,
5151 vec_oprnds1
.quick_push (vec_oprnd1
);
/* j > 0: reuse the previous copy's defs.  */
5157 vec_oprnd0
= vect_get_vec_def_for_stmt_copy (vinfo
, vec_oprnd0
);
5158 vec_oprnds0
.truncate (0);
5159 vec_oprnds0
.quick_push (vec_oprnd0
);
5160 if (op_type
== binary_op
)
5162 if (code
== WIDEN_LSHIFT_EXPR
)
5165 vec_oprnd1
= vect_get_vec_def_for_stmt_copy (vinfo
,
5167 vec_oprnds1
.truncate (0);
5168 vec_oprnds1
.quick_push (vec_oprnd1
);
5172 /* Arguments are ready. Create the new vector stmts.  */
5173 for (i
= multi_step_cvt
; i
>= 0; i
--)
5175 tree this_dest
= vec_dsts
[i
];
5176 enum tree_code c1
= code1
, c2
= code2
;
5177 if (i
== 0 && codecvt2
!= ERROR_MARK
)
5182 vect_create_vectorized_promotion_stmts (vinfo
, &vec_oprnds0
,
5183 &vec_oprnds1
, stmt_info
,
5188 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vop0
)
5190 stmt_vec_info new_stmt_info
;
5193 gcc_assert (TREE_CODE_LENGTH (codecvt1
) == unary_op
);
5194 new_temp
= make_ssa_name (vec_dest
);
5196 = gimple_build_assign (new_temp
, codecvt1
, vop0
);
5198 = vect_finish_stmt_generation (vinfo
, stmt_info
,
5202 new_stmt_info
= vinfo
->lookup_def (vop0
);
5205 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt_info
);
5208 if (!prev_stmt_info
)
5209 STMT_VINFO_VEC_STMT (stmt_info
) = new_stmt_info
;
5211 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt_info
;
5212 prev_stmt_info
= new_stmt_info
;
5217 *vec_stmt
= STMT_VINFO_VEC_STMT (stmt_info
);
/* modifier == NARROW branch.  */
5221 /* In case the vectorization factor (VF) is bigger than the number
5222 of elements that we can fit in a vectype (nunits), we have to
5223 generate more than one vector stmt - i.e - we need to "unroll"
5224 the vector stmt by a factor VF/nunits.  */
5225 for (j
= 0; j
< ncopies
; j
++)
5229 vect_get_vec_defs (vinfo
, op0
, NULL_TREE
, stmt_info
, &vec_oprnds0
,
5233 vec_oprnds0
.truncate (0);
5234 vect_get_loop_based_defs (vinfo
,
5235 &last_oprnd
, stmt_info
, &vec_oprnds0
,
5236 vect_pow2 (multi_step_cvt
) - 1);
5239 /* Arguments are ready. Create the new vector stmts.  */
5241 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vop0
)
5243 gcc_assert (TREE_CODE_LENGTH (codecvt1
) == unary_op
);
5244 new_temp
= make_ssa_name (vec_dest
);
5246 = gimple_build_assign (new_temp
, codecvt1
, vop0
);
5247 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
5248 vec_oprnds0
[i
] = new_temp
;
5251 vect_create_vectorized_demotion_stmts (vinfo
, &vec_oprnds0
,
5253 stmt_info
, vec_dsts
, gsi
,
5258 *vec_stmt
= STMT_VINFO_VEC_STMT (stmt_info
);
/* Release temporaries before returning.  */
5262 vec_oprnds0
.release ();
5263 vec_oprnds1
.release ();
5264 interm_types
.release ();
5269 /* Return true if we can assume from the scalar form of STMT_INFO that
5270 neither the scalar nor the vector forms will generate code. STMT_INFO
5271 is known not to involve a data reference. */
/* NOTE(review): extraction-damaged text -- identifiers were split across
   lines and several original lines (return type, the !stmt guard, the
   `return true;`/`return false;` statements) are missing, so this block
   does not compile as-is.  Code tokens are kept byte-identical; only
   comments were added.  Recover the pristine definition from version
   control.  */
/* Predicate on STMT_INFO: inspects the assignment's rhs code --
   SSA_NAME / VIEW_CONVERT_EXPR directly, and CONVERT codes via
   tree_nop_conversion_p on the lhs/rhs types.  */
5274 vect_nop_conversion_p (stmt_vec_info stmt_info
)
5276 gassign
*stmt
= dyn_cast
<gassign
*> (stmt_info
->stmt
);
5280 tree lhs
= gimple_assign_lhs (stmt
);
5281 tree_code code
= gimple_assign_rhs_code (stmt
);
5282 tree rhs
= gimple_assign_rhs1 (stmt
);
/* NOTE(review): the statement controlled by this condition (presumably
   `return true;`) was lost in extraction.  */
5284 if (code
== SSA_NAME
|| code
== VIEW_CONVERT_EXPR
)
5287 if (CONVERT_EXPR_CODE_P (code
))
5288 return tree_nop_conversion_p (TREE_TYPE (lhs
), TREE_TYPE (rhs
));
5293 /* Function vectorizable_assignment.
5295 Check if STMT_INFO performs an assignment (copy) that can be vectorized.
5296 If VEC_STMT is also passed, vectorize the STMT_INFO: create a vectorized
5297 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
5298 Return true if STMT_INFO is vectorizable in this way. */
/* NOTE(review): extraction-damaged text -- identifiers were split across
   lines and many original lines (braces, returns, `else` keywords,
   declarations) are missing, so this block does not compile as-is.  Code
   tokens are kept byte-identical; only comments were added/closed.
   Recover the pristine definition from version control.  */
/* Analyzes (and, when VEC_STMT is non-null, transforms) a plain copy or
   no-op conversion: builds VIEW_CONVERT_EXPR-wrapped assigns into a new
   destination var, one per copy, chaining results through
   STMT_VINFO_RELATED_STMT or pushing into SLP_NODE.  */
5301 vectorizable_assignment (vec_info
*vinfo
,
5302 stmt_vec_info stmt_info
, gimple_stmt_iterator
*gsi
,
5303 stmt_vec_info
*vec_stmt
, slp_tree slp_node
,
5304 stmt_vector_for_cost
*cost_vec
)
5309 loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
);
5311 enum vect_def_type dt
[1] = {vect_unknown_def_type
};
5315 vec
<tree
> vec_oprnds
= vNULL
;
5317 bb_vec_info bb_vinfo
= dyn_cast
<bb_vec_info
> (vinfo
);
5318 stmt_vec_info prev_stmt_info
= NULL
;
5319 enum tree_code code
;
5322 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
5325 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
5329 /* Is vectorizable assignment?  */
5330 gassign
*stmt
= dyn_cast
<gassign
*> (stmt_info
->stmt
);
5334 scalar_dest
= gimple_assign_lhs (stmt
);
5335 if (TREE_CODE (scalar_dest
) != SSA_NAME
)
5338 if (STMT_VINFO_DATA_REF (stmt_info
))
5341 code
= gimple_assign_rhs_code (stmt
);
5342 if (!(gimple_assign_single_p (stmt
)
5343 || code
== PAREN_EXPR
5344 || CONVERT_EXPR_CODE_P (code
)))
5347 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
5348 poly_uint64 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
5350 /* Multiple types in SLP are handled by creating the appropriate number of
5351 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
     case of SLP (comment truncated by extraction).  */
5356 ncopies
= vect_get_num_copies (loop_vinfo
, vectype
);
5358 gcc_assert (ncopies
>= 1);
5361 if (!vect_is_simple_use (vinfo
, stmt_info
, slp_node
, 0, &op
, &slp_op
,
5362 &dt
[0], &vectype_in
))
5364 if (dump_enabled_p ())
5365 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5366 "use not simple.\n");
5370 vectype_in
= get_vectype_for_scalar_type (vinfo
, TREE_TYPE (op
), slp_node
);
5372 /* We can handle NOP_EXPR conversions that do not change the number
5373 of elements or the vector size.  */
5374 if ((CONVERT_EXPR_CODE_P (code
)
5375 || code
== VIEW_CONVERT_EXPR
)
5377 || maybe_ne (TYPE_VECTOR_SUBPARTS (vectype_in
), nunits
)
5378 || maybe_ne (GET_MODE_SIZE (TYPE_MODE (vectype
)),
5379 GET_MODE_SIZE (TYPE_MODE (vectype_in
)))))
5382 /* We do not handle bit-precision changes.  */
5383 if ((CONVERT_EXPR_CODE_P (code
)
5384 || code
== VIEW_CONVERT_EXPR
)
5385 && INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest
))
5386 && (!type_has_mode_precision_p (TREE_TYPE (scalar_dest
))
5387 || !type_has_mode_precision_p (TREE_TYPE (op
)))
5388 /* But a conversion that does not change the bit-pattern is ok.  */
5389 && !((TYPE_PRECISION (TREE_TYPE (scalar_dest
))
5390 > TYPE_PRECISION (TREE_TYPE (op
)))
5391 && TYPE_UNSIGNED (TREE_TYPE (op
)))
5392 /* Conversion between boolean types of different sizes is
5393 a simple assignment in case their vectypes are same
     (comment truncated by extraction).  */
5395 && (!VECTOR_BOOLEAN_TYPE_P (vectype
)
5396 || !VECTOR_BOOLEAN_TYPE_P (vectype_in
)))
5398 if (dump_enabled_p ())
5399 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5400 "type conversion to/from bit-precision "
/* Analysis-only path: record cost and statement type.  */
5405 if (!vec_stmt
) /* transformation not required.  */
5408 && !vect_maybe_update_slp_op_vectype (slp_op
, vectype_in
))
5410 if (dump_enabled_p ())
5411 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5412 "incompatible vector types for invariants\n");
5415 STMT_VINFO_TYPE (stmt_info
) = assignment_vec_info_type
;
5416 DUMP_VECT_SCOPE ("vectorizable_assignment");
5417 if (!vect_nop_conversion_p (stmt_info
))
5418 vect_model_simple_cost (vinfo
, stmt_info
, ncopies
, dt
, ndts
, slp_node
,
/* Transformation path.  */
5424 if (dump_enabled_p ())
5425 dump_printf_loc (MSG_NOTE
, vect_location
, "transform assignment.\n");
5428 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
5431 for (j
= 0; j
< ncopies
; j
++)
5435 vect_get_vec_defs (vinfo
, op
, NULL
, stmt_info
, &vec_oprnds
, NULL
,
5438 vect_get_vec_defs_for_stmt_copy (vinfo
, &vec_oprnds
, NULL
);
5440 /* Arguments are ready. create the new vector stmt.  */
5441 stmt_vec_info new_stmt_info
= NULL
;
5442 FOR_EACH_VEC_ELT (vec_oprnds
, i
, vop
)
5444 if (CONVERT_EXPR_CODE_P (code
)
5445 || code
== VIEW_CONVERT_EXPR
)
5446 vop
= build1 (VIEW_CONVERT_EXPR
, vectype
, vop
);
5447 gassign
*new_stmt
= gimple_build_assign (vec_dest
, vop
);
5448 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
5449 gimple_assign_set_lhs (new_stmt
, new_temp
);
5451 = vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
5453 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt_info
);
5460 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt_info
;
/* NOTE(review): the `else` before this chained-store branch was lost.  */
5462 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt_info
;
5464 prev_stmt_info
= new_stmt_info
;
5467 vec_oprnds
.release ();
5472 /* Return TRUE if CODE (a shift operation) is supported for SCALAR_TYPE
5473 either as shift by a scalar or by a vector. */
5476 vect_supportable_shift (vec_info
*vinfo
, enum tree_code code
, tree scalar_type
)
5479 machine_mode vec_mode
;
5484 vectype
= get_vectype_for_scalar_type (vinfo
, scalar_type
);
5488 optab
= optab_for_tree_code (code
, vectype
, optab_scalar
);
5490 || optab_handler (optab
, TYPE_MODE (vectype
)) == CODE_FOR_nothing
)
5492 optab
= optab_for_tree_code (code
, vectype
, optab_vector
);
5494 || (optab_handler (optab
, TYPE_MODE (vectype
))
5495 == CODE_FOR_nothing
))
5499 vec_mode
= TYPE_MODE (vectype
);
5500 icode
= (int) optab_handler (optab
, vec_mode
);
5501 if (icode
== CODE_FOR_nothing
)
5508 /* Function vectorizable_shift.
5510 Check if STMT_INFO performs a shift operation that can be vectorized.
5511 If VEC_STMT is also passed, vectorize the STMT_INFO: create a vectorized
5512 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
5513 Return true if STMT_INFO is vectorizable in this way. */
5516 vectorizable_shift (vec_info
*vinfo
,
5517 stmt_vec_info stmt_info
, gimple_stmt_iterator
*gsi
,
5518 stmt_vec_info
*vec_stmt
, slp_tree slp_node
,
5519 stmt_vector_for_cost
*cost_vec
)
5523 tree op0
, op1
= NULL
;
5524 tree vec_oprnd1
= NULL_TREE
;
5526 loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
);
5527 enum tree_code code
;
5528 machine_mode vec_mode
;
5532 machine_mode optab_op2_mode
;
5533 enum vect_def_type dt
[2] = {vect_unknown_def_type
, vect_unknown_def_type
};
5535 stmt_vec_info prev_stmt_info
;
5536 poly_uint64 nunits_in
;
5537 poly_uint64 nunits_out
;
5542 vec
<tree
> vec_oprnds0
= vNULL
;
5543 vec
<tree
> vec_oprnds1
= vNULL
;
5546 bool scalar_shift_arg
= true;
5547 bb_vec_info bb_vinfo
= dyn_cast
<bb_vec_info
> (vinfo
);
5548 bool incompatible_op1_vectype_p
= false;
5550 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
5553 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
5554 && STMT_VINFO_DEF_TYPE (stmt_info
) != vect_nested_cycle
5558 /* Is STMT a vectorizable binary/unary operation? */
5559 gassign
*stmt
= dyn_cast
<gassign
*> (stmt_info
->stmt
);
5563 if (TREE_CODE (gimple_assign_lhs (stmt
)) != SSA_NAME
)
5566 code
= gimple_assign_rhs_code (stmt
);
5568 if (!(code
== LSHIFT_EXPR
|| code
== RSHIFT_EXPR
|| code
== LROTATE_EXPR
5569 || code
== RROTATE_EXPR
))
5572 scalar_dest
= gimple_assign_lhs (stmt
);
5573 vectype_out
= STMT_VINFO_VECTYPE (stmt_info
);
5574 if (!type_has_mode_precision_p (TREE_TYPE (scalar_dest
)))
5576 if (dump_enabled_p ())
5577 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5578 "bit-precision shifts not supported.\n");
5583 if (!vect_is_simple_use (vinfo
, stmt_info
, slp_node
,
5584 0, &op0
, &slp_op0
, &dt
[0], &vectype
))
5586 if (dump_enabled_p ())
5587 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5588 "use not simple.\n");
5591 /* If op0 is an external or constant def, infer the vector type
5592 from the scalar type. */
5594 vectype
= get_vectype_for_scalar_type (vinfo
, TREE_TYPE (op0
), slp_node
);
5596 gcc_assert (vectype
);
5599 if (dump_enabled_p ())
5600 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5601 "no vectype for scalar type\n");
5605 nunits_out
= TYPE_VECTOR_SUBPARTS (vectype_out
);
5606 nunits_in
= TYPE_VECTOR_SUBPARTS (vectype
);
5607 if (maybe_ne (nunits_out
, nunits_in
))
5610 stmt_vec_info op1_def_stmt_info
;
5612 if (!vect_is_simple_use (vinfo
, stmt_info
, slp_node
, 1, &op1
, &slp_op1
,
5613 &dt
[1], &op1_vectype
, &op1_def_stmt_info
))
5615 if (dump_enabled_p ())
5616 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5617 "use not simple.\n");
5621 /* Multiple types in SLP are handled by creating the appropriate number of
5622 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
5627 ncopies
= vect_get_num_copies (loop_vinfo
, vectype
);
5629 gcc_assert (ncopies
>= 1);
5631 /* Determine whether the shift amount is a vector, or scalar. If the
5632 shift/rotate amount is a vector, use the vector/vector shift optabs. */
5634 if ((dt
[1] == vect_internal_def
5635 || dt
[1] == vect_induction_def
5636 || dt
[1] == vect_nested_cycle
)
5638 scalar_shift_arg
= false;
5639 else if (dt
[1] == vect_constant_def
5640 || dt
[1] == vect_external_def
5641 || dt
[1] == vect_internal_def
)
5643 /* In SLP, need to check whether the shift count is the same,
5644 in loops if it is a constant or invariant, it is always
5648 vec
<stmt_vec_info
> stmts
= SLP_TREE_SCALAR_STMTS (slp_node
);
5649 stmt_vec_info slpstmt_info
;
5651 FOR_EACH_VEC_ELT (stmts
, k
, slpstmt_info
)
5653 gassign
*slpstmt
= as_a
<gassign
*> (slpstmt_info
->stmt
);
5654 if (!operand_equal_p (gimple_assign_rhs2 (slpstmt
), op1
, 0))
5655 scalar_shift_arg
= false;
5658 /* For internal SLP defs we have to make sure we see scalar stmts
5659 for all vector elements.
5660 ??? For different vectors we could resort to a different
5661 scalar shift operand but code-generation below simply always
5663 if (dt
[1] == vect_internal_def
5664 && maybe_ne (nunits_out
* SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
),
5666 scalar_shift_arg
= false;
5669 /* If the shift amount is computed by a pattern stmt we cannot
5670 use the scalar amount directly thus give up and use a vector
5672 if (op1_def_stmt_info
&& is_pattern_stmt_p (op1_def_stmt_info
))
5673 scalar_shift_arg
= false;
5677 if (dump_enabled_p ())
5678 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5679 "operand mode requires invariant argument.\n");
5683 /* Vector shifted by vector. */
5684 bool was_scalar_shift_arg
= scalar_shift_arg
;
5685 if (!scalar_shift_arg
)
5687 optab
= optab_for_tree_code (code
, vectype
, optab_vector
);
5688 if (dump_enabled_p ())
5689 dump_printf_loc (MSG_NOTE
, vect_location
,
5690 "vector/vector shift/rotate found.\n");
5693 op1_vectype
= get_vectype_for_scalar_type (vinfo
, TREE_TYPE (op1
),
5695 incompatible_op1_vectype_p
5696 = (op1_vectype
== NULL_TREE
5697 || maybe_ne (TYPE_VECTOR_SUBPARTS (op1_vectype
),
5698 TYPE_VECTOR_SUBPARTS (vectype
))
5699 || TYPE_MODE (op1_vectype
) != TYPE_MODE (vectype
));
5700 if (incompatible_op1_vectype_p
5702 || SLP_TREE_DEF_TYPE
5703 (SLP_TREE_CHILDREN (slp_node
)[1]) != vect_constant_def
))
5705 if (dump_enabled_p ())
5706 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5707 "unusable type for last operand in"
5708 " vector/vector shift/rotate.\n");
5712 /* See if the machine has a vector shifted by scalar insn and if not
5713 then see if it has a vector shifted by vector insn. */
5716 optab
= optab_for_tree_code (code
, vectype
, optab_scalar
);
5718 && optab_handler (optab
, TYPE_MODE (vectype
)) != CODE_FOR_nothing
)
5720 if (dump_enabled_p ())
5721 dump_printf_loc (MSG_NOTE
, vect_location
,
5722 "vector/scalar shift/rotate found.\n");
5726 optab
= optab_for_tree_code (code
, vectype
, optab_vector
);
5728 && (optab_handler (optab
, TYPE_MODE (vectype
))
5729 != CODE_FOR_nothing
))
5731 scalar_shift_arg
= false;
5733 if (dump_enabled_p ())
5734 dump_printf_loc (MSG_NOTE
, vect_location
,
5735 "vector/vector shift/rotate found.\n");
5738 op1_vectype
= get_vectype_for_scalar_type (vinfo
,
5742 /* Unlike the other binary operators, shifts/rotates have
5743 the rhs being int, instead of the same type as the lhs,
5744 so make sure the scalar is the right type if we are
5745 dealing with vectors of long long/long/short/char. */
5746 incompatible_op1_vectype_p
5748 || !tree_nop_conversion_p (TREE_TYPE (vectype
),
5754 /* Supportable by target? */
5757 if (dump_enabled_p ())
5758 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5762 vec_mode
= TYPE_MODE (vectype
);
5763 icode
= (int) optab_handler (optab
, vec_mode
);
5764 if (icode
== CODE_FOR_nothing
)
5766 if (dump_enabled_p ())
5767 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5768 "op not supported by target.\n");
5769 /* Check only during analysis. */
5770 if (maybe_ne (GET_MODE_SIZE (vec_mode
), UNITS_PER_WORD
)
5772 && !vect_worthwhile_without_simd_p (vinfo
, code
)))
5774 if (dump_enabled_p ())
5775 dump_printf_loc (MSG_NOTE
, vect_location
,
5776 "proceeding using word mode.\n");
5779 /* Worthwhile without SIMD support? Check only during analysis. */
5781 && !VECTOR_MODE_P (TYPE_MODE (vectype
))
5782 && !vect_worthwhile_without_simd_p (vinfo
, code
))
5784 if (dump_enabled_p ())
5785 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5786 "not worthwhile without SIMD support.\n");
5790 if (!vec_stmt
) /* transformation not required. */
5793 && (!vect_maybe_update_slp_op_vectype (slp_op0
, vectype
)
5794 || (!scalar_shift_arg
5795 && !vect_maybe_update_slp_op_vectype (slp_op1
, vectype
))))
5797 if (dump_enabled_p ())
5798 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5799 "incompatible vector types for invariants\n");
5802 STMT_VINFO_TYPE (stmt_info
) = shift_vec_info_type
;
5803 DUMP_VECT_SCOPE ("vectorizable_shift");
5804 vect_model_simple_cost (vinfo
, stmt_info
, ncopies
, dt
,
5805 scalar_shift_arg
? 1 : ndts
, slp_node
, cost_vec
);
5811 if (dump_enabled_p ())
5812 dump_printf_loc (MSG_NOTE
, vect_location
,
5813 "transform binary/unary operation.\n");
5815 if (incompatible_op1_vectype_p
&& !slp_node
)
5817 op1
= fold_convert (TREE_TYPE (vectype
), op1
);
5818 if (dt
[1] != vect_constant_def
)
5819 op1
= vect_init_vector (vinfo
, stmt_info
, op1
,
5820 TREE_TYPE (vectype
), NULL
);
5824 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
5826 prev_stmt_info
= NULL
;
5827 for (j
= 0; j
< ncopies
; j
++)
5832 if (scalar_shift_arg
)
5834 /* Vector shl and shr insn patterns can be defined with scalar
5835 operand 2 (shift operand). In this case, use constant or loop
5836 invariant op1 directly, without extending it to vector mode
5838 optab_op2_mode
= insn_data
[icode
].operand
[2].mode
;
5839 if (!VECTOR_MODE_P (optab_op2_mode
))
5841 if (dump_enabled_p ())
5842 dump_printf_loc (MSG_NOTE
, vect_location
,
5843 "operand 1 using scalar mode.\n");
5845 vec_oprnds1
.create (slp_node
? slp_node
->vec_stmts_size
: 1);
5846 vec_oprnds1
.quick_push (vec_oprnd1
);
5849 /* Store vec_oprnd1 for every vector stmt to be created
5850 for SLP_NODE. We check during the analysis that all
5851 the shift arguments are the same.
5852 TODO: Allow different constants for different vector
5853 stmts generated for an SLP instance. */
5854 for (k
= 0; k
< slp_node
->vec_stmts_size
- 1; k
++)
5855 vec_oprnds1
.quick_push (vec_oprnd1
);
5859 else if (slp_node
&& incompatible_op1_vectype_p
)
5861 if (was_scalar_shift_arg
)
5863 /* If the argument was the same in all lanes create
5864 the correctly typed vector shift amount directly. */
5865 op1
= fold_convert (TREE_TYPE (vectype
), op1
);
5866 op1
= vect_init_vector (vinfo
, stmt_info
,
5867 op1
, TREE_TYPE (vectype
),
5868 !loop_vinfo
? gsi
: NULL
);
5869 vec_oprnd1
= vect_init_vector (vinfo
, stmt_info
, op1
, vectype
,
5870 !loop_vinfo
? gsi
: NULL
);
5871 vec_oprnds1
.create (slp_node
->vec_stmts_size
);
5872 for (k
= 0; k
< slp_node
->vec_stmts_size
; k
++)
5873 vec_oprnds1
.quick_push (vec_oprnd1
);
5875 else if (dt
[1] == vect_constant_def
)
5877 /* Convert the scalar constant shift amounts in-place. */
5878 slp_tree shift
= SLP_TREE_CHILDREN (slp_node
)[1];
5879 gcc_assert (SLP_TREE_DEF_TYPE (shift
) == vect_constant_def
);
5880 for (unsigned i
= 0;
5881 i
< SLP_TREE_SCALAR_OPS (shift
).length (); ++i
)
5883 SLP_TREE_SCALAR_OPS (shift
)[i
]
5884 = fold_convert (TREE_TYPE (vectype
),
5885 SLP_TREE_SCALAR_OPS (shift
)[i
]);
5886 gcc_assert ((TREE_CODE (SLP_TREE_SCALAR_OPS (shift
)[i
])
5891 gcc_assert (TYPE_MODE (op1_vectype
) == TYPE_MODE (vectype
));
5894 /* vec_oprnd1 is available if operand 1 should be of a scalar-type
5895 (a special case for certain kind of vector shifts); otherwise,
5896 operand 1 should be of a vector type (the usual case). */
5898 vect_get_vec_defs (vinfo
, op0
, NULL_TREE
, stmt_info
,
5899 &vec_oprnds0
, NULL
, slp_node
);
5901 vect_get_vec_defs (vinfo
, op0
, op1
, stmt_info
,
5902 &vec_oprnds0
, &vec_oprnds1
, slp_node
);
5905 vect_get_vec_defs_for_stmt_copy (vinfo
, &vec_oprnds0
, &vec_oprnds1
);
5907 /* Arguments are ready. Create the new vector stmt. */
5908 stmt_vec_info new_stmt_info
= NULL
;
5909 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vop0
)
5911 vop1
= vec_oprnds1
[i
];
5912 gassign
*new_stmt
= gimple_build_assign (vec_dest
, code
, vop0
, vop1
);
5913 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
5914 gimple_assign_set_lhs (new_stmt
, new_temp
);
5916 = vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
5918 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt_info
);
5925 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt_info
;
5927 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt_info
;
5928 prev_stmt_info
= new_stmt_info
;
5931 vec_oprnds0
.release ();
5932 vec_oprnds1
.release ();
5938 /* Function vectorizable_operation.
5940 Check if STMT_INFO performs a binary, unary or ternary operation that can
5942 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
5943 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
5944 Return true if STMT_INFO is vectorizable in this way. */
5947 vectorizable_operation (vec_info
*vinfo
,
5948 stmt_vec_info stmt_info
, gimple_stmt_iterator
*gsi
,
5949 stmt_vec_info
*vec_stmt
, slp_tree slp_node
,
5950 stmt_vector_for_cost
*cost_vec
)
5954 tree op0
, op1
= NULL_TREE
, op2
= NULL_TREE
;
5956 loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
);
5957 enum tree_code code
, orig_code
;
5958 machine_mode vec_mode
;
5962 bool target_support_p
;
5963 enum vect_def_type dt
[3]
5964 = {vect_unknown_def_type
, vect_unknown_def_type
, vect_unknown_def_type
};
5966 stmt_vec_info prev_stmt_info
;
5967 poly_uint64 nunits_in
;
5968 poly_uint64 nunits_out
;
5970 int ncopies
, vec_num
;
5972 vec
<tree
> vec_oprnds0
= vNULL
;
5973 vec
<tree
> vec_oprnds1
= vNULL
;
5974 vec
<tree
> vec_oprnds2
= vNULL
;
5975 tree vop0
, vop1
, vop2
;
5976 bb_vec_info bb_vinfo
= dyn_cast
<bb_vec_info
> (vinfo
);
5978 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
5981 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
5985 /* Is STMT a vectorizable binary/unary operation? */
5986 gassign
*stmt
= dyn_cast
<gassign
*> (stmt_info
->stmt
);
5990 /* Loads and stores are handled in vectorizable_{load,store}. */
5991 if (STMT_VINFO_DATA_REF (stmt_info
))
5994 orig_code
= code
= gimple_assign_rhs_code (stmt
);
5996 /* Shifts are handled in vectorizable_shift. */
5997 if (code
== LSHIFT_EXPR
5998 || code
== RSHIFT_EXPR
5999 || code
== LROTATE_EXPR
6000 || code
== RROTATE_EXPR
)
6003 /* Comparisons are handled in vectorizable_comparison. */
6004 if (TREE_CODE_CLASS (code
) == tcc_comparison
)
6007 /* Conditions are handled in vectorizable_condition. */
6008 if (code
== COND_EXPR
)
6011 /* For pointer addition and subtraction, we should use the normal
6012 plus and minus for the vector operation. */
6013 if (code
== POINTER_PLUS_EXPR
)
6015 if (code
== POINTER_DIFF_EXPR
)
6018 /* Support only unary or binary operations. */
6019 op_type
= TREE_CODE_LENGTH (code
);
6020 if (op_type
!= unary_op
&& op_type
!= binary_op
&& op_type
!= ternary_op
)
6022 if (dump_enabled_p ())
6023 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6024 "num. args = %d (not unary/binary/ternary op).\n",
6029 scalar_dest
= gimple_assign_lhs (stmt
);
6030 vectype_out
= STMT_VINFO_VECTYPE (stmt_info
);
6032 /* Most operations cannot handle bit-precision types without extra
6034 bool mask_op_p
= VECTOR_BOOLEAN_TYPE_P (vectype_out
);
6036 && !type_has_mode_precision_p (TREE_TYPE (scalar_dest
))
6037 /* Exception are bitwise binary operations. */
6038 && code
!= BIT_IOR_EXPR
6039 && code
!= BIT_XOR_EXPR
6040 && code
!= BIT_AND_EXPR
)
6042 if (dump_enabled_p ())
6043 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6044 "bit-precision arithmetic not supported.\n");
6049 if (!vect_is_simple_use (vinfo
, stmt_info
, slp_node
,
6050 0, &op0
, &slp_op0
, &dt
[0], &vectype
))
6052 if (dump_enabled_p ())
6053 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6054 "use not simple.\n");
6057 /* If op0 is an external or constant def, infer the vector type
6058 from the scalar type. */
6061 /* For boolean type we cannot determine vectype by
6062 invariant value (don't know whether it is a vector
6063 of booleans or vector of integers). We use output
6064 vectype because operations on boolean don't change
6066 if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op0
)))
6068 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (scalar_dest
)))
6070 if (dump_enabled_p ())
6071 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6072 "not supported operation on bool value.\n");
6075 vectype
= vectype_out
;
6078 vectype
= get_vectype_for_scalar_type (vinfo
, TREE_TYPE (op0
),
6082 gcc_assert (vectype
);
6085 if (dump_enabled_p ())
6086 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6087 "no vectype for scalar type %T\n",
6093 nunits_out
= TYPE_VECTOR_SUBPARTS (vectype_out
);
6094 nunits_in
= TYPE_VECTOR_SUBPARTS (vectype
);
6095 if (maybe_ne (nunits_out
, nunits_in
))
6098 tree vectype2
= NULL_TREE
, vectype3
= NULL_TREE
;
6099 slp_tree slp_op1
= NULL
, slp_op2
= NULL
;
6100 if (op_type
== binary_op
|| op_type
== ternary_op
)
6102 if (!vect_is_simple_use (vinfo
, stmt_info
, slp_node
,
6103 1, &op1
, &slp_op1
, &dt
[1], &vectype2
))
6105 if (dump_enabled_p ())
6106 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6107 "use not simple.\n");
6111 if (op_type
== ternary_op
)
6113 if (!vect_is_simple_use (vinfo
, stmt_info
, slp_node
,
6114 2, &op2
, &slp_op2
, &dt
[2], &vectype3
))
6116 if (dump_enabled_p ())
6117 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6118 "use not simple.\n");
6123 /* Multiple types in SLP are handled by creating the appropriate number of
6124 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
6129 vec_num
= SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
);
6133 ncopies
= vect_get_num_copies (loop_vinfo
, vectype
);
6137 gcc_assert (ncopies
>= 1);
6139 /* Reject attempts to combine mask types with nonmask types, e.g. if
6140 we have an AND between a (nonmask) boolean loaded from memory and
6141 a (mask) boolean result of a comparison.
6143 TODO: We could easily fix these cases up using pattern statements. */
6144 if (VECTOR_BOOLEAN_TYPE_P (vectype
) != mask_op_p
6145 || (vectype2
&& VECTOR_BOOLEAN_TYPE_P (vectype2
) != mask_op_p
)
6146 || (vectype3
&& VECTOR_BOOLEAN_TYPE_P (vectype3
) != mask_op_p
))
6148 if (dump_enabled_p ())
6149 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6150 "mixed mask and nonmask vector types\n");
6154 /* Supportable by target? */
6156 vec_mode
= TYPE_MODE (vectype
);
6157 if (code
== MULT_HIGHPART_EXPR
)
6158 target_support_p
= can_mult_highpart_p (vec_mode
, TYPE_UNSIGNED (vectype
));
6161 optab
= optab_for_tree_code (code
, vectype
, optab_default
);
6164 if (dump_enabled_p ())
6165 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6169 target_support_p
= (optab_handler (optab
, vec_mode
)
6170 != CODE_FOR_nothing
);
6173 if (!target_support_p
)
6175 if (dump_enabled_p ())
6176 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6177 "op not supported by target.\n");
6178 /* Check only during analysis. */
6179 if (maybe_ne (GET_MODE_SIZE (vec_mode
), UNITS_PER_WORD
)
6180 || (!vec_stmt
&& !vect_worthwhile_without_simd_p (vinfo
, code
)))
6182 if (dump_enabled_p ())
6183 dump_printf_loc (MSG_NOTE
, vect_location
,
6184 "proceeding using word mode.\n");
6187 /* Worthwhile without SIMD support? Check only during analysis. */
6188 if (!VECTOR_MODE_P (vec_mode
)
6190 && !vect_worthwhile_without_simd_p (vinfo
, code
))
6192 if (dump_enabled_p ())
6193 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6194 "not worthwhile without SIMD support.\n");
6198 int reduc_idx
= STMT_VINFO_REDUC_IDX (stmt_info
);
6199 vec_loop_masks
*masks
= (loop_vinfo
? &LOOP_VINFO_MASKS (loop_vinfo
) : NULL
);
6200 internal_fn cond_fn
= get_conditional_internal_fn (code
);
6202 if (!vec_stmt
) /* transformation not required. */
6204 /* If this operation is part of a reduction, a fully-masked loop
6205 should only change the active lanes of the reduction chain,
6206 keeping the inactive lanes as-is. */
6208 && LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo
)
6211 if (cond_fn
== IFN_LAST
6212 || !direct_internal_fn_supported_p (cond_fn
, vectype
,
6213 OPTIMIZE_FOR_SPEED
))
6215 if (dump_enabled_p ())
6216 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6217 "can't use a fully-masked loop because no"
6218 " conditional operation is available.\n");
6219 LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo
) = false;
6222 vect_record_loop_mask (loop_vinfo
, masks
, ncopies
* vec_num
,
6226 /* Put types on constant and invariant SLP children. */
6228 && (!vect_maybe_update_slp_op_vectype (slp_op0
, vectype
)
6229 || !vect_maybe_update_slp_op_vectype (slp_op1
, vectype
)
6230 || !vect_maybe_update_slp_op_vectype (slp_op2
, vectype
)))
6232 if (dump_enabled_p ())
6233 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6234 "incompatible vector types for invariants\n");
6238 STMT_VINFO_TYPE (stmt_info
) = op_vec_info_type
;
6239 DUMP_VECT_SCOPE ("vectorizable_operation");
6240 vect_model_simple_cost (vinfo
, stmt_info
,
6241 ncopies
, dt
, ndts
, slp_node
, cost_vec
);
6247 if (dump_enabled_p ())
6248 dump_printf_loc (MSG_NOTE
, vect_location
,
6249 "transform binary/unary operation.\n");
6251 bool masked_loop_p
= loop_vinfo
&& LOOP_VINFO_FULLY_MASKED_P (loop_vinfo
);
6253 /* POINTER_DIFF_EXPR has pointer arguments which are vectorized as
6254 vectors with unsigned elements, but the result is signed. So, we
6255 need to compute the MINUS_EXPR into vectype temporary and
6256 VIEW_CONVERT_EXPR it into the final vectype_out result. */
6257 tree vec_cvt_dest
= NULL_TREE
;
6258 if (orig_code
== POINTER_DIFF_EXPR
)
6260 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
6261 vec_cvt_dest
= vect_create_destination_var (scalar_dest
, vectype_out
);
6265 vec_dest
= vect_create_destination_var (scalar_dest
, vectype_out
);
6267 /* In case the vectorization factor (VF) is bigger than the number
6268 of elements that we can fit in a vectype (nunits), we have to generate
6269 more than one vector stmt - i.e - we need to "unroll" the
6270 vector stmt by a factor VF/nunits. In doing so, we record a pointer
6271 from one copy of the vector stmt to the next, in the field
6272 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
6273 stages to find the correct vector defs to be used when vectorizing
6274 stmts that use the defs of the current stmt. The example below
6275 illustrates the vectorization process when VF=16 and nunits=4 (i.e.,
6276 we need to create 4 vectorized stmts):
6278 before vectorization:
6279 RELATED_STMT VEC_STMT
6283 step 1: vectorize stmt S1 (done in vectorizable_load. See more details
6285 RELATED_STMT VEC_STMT
6286 VS1_0: vx0 = memref0 VS1_1 -
6287 VS1_1: vx1 = memref1 VS1_2 -
6288 VS1_2: vx2 = memref2 VS1_3 -
6289 VS1_3: vx3 = memref3 - -
6290 S1: x = load - VS1_0
6293 step2: vectorize stmt S2 (done here):
6294 To vectorize stmt S2 we first need to find the relevant vector
6295 def for the first operand 'x'. This is, as usual, obtained from
6296 the vector stmt recorded in the STMT_VINFO_VEC_STMT of the stmt
6297 that defines 'x' (S1). This way we find the stmt VS1_0, and the
6298 relevant vector def 'vx0'. Having found 'vx0' we can generate
6299 the vector stmt VS2_0, and as usual, record it in the
6300 STMT_VINFO_VEC_STMT of stmt S2.
6301 When creating the second copy (VS2_1), we obtain the relevant vector
6302 def from the vector stmt recorded in the STMT_VINFO_RELATED_STMT of
6303 stmt VS1_0. This way we find the stmt VS1_1 and the relevant
6304 vector def 'vx1'. Using 'vx1' we create stmt VS2_1 and record a
6305 pointer to it in the STMT_VINFO_RELATED_STMT of the vector stmt VS2_0.
6306 Similarly when creating stmts VS2_2 and VS2_3. This is the resulting
6307 chain of stmts and pointers:
6308 RELATED_STMT VEC_STMT
6309 VS1_0: vx0 = memref0 VS1_1 -
6310 VS1_1: vx1 = memref1 VS1_2 -
6311 VS1_2: vx2 = memref2 VS1_3 -
6312 VS1_3: vx3 = memref3 - -
6313 S1: x = load - VS1_0
6314 VS2_0: vz0 = vx0 + v1 VS2_1 -
6315 VS2_1: vz1 = vx1 + v1 VS2_2 -
6316 VS2_2: vz2 = vx2 + v1 VS2_3 -
6317 VS2_3: vz3 = vx3 + v1 - -
6318 S2: z = x + 1 - VS2_0 */
6320 prev_stmt_info
= NULL
;
6321 for (j
= 0; j
< ncopies
; j
++)
6326 if (op_type
== binary_op
)
6327 vect_get_vec_defs (vinfo
, op0
, op1
, stmt_info
,
6328 &vec_oprnds0
, &vec_oprnds1
, slp_node
);
6329 else if (op_type
== ternary_op
)
6333 auto_vec
<vec
<tree
> > vec_defs(3);
6334 vect_get_slp_defs (vinfo
, slp_node
, &vec_defs
);
6335 vec_oprnds0
= vec_defs
[0];
6336 vec_oprnds1
= vec_defs
[1];
6337 vec_oprnds2
= vec_defs
[2];
6341 vect_get_vec_defs (vinfo
, op0
, op1
, stmt_info
, &vec_oprnds0
,
6342 &vec_oprnds1
, NULL
);
6343 vect_get_vec_defs (vinfo
, op2
, NULL_TREE
, stmt_info
,
6344 &vec_oprnds2
, NULL
, NULL
);
6348 vect_get_vec_defs (vinfo
, op0
, NULL_TREE
, stmt_info
, &vec_oprnds0
,
6353 vect_get_vec_defs_for_stmt_copy (vinfo
, &vec_oprnds0
, &vec_oprnds1
);
6354 if (op_type
== ternary_op
)
6356 tree vec_oprnd
= vec_oprnds2
.pop ();
6357 vec_oprnds2
.quick_push (vect_get_vec_def_for_stmt_copy (vinfo
,
6362 /* Arguments are ready. Create the new vector stmt. */
6363 stmt_vec_info new_stmt_info
= NULL
;
6364 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vop0
)
6366 vop1
= ((op_type
== binary_op
|| op_type
== ternary_op
)
6367 ? vec_oprnds1
[i
] : NULL_TREE
);
6368 vop2
= ((op_type
== ternary_op
)
6369 ? vec_oprnds2
[i
] : NULL_TREE
);
6370 if (masked_loop_p
&& reduc_idx
>= 0)
6372 /* Perform the operation on active elements only and take
6373 inactive elements from the reduction chain input. */
6375 vop2
= reduc_idx
== 1 ? vop1
: vop0
;
6376 tree mask
= vect_get_loop_mask (gsi
, masks
, vec_num
* ncopies
,
6377 vectype
, i
* ncopies
+ j
);
6378 gcall
*call
= gimple_build_call_internal (cond_fn
, 4, mask
,
6380 new_temp
= make_ssa_name (vec_dest
, call
);
6381 gimple_call_set_lhs (call
, new_temp
);
6382 gimple_call_set_nothrow (call
, true);
6384 = vect_finish_stmt_generation (vinfo
, stmt_info
, call
, gsi
);
6388 gassign
*new_stmt
= gimple_build_assign (vec_dest
, code
,
6390 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
6391 gimple_assign_set_lhs (new_stmt
, new_temp
);
6393 = vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
6396 new_temp
= build1 (VIEW_CONVERT_EXPR
, vectype_out
, new_temp
);
6398 = gimple_build_assign (vec_cvt_dest
, VIEW_CONVERT_EXPR
,
6400 new_temp
= make_ssa_name (vec_cvt_dest
, new_stmt
);
6401 gimple_assign_set_lhs (new_stmt
, new_temp
);
6402 new_stmt_info
= vect_finish_stmt_generation (vinfo
, stmt_info
,
6407 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt_info
);
6414 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt_info
;
6416 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt_info
;
6417 prev_stmt_info
= new_stmt_info
;
6420 vec_oprnds0
.release ();
6421 vec_oprnds1
.release ();
6422 vec_oprnds2
.release ();
6427 /* A helper function to ensure data reference DR_INFO's base alignment. */
6430 ensure_base_align (dr_vec_info
*dr_info
)
6432 if (dr_info
->misalignment
== DR_MISALIGNMENT_UNINITIALIZED
)
6435 if (dr_info
->base_misaligned
)
6437 tree base_decl
= dr_info
->base_decl
;
6439 // We should only be able to increase the alignment of a base object if
6440 // we know what its new alignment should be at compile time.
6441 unsigned HOST_WIDE_INT align_base_to
=
6442 DR_TARGET_ALIGNMENT (dr_info
).to_constant () * BITS_PER_UNIT
;
6444 if (decl_in_symtab_p (base_decl
))
6445 symtab_node::get (base_decl
)->increase_alignment (align_base_to
);
6446 else if (DECL_ALIGN (base_decl
) < align_base_to
)
6448 SET_DECL_ALIGN (base_decl
, align_base_to
);
6449 DECL_USER_ALIGN (base_decl
) = 1;
6451 dr_info
->base_misaligned
= false;
6456 /* Function get_group_alias_ptr_type.
6458 Return the alias type for the group starting at FIRST_STMT_INFO. */
6461 get_group_alias_ptr_type (stmt_vec_info first_stmt_info
)
6463 struct data_reference
*first_dr
, *next_dr
;
6465 first_dr
= STMT_VINFO_DATA_REF (first_stmt_info
);
6466 stmt_vec_info next_stmt_info
= DR_GROUP_NEXT_ELEMENT (first_stmt_info
);
6467 while (next_stmt_info
)
6469 next_dr
= STMT_VINFO_DATA_REF (next_stmt_info
);
6470 if (get_alias_set (DR_REF (first_dr
))
6471 != get_alias_set (DR_REF (next_dr
)))
6473 if (dump_enabled_p ())
6474 dump_printf_loc (MSG_NOTE
, vect_location
,
6475 "conflicting alias set types.\n");
6476 return ptr_type_node
;
6478 next_stmt_info
= DR_GROUP_NEXT_ELEMENT (next_stmt_info
);
6480 return reference_alias_ptr_type (DR_REF (first_dr
));
6484 /* Function scan_operand_equal_p.
6486 Helper function for check_scan_store. Compare two references
6487 with .GOMP_SIMD_LANE bases. */
6490 scan_operand_equal_p (tree ref1
, tree ref2
)
6492 tree ref
[2] = { ref1
, ref2
};
6493 poly_int64 bitsize
[2], bitpos
[2];
6494 tree offset
[2], base
[2];
6495 for (int i
= 0; i
< 2; ++i
)
6498 int unsignedp
, reversep
, volatilep
= 0;
6499 base
[i
] = get_inner_reference (ref
[i
], &bitsize
[i
], &bitpos
[i
],
6500 &offset
[i
], &mode
, &unsignedp
,
6501 &reversep
, &volatilep
);
6502 if (reversep
|| volatilep
|| maybe_ne (bitpos
[i
], 0))
6504 if (TREE_CODE (base
[i
]) == MEM_REF
6505 && offset
[i
] == NULL_TREE
6506 && TREE_CODE (TREE_OPERAND (base
[i
], 0)) == SSA_NAME
)
6508 gimple
*def_stmt
= SSA_NAME_DEF_STMT (TREE_OPERAND (base
[i
], 0));
6509 if (is_gimple_assign (def_stmt
)
6510 && gimple_assign_rhs_code (def_stmt
) == POINTER_PLUS_EXPR
6511 && TREE_CODE (gimple_assign_rhs1 (def_stmt
)) == ADDR_EXPR
6512 && TREE_CODE (gimple_assign_rhs2 (def_stmt
)) == SSA_NAME
)
6514 if (maybe_ne (mem_ref_offset (base
[i
]), 0))
6516 base
[i
] = TREE_OPERAND (gimple_assign_rhs1 (def_stmt
), 0);
6517 offset
[i
] = gimple_assign_rhs2 (def_stmt
);
6522 if (!operand_equal_p (base
[0], base
[1], 0))
6524 if (maybe_ne (bitsize
[0], bitsize
[1]))
6526 if (offset
[0] != offset
[1])
6528 if (!offset
[0] || !offset
[1])
6530 if (!operand_equal_p (offset
[0], offset
[1], 0))
6533 for (int i
= 0; i
< 2; ++i
)
6535 step
[i
] = integer_one_node
;
6536 if (TREE_CODE (offset
[i
]) == SSA_NAME
)
6538 gimple
*def_stmt
= SSA_NAME_DEF_STMT (offset
[i
]);
6539 if (is_gimple_assign (def_stmt
)
6540 && gimple_assign_rhs_code (def_stmt
) == MULT_EXPR
6541 && (TREE_CODE (gimple_assign_rhs2 (def_stmt
))
6544 step
[i
] = gimple_assign_rhs2 (def_stmt
);
6545 offset
[i
] = gimple_assign_rhs1 (def_stmt
);
6548 else if (TREE_CODE (offset
[i
]) == MULT_EXPR
)
6550 step
[i
] = TREE_OPERAND (offset
[i
], 1);
6551 offset
[i
] = TREE_OPERAND (offset
[i
], 0);
6553 tree rhs1
= NULL_TREE
;
6554 if (TREE_CODE (offset
[i
]) == SSA_NAME
)
6556 gimple
*def_stmt
= SSA_NAME_DEF_STMT (offset
[i
]);
6557 if (gimple_assign_cast_p (def_stmt
))
6558 rhs1
= gimple_assign_rhs1 (def_stmt
);
6560 else if (CONVERT_EXPR_P (offset
[i
]))
6561 rhs1
= TREE_OPERAND (offset
[i
], 0);
6563 && INTEGRAL_TYPE_P (TREE_TYPE (rhs1
))
6564 && INTEGRAL_TYPE_P (TREE_TYPE (offset
[i
]))
6565 && (TYPE_PRECISION (TREE_TYPE (offset
[i
]))
6566 >= TYPE_PRECISION (TREE_TYPE (rhs1
))))
6569 if (!operand_equal_p (offset
[0], offset
[1], 0)
6570 || !operand_equal_p (step
[0], step
[1], 0))
6578 enum scan_store_kind
{
6579 /* Normal permutation. */
6580 scan_store_kind_perm
,
6582 /* Whole vector left shift permutation with zero init. */
6583 scan_store_kind_lshift_zero
,
6585 /* Whole vector left shift permutation and VEC_COND_EXPR. */
6586 scan_store_kind_lshift_cond
6589 /* Function check_scan_store.
6591 Verify if we can perform the needed permutations or whole vector shifts.
6592 Return -1 on failure, otherwise exact log2 of vectype's nunits.
6593 USE_WHOLE_VECTOR is a vector of enum scan_store_kind which operation
6594 to do at each step. */
6597 scan_store_can_perm_p (tree vectype
, tree init
,
6598 vec
<enum scan_store_kind
> *use_whole_vector
= NULL
)
6600 enum machine_mode vec_mode
= TYPE_MODE (vectype
);
6601 unsigned HOST_WIDE_INT nunits
;
6602 if (!TYPE_VECTOR_SUBPARTS (vectype
).is_constant (&nunits
))
6604 int units_log2
= exact_log2 (nunits
);
6605 if (units_log2
<= 0)
6609 enum scan_store_kind whole_vector_shift_kind
= scan_store_kind_perm
;
6610 for (i
= 0; i
<= units_log2
; ++i
)
6612 unsigned HOST_WIDE_INT j
, k
;
6613 enum scan_store_kind kind
= scan_store_kind_perm
;
6614 vec_perm_builder
sel (nunits
, nunits
, 1);
6615 sel
.quick_grow (nunits
);
6616 if (i
== units_log2
)
6618 for (j
= 0; j
< nunits
; ++j
)
6619 sel
[j
] = nunits
- 1;
6623 for (j
= 0; j
< (HOST_WIDE_INT_1U
<< i
); ++j
)
6625 for (k
= 0; j
< nunits
; ++j
, ++k
)
6626 sel
[j
] = nunits
+ k
;
6628 vec_perm_indices
indices (sel
, i
== units_log2
? 1 : 2, nunits
);
6629 if (!can_vec_perm_const_p (vec_mode
, indices
))
6631 if (i
== units_log2
)
6634 if (whole_vector_shift_kind
== scan_store_kind_perm
)
6636 if (optab_handler (vec_shl_optab
, vec_mode
) == CODE_FOR_nothing
)
6638 whole_vector_shift_kind
= scan_store_kind_lshift_zero
;
6639 /* Whole vector shifts shift in zeros, so if init is all zero
6640 constant, there is no need to do anything further. */
6641 if ((TREE_CODE (init
) != INTEGER_CST
6642 && TREE_CODE (init
) != REAL_CST
)
6643 || !initializer_zerop (init
))
6645 tree masktype
= truth_type_for (vectype
);
6646 if (!expand_vec_cond_expr_p (vectype
, masktype
, VECTOR_CST
))
6648 whole_vector_shift_kind
= scan_store_kind_lshift_cond
;
6651 kind
= whole_vector_shift_kind
;
6653 if (use_whole_vector
)
6655 if (kind
!= scan_store_kind_perm
&& use_whole_vector
->is_empty ())
6656 use_whole_vector
->safe_grow_cleared (i
);
6657 if (kind
!= scan_store_kind_perm
|| !use_whole_vector
->is_empty ())
6658 use_whole_vector
->safe_push (kind
);
6666 /* Function check_scan_store.
6668 Check magic stores for #pragma omp scan {in,ex}clusive reductions. */
6671 check_scan_store (vec_info
*vinfo
, stmt_vec_info stmt_info
, tree vectype
,
6672 enum vect_def_type rhs_dt
, bool slp
, tree mask
,
6673 vect_memory_access_type memory_access_type
)
6675 loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
);
6676 dr_vec_info
*dr_info
= STMT_VINFO_DR_INFO (stmt_info
);
6679 gcc_assert (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) > 1);
6682 || memory_access_type
!= VMAT_CONTIGUOUS
6683 || TREE_CODE (DR_BASE_ADDRESS (dr_info
->dr
)) != ADDR_EXPR
6684 || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (dr_info
->dr
), 0))
6685 || loop_vinfo
== NULL
6686 || LOOP_VINFO_FULLY_MASKED_P (loop_vinfo
)
6687 || STMT_VINFO_GROUPED_ACCESS (stmt_info
)
6688 || !integer_zerop (get_dr_vinfo_offset (vinfo
, dr_info
))
6689 || !integer_zerop (DR_INIT (dr_info
->dr
))
6690 || !(ref_type
= reference_alias_ptr_type (DR_REF (dr_info
->dr
)))
6691 || !alias_sets_conflict_p (get_alias_set (vectype
),
6692 get_alias_set (TREE_TYPE (ref_type
))))
6694 if (dump_enabled_p ())
6695 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6696 "unsupported OpenMP scan store.\n");
6700 /* We need to pattern match code built by OpenMP lowering and simplified
6701 by following optimizations into something we can handle.
6702 #pragma omp simd reduction(inscan,+:r)
6706 #pragma omp scan inclusive (r)
6709 shall have body with:
6710 // Initialization for input phase, store the reduction initializer:
6711 _20 = .GOMP_SIMD_LANE (simduid.3_14(D), 0);
6712 _21 = .GOMP_SIMD_LANE (simduid.3_14(D), 1);
6714 // Actual input phase:
6716 r.0_5 = D.2042[_20];
6719 // Initialization for scan phase:
6720 _25 = .GOMP_SIMD_LANE (simduid.3_14(D), 2);
6726 // Actual scan phase:
6728 r.1_8 = D.2042[_20];
6730 The "omp simd array" variable D.2042 holds the privatized copy used
6731 inside of the loop and D.2043 is another one that holds copies of
6732 the current original list item. The separate GOMP_SIMD_LANE ifn
6733 kinds are there in order to allow optimizing the initializer store
6734 and combiner sequence, e.g. if it is originally some C++ish user
6735 defined reduction, but allow the vectorizer to pattern recognize it
6736 and turn into the appropriate vectorized scan.
6738 For exclusive scan, this is slightly different:
6739 #pragma omp simd reduction(inscan,+:r)
6743 #pragma omp scan exclusive (r)
6746 shall have body with:
6747 // Initialization for input phase, store the reduction initializer:
6748 _20 = .GOMP_SIMD_LANE (simduid.3_14(D), 0);
6749 _21 = .GOMP_SIMD_LANE (simduid.3_14(D), 1);
6751 // Actual input phase:
6753 r.0_5 = D.2042[_20];
6756 // Initialization for scan phase:
6757 _25 = .GOMP_SIMD_LANE (simduid.3_14(D), 3);
6763 // Actual scan phase:
6765 r.1_8 = D.2044[_20];
6768 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) == 2)
6770 /* Match the D.2042[_21] = 0; store above. Just require that
6771 it is a constant or external definition store. */
6772 if (rhs_dt
!= vect_constant_def
&& rhs_dt
!= vect_external_def
)
6775 if (dump_enabled_p ())
6776 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6777 "unsupported OpenMP scan initializer store.\n");
6781 if (! loop_vinfo
->scan_map
)
6782 loop_vinfo
->scan_map
= new hash_map
<tree
, tree
>;
6783 tree var
= TREE_OPERAND (DR_BASE_ADDRESS (dr_info
->dr
), 0);
6784 tree
&cached
= loop_vinfo
->scan_map
->get_or_insert (var
);
6787 cached
= gimple_assign_rhs1 (STMT_VINFO_STMT (stmt_info
));
6789 /* These stores can be vectorized normally. */
6793 if (rhs_dt
!= vect_internal_def
)
6796 if (dump_enabled_p ())
6797 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6798 "unsupported OpenMP scan combiner pattern.\n");
6802 gimple
*stmt
= STMT_VINFO_STMT (stmt_info
);
6803 tree rhs
= gimple_assign_rhs1 (stmt
);
6804 if (TREE_CODE (rhs
) != SSA_NAME
)
6807 gimple
*other_store_stmt
= NULL
;
6808 tree var
= TREE_OPERAND (DR_BASE_ADDRESS (dr_info
->dr
), 0);
6809 bool inscan_var_store
6810 = lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var
)) != NULL
;
6812 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) == 4)
6814 if (!inscan_var_store
)
6816 use_operand_p use_p
;
6817 imm_use_iterator iter
;
6818 FOR_EACH_IMM_USE_FAST (use_p
, iter
, rhs
)
6820 gimple
*use_stmt
= USE_STMT (use_p
);
6821 if (use_stmt
== stmt
|| is_gimple_debug (use_stmt
))
6823 if (gimple_bb (use_stmt
) != gimple_bb (stmt
)
6824 || !is_gimple_assign (use_stmt
)
6825 || gimple_assign_rhs_class (use_stmt
) != GIMPLE_BINARY_RHS
6827 || TREE_CODE (gimple_assign_lhs (use_stmt
)) != SSA_NAME
)
6829 other_store_stmt
= use_stmt
;
6831 if (other_store_stmt
== NULL
)
6833 rhs
= gimple_assign_lhs (other_store_stmt
);
6834 if (!single_imm_use (rhs
, &use_p
, &other_store_stmt
))
6838 else if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) == 3)
6840 use_operand_p use_p
;
6841 imm_use_iterator iter
;
6842 FOR_EACH_IMM_USE_FAST (use_p
, iter
, rhs
)
6844 gimple
*use_stmt
= USE_STMT (use_p
);
6845 if (use_stmt
== stmt
|| is_gimple_debug (use_stmt
))
6847 if (other_store_stmt
)
6849 other_store_stmt
= use_stmt
;
6855 gimple
*def_stmt
= SSA_NAME_DEF_STMT (rhs
);
6856 if (gimple_bb (def_stmt
) != gimple_bb (stmt
)
6857 || !is_gimple_assign (def_stmt
)
6858 || gimple_assign_rhs_class (def_stmt
) != GIMPLE_BINARY_RHS
)
6861 enum tree_code code
= gimple_assign_rhs_code (def_stmt
);
6862 /* For pointer addition, we should use the normal plus for the vector
6866 case POINTER_PLUS_EXPR
:
6869 case MULT_HIGHPART_EXPR
:
6874 if (TREE_CODE_LENGTH (code
) != binary_op
|| !commutative_tree_code (code
))
6877 tree rhs1
= gimple_assign_rhs1 (def_stmt
);
6878 tree rhs2
= gimple_assign_rhs2 (def_stmt
);
6879 if (TREE_CODE (rhs1
) != SSA_NAME
|| TREE_CODE (rhs2
) != SSA_NAME
)
6882 gimple
*load1_stmt
= SSA_NAME_DEF_STMT (rhs1
);
6883 gimple
*load2_stmt
= SSA_NAME_DEF_STMT (rhs2
);
6884 if (gimple_bb (load1_stmt
) != gimple_bb (stmt
)
6885 || !gimple_assign_load_p (load1_stmt
)
6886 || gimple_bb (load2_stmt
) != gimple_bb (stmt
)
6887 || !gimple_assign_load_p (load2_stmt
))
6890 stmt_vec_info load1_stmt_info
= loop_vinfo
->lookup_stmt (load1_stmt
);
6891 stmt_vec_info load2_stmt_info
= loop_vinfo
->lookup_stmt (load2_stmt
);
6892 if (load1_stmt_info
== NULL
6893 || load2_stmt_info
== NULL
6894 || (STMT_VINFO_SIMD_LANE_ACCESS_P (load1_stmt_info
)
6895 != STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
))
6896 || (STMT_VINFO_SIMD_LANE_ACCESS_P (load2_stmt_info
)
6897 != STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
)))
6900 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) == 4 && inscan_var_store
)
6902 dr_vec_info
*load1_dr_info
= STMT_VINFO_DR_INFO (load1_stmt_info
);
6903 if (TREE_CODE (DR_BASE_ADDRESS (load1_dr_info
->dr
)) != ADDR_EXPR
6904 || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (load1_dr_info
->dr
), 0)))
6906 tree var1
= TREE_OPERAND (DR_BASE_ADDRESS (load1_dr_info
->dr
), 0);
6908 if (lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1
)))
6912 use_operand_p use_p
;
6913 imm_use_iterator iter
;
6914 FOR_EACH_IMM_USE_FAST (use_p
, iter
, lrhs
)
6916 gimple
*use_stmt
= USE_STMT (use_p
);
6917 if (use_stmt
== def_stmt
|| is_gimple_debug (use_stmt
))
6919 if (other_store_stmt
)
6921 other_store_stmt
= use_stmt
;
6925 if (other_store_stmt
== NULL
)
6927 if (gimple_bb (other_store_stmt
) != gimple_bb (stmt
)
6928 || !gimple_store_p (other_store_stmt
))
6931 stmt_vec_info other_store_stmt_info
6932 = loop_vinfo
->lookup_stmt (other_store_stmt
);
6933 if (other_store_stmt_info
== NULL
6934 || (STMT_VINFO_SIMD_LANE_ACCESS_P (other_store_stmt_info
)
6935 != STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
)))
6938 gimple
*stmt1
= stmt
;
6939 gimple
*stmt2
= other_store_stmt
;
6940 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) == 4 && !inscan_var_store
)
6941 std::swap (stmt1
, stmt2
);
6942 if (scan_operand_equal_p (gimple_assign_lhs (stmt1
),
6943 gimple_assign_rhs1 (load2_stmt
)))
6945 std::swap (rhs1
, rhs2
);
6946 std::swap (load1_stmt
, load2_stmt
);
6947 std::swap (load1_stmt_info
, load2_stmt_info
);
6949 if (!scan_operand_equal_p (gimple_assign_lhs (stmt1
),
6950 gimple_assign_rhs1 (load1_stmt
)))
6953 tree var3
= NULL_TREE
;
6954 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) == 3
6955 && !scan_operand_equal_p (gimple_assign_lhs (stmt2
),
6956 gimple_assign_rhs1 (load2_stmt
)))
6958 else if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) == 4)
6960 dr_vec_info
*load2_dr_info
= STMT_VINFO_DR_INFO (load2_stmt_info
);
6961 if (TREE_CODE (DR_BASE_ADDRESS (load2_dr_info
->dr
)) != ADDR_EXPR
6962 || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (load2_dr_info
->dr
), 0)))
6964 var3
= TREE_OPERAND (DR_BASE_ADDRESS (load2_dr_info
->dr
), 0);
6965 if (!lookup_attribute ("omp simd array", DECL_ATTRIBUTES (var3
))
6966 || lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var3
))
6967 || lookup_attribute ("omp simd inscan exclusive",
6968 DECL_ATTRIBUTES (var3
)))
6972 dr_vec_info
*other_dr_info
= STMT_VINFO_DR_INFO (other_store_stmt_info
);
6973 if (TREE_CODE (DR_BASE_ADDRESS (other_dr_info
->dr
)) != ADDR_EXPR
6974 || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (other_dr_info
->dr
), 0)))
6977 tree var1
= TREE_OPERAND (DR_BASE_ADDRESS (dr_info
->dr
), 0);
6978 tree var2
= TREE_OPERAND (DR_BASE_ADDRESS (other_dr_info
->dr
), 0);
6979 if (!lookup_attribute ("omp simd array", DECL_ATTRIBUTES (var1
))
6980 || !lookup_attribute ("omp simd array", DECL_ATTRIBUTES (var2
))
6981 || (!lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1
)))
6982 == (!lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var2
))))
6985 if (lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1
)))
6986 std::swap (var1
, var2
);
6988 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) == 4)
6990 if (!lookup_attribute ("omp simd inscan exclusive",
6991 DECL_ATTRIBUTES (var1
)))
6996 if (loop_vinfo
->scan_map
== NULL
)
6998 tree
*init
= loop_vinfo
->scan_map
->get (var1
);
7002 /* The IL is as expected, now check if we can actually vectorize it.
7009 should be vectorized as (where _40 is the vectorized rhs
7010 from the D.2042[_21] = 0; store):
7011 _30 = MEM <vector(8) int> [(int *)&D.2043];
7012 _31 = MEM <vector(8) int> [(int *)&D.2042];
7013 _32 = VEC_PERM_EXPR <_40, _31, { 0, 8, 9, 10, 11, 12, 13, 14 }>;
7015 // _33 = { _31[0], _31[0]+_31[1], _31[1]+_31[2], ..., _31[6]+_31[7] };
7016 _34 = VEC_PERM_EXPR <_40, _33, { 0, 1, 8, 9, 10, 11, 12, 13 }>;
7018 // _35 = { _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
7019 // _31[1]+.._31[4], ... _31[4]+.._31[7] };
7020 _36 = VEC_PERM_EXPR <_40, _35, { 0, 1, 2, 3, 8, 9, 10, 11 }>;
7022 // _37 = { _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
7023 // _31[0]+.._31[4], ... _31[0]+.._31[7] };
7025 _39 = VEC_PERM_EXPR <_38, _38, { 7, 7, 7, 7, 7, 7, 7, 7 }>;
7026 MEM <vector(8) int> [(int *)&D.2043] = _39;
7027 MEM <vector(8) int> [(int *)&D.2042] = _38;
7034 should be vectorized as (where _40 is the vectorized rhs
7035 from the D.2042[_21] = 0; store):
7036 _30 = MEM <vector(8) int> [(int *)&D.2043];
7037 _31 = MEM <vector(8) int> [(int *)&D.2042];
7038 _32 = VEC_PERM_EXPR <_40, _31, { 0, 8, 9, 10, 11, 12, 13, 14 }>;
7039 _33 = VEC_PERM_EXPR <_40, _32, { 0, 8, 9, 10, 11, 12, 13, 14 }>;
7041 // _34 = { 0, _31[0], _31[0]+_31[1], _31[1]+_31[2], _31[2]+_31[3],
7042 // _31[3]+_31[4], ... _31[5]+.._31[6] };
7043 _35 = VEC_PERM_EXPR <_40, _34, { 0, 1, 8, 9, 10, 11, 12, 13 }>;
7045 // _36 = { 0, _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
7046 // _31[1]+.._31[4], ... _31[3]+.._31[6] };
7047 _37 = VEC_PERM_EXPR <_40, _36, { 0, 1, 2, 3, 8, 9, 10, 11 }>;
7049 // _38 = { 0, _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
7050 // _31[0]+.._31[4], ... _31[0]+.._31[6] };
7053 _51 = VEC_PERM_EXPR <_50, _50, { 7, 7, 7, 7, 7, 7, 7, 7 }>;
7054 MEM <vector(8) int> [(int *)&D.2044] = _39;
7055 MEM <vector(8) int> [(int *)&D.2042] = _51; */
7056 enum machine_mode vec_mode
= TYPE_MODE (vectype
);
7057 optab optab
= optab_for_tree_code (code
, vectype
, optab_default
);
7058 if (!optab
|| optab_handler (optab
, vec_mode
) == CODE_FOR_nothing
)
7061 int units_log2
= scan_store_can_perm_p (vectype
, *init
);
7062 if (units_log2
== -1)
7069 /* Function vectorizable_scan_store.
7071 Helper of vectorizable_score, arguments like on vectorizable_store.
7072 Handle only the transformation, checking is done in check_scan_store. */
7075 vectorizable_scan_store (vec_info
*vinfo
,
7076 stmt_vec_info stmt_info
, gimple_stmt_iterator
*gsi
,
7077 stmt_vec_info
*vec_stmt
, int ncopies
)
7079 loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
);
7080 dr_vec_info
*dr_info
= STMT_VINFO_DR_INFO (stmt_info
);
7081 tree ref_type
= reference_alias_ptr_type (DR_REF (dr_info
->dr
));
7082 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
7084 if (dump_enabled_p ())
7085 dump_printf_loc (MSG_NOTE
, vect_location
,
7086 "transform scan store. ncopies = %d\n", ncopies
);
7088 gimple
*stmt
= STMT_VINFO_STMT (stmt_info
);
7089 tree rhs
= gimple_assign_rhs1 (stmt
);
7090 gcc_assert (TREE_CODE (rhs
) == SSA_NAME
);
7092 tree var
= TREE_OPERAND (DR_BASE_ADDRESS (dr_info
->dr
), 0);
7093 bool inscan_var_store
7094 = lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var
)) != NULL
;
7096 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) == 4 && !inscan_var_store
)
7098 use_operand_p use_p
;
7099 imm_use_iterator iter
;
7100 FOR_EACH_IMM_USE_FAST (use_p
, iter
, rhs
)
7102 gimple
*use_stmt
= USE_STMT (use_p
);
7103 if (use_stmt
== stmt
|| is_gimple_debug (use_stmt
))
7105 rhs
= gimple_assign_lhs (use_stmt
);
7110 gimple
*def_stmt
= SSA_NAME_DEF_STMT (rhs
);
7111 enum tree_code code
= gimple_assign_rhs_code (def_stmt
);
7112 if (code
== POINTER_PLUS_EXPR
)
7114 gcc_assert (TREE_CODE_LENGTH (code
) == binary_op
7115 && commutative_tree_code (code
));
7116 tree rhs1
= gimple_assign_rhs1 (def_stmt
);
7117 tree rhs2
= gimple_assign_rhs2 (def_stmt
);
7118 gcc_assert (TREE_CODE (rhs1
) == SSA_NAME
&& TREE_CODE (rhs2
) == SSA_NAME
);
7119 gimple
*load1_stmt
= SSA_NAME_DEF_STMT (rhs1
);
7120 gimple
*load2_stmt
= SSA_NAME_DEF_STMT (rhs2
);
7121 stmt_vec_info load1_stmt_info
= loop_vinfo
->lookup_stmt (load1_stmt
);
7122 stmt_vec_info load2_stmt_info
= loop_vinfo
->lookup_stmt (load2_stmt
);
7123 dr_vec_info
*load1_dr_info
= STMT_VINFO_DR_INFO (load1_stmt_info
);
7124 dr_vec_info
*load2_dr_info
= STMT_VINFO_DR_INFO (load2_stmt_info
);
7125 tree var1
= TREE_OPERAND (DR_BASE_ADDRESS (load1_dr_info
->dr
), 0);
7126 tree var2
= TREE_OPERAND (DR_BASE_ADDRESS (load2_dr_info
->dr
), 0);
7128 if (lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1
)))
7130 std::swap (rhs1
, rhs2
);
7131 std::swap (var1
, var2
);
7132 std::swap (load1_dr_info
, load2_dr_info
);
7135 tree
*init
= loop_vinfo
->scan_map
->get (var1
);
7138 unsigned HOST_WIDE_INT nunits
;
7139 if (!TYPE_VECTOR_SUBPARTS (vectype
).is_constant (&nunits
))
7141 auto_vec
<enum scan_store_kind
, 16> use_whole_vector
;
7142 int units_log2
= scan_store_can_perm_p (vectype
, *init
, &use_whole_vector
);
7143 gcc_assert (units_log2
> 0);
7144 auto_vec
<tree
, 16> perms
;
7145 perms
.quick_grow (units_log2
+ 1);
7146 tree zero_vec
= NULL_TREE
, masktype
= NULL_TREE
;
7147 for (int i
= 0; i
<= units_log2
; ++i
)
7149 unsigned HOST_WIDE_INT j
, k
;
7150 vec_perm_builder
sel (nunits
, nunits
, 1);
7151 sel
.quick_grow (nunits
);
7152 if (i
== units_log2
)
7153 for (j
= 0; j
< nunits
; ++j
)
7154 sel
[j
] = nunits
- 1;
7157 for (j
= 0; j
< (HOST_WIDE_INT_1U
<< i
); ++j
)
7159 for (k
= 0; j
< nunits
; ++j
, ++k
)
7160 sel
[j
] = nunits
+ k
;
7162 vec_perm_indices
indices (sel
, i
== units_log2
? 1 : 2, nunits
);
7163 if (!use_whole_vector
.is_empty ()
7164 && use_whole_vector
[i
] != scan_store_kind_perm
)
7166 if (zero_vec
== NULL_TREE
)
7167 zero_vec
= build_zero_cst (vectype
);
7168 if (masktype
== NULL_TREE
7169 && use_whole_vector
[i
] == scan_store_kind_lshift_cond
)
7170 masktype
= truth_type_for (vectype
);
7171 perms
[i
] = vect_gen_perm_mask_any (vectype
, indices
);
7174 perms
[i
] = vect_gen_perm_mask_checked (vectype
, indices
);
7177 stmt_vec_info prev_stmt_info
= NULL
;
7178 tree vec_oprnd1
= NULL_TREE
;
7179 tree vec_oprnd2
= NULL_TREE
;
7180 tree vec_oprnd3
= NULL_TREE
;
7181 tree dataref_ptr
= DR_BASE_ADDRESS (dr_info
->dr
);
7182 tree dataref_offset
= build_int_cst (ref_type
, 0);
7183 tree bump
= vect_get_data_ptr_increment (vinfo
, dr_info
,
7184 vectype
, VMAT_CONTIGUOUS
);
7185 tree ldataref_ptr
= NULL_TREE
;
7186 tree orig
= NULL_TREE
;
7187 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) == 4 && !inscan_var_store
)
7188 ldataref_ptr
= DR_BASE_ADDRESS (load1_dr_info
->dr
);
7189 for (int j
= 0; j
< ncopies
; j
++)
7191 stmt_vec_info new_stmt_info
;
7194 vec_oprnd1
= vect_get_vec_def_for_operand (vinfo
, *init
, stmt_info
);
7195 if (ldataref_ptr
== NULL
)
7196 vec_oprnd2
= vect_get_vec_def_for_operand (vinfo
, rhs1
, stmt_info
);
7197 vec_oprnd3
= vect_get_vec_def_for_operand (vinfo
, rhs2
, stmt_info
);
7202 vec_oprnd1
= vect_get_vec_def_for_stmt_copy (vinfo
, vec_oprnd1
);
7203 if (ldataref_ptr
== NULL
)
7204 vec_oprnd2
= vect_get_vec_def_for_stmt_copy (vinfo
, vec_oprnd2
);
7205 vec_oprnd3
= vect_get_vec_def_for_stmt_copy (vinfo
, vec_oprnd3
);
7206 if (!inscan_var_store
)
7207 dataref_offset
= int_const_binop (PLUS_EXPR
, dataref_offset
, bump
);
7212 vec_oprnd2
= make_ssa_name (vectype
);
7213 tree data_ref
= fold_build2 (MEM_REF
, vectype
,
7214 unshare_expr (ldataref_ptr
),
7216 vect_copy_ref_info (data_ref
, DR_REF (load1_dr_info
->dr
));
7217 gimple
*g
= gimple_build_assign (vec_oprnd2
, data_ref
);
7218 new_stmt_info
= vect_finish_stmt_generation (vinfo
, stmt_info
, g
, gsi
);
7219 if (prev_stmt_info
== NULL
)
7220 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt_info
;
7222 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt_info
;
7223 prev_stmt_info
= new_stmt_info
;
7226 tree v
= vec_oprnd2
;
7227 for (int i
= 0; i
< units_log2
; ++i
)
7229 tree new_temp
= make_ssa_name (vectype
);
7230 gimple
*g
= gimple_build_assign (new_temp
, VEC_PERM_EXPR
,
7232 && (use_whole_vector
[i
]
7233 != scan_store_kind_perm
))
7234 ? zero_vec
: vec_oprnd1
, v
,
7236 new_stmt_info
= vect_finish_stmt_generation (vinfo
, stmt_info
, g
, gsi
);
7237 if (prev_stmt_info
== NULL
)
7238 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt_info
;
7240 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt_info
;
7241 prev_stmt_info
= new_stmt_info
;
7243 if (zero_vec
&& use_whole_vector
[i
] == scan_store_kind_lshift_cond
)
7245 /* Whole vector shift shifted in zero bits, but if *init
7246 is not initializer_zerop, we need to replace those elements
7247 with elements from vec_oprnd1. */
7248 tree_vector_builder
vb (masktype
, nunits
, 1);
7249 for (unsigned HOST_WIDE_INT k
= 0; k
< nunits
; ++k
)
7250 vb
.quick_push (k
< (HOST_WIDE_INT_1U
<< i
)
7251 ? boolean_false_node
: boolean_true_node
);
7253 tree new_temp2
= make_ssa_name (vectype
);
7254 g
= gimple_build_assign (new_temp2
, VEC_COND_EXPR
, vb
.build (),
7255 new_temp
, vec_oprnd1
);
7256 new_stmt_info
= vect_finish_stmt_generation (vinfo
, stmt_info
,
7258 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt_info
;
7259 prev_stmt_info
= new_stmt_info
;
7260 new_temp
= new_temp2
;
7263 /* For exclusive scan, perform the perms[i] permutation once
7266 && STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) == 4
7274 tree new_temp2
= make_ssa_name (vectype
);
7275 g
= gimple_build_assign (new_temp2
, code
, v
, new_temp
);
7276 new_stmt_info
= vect_finish_stmt_generation (vinfo
, stmt_info
, g
, gsi
);
7277 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt_info
;
7278 prev_stmt_info
= new_stmt_info
;
7283 tree new_temp
= make_ssa_name (vectype
);
7284 gimple
*g
= gimple_build_assign (new_temp
, code
, orig
, v
);
7285 new_stmt_info
= vect_finish_stmt_generation (vinfo
, stmt_info
, g
, gsi
);
7286 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt_info
;
7287 prev_stmt_info
= new_stmt_info
;
7289 tree last_perm_arg
= new_temp
;
7290 /* For exclusive scan, new_temp computed above is the exclusive scan
7291 prefix sum. Turn it into inclusive prefix sum for the broadcast
7292 of the last element into orig. */
7293 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) == 4)
7295 last_perm_arg
= make_ssa_name (vectype
);
7296 g
= gimple_build_assign (last_perm_arg
, code
, new_temp
, vec_oprnd2
);
7297 new_stmt_info
= vect_finish_stmt_generation (vinfo
, stmt_info
, g
, gsi
);
7298 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt_info
;
7299 prev_stmt_info
= new_stmt_info
;
7302 orig
= make_ssa_name (vectype
);
7303 g
= gimple_build_assign (orig
, VEC_PERM_EXPR
, last_perm_arg
,
7304 last_perm_arg
, perms
[units_log2
]);
7305 new_stmt_info
= vect_finish_stmt_generation (vinfo
, stmt_info
, g
, gsi
);
7306 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt_info
;
7307 prev_stmt_info
= new_stmt_info
;
7309 if (!inscan_var_store
)
7311 tree data_ref
= fold_build2 (MEM_REF
, vectype
,
7312 unshare_expr (dataref_ptr
),
7314 vect_copy_ref_info (data_ref
, DR_REF (dr_info
->dr
));
7315 g
= gimple_build_assign (data_ref
, new_temp
);
7316 new_stmt_info
= vect_finish_stmt_generation (vinfo
, stmt_info
, g
, gsi
);
7317 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt_info
;
7318 prev_stmt_info
= new_stmt_info
;
7322 if (inscan_var_store
)
7323 for (int j
= 0; j
< ncopies
; j
++)
7326 dataref_offset
= int_const_binop (PLUS_EXPR
, dataref_offset
, bump
);
7328 tree data_ref
= fold_build2 (MEM_REF
, vectype
,
7329 unshare_expr (dataref_ptr
),
7331 vect_copy_ref_info (data_ref
, DR_REF (dr_info
->dr
));
7332 gimple
*g
= gimple_build_assign (data_ref
, orig
);
7333 stmt_vec_info new_stmt_info
7334 = vect_finish_stmt_generation (vinfo
, stmt_info
, g
, gsi
);
7335 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt_info
;
7336 prev_stmt_info
= new_stmt_info
;
7342 /* Function vectorizable_store.
7344 Check if STMT_INFO defines a non scalar data-ref (array/pointer/structure)
7345 that can be vectorized.
7346 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
7347 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
7348 Return true if STMT_INFO is vectorizable in this way. */
7351 vectorizable_store (vec_info
*vinfo
,
7352 stmt_vec_info stmt_info
, gimple_stmt_iterator
*gsi
,
7353 stmt_vec_info
*vec_stmt
, slp_tree slp_node
,
7354 stmt_vector_for_cost
*cost_vec
)
7358 tree vec_oprnd
= NULL_TREE
;
7360 loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
);
7361 class loop
*loop
= NULL
;
7362 machine_mode vec_mode
;
7364 enum dr_alignment_support alignment_support_scheme
;
7365 enum vect_def_type rhs_dt
= vect_unknown_def_type
;
7366 enum vect_def_type mask_dt
= vect_unknown_def_type
;
7367 stmt_vec_info prev_stmt_info
= NULL
;
7368 tree dataref_ptr
= NULL_TREE
;
7369 tree dataref_offset
= NULL_TREE
;
7370 gimple
*ptr_incr
= NULL
;
7373 stmt_vec_info first_stmt_info
;
7375 unsigned int group_size
, i
;
7376 vec
<tree
> oprnds
= vNULL
;
7377 vec
<tree
> result_chain
= vNULL
;
7378 tree offset
= NULL_TREE
;
7379 vec
<tree
> vec_oprnds
= vNULL
;
7380 bool slp
= (slp_node
!= NULL
);
7381 unsigned int vec_num
;
7382 bb_vec_info bb_vinfo
= dyn_cast
<bb_vec_info
> (vinfo
);
7384 gather_scatter_info gs_info
;
7386 vec_load_store_type vls_type
;
7389 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
7392 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
7396 /* Is vectorizable store? */
7398 tree mask
= NULL_TREE
, mask_vectype
= NULL_TREE
;
7399 if (gassign
*assign
= dyn_cast
<gassign
*> (stmt_info
->stmt
))
7401 tree scalar_dest
= gimple_assign_lhs (assign
);
7402 if (TREE_CODE (scalar_dest
) == VIEW_CONVERT_EXPR
7403 && is_pattern_stmt_p (stmt_info
))
7404 scalar_dest
= TREE_OPERAND (scalar_dest
, 0);
7405 if (TREE_CODE (scalar_dest
) != ARRAY_REF
7406 && TREE_CODE (scalar_dest
) != BIT_FIELD_REF
7407 && TREE_CODE (scalar_dest
) != INDIRECT_REF
7408 && TREE_CODE (scalar_dest
) != COMPONENT_REF
7409 && TREE_CODE (scalar_dest
) != IMAGPART_EXPR
7410 && TREE_CODE (scalar_dest
) != REALPART_EXPR
7411 && TREE_CODE (scalar_dest
) != MEM_REF
)
7416 gcall
*call
= dyn_cast
<gcall
*> (stmt_info
->stmt
);
7417 if (!call
|| !gimple_call_internal_p (call
))
7420 internal_fn ifn
= gimple_call_internal_fn (call
);
7421 if (!internal_store_fn_p (ifn
))
7424 if (slp_node
!= NULL
)
7426 if (dump_enabled_p ())
7427 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
7428 "SLP of masked stores not supported.\n");
7432 int mask_index
= internal_fn_mask_index (ifn
);
7433 if (mask_index
>= 0)
7435 mask
= gimple_call_arg (call
, mask_index
);
7436 if (!vect_check_scalar_mask (vinfo
, stmt_info
, mask
, &mask_dt
,
7442 op
= vect_get_store_rhs (stmt_info
);
7444 /* Cannot have hybrid store SLP -- that would mean storing to the
7445 same location twice. */
7446 gcc_assert (slp
== PURE_SLP_STMT (stmt_info
));
7448 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
), rhs_vectype
= NULL_TREE
;
7449 poly_uint64 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
7453 loop
= LOOP_VINFO_LOOP (loop_vinfo
);
7454 vf
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
);
7459 /* Multiple types in SLP are handled by creating the appropriate number of
7460 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
7465 ncopies
= vect_get_num_copies (loop_vinfo
, vectype
);
7467 gcc_assert (ncopies
>= 1);
7469 /* FORNOW. This restriction should be relaxed. */
7470 if (loop
&& nested_in_vect_loop_p (loop
, stmt_info
) && ncopies
> 1)
7472 if (dump_enabled_p ())
7473 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
7474 "multiple types in nested loop.\n");
7478 if (!vect_check_store_rhs (vinfo
, stmt_info
,
7479 op
, &rhs_dt
, &rhs_vectype
, &vls_type
))
7482 elem_type
= TREE_TYPE (vectype
);
7483 vec_mode
= TYPE_MODE (vectype
);
7485 if (!STMT_VINFO_DATA_REF (stmt_info
))
7488 vect_memory_access_type memory_access_type
;
7489 if (!get_load_store_type (vinfo
, stmt_info
, vectype
, slp
, mask
, vls_type
,
7490 ncopies
, &memory_access_type
, &gs_info
))
7495 if (memory_access_type
== VMAT_CONTIGUOUS
)
7497 if (!VECTOR_MODE_P (vec_mode
)
7498 || !can_vec_mask_load_store_p (vec_mode
,
7499 TYPE_MODE (mask_vectype
), false))
7502 else if (memory_access_type
!= VMAT_LOAD_STORE_LANES
7503 && (memory_access_type
!= VMAT_GATHER_SCATTER
7504 || (gs_info
.decl
&& !VECTOR_BOOLEAN_TYPE_P (mask_vectype
))))
7506 if (dump_enabled_p ())
7507 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
7508 "unsupported access type for masked store.\n");
7514 /* FORNOW. In some cases can vectorize even if data-type not supported
7515 (e.g. - array initialization with 0). */
7516 if (optab_handler (mov_optab
, vec_mode
) == CODE_FOR_nothing
)
7520 dr_vec_info
*dr_info
= STMT_VINFO_DR_INFO (stmt_info
), *first_dr_info
= NULL
;
7521 grouped_store
= (STMT_VINFO_GROUPED_ACCESS (stmt_info
)
7522 && memory_access_type
!= VMAT_GATHER_SCATTER
7523 && (slp
|| memory_access_type
!= VMAT_CONTIGUOUS
));
7526 first_stmt_info
= DR_GROUP_FIRST_ELEMENT (stmt_info
);
7527 first_dr_info
= STMT_VINFO_DR_INFO (first_stmt_info
);
7528 group_size
= DR_GROUP_SIZE (first_stmt_info
);
7532 first_stmt_info
= stmt_info
;
7533 first_dr_info
= dr_info
;
7534 group_size
= vec_num
= 1;
7537 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) > 1 && !vec_stmt
)
7539 if (!check_scan_store (vinfo
, stmt_info
, vectype
, rhs_dt
, slp
, mask
,
7540 memory_access_type
))
7544 if (!vec_stmt
) /* transformation not required. */
7546 STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info
) = memory_access_type
;
7549 && LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo
))
7550 check_load_store_masking (loop_vinfo
, vectype
, vls_type
, group_size
,
7551 memory_access_type
, &gs_info
, mask
);
7554 && !vect_maybe_update_slp_op_vectype (SLP_TREE_CHILDREN (slp_node
)[0],
7557 if (dump_enabled_p ())
7558 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
7559 "incompatible vector types for invariants\n");
7563 STMT_VINFO_TYPE (stmt_info
) = store_vec_info_type
;
7564 vect_model_store_cost (vinfo
, stmt_info
, ncopies
,
7565 memory_access_type
, vls_type
, slp_node
, cost_vec
);
7568 gcc_assert (memory_access_type
== STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info
));
7572 ensure_base_align (dr_info
);
7574 if (memory_access_type
== VMAT_GATHER_SCATTER
&& gs_info
.decl
)
7576 tree vec_oprnd0
= NULL_TREE
, vec_oprnd1
= NULL_TREE
, src
;
7577 tree arglist
= TYPE_ARG_TYPES (TREE_TYPE (gs_info
.decl
));
7578 tree rettype
, srctype
, ptrtype
, idxtype
, masktype
, scaletype
;
7579 tree ptr
, var
, scale
, vec_mask
;
7580 tree mask_arg
= NULL_TREE
, mask_op
= NULL_TREE
, perm_mask
= NULL_TREE
;
7581 tree mask_halfvectype
= mask_vectype
;
7582 edge pe
= loop_preheader_edge (loop
);
7585 enum { NARROW
, NONE
, WIDEN
} modifier
;
7586 poly_uint64 scatter_off_nunits
7587 = TYPE_VECTOR_SUBPARTS (gs_info
.offset_vectype
);
7589 if (known_eq (nunits
, scatter_off_nunits
))
7591 else if (known_eq (nunits
* 2, scatter_off_nunits
))
7595 /* Currently gathers and scatters are only supported for
7596 fixed-length vectors. */
7597 unsigned int count
= scatter_off_nunits
.to_constant ();
7598 vec_perm_builder
sel (count
, count
, 1);
7599 for (i
= 0; i
< (unsigned int) count
; ++i
)
7600 sel
.quick_push (i
| (count
/ 2));
7602 vec_perm_indices
indices (sel
, 1, count
);
7603 perm_mask
= vect_gen_perm_mask_checked (gs_info
.offset_vectype
,
7605 gcc_assert (perm_mask
!= NULL_TREE
);
7607 else if (known_eq (nunits
, scatter_off_nunits
* 2))
7611 /* Currently gathers and scatters are only supported for
7612 fixed-length vectors. */
7613 unsigned int count
= nunits
.to_constant ();
7614 vec_perm_builder
sel (count
, count
, 1);
7615 for (i
= 0; i
< (unsigned int) count
; ++i
)
7616 sel
.quick_push (i
| (count
/ 2));
7618 vec_perm_indices
indices (sel
, 2, count
);
7619 perm_mask
= vect_gen_perm_mask_checked (vectype
, indices
);
7620 gcc_assert (perm_mask
!= NULL_TREE
);
7624 mask_halfvectype
= truth_type_for (gs_info
.offset_vectype
);
7629 rettype
= TREE_TYPE (TREE_TYPE (gs_info
.decl
));
7630 ptrtype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
7631 masktype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
7632 idxtype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
7633 srctype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
7634 scaletype
= TREE_VALUE (arglist
);
7636 gcc_checking_assert (TREE_CODE (masktype
) == INTEGER_TYPE
7637 && TREE_CODE (rettype
) == VOID_TYPE
);
7639 ptr
= fold_convert (ptrtype
, gs_info
.base
);
7640 if (!is_gimple_min_invariant (ptr
))
7642 ptr
= force_gimple_operand (ptr
, &seq
, true, NULL_TREE
);
7643 new_bb
= gsi_insert_seq_on_edge_immediate (pe
, seq
);
7644 gcc_assert (!new_bb
);
7647 if (mask
== NULL_TREE
)
7649 mask_arg
= build_int_cst (masktype
, -1);
7650 mask_arg
= vect_init_vector (vinfo
, stmt_info
,
7651 mask_arg
, masktype
, NULL
);
7654 scale
= build_int_cst (scaletype
, gs_info
.scale
);
7656 prev_stmt_info
= NULL
;
7657 for (j
= 0; j
< ncopies
; ++j
)
7661 src
= vec_oprnd1
= vect_get_vec_def_for_operand (vinfo
,
7663 op
= vec_oprnd0
= vect_get_vec_def_for_operand (vinfo
,
7667 mask_op
= vec_mask
= vect_get_vec_def_for_operand (vinfo
, mask
,
7670 else if (modifier
!= NONE
&& (j
& 1))
7672 if (modifier
== WIDEN
)
7675 = vec_oprnd1
= vect_get_vec_def_for_stmt_copy (vinfo
,
7677 op
= permute_vec_elements (vinfo
, vec_oprnd0
, vec_oprnd0
,
7678 perm_mask
, stmt_info
, gsi
);
7681 = vec_mask
= vect_get_vec_def_for_stmt_copy (vinfo
,
7684 else if (modifier
== NARROW
)
7686 src
= permute_vec_elements (vinfo
, vec_oprnd1
, vec_oprnd1
,
7687 perm_mask
, stmt_info
, gsi
);
7688 op
= vec_oprnd0
= vect_get_vec_def_for_stmt_copy (vinfo
,
7696 src
= vec_oprnd1
= vect_get_vec_def_for_stmt_copy (vinfo
,
7698 op
= vec_oprnd0
= vect_get_vec_def_for_stmt_copy (vinfo
,
7701 mask_op
= vec_mask
= vect_get_vec_def_for_stmt_copy (vinfo
,
7705 if (!useless_type_conversion_p (srctype
, TREE_TYPE (src
)))
7707 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (src
)),
7708 TYPE_VECTOR_SUBPARTS (srctype
)));
7709 var
= vect_get_new_ssa_name (srctype
, vect_simple_var
);
7710 src
= build1 (VIEW_CONVERT_EXPR
, srctype
, src
);
7712 = gimple_build_assign (var
, VIEW_CONVERT_EXPR
, src
);
7713 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
7717 if (!useless_type_conversion_p (idxtype
, TREE_TYPE (op
)))
7719 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op
)),
7720 TYPE_VECTOR_SUBPARTS (idxtype
)));
7721 var
= vect_get_new_ssa_name (idxtype
, vect_simple_var
);
7722 op
= build1 (VIEW_CONVERT_EXPR
, idxtype
, op
);
7724 = gimple_build_assign (var
, VIEW_CONVERT_EXPR
, op
);
7725 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
7733 if (modifier
== NARROW
)
7735 var
= vect_get_new_ssa_name (mask_halfvectype
,
7738 = gimple_build_assign (var
, (j
& 1) ? VEC_UNPACK_HI_EXPR
7739 : VEC_UNPACK_LO_EXPR
,
7741 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
7744 tree optype
= TREE_TYPE (mask_arg
);
7745 if (TYPE_MODE (masktype
) == TYPE_MODE (optype
))
7748 utype
= lang_hooks
.types
.type_for_mode (TYPE_MODE (optype
), 1);
7749 var
= vect_get_new_ssa_name (utype
, vect_scalar_var
);
7750 mask_arg
= build1 (VIEW_CONVERT_EXPR
, utype
, mask_arg
);
7752 = gimple_build_assign (var
, VIEW_CONVERT_EXPR
, mask_arg
);
7753 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
7755 if (!useless_type_conversion_p (masktype
, utype
))
7757 gcc_assert (TYPE_PRECISION (utype
)
7758 <= TYPE_PRECISION (masktype
));
7759 var
= vect_get_new_ssa_name (masktype
, vect_scalar_var
);
7760 new_stmt
= gimple_build_assign (var
, NOP_EXPR
, mask_arg
);
7761 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
7767 = gimple_build_call (gs_info
.decl
, 5, ptr
, mask_arg
, op
, src
, scale
);
7768 stmt_vec_info new_stmt_info
7769 = vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
7771 if (prev_stmt_info
== NULL
)
7772 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt_info
;
7774 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt_info
;
7775 prev_stmt_info
= new_stmt_info
;
7779 else if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) >= 3)
7780 return vectorizable_scan_store (vinfo
, stmt_info
, gsi
, vec_stmt
, ncopies
);
7782 if (STMT_VINFO_GROUPED_ACCESS (stmt_info
))
7783 DR_GROUP_STORE_COUNT (DR_GROUP_FIRST_ELEMENT (stmt_info
))++;
7788 gcc_assert (!loop
|| !nested_in_vect_loop_p (loop
, stmt_info
));
7790 /* We vectorize all the stmts of the interleaving group when we
7791 reach the last stmt in the group. */
7792 if (DR_GROUP_STORE_COUNT (first_stmt_info
)
7793 < DR_GROUP_SIZE (first_stmt_info
)
7802 grouped_store
= false;
7803 /* VEC_NUM is the number of vect stmts to be created for this
7805 vec_num
= SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
);
7806 first_stmt_info
= SLP_TREE_SCALAR_STMTS (slp_node
)[0];
7807 gcc_assert (DR_GROUP_FIRST_ELEMENT (first_stmt_info
)
7808 == first_stmt_info
);
7809 first_dr_info
= STMT_VINFO_DR_INFO (first_stmt_info
);
7810 op
= vect_get_store_rhs (first_stmt_info
);
7813 /* VEC_NUM is the number of vect stmts to be created for this
7815 vec_num
= group_size
;
7817 ref_type
= get_group_alias_ptr_type (first_stmt_info
);
7820 ref_type
= reference_alias_ptr_type (DR_REF (first_dr_info
->dr
));
7822 if (dump_enabled_p ())
7823 dump_printf_loc (MSG_NOTE
, vect_location
,
7824 "transform store. ncopies = %d\n", ncopies
);
7826 if (memory_access_type
== VMAT_ELEMENTWISE
7827 || memory_access_type
== VMAT_STRIDED_SLP
)
7829 gimple_stmt_iterator incr_gsi
;
7835 tree stride_base
, stride_step
, alias_off
;
7839 /* Checked by get_load_store_type. */
7840 unsigned int const_nunits
= nunits
.to_constant ();
7842 gcc_assert (!LOOP_VINFO_FULLY_MASKED_P (loop_vinfo
));
7843 gcc_assert (!nested_in_vect_loop_p (loop
, stmt_info
));
7845 dr_offset
= get_dr_vinfo_offset (vinfo
, first_dr_info
);
7847 = fold_build_pointer_plus
7848 (DR_BASE_ADDRESS (first_dr_info
->dr
),
7849 size_binop (PLUS_EXPR
,
7850 convert_to_ptrofftype (dr_offset
),
7851 convert_to_ptrofftype (DR_INIT (first_dr_info
->dr
))));
7852 stride_step
= fold_convert (sizetype
, DR_STEP (first_dr_info
->dr
));
7854 /* For a store with loop-invariant (but other than power-of-2)
7855 stride (i.e. not a grouped access) like so:
7857 for (i = 0; i < n; i += stride)
7860 we generate a new induction variable and new stores from
7861 the components of the (vectorized) rhs:
7863 for (j = 0; ; j += VF*stride)
7868 array[j + stride] = tmp2;
7872 unsigned nstores
= const_nunits
;
7874 tree ltype
= elem_type
;
7875 tree lvectype
= vectype
;
7878 if (group_size
< const_nunits
7879 && const_nunits
% group_size
== 0)
7881 nstores
= const_nunits
/ group_size
;
7883 ltype
= build_vector_type (elem_type
, group_size
);
7886 /* First check if vec_extract optab doesn't support extraction
7887 of vector elts directly. */
7888 scalar_mode elmode
= SCALAR_TYPE_MODE (elem_type
);
7890 if (!VECTOR_MODE_P (TYPE_MODE (vectype
))
7891 || !related_vector_mode (TYPE_MODE (vectype
), elmode
,
7892 group_size
).exists (&vmode
)
7893 || (convert_optab_handler (vec_extract_optab
,
7894 TYPE_MODE (vectype
), vmode
)
7895 == CODE_FOR_nothing
))
7897 /* Try to avoid emitting an extract of vector elements
7898 by performing the extracts using an integer type of the
7899 same size, extracting from a vector of those and then
7900 re-interpreting it as the original vector type if
7903 = group_size
* GET_MODE_BITSIZE (elmode
);
7904 unsigned int lnunits
= const_nunits
/ group_size
;
7905 /* If we can't construct such a vector fall back to
7906 element extracts from the original vector type and
7907 element size stores. */
7908 if (int_mode_for_size (lsize
, 0).exists (&elmode
)
7909 && VECTOR_MODE_P (TYPE_MODE (vectype
))
7910 && related_vector_mode (TYPE_MODE (vectype
), elmode
,
7911 lnunits
).exists (&vmode
)
7912 && (convert_optab_handler (vec_extract_optab
,
7914 != CODE_FOR_nothing
))
7918 ltype
= build_nonstandard_integer_type (lsize
, 1);
7919 lvectype
= build_vector_type (ltype
, nstores
);
7921 /* Else fall back to vector extraction anyway.
7922 Fewer stores are more important than avoiding spilling
7923 of the vector we extract from. Compared to the
7924 construction case in vectorizable_load no store-forwarding
7925 issue exists here for reasonable archs. */
7928 else if (group_size
>= const_nunits
7929 && group_size
% const_nunits
== 0)
7932 lnel
= const_nunits
;
7936 ltype
= build_aligned_type (ltype
, TYPE_ALIGN (elem_type
));
7937 ncopies
= SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
);
7940 ivstep
= stride_step
;
7941 ivstep
= fold_build2 (MULT_EXPR
, TREE_TYPE (ivstep
), ivstep
,
7942 build_int_cst (TREE_TYPE (ivstep
), vf
));
7944 standard_iv_increment_position (loop
, &incr_gsi
, &insert_after
);
7946 stride_base
= cse_and_gimplify_to_preheader (loop_vinfo
, stride_base
);
7947 ivstep
= cse_and_gimplify_to_preheader (loop_vinfo
, ivstep
);
7948 create_iv (stride_base
, ivstep
, NULL
,
7949 loop
, &incr_gsi
, insert_after
,
7951 incr
= gsi_stmt (incr_gsi
);
7952 loop_vinfo
->add_stmt (incr
);
7954 stride_step
= cse_and_gimplify_to_preheader (loop_vinfo
, stride_step
);
7956 prev_stmt_info
= NULL
;
7957 alias_off
= build_int_cst (ref_type
, 0);
7958 stmt_vec_info next_stmt_info
= first_stmt_info
;
7959 for (g
= 0; g
< group_size
; g
++)
7961 running_off
= offvar
;
7964 tree size
= TYPE_SIZE_UNIT (ltype
);
7965 tree pos
= fold_build2 (MULT_EXPR
, sizetype
, size_int (g
),
7967 tree newoff
= copy_ssa_name (running_off
, NULL
);
7968 incr
= gimple_build_assign (newoff
, POINTER_PLUS_EXPR
,
7970 vect_finish_stmt_generation (vinfo
, stmt_info
, incr
, gsi
);
7971 running_off
= newoff
;
7973 unsigned int group_el
= 0;
7974 unsigned HOST_WIDE_INT
7975 elsz
= tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype
)));
7976 for (j
= 0; j
< ncopies
; j
++)
7978 /* We've set op and dt above, from vect_get_store_rhs,
7979 and first_stmt_info == stmt_info. */
7984 vect_get_vec_defs (vinfo
, op
, NULL_TREE
, stmt_info
,
7985 &vec_oprnds
, NULL
, slp_node
);
7986 vec_oprnd
= vec_oprnds
[0];
7990 op
= vect_get_store_rhs (next_stmt_info
);
7991 vec_oprnd
= vect_get_vec_def_for_operand
7992 (vinfo
, op
, next_stmt_info
);
7998 vec_oprnd
= vec_oprnds
[j
];
8000 vec_oprnd
= vect_get_vec_def_for_stmt_copy (vinfo
,
8003 /* Pun the vector to extract from if necessary. */
8004 if (lvectype
!= vectype
)
8006 tree tem
= make_ssa_name (lvectype
);
8008 = gimple_build_assign (tem
, build1 (VIEW_CONVERT_EXPR
,
8009 lvectype
, vec_oprnd
));
8010 vect_finish_stmt_generation (vinfo
, stmt_info
, pun
, gsi
);
8013 for (i
= 0; i
< nstores
; i
++)
8015 tree newref
, newoff
;
8016 gimple
*incr
, *assign
;
8017 tree size
= TYPE_SIZE (ltype
);
8018 /* Extract the i'th component. */
8019 tree pos
= fold_build2 (MULT_EXPR
, bitsizetype
,
8020 bitsize_int (i
), size
);
8021 tree elem
= fold_build3 (BIT_FIELD_REF
, ltype
, vec_oprnd
,
8024 elem
= force_gimple_operand_gsi (gsi
, elem
, true,
8028 tree this_off
= build_int_cst (TREE_TYPE (alias_off
),
8030 newref
= build2 (MEM_REF
, ltype
,
8031 running_off
, this_off
);
8032 vect_copy_ref_info (newref
, DR_REF (first_dr_info
->dr
));
8034 /* And store it to *running_off. */
8035 assign
= gimple_build_assign (newref
, elem
);
8036 stmt_vec_info assign_info
8037 = vect_finish_stmt_generation (vinfo
, stmt_info
,
8042 || group_el
== group_size
)
8044 newoff
= copy_ssa_name (running_off
, NULL
);
8045 incr
= gimple_build_assign (newoff
, POINTER_PLUS_EXPR
,
8046 running_off
, stride_step
);
8047 vect_finish_stmt_generation (vinfo
, stmt_info
, incr
, gsi
);
8049 running_off
= newoff
;
8052 if (g
== group_size
- 1
8055 if (j
== 0 && i
== 0)
8056 STMT_VINFO_VEC_STMT (stmt_info
)
8057 = *vec_stmt
= assign_info
;
8059 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = assign_info
;
8060 prev_stmt_info
= assign_info
;
8064 next_stmt_info
= DR_GROUP_NEXT_ELEMENT (next_stmt_info
);
8069 vec_oprnds
.release ();
8073 auto_vec
<tree
> dr_chain (group_size
);
8074 oprnds
.create (group_size
);
8076 /* Gather-scatter accesses perform only component accesses, alignment
8077 is irrelevant for them. */
8078 if (memory_access_type
== VMAT_GATHER_SCATTER
)
8079 alignment_support_scheme
= dr_unaligned_supported
;
8081 alignment_support_scheme
8082 = vect_supportable_dr_alignment (vinfo
, first_dr_info
, false);
8084 gcc_assert (alignment_support_scheme
);
8085 vec_loop_masks
*loop_masks
8086 = (loop_vinfo
&& LOOP_VINFO_FULLY_MASKED_P (loop_vinfo
)
8087 ? &LOOP_VINFO_MASKS (loop_vinfo
)
8089 /* Targets with store-lane instructions must not require explicit
8090 realignment. vect_supportable_dr_alignment always returns either
8091 dr_aligned or dr_unaligned_supported for masked operations. */
8092 gcc_assert ((memory_access_type
!= VMAT_LOAD_STORE_LANES
8095 || alignment_support_scheme
== dr_aligned
8096 || alignment_support_scheme
== dr_unaligned_supported
);
8098 if (memory_access_type
== VMAT_CONTIGUOUS_DOWN
8099 || memory_access_type
== VMAT_CONTIGUOUS_REVERSE
)
8100 offset
= size_int (-TYPE_VECTOR_SUBPARTS (vectype
) + 1);
8103 tree vec_offset
= NULL_TREE
;
8104 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info
))
8106 aggr_type
= NULL_TREE
;
8109 else if (memory_access_type
== VMAT_GATHER_SCATTER
)
8111 aggr_type
= elem_type
;
8112 vect_get_strided_load_store_ops (stmt_info
, loop_vinfo
, &gs_info
,
8113 &bump
, &vec_offset
);
8117 if (memory_access_type
== VMAT_LOAD_STORE_LANES
)
8118 aggr_type
= build_array_type_nelts (elem_type
, vec_num
* nunits
);
8120 aggr_type
= vectype
;
8121 bump
= vect_get_data_ptr_increment (vinfo
, dr_info
, aggr_type
,
8122 memory_access_type
);
8126 LOOP_VINFO_HAS_MASK_STORE (loop_vinfo
) = true;
8128 /* In case the vectorization factor (VF) is bigger than the number
8129 of elements that we can fit in a vectype (nunits), we have to generate
8130 more than one vector stmt - i.e - we need to "unroll" the
8131 vector stmt by a factor VF/nunits. For more details see documentation in
8132 vect_get_vec_def_for_copy_stmt. */
8134 /* In case of interleaving (non-unit grouped access):
8141 We create vectorized stores starting from base address (the access of the
8142 first stmt in the chain (S2 in the above example), when the last store stmt
8143 of the chain (S4) is reached:
8146 VS2: &base + vec_size*1 = vx0
8147 VS3: &base + vec_size*2 = vx1
8148 VS4: &base + vec_size*3 = vx3
8150 Then permutation statements are generated:
8152 VS5: vx5 = VEC_PERM_EXPR < vx0, vx3, {0, 8, 1, 9, 2, 10, 3, 11} >
8153 VS6: vx6 = VEC_PERM_EXPR < vx0, vx3, {4, 12, 5, 13, 6, 14, 7, 15} >
8156 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
8157 (the order of the data-refs in the output of vect_permute_store_chain
8158 corresponds to the order of scalar stmts in the interleaving chain - see
8159 the documentation of vect_permute_store_chain()).
8161 In case of both multiple types and interleaving, above vector stores and
8162 permutation stmts are created for every copy. The result vector stmts are
8163 put in STMT_VINFO_VEC_STMT for the first copy and in the corresponding
8164 STMT_VINFO_RELATED_STMT for the next copies.
8167 prev_stmt_info
= NULL
;
8168 tree vec_mask
= NULL_TREE
;
8169 for (j
= 0; j
< ncopies
; j
++)
8171 stmt_vec_info new_stmt_info
;
8176 /* Get vectorized arguments for SLP_NODE. */
8177 vect_get_vec_defs (vinfo
, op
, NULL_TREE
, stmt_info
, &vec_oprnds
,
8180 vec_oprnd
= vec_oprnds
[0];
8184 /* For interleaved stores we collect vectorized defs for all the
8185 stores in the group in DR_CHAIN and OPRNDS. DR_CHAIN is then
8186 used as an input to vect_permute_store_chain(), and OPRNDS as
8187 an input to vect_get_vec_def_for_stmt_copy() for the next copy.
8189 If the store is not grouped, DR_GROUP_SIZE is 1, and DR_CHAIN and
8190 OPRNDS are of size 1. */
8191 stmt_vec_info next_stmt_info
= first_stmt_info
;
8192 for (i
= 0; i
< group_size
; i
++)
8194 /* Since gaps are not supported for interleaved stores,
8195 DR_GROUP_SIZE is the exact number of stmts in the chain.
8196 Therefore, NEXT_STMT_INFO can't be NULL_TREE. In case
8197 that there is no interleaving, DR_GROUP_SIZE is 1,
8198 and only one iteration of the loop will be executed. */
8199 op
= vect_get_store_rhs (next_stmt_info
);
8200 vec_oprnd
= vect_get_vec_def_for_operand
8201 (vinfo
, op
, next_stmt_info
);
8202 dr_chain
.quick_push (vec_oprnd
);
8203 oprnds
.quick_push (vec_oprnd
);
8204 next_stmt_info
= DR_GROUP_NEXT_ELEMENT (next_stmt_info
);
8207 vec_mask
= vect_get_vec_def_for_operand (vinfo
, mask
, stmt_info
,
8211 /* We should have catched mismatched types earlier. */
8212 gcc_assert (useless_type_conversion_p (vectype
,
8213 TREE_TYPE (vec_oprnd
)));
8214 bool simd_lane_access_p
8215 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) != 0;
8216 if (simd_lane_access_p
8218 && TREE_CODE (DR_BASE_ADDRESS (first_dr_info
->dr
)) == ADDR_EXPR
8219 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr_info
->dr
), 0))
8220 && integer_zerop (get_dr_vinfo_offset (vinfo
, first_dr_info
))
8221 && integer_zerop (DR_INIT (first_dr_info
->dr
))
8222 && alias_sets_conflict_p (get_alias_set (aggr_type
),
8223 get_alias_set (TREE_TYPE (ref_type
))))
8225 dataref_ptr
= unshare_expr (DR_BASE_ADDRESS (first_dr_info
->dr
));
8226 dataref_offset
= build_int_cst (ref_type
, 0);
8228 else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info
))
8229 vect_get_gather_scatter_ops (vinfo
, loop
, stmt_info
, &gs_info
,
8230 &dataref_ptr
, &vec_offset
);
8233 = vect_create_data_ref_ptr (vinfo
, first_stmt_info
, aggr_type
,
8234 simd_lane_access_p
? loop
: NULL
,
8235 offset
, &dummy
, gsi
, &ptr_incr
,
8236 simd_lane_access_p
, NULL_TREE
, bump
);
8240 /* For interleaved stores we created vectorized defs for all the
8241 defs stored in OPRNDS in the previous iteration (previous copy).
8242 DR_CHAIN is then used as an input to vect_permute_store_chain(),
8243 and OPRNDS as an input to vect_get_vec_def_for_stmt_copy() for the
8245 If the store is not grouped, DR_GROUP_SIZE is 1, and DR_CHAIN and
8246 OPRNDS are of size 1. */
8247 for (i
= 0; i
< group_size
; i
++)
8250 vec_oprnd
= vect_get_vec_def_for_stmt_copy (vinfo
, op
);
8251 dr_chain
[i
] = vec_oprnd
;
8252 oprnds
[i
] = vec_oprnd
;
8255 vec_mask
= vect_get_vec_def_for_stmt_copy (vinfo
, vec_mask
);
8258 = int_const_binop (PLUS_EXPR
, dataref_offset
, bump
);
8259 else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info
))
8260 vec_offset
= vect_get_vec_def_for_stmt_copy (vinfo
, vec_offset
);
8262 dataref_ptr
= bump_vector_ptr (vinfo
, dataref_ptr
, ptr_incr
, gsi
,
8266 if (memory_access_type
== VMAT_LOAD_STORE_LANES
)
8270 /* Get an array into which we can store the individual vectors. */
8271 vec_array
= create_vector_array (vectype
, vec_num
);
8273 /* Invalidate the current contents of VEC_ARRAY. This should
8274 become an RTL clobber too, which prevents the vector registers
8275 from being upward-exposed. */
8276 vect_clobber_variable (vinfo
, stmt_info
, gsi
, vec_array
);
8278 /* Store the individual vectors into the array. */
8279 for (i
= 0; i
< vec_num
; i
++)
8281 vec_oprnd
= dr_chain
[i
];
8282 write_vector_array (vinfo
, stmt_info
,
8283 gsi
, vec_oprnd
, vec_array
, i
);
8286 tree final_mask
= NULL
;
8288 final_mask
= vect_get_loop_mask (gsi
, loop_masks
, ncopies
,
8291 final_mask
= prepare_load_store_mask (mask_vectype
, final_mask
,
8298 MASK_STORE_LANES (DATAREF_PTR, ALIAS_PTR, VEC_MASK,
8300 unsigned int align
= TYPE_ALIGN_UNIT (TREE_TYPE (vectype
));
8301 tree alias_ptr
= build_int_cst (ref_type
, align
);
8302 call
= gimple_build_call_internal (IFN_MASK_STORE_LANES
, 4,
8303 dataref_ptr
, alias_ptr
,
8304 final_mask
, vec_array
);
8309 MEM_REF[...all elements...] = STORE_LANES (VEC_ARRAY). */
8310 data_ref
= create_array_ref (aggr_type
, dataref_ptr
, ref_type
);
8311 call
= gimple_build_call_internal (IFN_STORE_LANES
, 1,
8313 gimple_call_set_lhs (call
, data_ref
);
8315 gimple_call_set_nothrow (call
, true);
8316 new_stmt_info
= vect_finish_stmt_generation (vinfo
, stmt_info
,
8319 /* Record that VEC_ARRAY is now dead. */
8320 vect_clobber_variable (vinfo
, stmt_info
, gsi
, vec_array
);
8324 new_stmt_info
= NULL
;
8328 result_chain
.create (group_size
);
8330 vect_permute_store_chain (vinfo
, dr_chain
, group_size
, stmt_info
,
8331 gsi
, &result_chain
);
8334 stmt_vec_info next_stmt_info
= first_stmt_info
;
8335 for (i
= 0; i
< vec_num
; i
++)
8338 unsigned HOST_WIDE_INT align
;
8340 tree final_mask
= NULL_TREE
;
8342 final_mask
= vect_get_loop_mask (gsi
, loop_masks
,
8344 vectype
, vec_num
* j
+ i
);
8346 final_mask
= prepare_load_store_mask (mask_vectype
, final_mask
,
8349 if (memory_access_type
== VMAT_GATHER_SCATTER
)
8351 tree scale
= size_int (gs_info
.scale
);
8354 call
= gimple_build_call_internal
8355 (IFN_MASK_SCATTER_STORE
, 5, dataref_ptr
, vec_offset
,
8356 scale
, vec_oprnd
, final_mask
);
8358 call
= gimple_build_call_internal
8359 (IFN_SCATTER_STORE
, 4, dataref_ptr
, vec_offset
,
8361 gimple_call_set_nothrow (call
, true);
8363 = vect_finish_stmt_generation (vinfo
, stmt_info
, call
, gsi
);
8368 /* Bump the vector pointer. */
8369 dataref_ptr
= bump_vector_ptr (vinfo
, dataref_ptr
, ptr_incr
,
8370 gsi
, stmt_info
, bump
);
8373 vec_oprnd
= vec_oprnds
[i
];
8374 else if (grouped_store
)
8375 /* For grouped stores vectorized defs are interleaved in
8376 vect_permute_store_chain(). */
8377 vec_oprnd
= result_chain
[i
];
8379 align
= known_alignment (DR_TARGET_ALIGNMENT (first_dr_info
));
8380 if (aligned_access_p (first_dr_info
))
8382 else if (DR_MISALIGNMENT (first_dr_info
) == -1)
8384 align
= dr_alignment (vect_dr_behavior (vinfo
, first_dr_info
));
8388 misalign
= DR_MISALIGNMENT (first_dr_info
);
8389 if (dataref_offset
== NULL_TREE
8390 && TREE_CODE (dataref_ptr
) == SSA_NAME
)
8391 set_ptr_info_alignment (get_ptr_info (dataref_ptr
), align
,
8394 if (memory_access_type
== VMAT_CONTIGUOUS_REVERSE
)
8396 tree perm_mask
= perm_mask_for_reverse (vectype
);
8397 tree perm_dest
= vect_create_destination_var
8398 (vect_get_store_rhs (stmt_info
), vectype
);
8399 tree new_temp
= make_ssa_name (perm_dest
);
8401 /* Generate the permute statement. */
8403 = gimple_build_assign (new_temp
, VEC_PERM_EXPR
, vec_oprnd
,
8404 vec_oprnd
, perm_mask
);
8405 vect_finish_stmt_generation (vinfo
, stmt_info
, perm_stmt
, gsi
);
8407 perm_stmt
= SSA_NAME_DEF_STMT (new_temp
);
8408 vec_oprnd
= new_temp
;
8411 /* Arguments are ready. Create the new vector stmt. */
8414 align
= least_bit_hwi (misalign
| align
);
8415 tree ptr
= build_int_cst (ref_type
, align
);
8417 = gimple_build_call_internal (IFN_MASK_STORE
, 4,
8419 final_mask
, vec_oprnd
);
8420 gimple_call_set_nothrow (call
, true);
8422 = vect_finish_stmt_generation (vinfo
, stmt_info
, call
, gsi
);
8426 data_ref
= fold_build2 (MEM_REF
, vectype
,
8430 : build_int_cst (ref_type
, 0));
8431 if (aligned_access_p (first_dr_info
))
8433 else if (DR_MISALIGNMENT (first_dr_info
) == -1)
8434 TREE_TYPE (data_ref
)
8435 = build_aligned_type (TREE_TYPE (data_ref
),
8436 align
* BITS_PER_UNIT
);
8438 TREE_TYPE (data_ref
)
8439 = build_aligned_type (TREE_TYPE (data_ref
),
8440 TYPE_ALIGN (elem_type
));
8441 vect_copy_ref_info (data_ref
, DR_REF (first_dr_info
->dr
));
8443 = gimple_build_assign (data_ref
, vec_oprnd
);
8445 = vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
8451 next_stmt_info
= DR_GROUP_NEXT_ELEMENT (next_stmt_info
);
8452 if (!next_stmt_info
)
8459 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt_info
;
8461 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt_info
;
8462 prev_stmt_info
= new_stmt_info
;
8467 result_chain
.release ();
8468 vec_oprnds
.release ();
8473 /* Given a vector type VECTYPE, turns permutation SEL into the equivalent
8474 VECTOR_CST mask. No checks are made that the target platform supports the
8475 mask, so callers may wish to test can_vec_perm_const_p separately, or use
8476 vect_gen_perm_mask_checked. */
8479 vect_gen_perm_mask_any (tree vectype
, const vec_perm_indices
&sel
)
8483 poly_uint64 nunits
= sel
.length ();
8484 gcc_assert (known_eq (nunits
, TYPE_VECTOR_SUBPARTS (vectype
)));
8486 mask_type
= build_vector_type (ssizetype
, nunits
);
8487 return vec_perm_indices_to_tree (mask_type
, sel
);
8490 /* Checked version of vect_gen_perm_mask_any. Asserts can_vec_perm_const_p,
8491 i.e. that the target supports the pattern _for arbitrary input vectors_. */
8494 vect_gen_perm_mask_checked (tree vectype
, const vec_perm_indices
&sel
)
8496 gcc_assert (can_vec_perm_const_p (TYPE_MODE (vectype
), sel
));
8497 return vect_gen_perm_mask_any (vectype
, sel
);
8500 /* Given a vector variable X and Y, that was generated for the scalar
8501 STMT_INFO, generate instructions to permute the vector elements of X and Y
8502 using permutation mask MASK_VEC, insert them at *GSI and return the
8503 permuted vector variable. */
8506 permute_vec_elements (vec_info
*vinfo
,
8507 tree x
, tree y
, tree mask_vec
, stmt_vec_info stmt_info
,
8508 gimple_stmt_iterator
*gsi
)
8510 tree vectype
= TREE_TYPE (x
);
8511 tree perm_dest
, data_ref
;
8514 tree scalar_dest
= gimple_get_lhs (stmt_info
->stmt
);
8515 if (scalar_dest
&& TREE_CODE (scalar_dest
) == SSA_NAME
)
8516 perm_dest
= vect_create_destination_var (scalar_dest
, vectype
);
8518 perm_dest
= vect_get_new_vect_var (vectype
, vect_simple_var
, NULL
);
8519 data_ref
= make_ssa_name (perm_dest
);
8521 /* Generate the permute statement. */
8522 perm_stmt
= gimple_build_assign (data_ref
, VEC_PERM_EXPR
, x
, y
, mask_vec
);
8523 vect_finish_stmt_generation (vinfo
, stmt_info
, perm_stmt
, gsi
);
8528 /* Hoist the definitions of all SSA uses on STMT_INFO out of the loop LOOP,
8529 inserting them on the loops preheader edge. Returns true if we
8530 were successful in doing so (and thus STMT_INFO can be moved then),
8531 otherwise returns false. */
8534 hoist_defs_of_uses (stmt_vec_info stmt_info
, class loop
*loop
)
8540 FOR_EACH_SSA_TREE_OPERAND (op
, stmt_info
->stmt
, i
, SSA_OP_USE
)
8542 gimple
*def_stmt
= SSA_NAME_DEF_STMT (op
);
8543 if (!gimple_nop_p (def_stmt
)
8544 && flow_bb_inside_loop_p (loop
, gimple_bb (def_stmt
)))
8546 /* Make sure we don't need to recurse. While we could do
8547 so in simple cases when there are more complex use webs
8548 we don't have an easy way to preserve stmt order to fulfil
8549 dependencies within them. */
8552 if (gimple_code (def_stmt
) == GIMPLE_PHI
)
8554 FOR_EACH_SSA_TREE_OPERAND (op2
, def_stmt
, i2
, SSA_OP_USE
)
8556 gimple
*def_stmt2
= SSA_NAME_DEF_STMT (op2
);
8557 if (!gimple_nop_p (def_stmt2
)
8558 && flow_bb_inside_loop_p (loop
, gimple_bb (def_stmt2
)))
8568 FOR_EACH_SSA_TREE_OPERAND (op
, stmt_info
->stmt
, i
, SSA_OP_USE
)
8570 gimple
*def_stmt
= SSA_NAME_DEF_STMT (op
);
8571 if (!gimple_nop_p (def_stmt
)
8572 && flow_bb_inside_loop_p (loop
, gimple_bb (def_stmt
)))
8574 gimple_stmt_iterator gsi
= gsi_for_stmt (def_stmt
);
8575 gsi_remove (&gsi
, false);
8576 gsi_insert_on_edge_immediate (loop_preheader_edge (loop
), def_stmt
);
8583 /* vectorizable_load.
8585 Check if STMT_INFO reads a non scalar data-ref (array/pointer/structure)
8586 that can be vectorized.
8587 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
8588 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
8589 Return true if STMT_INFO is vectorizable in this way. */
8592 vectorizable_load (vec_info
*vinfo
,
8593 stmt_vec_info stmt_info
, gimple_stmt_iterator
*gsi
,
8594 stmt_vec_info
*vec_stmt
, slp_tree slp_node
,
8595 stmt_vector_for_cost
*cost_vec
)
8598 tree vec_dest
= NULL
;
8599 tree data_ref
= NULL
;
8600 stmt_vec_info prev_stmt_info
;
8601 loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
);
8602 class loop
*loop
= NULL
;
8603 class loop
*containing_loop
= gimple_bb (stmt_info
->stmt
)->loop_father
;
8604 bool nested_in_vect_loop
= false;
8609 enum dr_alignment_support alignment_support_scheme
;
8610 tree dataref_ptr
= NULL_TREE
;
8611 tree dataref_offset
= NULL_TREE
;
8612 gimple
*ptr_incr
= NULL
;
8615 unsigned int group_size
;
8616 poly_uint64 group_gap_adj
;
8617 tree msq
= NULL_TREE
, lsq
;
8618 tree offset
= NULL_TREE
;
8619 tree byte_offset
= NULL_TREE
;
8620 tree realignment_token
= NULL_TREE
;
8622 vec
<tree
> dr_chain
= vNULL
;
8623 bool grouped_load
= false;
8624 stmt_vec_info first_stmt_info
;
8625 stmt_vec_info first_stmt_info_for_drptr
= NULL
;
8626 bool compute_in_loop
= false;
8627 class loop
*at_loop
;
8629 bool slp
= (slp_node
!= NULL
);
8630 bool slp_perm
= false;
8631 bb_vec_info bb_vinfo
= dyn_cast
<bb_vec_info
> (vinfo
);
8634 gather_scatter_info gs_info
;
8636 enum vect_def_type mask_dt
= vect_unknown_def_type
;
8638 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
8641 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
8645 tree mask
= NULL_TREE
, mask_vectype
= NULL_TREE
;
8646 if (gassign
*assign
= dyn_cast
<gassign
*> (stmt_info
->stmt
))
8648 scalar_dest
= gimple_assign_lhs (assign
);
8649 if (TREE_CODE (scalar_dest
) != SSA_NAME
)
8652 tree_code code
= gimple_assign_rhs_code (assign
);
8653 if (code
!= ARRAY_REF
8654 && code
!= BIT_FIELD_REF
8655 && code
!= INDIRECT_REF
8656 && code
!= COMPONENT_REF
8657 && code
!= IMAGPART_EXPR
8658 && code
!= REALPART_EXPR
8660 && TREE_CODE_CLASS (code
) != tcc_declaration
)
8665 gcall
*call
= dyn_cast
<gcall
*> (stmt_info
->stmt
);
8666 if (!call
|| !gimple_call_internal_p (call
))
8669 internal_fn ifn
= gimple_call_internal_fn (call
);
8670 if (!internal_load_fn_p (ifn
))
8673 scalar_dest
= gimple_call_lhs (call
);
8677 int mask_index
= internal_fn_mask_index (ifn
);
8678 if (mask_index
>= 0)
8680 mask
= gimple_call_arg (call
, mask_index
);
8681 if (!vect_check_scalar_mask (vinfo
, stmt_info
, mask
, &mask_dt
,
8687 if (!STMT_VINFO_DATA_REF (stmt_info
))
8690 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
8691 poly_uint64 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
8695 loop
= LOOP_VINFO_LOOP (loop_vinfo
);
8696 nested_in_vect_loop
= nested_in_vect_loop_p (loop
, stmt_info
);
8697 vf
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
);
8702 /* Multiple types in SLP are handled by creating the appropriate number of
8703 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
8708 ncopies
= vect_get_num_copies (loop_vinfo
, vectype
);
8710 gcc_assert (ncopies
>= 1);
8712 /* FORNOW. This restriction should be relaxed. */
8713 if (nested_in_vect_loop
&& ncopies
> 1)
8715 if (dump_enabled_p ())
8716 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
8717 "multiple types in nested loop.\n");
8721 /* Invalidate assumptions made by dependence analysis when vectorization
8722 on the unrolled body effectively re-orders stmts. */
8724 && STMT_VINFO_MIN_NEG_DIST (stmt_info
) != 0
8725 && maybe_gt (LOOP_VINFO_VECT_FACTOR (loop_vinfo
),
8726 STMT_VINFO_MIN_NEG_DIST (stmt_info
)))
8728 if (dump_enabled_p ())
8729 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
8730 "cannot perform implicit CSE when unrolling "
8731 "with negative dependence distance\n");
8735 elem_type
= TREE_TYPE (vectype
);
8736 mode
= TYPE_MODE (vectype
);
8738 /* FORNOW. In some cases can vectorize even if data-type not supported
8739 (e.g. - data copies). */
8740 if (optab_handler (mov_optab
, mode
) == CODE_FOR_nothing
)
8742 if (dump_enabled_p ())
8743 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
8744 "Aligned load, but unsupported type.\n");
8748 /* Check if the load is a part of an interleaving chain. */
8749 if (STMT_VINFO_GROUPED_ACCESS (stmt_info
))
8751 grouped_load
= true;
8753 gcc_assert (!nested_in_vect_loop
);
8754 gcc_assert (!STMT_VINFO_GATHER_SCATTER_P (stmt_info
));
8756 first_stmt_info
= DR_GROUP_FIRST_ELEMENT (stmt_info
);
8757 group_size
= DR_GROUP_SIZE (first_stmt_info
);
8759 /* Refuse non-SLP vectorization of SLP-only groups. */
8760 if (!slp
&& STMT_VINFO_SLP_VECT_ONLY (first_stmt_info
))
8762 if (dump_enabled_p ())
8763 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
8764 "cannot vectorize load in non-SLP mode.\n");
8768 if (slp
&& SLP_TREE_LOAD_PERMUTATION (slp_node
).exists ())
8774 /* In BB vectorization we may not actually use a loaded vector
8775 accessing elements in excess of DR_GROUP_SIZE. */
8776 stmt_vec_info group_info
= SLP_TREE_SCALAR_STMTS (slp_node
)[0];
8777 group_info
= DR_GROUP_FIRST_ELEMENT (group_info
);
8778 unsigned HOST_WIDE_INT nunits
;
8779 unsigned j
, k
, maxk
= 0;
8780 FOR_EACH_VEC_ELT (SLP_TREE_LOAD_PERMUTATION (slp_node
), j
, k
)
8783 tree vectype
= STMT_VINFO_VECTYPE (group_info
);
8784 if (!TYPE_VECTOR_SUBPARTS (vectype
).is_constant (&nunits
)
8785 || maxk
>= (DR_GROUP_SIZE (group_info
) & ~(nunits
- 1)))
8787 if (dump_enabled_p ())
8788 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
8789 "BB vectorization with gaps at the end of "
8790 "a load is not supported\n");
8797 if (!vect_transform_slp_perm_load (vinfo
, slp_node
, tem
, NULL
, vf
,
8800 if (dump_enabled_p ())
8801 dump_printf_loc (MSG_MISSED_OPTIMIZATION
,
8803 "unsupported load permutation\n");
8808 /* Invalidate assumptions made by dependence analysis when vectorization
8809 on the unrolled body effectively re-orders stmts. */
8810 if (!PURE_SLP_STMT (stmt_info
)
8811 && STMT_VINFO_MIN_NEG_DIST (stmt_info
) != 0
8812 && maybe_gt (LOOP_VINFO_VECT_FACTOR (loop_vinfo
),
8813 STMT_VINFO_MIN_NEG_DIST (stmt_info
)))
8815 if (dump_enabled_p ())
8816 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
8817 "cannot perform implicit CSE when performing "
8818 "group loads with negative dependence distance\n");
8825 vect_memory_access_type memory_access_type
;
8826 if (!get_load_store_type (vinfo
, stmt_info
, vectype
, slp
, mask
, VLS_LOAD
,
8827 ncopies
, &memory_access_type
, &gs_info
))
8832 if (memory_access_type
== VMAT_CONTIGUOUS
)
8834 machine_mode vec_mode
= TYPE_MODE (vectype
);
8835 if (!VECTOR_MODE_P (vec_mode
)
8836 || !can_vec_mask_load_store_p (vec_mode
,
8837 TYPE_MODE (mask_vectype
), true))
8840 else if (memory_access_type
!= VMAT_LOAD_STORE_LANES
8841 && memory_access_type
!= VMAT_GATHER_SCATTER
)
8843 if (dump_enabled_p ())
8844 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
8845 "unsupported access type for masked load.\n");
8850 if (!vec_stmt
) /* transformation not required. */
8853 STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info
) = memory_access_type
;
8856 && LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo
))
8857 check_load_store_masking (loop_vinfo
, vectype
, VLS_LOAD
, group_size
,
8858 memory_access_type
, &gs_info
, mask
);
8860 STMT_VINFO_TYPE (stmt_info
) = load_vec_info_type
;
8861 vect_model_load_cost (vinfo
, stmt_info
, ncopies
, vf
, memory_access_type
,
8862 slp_node
, cost_vec
);
8867 gcc_assert (memory_access_type
8868 == STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info
));
8870 if (dump_enabled_p ())
8871 dump_printf_loc (MSG_NOTE
, vect_location
,
8872 "transform load. ncopies = %d\n", ncopies
);
8876 dr_vec_info
*dr_info
= STMT_VINFO_DR_INFO (stmt_info
), *first_dr_info
= NULL
;
8877 ensure_base_align (dr_info
);
8879 if (memory_access_type
== VMAT_GATHER_SCATTER
&& gs_info
.decl
)
8881 vect_build_gather_load_calls (vinfo
,
8882 stmt_info
, gsi
, vec_stmt
, &gs_info
, mask
);
8886 if (memory_access_type
== VMAT_INVARIANT
)
8888 gcc_assert (!grouped_load
&& !mask
&& !bb_vinfo
);
8889 /* If we have versioned for aliasing or the loop doesn't
8890 have any data dependencies that would preclude this,
8891 then we are sure this is a loop invariant load and
8892 thus we can insert it on the preheader edge. */
8893 bool hoist_p
= (LOOP_VINFO_NO_DATA_DEPENDENCIES (loop_vinfo
)
8894 && !nested_in_vect_loop
8895 && hoist_defs_of_uses (stmt_info
, loop
));
8898 gassign
*stmt
= as_a
<gassign
*> (stmt_info
->stmt
);
8899 if (dump_enabled_p ())
8900 dump_printf_loc (MSG_NOTE
, vect_location
,
8901 "hoisting out of the vectorized loop: %G", stmt
);
8902 scalar_dest
= copy_ssa_name (scalar_dest
);
8903 tree rhs
= unshare_expr (gimple_assign_rhs1 (stmt
));
8904 gsi_insert_on_edge_immediate
8905 (loop_preheader_edge (loop
),
8906 gimple_build_assign (scalar_dest
, rhs
));
8908 /* These copies are all equivalent, but currently the representation
8909 requires a separate STMT_VINFO_VEC_STMT for each one. */
8910 prev_stmt_info
= NULL
;
8911 gimple_stmt_iterator gsi2
= *gsi
;
8913 for (j
= 0; j
< ncopies
; j
++)
8915 stmt_vec_info new_stmt_info
;
8918 new_temp
= vect_init_vector (vinfo
, stmt_info
, scalar_dest
,
8920 gimple
*new_stmt
= SSA_NAME_DEF_STMT (new_temp
);
8921 new_stmt_info
= vinfo
->add_stmt (new_stmt
);
8925 new_temp
= vect_init_vector (vinfo
, stmt_info
, scalar_dest
,
8927 new_stmt_info
= vinfo
->lookup_def (new_temp
);
8930 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt_info
);
8932 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt_info
;
8934 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt_info
;
8935 prev_stmt_info
= new_stmt_info
;
8940 if (memory_access_type
== VMAT_ELEMENTWISE
8941 || memory_access_type
== VMAT_STRIDED_SLP
)
8943 gimple_stmt_iterator incr_gsi
;
8949 vec
<constructor_elt
, va_gc
> *v
= NULL
;
8950 tree stride_base
, stride_step
, alias_off
;
8951 /* Checked by get_load_store_type. */
8952 unsigned int const_nunits
= nunits
.to_constant ();
8953 unsigned HOST_WIDE_INT cst_offset
= 0;
8956 gcc_assert (!LOOP_VINFO_FULLY_MASKED_P (loop_vinfo
));
8957 gcc_assert (!nested_in_vect_loop
);
8961 first_stmt_info
= DR_GROUP_FIRST_ELEMENT (stmt_info
);
8962 first_dr_info
= STMT_VINFO_DR_INFO (first_stmt_info
);
8966 first_stmt_info
= stmt_info
;
8967 first_dr_info
= dr_info
;
8969 if (slp
&& grouped_load
)
8971 group_size
= DR_GROUP_SIZE (first_stmt_info
);
8972 ref_type
= get_group_alias_ptr_type (first_stmt_info
);
8978 = (tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype
)))
8979 * vect_get_place_in_interleaving_chain (stmt_info
,
8982 ref_type
= reference_alias_ptr_type (DR_REF (dr_info
->dr
));
8985 dr_offset
= get_dr_vinfo_offset (vinfo
, first_dr_info
);
8987 = fold_build_pointer_plus
8988 (DR_BASE_ADDRESS (first_dr_info
->dr
),
8989 size_binop (PLUS_EXPR
,
8990 convert_to_ptrofftype (dr_offset
),
8991 convert_to_ptrofftype (DR_INIT (first_dr_info
->dr
))));
8992 stride_step
= fold_convert (sizetype
, DR_STEP (first_dr_info
->dr
));
8994 /* For a load with loop-invariant (but other than power-of-2)
8995 stride (i.e. not a grouped access) like so:
8997 for (i = 0; i < n; i += stride)
9000 we generate a new induction variable and new accesses to
9001 form a new vector (or vectors, depending on ncopies):
9003 for (j = 0; ; j += VF*stride)
9005 tmp2 = array[j + stride];
9007 vectemp = {tmp1, tmp2, ...}
9010 ivstep
= fold_build2 (MULT_EXPR
, TREE_TYPE (stride_step
), stride_step
,
9011 build_int_cst (TREE_TYPE (stride_step
), vf
));
9013 standard_iv_increment_position (loop
, &incr_gsi
, &insert_after
);
9015 stride_base
= cse_and_gimplify_to_preheader (loop_vinfo
, stride_base
);
9016 ivstep
= cse_and_gimplify_to_preheader (loop_vinfo
, ivstep
);
9017 create_iv (stride_base
, ivstep
, NULL
,
9018 loop
, &incr_gsi
, insert_after
,
9020 incr
= gsi_stmt (incr_gsi
);
9021 loop_vinfo
->add_stmt (incr
);
9023 stride_step
= cse_and_gimplify_to_preheader (loop_vinfo
, stride_step
);
9025 prev_stmt_info
= NULL
;
9026 running_off
= offvar
;
9027 alias_off
= build_int_cst (ref_type
, 0);
9028 int nloads
= const_nunits
;
9030 tree ltype
= TREE_TYPE (vectype
);
9031 tree lvectype
= vectype
;
9032 auto_vec
<tree
> dr_chain
;
9033 if (memory_access_type
== VMAT_STRIDED_SLP
)
9035 if (group_size
< const_nunits
)
9037 /* First check if vec_init optab supports construction from vector
9038 elts directly. Otherwise avoid emitting a constructor of
9039 vector elements by performing the loads using an integer type
9040 of the same size, constructing a vector of those and then
9041 re-interpreting it as the original vector type. This avoids a
9042 huge runtime penalty due to the general inability to perform
9043 store forwarding from smaller stores to a larger load. */
9046 = vector_vector_composition_type (vectype
,
9047 const_nunits
/ group_size
,
9049 if (vtype
!= NULL_TREE
)
9051 nloads
= const_nunits
/ group_size
;
9060 lnel
= const_nunits
;
9063 ltype
= build_aligned_type (ltype
, TYPE_ALIGN (TREE_TYPE (vectype
)));
9065 /* Load vector(1) scalar_type if it's 1 element-wise vectype. */
9066 else if (nloads
== 1)
9071 /* For SLP permutation support we need to load the whole group,
9072 not only the number of vector stmts the permutation result
9076 /* We don't yet generate SLP_TREE_LOAD_PERMUTATIONs for
9078 unsigned int const_vf
= vf
.to_constant ();
9079 ncopies
= CEIL (group_size
* const_vf
, const_nunits
);
9080 dr_chain
.create (ncopies
);
9083 ncopies
= SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
);
9085 unsigned int group_el
= 0;
9086 unsigned HOST_WIDE_INT
9087 elsz
= tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype
)));
9088 for (j
= 0; j
< ncopies
; j
++)
9091 vec_alloc (v
, nloads
);
9092 stmt_vec_info new_stmt_info
= NULL
;
9093 for (i
= 0; i
< nloads
; i
++)
9095 tree this_off
= build_int_cst (TREE_TYPE (alias_off
),
9096 group_el
* elsz
+ cst_offset
);
9097 tree data_ref
= build2 (MEM_REF
, ltype
, running_off
, this_off
);
9098 vect_copy_ref_info (data_ref
, DR_REF (first_dr_info
->dr
));
9100 = gimple_build_assign (make_ssa_name (ltype
), data_ref
);
9102 = vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
9104 CONSTRUCTOR_APPEND_ELT (v
, NULL_TREE
,
9105 gimple_assign_lhs (new_stmt
));
9109 || group_el
== group_size
)
9111 tree newoff
= copy_ssa_name (running_off
);
9112 gimple
*incr
= gimple_build_assign (newoff
, POINTER_PLUS_EXPR
,
9113 running_off
, stride_step
);
9114 vect_finish_stmt_generation (vinfo
, stmt_info
, incr
, gsi
);
9116 running_off
= newoff
;
9122 tree vec_inv
= build_constructor (lvectype
, v
);
9123 new_temp
= vect_init_vector (vinfo
, stmt_info
,
9124 vec_inv
, lvectype
, gsi
);
9125 new_stmt_info
= vinfo
->lookup_def (new_temp
);
9126 if (lvectype
!= vectype
)
9129 = gimple_build_assign (make_ssa_name (vectype
),
9131 build1 (VIEW_CONVERT_EXPR
,
9132 vectype
, new_temp
));
9134 = vect_finish_stmt_generation (vinfo
, stmt_info
,
9142 dr_chain
.quick_push (gimple_assign_lhs (new_stmt_info
->stmt
));
9144 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt_info
);
9149 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt_info
;
9151 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt_info
;
9152 prev_stmt_info
= new_stmt_info
;
9158 vect_transform_slp_perm_load (vinfo
, slp_node
, dr_chain
, gsi
, vf
,
9164 if (memory_access_type
== VMAT_GATHER_SCATTER
9165 || (!slp
&& memory_access_type
== VMAT_CONTIGUOUS
))
9166 grouped_load
= false;
9170 first_stmt_info
= DR_GROUP_FIRST_ELEMENT (stmt_info
);
9171 group_size
= DR_GROUP_SIZE (first_stmt_info
);
9172 /* For SLP vectorization we directly vectorize a subchain
9173 without permutation. */
9174 if (slp
&& ! SLP_TREE_LOAD_PERMUTATION (slp_node
).exists ())
9175 first_stmt_info
= SLP_TREE_SCALAR_STMTS (slp_node
)[0];
9176 /* For BB vectorization always use the first stmt to base
9177 the data ref pointer on. */
9179 first_stmt_info_for_drptr
= SLP_TREE_SCALAR_STMTS (slp_node
)[0];
9181 /* Check if the chain of loads is already vectorized. */
9182 if (STMT_VINFO_VEC_STMT (first_stmt_info
)
9183 /* For SLP we would need to copy over SLP_TREE_VEC_STMTS.
9184 ??? But we can only do so if there is exactly one
9185 as we have no way to get at the rest. Leave the CSE
9187 ??? With the group load eventually participating
9188 in multiple different permutations (having multiple
9189 slp nodes which refer to the same group) the CSE
9190 is even wrong code. See PR56270. */
9193 *vec_stmt
= STMT_VINFO_VEC_STMT (stmt_info
);
9196 first_dr_info
= STMT_VINFO_DR_INFO (first_stmt_info
);
9199 /* VEC_NUM is the number of vect stmts to be created for this group. */
9202 grouped_load
= false;
9203 /* If an SLP permutation is from N elements to N elements,
9204 and if one vector holds a whole number of N, we can load
9205 the inputs to the permutation in the same way as an
9206 unpermuted sequence. In other cases we need to load the
9207 whole group, not only the number of vector stmts the
9208 permutation result fits in. */
9209 unsigned scalar_lanes
= SLP_TREE_SCALAR_STMTS (slp_node
).length ();
9211 && (group_size
!= scalar_lanes
9212 || !multiple_p (nunits
, group_size
)))
9214 /* We don't yet generate such SLP_TREE_LOAD_PERMUTATIONs for
9215 variable VF; see vect_transform_slp_perm_load. */
9216 unsigned int const_vf
= vf
.to_constant ();
9217 unsigned int const_nunits
= nunits
.to_constant ();
9218 vec_num
= CEIL (group_size
* const_vf
, const_nunits
);
9219 group_gap_adj
= vf
* group_size
- nunits
* vec_num
;
9223 vec_num
= SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
);
9225 = group_size
- scalar_lanes
;
9229 vec_num
= group_size
;
9231 ref_type
= get_group_alias_ptr_type (first_stmt_info
);
9235 first_stmt_info
= stmt_info
;
9236 first_dr_info
= dr_info
;
9237 group_size
= vec_num
= 1;
9239 ref_type
= reference_alias_ptr_type (DR_REF (first_dr_info
->dr
));
9242 /* Gather-scatter accesses perform only component accesses, alignment
9243 is irrelevant for them. */
9244 if (memory_access_type
== VMAT_GATHER_SCATTER
)
9245 alignment_support_scheme
= dr_unaligned_supported
;
9247 alignment_support_scheme
9248 = vect_supportable_dr_alignment (vinfo
, first_dr_info
, false);
9250 gcc_assert (alignment_support_scheme
);
9251 vec_loop_masks
*loop_masks
9252 = (loop_vinfo
&& LOOP_VINFO_FULLY_MASKED_P (loop_vinfo
)
9253 ? &LOOP_VINFO_MASKS (loop_vinfo
)
9255 /* Targets with store-lane instructions must not require explicit
9256 realignment. vect_supportable_dr_alignment always returns either
9257 dr_aligned or dr_unaligned_supported for masked operations. */
9258 gcc_assert ((memory_access_type
!= VMAT_LOAD_STORE_LANES
9261 || alignment_support_scheme
== dr_aligned
9262 || alignment_support_scheme
== dr_unaligned_supported
);
9264 /* In case the vectorization factor (VF) is bigger than the number
9265 of elements that we can fit in a vectype (nunits), we have to generate
9266 more than one vector stmt - i.e - we need to "unroll" the
9267 vector stmt by a factor VF/nunits. In doing so, we record a pointer
9268 from one copy of the vector stmt to the next, in the field
9269 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
9270 stages to find the correct vector defs to be used when vectorizing
9271 stmts that use the defs of the current stmt. The example below
9272 illustrates the vectorization process when VF=16 and nunits=4 (i.e., we
9273 need to create 4 vectorized stmts):
9275 before vectorization:
9276 RELATED_STMT VEC_STMT
9280 step 1: vectorize stmt S1:
9281 We first create the vector stmt VS1_0, and, as usual, record a
9282 pointer to it in the STMT_VINFO_VEC_STMT of the scalar stmt S1.
9283 Next, we create the vector stmt VS1_1, and record a pointer to
9284 it in the STMT_VINFO_RELATED_STMT of the vector stmt VS1_0.
9285 Similarly, for VS1_2 and VS1_3. This is the resulting chain of
9287 RELATED_STMT VEC_STMT
9288 VS1_0: vx0 = memref0 VS1_1 -
9289 VS1_1: vx1 = memref1 VS1_2 -
9290 VS1_2: vx2 = memref2 VS1_3 -
9291 VS1_3: vx3 = memref3 - -
9292 S1: x = load - VS1_0
9295 See in documentation in vect_get_vec_def_for_stmt_copy for how the
9296 information we recorded in RELATED_STMT field is used to vectorize
9299 /* In case of interleaving (non-unit grouped access):
9306 Vectorized loads are created in the order of memory accesses
9307 starting from the access of the first stmt of the chain:
9310 VS2: vx1 = &base + vec_size*1
9311 VS3: vx3 = &base + vec_size*2
9312 VS4: vx4 = &base + vec_size*3
9314 Then permutation statements are generated:
9316 VS5: vx5 = VEC_PERM_EXPR < vx0, vx1, { 0, 2, ..., i*2 } >
9317 VS6: vx6 = VEC_PERM_EXPR < vx0, vx1, { 1, 3, ..., i*2+1 } >
9320 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
9321 (the order of the data-refs in the output of vect_permute_load_chain
9322 corresponds to the order of scalar stmts in the interleaving chain - see
9323 the documentation of vect_permute_load_chain()).
9324 The generation of permutation stmts and recording them in
9325 STMT_VINFO_VEC_STMT is done in vect_transform_grouped_load().
9327 In case of both multiple types and interleaving, the vector loads and
9328 permutation stmts above are created for every copy. The result vector
9329 stmts are put in STMT_VINFO_VEC_STMT for the first copy and in the
9330 corresponding STMT_VINFO_RELATED_STMT for the next copies. */
9332 /* If the data reference is aligned (dr_aligned) or potentially unaligned
9333 on a target that supports unaligned accesses (dr_unaligned_supported)
9334 we generate the following code:
9338 p = p + indx * vectype_size;
9343 Otherwise, the data reference is potentially unaligned on a target that
9344 does not support unaligned accesses (dr_explicit_realign_optimized) -
9345 then generate the following code, in which the data in each iteration is
9346 obtained by two vector loads, one from the previous iteration, and one
9347 from the current iteration:
9349 msq_init = *(floor(p1))
9350 p2 = initial_addr + VS - 1;
9351 realignment_token = call target_builtin;
9354 p2 = p2 + indx * vectype_size
9356 vec_dest = realign_load (msq, lsq, realignment_token)
9361 /* If the misalignment remains the same throughout the execution of the
9362 loop, we can create the init_addr and permutation mask at the loop
9363 preheader. Otherwise, it needs to be created inside the loop.
9364 This can only occur when vectorizing memory accesses in the inner-loop
9365 nested within an outer-loop that is being vectorized. */
9367 if (nested_in_vect_loop
9368 && !multiple_p (DR_STEP_ALIGNMENT (dr_info
->dr
),
9369 GET_MODE_SIZE (TYPE_MODE (vectype
))))
9371 gcc_assert (alignment_support_scheme
!= dr_explicit_realign_optimized
);
9372 compute_in_loop
= true;
9375 bool diff_first_stmt_info
9376 = first_stmt_info_for_drptr
&& first_stmt_info
!= first_stmt_info_for_drptr
;
9378 if ((alignment_support_scheme
== dr_explicit_realign_optimized
9379 || alignment_support_scheme
== dr_explicit_realign
)
9380 && !compute_in_loop
)
9382 /* If we have different first_stmt_info, we can't set up realignment
9383 here, since we can't guarantee first_stmt_info DR has been
9384 initialized yet, use first_stmt_info_for_drptr DR by bumping the
9385 distance from first_stmt_info DR instead as below. */
9386 if (!diff_first_stmt_info
)
9387 msq
= vect_setup_realignment (vinfo
,
9388 first_stmt_info
, gsi
, &realignment_token
,
9389 alignment_support_scheme
, NULL_TREE
,
9391 if (alignment_support_scheme
== dr_explicit_realign_optimized
)
9393 phi
= as_a
<gphi
*> (SSA_NAME_DEF_STMT (msq
));
9394 byte_offset
= size_binop (MINUS_EXPR
, TYPE_SIZE_UNIT (vectype
),
9396 gcc_assert (!first_stmt_info_for_drptr
);
9402 if (memory_access_type
== VMAT_CONTIGUOUS_REVERSE
)
9403 offset
= size_int (-TYPE_VECTOR_SUBPARTS (vectype
) + 1);
9406 tree vec_offset
= NULL_TREE
;
9407 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info
))
9409 aggr_type
= NULL_TREE
;
9412 else if (memory_access_type
== VMAT_GATHER_SCATTER
)
9414 aggr_type
= elem_type
;
9415 vect_get_strided_load_store_ops (stmt_info
, loop_vinfo
, &gs_info
,
9416 &bump
, &vec_offset
);
9420 if (memory_access_type
== VMAT_LOAD_STORE_LANES
)
9421 aggr_type
= build_array_type_nelts (elem_type
, vec_num
* nunits
);
9423 aggr_type
= vectype
;
9424 bump
= vect_get_data_ptr_increment (vinfo
, dr_info
, aggr_type
,
9425 memory_access_type
);
9428 tree vec_mask
= NULL_TREE
;
9429 prev_stmt_info
= NULL
;
9430 poly_uint64 group_elt
= 0;
9431 for (j
= 0; j
< ncopies
; j
++)
9433 stmt_vec_info new_stmt_info
= NULL
;
9434 /* 1. Create the vector or array pointer update chain. */
9437 bool simd_lane_access_p
9438 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) != 0;
9439 if (simd_lane_access_p
9440 && TREE_CODE (DR_BASE_ADDRESS (first_dr_info
->dr
)) == ADDR_EXPR
9441 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr_info
->dr
), 0))
9442 && integer_zerop (get_dr_vinfo_offset (vinfo
, first_dr_info
))
9443 && integer_zerop (DR_INIT (first_dr_info
->dr
))
9444 && alias_sets_conflict_p (get_alias_set (aggr_type
),
9445 get_alias_set (TREE_TYPE (ref_type
)))
9446 && (alignment_support_scheme
== dr_aligned
9447 || alignment_support_scheme
== dr_unaligned_supported
))
9449 dataref_ptr
= unshare_expr (DR_BASE_ADDRESS (first_dr_info
->dr
));
9450 dataref_offset
= build_int_cst (ref_type
, 0);
9452 else if (diff_first_stmt_info
)
9455 = vect_create_data_ref_ptr (vinfo
, first_stmt_info_for_drptr
,
9456 aggr_type
, at_loop
, offset
, &dummy
,
9457 gsi
, &ptr_incr
, simd_lane_access_p
,
9459 /* Adjust the pointer by the difference to first_stmt. */
9460 data_reference_p ptrdr
9461 = STMT_VINFO_DATA_REF (first_stmt_info_for_drptr
);
9463 = fold_convert (sizetype
,
9464 size_binop (MINUS_EXPR
,
9465 DR_INIT (first_dr_info
->dr
),
9467 dataref_ptr
= bump_vector_ptr (vinfo
, dataref_ptr
, ptr_incr
, gsi
,
9469 if (alignment_support_scheme
== dr_explicit_realign
)
9471 msq
= vect_setup_realignment (vinfo
,
9472 first_stmt_info_for_drptr
, gsi
,
9474 alignment_support_scheme
,
9475 dataref_ptr
, &at_loop
);
9476 gcc_assert (!compute_in_loop
);
9479 else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info
))
9480 vect_get_gather_scatter_ops (vinfo
, loop
, stmt_info
, &gs_info
,
9481 &dataref_ptr
, &vec_offset
);
9484 = vect_create_data_ref_ptr (vinfo
, first_stmt_info
, aggr_type
,
9486 offset
, &dummy
, gsi
, &ptr_incr
,
9493 auto_vec
<vec
<tree
> > vec_defs (1);
9494 vect_get_slp_defs (vinfo
, slp_node
, &vec_defs
);
9495 vec_mask
= vec_defs
[0][0];
9498 vec_mask
= vect_get_vec_def_for_operand (vinfo
, mask
, stmt_info
,
9505 dataref_offset
= int_const_binop (PLUS_EXPR
, dataref_offset
,
9507 else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info
))
9508 vec_offset
= vect_get_vec_def_for_stmt_copy (vinfo
, vec_offset
);
9510 dataref_ptr
= bump_vector_ptr (vinfo
, dataref_ptr
, ptr_incr
, gsi
,
9513 vec_mask
= vect_get_vec_def_for_stmt_copy (vinfo
, vec_mask
);
9516 if (grouped_load
|| slp_perm
)
9517 dr_chain
.create (vec_num
);
9519 if (memory_access_type
== VMAT_LOAD_STORE_LANES
)
9523 vec_array
= create_vector_array (vectype
, vec_num
);
9525 tree final_mask
= NULL_TREE
;
9527 final_mask
= vect_get_loop_mask (gsi
, loop_masks
, ncopies
,
9530 final_mask
= prepare_load_store_mask (mask_vectype
, final_mask
,
9537 VEC_ARRAY = MASK_LOAD_LANES (DATAREF_PTR, ALIAS_PTR,
9539 unsigned int align
= TYPE_ALIGN_UNIT (TREE_TYPE (vectype
));
9540 tree alias_ptr
= build_int_cst (ref_type
, align
);
9541 call
= gimple_build_call_internal (IFN_MASK_LOAD_LANES
, 3,
9542 dataref_ptr
, alias_ptr
,
9548 VEC_ARRAY = LOAD_LANES (MEM_REF[...all elements...]). */
9549 data_ref
= create_array_ref (aggr_type
, dataref_ptr
, ref_type
);
9550 call
= gimple_build_call_internal (IFN_LOAD_LANES
, 1, data_ref
);
9552 gimple_call_set_lhs (call
, vec_array
);
9553 gimple_call_set_nothrow (call
, true);
9554 new_stmt_info
= vect_finish_stmt_generation (vinfo
, stmt_info
,
9557 /* Extract each vector into an SSA_NAME. */
9558 for (i
= 0; i
< vec_num
; i
++)
9560 new_temp
= read_vector_array (vinfo
, stmt_info
, gsi
, scalar_dest
,
9562 dr_chain
.quick_push (new_temp
);
9565 /* Record the mapping between SSA_NAMEs and statements. */
9566 vect_record_grouped_load_vectors (vinfo
, stmt_info
, dr_chain
);
9568 /* Record that VEC_ARRAY is now dead. */
9569 vect_clobber_variable (vinfo
, stmt_info
, gsi
, vec_array
);
9573 for (i
= 0; i
< vec_num
; i
++)
9575 tree final_mask
= NULL_TREE
;
9577 && memory_access_type
!= VMAT_INVARIANT
)
9578 final_mask
= vect_get_loop_mask (gsi
, loop_masks
,
9580 vectype
, vec_num
* j
+ i
);
9582 final_mask
= prepare_load_store_mask (mask_vectype
, final_mask
,
9586 dataref_ptr
= bump_vector_ptr (vinfo
, dataref_ptr
, ptr_incr
,
9587 gsi
, stmt_info
, bump
);
9589 /* 2. Create the vector-load in the loop. */
9590 gimple
*new_stmt
= NULL
;
9591 switch (alignment_support_scheme
)
9594 case dr_unaligned_supported
:
9596 unsigned int misalign
;
9597 unsigned HOST_WIDE_INT align
;
9599 if (memory_access_type
== VMAT_GATHER_SCATTER
)
9601 tree zero
= build_zero_cst (vectype
);
9602 tree scale
= size_int (gs_info
.scale
);
9605 call
= gimple_build_call_internal
9606 (IFN_MASK_GATHER_LOAD
, 5, dataref_ptr
,
9607 vec_offset
, scale
, zero
, final_mask
);
9609 call
= gimple_build_call_internal
9610 (IFN_GATHER_LOAD
, 4, dataref_ptr
,
9611 vec_offset
, scale
, zero
);
9612 gimple_call_set_nothrow (call
, true);
9614 data_ref
= NULL_TREE
;
9619 known_alignment (DR_TARGET_ALIGNMENT (first_dr_info
));
9620 if (alignment_support_scheme
== dr_aligned
)
9622 gcc_assert (aligned_access_p (first_dr_info
));
9625 else if (DR_MISALIGNMENT (first_dr_info
) == -1)
9627 align
= dr_alignment
9628 (vect_dr_behavior (vinfo
, first_dr_info
));
9632 misalign
= DR_MISALIGNMENT (first_dr_info
);
9633 if (dataref_offset
== NULL_TREE
9634 && TREE_CODE (dataref_ptr
) == SSA_NAME
)
9635 set_ptr_info_alignment (get_ptr_info (dataref_ptr
),
9640 align
= least_bit_hwi (misalign
| align
);
9641 tree ptr
= build_int_cst (ref_type
, align
);
9643 = gimple_build_call_internal (IFN_MASK_LOAD
, 3,
9646 gimple_call_set_nothrow (call
, true);
9648 data_ref
= NULL_TREE
;
9652 tree ltype
= vectype
;
9653 tree new_vtype
= NULL_TREE
;
9654 unsigned HOST_WIDE_INT gap
9655 = DR_GROUP_GAP (first_stmt_info
);
9656 unsigned int vect_align
9657 = vect_known_alignment_in_bytes (first_dr_info
);
9658 unsigned int scalar_dr_size
9659 = vect_get_scalar_dr_size (first_dr_info
);
9660 /* If there's no peeling for gaps but we have a gap
9661 with slp loads then load the lower half of the
9662 vector only. See get_group_load_store_type for
9663 when we apply this optimization. */
9666 && !LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo
)
9668 && known_eq (nunits
, (group_size
- gap
) * 2)
9669 && known_eq (nunits
, group_size
)
9670 && gap
>= (vect_align
/ scalar_dr_size
))
9674 = vector_vector_composition_type (vectype
, 2,
9676 if (new_vtype
!= NULL_TREE
)
9680 = (dataref_offset
? dataref_offset
9681 : build_int_cst (ref_type
, 0));
9682 if (ltype
!= vectype
9683 && memory_access_type
== VMAT_CONTIGUOUS_REVERSE
)
9685 unsigned HOST_WIDE_INT gap_offset
9686 = gap
* tree_to_uhwi (TYPE_SIZE_UNIT (elem_type
));
9687 tree gapcst
= build_int_cst (ref_type
, gap_offset
);
9688 offset
= size_binop (PLUS_EXPR
, offset
, gapcst
);
9691 = fold_build2 (MEM_REF
, ltype
, dataref_ptr
, offset
);
9692 if (alignment_support_scheme
== dr_aligned
)
9694 else if (DR_MISALIGNMENT (first_dr_info
) == -1)
9695 TREE_TYPE (data_ref
)
9696 = build_aligned_type (TREE_TYPE (data_ref
),
9697 align
* BITS_PER_UNIT
);
9699 TREE_TYPE (data_ref
)
9700 = build_aligned_type (TREE_TYPE (data_ref
),
9701 TYPE_ALIGN (elem_type
));
9702 if (ltype
!= vectype
)
9704 vect_copy_ref_info (data_ref
,
9705 DR_REF (first_dr_info
->dr
));
9706 tree tem
= make_ssa_name (ltype
);
9707 new_stmt
= gimple_build_assign (tem
, data_ref
);
9708 vect_finish_stmt_generation (vinfo
, stmt_info
,
9711 vec
<constructor_elt
, va_gc
> *v
;
9713 if (memory_access_type
== VMAT_CONTIGUOUS_REVERSE
)
9715 CONSTRUCTOR_APPEND_ELT (v
, NULL_TREE
,
9716 build_zero_cst (ltype
));
9717 CONSTRUCTOR_APPEND_ELT (v
, NULL_TREE
, tem
);
9721 CONSTRUCTOR_APPEND_ELT (v
, NULL_TREE
, tem
);
9722 CONSTRUCTOR_APPEND_ELT (v
, NULL_TREE
,
9723 build_zero_cst (ltype
));
9725 gcc_assert (new_vtype
!= NULL_TREE
);
9726 if (new_vtype
== vectype
)
9727 new_stmt
= gimple_build_assign (
9728 vec_dest
, build_constructor (vectype
, v
));
9731 tree new_vname
= make_ssa_name (new_vtype
);
9732 new_stmt
= gimple_build_assign (
9733 new_vname
, build_constructor (new_vtype
, v
));
9734 vect_finish_stmt_generation (vinfo
, stmt_info
,
9736 new_stmt
= gimple_build_assign (
9737 vec_dest
, build1 (VIEW_CONVERT_EXPR
, vectype
,
9744 case dr_explicit_realign
:
9748 tree vs
= size_int (TYPE_VECTOR_SUBPARTS (vectype
));
9750 if (compute_in_loop
)
9751 msq
= vect_setup_realignment (vinfo
, first_stmt_info
, gsi
,
9753 dr_explicit_realign
,
9756 if (TREE_CODE (dataref_ptr
) == SSA_NAME
)
9757 ptr
= copy_ssa_name (dataref_ptr
);
9759 ptr
= make_ssa_name (TREE_TYPE (dataref_ptr
));
9760 // For explicit realign the target alignment should be
9761 // known at compile time.
9762 unsigned HOST_WIDE_INT align
=
9763 DR_TARGET_ALIGNMENT (first_dr_info
).to_constant ();
9764 new_stmt
= gimple_build_assign
9765 (ptr
, BIT_AND_EXPR
, dataref_ptr
,
9767 (TREE_TYPE (dataref_ptr
),
9768 -(HOST_WIDE_INT
) align
));
9769 vect_finish_stmt_generation (vinfo
, stmt_info
,
9772 = build2 (MEM_REF
, vectype
, ptr
,
9773 build_int_cst (ref_type
, 0));
9774 vect_copy_ref_info (data_ref
, DR_REF (first_dr_info
->dr
));
9775 vec_dest
= vect_create_destination_var (scalar_dest
,
9777 new_stmt
= gimple_build_assign (vec_dest
, data_ref
);
9778 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
9779 gimple_assign_set_lhs (new_stmt
, new_temp
);
9780 gimple_move_vops (new_stmt
, stmt_info
->stmt
);
9781 vect_finish_stmt_generation (vinfo
, stmt_info
,
9785 bump
= size_binop (MULT_EXPR
, vs
,
9786 TYPE_SIZE_UNIT (elem_type
));
9787 bump
= size_binop (MINUS_EXPR
, bump
, size_one_node
);
9788 ptr
= bump_vector_ptr (vinfo
, dataref_ptr
, NULL
, gsi
,
9790 new_stmt
= gimple_build_assign
9791 (NULL_TREE
, BIT_AND_EXPR
, ptr
,
9793 (TREE_TYPE (ptr
), -(HOST_WIDE_INT
) align
));
9794 ptr
= copy_ssa_name (ptr
, new_stmt
);
9795 gimple_assign_set_lhs (new_stmt
, ptr
);
9796 vect_finish_stmt_generation (vinfo
, stmt_info
,
9799 = build2 (MEM_REF
, vectype
, ptr
,
9800 build_int_cst (ref_type
, 0));
9803 case dr_explicit_realign_optimized
:
9805 if (TREE_CODE (dataref_ptr
) == SSA_NAME
)
9806 new_temp
= copy_ssa_name (dataref_ptr
);
9808 new_temp
= make_ssa_name (TREE_TYPE (dataref_ptr
));
9809 // We should only be doing this if we know the target
9810 // alignment at compile time.
9811 unsigned HOST_WIDE_INT align
=
9812 DR_TARGET_ALIGNMENT (first_dr_info
).to_constant ();
9813 new_stmt
= gimple_build_assign
9814 (new_temp
, BIT_AND_EXPR
, dataref_ptr
,
9815 build_int_cst (TREE_TYPE (dataref_ptr
),
9816 -(HOST_WIDE_INT
) align
));
9817 vect_finish_stmt_generation (vinfo
, stmt_info
,
9820 = build2 (MEM_REF
, vectype
, new_temp
,
9821 build_int_cst (ref_type
, 0));
9827 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
9828 /* DATA_REF is null if we've already built the statement. */
9831 vect_copy_ref_info (data_ref
, DR_REF (first_dr_info
->dr
));
9832 new_stmt
= gimple_build_assign (vec_dest
, data_ref
);
9834 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
9835 gimple_set_lhs (new_stmt
, new_temp
);
9837 = vect_finish_stmt_generation (vinfo
, stmt_info
,
9840 /* 3. Handle explicit realignment if necessary/supported.
9842 vec_dest = realign_load (msq, lsq, realignment_token) */
9843 if (alignment_support_scheme
== dr_explicit_realign_optimized
9844 || alignment_support_scheme
== dr_explicit_realign
)
9846 lsq
= gimple_assign_lhs (new_stmt
);
9847 if (!realignment_token
)
9848 realignment_token
= dataref_ptr
;
9849 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
9850 new_stmt
= gimple_build_assign (vec_dest
, REALIGN_LOAD_EXPR
,
9851 msq
, lsq
, realignment_token
);
9852 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
9853 gimple_assign_set_lhs (new_stmt
, new_temp
);
9855 = vect_finish_stmt_generation (vinfo
, stmt_info
,
9858 if (alignment_support_scheme
== dr_explicit_realign_optimized
)
9861 if (i
== vec_num
- 1 && j
== ncopies
- 1)
9862 add_phi_arg (phi
, lsq
,
9863 loop_latch_edge (containing_loop
),
9869 if (memory_access_type
== VMAT_CONTIGUOUS_REVERSE
)
9871 tree perm_mask
= perm_mask_for_reverse (vectype
);
9872 new_temp
= permute_vec_elements (vinfo
, new_temp
, new_temp
,
9873 perm_mask
, stmt_info
, gsi
);
9874 new_stmt_info
= vinfo
->lookup_def (new_temp
);
9877 /* Collect vector loads and later create their permutation in
9878 vect_transform_grouped_load (). */
9879 if (grouped_load
|| slp_perm
)
9880 dr_chain
.quick_push (new_temp
);
9882 /* Store vector loads in the corresponding SLP_NODE. */
9883 if (slp
&& !slp_perm
)
9884 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt_info
);
9886 /* With SLP permutation we load the gaps as well, without
9887 we need to skip the gaps after we manage to fully load
9888 all elements. group_gap_adj is DR_GROUP_SIZE here. */
9889 group_elt
+= nunits
;
9890 if (maybe_ne (group_gap_adj
, 0U)
9892 && known_eq (group_elt
, group_size
- group_gap_adj
))
9894 poly_wide_int bump_val
9895 = (wi::to_wide (TYPE_SIZE_UNIT (elem_type
))
9897 tree bump
= wide_int_to_tree (sizetype
, bump_val
);
9898 dataref_ptr
= bump_vector_ptr (vinfo
, dataref_ptr
, ptr_incr
,
9899 gsi
, stmt_info
, bump
);
9903 /* Bump the vector pointer to account for a gap or for excess
9904 elements loaded for a permuted SLP load. */
9905 if (maybe_ne (group_gap_adj
, 0U) && slp_perm
)
9907 poly_wide_int bump_val
9908 = (wi::to_wide (TYPE_SIZE_UNIT (elem_type
))
9910 tree bump
= wide_int_to_tree (sizetype
, bump_val
);
9911 dataref_ptr
= bump_vector_ptr (vinfo
, dataref_ptr
, ptr_incr
, gsi
,
9916 if (slp
&& !slp_perm
)
9922 bool ok
= vect_transform_slp_perm_load (vinfo
, slp_node
, dr_chain
,
9923 gsi
, vf
, false, &n_perms
);
9930 if (memory_access_type
!= VMAT_LOAD_STORE_LANES
)
9931 vect_transform_grouped_load (vinfo
, stmt_info
, dr_chain
,
9933 *vec_stmt
= STMT_VINFO_VEC_STMT (stmt_info
);
9938 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt_info
;
9940 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt_info
;
9941 prev_stmt_info
= new_stmt_info
;
9944 dr_chain
.release ();
9950 /* Function vect_is_simple_cond.
9953 LOOP - the loop that is being vectorized.
9954 COND - Condition that is checked for simple use.
9957 *COMP_VECTYPE - the vector type for the comparison.
9958 *DTS - The def types for the arguments of the comparison
9960 Returns whether a COND can be vectorized. Checks whether
9961 condition operands are supportable using vec_is_simple_use. */
9964 vect_is_simple_cond (tree cond
, vec_info
*vinfo
, slp_tree slp_node
,
9965 tree
*comp_vectype
, enum vect_def_type
*dts
,
9969 tree vectype1
= NULL_TREE
, vectype2
= NULL_TREE
;
9972 if (TREE_CODE (cond
) == SSA_NAME
9973 && VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (cond
)))
9975 if (!vect_is_simple_use (cond
, vinfo
, &dts
[0], comp_vectype
)
9977 || !VECTOR_BOOLEAN_TYPE_P (*comp_vectype
))
9982 if (!COMPARISON_CLASS_P (cond
))
9985 lhs
= TREE_OPERAND (cond
, 0);
9986 rhs
= TREE_OPERAND (cond
, 1);
9988 if (TREE_CODE (lhs
) == SSA_NAME
)
9990 if (!vect_is_simple_use (lhs
, vinfo
, &dts
[0], &vectype1
))
9993 else if (TREE_CODE (lhs
) == INTEGER_CST
|| TREE_CODE (lhs
) == REAL_CST
9994 || TREE_CODE (lhs
) == FIXED_CST
)
9995 dts
[0] = vect_constant_def
;
9999 if (TREE_CODE (rhs
) == SSA_NAME
)
10001 if (!vect_is_simple_use (rhs
, vinfo
, &dts
[1], &vectype2
))
10004 else if (TREE_CODE (rhs
) == INTEGER_CST
|| TREE_CODE (rhs
) == REAL_CST
10005 || TREE_CODE (rhs
) == FIXED_CST
)
10006 dts
[1] = vect_constant_def
;
10010 if (vectype1
&& vectype2
10011 && maybe_ne (TYPE_VECTOR_SUBPARTS (vectype1
),
10012 TYPE_VECTOR_SUBPARTS (vectype2
)))
10015 *comp_vectype
= vectype1
? vectype1
: vectype2
;
10016 /* Invariant comparison. */
10017 if (! *comp_vectype
)
10019 tree scalar_type
= TREE_TYPE (lhs
);
10020 if (VECT_SCALAR_BOOLEAN_TYPE_P (scalar_type
))
10021 *comp_vectype
= truth_type_for (vectype
);
10024 /* If we can widen the comparison to match vectype do so. */
10025 if (INTEGRAL_TYPE_P (scalar_type
)
10027 && tree_int_cst_lt (TYPE_SIZE (scalar_type
),
10028 TYPE_SIZE (TREE_TYPE (vectype
))))
10029 scalar_type
= build_nonstandard_integer_type
10030 (vector_element_bits (vectype
), TYPE_UNSIGNED (scalar_type
));
10031 *comp_vectype
= get_vectype_for_scalar_type (vinfo
, scalar_type
,
10039 /* vectorizable_condition.
10041 Check if STMT_INFO is conditional modify expression that can be vectorized.
10042 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
10043 stmt using VEC_COND_EXPR to replace it, put it in VEC_STMT, and insert it
10046 When STMT_INFO is vectorized as a nested cycle, for_reduction is true.
10048 Return true if STMT_INFO is vectorizable in this way. */
10051 vectorizable_condition (vec_info
*vinfo
,
10052 stmt_vec_info stmt_info
, gimple_stmt_iterator
*gsi
,
10053 stmt_vec_info
*vec_stmt
,
10054 slp_tree slp_node
, stmt_vector_for_cost
*cost_vec
)
10056 tree scalar_dest
= NULL_TREE
;
10057 tree vec_dest
= NULL_TREE
;
10058 tree cond_expr
, cond_expr0
= NULL_TREE
, cond_expr1
= NULL_TREE
;
10059 tree then_clause
, else_clause
;
10060 tree comp_vectype
= NULL_TREE
;
10061 tree vec_cond_lhs
= NULL_TREE
, vec_cond_rhs
= NULL_TREE
;
10062 tree vec_then_clause
= NULL_TREE
, vec_else_clause
= NULL_TREE
;
10065 loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
);
10066 enum vect_def_type dts
[4]
10067 = {vect_unknown_def_type
, vect_unknown_def_type
,
10068 vect_unknown_def_type
, vect_unknown_def_type
};
10072 enum tree_code code
, cond_code
, bitop1
= NOP_EXPR
, bitop2
= NOP_EXPR
;
10073 stmt_vec_info prev_stmt_info
= NULL
;
10075 bb_vec_info bb_vinfo
= dyn_cast
<bb_vec_info
> (vinfo
);
10076 vec
<tree
> vec_oprnds0
= vNULL
;
10077 vec
<tree
> vec_oprnds1
= vNULL
;
10078 vec
<tree
> vec_oprnds2
= vNULL
;
10079 vec
<tree
> vec_oprnds3
= vNULL
;
10081 bool masked
= false;
10083 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
10086 /* Is vectorizable conditional operation? */
10087 gassign
*stmt
= dyn_cast
<gassign
*> (stmt_info
->stmt
);
10091 code
= gimple_assign_rhs_code (stmt
);
10092 if (code
!= COND_EXPR
)
10095 stmt_vec_info reduc_info
= NULL
;
10096 int reduc_index
= -1;
10097 vect_reduction_type reduction_type
= TREE_CODE_REDUCTION
;
10099 = STMT_VINFO_REDUC_DEF (vect_orig_stmt (stmt_info
)) != NULL
;
10102 if (STMT_SLP_TYPE (stmt_info
))
10104 reduc_info
= info_for_reduction (vinfo
, stmt_info
);
10105 reduction_type
= STMT_VINFO_REDUC_TYPE (reduc_info
);
10106 reduc_index
= STMT_VINFO_REDUC_IDX (stmt_info
);
10107 gcc_assert (reduction_type
!= EXTRACT_LAST_REDUCTION
10108 || reduc_index
!= -1);
10112 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
)
10115 /* FORNOW: only supported as part of a reduction. */
10116 if (STMT_VINFO_LIVE_P (stmt_info
))
10118 if (dump_enabled_p ())
10119 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
10120 "value used after loop.\n");
10125 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
10126 tree vectype1
= NULL_TREE
, vectype2
= NULL_TREE
;
10131 vec_num
= SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
);
10135 ncopies
= vect_get_num_copies (loop_vinfo
, vectype
);
10139 gcc_assert (ncopies
>= 1);
10140 if (for_reduction
&& ncopies
> 1)
10141 return false; /* FORNOW */
10143 cond_expr
= gimple_assign_rhs1 (stmt
);
10145 if (!vect_is_simple_cond (cond_expr
, vinfo
, slp_node
,
10146 &comp_vectype
, &dts
[0], vectype
)
10150 unsigned slp_adjust
= 0;
10151 if (slp_node
&& SLP_TREE_CHILDREN (slp_node
).length () == 4)
10152 /* ??? Hack. Hope for COND_EXPR GIMPLE sanitizing or refactor
10155 slp_tree then_slp_node
, else_slp_node
;
10156 if (!vect_is_simple_use (vinfo
, stmt_info
, slp_node
, 1 + slp_adjust
,
10157 &then_clause
, &then_slp_node
, &dts
[2], &vectype1
))
10159 if (!vect_is_simple_use (vinfo
, stmt_info
, slp_node
, 2 + slp_adjust
,
10160 &else_clause
, &else_slp_node
, &dts
[3], &vectype2
))
10163 if (vectype1
&& !useless_type_conversion_p (vectype
, vectype1
))
10166 if (vectype2
&& !useless_type_conversion_p (vectype
, vectype2
))
10169 masked
= !COMPARISON_CLASS_P (cond_expr
);
10170 vec_cmp_type
= truth_type_for (comp_vectype
);
10172 if (vec_cmp_type
== NULL_TREE
)
10175 cond_code
= TREE_CODE (cond_expr
);
10178 cond_expr0
= TREE_OPERAND (cond_expr
, 0);
10179 cond_expr1
= TREE_OPERAND (cond_expr
, 1);
10182 /* For conditional reductions, the "then" value needs to be the candidate
10183 value calculated by this iteration while the "else" value needs to be
10184 the result carried over from previous iterations. If the COND_EXPR
10185 is the other way around, we need to swap it. */
10186 bool must_invert_cmp_result
= false;
10187 if (reduction_type
== EXTRACT_LAST_REDUCTION
&& reduc_index
== 1)
10190 must_invert_cmp_result
= true;
10193 bool honor_nans
= HONOR_NANS (TREE_TYPE (cond_expr0
));
10194 tree_code new_code
= invert_tree_comparison (cond_code
, honor_nans
);
10195 if (new_code
== ERROR_MARK
)
10196 must_invert_cmp_result
= true;
10199 cond_code
= new_code
;
10200 /* Make sure we don't accidentally use the old condition. */
10201 cond_expr
= NULL_TREE
;
10204 std::swap (then_clause
, else_clause
);
10207 if (!masked
&& VECTOR_BOOLEAN_TYPE_P (comp_vectype
))
10209 /* Boolean values may have another representation in vectors
10210 and therefore we prefer bit operations over comparison for
10211 them (which also works for scalar masks). We store opcodes
10212 to use in bitop1 and bitop2. Statement is vectorized as
10213 BITOP2 (rhs1 BITOP1 rhs2) or rhs1 BITOP2 (BITOP1 rhs2)
10214 depending on bitop1 and bitop2 arity. */
10218 bitop1
= BIT_NOT_EXPR
;
10219 bitop2
= BIT_AND_EXPR
;
10222 bitop1
= BIT_NOT_EXPR
;
10223 bitop2
= BIT_IOR_EXPR
;
10226 bitop1
= BIT_NOT_EXPR
;
10227 bitop2
= BIT_AND_EXPR
;
10228 std::swap (cond_expr0
, cond_expr1
);
10231 bitop1
= BIT_NOT_EXPR
;
10232 bitop2
= BIT_IOR_EXPR
;
10233 std::swap (cond_expr0
, cond_expr1
);
10236 bitop1
= BIT_XOR_EXPR
;
10239 bitop1
= BIT_XOR_EXPR
;
10240 bitop2
= BIT_NOT_EXPR
;
10245 cond_code
= SSA_NAME
;
10248 if (TREE_CODE_CLASS (cond_code
) == tcc_comparison
10249 && reduction_type
== EXTRACT_LAST_REDUCTION
10250 && !expand_vec_cmp_expr_p (comp_vectype
, vec_cmp_type
, cond_code
))
10252 if (dump_enabled_p ())
10253 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
10254 "reduction comparison operation not supported.\n");
10260 if (bitop1
!= NOP_EXPR
)
10262 machine_mode mode
= TYPE_MODE (comp_vectype
);
10265 optab
= optab_for_tree_code (bitop1
, comp_vectype
, optab_default
);
10266 if (!optab
|| optab_handler (optab
, mode
) == CODE_FOR_nothing
)
10269 if (bitop2
!= NOP_EXPR
)
10271 optab
= optab_for_tree_code (bitop2
, comp_vectype
,
10273 if (!optab
|| optab_handler (optab
, mode
) == CODE_FOR_nothing
)
10278 vect_cost_for_stmt kind
= vector_stmt
;
10279 if (reduction_type
== EXTRACT_LAST_REDUCTION
)
10280 /* Count one reduction-like operation per vector. */
10281 kind
= vec_to_scalar
;
10282 else if (!expand_vec_cond_expr_p (vectype
, comp_vectype
, cond_code
))
10286 && (!vect_maybe_update_slp_op_vectype
10287 (SLP_TREE_CHILDREN (slp_node
)[0], comp_vectype
)
10288 || (slp_adjust
== 1
10289 && !vect_maybe_update_slp_op_vectype
10290 (SLP_TREE_CHILDREN (slp_node
)[1], comp_vectype
))
10291 || !vect_maybe_update_slp_op_vectype (then_slp_node
, vectype
)
10292 || !vect_maybe_update_slp_op_vectype (else_slp_node
, vectype
)))
10294 if (dump_enabled_p ())
10295 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
10296 "incompatible vector types for invariants\n");
10301 && LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo
)
10302 && reduction_type
== EXTRACT_LAST_REDUCTION
)
10303 vect_record_loop_mask (loop_vinfo
, &LOOP_VINFO_MASKS (loop_vinfo
),
10304 ncopies
* vec_num
, vectype
, NULL
);
10306 STMT_VINFO_TYPE (stmt_info
) = condition_vec_info_type
;
10307 vect_model_simple_cost (vinfo
, stmt_info
, ncopies
, dts
, ndts
, slp_node
,
10316 vec_oprnds0
.create (1);
10317 vec_oprnds1
.create (1);
10318 vec_oprnds2
.create (1);
10319 vec_oprnds3
.create (1);
10323 scalar_dest
= gimple_assign_lhs (stmt
);
10324 if (reduction_type
!= EXTRACT_LAST_REDUCTION
)
10325 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
10327 /* Handle cond expr. */
10328 for (j
= 0; j
< ncopies
; j
++)
10330 bool swap_cond_operands
= false;
10332 /* See whether another part of the vectorized code applies a loop
10333 mask to the condition, or to its inverse. */
10335 vec_loop_masks
*masks
= NULL
;
10336 if (loop_vinfo
&& LOOP_VINFO_FULLY_MASKED_P (loop_vinfo
))
10338 if (reduction_type
== EXTRACT_LAST_REDUCTION
)
10339 masks
= &LOOP_VINFO_MASKS (loop_vinfo
);
10342 scalar_cond_masked_key
cond (cond_expr
, ncopies
);
10343 if (loop_vinfo
->scalar_cond_masked_set
.contains (cond
))
10344 masks
= &LOOP_VINFO_MASKS (loop_vinfo
);
10347 bool honor_nans
= HONOR_NANS (TREE_TYPE (cond
.op0
));
10348 cond
.code
= invert_tree_comparison (cond
.code
, honor_nans
);
10349 if (loop_vinfo
->scalar_cond_masked_set
.contains (cond
))
10351 masks
= &LOOP_VINFO_MASKS (loop_vinfo
);
10352 cond_code
= cond
.code
;
10353 swap_cond_operands
= true;
10359 stmt_vec_info new_stmt_info
= NULL
;
10364 auto_vec
<vec
<tree
>, 4> vec_defs
;
10365 vect_get_slp_defs (vinfo
, slp_node
, &vec_defs
);
10366 vec_oprnds3
= vec_defs
.pop ();
10367 vec_oprnds2
= vec_defs
.pop ();
10369 vec_oprnds1
= vec_defs
.pop ();
10370 vec_oprnds0
= vec_defs
.pop ();
10377 = vect_get_vec_def_for_operand (vinfo
, cond_expr
, stmt_info
,
10383 = vect_get_vec_def_for_operand (vinfo
, cond_expr0
,
10384 stmt_info
, comp_vectype
);
10386 = vect_get_vec_def_for_operand (vinfo
, cond_expr1
,
10387 stmt_info
, comp_vectype
);
10389 vec_then_clause
= vect_get_vec_def_for_operand (vinfo
,
10392 if (reduction_type
!= EXTRACT_LAST_REDUCTION
)
10393 vec_else_clause
= vect_get_vec_def_for_operand (vinfo
,
10401 = vect_get_vec_def_for_stmt_copy (vinfo
, vec_oprnds0
.pop ());
10404 = vect_get_vec_def_for_stmt_copy (vinfo
, vec_oprnds1
.pop ());
10406 vec_then_clause
= vect_get_vec_def_for_stmt_copy (vinfo
,
10407 vec_oprnds2
.pop ());
10408 vec_else_clause
= vect_get_vec_def_for_stmt_copy (vinfo
,
10409 vec_oprnds3
.pop ());
10414 vec_oprnds0
.quick_push (vec_cond_lhs
);
10416 vec_oprnds1
.quick_push (vec_cond_rhs
);
10417 vec_oprnds2
.quick_push (vec_then_clause
);
10418 vec_oprnds3
.quick_push (vec_else_clause
);
10421 /* Arguments are ready. Create the new vector stmt. */
10422 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vec_cond_lhs
)
10424 vec_then_clause
= vec_oprnds2
[i
];
10425 vec_else_clause
= vec_oprnds3
[i
];
10427 if (swap_cond_operands
)
10428 std::swap (vec_then_clause
, vec_else_clause
);
10431 vec_compare
= vec_cond_lhs
;
10434 vec_cond_rhs
= vec_oprnds1
[i
];
10435 if (bitop1
== NOP_EXPR
)
10436 vec_compare
= build2 (cond_code
, vec_cmp_type
,
10437 vec_cond_lhs
, vec_cond_rhs
);
10440 new_temp
= make_ssa_name (vec_cmp_type
);
10442 if (bitop1
== BIT_NOT_EXPR
)
10443 new_stmt
= gimple_build_assign (new_temp
, bitop1
,
10447 = gimple_build_assign (new_temp
, bitop1
, vec_cond_lhs
,
10449 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
10450 if (bitop2
== NOP_EXPR
)
10451 vec_compare
= new_temp
;
10452 else if (bitop2
== BIT_NOT_EXPR
)
10454 /* Instead of doing ~x ? y : z do x ? z : y. */
10455 vec_compare
= new_temp
;
10456 std::swap (vec_then_clause
, vec_else_clause
);
10460 vec_compare
= make_ssa_name (vec_cmp_type
);
10462 = gimple_build_assign (vec_compare
, bitop2
,
10463 vec_cond_lhs
, new_temp
);
10464 vect_finish_stmt_generation (vinfo
, stmt_info
,
10470 /* If we decided to apply a loop mask to the result of the vector
10471 comparison, AND the comparison with the mask now. Later passes
10472 should then be able to reuse the AND results between mulitple
10476 for (int i = 0; i < 100; ++i)
10477 x[i] = y[i] ? z[i] : 10;
10479 results in following optimized GIMPLE:
10481 mask__35.8_43 = vect__4.7_41 != { 0, ... };
10482 vec_mask_and_46 = loop_mask_40 & mask__35.8_43;
10483 _19 = &MEM[base: z_12(D), index: ivtmp_56, step: 4, offset: 0B];
10484 vect_iftmp.11_47 = .MASK_LOAD (_19, 4B, vec_mask_and_46);
10485 vect_iftmp.12_52 = VEC_COND_EXPR <vec_mask_and_46,
10486 vect_iftmp.11_47, { 10, ... }>;
10488 instead of using a masked and unmasked forms of
10489 vec != { 0, ... } (masked in the MASK_LOAD,
10490 unmasked in the VEC_COND_EXPR). */
10492 /* Force vec_compare to be an SSA_NAME rather than a comparison,
10493 in cases where that's necessary. */
10495 if (masks
|| reduction_type
== EXTRACT_LAST_REDUCTION
)
10497 if (!is_gimple_val (vec_compare
))
10499 tree vec_compare_name
= make_ssa_name (vec_cmp_type
);
10500 gassign
*new_stmt
= gimple_build_assign (vec_compare_name
,
10502 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
10503 vec_compare
= vec_compare_name
;
10506 if (must_invert_cmp_result
)
10508 tree vec_compare_name
= make_ssa_name (vec_cmp_type
);
10509 gassign
*new_stmt
= gimple_build_assign (vec_compare_name
,
10512 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
10513 vec_compare
= vec_compare_name
;
10518 unsigned vec_num
= vec_oprnds0
.length ();
10520 = vect_get_loop_mask (gsi
, masks
, vec_num
* ncopies
,
10521 vectype
, vec_num
* j
+ i
);
10522 tree tmp2
= make_ssa_name (vec_cmp_type
);
10524 = gimple_build_assign (tmp2
, BIT_AND_EXPR
, vec_compare
,
10526 vect_finish_stmt_generation (vinfo
, stmt_info
, g
, gsi
);
10527 vec_compare
= tmp2
;
10531 if (reduction_type
== EXTRACT_LAST_REDUCTION
)
10533 gimple
*old_stmt
= vect_orig_stmt (stmt_info
)->stmt
;
10534 tree lhs
= gimple_get_lhs (old_stmt
);
10535 gcall
*new_stmt
= gimple_build_call_internal
10536 (IFN_FOLD_EXTRACT_LAST
, 3, else_clause
, vec_compare
,
10538 gimple_call_set_lhs (new_stmt
, lhs
);
10539 SSA_NAME_DEF_STMT (lhs
) = new_stmt
;
10540 if (old_stmt
== gsi_stmt (*gsi
))
10541 new_stmt_info
= vect_finish_replace_stmt (vinfo
,
10542 stmt_info
, new_stmt
);
10545 /* In this case we're moving the definition to later in the
10546 block. That doesn't matter because the only uses of the
10547 lhs are in phi statements. */
10548 gimple_stmt_iterator old_gsi
= gsi_for_stmt (old_stmt
);
10549 gsi_remove (&old_gsi
, true);
10551 = vect_finish_stmt_generation (vinfo
, stmt_info
,
10557 new_temp
= make_ssa_name (vec_dest
);
10559 = gimple_build_assign (new_temp
, VEC_COND_EXPR
, vec_compare
,
10560 vec_then_clause
, vec_else_clause
);
10562 = vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
10565 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt_info
);
10572 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt_info
;
10574 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt_info
;
10576 prev_stmt_info
= new_stmt_info
;
10579 vec_oprnds0
.release ();
10580 vec_oprnds1
.release ();
10581 vec_oprnds2
.release ();
10582 vec_oprnds3
.release ();
10587 /* vectorizable_comparison.
10589 Check if STMT_INFO is comparison expression that can be vectorized.
10590 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
10591 comparison, put it in VEC_STMT, and insert it at GSI.
10593 Return true if STMT_INFO is vectorizable in this way. */
10596 vectorizable_comparison (vec_info
*vinfo
,
10597 stmt_vec_info stmt_info
, gimple_stmt_iterator
*gsi
,
10598 stmt_vec_info
*vec_stmt
,
10599 slp_tree slp_node
, stmt_vector_for_cost
*cost_vec
)
10601 tree lhs
, rhs1
, rhs2
;
10602 tree vectype1
= NULL_TREE
, vectype2
= NULL_TREE
;
10603 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
10604 tree vec_rhs1
= NULL_TREE
, vec_rhs2
= NULL_TREE
;
10606 loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
);
10607 enum vect_def_type dts
[2] = {vect_unknown_def_type
, vect_unknown_def_type
};
10609 poly_uint64 nunits
;
10611 enum tree_code code
, bitop1
= NOP_EXPR
, bitop2
= NOP_EXPR
;
10612 stmt_vec_info prev_stmt_info
= NULL
;
10614 bb_vec_info bb_vinfo
= dyn_cast
<bb_vec_info
> (vinfo
);
10615 vec
<tree
> vec_oprnds0
= vNULL
;
10616 vec
<tree
> vec_oprnds1
= vNULL
;
10620 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
10623 if (!vectype
|| !VECTOR_BOOLEAN_TYPE_P (vectype
))
10626 mask_type
= vectype
;
10627 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
10632 ncopies
= vect_get_num_copies (loop_vinfo
, vectype
);
10634 gcc_assert (ncopies
>= 1);
10635 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
)
10638 if (STMT_VINFO_LIVE_P (stmt_info
))
10640 if (dump_enabled_p ())
10641 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
10642 "value used after loop.\n");
10646 gassign
*stmt
= dyn_cast
<gassign
*> (stmt_info
->stmt
);
10650 code
= gimple_assign_rhs_code (stmt
);
10652 if (TREE_CODE_CLASS (code
) != tcc_comparison
)
10655 slp_tree slp_rhs1
, slp_rhs2
;
10656 if (!vect_is_simple_use (vinfo
, stmt_info
, slp_node
,
10657 0, &rhs1
, &slp_rhs1
, &dts
[0], &vectype1
))
10660 if (!vect_is_simple_use (vinfo
, stmt_info
, slp_node
,
10661 1, &rhs2
, &slp_rhs2
, &dts
[1], &vectype2
))
10664 if (vectype1
&& vectype2
10665 && maybe_ne (TYPE_VECTOR_SUBPARTS (vectype1
),
10666 TYPE_VECTOR_SUBPARTS (vectype2
)))
10669 vectype
= vectype1
? vectype1
: vectype2
;
10671 /* Invariant comparison. */
10674 if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (rhs1
)))
10675 vectype
= mask_type
;
10677 vectype
= get_vectype_for_scalar_type (vinfo
, TREE_TYPE (rhs1
),
10679 if (!vectype
|| maybe_ne (TYPE_VECTOR_SUBPARTS (vectype
), nunits
))
10682 else if (maybe_ne (nunits
, TYPE_VECTOR_SUBPARTS (vectype
)))
10685 /* Can't compare mask and non-mask types. */
10686 if (vectype1
&& vectype2
10687 && (VECTOR_BOOLEAN_TYPE_P (vectype1
) ^ VECTOR_BOOLEAN_TYPE_P (vectype2
)))
10690 /* Boolean values may have another representation in vectors
10691 and therefore we prefer bit operations over comparison for
10692 them (which also works for scalar masks). We store opcodes
10693 to use in bitop1 and bitop2. Statement is vectorized as
10694 BITOP2 (rhs1 BITOP1 rhs2) or
10695 rhs1 BITOP2 (BITOP1 rhs2)
10696 depending on bitop1 and bitop2 arity. */
10697 bool swap_p
= false;
10698 if (VECTOR_BOOLEAN_TYPE_P (vectype
))
10700 if (code
== GT_EXPR
)
10702 bitop1
= BIT_NOT_EXPR
;
10703 bitop2
= BIT_AND_EXPR
;
10705 else if (code
== GE_EXPR
)
10707 bitop1
= BIT_NOT_EXPR
;
10708 bitop2
= BIT_IOR_EXPR
;
10710 else if (code
== LT_EXPR
)
10712 bitop1
= BIT_NOT_EXPR
;
10713 bitop2
= BIT_AND_EXPR
;
10716 else if (code
== LE_EXPR
)
10718 bitop1
= BIT_NOT_EXPR
;
10719 bitop2
= BIT_IOR_EXPR
;
10724 bitop1
= BIT_XOR_EXPR
;
10725 if (code
== EQ_EXPR
)
10726 bitop2
= BIT_NOT_EXPR
;
10732 if (bitop1
== NOP_EXPR
)
10734 if (!expand_vec_cmp_expr_p (vectype
, mask_type
, code
))
10739 machine_mode mode
= TYPE_MODE (vectype
);
10742 optab
= optab_for_tree_code (bitop1
, vectype
, optab_default
);
10743 if (!optab
|| optab_handler (optab
, mode
) == CODE_FOR_nothing
)
10746 if (bitop2
!= NOP_EXPR
)
10748 optab
= optab_for_tree_code (bitop2
, vectype
, optab_default
);
10749 if (!optab
|| optab_handler (optab
, mode
) == CODE_FOR_nothing
)
10754 /* Put types on constant and invariant SLP children. */
10756 && (!vect_maybe_update_slp_op_vectype (slp_rhs1
, vectype
)
10757 || !vect_maybe_update_slp_op_vectype (slp_rhs2
, vectype
)))
10759 if (dump_enabled_p ())
10760 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
10761 "incompatible vector types for invariants\n");
10765 STMT_VINFO_TYPE (stmt_info
) = comparison_vec_info_type
;
10766 vect_model_simple_cost (vinfo
, stmt_info
,
10767 ncopies
* (1 + (bitop2
!= NOP_EXPR
)),
10768 dts
, ndts
, slp_node
, cost_vec
);
10775 vec_oprnds0
.create (1);
10776 vec_oprnds1
.create (1);
10780 lhs
= gimple_assign_lhs (stmt
);
10781 mask
= vect_create_destination_var (lhs
, mask_type
);
10783 /* Handle cmp expr. */
10784 for (j
= 0; j
< ncopies
; j
++)
10786 stmt_vec_info new_stmt_info
= NULL
;
10791 auto_vec
<vec
<tree
>, 2> vec_defs
;
10792 vect_get_slp_defs (vinfo
, slp_node
, &vec_defs
);
10793 vec_oprnds1
= vec_defs
.pop ();
10794 vec_oprnds0
= vec_defs
.pop ();
10796 std::swap (vec_oprnds0
, vec_oprnds1
);
10800 vec_rhs1
= vect_get_vec_def_for_operand (vinfo
, rhs1
, stmt_info
,
10802 vec_rhs2
= vect_get_vec_def_for_operand (vinfo
, rhs2
, stmt_info
,
10808 vec_rhs1
= vect_get_vec_def_for_stmt_copy (vinfo
,
10809 vec_oprnds0
.pop ());
10810 vec_rhs2
= vect_get_vec_def_for_stmt_copy (vinfo
,
10811 vec_oprnds1
.pop ());
10816 if (swap_p
&& j
== 0)
10817 std::swap (vec_rhs1
, vec_rhs2
);
10818 vec_oprnds0
.quick_push (vec_rhs1
);
10819 vec_oprnds1
.quick_push (vec_rhs2
);
10822 /* Arguments are ready. Create the new vector stmt. */
10823 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vec_rhs1
)
10825 vec_rhs2
= vec_oprnds1
[i
];
10827 new_temp
= make_ssa_name (mask
);
10828 if (bitop1
== NOP_EXPR
)
10830 gassign
*new_stmt
= gimple_build_assign (new_temp
, code
,
10831 vec_rhs1
, vec_rhs2
);
10833 = vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
10838 if (bitop1
== BIT_NOT_EXPR
)
10839 new_stmt
= gimple_build_assign (new_temp
, bitop1
, vec_rhs2
);
10841 new_stmt
= gimple_build_assign (new_temp
, bitop1
, vec_rhs1
,
10844 = vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
10845 if (bitop2
!= NOP_EXPR
)
10847 tree res
= make_ssa_name (mask
);
10848 if (bitop2
== BIT_NOT_EXPR
)
10849 new_stmt
= gimple_build_assign (res
, bitop2
, new_temp
);
10851 new_stmt
= gimple_build_assign (res
, bitop2
, vec_rhs1
,
10854 = vect_finish_stmt_generation (vinfo
, stmt_info
,
10859 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt_info
);
10866 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt_info
;
10868 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt_info
;
10870 prev_stmt_info
= new_stmt_info
;
10873 vec_oprnds0
.release ();
10874 vec_oprnds1
.release ();
10879 /* If SLP_NODE is nonnull, return true if vectorizable_live_operation
10880 can handle all live statements in the node. Otherwise return true
10881 if STMT_INFO is not live or if vectorizable_live_operation can handle it.
10882 GSI and VEC_STMT_P are as for vectorizable_live_operation. */
10885 can_vectorize_live_stmts (loop_vec_info loop_vinfo
,
10886 stmt_vec_info stmt_info
, gimple_stmt_iterator
*gsi
,
10887 slp_tree slp_node
, slp_instance slp_node_instance
,
10889 stmt_vector_for_cost
*cost_vec
)
10893 stmt_vec_info slp_stmt_info
;
10895 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (slp_node
), i
, slp_stmt_info
)
10897 if (STMT_VINFO_LIVE_P (slp_stmt_info
)
10898 && !vectorizable_live_operation (loop_vinfo
,
10899 slp_stmt_info
, gsi
, slp_node
,
10900 slp_node_instance
, i
,
10901 vec_stmt_p
, cost_vec
))
10905 else if (STMT_VINFO_LIVE_P (stmt_info
)
10906 && !vectorizable_live_operation (loop_vinfo
, stmt_info
, gsi
,
10907 slp_node
, slp_node_instance
, -1,
10908 vec_stmt_p
, cost_vec
))
10914 /* Make sure the statement is vectorizable. */
10917 vect_analyze_stmt (vec_info
*vinfo
,
10918 stmt_vec_info stmt_info
, bool *need_to_vectorize
,
10919 slp_tree node
, slp_instance node_instance
,
10920 stmt_vector_for_cost
*cost_vec
)
10922 bb_vec_info bb_vinfo
= dyn_cast
<bb_vec_info
> (vinfo
);
10923 enum vect_relevant relevance
= STMT_VINFO_RELEVANT (stmt_info
);
10925 gimple_seq pattern_def_seq
;
10927 if (dump_enabled_p ())
10928 dump_printf_loc (MSG_NOTE
, vect_location
, "==> examining statement: %G",
10931 if (gimple_has_volatile_ops (stmt_info
->stmt
))
10932 return opt_result::failure_at (stmt_info
->stmt
,
10934 " stmt has volatile operands: %G\n",
10937 if (STMT_VINFO_IN_PATTERN_P (stmt_info
)
10939 && (pattern_def_seq
= STMT_VINFO_PATTERN_DEF_SEQ (stmt_info
)))
10941 gimple_stmt_iterator si
;
10943 for (si
= gsi_start (pattern_def_seq
); !gsi_end_p (si
); gsi_next (&si
))
10945 stmt_vec_info pattern_def_stmt_info
10946 = vinfo
->lookup_stmt (gsi_stmt (si
));
10947 if (STMT_VINFO_RELEVANT_P (pattern_def_stmt_info
)
10948 || STMT_VINFO_LIVE_P (pattern_def_stmt_info
))
10950 /* Analyze def stmt of STMT if it's a pattern stmt. */
10951 if (dump_enabled_p ())
10952 dump_printf_loc (MSG_NOTE
, vect_location
,
10953 "==> examining pattern def statement: %G",
10954 pattern_def_stmt_info
->stmt
);
10957 = vect_analyze_stmt (vinfo
, pattern_def_stmt_info
,
10958 need_to_vectorize
, node
, node_instance
,
10966 /* Skip stmts that do not need to be vectorized. In loops this is expected
10968 - the COND_EXPR which is the loop exit condition
10969 - any LABEL_EXPRs in the loop
10970 - computations that are used only for array indexing or loop control.
10971 In basic blocks we only analyze statements that are a part of some SLP
10972 instance, therefore, all the statements are relevant.
10974 Pattern statement needs to be analyzed instead of the original statement
10975 if the original statement is not relevant. Otherwise, we analyze both
10976 statements. In basic blocks we are called from some SLP instance
10977 traversal, don't analyze pattern stmts instead, the pattern stmts
10978 already will be part of SLP instance. */
10980 stmt_vec_info pattern_stmt_info
= STMT_VINFO_RELATED_STMT (stmt_info
);
10981 if (!STMT_VINFO_RELEVANT_P (stmt_info
)
10982 && !STMT_VINFO_LIVE_P (stmt_info
))
10984 if (STMT_VINFO_IN_PATTERN_P (stmt_info
)
10985 && pattern_stmt_info
10986 && (STMT_VINFO_RELEVANT_P (pattern_stmt_info
)
10987 || STMT_VINFO_LIVE_P (pattern_stmt_info
)))
10989 /* Analyze PATTERN_STMT instead of the original stmt. */
10990 stmt_info
= pattern_stmt_info
;
10991 if (dump_enabled_p ())
10992 dump_printf_loc (MSG_NOTE
, vect_location
,
10993 "==> examining pattern statement: %G",
10998 if (dump_enabled_p ())
10999 dump_printf_loc (MSG_NOTE
, vect_location
, "irrelevant.\n");
11001 return opt_result::success ();
11004 else if (STMT_VINFO_IN_PATTERN_P (stmt_info
)
11006 && pattern_stmt_info
11007 && (STMT_VINFO_RELEVANT_P (pattern_stmt_info
)
11008 || STMT_VINFO_LIVE_P (pattern_stmt_info
)))
11010 /* Analyze PATTERN_STMT too. */
11011 if (dump_enabled_p ())
11012 dump_printf_loc (MSG_NOTE
, vect_location
,
11013 "==> examining pattern statement: %G",
11014 pattern_stmt_info
->stmt
);
11017 = vect_analyze_stmt (vinfo
, pattern_stmt_info
, need_to_vectorize
, node
,
11018 node_instance
, cost_vec
);
11023 switch (STMT_VINFO_DEF_TYPE (stmt_info
))
11025 case vect_internal_def
:
11028 case vect_reduction_def
:
11029 case vect_nested_cycle
:
11030 gcc_assert (!bb_vinfo
11031 && (relevance
== vect_used_in_outer
11032 || relevance
== vect_used_in_outer_by_reduction
11033 || relevance
== vect_used_by_reduction
11034 || relevance
== vect_unused_in_scope
11035 || relevance
== vect_used_only_live
));
11038 case vect_induction_def
:
11039 gcc_assert (!bb_vinfo
);
11042 case vect_constant_def
:
11043 case vect_external_def
:
11044 case vect_unknown_def_type
:
11046 gcc_unreachable ();
11049 if (STMT_VINFO_RELEVANT_P (stmt_info
))
11051 tree type
= gimple_expr_type (stmt_info
->stmt
);
11052 gcc_assert (!VECTOR_MODE_P (TYPE_MODE (type
)));
11053 gcall
*call
= dyn_cast
<gcall
*> (stmt_info
->stmt
);
11054 gcc_assert (STMT_VINFO_VECTYPE (stmt_info
)
11055 || (call
&& gimple_call_lhs (call
) == NULL_TREE
));
11056 *need_to_vectorize
= true;
11059 if (PURE_SLP_STMT (stmt_info
) && !node
)
11061 if (dump_enabled_p ())
11062 dump_printf_loc (MSG_NOTE
, vect_location
,
11063 "handled only by SLP analysis\n");
11064 return opt_result::success ();
11069 && (STMT_VINFO_RELEVANT_P (stmt_info
)
11070 || STMT_VINFO_DEF_TYPE (stmt_info
) == vect_reduction_def
))
11071 /* Prefer vectorizable_call over vectorizable_simd_clone_call so
11072 -mveclibabi= takes preference over library functions with
11073 the simd attribute. */
11074 ok
= (vectorizable_call (vinfo
, stmt_info
, NULL
, NULL
, node
, cost_vec
)
11075 || vectorizable_simd_clone_call (vinfo
, stmt_info
, NULL
, NULL
, node
,
11077 || vectorizable_conversion (vinfo
, stmt_info
,
11078 NULL
, NULL
, node
, cost_vec
)
11079 || vectorizable_operation (vinfo
, stmt_info
,
11080 NULL
, NULL
, node
, cost_vec
)
11081 || vectorizable_assignment (vinfo
, stmt_info
,
11082 NULL
, NULL
, node
, cost_vec
)
11083 || vectorizable_load (vinfo
, stmt_info
, NULL
, NULL
, node
, cost_vec
)
11084 || vectorizable_store (vinfo
, stmt_info
, NULL
, NULL
, node
, cost_vec
)
11085 || vectorizable_reduction (as_a
<loop_vec_info
> (vinfo
), stmt_info
,
11086 node
, node_instance
, cost_vec
)
11087 || vectorizable_induction (as_a
<loop_vec_info
> (vinfo
), stmt_info
,
11088 NULL
, NULL
, node
, cost_vec
)
11089 || vectorizable_shift (vinfo
, stmt_info
, NULL
, NULL
, node
, cost_vec
)
11090 || vectorizable_condition (vinfo
, stmt_info
,
11091 NULL
, NULL
, node
, cost_vec
)
11092 || vectorizable_comparison (vinfo
, stmt_info
, NULL
, NULL
, node
,
11094 || vectorizable_lc_phi (as_a
<loop_vec_info
> (vinfo
),
11095 stmt_info
, NULL
, node
));
11099 ok
= (vectorizable_call (vinfo
, stmt_info
, NULL
, NULL
, node
, cost_vec
)
11100 || vectorizable_simd_clone_call (vinfo
, stmt_info
,
11101 NULL
, NULL
, node
, cost_vec
)
11102 || vectorizable_conversion (vinfo
, stmt_info
, NULL
, NULL
, node
,
11104 || vectorizable_shift (vinfo
, stmt_info
,
11105 NULL
, NULL
, node
, cost_vec
)
11106 || vectorizable_operation (vinfo
, stmt_info
,
11107 NULL
, NULL
, node
, cost_vec
)
11108 || vectorizable_assignment (vinfo
, stmt_info
, NULL
, NULL
, node
,
11110 || vectorizable_load (vinfo
, stmt_info
,
11111 NULL
, NULL
, node
, cost_vec
)
11112 || vectorizable_store (vinfo
, stmt_info
,
11113 NULL
, NULL
, node
, cost_vec
)
11114 || vectorizable_condition (vinfo
, stmt_info
,
11115 NULL
, NULL
, node
, cost_vec
)
11116 || vectorizable_comparison (vinfo
, stmt_info
, NULL
, NULL
, node
,
11121 return opt_result::failure_at (stmt_info
->stmt
,
11123 " relevant stmt not supported: %G",
11126 /* Stmts that are (also) "live" (i.e. - that are used out of the loop)
11127 need extra handling, except for vectorizable reductions. */
11129 && STMT_VINFO_TYPE (stmt_info
) != reduc_vec_info_type
11130 && STMT_VINFO_TYPE (stmt_info
) != lc_phi_info_type
11131 && !can_vectorize_live_stmts (as_a
<loop_vec_info
> (vinfo
),
11132 stmt_info
, NULL
, node
, node_instance
,
11134 return opt_result::failure_at (stmt_info
->stmt
,
11136 " live stmt not supported: %G",
11139 return opt_result::success ();
/* NOTE(review): this region is an extraction-garbled copy -- tokens are split
   across physical lines and the original file's line numbers (11143, 11148,
   ...) are fused into the text; gaps in that numbering show several original
   lines (return type, braces, `break;`s) are missing.  Code text is left
   byte-identical below; only comments are added.  */
11143 /* Function vect_transform_stmt.
11145 Create a vectorized stmt to replace STMT_INFO, and insert it at GSI. */
11148 vect_transform_stmt (vec_info
*vinfo
,
11149 stmt_vec_info stmt_info
, gimple_stmt_iterator
*gsi
,
11150 slp_tree slp_node
, slp_instance slp_node_instance
)
11152 bool is_store
= false;
11153 stmt_vec_info vec_stmt
= NULL
;
/* Pure-SLP stmts must only be transformed via their SLP node.  */
11156 gcc_assert (slp_node
|| !PURE_SLP_STMT (stmt_info
));
/* Remembered so we can assert SLP transform did not clobber it below.  */
11157 stmt_vec_info old_vec_stmt_info
= STMT_VINFO_VEC_STMT (stmt_info
);
11159 loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
);
11160 bool nested_p
= (loop_vinfo
11161 && nested_in_vect_loop_p
11162 (LOOP_VINFO_LOOP (loop_vinfo
), stmt_info
));
11164 gimple
*stmt
= stmt_info
->stmt
;
/* Dispatch on the analyzed stmt kind to the matching transform routine;
   each sets DONE and fills VEC_STMT.  */
11165 switch (STMT_VINFO_TYPE (stmt_info
))
11167 case type_demotion_vec_info_type
:
11168 case type_promotion_vec_info_type
:
11169 case type_conversion_vec_info_type
:
11170 done
= vectorizable_conversion (vinfo
, stmt_info
,
11171 gsi
, &vec_stmt
, slp_node
, NULL
);
11175 case induc_vec_info_type
:
11176 done
= vectorizable_induction (as_a
<loop_vec_info
> (vinfo
),
11177 stmt_info
, gsi
, &vec_stmt
, slp_node
,
11182 case shift_vec_info_type
:
11183 done
= vectorizable_shift (vinfo
, stmt_info
,
11184 gsi
, &vec_stmt
, slp_node
, NULL
);
11188 case op_vec_info_type
:
11189 done
= vectorizable_operation (vinfo
, stmt_info
, gsi
, &vec_stmt
, slp_node
,
11194 case assignment_vec_info_type
:
11195 done
= vectorizable_assignment (vinfo
, stmt_info
,
11196 gsi
, &vec_stmt
, slp_node
, NULL
);
11200 case load_vec_info_type
:
11201 done
= vectorizable_load (vinfo
, stmt_info
, gsi
, &vec_stmt
, slp_node
,
11206 case store_vec_info_type
:
11207 done
= vectorizable_store (vinfo
, stmt_info
,
11208 gsi
, &vec_stmt
, slp_node
, NULL
);
11210 if (STMT_VINFO_GROUPED_ACCESS (stmt_info
) && !slp_node
)
11212 /* In case of interleaving, the whole chain is vectorized when the
11213 last store in the chain is reached. Store stmts before the last
11214 one are skipped, and there vec_stmt_info shouldn't be freed
11216 stmt_vec_info group_info
= DR_GROUP_FIRST_ELEMENT (stmt_info
);
11217 if (DR_GROUP_STORE_COUNT (group_info
) == DR_GROUP_SIZE (group_info
))
11224 case condition_vec_info_type
:
11225 done
= vectorizable_condition (vinfo
, stmt_info
,
11226 gsi
, &vec_stmt
, slp_node
, NULL
);
11230 case comparison_vec_info_type
:
11231 done
= vectorizable_comparison (vinfo
, stmt_info
, gsi
, &vec_stmt
,
11236 case call_vec_info_type
:
11237 done
= vectorizable_call (vinfo
, stmt_info
,
11238 gsi
, &vec_stmt
, slp_node
, NULL
);
/* The call transform may have replaced the stmt at GSI; refresh STMT.  */
11239 stmt
= gsi_stmt (*gsi
);
11242 case call_simd_clone_vec_info_type
:
11243 done
= vectorizable_simd_clone_call (vinfo
, stmt_info
, gsi
, &vec_stmt
,
11245 stmt
= gsi_stmt (*gsi
);
11248 case reduc_vec_info_type
:
11249 done
= vect_transform_reduction (as_a
<loop_vec_info
> (vinfo
), stmt_info
,
11250 gsi
, &vec_stmt
, slp_node
);
11254 case cycle_phi_info_type
:
11255 done
= vect_transform_cycle_phi (as_a
<loop_vec_info
> (vinfo
), stmt_info
,
11256 &vec_stmt
, slp_node
, slp_node_instance
);
11260 case lc_phi_info_type
:
11261 done
= vectorizable_lc_phi (as_a
<loop_vec_info
> (vinfo
),
11262 stmt_info
, &vec_stmt
, slp_node
);
/* Default case (presumably): only live stmts may reach here without a
   transform -- anything else is a bug.  TODO confirm against the
   unmangled original.  */
11267 if (!STMT_VINFO_LIVE_P (stmt_info
))
11269 if (dump_enabled_p ())
11270 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
11271 "stmt not supported.\n");
11272 gcc_unreachable ();
11277 /* Verify SLP vectorization doesn't mess with STMT_VINFO_VEC_STMT.
11278 This would break hybrid SLP vectorization. */
11280 gcc_assert (!vec_stmt
11281 && STMT_VINFO_VEC_STMT (stmt_info
) == old_vec_stmt_info
);
11283 /* Handle inner-loop stmts whose DEF is used in the loop-nest that
11284 is being vectorized, but outside the immediately enclosing loop. */
11287 && STMT_VINFO_TYPE (stmt_info
) != reduc_vec_info_type
11288 && (STMT_VINFO_RELEVANT (stmt_info
) == vect_used_in_outer
11289 || STMT_VINFO_RELEVANT (stmt_info
) ==
11290 vect_used_in_outer_by_reduction
))
11292 class loop
*innerloop
= LOOP_VINFO_LOOP (loop_vinfo
)->inner
;
11293 imm_use_iterator imm_iter
;
11294 use_operand_p use_p
;
11297 if (dump_enabled_p ())
11298 dump_printf_loc (MSG_NOTE
, vect_location
,
11299 "Record the vdef for outer-loop vectorization.\n");
11301 /* Find the relevant loop-exit phi-node, and reord the vec_stmt there
11302 (to be used when vectorizing outer-loop stmts that use the DEF of
11304 if (gimple_code (stmt
) == GIMPLE_PHI
)
11305 scalar_dest
= PHI_RESULT (stmt
);
11307 scalar_dest
= gimple_get_lhs (stmt
);
/* Record VEC_STMT on every use of the scalar def that lies outside the
   inner loop (i.e. on the loop-exit phis).  */
11309 FOR_EACH_IMM_USE_FAST (use_p
, imm_iter
, scalar_dest
)
11310 if (!flow_bb_inside_loop_p (innerloop
, gimple_bb (USE_STMT (use_p
))))
11312 stmt_vec_info exit_phi_info
11313 = vinfo
->lookup_stmt (USE_STMT (use_p
));
11314 STMT_VINFO_VEC_STMT (exit_phi_info
) = vec_stmt
;
11319 STMT_VINFO_VEC_STMT (stmt_info
) = vec_stmt
;
11321 if (STMT_VINFO_TYPE (stmt_info
) == store_vec_info_type
)
11324 /* If this stmt defines a value used on a backedge, update the
11325 vectorized PHIs. */
11326 stmt_vec_info orig_stmt_info
= vect_orig_stmt (stmt_info
);
11327 stmt_vec_info reduc_info
;
11328 if (STMT_VINFO_REDUC_DEF (orig_stmt_info
)
11329 && vect_stmt_to_vectorize (orig_stmt_info
) == stmt_info
11330 && (reduc_info
= info_for_reduction (vinfo
, orig_stmt_info
))
11331 && STMT_VINFO_REDUC_TYPE (reduc_info
) != FOLD_LEFT_REDUCTION
11332 && STMT_VINFO_REDUC_TYPE (reduc_info
) != EXTRACT_LAST_REDUCTION
)
11337 && (phi
= dyn_cast
<gphi
*>
11338 (STMT_VINFO_REDUC_DEF (orig_stmt_info
)->stmt
))
11339 && dominated_by_p (CDI_DOMINATORS
,
11340 gimple_bb (orig_stmt_info
->stmt
), gimple_bb (phi
))
11341 && (e
= loop_latch_edge (gimple_bb (phi
)->loop_father
))
11342 && (PHI_ARG_DEF_FROM_EDGE (phi
, e
)
11343 == gimple_get_lhs (orig_stmt_info
->stmt
)))
11345 stmt_vec_info phi_info
11346 = STMT_VINFO_VEC_STMT (STMT_VINFO_REDUC_DEF (orig_stmt_info
));
11347 stmt_vec_info vec_stmt
= STMT_VINFO_VEC_STMT (stmt_info
);
/* Walk the chains of related (copied) vector phis/stmts in lock step,
   adding each vectorized def as the latch argument of the matching phi.  */
11350 add_phi_arg (as_a
<gphi
*> (phi_info
->stmt
),
11351 gimple_get_lhs (vec_stmt
->stmt
), e
,
11352 gimple_phi_arg_location (phi
, e
->dest_idx
));
11353 phi_info
= STMT_VINFO_RELATED_STMT (phi_info
);
11354 vec_stmt
= STMT_VINFO_RELATED_STMT (vec_stmt
);
11357 gcc_assert (!vec_stmt
);
11360 && slp_node
!= slp_node_instance
->reduc_phis
)
11362 slp_tree phi_node
= slp_node_instance
->reduc_phis
;
11363 gphi
*phi
= as_a
<gphi
*> (SLP_TREE_SCALAR_STMTS (phi_node
)[0]->stmt
);
11364 e
= loop_latch_edge (gimple_bb (phi
)->loop_father
);
11365 gcc_assert (SLP_TREE_VEC_STMTS (phi_node
).length ()
11366 == SLP_TREE_VEC_STMTS (slp_node
).length ());
11367 for (unsigned i
= 0; i
< SLP_TREE_VEC_STMTS (phi_node
).length (); ++i
)
11368 add_phi_arg (as_a
<gphi
*> (SLP_TREE_VEC_STMTS (phi_node
)[i
]->stmt
),
11369 gimple_get_lhs (SLP_TREE_VEC_STMTS (slp_node
)[i
]->stmt
),
11370 e
, gimple_phi_arg_location (phi
, e
->dest_idx
));
11374 /* Handle stmts whose DEF is used outside the loop-nest that is
11375 being vectorized. */
11376 if (is_a
<loop_vec_info
> (vinfo
))
11377 done
= can_vectorize_live_stmts (as_a
<loop_vec_info
> (vinfo
),
11378 stmt_info
, gsi
, slp_node
,
11379 slp_node_instance
, true, NULL
);
/* NOTE(review): extraction-garbled text; lines 11387-11389, 11391, 11393,
   11395 and the closing braces are missing from this view.  Code left
   byte-identical; comments only.  Walks a grouped-store chain via
   DR_GROUP_NEXT_ELEMENT, removing each (original, un-patterned) stmt.  */
11386 /* Remove a group of stores (for SLP or interleaving), free their
11390 vect_remove_stores (vec_info
*vinfo
, stmt_vec_info first_stmt_info
)
11392 stmt_vec_info next_stmt_info
= first_stmt_info
;
11394 while (next_stmt_info
)
/* Capture the successor before removal invalidates NEXT_STMT_INFO.  */
11396 stmt_vec_info tmp
= DR_GROUP_NEXT_ELEMENT (next_stmt_info
);
/* Map a pattern stmt back to the original scalar stmt to remove.  */
11397 next_stmt_info
= vect_orig_stmt (next_stmt_info
);
11398 /* Free the attached stmt_vec_info and remove the stmt. */
11399 vinfo
->remove_stmt (next_stmt_info
);
11400 next_stmt_info
= tmp
;
/* NOTE(review): extraction-garbled text with missing lines (gaps in the
   embedded numbering, e.g. the early `return NULL_TREE;` bail-outs).  Code
   left byte-identical; comments only.  */
11404 /* If NUNITS is nonzero, return a vector type that contains NUNITS
11405 elements of type SCALAR_TYPE, or null if the target doesn't support
11408 If NUNITS is zero, return a vector type that contains elements of
11409 type SCALAR_TYPE, choosing whichever vector size the target prefers.
11411 If PREVAILING_MODE is VOIDmode, we have not yet chosen a vector mode
11412 for this vectorization region and want to "autodetect" the best choice.
11413 Otherwise, PREVAILING_MODE is a previously-chosen vector TYPE_MODE
11414 and we want the new type to be interoperable with it. PREVAILING_MODE
11415 in this case can be a scalar integer mode or a vector mode; when it
11416 is a vector mode, the function acts like a tree-level version of
11417 related_vector_mode. */
11420 get_related_vectype_for_scalar_type (machine_mode prevailing_mode
,
11421 tree scalar_type
, poly_uint64 nunits
)
/* Kept so the address-space qualifier can be re-attached at the end.  */
11423 tree orig_scalar_type
= scalar_type
;
11424 scalar_mode inner_mode
;
11425 machine_mode simd_mode
;
/* Only integer and float element modes are vectorizable here.  */
11428 if (!is_int_mode (TYPE_MODE (scalar_type
), &inner_mode
)
11429 && !is_float_mode (TYPE_MODE (scalar_type
), &inner_mode
))
11432 unsigned int nbytes
= GET_MODE_SIZE (inner_mode
);
11434 /* For vector types of elements whose mode precision doesn't
11435 match their types precision we use a element type of mode
11436 precision. The vectorization routines will have to make sure
11437 they support the proper result truncation/extension.
11438 We also make sure to build vector types with INTEGER_TYPE
11439 component type only. */
11440 if (INTEGRAL_TYPE_P (scalar_type
)
11441 && (GET_MODE_BITSIZE (inner_mode
) != TYPE_PRECISION (scalar_type
)
11442 || TREE_CODE (scalar_type
) != INTEGER_TYPE
))
11443 scalar_type
= build_nonstandard_integer_type (GET_MODE_BITSIZE (inner_mode
),
11444 TYPE_UNSIGNED (scalar_type
));
11446 /* We shouldn't end up building VECTOR_TYPEs of non-scalar components.
11447 When the component mode passes the above test simply use a type
11448 corresponding to that mode. The theory is that any use that
11449 would cause problems with this will disable vectorization anyway. */
11450 else if (!SCALAR_FLOAT_TYPE_P (scalar_type
)
11451 && !INTEGRAL_TYPE_P (scalar_type
))
11452 scalar_type
= lang_hooks
.types
.type_for_mode (inner_mode
, 1);
11454 /* We can't build a vector type of elements with alignment bigger than
11456 else if (nbytes
< TYPE_ALIGN_UNIT (scalar_type
))
11457 scalar_type
= lang_hooks
.types
.type_for_mode (inner_mode
,
11458 TYPE_UNSIGNED (scalar_type
));
11460 /* If we felt back to using the mode fail if there was
11461 no scalar type for it. */
11462 if (scalar_type
== NULL_TREE
)
11465 /* If no prevailing mode was supplied, use the mode the target prefers.
11466 Otherwise lookup a vector mode based on the prevailing mode. */
11467 if (prevailing_mode
== VOIDmode
)
11469 gcc_assert (known_eq (nunits
, 0U));
11470 simd_mode
= targetm
.vectorize
.preferred_simd_mode (inner_mode
);
11471 if (SCALAR_INT_MODE_P (simd_mode
))
11473 /* Traditional behavior is not to take the integer mode
11474 literally, but simply to use it as a way of determining
11475 the vector size. It is up to mode_for_vector to decide
11476 what the TYPE_MODE should be.
11478 Note that nunits == 1 is allowed in order to support single
11479 element vector types. */
11480 if (!multiple_p (GET_MODE_SIZE (simd_mode
), nbytes
, &nunits
)
11481 || !mode_for_vector (inner_mode
, nunits
).exists (&simd_mode
))
11485 else if (SCALAR_INT_MODE_P (prevailing_mode
)
11486 || !related_vector_mode (prevailing_mode
,
11487 inner_mode
, nunits
).exists (&simd_mode
))
11489 /* Fall back to using mode_for_vector, mostly in the hope of being
11490 able to use an integer mode. */
11491 if (known_eq (nunits
, 0U)
11492 && !multiple_p (GET_MODE_SIZE (prevailing_mode
), nbytes
, &nunits
))
11495 if (!mode_for_vector (inner_mode
, nunits
).exists (&simd_mode
))
11499 vectype
= build_vector_type_for_mode (scalar_type
, simd_mode
);
11501 /* In cases where the mode was chosen by mode_for_vector, check that
11502 the target actually supports the chosen mode, or that it at least
11503 allows the vector mode to be replaced by a like-sized integer. */
11504 if (!VECTOR_MODE_P (TYPE_MODE (vectype
))
11505 && !INTEGRAL_MODE_P (TYPE_MODE (vectype
)))
11508 /* Re-attach the address-space qualifier if we canonicalized the scalar
11510 if (TYPE_ADDR_SPACE (orig_scalar_type
) != TYPE_ADDR_SPACE (vectype
))
11511 return build_qualified_type
11512 (vectype
, KEEP_QUAL_ADDR_SPACE (TYPE_QUALS (orig_scalar_type
)));
/* NOTE(review): extraction-garbled text with missing lines (e.g. the
   SCALAR_TYPE/GROUP_SIZE argument line of the first call, the do-loop
   header, the final return).  Code left byte-identical; comments only.  */
11517 /* Function get_vectype_for_scalar_type.
11519 Returns the vector type corresponding to SCALAR_TYPE as supported
11520 by the target. If GROUP_SIZE is nonzero and we're performing BB
11521 vectorization, make sure that the number of elements in the vector
11522 is no bigger than GROUP_SIZE. */
11525 get_vectype_for_scalar_type (vec_info
*vinfo
, tree scalar_type
,
11526 unsigned int group_size
)
11528 /* For BB vectorization, we should always have a group size once we've
11529 constructed the SLP tree; the only valid uses of zero GROUP_SIZEs
11530 are tentative requests during things like early data reference
11531 analysis and pattern recognition. */
11532 if (is_a
<bb_vec_info
> (vinfo
))
11533 gcc_assert (vinfo
->slp_instances
.is_empty () || group_size
!= 0);
11537 tree vectype
= get_related_vectype_for_scalar_type (vinfo
->vector_mode
,
/* First successful query pins down the region's vector mode.  */
11539 if (vectype
&& vinfo
->vector_mode
== VOIDmode
)
11540 vinfo
->vector_mode
= TYPE_MODE (vectype
);
11542 /* Register the natural choice of vector type, before the group size
11543 has been applied. */
11545 vinfo
->used_vector_modes
.add (TYPE_MODE (vectype
));
11547 /* If the natural choice of vector type doesn't satisfy GROUP_SIZE,
11548 try again with an explicit number of elements. */
11551 && maybe_ge (TYPE_VECTOR_SUBPARTS (vectype
), group_size
))
11553 /* Start with the biggest number of units that fits within
11554 GROUP_SIZE and halve it until we find a valid vector type.
11555 Usually either the first attempt will succeed or all will
11556 fail (in the latter case because GROUP_SIZE is too small
11557 for the target), but it's possible that a target could have
11558 a hole between supported vector types.
11560 If GROUP_SIZE is not a power of 2, this has the effect of
11561 trying the largest power of 2 that fits within the group,
11562 even though the group is not a multiple of that vector size.
11563 The BB vectorizer will then try to carve up the group into
11565 unsigned int nunits
= 1 << floor_log2 (group_size
);
11568 vectype
= get_related_vectype_for_scalar_type (vinfo
->vector_mode
,
11569 scalar_type
, nunits
);
/* Loop condition: keep halving NUNITS until a type is found.  The
   halving step itself is among the lines lost to extraction.  */
11572 while (nunits
> 1 && !vectype
);
/* NOTE(review): extraction-garbled text; return type and braces missing
   from this view.  Code left byte-identical; comments only.  Convenience
   overload: derives GROUP_SIZE from the SLP NODE (scalar ops if present,
   otherwise scalar stmts) and forwards to the GROUP_SIZE overload.  */
11578 /* Return the vector type corresponding to SCALAR_TYPE as supported
11579 by the target. NODE, if nonnull, is the SLP tree node that will
11580 use the returned vector type. */
11583 get_vectype_for_scalar_type (vec_info
*vinfo
, tree scalar_type
, slp_tree node
)
11585 unsigned int group_size
= 0;
11588 group_size
= SLP_TREE_SCALAR_OPS (node
).length ();
11589 if (group_size
== 0)
11590 group_size
= SLP_TREE_SCALAR_STMTS (node
).length ();
11592 return get_vectype_for_scalar_type (vinfo
, scalar_type
, group_size
);
/* NOTE(review): extraction-garbled text; the NULL_TREE early return between
   original lines 11607 and 11612 is missing from this view.  Code left
   byte-identical; comments only.  Builds the data vector type first, then
   derives its boolean/mask counterpart via truth_type_for.  */
11595 /* Function get_mask_type_for_scalar_type.
11597 Returns the mask type corresponding to a result of comparison
11598 of vectors of specified SCALAR_TYPE as supported by target.
11599 If GROUP_SIZE is nonzero and we're performing BB vectorization,
11600 make sure that the number of elements in the vector is no bigger
11601 than GROUP_SIZE. */
11604 get_mask_type_for_scalar_type (vec_info
*vinfo
, tree scalar_type
,
11605 unsigned int group_size
)
11607 tree vectype
= get_vectype_for_scalar_type (vinfo
, scalar_type
, group_size
);
11612 return truth_type_for (vectype
);
/* NOTE(review): extraction-garbled text; the failure return between original
   lines 11628 and 11631 is missing from this view.  Code left byte-identical;
   comments only.  */
11615 /* Function get_same_sized_vectype
11617 Returns a vector type corresponding to SCALAR_TYPE of size
11618 VECTOR_TYPE if supported by the target. */
11621 get_same_sized_vectype (tree scalar_type
, tree vector_type
)
/* Scalar booleans map to the mask type of VECTOR_TYPE.  */
11623 if (VECT_SCALAR_BOOLEAN_TYPE_P (scalar_type
))
11624 return truth_type_for (vector_type
);
/* Otherwise NUNITS = size(VECTOR_TYPE) / size(SCALAR_TYPE) must divide
   evenly.  */
11626 poly_uint64 nunits
;
11627 if (!multiple_p (GET_MODE_SIZE (TYPE_MODE (vector_type
)),
11628 GET_MODE_SIZE (TYPE_MODE (scalar_type
)), &nunits
))
11631 return get_related_vectype_for_scalar_type (TYPE_MODE (vector_type
),
11632 scalar_type
, nunits
);
/* NOTE(review): extraction-garbled text; the `return false;`/`return true;`
   lines are missing from this view.  Code left byte-identical; comments
   only.  Checks every recorded used vector mode against what VECTOR_MODE
   would map it to via related_vector_mode.  */
11635 /* Return true if replacing LOOP_VINFO->vector_mode with VECTOR_MODE
11636 would not change the chosen vector modes. */
11639 vect_chooses_same_modes_p (vec_info
*vinfo
, machine_mode vector_mode
)
11641 for (vec_info::mode_set::iterator i
= vinfo
->used_vector_modes
.begin ();
11642 i
!= vinfo
->used_vector_modes
.end (); ++i
)
11643 if (!VECTOR_MODE_P (*i
)
11644 || related_vector_mode (vector_mode
, GET_MODE_INNER (*i
), 0) != *i
)
/* NOTE(review): extraction-garbled text with missing lines (braces, `break;`
   statements of both switches, return statements).  Code left byte-identical;
   comments only.  */
11649 /* Function vect_is_simple_use.
11652 VINFO - the vect info of the loop or basic block that is being vectorized.
11653 OPERAND - operand in the loop or bb.
11655 DEF_STMT_INFO_OUT (optional) - information about the defining stmt in
11656 case OPERAND is an SSA_NAME that is defined in the vectorizable region
11657 DEF_STMT_OUT (optional) - the defining stmt in case OPERAND is an SSA_NAME;
11658 the definition could be anywhere in the function
11659 DT - the type of definition
11661 Returns whether a stmt with OPERAND can be vectorized.
11662 For loops, supportable operands are constants, loop invariants, and operands
11663 that are defined by the current iteration of the loop. Unsupportable
11664 operands are those that are defined by a previous iteration of the loop (as
11665 is the case in reduction/induction computations).
11666 For basic blocks, supportable operands are constants and bb invariants.
11667 For now, operands defined outside the basic block are not supported. */
11670 vect_is_simple_use (tree operand
, vec_info
*vinfo
, enum vect_def_type
*dt
,
11671 stmt_vec_info
*def_stmt_info_out
, gimple
**def_stmt_out
)
/* Initialize all outputs pessimistically before classifying.  */
11673 if (def_stmt_info_out
)
11674 *def_stmt_info_out
= NULL
;
11676 *def_stmt_out
= NULL
;
11677 *dt
= vect_unknown_def_type
;
11679 if (dump_enabled_p ())
11681 dump_printf_loc (MSG_NOTE
, vect_location
,
11682 "vect_is_simple_use: operand ");
11683 if (TREE_CODE (operand
) == SSA_NAME
11684 && !SSA_NAME_IS_DEFAULT_DEF (operand
))
11685 dump_gimple_expr (MSG_NOTE
, TDF_SLIM
, SSA_NAME_DEF_STMT (operand
), 0);
11687 dump_generic_expr (MSG_NOTE
, TDF_SLIM
, operand
);
/* Classify the operand: constant / invariant / non-SSA / default-def
   are handled without looking inside the vectorized region.  */
11690 if (CONSTANT_CLASS_P (operand
))
11691 *dt
= vect_constant_def
;
11692 else if (is_gimple_min_invariant (operand
))
11693 *dt
= vect_external_def
;
11694 else if (TREE_CODE (operand
) != SSA_NAME
)
11695 *dt
= vect_unknown_def_type
;
11696 else if (SSA_NAME_IS_DEFAULT_DEF (operand
))
11697 *dt
= vect_external_def
;
/* Otherwise OPERAND is an SSA name defined by a stmt; external if the
   defining stmt is outside the region being vectorized.  */
11700 gimple
*def_stmt
= SSA_NAME_DEF_STMT (operand
);
11701 stmt_vec_info stmt_vinfo
= vinfo
->lookup_def (operand
);
11703 *dt
= vect_external_def
;
/* Follow a pattern stmt to the stmt that will actually be vectorized.  */
11706 stmt_vinfo
= vect_stmt_to_vectorize (stmt_vinfo
);
11707 def_stmt
= stmt_vinfo
->stmt
;
11708 switch (gimple_code (def_stmt
))
11711 case GIMPLE_ASSIGN
:
11713 *dt
= STMT_VINFO_DEF_TYPE (stmt_vinfo
);
11716 *dt
= vect_unknown_def_type
;
11719 if (def_stmt_info_out
)
11720 *def_stmt_info_out
= stmt_vinfo
;
11723 *def_stmt_out
= def_stmt
;
/* Dump a human-readable name for the classification.  */
11726 if (dump_enabled_p ())
11728 dump_printf (MSG_NOTE
, ", type of def: ");
11731 case vect_uninitialized_def
:
11732 dump_printf (MSG_NOTE
, "uninitialized\n");
11734 case vect_constant_def
:
11735 dump_printf (MSG_NOTE
, "constant\n");
11737 case vect_external_def
:
11738 dump_printf (MSG_NOTE
, "external\n");
11740 case vect_internal_def
:
11741 dump_printf (MSG_NOTE
, "internal\n");
11743 case vect_induction_def
:
11744 dump_printf (MSG_NOTE
, "induction\n");
11746 case vect_reduction_def
:
11747 dump_printf (MSG_NOTE
, "reduction\n");
11749 case vect_double_reduction_def
:
11750 dump_printf (MSG_NOTE
, "double reduction\n");
11752 case vect_nested_cycle
:
11753 dump_printf (MSG_NOTE
, "nested cycle\n");
11755 case vect_unknown_def_type
:
11756 dump_printf (MSG_NOTE
, "unknown\n");
/* An unknown def type means the operand cannot be vectorized.  */
11761 if (*dt
== vect_unknown_def_type
)
11763 if (dump_enabled_p ())
11764 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
11765 "Unsupported pattern.\n");
/* NOTE(review): extraction-garbled text with missing lines (def_stmt
   declaration, braces, return statements).  Code left byte-identical;
   comments only.  */
11772 /* Function vect_is_simple_use.
11774 Same as vect_is_simple_use but also determines the vector operand
11775 type of OPERAND and stores it to *VECTYPE. If the definition of
11776 OPERAND is vect_uninitialized_def, vect_constant_def or
11777 vect_external_def *VECTYPE will be set to NULL_TREE and the caller
11778 is responsible to compute the best suited vector type for the
11782 vect_is_simple_use (tree operand
, vec_info
*vinfo
, enum vect_def_type
*dt
,
11783 tree
*vectype
, stmt_vec_info
*def_stmt_info_out
,
11784 gimple
**def_stmt_out
)
11786 stmt_vec_info def_stmt_info
;
/* Delegate the classification to the base overload.  */
11788 if (!vect_is_simple_use (operand
, vinfo
, dt
, &def_stmt_info
, &def_stmt
))
11792 *def_stmt_out
= def_stmt
;
11793 if (def_stmt_info_out
)
11794 *def_stmt_info_out
= def_stmt_info
;
11796 /* Now get a vector type if the def is internal, otherwise supply
11797 NULL_TREE and leave it up to the caller to figure out a proper
11798 type for the use stmt. */
11799 if (*dt
== vect_internal_def
11800 || *dt
== vect_induction_def
11801 || *dt
== vect_reduction_def
11802 || *dt
== vect_double_reduction_def
11803 || *dt
== vect_nested_cycle
)
11805 *vectype
= STMT_VINFO_VECTYPE (def_stmt_info
);
11806 gcc_assert (*vectype
!= NULL_TREE
);
11807 if (dump_enabled_p ())
11808 dump_printf_loc (MSG_NOTE
, vect_location
,
11809 "vect_is_simple_use: vectype %T\n", *vectype
);
11811 else if (*dt
== vect_uninitialized_def
11812 || *dt
== vect_constant_def
11813 || *dt
== vect_external_def
)
11814 *vectype
= NULL_TREE
;
/* Any other def type at this point is a logic error.  */
11816 gcc_unreachable ();
/* NOTE(review): extraction-garbled text with missing lines (the SLP branch
   structure, *slp_def assignments, braces).  Code left byte-identical;
   comments only.  */
11821 /* Function vect_is_simple_use.
11823 Same as vect_is_simple_use but determines the operand by operand
11824 position OPERAND from either STMT or SLP_NODE, filling in *OP
11825 and *SLP_DEF (when SLP_NODE is not NULL). */
11828 vect_is_simple_use (vec_info
*vinfo
, stmt_vec_info stmt
, slp_tree slp_node
,
11829 unsigned operand
, tree
*op
, slp_tree
*slp_def
,
11830 enum vect_def_type
*dt
,
11831 tree
*vectype
, stmt_vec_info
*def_stmt_info_out
)
/* SLP path (presumably guarded by `if (slp_node)` in the unmangled
   original -- the guard line is missing here): pick the child node at
   position OPERAND and a representative scalar operand from it.  */
11835 slp_tree child
= SLP_TREE_CHILDREN (slp_node
)[operand
];
11837 if (SLP_TREE_DEF_TYPE (child
) == vect_internal_def
)
11838 *op
= gimple_get_lhs (SLP_TREE_SCALAR_STMTS (child
)[0]->stmt
);
11840 *op
= SLP_TREE_SCALAR_OPS (child
)[0];
/* Non-SLP path: fetch the operand directly from the assign or call.  */
11844 if (gassign
*ass
= dyn_cast
<gassign
*> (stmt
->stmt
))
11846 *op
= gimple_op (ass
, operand
+ 1);
11847 /* ??? Ick. But it will vanish with SLP only. */
11848 if (TREE_CODE (*op
) == VIEW_CONVERT_EXPR
)
11849 *op
= TREE_OPERAND (*op
, 0);
11851 else if (gcall
*call
= dyn_cast
<gcall
*> (stmt
->stmt
))
11852 *op
= gimple_call_arg (call
, operand
);
11854 gcc_unreachable ();
11857 /* ??? We might want to update *vectype from *slp_def here though
11858 when sharing nodes this would prevent unsharing in the caller. */
11859 return vect_is_simple_use (*op
, vinfo
, dt
, vectype
, def_stmt_info_out
);
/* NOTE(review): extraction-garbled text; the `return true;` lines are
   missing from this view.  Code left byte-identical; comments only.
   Internal defs are skipped (their vectype is computed elsewhere); an
   already-set vectype must be compatible, otherwise it is recorded.  */
11862 /* If OP is not NULL and is external or constant update its vector
11863 type with VECTYPE. Returns true if successful or false if not,
11864 for example when conflicting vector types are present. */
11867 vect_maybe_update_slp_op_vectype (slp_tree op
, tree vectype
)
11869 if (!op
|| SLP_TREE_DEF_TYPE (op
) == vect_internal_def
)
11871 if (SLP_TREE_VECTYPE (op
))
11872 return types_compatible_p (SLP_TREE_VECTYPE (op
), vectype
)
;
11873 SLP_TREE_VECTYPE (op
) = vectype
;
/* NOTE(review): extraction-garbled text with many missing lines (switch
   header, braces, `break;`s, return statements, several case labels such as
   the CONVERT/FLOAT cases whose bodies at 12004/12009 remain).  Code left
   byte-identical; comments only.  */
11877 /* Function supportable_widening_operation
11879 Check whether an operation represented by the code CODE is a
11880 widening operation that is supported by the target platform in
11881 vector form (i.e., when operating on arguments of type VECTYPE_IN
11882 producing a result of type VECTYPE_OUT).
11884 Widening operations we currently support are NOP (CONVERT), FLOAT,
11885 FIX_TRUNC and WIDEN_MULT. This function checks if these operations
11886 are supported by the target platform either directly (via vector
11887 tree-codes), or via target builtins.
11890 - CODE1 and CODE2 are codes of vector operations to be used when
11891 vectorizing the operation, if available.
11892 - MULTI_STEP_CVT determines the number of required intermediate steps in
11893 case of multi-step conversion (like char->short->int - in that case
11894 MULTI_STEP_CVT will be 1).
11895 - INTERM_TYPES contains the intermediate type required to perform the
11896 widening operation (short in the above example). */
11899 supportable_widening_operation (vec_info
*vinfo
,
11900 enum tree_code code
, stmt_vec_info stmt_info
,
11901 tree vectype_out
, tree vectype_in
,
11902 enum tree_code
*code1
, enum tree_code
*code2
,
11903 int *multi_step_cvt
,
11904 vec
<tree
> *interm_types
)
11906 loop_vec_info loop_info
= dyn_cast
<loop_vec_info
> (vinfo
);
11907 class loop
*vect_loop
= NULL
;
11908 machine_mode vec_mode
;
11909 enum insn_code icode1
, icode2
;
11910 optab optab1
, optab2
;
11911 tree vectype
= vectype_in
;
11912 tree wide_vectype
= vectype_out
;
11913 enum tree_code c1
, c2
;
11915 tree prev_type
, intermediate_type
;
11916 machine_mode intermediate_mode
, prev_mode
;
11917 optab optab3
, optab4
;
11919 *multi_step_cvt
= 0;
11921 vect_loop
= LOOP_VINFO_LOOP (loop_info
);
/* Map CODE to the pair of lo/hi (or even/odd) vector tree codes C1/C2.
   The `switch (code)` header itself is among the lines lost to
   extraction.  */
11925 case WIDEN_MULT_EXPR
:
11926 /* The result of a vectorized widening operation usually requires
11927 two vectors (because the widened results do not fit into one vector).
11928 The generated vector results would normally be expected to be
11929 generated in the same order as in the original scalar computation,
11930 i.e. if 8 results are generated in each vector iteration, they are
11931 to be organized as follows:
11932 vect1: [res1,res2,res3,res4],
11933 vect2: [res5,res6,res7,res8].
11935 However, in the special case that the result of the widening
11936 operation is used in a reduction computation only, the order doesn't
11937 matter (because when vectorizing a reduction we change the order of
11938 the computation). Some targets can take advantage of this and
11939 generate more efficient code. For example, targets like Altivec,
11940 that support widen_mult using a sequence of {mult_even,mult_odd}
11941 generate the following vectors:
11942 vect1: [res1,res3,res5,res7],
11943 vect2: [res2,res4,res6,res8].
11945 When vectorizing outer-loops, we execute the inner-loop sequentially
11946 (each vectorized inner-loop iteration contributes to VF outer-loop
11947 iterations in parallel). We therefore don't allow to change the
11948 order of the computation in the inner-loop during outer-loop
11950 /* TODO: Another case in which order doesn't *really* matter is when we
11951 widen and then contract again, e.g. (short)((int)x * y >> 8).
11952 Normally, pack_trunc performs an even/odd permute, whereas the
11953 repack from an even/odd expansion would be an interleave, which
11954 would be significantly simpler for e.g. AVX2. */
11955 /* In any case, in order to avoid duplicating the code below, recurse
11956 on VEC_WIDEN_MULT_EVEN_EXPR. If it succeeds, all the return values
11957 are properly set up for the caller. If we fail, we'll continue with
11958 a VEC_WIDEN_MULT_LO/HI_EXPR check. */
11960 && STMT_VINFO_RELEVANT (stmt_info
) == vect_used_by_reduction
11961 && !nested_in_vect_loop_p (vect_loop
, stmt_info
)
11962 && supportable_widening_operation (vinfo
, VEC_WIDEN_MULT_EVEN_EXPR
,
11963 stmt_info
, vectype_out
,
11964 vectype_in
, code1
, code2
,
11965 multi_step_cvt
, interm_types
))
11967 /* Elements in a vector with vect_used_by_reduction property cannot
11968 be reordered if the use chain with this property does not have the
11969 same operation. One such an example is s += a * b, where elements
11970 in a and b cannot be reordered. Here we check if the vector defined
11971 by STMT is only directly used in the reduction statement. */
11972 tree lhs
= gimple_assign_lhs (stmt_info
->stmt
);
11973 stmt_vec_info use_stmt_info
= loop_info
->lookup_single_use (lhs
);
11975 && STMT_VINFO_DEF_TYPE (use_stmt_info
) == vect_reduction_def
)
11978 c1
= VEC_WIDEN_MULT_LO_EXPR
;
11979 c2
= VEC_WIDEN_MULT_HI_EXPR
;
11982 case DOT_PROD_EXPR
:
11983 c1
= DOT_PROD_EXPR
;
11984 c2
= DOT_PROD_EXPR
;
11992 case VEC_WIDEN_MULT_EVEN_EXPR
:
11993 /* Support the recursion induced just above. */
11994 c1
= VEC_WIDEN_MULT_EVEN_EXPR
;
11995 c2
= VEC_WIDEN_MULT_ODD_EXPR
;
11998 case WIDEN_LSHIFT_EXPR
:
11999 c1
= VEC_WIDEN_LSHIFT_LO_EXPR
;
12000 c2
= VEC_WIDEN_LSHIFT_HI_EXPR
;
12004 c1
= VEC_UNPACK_LO_EXPR
;
12005 c2
= VEC_UNPACK_HI_EXPR
;
12009 c1
= VEC_UNPACK_FLOAT_LO_EXPR
;
12010 c2
= VEC_UNPACK_FLOAT_HI_EXPR
;
12013 case FIX_TRUNC_EXPR
:
12014 c1
= VEC_UNPACK_FIX_TRUNC_LO_EXPR
;
12015 c2
= VEC_UNPACK_FIX_TRUNC_HI_EXPR
;
12019 gcc_unreachable ();
/* On big-endian targets lo/hi meaning is swapped (even/odd is not).  */
12022 if (BYTES_BIG_ENDIAN
&& c1
!= VEC_WIDEN_MULT_EVEN_EXPR
)
12023 std::swap (c1
, c2
);
12025 if (code
== FIX_TRUNC_EXPR
)
12027 /* The signedness is determined from output operand. */
12028 optab1
= optab_for_tree_code (c1
, vectype_out
, optab_default
);
12029 optab2
= optab_for_tree_code (c2
, vectype_out
, optab_default
);
12031 else if (CONVERT_EXPR_CODE_P (code
)
12032 && VECTOR_BOOLEAN_TYPE_P (wide_vectype
)
12033 && VECTOR_BOOLEAN_TYPE_P (vectype
)
12034 && TYPE_MODE (wide_vectype
) == TYPE_MODE (vectype
)
12035 && SCALAR_INT_MODE_P (TYPE_MODE (vectype
)))
12037 /* If the input and result modes are the same, a different optab
12038 is needed where we pass in the number of units in vectype. */
12039 optab1
= vec_unpacks_sbool_lo_optab
;
12040 optab2
= vec_unpacks_sbool_hi_optab
;
12044 optab1
= optab_for_tree_code (c1
, vectype
, optab_default
);
12045 optab2
= optab_for_tree_code (c2
, vectype
, optab_default
);
12048 if (!optab1
|| !optab2
)
/* Both halves must have an insn for the narrow input mode.  */
12051 vec_mode
= TYPE_MODE (vectype
);
12052 if ((icode1
= optab_handler (optab1
, vec_mode
)) == CODE_FOR_nothing
12053 || (icode2
= optab_handler (optab2
, vec_mode
)) == CODE_FOR_nothing
)
12059 if (insn_data
[icode1
].operand
[0].mode
== TYPE_MODE (wide_vectype
)
12060 && insn_data
[icode2
].operand
[0].mode
== TYPE_MODE (wide_vectype
))
12062 if (!VECTOR_BOOLEAN_TYPE_P (vectype
))
12064 /* For scalar masks we may have different boolean
12065 vector types having the same QImode. Thus we
12066 add additional check for elements number. */
12067 if (known_eq (TYPE_VECTOR_SUBPARTS (vectype
),
12068 TYPE_VECTOR_SUBPARTS (wide_vectype
) * 2))
12072 /* Check if it's a multi-step conversion that can be done using intermediate
12075 prev_type
= vectype
;
12076 prev_mode
= vec_mode
;
12078 if (!CONVERT_EXPR_CODE_P (code
))
12081 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
12082 intermediate steps in promotion sequence. We try
12083 MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do
12085 interm_types
->create (MAX_INTERM_CVT_STEPS
);
12086 for (i
= 0; i
< MAX_INTERM_CVT_STEPS
; i
++)
12088 intermediate_mode
= insn_data
[icode1
].operand
[0].mode
;
12089 if (VECTOR_BOOLEAN_TYPE_P (prev_type
))
12091 = vect_halve_mask_nunits (prev_type
, intermediate_mode
);
12094 = lang_hooks
.types
.type_for_mode (intermediate_mode
,
12095 TYPE_UNSIGNED (prev_type
));
12097 if (VECTOR_BOOLEAN_TYPE_P (intermediate_type
)
12098 && VECTOR_BOOLEAN_TYPE_P (prev_type
)
12099 && intermediate_mode
== prev_mode
12100 && SCALAR_INT_MODE_P (prev_mode
))
12102 /* If the input and result modes are the same, a different optab
12103 is needed where we pass in the number of units in vectype. */
12104 optab3
= vec_unpacks_sbool_lo_optab
;
12105 optab4
= vec_unpacks_sbool_hi_optab
;
12109 optab3
= optab_for_tree_code (c1
, intermediate_type
, optab_default
);
12110 optab4
= optab_for_tree_code (c2
, intermediate_type
, optab_default
);
/* Fail unless both the step into PREV_MODE and the step out of the
   intermediate mode are supported with matching result modes.  */
12113 if (!optab3
|| !optab4
12114 || (icode1
= optab_handler (optab1
, prev_mode
)) == CODE_FOR_nothing
12115 || insn_data
[icode1
].operand
[0].mode
!= intermediate_mode
12116 || (icode2
= optab_handler (optab2
, prev_mode
)) == CODE_FOR_nothing
12117 || insn_data
[icode2
].operand
[0].mode
!= intermediate_mode
12118 || ((icode1
= optab_handler (optab3
, intermediate_mode
))
12119 == CODE_FOR_nothing
)
12120 || ((icode2
= optab_handler (optab4
, intermediate_mode
))
12121 == CODE_FOR_nothing
))
12124 interm_types
->quick_push (intermediate_type
);
12125 (*multi_step_cvt
)++;
12127 if (insn_data
[icode1
].operand
[0].mode
== TYPE_MODE (wide_vectype
)
12128 && insn_data
[icode2
].operand
[0].mode
== TYPE_MODE (wide_vectype
))
12130 if (!VECTOR_BOOLEAN_TYPE_P (vectype
))
12132 if (known_eq (TYPE_VECTOR_SUBPARTS (intermediate_type
),
12133 TYPE_VECTOR_SUBPARTS (wide_vectype
) * 2))
12137 prev_type
= intermediate_type
;
12138 prev_mode
= intermediate_mode
;
/* Ran out of steps: discard the collected intermediate types.  */
12141 interm_types
->release ();
12146 /* Function supportable_narrowing_operation
12148 Check whether an operation represented by the code CODE is a
12149 narrowing operation that is supported by the target platform in
12150 vector form (i.e., when operating on arguments of type VECTYPE_IN
12151 and producing a result of type VECTYPE_OUT).
12153 Narrowing operations we currently support are NOP (CONVERT), FIX_TRUNC
12154 and FLOAT. This function checks if these operations are supported by
12155 the target platform directly via vector tree-codes.
12158 - CODE1 is the code of a vector operation to be used when
12159 vectorizing the operation, if available.
12160 - MULTI_STEP_CVT determines the number of required intermediate steps in
12161 case of multi-step conversion (like int->short->char - in that case
12162 MULTI_STEP_CVT will be 1).
12163 - INTERM_TYPES contains the intermediate type required to perform the
12164 narrowing operation (short in the above example). */
12167 supportable_narrowing_operation (enum tree_code code
,
12168 tree vectype_out
, tree vectype_in
,
12169 enum tree_code
*code1
, int *multi_step_cvt
,
12170 vec
<tree
> *interm_types
)
12172 machine_mode vec_mode
;
12173 enum insn_code icode1
;
12174 optab optab1
, interm_optab
;
12175 tree vectype
= vectype_in
;
12176 tree narrow_vectype
= vectype_out
;
12178 tree intermediate_type
, prev_type
;
12179 machine_mode intermediate_mode
, prev_mode
;
12183 *multi_step_cvt
= 0;
12187 c1
= VEC_PACK_TRUNC_EXPR
;
12188 if (VECTOR_BOOLEAN_TYPE_P (narrow_vectype
)
12189 && VECTOR_BOOLEAN_TYPE_P (vectype
)
12190 && TYPE_MODE (narrow_vectype
) == TYPE_MODE (vectype
)
12191 && SCALAR_INT_MODE_P (TYPE_MODE (vectype
)))
12192 optab1
= vec_pack_sbool_trunc_optab
;
12194 optab1
= optab_for_tree_code (c1
, vectype
, optab_default
);
12197 case FIX_TRUNC_EXPR
:
12198 c1
= VEC_PACK_FIX_TRUNC_EXPR
;
12199 /* The signedness is determined from output operand. */
12200 optab1
= optab_for_tree_code (c1
, vectype_out
, optab_default
);
12204 c1
= VEC_PACK_FLOAT_EXPR
;
12205 optab1
= optab_for_tree_code (c1
, vectype
, optab_default
);
12209 gcc_unreachable ();
12215 vec_mode
= TYPE_MODE (vectype
);
12216 if ((icode1
= optab_handler (optab1
, vec_mode
)) == CODE_FOR_nothing
)
12221 if (insn_data
[icode1
].operand
[0].mode
== TYPE_MODE (narrow_vectype
))
12223 if (!VECTOR_BOOLEAN_TYPE_P (vectype
))
12225 /* For scalar masks we may have different boolean
12226 vector types having the same QImode. Thus we
12227 add additional check for elements number. */
12228 if (known_eq (TYPE_VECTOR_SUBPARTS (vectype
) * 2,
12229 TYPE_VECTOR_SUBPARTS (narrow_vectype
)))
12233 if (code
== FLOAT_EXPR
)
12236 /* Check if it's a multi-step conversion that can be done using intermediate
12238 prev_mode
= vec_mode
;
12239 prev_type
= vectype
;
12240 if (code
== FIX_TRUNC_EXPR
)
12241 uns
= TYPE_UNSIGNED (vectype_out
);
12243 uns
= TYPE_UNSIGNED (vectype
);
12245 /* For multi-step FIX_TRUNC_EXPR prefer signed floating to integer
12246 conversion over unsigned, as unsigned FIX_TRUNC_EXPR is often more
12247 costly than signed. */
12248 if (code
== FIX_TRUNC_EXPR
&& uns
)
12250 enum insn_code icode2
;
12253 = lang_hooks
.types
.type_for_mode (TYPE_MODE (vectype_out
), 0);
12255 = optab_for_tree_code (c1
, intermediate_type
, optab_default
);
12256 if (interm_optab
!= unknown_optab
12257 && (icode2
= optab_handler (optab1
, vec_mode
)) != CODE_FOR_nothing
12258 && insn_data
[icode1
].operand
[0].mode
12259 == insn_data
[icode2
].operand
[0].mode
)
12262 optab1
= interm_optab
;
12267 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
12268 intermediate steps in promotion sequence. We try
12269 MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do not. */
12270 interm_types
->create (MAX_INTERM_CVT_STEPS
);
12271 for (i
= 0; i
< MAX_INTERM_CVT_STEPS
; i
++)
12273 intermediate_mode
= insn_data
[icode1
].operand
[0].mode
;
12274 if (VECTOR_BOOLEAN_TYPE_P (prev_type
))
12276 = vect_double_mask_nunits (prev_type
, intermediate_mode
);
12279 = lang_hooks
.types
.type_for_mode (intermediate_mode
, uns
);
12280 if (VECTOR_BOOLEAN_TYPE_P (intermediate_type
)
12281 && VECTOR_BOOLEAN_TYPE_P (prev_type
)
12282 && intermediate_mode
== prev_mode
12283 && SCALAR_INT_MODE_P (prev_mode
))
12284 interm_optab
= vec_pack_sbool_trunc_optab
;
12287 = optab_for_tree_code (VEC_PACK_TRUNC_EXPR
, intermediate_type
,
12290 || ((icode1
= optab_handler (optab1
, prev_mode
)) == CODE_FOR_nothing
)
12291 || insn_data
[icode1
].operand
[0].mode
!= intermediate_mode
12292 || ((icode1
= optab_handler (interm_optab
, intermediate_mode
))
12293 == CODE_FOR_nothing
))
12296 interm_types
->quick_push (intermediate_type
);
12297 (*multi_step_cvt
)++;
12299 if (insn_data
[icode1
].operand
[0].mode
== TYPE_MODE (narrow_vectype
))
12301 if (!VECTOR_BOOLEAN_TYPE_P (vectype
))
12303 if (known_eq (TYPE_VECTOR_SUBPARTS (intermediate_type
) * 2,
12304 TYPE_VECTOR_SUBPARTS (narrow_vectype
)))
12308 prev_mode
= intermediate_mode
;
12309 prev_type
= intermediate_type
;
12310 optab1
= interm_optab
;
12313 interm_types
->release ();
12317 /* Generate and return a statement that sets vector mask MASK such that
12318 MASK[I] is true iff J + START_INDEX < END_INDEX for all J <= I. */
12321 vect_gen_while (tree mask
, tree start_index
, tree end_index
)
12323 tree cmp_type
= TREE_TYPE (start_index
);
12324 tree mask_type
= TREE_TYPE (mask
);
12325 gcc_checking_assert (direct_internal_fn_supported_p (IFN_WHILE_ULT
,
12326 cmp_type
, mask_type
,
12327 OPTIMIZE_FOR_SPEED
));
12328 gcall
*call
= gimple_build_call_internal (IFN_WHILE_ULT
, 3,
12329 start_index
, end_index
,
12330 build_zero_cst (mask_type
));
12331 gimple_call_set_lhs (call
, mask
);
12335 /* Generate a vector mask of type MASK_TYPE for which index I is false iff
12336 J + START_INDEX < END_INDEX for all J <= I. Add the statements to SEQ. */
12339 vect_gen_while_not (gimple_seq
*seq
, tree mask_type
, tree start_index
,
12342 tree tmp
= make_ssa_name (mask_type
);
12343 gcall
*call
= vect_gen_while (tmp
, start_index
, end_index
);
12344 gimple_seq_add_stmt (seq
, call
);
12345 return gimple_build (seq
, BIT_NOT_EXPR
, mask_type
, tmp
);
12348 /* Try to compute the vector types required to vectorize STMT_INFO,
12349 returning true on success and false if vectorization isn't possible.
12350 If GROUP_SIZE is nonzero and we're performing BB vectorization,
12351 take sure that the number of elements in the vectors is no bigger
12356 - Set *STMT_VECTYPE_OUT to:
12357 - NULL_TREE if the statement doesn't need to be vectorized;
12358 - the equivalent of STMT_VINFO_VECTYPE otherwise.
12360 - Set *NUNITS_VECTYPE_OUT to the vector type that contains the maximum
12361 number of units needed to vectorize STMT_INFO, or NULL_TREE if the
12362 statement does not help to determine the overall number of units. */
12365 vect_get_vector_types_for_stmt (vec_info
*vinfo
, stmt_vec_info stmt_info
,
12366 tree
*stmt_vectype_out
,
12367 tree
*nunits_vectype_out
,
12368 unsigned int group_size
)
12370 gimple
*stmt
= stmt_info
->stmt
;
12372 /* For BB vectorization, we should always have a group size once we've
12373 constructed the SLP tree; the only valid uses of zero GROUP_SIZEs
12374 are tentative requests during things like early data reference
12375 analysis and pattern recognition. */
12376 if (is_a
<bb_vec_info
> (vinfo
))
12377 gcc_assert (vinfo
->slp_instances
.is_empty () || group_size
!= 0);
12381 *stmt_vectype_out
= NULL_TREE
;
12382 *nunits_vectype_out
= NULL_TREE
;
12384 if (gimple_get_lhs (stmt
) == NULL_TREE
12385 /* MASK_STORE has no lhs, but is ok. */
12386 && !gimple_call_internal_p (stmt
, IFN_MASK_STORE
))
12388 if (is_a
<gcall
*> (stmt
))
12390 /* Ignore calls with no lhs. These must be calls to
12391 #pragma omp simd functions, and what vectorization factor
12392 it really needs can't be determined until
12393 vectorizable_simd_clone_call. */
12394 if (dump_enabled_p ())
12395 dump_printf_loc (MSG_NOTE
, vect_location
,
12396 "defer to SIMD clone analysis.\n");
12397 return opt_result::success ();
12400 return opt_result::failure_at (stmt
,
12401 "not vectorized: irregular stmt.%G", stmt
);
12404 if (VECTOR_MODE_P (TYPE_MODE (gimple_expr_type (stmt
))))
12405 return opt_result::failure_at (stmt
,
12406 "not vectorized: vector stmt in loop:%G",
12410 tree scalar_type
= NULL_TREE
;
12411 if (group_size
== 0 && STMT_VINFO_VECTYPE (stmt_info
))
12413 vectype
= STMT_VINFO_VECTYPE (stmt_info
);
12414 if (dump_enabled_p ())
12415 dump_printf_loc (MSG_NOTE
, vect_location
,
12416 "precomputed vectype: %T\n", vectype
);
12418 else if (vect_use_mask_type_p (stmt_info
))
12420 unsigned int precision
= stmt_info
->mask_precision
;
12421 scalar_type
= build_nonstandard_integer_type (precision
, 1);
12422 vectype
= get_mask_type_for_scalar_type (vinfo
, scalar_type
, group_size
);
12424 return opt_result::failure_at (stmt
, "not vectorized: unsupported"
12425 " data-type %T\n", scalar_type
);
12426 if (dump_enabled_p ())
12427 dump_printf_loc (MSG_NOTE
, vect_location
, "vectype: %T\n", vectype
);
12431 if (data_reference
*dr
= STMT_VINFO_DATA_REF (stmt_info
))
12432 scalar_type
= TREE_TYPE (DR_REF (dr
));
12433 else if (gimple_call_internal_p (stmt
, IFN_MASK_STORE
))
12434 scalar_type
= TREE_TYPE (gimple_call_arg (stmt
, 3));
12436 scalar_type
= TREE_TYPE (gimple_get_lhs (stmt
));
12438 if (dump_enabled_p ())
12441 dump_printf_loc (MSG_NOTE
, vect_location
,
12442 "get vectype for scalar type (group size %d):"
12443 " %T\n", group_size
, scalar_type
);
12445 dump_printf_loc (MSG_NOTE
, vect_location
,
12446 "get vectype for scalar type: %T\n", scalar_type
);
12448 vectype
= get_vectype_for_scalar_type (vinfo
, scalar_type
, group_size
);
12450 return opt_result::failure_at (stmt
,
12452 " unsupported data-type %T\n",
12455 if (dump_enabled_p ())
12456 dump_printf_loc (MSG_NOTE
, vect_location
, "vectype: %T\n", vectype
);
12458 *stmt_vectype_out
= vectype
;
12460 /* Don't try to compute scalar types if the stmt produces a boolean
12461 vector; use the existing vector type instead. */
12462 tree nunits_vectype
= vectype
;
12463 if (!VECTOR_BOOLEAN_TYPE_P (vectype
))
12465 /* The number of units is set according to the smallest scalar
12466 type (or the largest vector size, but we only support one
12467 vector size per vectorization). */
12468 HOST_WIDE_INT dummy
;
12469 scalar_type
= vect_get_smallest_scalar_type (stmt_info
, &dummy
, &dummy
);
12470 if (scalar_type
!= TREE_TYPE (vectype
))
12472 if (dump_enabled_p ())
12473 dump_printf_loc (MSG_NOTE
, vect_location
,
12474 "get vectype for smallest scalar type: %T\n",
12476 nunits_vectype
= get_vectype_for_scalar_type (vinfo
, scalar_type
,
12478 if (!nunits_vectype
)
12479 return opt_result::failure_at
12480 (stmt
, "not vectorized: unsupported data-type %T\n",
12482 if (dump_enabled_p ())
12483 dump_printf_loc (MSG_NOTE
, vect_location
, "nunits vectype: %T\n",
12488 gcc_assert (multiple_p (TYPE_VECTOR_SUBPARTS (nunits_vectype
),
12489 TYPE_VECTOR_SUBPARTS (*stmt_vectype_out
)));
12491 if (dump_enabled_p ())
12493 dump_printf_loc (MSG_NOTE
, vect_location
, "nunits = ");
12494 dump_dec (MSG_NOTE
, TYPE_VECTOR_SUBPARTS (nunits_vectype
));
12495 dump_printf (MSG_NOTE
, "\n");
12498 *nunits_vectype_out
= nunits_vectype
;
12499 return opt_result::success ();