1 /* Statement Analysis and Transformation for Vectorization
2 Copyright (C) 2003-2020 Free Software Foundation, Inc.
3 Contributed by Dorit Naishlos <dorit@il.ibm.com>
4 and Ira Rosen <irar@il.ibm.com>
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
24 #include "coretypes.h"
31 #include "optabs-tree.h"
32 #include "insn-config.h"
33 #include "recog.h" /* FIXME: for insn_data */
37 #include "fold-const.h"
38 #include "stor-layout.h"
41 #include "gimple-iterator.h"
42 #include "gimplify-me.h"
44 #include "tree-ssa-loop-manip.h"
47 #include "tree-ssa-loop.h"
48 #include "tree-scalar-evolution.h"
49 #include "tree-vectorizer.h"
51 #include "internal-fn.h"
52 #include "tree-vector-builder.h"
53 #include "vec-perm-indices.h"
54 #include "tree-ssa-loop-niter.h"
55 #include "gimple-fold.h"
59 /* For lang_hooks.types.type_for_mode. */
60 #include "langhooks.h"
62 /* Return the vectorized type for the given statement. */
65 stmt_vectype (class _stmt_vec_info
*stmt_info
)
67 return STMT_VINFO_VECTYPE (stmt_info
);
70 /* Return TRUE iff the given statement is in an inner loop relative to
71 the loop being vectorized. */
73 stmt_in_inner_loop_p (vec_info
*vinfo
, class _stmt_vec_info
*stmt_info
)
75 gimple
*stmt
= STMT_VINFO_STMT (stmt_info
);
76 basic_block bb
= gimple_bb (stmt
);
77 loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
);
83 loop
= LOOP_VINFO_LOOP (loop_vinfo
);
85 return (bb
->loop_father
== loop
->inner
);
88 /* Record the cost of a statement, either by directly informing the
89 target model or by saving it in a vector for later processing.
90 Return a preliminary estimate of the statement's cost. */
93 record_stmt_cost (stmt_vector_for_cost
*body_cost_vec
, int count
,
94 enum vect_cost_for_stmt kind
, stmt_vec_info stmt_info
,
95 tree vectype
, int misalign
,
96 enum vect_cost_model_location where
)
98 if ((kind
== vector_load
|| kind
== unaligned_load
)
99 && (stmt_info
&& STMT_VINFO_GATHER_SCATTER_P (stmt_info
)))
100 kind
= vector_gather_load
;
101 if ((kind
== vector_store
|| kind
== unaligned_store
)
102 && (stmt_info
&& STMT_VINFO_GATHER_SCATTER_P (stmt_info
)))
103 kind
= vector_scatter_store
;
105 stmt_info_for_cost si
= { count
, kind
, where
, stmt_info
, vectype
, misalign
};
106 body_cost_vec
->safe_push (si
);
109 (builtin_vectorization_cost (kind
, vectype
, misalign
) * count
);
112 /* Return a variable of type ELEM_TYPE[NELEMS]. */
115 create_vector_array (tree elem_type
, unsigned HOST_WIDE_INT nelems
)
117 return create_tmp_var (build_array_type_nelts (elem_type
, nelems
),
121 /* ARRAY is an array of vectors created by create_vector_array.
122 Return an SSA_NAME for the vector in index N. The reference
123 is part of the vectorization of STMT_INFO and the vector is associated
124 with scalar destination SCALAR_DEST. */
127 read_vector_array (vec_info
*vinfo
,
128 stmt_vec_info stmt_info
, gimple_stmt_iterator
*gsi
,
129 tree scalar_dest
, tree array
, unsigned HOST_WIDE_INT n
)
131 tree vect_type
, vect
, vect_name
, array_ref
;
134 gcc_assert (TREE_CODE (TREE_TYPE (array
)) == ARRAY_TYPE
);
135 vect_type
= TREE_TYPE (TREE_TYPE (array
));
136 vect
= vect_create_destination_var (scalar_dest
, vect_type
);
137 array_ref
= build4 (ARRAY_REF
, vect_type
, array
,
138 build_int_cst (size_type_node
, n
),
139 NULL_TREE
, NULL_TREE
);
141 new_stmt
= gimple_build_assign (vect
, array_ref
);
142 vect_name
= make_ssa_name (vect
, new_stmt
);
143 gimple_assign_set_lhs (new_stmt
, vect_name
);
144 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
149 /* ARRAY is an array of vectors created by create_vector_array.
150 Emit code to store SSA_NAME VECT in index N of the array.
151 The store is part of the vectorization of STMT_INFO. */
154 write_vector_array (vec_info
*vinfo
,
155 stmt_vec_info stmt_info
, gimple_stmt_iterator
*gsi
,
156 tree vect
, tree array
, unsigned HOST_WIDE_INT n
)
161 array_ref
= build4 (ARRAY_REF
, TREE_TYPE (vect
), array
,
162 build_int_cst (size_type_node
, n
),
163 NULL_TREE
, NULL_TREE
);
165 new_stmt
= gimple_build_assign (array_ref
, vect
);
166 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
169 /* PTR is a pointer to an array of type TYPE. Return a representation
170 of *PTR. The memory reference replaces those in FIRST_DR
174 create_array_ref (tree type
, tree ptr
, tree alias_ptr_type
)
178 mem_ref
= build2 (MEM_REF
, type
, ptr
, build_int_cst (alias_ptr_type
, 0));
179 /* Arrays have the same alignment as their type. */
180 set_ptr_info_alignment (get_ptr_info (ptr
), TYPE_ALIGN_UNIT (type
), 0);
184 /* Add a clobber of variable VAR to the vectorization of STMT_INFO.
185 Emit the clobber before *GSI. */
188 vect_clobber_variable (vec_info
*vinfo
, stmt_vec_info stmt_info
,
189 gimple_stmt_iterator
*gsi
, tree var
)
191 tree clobber
= build_clobber (TREE_TYPE (var
));
192 gimple
*new_stmt
= gimple_build_assign (var
, clobber
);
193 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
196 /* Utility functions used by vect_mark_stmts_to_be_vectorized. */
198 /* Function vect_mark_relevant.
200 Mark STMT_INFO as "relevant for vectorization" and add it to WORKLIST. */
203 vect_mark_relevant (vec
<stmt_vec_info
> *worklist
, stmt_vec_info stmt_info
,
204 enum vect_relevant relevant
, bool live_p
)
206 enum vect_relevant save_relevant
= STMT_VINFO_RELEVANT (stmt_info
);
207 bool save_live_p
= STMT_VINFO_LIVE_P (stmt_info
);
209 if (dump_enabled_p ())
210 dump_printf_loc (MSG_NOTE
, vect_location
,
211 "mark relevant %d, live %d: %G", relevant
, live_p
,
214 /* If this stmt is an original stmt in a pattern, we might need to mark its
215 related pattern stmt instead of the original stmt. However, such stmts
216 may have their own uses that are not in any pattern, in such cases the
217 stmt itself should be marked. */
218 if (STMT_VINFO_IN_PATTERN_P (stmt_info
))
220 /* This is the last stmt in a sequence that was detected as a
221 pattern that can potentially be vectorized. Don't mark the stmt
222 as relevant/live because it's not going to be vectorized.
223 Instead mark the pattern-stmt that replaces it. */
225 if (dump_enabled_p ())
226 dump_printf_loc (MSG_NOTE
, vect_location
,
227 "last stmt in pattern. don't mark"
228 " relevant/live.\n");
229 stmt_vec_info old_stmt_info
= stmt_info
;
230 stmt_info
= STMT_VINFO_RELATED_STMT (stmt_info
);
231 gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info
) == old_stmt_info
);
232 save_relevant
= STMT_VINFO_RELEVANT (stmt_info
);
233 save_live_p
= STMT_VINFO_LIVE_P (stmt_info
);
236 STMT_VINFO_LIVE_P (stmt_info
) |= live_p
;
237 if (relevant
> STMT_VINFO_RELEVANT (stmt_info
))
238 STMT_VINFO_RELEVANT (stmt_info
) = relevant
;
240 if (STMT_VINFO_RELEVANT (stmt_info
) == save_relevant
241 && STMT_VINFO_LIVE_P (stmt_info
) == save_live_p
)
243 if (dump_enabled_p ())
244 dump_printf_loc (MSG_NOTE
, vect_location
,
245 "already marked relevant/live.\n");
249 worklist
->safe_push (stmt_info
);
253 /* Function is_simple_and_all_uses_invariant
255 Return true if STMT_INFO is simple and all uses of it are invariant. */
258 is_simple_and_all_uses_invariant (stmt_vec_info stmt_info
,
259 loop_vec_info loop_vinfo
)
264 gassign
*stmt
= dyn_cast
<gassign
*> (stmt_info
->stmt
);
268 FOR_EACH_SSA_TREE_OPERAND (op
, stmt
, iter
, SSA_OP_USE
)
270 enum vect_def_type dt
= vect_uninitialized_def
;
272 if (!vect_is_simple_use (op
, loop_vinfo
, &dt
))
274 if (dump_enabled_p ())
275 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
276 "use not simple.\n");
280 if (dt
!= vect_external_def
&& dt
!= vect_constant_def
)
286 /* Function vect_stmt_relevant_p.
288 Return true if STMT_INFO, in the loop that is represented by LOOP_VINFO,
289 is "relevant for vectorization".
291 A stmt is considered "relevant for vectorization" if:
292 - it has uses outside the loop.
293 - it has vdefs (it alters memory).
294 - control stmts in the loop (except for the exit condition).
296 CHECKME: what other side effects would the vectorizer allow? */
299 vect_stmt_relevant_p (stmt_vec_info stmt_info
, loop_vec_info loop_vinfo
,
300 enum vect_relevant
*relevant
, bool *live_p
)
302 class loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
304 imm_use_iterator imm_iter
;
308 *relevant
= vect_unused_in_scope
;
311 /* cond stmt other than loop exit cond. */
312 if (is_ctrl_stmt (stmt_info
->stmt
)
313 && STMT_VINFO_TYPE (stmt_info
) != loop_exit_ctrl_vec_info_type
)
314 *relevant
= vect_used_in_scope
;
316 /* changing memory. */
317 if (gimple_code (stmt_info
->stmt
) != GIMPLE_PHI
)
318 if (gimple_vdef (stmt_info
->stmt
)
319 && !gimple_clobber_p (stmt_info
->stmt
))
321 if (dump_enabled_p ())
322 dump_printf_loc (MSG_NOTE
, vect_location
,
323 "vec_stmt_relevant_p: stmt has vdefs.\n");
324 *relevant
= vect_used_in_scope
;
327 /* uses outside the loop. */
328 FOR_EACH_PHI_OR_STMT_DEF (def_p
, stmt_info
->stmt
, op_iter
, SSA_OP_DEF
)
330 FOR_EACH_IMM_USE_FAST (use_p
, imm_iter
, DEF_FROM_PTR (def_p
))
332 basic_block bb
= gimple_bb (USE_STMT (use_p
));
333 if (!flow_bb_inside_loop_p (loop
, bb
))
335 if (is_gimple_debug (USE_STMT (use_p
)))
338 if (dump_enabled_p ())
339 dump_printf_loc (MSG_NOTE
, vect_location
,
340 "vec_stmt_relevant_p: used out of loop.\n");
342 /* We expect all such uses to be in the loop exit phis
343 (because of loop closed form) */
344 gcc_assert (gimple_code (USE_STMT (use_p
)) == GIMPLE_PHI
);
345 gcc_assert (bb
== single_exit (loop
)->dest
);
352 if (*live_p
&& *relevant
== vect_unused_in_scope
353 && !is_simple_and_all_uses_invariant (stmt_info
, loop_vinfo
))
355 if (dump_enabled_p ())
356 dump_printf_loc (MSG_NOTE
, vect_location
,
357 "vec_stmt_relevant_p: stmt live but not relevant.\n");
358 *relevant
= vect_used_only_live
;
361 return (*live_p
|| *relevant
);
365 /* Function exist_non_indexing_operands_for_use_p
367 USE is one of the uses attached to STMT_INFO. Check if USE is
368 used in STMT_INFO for anything other than indexing an array. */
371 exist_non_indexing_operands_for_use_p (tree use
, stmt_vec_info stmt_info
)
375 /* USE corresponds to some operand in STMT. If there is no data
376 reference in STMT, then any operand that corresponds to USE
377 is not indexing an array. */
378 if (!STMT_VINFO_DATA_REF (stmt_info
))
381 /* STMT has a data_ref. FORNOW this means that its of one of
385 (This should have been verified in analyze_data_refs).
387 'var' in the second case corresponds to a def, not a use,
388 so USE cannot correspond to any operands that are not used
391 Therefore, all we need to check is if STMT falls into the
392 first case, and whether var corresponds to USE. */
394 gassign
*assign
= dyn_cast
<gassign
*> (stmt_info
->stmt
);
395 if (!assign
|| !gimple_assign_copy_p (assign
))
397 gcall
*call
= dyn_cast
<gcall
*> (stmt_info
->stmt
);
398 if (call
&& gimple_call_internal_p (call
))
400 internal_fn ifn
= gimple_call_internal_fn (call
);
401 int mask_index
= internal_fn_mask_index (ifn
);
403 && use
== gimple_call_arg (call
, mask_index
))
405 int stored_value_index
= internal_fn_stored_value_index (ifn
);
406 if (stored_value_index
>= 0
407 && use
== gimple_call_arg (call
, stored_value_index
))
409 if (internal_gather_scatter_fn_p (ifn
)
410 && use
== gimple_call_arg (call
, 1))
416 if (TREE_CODE (gimple_assign_lhs (assign
)) == SSA_NAME
)
418 operand
= gimple_assign_rhs1 (assign
);
419 if (TREE_CODE (operand
) != SSA_NAME
)
430 Function process_use.
433 - a USE in STMT_VINFO in a loop represented by LOOP_VINFO
434 - RELEVANT - enum value to be set in the STMT_VINFO of the stmt
435 that defined USE. This is done by calling mark_relevant and passing it
436 the WORKLIST (to add DEF_STMT to the WORKLIST in case it is relevant).
437 - FORCE is true if exist_non_indexing_operands_for_use_p check shouldn't
441 Generally, LIVE_P and RELEVANT are used to define the liveness and
442 relevance info of the DEF_STMT of this USE:
443 STMT_VINFO_LIVE_P (DEF_stmt_vinfo) <-- live_p
444 STMT_VINFO_RELEVANT (DEF_stmt_vinfo) <-- relevant
446 - case 1: If USE is used only for address computations (e.g. array indexing),
447 which does not need to be directly vectorized, then the liveness/relevance
448 of the respective DEF_STMT is left unchanged.
449 - case 2: If STMT_VINFO is a reduction phi and DEF_STMT is a reduction stmt,
450 we skip DEF_STMT cause it had already been processed.
451 - case 3: If DEF_STMT and STMT_VINFO are in different nests, then
452 "relevant" will be modified accordingly.
454 Return true if everything is as expected. Return false otherwise. */
457 process_use (stmt_vec_info stmt_vinfo
, tree use
, loop_vec_info loop_vinfo
,
458 enum vect_relevant relevant
, vec
<stmt_vec_info
> *worklist
,
461 stmt_vec_info dstmt_vinfo
;
462 enum vect_def_type dt
;
464 /* case 1: we are only interested in uses that need to be vectorized. Uses
465 that are used for address computation are not considered relevant. */
466 if (!force
&& !exist_non_indexing_operands_for_use_p (use
, stmt_vinfo
))
467 return opt_result::success ();
469 if (!vect_is_simple_use (use
, loop_vinfo
, &dt
, &dstmt_vinfo
))
470 return opt_result::failure_at (stmt_vinfo
->stmt
,
472 " unsupported use in stmt.\n");
475 return opt_result::success ();
477 basic_block def_bb
= gimple_bb (dstmt_vinfo
->stmt
);
478 basic_block bb
= gimple_bb (stmt_vinfo
->stmt
);
480 /* case 2: A reduction phi (STMT) defined by a reduction stmt (DSTMT_VINFO).
481 We have to force the stmt live since the epilogue loop needs it to
482 continue computing the reduction. */
483 if (gimple_code (stmt_vinfo
->stmt
) == GIMPLE_PHI
484 && STMT_VINFO_DEF_TYPE (stmt_vinfo
) == vect_reduction_def
485 && gimple_code (dstmt_vinfo
->stmt
) != GIMPLE_PHI
486 && STMT_VINFO_DEF_TYPE (dstmt_vinfo
) == vect_reduction_def
487 && bb
->loop_father
== def_bb
->loop_father
)
489 if (dump_enabled_p ())
490 dump_printf_loc (MSG_NOTE
, vect_location
,
491 "reduc-stmt defining reduc-phi in the same nest.\n");
492 vect_mark_relevant (worklist
, dstmt_vinfo
, relevant
, true);
493 return opt_result::success ();
496 /* case 3a: outer-loop stmt defining an inner-loop stmt:
497 outer-loop-header-bb:
503 if (flow_loop_nested_p (def_bb
->loop_father
, bb
->loop_father
))
505 if (dump_enabled_p ())
506 dump_printf_loc (MSG_NOTE
, vect_location
,
507 "outer-loop def-stmt defining inner-loop stmt.\n");
511 case vect_unused_in_scope
:
512 relevant
= (STMT_VINFO_DEF_TYPE (stmt_vinfo
) == vect_nested_cycle
) ?
513 vect_used_in_scope
: vect_unused_in_scope
;
516 case vect_used_in_outer_by_reduction
:
517 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo
) != vect_reduction_def
);
518 relevant
= vect_used_by_reduction
;
521 case vect_used_in_outer
:
522 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo
) != vect_reduction_def
);
523 relevant
= vect_used_in_scope
;
526 case vect_used_in_scope
:
534 /* case 3b: inner-loop stmt defining an outer-loop stmt:
535 outer-loop-header-bb:
539 outer-loop-tail-bb (or outer-loop-exit-bb in double reduction):
541 else if (flow_loop_nested_p (bb
->loop_father
, def_bb
->loop_father
))
543 if (dump_enabled_p ())
544 dump_printf_loc (MSG_NOTE
, vect_location
,
545 "inner-loop def-stmt defining outer-loop stmt.\n");
549 case vect_unused_in_scope
:
550 relevant
= (STMT_VINFO_DEF_TYPE (stmt_vinfo
) == vect_reduction_def
551 || STMT_VINFO_DEF_TYPE (stmt_vinfo
) == vect_double_reduction_def
) ?
552 vect_used_in_outer_by_reduction
: vect_unused_in_scope
;
555 case vect_used_by_reduction
:
556 case vect_used_only_live
:
557 relevant
= vect_used_in_outer_by_reduction
;
560 case vect_used_in_scope
:
561 relevant
= vect_used_in_outer
;
568 /* We are also not interested in uses on loop PHI backedges that are
569 inductions. Otherwise we'll needlessly vectorize the IV increment
570 and cause hybrid SLP for SLP inductions. Unless the PHI is live
572 else if (gimple_code (stmt_vinfo
->stmt
) == GIMPLE_PHI
573 && STMT_VINFO_DEF_TYPE (stmt_vinfo
) == vect_induction_def
574 && ! STMT_VINFO_LIVE_P (stmt_vinfo
)
575 && (PHI_ARG_DEF_FROM_EDGE (stmt_vinfo
->stmt
,
576 loop_latch_edge (bb
->loop_father
))
579 if (dump_enabled_p ())
580 dump_printf_loc (MSG_NOTE
, vect_location
,
581 "induction value on backedge.\n");
582 return opt_result::success ();
586 vect_mark_relevant (worklist
, dstmt_vinfo
, relevant
, false);
587 return opt_result::success ();
591 /* Function vect_mark_stmts_to_be_vectorized.
593 Not all stmts in the loop need to be vectorized. For example:
602 Stmt 1 and 3 do not need to be vectorized, because loop control and
603 addressing of vectorized data-refs are handled differently.
605 This pass detects such stmts. */
608 vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo
, bool *fatal
)
610 class loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
611 basic_block
*bbs
= LOOP_VINFO_BBS (loop_vinfo
);
612 unsigned int nbbs
= loop
->num_nodes
;
613 gimple_stmt_iterator si
;
617 enum vect_relevant relevant
;
619 DUMP_VECT_SCOPE ("vect_mark_stmts_to_be_vectorized");
621 auto_vec
<stmt_vec_info
, 64> worklist
;
623 /* 1. Init worklist. */
624 for (i
= 0; i
< nbbs
; i
++)
627 for (si
= gsi_start_phis (bb
); !gsi_end_p (si
); gsi_next (&si
))
629 stmt_vec_info phi_info
= loop_vinfo
->lookup_stmt (gsi_stmt (si
));
630 if (dump_enabled_p ())
631 dump_printf_loc (MSG_NOTE
, vect_location
, "init: phi relevant? %G",
634 if (vect_stmt_relevant_p (phi_info
, loop_vinfo
, &relevant
, &live_p
))
635 vect_mark_relevant (&worklist
, phi_info
, relevant
, live_p
);
637 for (si
= gsi_start_bb (bb
); !gsi_end_p (si
); gsi_next (&si
))
639 stmt_vec_info stmt_info
= loop_vinfo
->lookup_stmt (gsi_stmt (si
));
640 if (dump_enabled_p ())
641 dump_printf_loc (MSG_NOTE
, vect_location
,
642 "init: stmt relevant? %G", stmt_info
->stmt
);
644 if (vect_stmt_relevant_p (stmt_info
, loop_vinfo
, &relevant
, &live_p
))
645 vect_mark_relevant (&worklist
, stmt_info
, relevant
, live_p
);
649 /* 2. Process_worklist */
650 while (worklist
.length () > 0)
655 stmt_vec_info stmt_vinfo
= worklist
.pop ();
656 if (dump_enabled_p ())
657 dump_printf_loc (MSG_NOTE
, vect_location
,
658 "worklist: examine stmt: %G", stmt_vinfo
->stmt
);
660 /* Examine the USEs of STMT. For each USE, mark the stmt that defines it
661 (DEF_STMT) as relevant/irrelevant according to the relevance property
663 relevant
= STMT_VINFO_RELEVANT (stmt_vinfo
);
665 /* Generally, the relevance property of STMT (in STMT_VINFO_RELEVANT) is
666 propagated as is to the DEF_STMTs of its USEs.
668 One exception is when STMT has been identified as defining a reduction
669 variable; in this case we set the relevance to vect_used_by_reduction.
670 This is because we distinguish between two kinds of relevant stmts -
671 those that are used by a reduction computation, and those that are
672 (also) used by a regular computation. This allows us later on to
673 identify stmts that are used solely by a reduction, and therefore the
674 order of the results that they produce does not have to be kept. */
676 switch (STMT_VINFO_DEF_TYPE (stmt_vinfo
))
678 case vect_reduction_def
:
679 gcc_assert (relevant
!= vect_unused_in_scope
);
680 if (relevant
!= vect_unused_in_scope
681 && relevant
!= vect_used_in_scope
682 && relevant
!= vect_used_by_reduction
683 && relevant
!= vect_used_only_live
)
684 return opt_result::failure_at
685 (stmt_vinfo
->stmt
, "unsupported use of reduction.\n");
688 case vect_nested_cycle
:
689 if (relevant
!= vect_unused_in_scope
690 && relevant
!= vect_used_in_outer_by_reduction
691 && relevant
!= vect_used_in_outer
)
692 return opt_result::failure_at
693 (stmt_vinfo
->stmt
, "unsupported use of nested cycle.\n");
696 case vect_double_reduction_def
:
697 if (relevant
!= vect_unused_in_scope
698 && relevant
!= vect_used_by_reduction
699 && relevant
!= vect_used_only_live
)
700 return opt_result::failure_at
701 (stmt_vinfo
->stmt
, "unsupported use of double reduction.\n");
708 if (is_pattern_stmt_p (stmt_vinfo
))
710 /* Pattern statements are not inserted into the code, so
711 FOR_EACH_PHI_OR_STMT_USE optimizes their operands out, and we
712 have to scan the RHS or function arguments instead. */
713 if (gassign
*assign
= dyn_cast
<gassign
*> (stmt_vinfo
->stmt
))
715 enum tree_code rhs_code
= gimple_assign_rhs_code (assign
);
716 tree op
= gimple_assign_rhs1 (assign
);
719 if (rhs_code
== COND_EXPR
&& COMPARISON_CLASS_P (op
))
722 = process_use (stmt_vinfo
, TREE_OPERAND (op
, 0),
723 loop_vinfo
, relevant
, &worklist
, false);
726 res
= process_use (stmt_vinfo
, TREE_OPERAND (op
, 1),
727 loop_vinfo
, relevant
, &worklist
, false);
732 for (; i
< gimple_num_ops (assign
); i
++)
734 op
= gimple_op (assign
, i
);
735 if (TREE_CODE (op
) == SSA_NAME
)
738 = process_use (stmt_vinfo
, op
, loop_vinfo
, relevant
,
745 else if (gcall
*call
= dyn_cast
<gcall
*> (stmt_vinfo
->stmt
))
747 for (i
= 0; i
< gimple_call_num_args (call
); i
++)
749 tree arg
= gimple_call_arg (call
, i
);
751 = process_use (stmt_vinfo
, arg
, loop_vinfo
, relevant
,
759 FOR_EACH_PHI_OR_STMT_USE (use_p
, stmt_vinfo
->stmt
, iter
, SSA_OP_USE
)
761 tree op
= USE_FROM_PTR (use_p
);
763 = process_use (stmt_vinfo
, op
, loop_vinfo
, relevant
,
769 if (STMT_VINFO_GATHER_SCATTER_P (stmt_vinfo
))
771 gather_scatter_info gs_info
;
772 if (!vect_check_gather_scatter (stmt_vinfo
, loop_vinfo
, &gs_info
))
775 = process_use (stmt_vinfo
, gs_info
.offset
, loop_vinfo
, relevant
,
784 } /* while worklist */
786 return opt_result::success ();
789 /* Function vect_model_simple_cost.
791 Models cost for simple operations, i.e. those that only emit ncopies of a
792 single op. Right now, this does not account for multiple insns that could
793 be generated for the single vector op. We will handle that shortly. */
796 vect_model_simple_cost (vec_info
*,
797 stmt_vec_info stmt_info
, int ncopies
,
798 enum vect_def_type
*dt
,
801 stmt_vector_for_cost
*cost_vec
,
802 vect_cost_for_stmt kind
= vector_stmt
)
804 int inside_cost
= 0, prologue_cost
= 0;
806 gcc_assert (cost_vec
!= NULL
);
808 /* ??? Somehow we need to fix this at the callers. */
810 ncopies
= SLP_TREE_NUMBER_OF_VEC_STMTS (node
);
813 /* Cost the "broadcast" of a scalar operand in to a vector operand.
814 Use scalar_to_vec to cost the broadcast, as elsewhere in the vector
816 for (int i
= 0; i
< ndts
; i
++)
817 if (dt
[i
] == vect_constant_def
|| dt
[i
] == vect_external_def
)
818 prologue_cost
+= record_stmt_cost (cost_vec
, 1, scalar_to_vec
,
819 stmt_info
, 0, vect_prologue
);
821 /* Adjust for two-operator SLP nodes. */
822 if (node
&& SLP_TREE_TWO_OPERATORS (node
))
825 inside_cost
+= record_stmt_cost (cost_vec
, ncopies
, vec_perm
,
826 stmt_info
, 0, vect_body
);
829 /* Pass the inside-of-loop statements to the target-specific cost model. */
830 inside_cost
+= record_stmt_cost (cost_vec
, ncopies
, kind
,
831 stmt_info
, 0, vect_body
);
833 if (dump_enabled_p ())
834 dump_printf_loc (MSG_NOTE
, vect_location
,
835 "vect_model_simple_cost: inside_cost = %d, "
836 "prologue_cost = %d .\n", inside_cost
, prologue_cost
);
840 /* Model cost for type demotion and promotion operations. PWR is
841 normally zero for single-step promotions and demotions. It will be
842 one if two-step promotion/demotion is required, and so on. NCOPIES
843 is the number of vector results (and thus number of instructions)
844 for the narrowest end of the operation chain. Each additional
845 step doubles the number of instructions required. */
848 vect_model_promotion_demotion_cost (stmt_vec_info stmt_info
,
849 enum vect_def_type
*dt
,
850 unsigned int ncopies
, int pwr
,
851 stmt_vector_for_cost
*cost_vec
)
854 int inside_cost
= 0, prologue_cost
= 0;
856 for (i
= 0; i
< pwr
+ 1; i
++)
858 inside_cost
+= record_stmt_cost (cost_vec
, ncopies
, vec_promote_demote
,
859 stmt_info
, 0, vect_body
);
863 /* FORNOW: Assuming maximum 2 args per stmts. */
864 for (i
= 0; i
< 2; i
++)
865 if (dt
[i
] == vect_constant_def
|| dt
[i
] == vect_external_def
)
866 prologue_cost
+= record_stmt_cost (cost_vec
, 1, vector_stmt
,
867 stmt_info
, 0, vect_prologue
);
869 if (dump_enabled_p ())
870 dump_printf_loc (MSG_NOTE
, vect_location
,
871 "vect_model_promotion_demotion_cost: inside_cost = %d, "
872 "prologue_cost = %d .\n", inside_cost
, prologue_cost
);
875 /* Returns true if the current function returns DECL. */
878 cfun_returns (tree decl
)
882 FOR_EACH_EDGE (e
, ei
, EXIT_BLOCK_PTR_FOR_FN (cfun
)->preds
)
884 greturn
*ret
= safe_dyn_cast
<greturn
*> (last_stmt (e
->src
));
887 if (gimple_return_retval (ret
) == decl
)
889 /* We often end up with an aggregate copy to the result decl,
890 handle that case as well. First skip intermediate clobbers
895 def
= SSA_NAME_DEF_STMT (gimple_vuse (def
));
897 while (gimple_clobber_p (def
));
898 if (is_a
<gassign
*> (def
)
899 && gimple_assign_lhs (def
) == gimple_return_retval (ret
)
900 && gimple_assign_rhs1 (def
) == decl
)
906 /* Function vect_model_store_cost
908 Models cost for stores. In the case of grouped accesses, one access
909 has the overhead of the grouped access attributed to it. */
912 vect_model_store_cost (vec_info
*vinfo
, stmt_vec_info stmt_info
, int ncopies
,
913 vect_memory_access_type memory_access_type
,
914 vec_load_store_type vls_type
, slp_tree slp_node
,
915 stmt_vector_for_cost
*cost_vec
)
917 unsigned int inside_cost
= 0, prologue_cost
= 0;
918 stmt_vec_info first_stmt_info
= stmt_info
;
919 bool grouped_access_p
= STMT_VINFO_GROUPED_ACCESS (stmt_info
);
921 /* ??? Somehow we need to fix this at the callers. */
923 ncopies
= SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
);
925 if (vls_type
== VLS_STORE_INVARIANT
)
928 prologue_cost
+= record_stmt_cost (cost_vec
, 1, scalar_to_vec
,
929 stmt_info
, 0, vect_prologue
);
932 /* Grouped stores update all elements in the group at once,
933 so we want the DR for the first statement. */
934 if (!slp_node
&& grouped_access_p
)
935 first_stmt_info
= DR_GROUP_FIRST_ELEMENT (stmt_info
);
937 /* True if we should include any once-per-group costs as well as
938 the cost of the statement itself. For SLP we only get called
939 once per group anyhow. */
940 bool first_stmt_p
= (first_stmt_info
== stmt_info
);
942 /* We assume that the cost of a single store-lanes instruction is
943 equivalent to the cost of DR_GROUP_SIZE separate stores. If a grouped
944 access is instead being provided by a permute-and-store operation,
945 include the cost of the permutes. */
947 && memory_access_type
== VMAT_CONTIGUOUS_PERMUTE
)
949 /* Uses a high and low interleave or shuffle operations for each
951 int group_size
= DR_GROUP_SIZE (first_stmt_info
);
952 int nstmts
= ncopies
* ceil_log2 (group_size
) * group_size
;
953 inside_cost
= record_stmt_cost (cost_vec
, nstmts
, vec_perm
,
954 stmt_info
, 0, vect_body
);
956 if (dump_enabled_p ())
957 dump_printf_loc (MSG_NOTE
, vect_location
,
958 "vect_model_store_cost: strided group_size = %d .\n",
962 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
963 /* Costs of the stores. */
964 if (memory_access_type
== VMAT_ELEMENTWISE
965 || memory_access_type
== VMAT_GATHER_SCATTER
)
967 /* N scalar stores plus extracting the elements. */
968 unsigned int assumed_nunits
= vect_nunits_for_cost (vectype
);
969 inside_cost
+= record_stmt_cost (cost_vec
,
970 ncopies
* assumed_nunits
,
971 scalar_store
, stmt_info
, 0, vect_body
);
974 vect_get_store_cost (vinfo
, stmt_info
, ncopies
, &inside_cost
, cost_vec
);
976 if (memory_access_type
== VMAT_ELEMENTWISE
977 || memory_access_type
== VMAT_STRIDED_SLP
)
979 /* N scalar stores plus extracting the elements. */
980 unsigned int assumed_nunits
= vect_nunits_for_cost (vectype
);
981 inside_cost
+= record_stmt_cost (cost_vec
,
982 ncopies
* assumed_nunits
,
983 vec_to_scalar
, stmt_info
, 0, vect_body
);
986 /* When vectorizing a store into the function result assign
987 a penalty if the function returns in a multi-register location.
988 In this case we assume we'll end up with having to spill the
989 vector result and do piecewise loads as a conservative estimate. */
990 tree base
= get_base_address (STMT_VINFO_DATA_REF (stmt_info
)->ref
);
992 && (TREE_CODE (base
) == RESULT_DECL
993 || (DECL_P (base
) && cfun_returns (base
)))
994 && !aggregate_value_p (base
, cfun
->decl
))
996 rtx reg
= hard_function_value (TREE_TYPE (base
), cfun
->decl
, 0, 1);
997 /* ??? Handle PARALLEL in some way. */
1000 int nregs
= hard_regno_nregs (REGNO (reg
), GET_MODE (reg
));
1001 /* Assume that a single reg-reg move is possible and cheap,
1002 do not account for vector to gp register move cost. */
1006 prologue_cost
+= record_stmt_cost (cost_vec
, ncopies
,
1008 stmt_info
, 0, vect_epilogue
);
1010 prologue_cost
+= record_stmt_cost (cost_vec
, ncopies
* nregs
,
1012 stmt_info
, 0, vect_epilogue
);
1017 if (dump_enabled_p ())
1018 dump_printf_loc (MSG_NOTE
, vect_location
,
1019 "vect_model_store_cost: inside_cost = %d, "
1020 "prologue_cost = %d .\n", inside_cost
, prologue_cost
);
1024 /* Calculate cost of DR's memory access. */
1026 vect_get_store_cost (vec_info
*vinfo
, stmt_vec_info stmt_info
, int ncopies
,
1027 unsigned int *inside_cost
,
1028 stmt_vector_for_cost
*body_cost_vec
)
1030 dr_vec_info
*dr_info
= STMT_VINFO_DR_INFO (stmt_info
);
1031 int alignment_support_scheme
1032 = vect_supportable_dr_alignment (vinfo
, dr_info
, false);
1034 switch (alignment_support_scheme
)
1038 *inside_cost
+= record_stmt_cost (body_cost_vec
, ncopies
,
1039 vector_store
, stmt_info
, 0,
1042 if (dump_enabled_p ())
1043 dump_printf_loc (MSG_NOTE
, vect_location
,
1044 "vect_model_store_cost: aligned.\n");
1048 case dr_unaligned_supported
:
1050 /* Here, we assign an additional cost for the unaligned store. */
1051 *inside_cost
+= record_stmt_cost (body_cost_vec
, ncopies
,
1052 unaligned_store
, stmt_info
,
1053 DR_MISALIGNMENT (dr_info
),
1055 if (dump_enabled_p ())
1056 dump_printf_loc (MSG_NOTE
, vect_location
,
1057 "vect_model_store_cost: unaligned supported by "
1062 case dr_unaligned_unsupported
:
1064 *inside_cost
= VECT_MAX_COST
;
1066 if (dump_enabled_p ())
1067 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1068 "vect_model_store_cost: unsupported access.\n");
/* NOTE(review): extraction-damaged text.  The integers fused into each
   statement are stale source line numbers; gaps in their sequence
   (e.g. 1088->1090, 1118->1122) show statements and braces are missing
   from this copy.  Recover from the upstream file before editing.  */
1078 /* Function vect_model_load_cost
1080 Models cost for loads. In the case of grouped accesses, one access has
1081 the overhead of the grouped access attributed to it. Since unaligned
1082 accesses are supported for loads, we also account for the costs of the
1083 access scheme chosen. */
1086 vect_model_load_cost (vec_info
*vinfo
,
1087 stmt_vec_info stmt_info
, unsigned ncopies
, poly_uint64 vf
,
1088 vect_memory_access_type memory_access_type
,
1090 stmt_vector_for_cost
*cost_vec
)
1092 unsigned int inside_cost
= 0, prologue_cost
= 0;
1093 bool grouped_access_p
= STMT_VINFO_GROUPED_ACCESS (stmt_info
);
1095 gcc_assert (cost_vec
);
1097 /* ??? Somehow we need to fix this at the callers. */
/* NOTE(review): the guard (presumably an `if (slp_node)`) that should
   precede this assignment was lost by the extraction -- confirm.  */
1099 ncopies
= SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
);
1101 if (slp_node
&& SLP_TREE_LOAD_PERMUTATION (slp_node
).exists ())
1103 /* If the load is permuted then the alignment is determined by
1104 the first group element not by the first scalar stmt DR. */
1105 stmt_vec_info first_stmt_info
= DR_GROUP_FIRST_ELEMENT (stmt_info
);
1106 /* Record the cost for the permutation. */
1108 unsigned assumed_nunits
1109 = vect_nunits_for_cost (STMT_VINFO_VECTYPE (first_stmt_info
));
1110 vect_transform_slp_perm_load (vinfo
, slp_node
, vNULL
, NULL
,
1111 vf
, true, &n_perms
);
1112 inside_cost
+= record_stmt_cost (cost_vec
, n_perms
, vec_perm
,
1113 first_stmt_info
, 0, vect_body
);
1114 /* And adjust the number of loads performed. This handles
1115 redundancies as well as loads that are later dead. */
1116 auto_sbitmap
perm (DR_GROUP_SIZE (first_stmt_info
));
1117 bitmap_clear (perm
);
1118 for (unsigned i
= 0;
1119 i
< SLP_TREE_LOAD_PERMUTATION (slp_node
).length (); ++i
)
1120 bitmap_set_bit (perm
, SLP_TREE_LOAD_PERMUTATION (slp_node
)[i
]);
1122 bool load_seen
= false;
1123 for (unsigned i
= 0; i
< DR_GROUP_SIZE (first_stmt_info
); ++i
)
/* NOTE(review): the loop body below is fragmentary (lines 1126-1136
   of the original are absent).  */
1125 if (i
% assumed_nunits
== 0)
1131 if (bitmap_bit_p (perm
, i
))
1137 <= (DR_GROUP_SIZE (first_stmt_info
)
1138 - DR_GROUP_GAP (first_stmt_info
)
1139 + assumed_nunits
- 1) / assumed_nunits
);
1142 /* Grouped loads read all elements in the group at once,
1143 so we want the DR for the first statement. */
1144 stmt_vec_info first_stmt_info
= stmt_info
;
1145 if (!slp_node
&& grouped_access_p
)
1146 first_stmt_info
= DR_GROUP_FIRST_ELEMENT (stmt_info
);
1148 /* True if we should include any once-per-group costs as well as
1149 the cost of the statement itself. For SLP we only get called
1150 once per group anyhow. */
1151 bool first_stmt_p
= (first_stmt_info
== stmt_info
);
1153 /* We assume that the cost of a single load-lanes instruction is
1154 equivalent to the cost of DR_GROUP_SIZE separate loads. If a grouped
1155 access is instead being provided by a load-and-permute operation,
1156 include the cost of the permutes. */
1158 && memory_access_type
== VMAT_CONTIGUOUS_PERMUTE
)
1160 /* Uses an even and odd extract operations or shuffle operations
1161 for each needed permute. */
1162 int group_size
= DR_GROUP_SIZE (first_stmt_info
);
1163 int nstmts
= ncopies
* ceil_log2 (group_size
) * group_size
;
1164 inside_cost
+= record_stmt_cost (cost_vec
, nstmts
, vec_perm
,
1165 stmt_info
, 0, vect_body
);
1167 if (dump_enabled_p ())
1168 dump_printf_loc (MSG_NOTE
, vect_location
,
1169 "vect_model_load_cost: strided group_size = %d .\n",
1173 /* The loads themselves. */
1174 if (memory_access_type
== VMAT_ELEMENTWISE
1175 || memory_access_type
== VMAT_GATHER_SCATTER
)
1177 /* N scalar loads plus gathering them into a vector. */
1178 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
1179 unsigned int assumed_nunits
= vect_nunits_for_cost (vectype
);
1180 inside_cost
+= record_stmt_cost (cost_vec
,
1181 ncopies
* assumed_nunits
,
1182 scalar_load
, stmt_info
, 0, vect_body
);
1185 vect_get_load_cost (vinfo
, stmt_info
, ncopies
, first_stmt_p
,
1186 &inside_cost
, &prologue_cost
,
1187 cost_vec
, cost_vec
, true);
1188 if (memory_access_type
== VMAT_ELEMENTWISE
1189 || memory_access_type
== VMAT_STRIDED_SLP
)
1190 inside_cost
+= record_stmt_cost (cost_vec
, ncopies
, vec_construct
,
1191 stmt_info
, 0, vect_body
);
1193 if (dump_enabled_p ())
1194 dump_printf_loc (MSG_NOTE
, vect_location
,
1195 "vect_model_load_cost: inside_cost = %d, "
1196 "prologue_cost = %d .\n", inside_cost
, prologue_cost
);
/* NOTE(review): extraction-damaged text.  Stale line numbers are fused
   into the statements and gaps in them (e.g. 1213->1217) show that case
   labels, braces and `break`s are missing.  Recover from upstream
   before editing.  Costs are dispatched on the dr_alignment_support
   scheme returned by vect_supportable_dr_alignment.  */
1200 /* Calculate cost of DR's memory access. */
1202 vect_get_load_cost (vec_info
*vinfo
, stmt_vec_info stmt_info
, int ncopies
,
1203 bool add_realign_cost
, unsigned int *inside_cost
,
1204 unsigned int *prologue_cost
,
1205 stmt_vector_for_cost
*prologue_cost_vec
,
1206 stmt_vector_for_cost
*body_cost_vec
,
1207 bool record_prologue_costs
)
1209 dr_vec_info
*dr_info
= STMT_VINFO_DR_INFO (stmt_info
);
1210 int alignment_support_scheme
1211 = vect_supportable_dr_alignment (vinfo
, dr_info
, false);
1213 switch (alignment_support_scheme
)
/* Aligned case: one vector_load per copy.  */
1217 *inside_cost
+= record_stmt_cost (body_cost_vec
, ncopies
, vector_load
,
1218 stmt_info
, 0, vect_body
);
1220 if (dump_enabled_p ())
1221 dump_printf_loc (MSG_NOTE
, vect_location
,
1222 "vect_model_load_cost: aligned.\n");
1226 case dr_unaligned_supported
:
1228 /* Here, we assign an additional cost for the unaligned load. */
1229 *inside_cost
+= record_stmt_cost (body_cost_vec
, ncopies
,
1230 unaligned_load
, stmt_info
,
1231 DR_MISALIGNMENT (dr_info
),
1234 if (dump_enabled_p ())
1235 dump_printf_loc (MSG_NOTE
, vect_location
,
1236 "vect_model_load_cost: unaligned supported by "
1241 case dr_explicit_realign
:
1243 *inside_cost
+= record_stmt_cost (body_cost_vec
, ncopies
* 2,
1244 vector_load
, stmt_info
, 0, vect_body
);
1245 *inside_cost
+= record_stmt_cost (body_cost_vec
, ncopies
,
1246 vec_perm
, stmt_info
, 0, vect_body
);
1248 /* FIXME: If the misalignment remains fixed across the iterations of
1249 the containing loop, the following cost should be added to the
1251 if (targetm
.vectorize
.builtin_mask_for_load
)
1252 *inside_cost
+= record_stmt_cost (body_cost_vec
, 1, vector_stmt
,
1253 stmt_info
, 0, vect_body
);
1255 if (dump_enabled_p ())
1256 dump_printf_loc (MSG_NOTE
, vect_location
,
1257 "vect_model_load_cost: explicit realign\n");
1261 case dr_explicit_realign_optimized
:
1263 if (dump_enabled_p ())
1264 dump_printf_loc (MSG_NOTE
, vect_location
,
1265 "vect_model_load_cost: unaligned software "
1268 /* Unaligned software pipeline has a load of an address, an initial
1269 load, and possibly a mask operation to "prime" the loop. However,
1270 if this is an access in a group of loads, which provide grouped
1271 access, then the above cost should only be considered for one
1272 access in the group. Inside the loop, there is a load op
1273 and a realignment op. */
1275 if (add_realign_cost
&& record_prologue_costs
)
1277 *prologue_cost
+= record_stmt_cost (prologue_cost_vec
, 2,
1278 vector_stmt
, stmt_info
,
1280 if (targetm
.vectorize
.builtin_mask_for_load
)
1281 *prologue_cost
+= record_stmt_cost (prologue_cost_vec
, 1,
1282 vector_stmt
, stmt_info
,
1286 *inside_cost
+= record_stmt_cost (body_cost_vec
, ncopies
, vector_load
,
1287 stmt_info
, 0, vect_body
);
1288 *inside_cost
+= record_stmt_cost (body_cost_vec
, ncopies
, vec_perm
,
1289 stmt_info
, 0, vect_body
);
1291 if (dump_enabled_p ())
1292 dump_printf_loc (MSG_NOTE
, vect_location
,
1293 "vect_model_load_cost: explicit realign optimized"
1299 case dr_unaligned_unsupported
:
/* Unsupported: make the stmt prohibitively expensive.  */
1301 *inside_cost
= VECT_MAX_COST
;
1303 if (dump_enabled_p ())
1304 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1305 "vect_model_load_cost: unsupported access.\n");
/* NOTE(review): extraction-damaged text.  Stale line numbers fused in;
   gaps (e.g. 1319->1322, 1338->1342) show the if/else scaffolding that
   selects between loop-preheader and BB-region insertion is missing.
   Recover from upstream before editing.  */
1314 /* Insert the new stmt NEW_STMT at *GSI or at the appropriate place in
1315 the loop preheader for the vectorized stmt STMT_VINFO. */
1318 vect_init_vector_1 (vec_info
*vinfo
, stmt_vec_info stmt_vinfo
, gimple
*new_stmt
,
1319 gimple_stmt_iterator
*gsi
)
1322 vect_finish_stmt_generation (vinfo
, stmt_vinfo
, new_stmt
, gsi
);
1325 loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
);
1329 class loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
1333 if (stmt_vinfo
&& nested_in_vect_loop_p (loop
, stmt_vinfo
))
/* Insert on the preheader edge; gcc_assert checks no new BB was
   created by the edge insertion.  */
1336 pe
= loop_preheader_edge (loop
);
1337 new_bb
= gsi_insert_on_edge_immediate (pe
, new_stmt
);
1338 gcc_assert (!new_bb
);
1342 bb_vec_info bb_vinfo
= dyn_cast
<bb_vec_info
> (vinfo
);
1343 gimple_stmt_iterator gsi_region_begin
= bb_vinfo
->region_begin
;
1344 gsi_insert_before (&gsi_region_begin
, new_stmt
, GSI_SAME_STMT
);
1348 if (dump_enabled_p ())
1349 dump_printf_loc (MSG_NOTE
, vect_location
,
1350 "created new init_stmt: %G", new_stmt
);
/* NOTE(review): extraction-damaged text.  Stale line numbers fused in;
   gaps (e.g. 1390->1396, 1409->1413) show braces/returns are missing.
   Recover from upstream before editing.  */
1353 /* Function vect_init_vector.
1355 Insert a new stmt (INIT_STMT) that initializes a new variable of type
1356 TYPE with the value VAL. If TYPE is a vector type and VAL does not have
1357 vector type a vector with all elements equal to VAL is created first.
1358 Place the initialization at GSI if it is not NULL. Otherwise, place the
1359 initialization at the loop preheader.
1360 Return the DEF of INIT_STMT.
1361 It will be used in the vectorization of STMT_INFO. */
1364 vect_init_vector (vec_info
*vinfo
, stmt_vec_info stmt_info
, tree val
, tree type
,
1365 gimple_stmt_iterator
*gsi
)
1370 /* We abuse this function to push sth to a SSA name with initial 'val'. */
1371 if (! useless_type_conversion_p (type
, TREE_TYPE (val
)))
1373 gcc_assert (TREE_CODE (type
) == VECTOR_TYPE
);
1374 if (! types_compatible_p (TREE_TYPE (type
), TREE_TYPE (val
)))
1376 /* Scalar boolean value should be transformed into
1377 all zeros or all ones value before building a vector. */
1378 if (VECTOR_BOOLEAN_TYPE_P (type
))
1380 tree true_val
= build_all_ones_cst (TREE_TYPE (type
));
1381 tree false_val
= build_zero_cst (TREE_TYPE (type
));
1383 if (CONSTANT_CLASS_P (val
))
1384 val
= integer_zerop (val
) ? false_val
: true_val
;
/* Non-constant scalar bool: materialize via a COND_EXPR.  */
1387 new_temp
= make_ssa_name (TREE_TYPE (type
));
1388 init_stmt
= gimple_build_assign (new_temp
, COND_EXPR
,
1389 val
, true_val
, false_val
);
1390 vect_init_vector_1 (vinfo
, stmt_info
, init_stmt
, gsi
);
1396 gimple_seq stmts
= NULL
;
1397 if (! INTEGRAL_TYPE_P (TREE_TYPE (val
)))
1398 val
= gimple_build (&stmts
, VIEW_CONVERT_EXPR
,
1399 TREE_TYPE (type
), val
);
1401 /* ??? Condition vectorization expects us to do
1402 promotion of invariant/external defs. */
1403 val
= gimple_convert (&stmts
, TREE_TYPE (type
), val
);
1404 for (gimple_stmt_iterator gsi2
= gsi_start (stmts
);
1405 !gsi_end_p (gsi2
); )
1407 init_stmt
= gsi_stmt (gsi2
);
1408 gsi_remove (&gsi2
, false);
1409 vect_init_vector_1 (vinfo
, stmt_info
, init_stmt
, gsi
);
/* Broadcast the (possibly converted) scalar into a vector.  */
1413 val
= build_vector_from_val (type
, val
);
1416 new_temp
= vect_get_new_ssa_name (type
, vect_simple_var
, "cst_");
1417 init_stmt
= gimple_build_assign (new_temp
, val
);
1418 vect_init_vector_1 (vinfo
, stmt_info
, init_stmt
, gsi
);
/* NOTE(review): extraction-damaged text.  Stale line numbers fused in;
   the `switch` header and several `break`/`return` lines are missing
   (gaps 1429->1432, 1449->1452).  Recover from upstream before editing.  */
1422 /* Function vect_get_vec_def_for_operand_1.
1424 For a defining stmt DEF_STMT_INFO of a scalar stmt, return a vector def
1425 with type DT that will be used in the vectorized stmt. */
1428 vect_get_vec_def_for_operand_1 (stmt_vec_info def_stmt_info
,
1429 enum vect_def_type dt
)
1432 stmt_vec_info vec_stmt_info
;
1436 /* operand is a constant or a loop invariant. */
1437 case vect_constant_def
:
1438 case vect_external_def
:
1439 /* Code should use vect_get_vec_def_for_operand. */
1442 /* Operand is defined by a loop header phi. In case of nested
1443 cycles we also may have uses of the backedge def. */
1444 case vect_reduction_def
:
1445 case vect_double_reduction_def
:
1446 case vect_nested_cycle
:
1447 case vect_induction_def
:
1448 gcc_assert (gimple_code (def_stmt_info
->stmt
) == GIMPLE_PHI
1449 || dt
== vect_nested_cycle
);
1452 /* operand is defined inside the loop. */
1453 case vect_internal_def
:
1455 /* Get the def from the vectorized stmt. */
1456 vec_stmt_info
= STMT_VINFO_VEC_STMT (def_stmt_info
);
1457 /* Get vectorized pattern statement. */
1459 && STMT_VINFO_IN_PATTERN_P (def_stmt_info
)
1460 && !STMT_VINFO_RELEVANT (def_stmt_info
))
1461 vec_stmt_info
= (STMT_VINFO_VEC_STMT
1462 (STMT_VINFO_RELATED_STMT (def_stmt_info
)));
1463 gcc_assert (vec_stmt_info
);
1464 if (gphi
*phi
= dyn_cast
<gphi
*> (vec_stmt_info
->stmt
))
1465 vec_oprnd
= PHI_RESULT (phi
);
1467 vec_oprnd
= gimple_get_lhs (vec_stmt_info
->stmt
);
/* NOTE(review): extraction-damaged text.  Stale line numbers fused in;
   gaps (e.g. 1494->1496, 1511->1515) show declarations and an `if` arm
   are missing.  Recover from upstream before editing.  */
1477 /* Function vect_get_vec_def_for_operand.
1479 OP is an operand in STMT_VINFO. This function returns a (vector) def
1480 that will be used in the vectorized stmt for STMT_VINFO.
1482 In the case that OP is an SSA_NAME which is defined in the loop, then
1483 STMT_VINFO_VEC_STMT of the defining stmt holds the relevant def.
1485 In case OP is an invariant or constant, a new stmt that creates a vector def
1486 needs to be introduced. VECTYPE may be used to specify a required type for
1487 vector invariant. */
1490 vect_get_vec_def_for_operand (vec_info
*vinfo
,
1491 tree op
, stmt_vec_info stmt_vinfo
, tree vectype
)
1494 enum vect_def_type dt
;
1496 loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
);
1498 if (dump_enabled_p ())
1499 dump_printf_loc (MSG_NOTE
, vect_location
,
1500 "vect_get_vec_def_for_operand: %T\n", op
);
1502 stmt_vec_info def_stmt_info
;
1503 is_simple_use
= vect_is_simple_use (op
, loop_vinfo
, &dt
,
1504 &def_stmt_info
, &def_stmt
);
1505 gcc_assert (is_simple_use
);
1506 if (def_stmt
&& dump_enabled_p ())
1507 dump_printf_loc (MSG_NOTE
, vect_location
, " def_stmt = %G", def_stmt
);
/* Invariant/constant operands get a vector built via vect_init_vector;
   everything else is looked up from the defining stmt's vectorized def.  */
1509 if (dt
== vect_constant_def
|| dt
== vect_external_def
)
1511 tree stmt_vectype
= STMT_VINFO_VECTYPE (stmt_vinfo
);
1515 vector_type
= vectype
;
1516 else if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op
))
1517 && VECTOR_BOOLEAN_TYPE_P (stmt_vectype
))
1518 vector_type
= truth_type_for (stmt_vectype
);
1520 vector_type
= get_vectype_for_scalar_type (loop_vinfo
, TREE_TYPE (op
));
1522 gcc_assert (vector_type
);
1523 return vect_init_vector (vinfo
, stmt_vinfo
, op
, vector_type
, NULL
);
1526 return vect_get_vec_def_for_operand_1 (def_stmt_info
, dt
);
/* NOTE(review): extraction-damaged text.  Stale line numbers fused in;
   gaps (1588->1590->1593, 1598->...) show the guard around the "reuse
   same def" early-return and the final return are missing.  Recover
   from upstream before editing.  */
1530 /* Function vect_get_vec_def_for_stmt_copy
1532 Return a vector-def for an operand. This function is used when the
1533 vectorized stmt to be created (by the caller to this function) is a "copy"
1534 created in case the vectorized result cannot fit in one vector, and several
1535 copies of the vector-stmt are required. In this case the vector-def is
1536 retrieved from the vector stmt recorded in the STMT_VINFO_RELATED_STMT field
1537 of the stmt that defines VEC_OPRND. VINFO describes the vectorization.
1540 In case the vectorization factor (VF) is bigger than the number
1541 of elements that can fit in a vectype (nunits), we have to generate
1542 more than one vector stmt to vectorize the scalar stmt. This situation
1543 arises when there are multiple data-types operated upon in the loop; the
1544 smallest data-type determines the VF, and as a result, when vectorizing
1545 stmts operating on wider types we need to create 'VF/nunits' "copies" of the
1546 vector stmt (each computing a vector of 'nunits' results, and together
1547 computing 'VF' results in each iteration). This function is called when
1548 vectorizing such a stmt (e.g. vectorizing S2 in the illustration below, in
1549 which VF=16 and nunits=4, so the number of copies required is 4):
1551 scalar stmt: vectorized into: STMT_VINFO_RELATED_STMT
1553 S1: x = load VS1.0: vx.0 = memref0 VS1.1
1554 VS1.1: vx.1 = memref1 VS1.2
1555 VS1.2: vx.2 = memref2 VS1.3
1556 VS1.3: vx.3 = memref3
1558 S2: z = x + ... VSnew.0: vz0 = vx.0 + ... VSnew.1
1559 VSnew.1: vz1 = vx.1 + ... VSnew.2
1560 VSnew.2: vz2 = vx.2 + ... VSnew.3
1561 VSnew.3: vz3 = vx.3 + ...
1563 The vectorization of S1 is explained in vectorizable_load.
1564 The vectorization of S2:
1565 To create the first vector-stmt out of the 4 copies - VSnew.0 -
1566 the function 'vect_get_vec_def_for_operand' is called to
1567 get the relevant vector-def for each operand of S2. For operand x it
1568 returns the vector-def 'vx.0'.
1570 To create the remaining copies of the vector-stmt (VSnew.j), this
1571 function is called to get the relevant vector-def for each operand. It is
1572 obtained from the respective VS1.j stmt, which is recorded in the
1573 STMT_VINFO_RELATED_STMT field of the stmt that defines VEC_OPRND.
1575 For example, to obtain the vector-def 'vx.1' in order to create the
1576 vector stmt 'VSnew.1', this function is called with VEC_OPRND='vx.0'.
1577 Given 'vx0' we obtain the stmt that defines it ('VS1.0'); from the
1578 STMT_VINFO_RELATED_STMT field of 'VS1.0' we obtain the next copy - 'VS1.1',
1579 and return its def ('vx.1').
1580 Overall, to create the above sequence this function will be called 3 times:
1581 vx.1 = vect_get_vec_def_for_stmt_copy (vinfo, vx.0);
1582 vx.2 = vect_get_vec_def_for_stmt_copy (vinfo, vx.1);
1583 vx.3 = vect_get_vec_def_for_stmt_copy (vinfo, vx.2); */
1586 vect_get_vec_def_for_stmt_copy (vec_info
*vinfo
, tree vec_oprnd
)
1588 stmt_vec_info def_stmt_info
= vinfo
->lookup_def (vec_oprnd
);
1590 /* Do nothing; can reuse same def. */
1593 def_stmt_info
= STMT_VINFO_RELATED_STMT (def_stmt_info
);
1594 gcc_assert (def_stmt_info
);
1595 if (gphi
*phi
= dyn_cast
<gphi
*> (def_stmt_info
->stmt
))
1596 vec_oprnd
= PHI_RESULT (phi
);
1598 vec_oprnd
= gimple_get_lhs (def_stmt_info
->stmt
);
/* NOTE(review): extraction-damaged text.  Stale line numbers fused in;
   the guard around the first pop (presumably checking vec_oprnds0 is
   non-empty) is missing (gap 1611->1613).  Recover from upstream
   before editing.  Advances each operand vector to the defs of the
   next stmt copy via vect_get_vec_def_for_stmt_copy.  */
1603 /* Get vectorized definitions for the operands to create a copy of an original
1604 stmt. See vect_get_vec_def_for_stmt_copy () for details. */
1607 vect_get_vec_defs_for_stmt_copy (vec_info
*vinfo
,
1608 vec
<tree
> *vec_oprnds0
,
1609 vec
<tree
> *vec_oprnds1
)
1611 tree vec_oprnd
= vec_oprnds0
->pop ();
1613 vec_oprnd
= vect_get_vec_def_for_stmt_copy (vinfo
, vec_oprnd
);
1614 vec_oprnds0
->quick_push (vec_oprnd
);
1616 if (vec_oprnds1
&& vec_oprnds1
->length ())
1618 vec_oprnd
= vec_oprnds1
->pop ();
1619 vec_oprnd
= vect_get_vec_def_for_stmt_copy (vinfo
, vec_oprnd
);
1620 vec_oprnds1
->quick_push (vec_oprnd
);
/* NOTE(review): extraction-damaged text.  Stale line numbers fused in;
   the SLP/non-SLP branch structure and trailing parameters are missing
   (gaps 1630->1635, 1639->1645).  Recover from upstream before editing.
   SLP path pulls defs from vect_get_slp_defs; non-SLP path creates
   one def per operand via vect_get_vec_def_for_operand.  */
1625 /* Get vectorized definitions for OP0 and OP1. */
1628 vect_get_vec_defs (vec_info
*vinfo
, tree op0
, tree op1
, stmt_vec_info stmt_info
,
1629 vec
<tree
> *vec_oprnds0
,
1630 vec
<tree
> *vec_oprnds1
,
1635 auto_vec
<vec
<tree
> > vec_defs (SLP_TREE_CHILDREN (slp_node
).length ());
1636 vect_get_slp_defs (vinfo
, slp_node
, &vec_defs
, op1
? 2 : 1);
1637 *vec_oprnds0
= vec_defs
[0];
1639 *vec_oprnds1
= vec_defs
[1];
1645 vec_oprnds0
->create (1);
1646 vec_oprnd
= vect_get_vec_def_for_operand (vinfo
, op0
, stmt_info
);
1647 vec_oprnds0
->quick_push (vec_oprnd
);
1651 vec_oprnds1
->create (1);
1652 vec_oprnd
= vect_get_vec_def_for_operand (vinfo
, op1
, stmt_info
);
1653 vec_oprnds1
->quick_push (vec_oprnd
);
/* NOTE(review): extraction-damaged text.  Stale line numbers fused in;
   gaps (1666->1668, 1680->1683) show the branch selecting between the
   EH-region update and the no-throw assert is missing.  Recover from
   upstream before editing.  */
1658 /* Helper function called by vect_finish_replace_stmt and
1659 vect_finish_stmt_generation. Set the location of the new
1660 statement and create and return a stmt_vec_info for it. */
1662 static stmt_vec_info
1663 vect_finish_stmt_generation_1 (vec_info
*vinfo
,
1664 stmt_vec_info stmt_info
, gimple
*vec_stmt
)
1666 stmt_vec_info vec_stmt_info
= vinfo
->add_stmt (vec_stmt
);
1668 if (dump_enabled_p ())
1669 dump_printf_loc (MSG_NOTE
, vect_location
, "add new stmt: %G", vec_stmt
);
1673 gimple_set_location (vec_stmt
, gimple_location (stmt_info
->stmt
));
1675 /* While EH edges will generally prevent vectorization, stmt might
1676 e.g. be in a must-not-throw region. Ensure newly created stmts
1677 that could throw are part of the same region. */
1678 int lp_nr
= lookup_stmt_eh_lp (stmt_info
->stmt
);
1679 if (lp_nr
!= 0 && stmt_could_throw_p (cfun
, vec_stmt
))
1680 add_stmt_to_eh_lp (vec_stmt
, lp_nr
);
1683 gcc_assert (!stmt_could_throw_p (cfun
, vec_stmt
));
1685 return vec_stmt_info
;
/* NOTE(review): extraction-damaged text (stale line numbers fused in;
   return type line is absent).  Recover from upstream before editing.
   Replaces the original scalar stmt in place with VEC_STMT, asserting
   both define the same lhs, then records the new stmt.  */
1688 /* Replace the scalar statement STMT_INFO with a new vector statement VEC_STMT,
1689 which sets the same scalar result as STMT_INFO did. Create and return a
1690 stmt_vec_info for VEC_STMT. */
1693 vect_finish_replace_stmt (vec_info
*vinfo
,
1694 stmt_vec_info stmt_info
, gimple
*vec_stmt
)
1696 gimple
*scalar_stmt
= vect_orig_stmt (stmt_info
)->stmt
;
1697 gcc_assert (gimple_get_lhs (scalar_stmt
) == gimple_get_lhs (vec_stmt
));
1699 gimple_stmt_iterator gsi
= gsi_for_stmt (scalar_stmt
);
1700 gsi_replace (&gsi
, vec_stmt
, true);
1702 return vect_finish_stmt_generation_1 (vinfo
, stmt_info
, vec_stmt
);
/* NOTE(review): extraction-damaged text.  Stale line numbers fused in;
   braces are missing at several nesting levels (gaps 1720->1722,
   1737->1741).  Recover from upstream before editing.  */
1705 /* Add VEC_STMT to the vectorized implementation of STMT_INFO and insert it
1706 before *GSI. Create and return a stmt_vec_info for VEC_STMT. */
1709 vect_finish_stmt_generation (vec_info
*vinfo
,
1710 stmt_vec_info stmt_info
, gimple
*vec_stmt
,
1711 gimple_stmt_iterator
*gsi
)
1713 gcc_assert (!stmt_info
|| gimple_code (stmt_info
->stmt
) != GIMPLE_LABEL
);
1715 if (!gsi_end_p (*gsi
)
1716 && gimple_has_mem_ops (vec_stmt
))
1718 gimple
*at_stmt
= gsi_stmt (*gsi
);
1719 tree vuse
= gimple_vuse (at_stmt
);
1720 if (vuse
&& TREE_CODE (vuse
) == SSA_NAME
)
1722 tree vdef
= gimple_vdef (at_stmt
);
1723 gimple_set_vuse (vec_stmt
, gimple_vuse (at_stmt
));
1724 /* If we have an SSA vuse and insert a store, update virtual
1725 SSA form to avoid triggering the renamer. Do so only
1726 if we can easily see all uses - which is what almost always
1727 happens with the way vectorized stmts are inserted. */
1728 if ((vdef
&& TREE_CODE (vdef
) == SSA_NAME
)
1729 && ((is_gimple_assign (vec_stmt
)
1730 && !is_gimple_reg (gimple_assign_lhs (vec_stmt
)))
1731 || (is_gimple_call (vec_stmt
)
1732 && !(gimple_call_flags (vec_stmt
)
1733 & (ECF_CONST
|ECF_PURE
|ECF_NOVOPS
)))))
1735 tree new_vdef
= copy_ssa_name (vuse
, vec_stmt
);
1736 gimple_set_vdef (vec_stmt
, new_vdef
);
1737 SET_USE (gimple_vuse_op (at_stmt
), new_vdef
);
1741 gsi_insert_before (gsi
, vec_stmt
, GSI_SAME_STMT
);
1742 return vect_finish_stmt_generation_1 (vinfo
, stmt_info
, vec_stmt
);
/* NOTE(review): extraction-damaged text.  Stale line numbers fused in;
   the function's return statements (presumably `return ifn;` and the
   IFN_LAST fallback, original lines past 1767) were dropped.  Recover
   from upstream before editing.  */
1745 /* We want to vectorize a call to combined function CFN with function
1746 decl FNDECL, using VECTYPE_OUT as the type of the output and VECTYPE_IN
1747 as the types of all inputs. Check whether this is possible using
1748 an internal function, returning its code if so or IFN_LAST if not. */
1751 vectorizable_internal_function (combined_fn cfn
, tree fndecl
,
1752 tree vectype_out
, tree vectype_in
)
1755 if (internal_fn_p (cfn
))
1756 ifn
= as_internal_fn (cfn
);
1758 ifn
= associated_internal_fn (fndecl
);
1759 if (ifn
!= IFN_LAST
&& direct_internal_fn_p (ifn
))
1761 const direct_internal_fn_info
&info
= direct_internal_fn (ifn
);
1762 if (info
.vectorizable
)
1764 tree type0
= (info
.type0
< 0 ? vectype_out
: vectype_in
);
1765 tree type1
= (info
.type1
< 0 ? vectype_out
: vectype_in
);
1766 if (direct_internal_fn_supported_p (ifn
, tree_pair (type0
, type1
),
1767 OPTIMIZE_FOR_SPEED
))
/* Forward declaration of permute_vec_elements (defined later in the
   file); used by the load/store vectorization routines below.  */
1775 static tree
permute_vec_elements (vec_info
*, tree
, tree
, tree
, stmt_vec_info
,
1776 gimple_stmt_iterator
*);
/* NOTE(review): extraction-damaged text.  Stale line numbers fused in;
   returns, braces and an `if (is_load ...)` header are missing (gaps
   1801->1804, 1807->1810, 1818->1821).  Recover from upstream before
   editing.  */
1778 /* Check whether a load or store statement in the loop described by
1779 LOOP_VINFO is possible in a fully-masked loop. This is testing
1780 whether the vectorizer pass has the appropriate support, as well as
1781 whether the target does.
1783 VLS_TYPE says whether the statement is a load or store and VECTYPE
1784 is the type of the vector being loaded or stored. MEMORY_ACCESS_TYPE
1785 says how the load or store is going to be implemented and GROUP_SIZE
1786 is the number of load or store statements in the containing group.
1787 If the access is a gather load or scatter store, GS_INFO describes
1788 its arguments. If the load or store is conditional, SCALAR_MASK is the
1789 condition under which it occurs.
1791 Clear LOOP_VINFO_CAN_FULLY_MASK_P if a fully-masked loop is not
1792 supported, otherwise record the required mask types. */
1795 check_load_store_masking (loop_vec_info loop_vinfo
, tree vectype
,
1796 vec_load_store_type vls_type
, int group_size
,
1797 vect_memory_access_type memory_access_type
,
1798 gather_scatter_info
*gs_info
, tree scalar_mask
)
1800 /* Invariant loads need no special support. */
1801 if (memory_access_type
== VMAT_INVARIANT
)
1804 vec_loop_masks
*masks
= &LOOP_VINFO_MASKS (loop_vinfo
);
1805 machine_mode vecmode
= TYPE_MODE (vectype
);
1806 bool is_load
= (vls_type
== VLS_LOAD
);
1807 if (memory_access_type
== VMAT_LOAD_STORE_LANES
)
1810 ? !vect_load_lanes_supported (vectype
, group_size
, true)
1811 : !vect_store_lanes_supported (vectype
, group_size
, true))
1813 if (dump_enabled_p ())
1814 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1815 "can't use a fully-masked loop because the"
1816 " target doesn't have an appropriate masked"
1817 " load/store-lanes instruction.\n");
1818 LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo
) = false;
1821 unsigned int ncopies
= vect_get_num_copies (loop_vinfo
, vectype
);
1822 vect_record_loop_mask (loop_vinfo
, masks
, ncopies
, vectype
, scalar_mask
);
1826 if (memory_access_type
== VMAT_GATHER_SCATTER
)
1828 internal_fn ifn
= (is_load
1829 ? IFN_MASK_GATHER_LOAD
1830 : IFN_MASK_SCATTER_STORE
);
1831 if (!internal_gather_scatter_fn_supported_p (ifn
, vectype
,
1832 gs_info
->memory_type
,
1833 gs_info
->offset_vectype
,
1836 if (dump_enabled_p ())
1837 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1838 "can't use a fully-masked loop because the"
1839 " target doesn't have an appropriate masked"
1840 " gather load or scatter store instruction.\n");
1841 LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo
) = false;
1844 unsigned int ncopies
= vect_get_num_copies (loop_vinfo
, vectype
);
1845 vect_record_loop_mask (loop_vinfo
, masks
, ncopies
, vectype
, scalar_mask
);
1849 if (memory_access_type
!= VMAT_CONTIGUOUS
1850 && memory_access_type
!= VMAT_CONTIGUOUS_PERMUTE
)
1852 /* Element X of the data must come from iteration i * VF + X of the
1853 scalar loop. We need more work to support other mappings. */
1854 if (dump_enabled_p ())
1855 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1856 "can't use a fully-masked loop because an access"
1857 " isn't contiguous.\n");
1858 LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo
) = false;
1862 machine_mode mask_mode
;
1863 if (!VECTOR_MODE_P (vecmode
)
1864 || !targetm
.vectorize
.get_mask_mode (vecmode
).exists (&mask_mode
)
1865 || !can_vec_mask_load_store_p (vecmode
, mask_mode
, is_load
))
1867 if (dump_enabled_p ())
1868 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1869 "can't use a fully-masked loop because the target"
1870 " doesn't have the appropriate masked load or"
1872 LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo
) = false;
1875 /* We might load more scalars than we need for permuting SLP loads.
1876 We checked in get_group_load_store_type that the extra elements
1877 don't leak into a new vector. */
1878 poly_uint64 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
1879 poly_uint64 vf
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
);
1880 unsigned int nvectors
;
1881 if (can_div_away_from_zero_p (group_size
* vf
, nunits
, &nvectors
))
1882 vect_record_loop_mask (loop_vinfo
, masks
, nvectors
, vectype
, scalar_mask
);
/* NOTE(review): extraction-damaged text.  Stale line numbers fused in;
   the early `return vec_mask` when LOOP_MASK is null and the final
   `return and_res` were dropped (gaps 1899->1903, after 1907).
   Recover from upstream before editing.  */
1887 /* Return the mask input to a masked load or store. VEC_MASK is the vectorized
1888 form of the scalar mask condition and LOOP_MASK, if nonnull, is the mask
1889 that needs to be applied to all loads and stores in a vectorized loop.
1890 Return VEC_MASK if LOOP_MASK is null, otherwise return VEC_MASK & LOOP_MASK.
1892 MASK_TYPE is the type of both masks. If new statements are needed,
1893 insert them before GSI. */
1896 prepare_load_store_mask (tree mask_type
, tree loop_mask
, tree vec_mask
,
1897 gimple_stmt_iterator
*gsi
)
1899 gcc_assert (useless_type_conversion_p (mask_type
, TREE_TYPE (vec_mask
)));
1903 gcc_assert (TREE_TYPE (loop_mask
) == mask_type
);
1904 tree and_res
= make_temp_ssa_name (mask_type
, NULL
, "vec_mask_and");
1905 gimple
*and_stmt
= gimple_build_assign (and_res
, BIT_AND_EXPR
,
1906 vec_mask
, loop_mask
);
1907 gsi_insert_before (gsi
, and_stmt
, GSI_SAME_STMT
);
/* NOTE(review): extraction-damaged text.  Stale line numbers fused in;
   several returns, `continue`s and declarations (e.g. of `factor`, the
   signedness argument at line 1975) are missing.  Recover from
   upstream before editing.  */
1911 /* Determine whether we can use a gather load or scatter store to vectorize
1912 strided load or store STMT_INFO by truncating the current offset to a
1913 smaller width. We need to be able to construct an offset vector:
1915 { 0, X, X*2, X*3, ... }
1917 without loss of precision, where X is STMT_INFO's DR_STEP.
1919 Return true if this is possible, describing the gather load or scatter
1920 store in GS_INFO. MASKED_P is true if the load or store is conditional. */
1923 vect_truncate_gather_scatter_offset (stmt_vec_info stmt_info
,
1924 loop_vec_info loop_vinfo
, bool masked_p
,
1925 gather_scatter_info
*gs_info
)
1927 dr_vec_info
*dr_info
= STMT_VINFO_DR_INFO (stmt_info
);
1928 data_reference
*dr
= dr_info
->dr
;
1929 tree step
= DR_STEP (dr
);
1930 if (TREE_CODE (step
) != INTEGER_CST
)
1932 /* ??? Perhaps we could use range information here? */
1933 if (dump_enabled_p ())
1934 dump_printf_loc (MSG_NOTE
, vect_location
,
1935 "cannot truncate variable step.\n");
1939 /* Get the number of bits in an element. */
1940 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
1941 scalar_mode element_mode
= SCALAR_TYPE_MODE (TREE_TYPE (vectype
));
1942 unsigned int element_bits
= GET_MODE_BITSIZE (element_mode
);
1944 /* Set COUNT to the upper limit on the number of elements - 1.
1945 Start with the maximum vectorization factor. */
1946 unsigned HOST_WIDE_INT count
= vect_max_vf (loop_vinfo
) - 1;
1948 /* Try lowering COUNT to the number of scalar latch iterations. */
1949 class loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
1950 widest_int max_iters
;
1951 if (max_loop_iterations (loop
, &max_iters
)
1952 && max_iters
< count
)
1953 count
= max_iters
.to_shwi ();
1955 /* Try scales of 1 and the element size. */
1956 int scales
[] = { 1, vect_get_scalar_dr_size (dr_info
) };
1957 wi::overflow_type overflow
= wi::OVF_NONE
;
1958 for (int i
= 0; i
< 2; ++i
)
1960 int scale
= scales
[i
];
/* NOTE(review): the declaration of `factor` (original line 1961) is
   missing from this copy.  */
1962 if (!wi::multiple_of_p (wi::to_widest (step
), scale
, SIGNED
, &factor
))
1965 /* Determine the minimum precision of (COUNT - 1) * STEP / SCALE. */
1966 widest_int range
= wi::mul (count
, factor
, SIGNED
, &overflow
);
1969 signop sign
= range
>= 0 ? UNSIGNED
: SIGNED
;
1970 unsigned int min_offset_bits
= wi::min_precision (range
, sign
);
1972 /* Find the narrowest viable offset type. */
1973 unsigned int offset_bits
= 1U << ceil_log2 (min_offset_bits
);
1974 tree offset_type
= build_nonstandard_integer_type (offset_bits
,
1977 /* See whether the target supports the operation with an offset
1978 no narrower than OFFSET_TYPE. */
1979 tree memory_type
= TREE_TYPE (DR_REF (dr
));
1980 if (!vect_gather_scatter_fn_p (loop_vinfo
, DR_IS_READ (dr
), masked_p
,
1981 vectype
, memory_type
, offset_type
, scale
,
1982 &gs_info
->ifn
, &gs_info
->offset_vectype
))
1985 gs_info
->decl
= NULL_TREE
;
1986 /* Logically the sum of DR_BASE_ADDRESS, DR_INIT and DR_OFFSET,
1987 but we don't need to store that here. */
1988 gs_info
->base
= NULL_TREE
;
1989 gs_info
->element_type
= TREE_TYPE (vectype
);
1990 gs_info
->offset
= fold_convert (offset_type
, step
);
1991 gs_info
->offset_dt
= vect_constant_def
;
1992 gs_info
->scale
= scale
;
1993 gs_info
->memory_type
= memory_type
;
1997 if (overflow
&& dump_enabled_p ())
1998 dump_printf_loc (MSG_NOTE
, vect_location
,
1999 "truncating gather/scatter offset to %d bits"
2000 " might change its value.\n", element_bits
);
/* NOTE(review): extraction-damaged text.  Stale line numbers fused in;
   the condition joined by the dangling `&&`-less gap 2016->2018 and the
   final `return true;` are missing.  Recover from upstream before
   editing.  */
2005 /* Return true if we can use gather/scatter internal functions to
2006 vectorize STMT_INFO, which is a grouped or strided load or store.
2007 MASKED_P is true if load or store is conditional. When returning
2008 true, fill in GS_INFO with the information required to perform the
2012 vect_use_strided_gather_scatters_p (stmt_vec_info stmt_info
,
2013 loop_vec_info loop_vinfo
, bool masked_p
,
2014 gather_scatter_info
*gs_info
)
2016 if (!vect_check_gather_scatter (stmt_info
, loop_vinfo
, gs_info
)
2018 return vect_truncate_gather_scatter_offset (stmt_info
, loop_vinfo
,
2021 tree old_offset_type
= TREE_TYPE (gs_info
->offset
);
2022 tree new_offset_type
= TREE_TYPE (gs_info
->offset_vectype
);
2024 gcc_assert (TYPE_PRECISION (new_offset_type
)
2025 >= TYPE_PRECISION (old_offset_type
));
2026 gs_info
->offset
= fold_convert (new_offset_type
, gs_info
->offset
);
2028 if (dump_enabled_p ())
2029 dump_printf_loc (MSG_NOTE
, vect_location
,
2030 "using gather/scatter for strided/grouped access,"
2031 " scale = %d\n", gs_info
->scale
);
/* NOTE(review): extraction-damaged text.  The second argument of
   tree_int_cst_compare (original line 2045, the zero constant) was
   dropped by the extraction.  Recover from upstream before editing.  */
2036 /* STMT_INFO is a non-strided load or store, meaning that it accesses
2037 elements with a known constant step. Return -1 if that step
2038 is negative, 0 if it is zero, and 1 if it is greater than zero. */
2041 compare_step_with_zero (vec_info
*vinfo
, stmt_vec_info stmt_info
)
2043 dr_vec_info
*dr_info
= STMT_VINFO_DR_INFO (stmt_info
);
2044 return tree_int_cst_compare (vect_dr_behavior (vinfo
, dr_info
)->step
,
/* NOTE(review): extraction-damaged text; the NULL_TREE return after the
   can_vec_perm_const_p check (original line 2063) is missing.  Recover
   from upstream before editing.  Builds a {n-1, n-2, n-3, ...} stepped
   permutation and checks the target supports it.  */
2048 /* If the target supports a permute mask that reverses the elements in
2049 a vector of type VECTYPE, return that mask, otherwise return null. */
2052 perm_mask_for_reverse (tree vectype
)
2054 poly_uint64 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
2056 /* The encoding has a single stepped pattern. */
2057 vec_perm_builder
sel (nunits
, 1, 3);
2058 for (int i
= 0; i
< 3; ++i
)
2059 sel
.quick_push (nunits
- 1 - i
);
2061 vec_perm_indices
indices (sel
, 1, nunits
);
2062 if (!can_vec_perm_const_p (TYPE_MODE (vectype
), indices
))
2064 return vect_gen_perm_mask_checked (vectype
, indices
);
/* NOTE(review): extraction-damaged text.  Stale line numbers fused in;
   the ncopies check guarding the first branch and the second argument
   of vect_supportable_dr_alignment (gap 2088->2090) are missing.
   Recover from upstream before editing.  */
2067 /* A subroutine of get_load_store_type, with a subset of the same
2068 arguments. Handle the case where STMT_INFO is a load or store that
2069 accesses consecutive elements with a negative step. */
2071 static vect_memory_access_type
2072 get_negative_load_store_type (vec_info
*vinfo
,
2073 stmt_vec_info stmt_info
, tree vectype
,
2074 vec_load_store_type vls_type
,
2075 unsigned int ncopies
)
2077 dr_vec_info
*dr_info
= STMT_VINFO_DR_INFO (stmt_info
);
2078 dr_alignment_support alignment_support_scheme
;
2082 if (dump_enabled_p ())
2083 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2084 "multiple types with negative step.\n");
2085 return VMAT_ELEMENTWISE
;
2088 alignment_support_scheme
= vect_supportable_dr_alignment (vinfo
,
2090 if (alignment_support_scheme
!= dr_aligned
2091 && alignment_support_scheme
!= dr_unaligned_supported
)
2093 if (dump_enabled_p ())
2094 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2095 "negative step but alignment required.\n");
2096 return VMAT_ELEMENTWISE
;
2099 if (vls_type
== VLS_STORE_INVARIANT
)
2101 if (dump_enabled_p ())
2102 dump_printf_loc (MSG_NOTE
, vect_location
,
2103 "negative step with invariant source;"
2104 " no permute needed.\n");
2105 return VMAT_CONTIGUOUS_DOWN
;
2108 if (!perm_mask_for_reverse (vectype
))
2110 if (dump_enabled_p ())
2111 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2112 "negative step and reversing not supported.\n");
2113 return VMAT_ELEMENTWISE
;
2116 return VMAT_CONTIGUOUS_REVERSE
;
2119 /* STMT_INFO is either a masked or unconditional store. Return the value
2123 vect_get_store_rhs (stmt_vec_info stmt_info
)
2125 if (gassign
*assign
= dyn_cast
<gassign
*> (stmt_info
->stmt
))
2127 gcc_assert (gimple_assign_single_p (assign
));
2128 return gimple_assign_rhs1 (assign
);
2130 if (gcall
*call
= dyn_cast
<gcall
*> (stmt_info
->stmt
))
2132 internal_fn ifn
= gimple_call_internal_fn (call
);
2133 int index
= internal_fn_stored_value_index (ifn
);
2134 gcc_assert (index
>= 0);
2135 return gimple_call_arg (call
, index
);
2140 /* Function VECTOR_VECTOR_COMPOSITION_TYPE
2142 This function returns a vector type which can be composed with NETLS pieces,
2143 whose type is recorded in PTYPE. VTYPE should be a vector type, and has the
2144 same vector size as the return vector. It checks target whether supports
2145 pieces-size vector mode for construction firstly, if target fails to, check
2146 pieces-size scalar mode for construction further. It returns NULL_TREE if
2147 fails to find the available composition.
2149 For example, for (vtype=V16QI, nelts=4), we can probably get:
2150 - V16QI with PTYPE V4QI.
2151 - V4SI with PTYPE SI.
2155 vector_vector_composition_type (tree vtype
, poly_uint64 nelts
, tree
*ptype
)
2157 gcc_assert (VECTOR_TYPE_P (vtype
));
2158 gcc_assert (known_gt (nelts
, 0U));
2160 machine_mode vmode
= TYPE_MODE (vtype
);
2161 if (!VECTOR_MODE_P (vmode
))
2164 poly_uint64 vbsize
= GET_MODE_BITSIZE (vmode
);
2165 unsigned int pbsize
;
2166 if (constant_multiple_p (vbsize
, nelts
, &pbsize
))
2168 /* First check if vec_init optab supports construction from
2169 vector pieces directly. */
2170 scalar_mode elmode
= SCALAR_TYPE_MODE (TREE_TYPE (vtype
));
2171 poly_uint64 inelts
= pbsize
/ GET_MODE_BITSIZE (elmode
);
2173 if (related_vector_mode (vmode
, elmode
, inelts
).exists (&rmode
)
2174 && (convert_optab_handler (vec_init_optab
, vmode
, rmode
)
2175 != CODE_FOR_nothing
))
2177 *ptype
= build_vector_type (TREE_TYPE (vtype
), inelts
);
2181 /* Otherwise check if exists an integer type of the same piece size and
2182 if vec_init optab supports construction from it directly. */
2183 if (int_mode_for_size (pbsize
, 0).exists (&elmode
)
2184 && related_vector_mode (vmode
, elmode
, nelts
).exists (&rmode
)
2185 && (convert_optab_handler (vec_init_optab
, rmode
, elmode
)
2186 != CODE_FOR_nothing
))
2188 *ptype
= build_nonstandard_integer_type (pbsize
, 1);
2189 return build_vector_type (*ptype
, nelts
);
2196 /* A subroutine of get_load_store_type, with a subset of the same
2197 arguments. Handle the case where STMT_INFO is part of a grouped load
2200 For stores, the statements in the group are all consecutive
2201 and there is no gap at the end. For loads, the statements in the
2202 group might not be consecutive; there can be gaps between statements
2203 as well as at the end. */
2206 get_group_load_store_type (vec_info
*vinfo
, stmt_vec_info stmt_info
,
2207 tree vectype
, bool slp
,
2208 bool masked_p
, vec_load_store_type vls_type
,
2209 vect_memory_access_type
*memory_access_type
,
2210 gather_scatter_info
*gs_info
)
2212 loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
);
2213 class loop
*loop
= loop_vinfo
? LOOP_VINFO_LOOP (loop_vinfo
) : NULL
;
2214 stmt_vec_info first_stmt_info
= DR_GROUP_FIRST_ELEMENT (stmt_info
);
2215 dr_vec_info
*first_dr_info
= STMT_VINFO_DR_INFO (first_stmt_info
);
2216 unsigned int group_size
= DR_GROUP_SIZE (first_stmt_info
);
2217 bool single_element_p
= (stmt_info
== first_stmt_info
2218 && !DR_GROUP_NEXT_ELEMENT (stmt_info
));
2219 unsigned HOST_WIDE_INT gap
= DR_GROUP_GAP (first_stmt_info
);
2220 poly_uint64 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
2222 /* True if the vectorized statements would access beyond the last
2223 statement in the group. */
2224 bool overrun_p
= false;
2226 /* True if we can cope with such overrun by peeling for gaps, so that
2227 there is at least one final scalar iteration after the vector loop. */
2228 bool can_overrun_p
= (!masked_p
2229 && vls_type
== VLS_LOAD
2233 /* There can only be a gap at the end of the group if the stride is
2234 known at compile time. */
2235 gcc_assert (!STMT_VINFO_STRIDED_P (first_stmt_info
) || gap
== 0);
2237 /* Stores can't yet have gaps. */
2238 gcc_assert (slp
|| vls_type
== VLS_LOAD
|| gap
== 0);
2242 if (STMT_VINFO_STRIDED_P (first_stmt_info
))
2244 /* Try to use consecutive accesses of DR_GROUP_SIZE elements,
2245 separated by the stride, until we have a complete vector.
2246 Fall back to scalar accesses if that isn't possible. */
2247 if (multiple_p (nunits
, group_size
))
2248 *memory_access_type
= VMAT_STRIDED_SLP
;
2250 *memory_access_type
= VMAT_ELEMENTWISE
;
2254 overrun_p
= loop_vinfo
&& gap
!= 0;
2255 if (overrun_p
&& vls_type
!= VLS_LOAD
)
2257 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2258 "Grouped store with gaps requires"
2259 " non-consecutive accesses\n");
2262 /* An overrun is fine if the trailing elements are smaller
2263 than the alignment boundary B. Every vector access will
2264 be a multiple of B and so we are guaranteed to access a
2265 non-gap element in the same B-sized block. */
2267 && gap
< (vect_known_alignment_in_bytes (first_dr_info
)
2268 / vect_get_scalar_dr_size (first_dr_info
)))
2271 /* If the gap splits the vector in half and the target
2272 can do half-vector operations avoid the epilogue peeling
2273 by simply loading half of the vector only. Usually
2274 the construction with an upper zero half will be elided. */
2275 dr_alignment_support alignment_support_scheme
;
2279 && (((alignment_support_scheme
2280 = vect_supportable_dr_alignment (vinfo
,
2281 first_dr_info
, false)))
2283 || alignment_support_scheme
== dr_unaligned_supported
)
2284 && known_eq (nunits
, (group_size
- gap
) * 2)
2285 && known_eq (nunits
, group_size
)
2286 && (vector_vector_composition_type (vectype
, 2, &half_vtype
)
2290 if (overrun_p
&& !can_overrun_p
)
2292 if (dump_enabled_p ())
2293 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2294 "Peeling for outer loop is not supported\n");
2297 int cmp
= compare_step_with_zero (vinfo
, stmt_info
);
2299 *memory_access_type
= get_negative_load_store_type
2300 (vinfo
, stmt_info
, vectype
, vls_type
, 1);
2303 gcc_assert (!loop_vinfo
|| cmp
> 0);
2304 *memory_access_type
= VMAT_CONTIGUOUS
;
2310 /* We can always handle this case using elementwise accesses,
2311 but see if something more efficient is available. */
2312 *memory_access_type
= VMAT_ELEMENTWISE
;
2314 /* If there is a gap at the end of the group then these optimizations
2315 would access excess elements in the last iteration. */
2316 bool would_overrun_p
= (gap
!= 0);
2317 /* An overrun is fine if the trailing elements are smaller than the
2318 alignment boundary B. Every vector access will be a multiple of B
2319 and so we are guaranteed to access a non-gap element in the
2320 same B-sized block. */
2323 && gap
< (vect_known_alignment_in_bytes (first_dr_info
)
2324 / vect_get_scalar_dr_size (first_dr_info
)))
2325 would_overrun_p
= false;
2327 if (!STMT_VINFO_STRIDED_P (first_stmt_info
)
2328 && (can_overrun_p
|| !would_overrun_p
)
2329 && compare_step_with_zero (vinfo
, stmt_info
) > 0)
2331 /* First cope with the degenerate case of a single-element
2333 if (known_eq (TYPE_VECTOR_SUBPARTS (vectype
), 1U))
2334 *memory_access_type
= VMAT_CONTIGUOUS
;
2336 /* Otherwise try using LOAD/STORE_LANES. */
2337 if (*memory_access_type
== VMAT_ELEMENTWISE
2338 && (vls_type
== VLS_LOAD
2339 ? vect_load_lanes_supported (vectype
, group_size
, masked_p
)
2340 : vect_store_lanes_supported (vectype
, group_size
,
2343 *memory_access_type
= VMAT_LOAD_STORE_LANES
;
2344 overrun_p
= would_overrun_p
;
2347 /* If that fails, try using permuting loads. */
2348 if (*memory_access_type
== VMAT_ELEMENTWISE
2349 && (vls_type
== VLS_LOAD
2350 ? vect_grouped_load_supported (vectype
, single_element_p
,
2352 : vect_grouped_store_supported (vectype
, group_size
)))
2354 *memory_access_type
= VMAT_CONTIGUOUS_PERMUTE
;
2355 overrun_p
= would_overrun_p
;
2359 /* As a last resort, trying using a gather load or scatter store.
2361 ??? Although the code can handle all group sizes correctly,
2362 it probably isn't a win to use separate strided accesses based
2363 on nearby locations. Or, even if it's a win over scalar code,
2364 it might not be a win over vectorizing at a lower VF, if that
2365 allows us to use contiguous accesses. */
2366 if (*memory_access_type
== VMAT_ELEMENTWISE
2369 && vect_use_strided_gather_scatters_p (stmt_info
, loop_vinfo
,
2371 *memory_access_type
= VMAT_GATHER_SCATTER
;
2374 if (vls_type
!= VLS_LOAD
&& first_stmt_info
== stmt_info
)
2376 /* STMT is the leader of the group. Check the operands of all the
2377 stmts of the group. */
2378 stmt_vec_info next_stmt_info
= DR_GROUP_NEXT_ELEMENT (stmt_info
);
2379 while (next_stmt_info
)
2381 tree op
= vect_get_store_rhs (next_stmt_info
);
2382 enum vect_def_type dt
;
2383 if (!vect_is_simple_use (op
, vinfo
, &dt
))
2385 if (dump_enabled_p ())
2386 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2387 "use not simple.\n");
2390 next_stmt_info
= DR_GROUP_NEXT_ELEMENT (next_stmt_info
);
2396 gcc_assert (can_overrun_p
);
2397 if (dump_enabled_p ())
2398 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2399 "Data access with gaps requires scalar "
2401 LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo
) = true;
2407 /* Analyze load or store statement STMT_INFO of type VLS_TYPE. Return true
2408 if there is a memory access type that the vectorized form can use,
2409 storing it in *MEMORY_ACCESS_TYPE if so. If we decide to use gathers
2410 or scatters, fill in GS_INFO accordingly.
2412 SLP says whether we're performing SLP rather than loop vectorization.
2413 MASKED_P is true if the statement is conditional on a vectorized mask.
2414 VECTYPE is the vector type that the vectorized statements will use.
2415 NCOPIES is the number of vector statements that will be needed. */
2418 get_load_store_type (vec_info
*vinfo
, stmt_vec_info stmt_info
,
2419 tree vectype
, bool slp
,
2420 bool masked_p
, vec_load_store_type vls_type
,
2421 unsigned int ncopies
,
2422 vect_memory_access_type
*memory_access_type
,
2423 gather_scatter_info
*gs_info
)
2425 loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
);
2426 poly_uint64 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
2427 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info
))
2429 *memory_access_type
= VMAT_GATHER_SCATTER
;
2430 if (!vect_check_gather_scatter (stmt_info
, loop_vinfo
, gs_info
))
2432 else if (!vect_is_simple_use (gs_info
->offset
, vinfo
,
2433 &gs_info
->offset_dt
,
2434 &gs_info
->offset_vectype
))
2436 if (dump_enabled_p ())
2437 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2438 "%s index use not simple.\n",
2439 vls_type
== VLS_LOAD
? "gather" : "scatter");
2443 else if (STMT_VINFO_GROUPED_ACCESS (stmt_info
))
2445 if (!get_group_load_store_type (vinfo
, stmt_info
, vectype
, slp
, masked_p
,
2446 vls_type
, memory_access_type
, gs_info
))
2449 else if (STMT_VINFO_STRIDED_P (stmt_info
))
2453 && vect_use_strided_gather_scatters_p (stmt_info
, loop_vinfo
,
2455 *memory_access_type
= VMAT_GATHER_SCATTER
;
2457 *memory_access_type
= VMAT_ELEMENTWISE
;
2461 int cmp
= compare_step_with_zero (vinfo
, stmt_info
);
2463 *memory_access_type
= get_negative_load_store_type
2464 (vinfo
, stmt_info
, vectype
, vls_type
, ncopies
);
2467 gcc_assert (vls_type
== VLS_LOAD
);
2468 *memory_access_type
= VMAT_INVARIANT
;
2471 *memory_access_type
= VMAT_CONTIGUOUS
;
2474 if ((*memory_access_type
== VMAT_ELEMENTWISE
2475 || *memory_access_type
== VMAT_STRIDED_SLP
)
2476 && !nunits
.is_constant ())
2478 if (dump_enabled_p ())
2479 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2480 "Not using elementwise accesses due to variable "
2481 "vectorization factor.\n");
2485 /* FIXME: At the moment the cost model seems to underestimate the
2486 cost of using elementwise accesses. This check preserves the
2487 traditional behavior until that can be fixed. */
2488 stmt_vec_info first_stmt_info
= DR_GROUP_FIRST_ELEMENT (stmt_info
);
2489 if (!first_stmt_info
)
2490 first_stmt_info
= stmt_info
;
2491 if (*memory_access_type
== VMAT_ELEMENTWISE
2492 && !STMT_VINFO_STRIDED_P (first_stmt_info
)
2493 && !(stmt_info
== DR_GROUP_FIRST_ELEMENT (stmt_info
)
2494 && !DR_GROUP_NEXT_ELEMENT (stmt_info
)
2495 && !pow2p_hwi (DR_GROUP_SIZE (stmt_info
))))
2497 if (dump_enabled_p ())
2498 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2499 "not falling back to elementwise accesses\n");
2505 /* Return true if boolean argument MASK is suitable for vectorizing
2506 conditional operation STMT_INFO. When returning true, store the type
2507 of the definition in *MASK_DT_OUT and the type of the vectorized mask
2508 in *MASK_VECTYPE_OUT. */
2511 vect_check_scalar_mask (vec_info
*vinfo
, stmt_vec_info stmt_info
, tree mask
,
2512 vect_def_type
*mask_dt_out
,
2513 tree
*mask_vectype_out
)
2515 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (mask
)))
2517 if (dump_enabled_p ())
2518 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2519 "mask argument is not a boolean.\n");
2523 if (TREE_CODE (mask
) != SSA_NAME
)
2525 if (dump_enabled_p ())
2526 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2527 "mask argument is not an SSA name.\n");
2531 enum vect_def_type mask_dt
;
2533 if (!vect_is_simple_use (mask
, vinfo
, &mask_dt
, &mask_vectype
))
2535 if (dump_enabled_p ())
2536 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2537 "mask use not simple.\n");
2541 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
2543 mask_vectype
= get_mask_type_for_scalar_type (vinfo
, TREE_TYPE (vectype
));
2545 if (!mask_vectype
|| !VECTOR_BOOLEAN_TYPE_P (mask_vectype
))
2547 if (dump_enabled_p ())
2548 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2549 "could not find an appropriate vector mask type.\n");
2553 if (maybe_ne (TYPE_VECTOR_SUBPARTS (mask_vectype
),
2554 TYPE_VECTOR_SUBPARTS (vectype
)))
2556 if (dump_enabled_p ())
2557 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2558 "vector mask type %T"
2559 " does not match vector data type %T.\n",
2560 mask_vectype
, vectype
);
2565 *mask_dt_out
= mask_dt
;
2566 *mask_vectype_out
= mask_vectype
;
2570 /* Return true if stored value RHS is suitable for vectorizing store
2571 statement STMT_INFO. When returning true, store the type of the
2572 definition in *RHS_DT_OUT, the type of the vectorized store value in
2573 *RHS_VECTYPE_OUT and the type of the store in *VLS_TYPE_OUT. */
2576 vect_check_store_rhs (vec_info
*vinfo
, stmt_vec_info stmt_info
,
2577 slp_tree slp_node
, tree rhs
,
2578 vect_def_type
*rhs_dt_out
, tree
*rhs_vectype_out
,
2579 vec_load_store_type
*vls_type_out
)
2581 /* In the case this is a store from a constant make sure
2582 native_encode_expr can handle it. */
2583 if (CONSTANT_CLASS_P (rhs
) && native_encode_expr (rhs
, NULL
, 64) == 0)
2585 if (dump_enabled_p ())
2586 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2587 "cannot encode constant as a byte sequence.\n");
2591 enum vect_def_type rhs_dt
;
2594 if (!vect_is_simple_use (vinfo
, stmt_info
, slp_node
, 0,
2595 &rhs
, &slp_op
, &rhs_dt
, &rhs_vectype
))
2597 if (dump_enabled_p ())
2598 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2599 "use not simple.\n");
2603 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
2604 if (rhs_vectype
&& !useless_type_conversion_p (vectype
, rhs_vectype
))
2606 if (dump_enabled_p ())
2607 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2608 "incompatible vector types.\n");
2612 *rhs_dt_out
= rhs_dt
;
2613 *rhs_vectype_out
= rhs_vectype
;
2614 if (rhs_dt
== vect_constant_def
|| rhs_dt
== vect_external_def
)
2615 *vls_type_out
= VLS_STORE_INVARIANT
;
2617 *vls_type_out
= VLS_STORE
;
2621 /* Build an all-ones vector mask of type MASKTYPE while vectorizing STMT_INFO.
2622 Note that we support masks with floating-point type, in which case the
2623 floats are interpreted as a bitmask. */
2626 vect_build_all_ones_mask (vec_info
*vinfo
,
2627 stmt_vec_info stmt_info
, tree masktype
)
2629 if (TREE_CODE (masktype
) == INTEGER_TYPE
)
2630 return build_int_cst (masktype
, -1);
2631 else if (TREE_CODE (TREE_TYPE (masktype
)) == INTEGER_TYPE
)
2633 tree mask
= build_int_cst (TREE_TYPE (masktype
), -1);
2634 mask
= build_vector_from_val (masktype
, mask
);
2635 return vect_init_vector (vinfo
, stmt_info
, mask
, masktype
, NULL
);
2637 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (masktype
)))
2641 for (int j
= 0; j
< 6; ++j
)
2643 real_from_target (&r
, tmp
, TYPE_MODE (TREE_TYPE (masktype
)));
2644 tree mask
= build_real (TREE_TYPE (masktype
), r
);
2645 mask
= build_vector_from_val (masktype
, mask
);
2646 return vect_init_vector (vinfo
, stmt_info
, mask
, masktype
, NULL
);
2651 /* Build an all-zero merge value of type VECTYPE while vectorizing
2652 STMT_INFO as a gather load. */
2655 vect_build_zero_merge_argument (vec_info
*vinfo
,
2656 stmt_vec_info stmt_info
, tree vectype
)
2659 if (TREE_CODE (TREE_TYPE (vectype
)) == INTEGER_TYPE
)
2660 merge
= build_int_cst (TREE_TYPE (vectype
), 0);
2661 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (vectype
)))
2665 for (int j
= 0; j
< 6; ++j
)
2667 real_from_target (&r
, tmp
, TYPE_MODE (TREE_TYPE (vectype
)));
2668 merge
= build_real (TREE_TYPE (vectype
), r
);
2672 merge
= build_vector_from_val (vectype
, merge
);
2673 return vect_init_vector (vinfo
, stmt_info
, merge
, vectype
, NULL
);
2676 /* Build a gather load call while vectorizing STMT_INFO. Insert new
2677 instructions before GSI and add them to VEC_STMT. GS_INFO describes
2678 the gather load operation. If the load is conditional, MASK is the
2679 unvectorized condition and MASK_DT is its definition type, otherwise
2683 vect_build_gather_load_calls (vec_info
*vinfo
, stmt_vec_info stmt_info
,
2684 gimple_stmt_iterator
*gsi
,
2685 stmt_vec_info
*vec_stmt
,
2686 gather_scatter_info
*gs_info
,
2689 loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
);
2690 class loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
2691 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
2692 poly_uint64 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
2693 int ncopies
= vect_get_num_copies (loop_vinfo
, vectype
);
2694 edge pe
= loop_preheader_edge (loop
);
2695 enum { NARROW
, NONE
, WIDEN
} modifier
;
2696 poly_uint64 gather_off_nunits
2697 = TYPE_VECTOR_SUBPARTS (gs_info
->offset_vectype
);
2699 tree arglist
= TYPE_ARG_TYPES (TREE_TYPE (gs_info
->decl
));
2700 tree rettype
= TREE_TYPE (TREE_TYPE (gs_info
->decl
));
2701 tree srctype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
2702 tree ptrtype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
2703 tree idxtype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
2704 tree masktype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
2705 tree scaletype
= TREE_VALUE (arglist
);
2706 tree real_masktype
= masktype
;
2707 gcc_checking_assert (types_compatible_p (srctype
, rettype
)
2709 || TREE_CODE (masktype
) == INTEGER_TYPE
2710 || types_compatible_p (srctype
, masktype
)));
2711 if (mask
&& TREE_CODE (masktype
) == INTEGER_TYPE
)
2712 masktype
= truth_type_for (srctype
);
2714 tree mask_halftype
= masktype
;
2715 tree perm_mask
= NULL_TREE
;
2716 tree mask_perm_mask
= NULL_TREE
;
2717 if (known_eq (nunits
, gather_off_nunits
))
2719 else if (known_eq (nunits
* 2, gather_off_nunits
))
2723 /* Currently widening gathers and scatters are only supported for
2724 fixed-length vectors. */
2725 int count
= gather_off_nunits
.to_constant ();
2726 vec_perm_builder
sel (count
, count
, 1);
2727 for (int i
= 0; i
< count
; ++i
)
2728 sel
.quick_push (i
| (count
/ 2));
2730 vec_perm_indices
indices (sel
, 1, count
);
2731 perm_mask
= vect_gen_perm_mask_checked (gs_info
->offset_vectype
,
2734 else if (known_eq (nunits
, gather_off_nunits
* 2))
2738 /* Currently narrowing gathers and scatters are only supported for
2739 fixed-length vectors. */
2740 int count
= nunits
.to_constant ();
2741 vec_perm_builder
sel (count
, count
, 1);
2742 sel
.quick_grow (count
);
2743 for (int i
= 0; i
< count
; ++i
)
2744 sel
[i
] = i
< count
/ 2 ? i
: i
+ count
/ 2;
2745 vec_perm_indices
indices (sel
, 2, count
);
2746 perm_mask
= vect_gen_perm_mask_checked (vectype
, indices
);
2750 if (mask
&& masktype
== real_masktype
)
2752 for (int i
= 0; i
< count
; ++i
)
2753 sel
[i
] = i
| (count
/ 2);
2754 indices
.new_vector (sel
, 2, count
);
2755 mask_perm_mask
= vect_gen_perm_mask_checked (masktype
, indices
);
2758 mask_halftype
= truth_type_for (gs_info
->offset_vectype
);
2763 tree scalar_dest
= gimple_get_lhs (stmt_info
->stmt
);
2764 tree vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
2766 tree ptr
= fold_convert (ptrtype
, gs_info
->base
);
2767 if (!is_gimple_min_invariant (ptr
))
2770 ptr
= force_gimple_operand (ptr
, &seq
, true, NULL_TREE
);
2771 basic_block new_bb
= gsi_insert_seq_on_edge_immediate (pe
, seq
);
2772 gcc_assert (!new_bb
);
2775 tree scale
= build_int_cst (scaletype
, gs_info
->scale
);
2777 tree vec_oprnd0
= NULL_TREE
;
2778 tree vec_mask
= NULL_TREE
;
2779 tree src_op
= NULL_TREE
;
2780 tree mask_op
= NULL_TREE
;
2781 tree prev_res
= NULL_TREE
;
2782 stmt_vec_info prev_stmt_info
= NULL
;
2786 src_op
= vect_build_zero_merge_argument (vinfo
, stmt_info
, rettype
);
2787 mask_op
= vect_build_all_ones_mask (vinfo
, stmt_info
, masktype
);
2790 for (int j
= 0; j
< ncopies
; ++j
)
2793 if (modifier
== WIDEN
&& (j
& 1))
2794 op
= permute_vec_elements (vinfo
, vec_oprnd0
, vec_oprnd0
,
2795 perm_mask
, stmt_info
, gsi
);
2798 = vect_get_vec_def_for_operand (vinfo
, gs_info
->offset
, stmt_info
);
2800 op
= vec_oprnd0
= vect_get_vec_def_for_stmt_copy (loop_vinfo
,
2803 if (!useless_type_conversion_p (idxtype
, TREE_TYPE (op
)))
2805 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op
)),
2806 TYPE_VECTOR_SUBPARTS (idxtype
)));
2807 var
= vect_get_new_ssa_name (idxtype
, vect_simple_var
);
2808 op
= build1 (VIEW_CONVERT_EXPR
, idxtype
, op
);
2809 gassign
*new_stmt
= gimple_build_assign (var
, VIEW_CONVERT_EXPR
, op
);
2810 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
2816 if (mask_perm_mask
&& (j
& 1))
2817 mask_op
= permute_vec_elements (vinfo
, mask_op
, mask_op
,
2818 mask_perm_mask
, stmt_info
, gsi
);
2822 vec_mask
= vect_get_vec_def_for_operand (vinfo
, mask
, stmt_info
);
2823 else if (modifier
!= NARROW
|| (j
& 1) == 0)
2824 vec_mask
= vect_get_vec_def_for_stmt_copy (loop_vinfo
,
2828 if (!useless_type_conversion_p (masktype
, TREE_TYPE (vec_mask
)))
2830 poly_uint64 sub1
= TYPE_VECTOR_SUBPARTS (TREE_TYPE (mask_op
));
2831 poly_uint64 sub2
= TYPE_VECTOR_SUBPARTS (masktype
);
2832 gcc_assert (known_eq (sub1
, sub2
));
2833 var
= vect_get_new_ssa_name (masktype
, vect_simple_var
);
2834 mask_op
= build1 (VIEW_CONVERT_EXPR
, masktype
, mask_op
);
2836 = gimple_build_assign (var
, VIEW_CONVERT_EXPR
, mask_op
);
2837 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
2841 if (modifier
== NARROW
&& masktype
!= real_masktype
)
2843 var
= vect_get_new_ssa_name (mask_halftype
, vect_simple_var
);
2845 = gimple_build_assign (var
, (j
& 1) ? VEC_UNPACK_HI_EXPR
2846 : VEC_UNPACK_LO_EXPR
,
2848 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
2854 tree mask_arg
= mask_op
;
2855 if (masktype
!= real_masktype
)
2857 tree utype
, optype
= TREE_TYPE (mask_op
);
2858 if (TYPE_MODE (real_masktype
) == TYPE_MODE (optype
))
2859 utype
= real_masktype
;
2861 utype
= lang_hooks
.types
.type_for_mode (TYPE_MODE (optype
), 1);
2862 var
= vect_get_new_ssa_name (utype
, vect_scalar_var
);
2863 mask_arg
= build1 (VIEW_CONVERT_EXPR
, utype
, mask_op
);
2865 = gimple_build_assign (var
, VIEW_CONVERT_EXPR
, mask_arg
);
2866 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
2868 if (!useless_type_conversion_p (real_masktype
, utype
))
2870 gcc_assert (TYPE_PRECISION (utype
)
2871 <= TYPE_PRECISION (real_masktype
));
2872 var
= vect_get_new_ssa_name (real_masktype
, vect_scalar_var
);
2873 new_stmt
= gimple_build_assign (var
, NOP_EXPR
, mask_arg
);
2874 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
2877 src_op
= build_zero_cst (srctype
);
2879 gcall
*new_call
= gimple_build_call (gs_info
->decl
, 5, src_op
, ptr
, op
,
2882 stmt_vec_info new_stmt_info
;
2883 if (!useless_type_conversion_p (vectype
, rettype
))
2885 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (vectype
),
2886 TYPE_VECTOR_SUBPARTS (rettype
)));
2887 op
= vect_get_new_ssa_name (rettype
, vect_simple_var
);
2888 gimple_call_set_lhs (new_call
, op
);
2889 vect_finish_stmt_generation (vinfo
, stmt_info
, new_call
, gsi
);
2890 var
= make_ssa_name (vec_dest
);
2891 op
= build1 (VIEW_CONVERT_EXPR
, vectype
, op
);
2892 gassign
*new_stmt
= gimple_build_assign (var
, VIEW_CONVERT_EXPR
, op
);
2894 = vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
2898 var
= make_ssa_name (vec_dest
, new_call
);
2899 gimple_call_set_lhs (new_call
, var
);
2901 = vect_finish_stmt_generation (vinfo
, stmt_info
, new_call
, gsi
);
2904 if (modifier
== NARROW
)
2911 var
= permute_vec_elements (vinfo
, prev_res
, var
, perm_mask
,
2913 new_stmt_info
= loop_vinfo
->lookup_def (var
);
2916 if (prev_stmt_info
== NULL
)
2917 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt_info
;
2919 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt_info
;
2920 prev_stmt_info
= new_stmt_info
;
2924 /* Prepare the base and offset in GS_INFO for vectorization.
2925 Set *DATAREF_PTR to the loop-invariant base address and *VEC_OFFSET
2926 to the vectorized offset argument for the first copy of STMT_INFO.
2927 STMT_INFO is the statement described by GS_INFO and LOOP is the
2931 vect_get_gather_scatter_ops (vec_info
*vinfo
,
2932 class loop
*loop
, stmt_vec_info stmt_info
,
2933 gather_scatter_info
*gs_info
,
2934 tree
*dataref_ptr
, tree
*vec_offset
)
2936 gimple_seq stmts
= NULL
;
2937 *dataref_ptr
= force_gimple_operand (gs_info
->base
, &stmts
, true, NULL_TREE
);
2941 edge pe
= loop_preheader_edge (loop
);
2942 new_bb
= gsi_insert_seq_on_edge_immediate (pe
, stmts
);
2943 gcc_assert (!new_bb
);
2945 *vec_offset
= vect_get_vec_def_for_operand (vinfo
, gs_info
->offset
, stmt_info
,
2946 gs_info
->offset_vectype
);
2949 /* Prepare to implement a grouped or strided load or store using
2950 the gather load or scatter store operation described by GS_INFO.
2951 STMT_INFO is the load or store statement.
2953 Set *DATAREF_BUMP to the amount that should be added to the base
2954 address after each copy of the vectorized statement. Set *VEC_OFFSET
2955 to an invariant offset vector in which element I has the value
2956 I * DR_STEP / SCALE. */
2959 vect_get_strided_load_store_ops (stmt_vec_info stmt_info
,
2960 loop_vec_info loop_vinfo
,
2961 gather_scatter_info
*gs_info
,
2962 tree
*dataref_bump
, tree
*vec_offset
)
2964 struct data_reference
*dr
= STMT_VINFO_DATA_REF (stmt_info
);
2965 class loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
2966 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
2969 tree bump
= size_binop (MULT_EXPR
,
2970 fold_convert (sizetype
, unshare_expr (DR_STEP (dr
))),
2971 size_int (TYPE_VECTOR_SUBPARTS (vectype
)));
2972 *dataref_bump
= force_gimple_operand (bump
, &stmts
, true, NULL_TREE
);
2974 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop
), stmts
);
2976 /* The offset given in GS_INFO can have pointer type, so use the element
2977 type of the vector instead. */
2978 tree offset_type
= TREE_TYPE (gs_info
->offset
);
2979 offset_type
= TREE_TYPE (gs_info
->offset_vectype
);
2981 /* Calculate X = DR_STEP / SCALE and convert it to the appropriate type. */
2982 tree step
= size_binop (EXACT_DIV_EXPR
, unshare_expr (DR_STEP (dr
)),
2983 ssize_int (gs_info
->scale
));
2984 step
= fold_convert (offset_type
, step
);
2985 step
= force_gimple_operand (step
, &stmts
, true, NULL_TREE
);
2987 /* Create {0, X, X*2, X*3, ...}. */
2988 *vec_offset
= gimple_build (&stmts
, VEC_SERIES_EXPR
, gs_info
->offset_vectype
,
2989 build_zero_cst (offset_type
), step
);
2991 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop
), stmts
);
2994 /* Return the amount that should be added to a vector pointer to move
2995 to the next or previous copy of AGGR_TYPE. DR_INFO is the data reference
2996 being vectorized and MEMORY_ACCESS_TYPE describes the type of
3000 vect_get_data_ptr_increment (vec_info
*vinfo
,
3001 dr_vec_info
*dr_info
, tree aggr_type
,
3002 vect_memory_access_type memory_access_type
)
3004 if (memory_access_type
== VMAT_INVARIANT
)
3005 return size_zero_node
;
3007 tree iv_step
= TYPE_SIZE_UNIT (aggr_type
);
3008 tree step
= vect_dr_behavior (vinfo
, dr_info
)->step
;
3009 if (tree_int_cst_sgn (step
) == -1)
3010 iv_step
= fold_build1 (NEGATE_EXPR
, TREE_TYPE (iv_step
), iv_step
);
3014 /* Check and perform vectorization of BUILT_IN_BSWAP{16,32,64,128}. */
3017 vectorizable_bswap (vec_info
*vinfo
,
3018 stmt_vec_info stmt_info
, gimple_stmt_iterator
*gsi
,
3019 stmt_vec_info
*vec_stmt
, slp_tree slp_node
,
3021 tree vectype_in
, stmt_vector_for_cost
*cost_vec
)
3024 gcall
*stmt
= as_a
<gcall
*> (stmt_info
->stmt
);
3025 loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
);
3028 op
= gimple_call_arg (stmt
, 0);
3029 vectype
= STMT_VINFO_VECTYPE (stmt_info
);
3030 poly_uint64 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
3032 /* Multiple types in SLP are handled by creating the appropriate number of
3033 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
3038 ncopies
= vect_get_num_copies (loop_vinfo
, vectype
);
3040 gcc_assert (ncopies
>= 1);
3042 tree char_vectype
= get_same_sized_vectype (char_type_node
, vectype_in
);
3046 poly_uint64 num_bytes
= TYPE_VECTOR_SUBPARTS (char_vectype
);
3047 unsigned word_bytes
;
3048 if (!constant_multiple_p (num_bytes
, nunits
, &word_bytes
))
3051 /* The encoding uses one stepped pattern for each byte in the word. */
3052 vec_perm_builder
elts (num_bytes
, word_bytes
, 3);
3053 for (unsigned i
= 0; i
< 3; ++i
)
3054 for (unsigned j
= 0; j
< word_bytes
; ++j
)
3055 elts
.quick_push ((i
+ 1) * word_bytes
- j
- 1);
3057 vec_perm_indices
indices (elts
, 1, num_bytes
);
3058 if (!can_vec_perm_const_p (TYPE_MODE (char_vectype
), indices
))
3064 && !vect_maybe_update_slp_op_vectype (slp_op
[0], vectype_in
))
3066 if (dump_enabled_p ())
3067 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3068 "incompatible vector types for invariants\n");
3072 STMT_VINFO_TYPE (stmt_info
) = call_vec_info_type
;
3073 DUMP_VECT_SCOPE ("vectorizable_bswap");
3076 record_stmt_cost (cost_vec
,
3077 1, vector_stmt
, stmt_info
, 0, vect_prologue
);
3078 record_stmt_cost (cost_vec
,
3079 ncopies
, vec_perm
, stmt_info
, 0, vect_body
);
3084 tree bswap_vconst
= vec_perm_indices_to_tree (char_vectype
, indices
);
3087 vec
<tree
> vec_oprnds
= vNULL
;
3088 stmt_vec_info new_stmt_info
= NULL
;
3089 stmt_vec_info prev_stmt_info
= NULL
;
3090 for (unsigned j
= 0; j
< ncopies
; j
++)
3094 vect_get_vec_defs (vinfo
, op
, NULL
, stmt_info
, &vec_oprnds
, NULL
,
3097 vect_get_vec_defs_for_stmt_copy (vinfo
, &vec_oprnds
, NULL
);
3099 /* Arguments are ready. create the new vector stmt. */
3102 FOR_EACH_VEC_ELT (vec_oprnds
, i
, vop
)
3105 tree tem
= make_ssa_name (char_vectype
);
3106 new_stmt
= gimple_build_assign (tem
, build1 (VIEW_CONVERT_EXPR
,
3107 char_vectype
, vop
));
3108 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
3109 tree tem2
= make_ssa_name (char_vectype
);
3110 new_stmt
= gimple_build_assign (tem2
, VEC_PERM_EXPR
,
3111 tem
, tem
, bswap_vconst
);
3112 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
3113 tem
= make_ssa_name (vectype
);
3114 new_stmt
= gimple_build_assign (tem
, build1 (VIEW_CONVERT_EXPR
,
3117 = vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
3119 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt_info
);
3126 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt_info
;
3128 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt_info
;
3130 prev_stmt_info
= new_stmt_info
;
3133 vec_oprnds
.release ();
3137 /* Return true if vector types VECTYPE_IN and VECTYPE_OUT have
3138 integer elements and if we can narrow VECTYPE_IN to VECTYPE_OUT
3139 in a single step. On success, store the binary pack code in
3143 simple_integer_narrowing (tree vectype_out
, tree vectype_in
,
3144 tree_code
*convert_code
)
3146 if (!INTEGRAL_TYPE_P (TREE_TYPE (vectype_out
))
3147 || !INTEGRAL_TYPE_P (TREE_TYPE (vectype_in
)))
3151 int multi_step_cvt
= 0;
3152 auto_vec
<tree
, 8> interm_types
;
3153 if (!supportable_narrowing_operation (NOP_EXPR
, vectype_out
, vectype_in
,
3154 &code
, &multi_step_cvt
, &interm_types
)
3158 *convert_code
= code
;
3162 /* Function vectorizable_call.
3164 Check if STMT_INFO performs a function call that can be vectorized.
3165 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
3166 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
3167 Return true if STMT_INFO is vectorizable in this way. */
3170 vectorizable_call (vec_info
*vinfo
,
3171 stmt_vec_info stmt_info
, gimple_stmt_iterator
*gsi
,
3172 stmt_vec_info
*vec_stmt
, slp_tree slp_node
,
3173 stmt_vector_for_cost
*cost_vec
)
3179 tree vec_oprnd0
= NULL_TREE
, vec_oprnd1
= NULL_TREE
;
3180 stmt_vec_info prev_stmt_info
;
3181 tree vectype_out
, vectype_in
;
3182 poly_uint64 nunits_in
;
3183 poly_uint64 nunits_out
;
3184 loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
);
3185 bb_vec_info bb_vinfo
= dyn_cast
<bb_vec_info
> (vinfo
);
3186 tree fndecl
, new_temp
, rhs_type
;
3187 enum vect_def_type dt
[4]
3188 = { vect_unknown_def_type
, vect_unknown_def_type
, vect_unknown_def_type
,
3189 vect_unknown_def_type
};
3190 tree vectypes
[ARRAY_SIZE (dt
)] = {};
3191 slp_tree slp_op
[ARRAY_SIZE (dt
)] = {};
3192 int ndts
= ARRAY_SIZE (dt
);
3194 auto_vec
<tree
, 8> vargs
;
3195 auto_vec
<tree
, 8> orig_vargs
;
3196 enum { NARROW
, NONE
, WIDEN
} modifier
;
3200 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
3203 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
3207 /* Is STMT_INFO a vectorizable call? */
3208 stmt
= dyn_cast
<gcall
*> (stmt_info
->stmt
);
3212 if (gimple_call_internal_p (stmt
)
3213 && (internal_load_fn_p (gimple_call_internal_fn (stmt
))
3214 || internal_store_fn_p (gimple_call_internal_fn (stmt
))))
3215 /* Handled by vectorizable_load and vectorizable_store. */
3218 if (gimple_call_lhs (stmt
) == NULL_TREE
3219 || TREE_CODE (gimple_call_lhs (stmt
)) != SSA_NAME
)
3222 gcc_checking_assert (!stmt_can_throw_internal (cfun
, stmt
));
3224 vectype_out
= STMT_VINFO_VECTYPE (stmt_info
);
3226 /* Process function arguments. */
3227 rhs_type
= NULL_TREE
;
3228 vectype_in
= NULL_TREE
;
3229 nargs
= gimple_call_num_args (stmt
);
3231 /* Bail out if the function has more than four arguments, we do not have
3232 interesting builtin functions to vectorize with more than two arguments
3233 except for fma. No arguments is also not good. */
3234 if (nargs
== 0 || nargs
> 4)
3237 /* Ignore the arguments of IFN_GOMP_SIMD_LANE, they are magic. */
3238 combined_fn cfn
= gimple_call_combined_fn (stmt
);
3239 if (cfn
== CFN_GOMP_SIMD_LANE
)
3242 rhs_type
= unsigned_type_node
;
3246 if (internal_fn_p (cfn
))
3247 mask_opno
= internal_fn_mask_index (as_internal_fn (cfn
));
3249 for (i
= 0; i
< nargs
; i
++)
3251 if ((int) i
== mask_opno
)
3253 op
= gimple_call_arg (stmt
, i
);
3254 if (!vect_check_scalar_mask (vinfo
,
3255 stmt_info
, op
, &dt
[i
], &vectypes
[i
]))
3260 if (!vect_is_simple_use (vinfo
, stmt_info
, slp_node
,
3261 i
, &op
, &slp_op
[i
], &dt
[i
], &vectypes
[i
]))
3263 if (dump_enabled_p ())
3264 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3265 "use not simple.\n");
3269 /* We can only handle calls with arguments of the same type. */
3271 && !types_compatible_p (rhs_type
, TREE_TYPE (op
)))
3273 if (dump_enabled_p ())
3274 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3275 "argument types differ.\n");
3279 rhs_type
= TREE_TYPE (op
);
3282 vectype_in
= vectypes
[i
];
3283 else if (vectypes
[i
]
3284 && !types_compatible_p (vectypes
[i
], vectype_in
))
3286 if (dump_enabled_p ())
3287 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3288 "argument vector types differ.\n");
3292 /* If all arguments are external or constant defs, infer the vector type
3293 from the scalar type. */
3295 vectype_in
= get_vectype_for_scalar_type (vinfo
, rhs_type
, slp_node
);
3297 gcc_assert (vectype_in
);
3300 if (dump_enabled_p ())
3301 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3302 "no vectype for scalar type %T\n", rhs_type
);
3306 /* FORNOW: we don't yet support mixtures of vector sizes for calls,
3307 just mixtures of nunits. E.g. DI->SI versions of __builtin_ctz*
3308 are traditionally vectorized as two VnDI->VnDI IFN_CTZs followed
3309 by a pack of the two vectors into an SI vector. We would need
3310 separate code to handle direct VnDI->VnSI IFN_CTZs. */
3311 if (TYPE_SIZE (vectype_in
) != TYPE_SIZE (vectype_out
))
3313 if (dump_enabled_p ())
3314 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3315 "mismatched vector sizes %T and %T\n",
3316 vectype_in
, vectype_out
);
3320 if (VECTOR_BOOLEAN_TYPE_P (vectype_out
)
3321 != VECTOR_BOOLEAN_TYPE_P (vectype_in
))
3323 if (dump_enabled_p ())
3324 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3325 "mixed mask and nonmask vector types\n");
3330 nunits_in
= TYPE_VECTOR_SUBPARTS (vectype_in
);
3331 nunits_out
= TYPE_VECTOR_SUBPARTS (vectype_out
);
3332 if (known_eq (nunits_in
* 2, nunits_out
))
3334 else if (known_eq (nunits_out
, nunits_in
))
3336 else if (known_eq (nunits_out
* 2, nunits_in
))
3341 /* We only handle functions that do not read or clobber memory. */
3342 if (gimple_vuse (stmt
))
3344 if (dump_enabled_p ())
3345 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3346 "function reads from or writes to memory.\n");
3350 /* For now, we only vectorize functions if a target specific builtin
3351 is available. TODO -- in some cases, it might be profitable to
3352 insert the calls for pieces of the vector, in order to be able
3353 to vectorize other operations in the loop. */
3355 internal_fn ifn
= IFN_LAST
;
3356 tree callee
= gimple_call_fndecl (stmt
);
3358 /* First try using an internal function. */
3359 tree_code convert_code
= ERROR_MARK
;
3361 && (modifier
== NONE
3362 || (modifier
== NARROW
3363 && simple_integer_narrowing (vectype_out
, vectype_in
,
3365 ifn
= vectorizable_internal_function (cfn
, callee
, vectype_out
,
3368 /* If that fails, try asking for a target-specific built-in function. */
3369 if (ifn
== IFN_LAST
)
3371 if (cfn
!= CFN_LAST
)
3372 fndecl
= targetm
.vectorize
.builtin_vectorized_function
3373 (cfn
, vectype_out
, vectype_in
);
3374 else if (callee
&& fndecl_built_in_p (callee
, BUILT_IN_MD
))
3375 fndecl
= targetm
.vectorize
.builtin_md_vectorized_function
3376 (callee
, vectype_out
, vectype_in
);
3379 if (ifn
== IFN_LAST
&& !fndecl
)
3381 if (cfn
== CFN_GOMP_SIMD_LANE
3384 && LOOP_VINFO_LOOP (loop_vinfo
)->simduid
3385 && TREE_CODE (gimple_call_arg (stmt
, 0)) == SSA_NAME
3386 && LOOP_VINFO_LOOP (loop_vinfo
)->simduid
3387 == SSA_NAME_VAR (gimple_call_arg (stmt
, 0)))
3389 /* We can handle IFN_GOMP_SIMD_LANE by returning a
3390 { 0, 1, 2, ... vf - 1 } vector. */
3391 gcc_assert (nargs
== 0);
3393 else if (modifier
== NONE
3394 && (gimple_call_builtin_p (stmt
, BUILT_IN_BSWAP16
)
3395 || gimple_call_builtin_p (stmt
, BUILT_IN_BSWAP32
)
3396 || gimple_call_builtin_p (stmt
, BUILT_IN_BSWAP64
)
3397 || gimple_call_builtin_p (stmt
, BUILT_IN_BSWAP128
)))
3398 return vectorizable_bswap (vinfo
, stmt_info
, gsi
, vec_stmt
, slp_node
,
3399 slp_op
, vectype_in
, cost_vec
);
3402 if (dump_enabled_p ())
3403 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3404 "function is not vectorizable.\n");
3411 else if (modifier
== NARROW
&& ifn
== IFN_LAST
)
3412 ncopies
= vect_get_num_copies (loop_vinfo
, vectype_out
);
3414 ncopies
= vect_get_num_copies (loop_vinfo
, vectype_in
);
3416 /* Sanity check: make sure that at least one copy of the vectorized stmt
3417 needs to be generated. */
3418 gcc_assert (ncopies
>= 1);
3420 vec_loop_masks
*masks
= (loop_vinfo
? &LOOP_VINFO_MASKS (loop_vinfo
) : NULL
);
3421 if (!vec_stmt
) /* transformation not required. */
3424 for (i
= 0; i
< nargs
; ++i
)
3425 if (!vect_maybe_update_slp_op_vectype (slp_op
[i
], vectype_in
))
3427 if (dump_enabled_p ())
3428 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3429 "incompatible vector types for invariants\n");
3432 STMT_VINFO_TYPE (stmt_info
) = call_vec_info_type
;
3433 DUMP_VECT_SCOPE ("vectorizable_call");
3434 vect_model_simple_cost (vinfo
, stmt_info
,
3435 ncopies
, dt
, ndts
, slp_node
, cost_vec
);
3436 if (ifn
!= IFN_LAST
&& modifier
== NARROW
&& !slp_node
)
3437 record_stmt_cost (cost_vec
, ncopies
/ 2,
3438 vec_promote_demote
, stmt_info
, 0, vect_body
);
3440 if (loop_vinfo
&& mask_opno
>= 0)
3442 unsigned int nvectors
= (slp_node
3443 ? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
)
3445 tree scalar_mask
= gimple_call_arg (stmt_info
->stmt
, mask_opno
);
3446 vect_record_loop_mask (loop_vinfo
, masks
, nvectors
,
3447 vectype_out
, scalar_mask
);
3454 if (dump_enabled_p ())
3455 dump_printf_loc (MSG_NOTE
, vect_location
, "transform call.\n");
3458 scalar_dest
= gimple_call_lhs (stmt
);
3459 vec_dest
= vect_create_destination_var (scalar_dest
, vectype_out
);
3461 bool masked_loop_p
= loop_vinfo
&& LOOP_VINFO_FULLY_MASKED_P (loop_vinfo
);
3463 stmt_vec_info new_stmt_info
= NULL
;
3464 prev_stmt_info
= NULL
;
3465 if (modifier
== NONE
|| ifn
!= IFN_LAST
)
3467 tree prev_res
= NULL_TREE
;
3468 vargs
.safe_grow (nargs
);
3469 orig_vargs
.safe_grow (nargs
);
3470 for (j
= 0; j
< ncopies
; ++j
)
3472 /* Build argument list for the vectorized call. */
3475 auto_vec
<vec
<tree
> > vec_defs (nargs
);
3476 vec
<tree
> vec_oprnds0
;
3478 vect_get_slp_defs (vinfo
, slp_node
, &vec_defs
);
3479 vec_oprnds0
= vec_defs
[0];
3481 /* Arguments are ready. Create the new vector stmt. */
3482 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vec_oprnd0
)
3485 for (k
= 0; k
< nargs
; k
++)
3487 vec
<tree
> vec_oprndsk
= vec_defs
[k
];
3488 vargs
[k
] = vec_oprndsk
[i
];
3490 if (modifier
== NARROW
)
3492 /* We don't define any narrowing conditional functions
3494 gcc_assert (mask_opno
< 0);
3495 tree half_res
= make_ssa_name (vectype_in
);
3497 = gimple_build_call_internal_vec (ifn
, vargs
);
3498 gimple_call_set_lhs (call
, half_res
);
3499 gimple_call_set_nothrow (call
, true);
3500 vect_finish_stmt_generation (vinfo
, stmt_info
, call
, gsi
);
3503 prev_res
= half_res
;
3506 new_temp
= make_ssa_name (vec_dest
);
3508 = gimple_build_assign (new_temp
, convert_code
,
3509 prev_res
, half_res
);
3511 = vect_finish_stmt_generation (vinfo
, stmt_info
,
3516 if (mask_opno
>= 0 && masked_loop_p
)
3518 unsigned int vec_num
= vec_oprnds0
.length ();
3519 /* Always true for SLP. */
3520 gcc_assert (ncopies
== 1);
3521 tree mask
= vect_get_loop_mask (gsi
, masks
, vec_num
,
3523 vargs
[mask_opno
] = prepare_load_store_mask
3524 (TREE_TYPE (mask
), mask
, vargs
[mask_opno
], gsi
);
3528 if (ifn
!= IFN_LAST
)
3529 call
= gimple_build_call_internal_vec (ifn
, vargs
);
3531 call
= gimple_build_call_vec (fndecl
, vargs
);
3532 new_temp
= make_ssa_name (vec_dest
, call
);
3533 gimple_call_set_lhs (call
, new_temp
);
3534 gimple_call_set_nothrow (call
, true);
3536 = vect_finish_stmt_generation (vinfo
, stmt_info
,
3539 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt_info
);
3542 for (i
= 0; i
< nargs
; i
++)
3544 vec
<tree
> vec_oprndsi
= vec_defs
[i
];
3545 vec_oprndsi
.release ();
3550 for (i
= 0; i
< nargs
; i
++)
3552 op
= gimple_call_arg (stmt
, i
);
3555 = vect_get_vec_def_for_operand (vinfo
,
3556 op
, stmt_info
, vectypes
[i
]);
3559 = vect_get_vec_def_for_stmt_copy (vinfo
, orig_vargs
[i
]);
3561 orig_vargs
[i
] = vargs
[i
] = vec_oprnd0
;
3564 if (mask_opno
>= 0 && masked_loop_p
)
3566 tree mask
= vect_get_loop_mask (gsi
, masks
, ncopies
,
3569 = prepare_load_store_mask (TREE_TYPE (mask
), mask
,
3570 vargs
[mask_opno
], gsi
);
3573 if (cfn
== CFN_GOMP_SIMD_LANE
)
3575 tree cst
= build_index_vector (vectype_out
, j
* nunits_out
, 1);
3577 = vect_get_new_ssa_name (vectype_out
, vect_simple_var
, "cst_");
3578 gimple
*init_stmt
= gimple_build_assign (new_var
, cst
);
3579 vect_init_vector_1 (vinfo
, stmt_info
, init_stmt
, NULL
);
3580 new_temp
= make_ssa_name (vec_dest
);
3581 gimple
*new_stmt
= gimple_build_assign (new_temp
, new_var
);
3583 = vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
3585 else if (modifier
== NARROW
)
3587 /* We don't define any narrowing conditional functions at
3589 gcc_assert (mask_opno
< 0);
3590 tree half_res
= make_ssa_name (vectype_in
);
3591 gcall
*call
= gimple_build_call_internal_vec (ifn
, vargs
);
3592 gimple_call_set_lhs (call
, half_res
);
3593 gimple_call_set_nothrow (call
, true);
3594 vect_finish_stmt_generation (vinfo
, stmt_info
, call
, gsi
);
3597 prev_res
= half_res
;
3600 new_temp
= make_ssa_name (vec_dest
);
3601 gassign
*new_stmt
= gimple_build_assign (new_temp
, convert_code
,
3602 prev_res
, half_res
);
3604 = vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
3609 if (ifn
!= IFN_LAST
)
3610 call
= gimple_build_call_internal_vec (ifn
, vargs
);
3612 call
= gimple_build_call_vec (fndecl
, vargs
);
3613 new_temp
= make_ssa_name (vec_dest
, call
);
3614 gimple_call_set_lhs (call
, new_temp
);
3615 gimple_call_set_nothrow (call
, true);
3617 = vect_finish_stmt_generation (vinfo
, stmt_info
, call
, gsi
);
3620 if (j
== (modifier
== NARROW
? 1 : 0))
3621 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt_info
;
3623 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt_info
;
3625 prev_stmt_info
= new_stmt_info
;
3628 else if (modifier
== NARROW
)
3630 /* We don't define any narrowing conditional functions at present. */
3631 gcc_assert (mask_opno
< 0);
3632 for (j
= 0; j
< ncopies
; ++j
)
3634 /* Build argument list for the vectorized call. */
3636 vargs
.create (nargs
* 2);
3642 auto_vec
<vec
<tree
> > vec_defs (nargs
);
3643 vec
<tree
> vec_oprnds0
;
3645 vect_get_slp_defs (vinfo
, slp_node
, &vec_defs
);
3646 vec_oprnds0
= vec_defs
[0];
3648 /* Arguments are ready. Create the new vector stmt. */
3649 for (i
= 0; vec_oprnds0
.iterate (i
, &vec_oprnd0
); i
+= 2)
3653 for (k
= 0; k
< nargs
; k
++)
3655 vec
<tree
> vec_oprndsk
= vec_defs
[k
];
3656 vargs
.quick_push (vec_oprndsk
[i
]);
3657 vargs
.quick_push (vec_oprndsk
[i
+ 1]);
3660 if (ifn
!= IFN_LAST
)
3661 call
= gimple_build_call_internal_vec (ifn
, vargs
);
3663 call
= gimple_build_call_vec (fndecl
, vargs
);
3664 new_temp
= make_ssa_name (vec_dest
, call
);
3665 gimple_call_set_lhs (call
, new_temp
);
3666 gimple_call_set_nothrow (call
, true);
3668 = vect_finish_stmt_generation (vinfo
, stmt_info
, call
, gsi
);
3669 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt_info
);
3672 for (i
= 0; i
< nargs
; i
++)
3674 vec
<tree
> vec_oprndsi
= vec_defs
[i
];
3675 vec_oprndsi
.release ();
3680 for (i
= 0; i
< nargs
; i
++)
3682 op
= gimple_call_arg (stmt
, i
);
3686 = vect_get_vec_def_for_operand (vinfo
, op
, stmt_info
,
3689 = vect_get_vec_def_for_stmt_copy (vinfo
, vec_oprnd0
);
3693 vec_oprnd1
= gimple_call_arg (new_stmt_info
->stmt
,
3696 = vect_get_vec_def_for_stmt_copy (vinfo
, vec_oprnd1
);
3698 = vect_get_vec_def_for_stmt_copy (vinfo
, vec_oprnd0
);
3701 vargs
.quick_push (vec_oprnd0
);
3702 vargs
.quick_push (vec_oprnd1
);
3705 gcall
*new_stmt
= gimple_build_call_vec (fndecl
, vargs
);
3706 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
3707 gimple_call_set_lhs (new_stmt
, new_temp
);
3709 = vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
3712 STMT_VINFO_VEC_STMT (stmt_info
) = new_stmt_info
;
3714 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt_info
;
3716 prev_stmt_info
= new_stmt_info
;
3719 *vec_stmt
= STMT_VINFO_VEC_STMT (stmt_info
);
3722 /* No current target implements this case. */
3727 /* The call in STMT might prevent it from being removed in dce.
3728 We however cannot remove it here, due to the way the ssa name
3729 it defines is mapped to the new definition. So just replace
3730 rhs of the statement with something harmless. */
3735 stmt_info
= vect_orig_stmt (stmt_info
);
3736 lhs
= gimple_get_lhs (stmt_info
->stmt
);
3739 = gimple_build_assign (lhs
, build_zero_cst (TREE_TYPE (lhs
)));
3740 vinfo
->replace_stmt (gsi
, stmt_info
, new_stmt
);
3746 struct simd_call_arg_info
3750 HOST_WIDE_INT linear_step
;
3751 enum vect_def_type dt
;
3753 bool simd_lane_linear
;
3756 /* Helper function of vectorizable_simd_clone_call. If OP, an SSA_NAME,
3757 is linear within simd lane (but not within whole loop), note it in
3761 vect_simd_lane_linear (tree op
, class loop
*loop
,
3762 struct simd_call_arg_info
*arginfo
)
3764 gimple
*def_stmt
= SSA_NAME_DEF_STMT (op
);
3766 if (!is_gimple_assign (def_stmt
)
3767 || gimple_assign_rhs_code (def_stmt
) != POINTER_PLUS_EXPR
3768 || !is_gimple_min_invariant (gimple_assign_rhs1 (def_stmt
)))
3771 tree base
= gimple_assign_rhs1 (def_stmt
);
3772 HOST_WIDE_INT linear_step
= 0;
3773 tree v
= gimple_assign_rhs2 (def_stmt
);
3774 while (TREE_CODE (v
) == SSA_NAME
)
3777 def_stmt
= SSA_NAME_DEF_STMT (v
);
3778 if (is_gimple_assign (def_stmt
))
3779 switch (gimple_assign_rhs_code (def_stmt
))
3782 t
= gimple_assign_rhs2 (def_stmt
);
3783 if (linear_step
|| TREE_CODE (t
) != INTEGER_CST
)
3785 base
= fold_build2 (POINTER_PLUS_EXPR
, TREE_TYPE (base
), base
, t
);
3786 v
= gimple_assign_rhs1 (def_stmt
);
3789 t
= gimple_assign_rhs2 (def_stmt
);
3790 if (linear_step
|| !tree_fits_shwi_p (t
) || integer_zerop (t
))
3792 linear_step
= tree_to_shwi (t
);
3793 v
= gimple_assign_rhs1 (def_stmt
);
3796 t
= gimple_assign_rhs1 (def_stmt
);
3797 if (TREE_CODE (TREE_TYPE (t
)) != INTEGER_TYPE
3798 || (TYPE_PRECISION (TREE_TYPE (v
))
3799 < TYPE_PRECISION (TREE_TYPE (t
))))
3808 else if (gimple_call_internal_p (def_stmt
, IFN_GOMP_SIMD_LANE
)
3810 && TREE_CODE (gimple_call_arg (def_stmt
, 0)) == SSA_NAME
3811 && (SSA_NAME_VAR (gimple_call_arg (def_stmt
, 0))
3816 arginfo
->linear_step
= linear_step
;
3818 arginfo
->simd_lane_linear
= true;
3824 /* Return the number of elements in vector type VECTYPE, which is associated
3825 with a SIMD clone. At present these vectors always have a constant
3828 static unsigned HOST_WIDE_INT
3829 simd_clone_subparts (tree vectype
)
3831 return TYPE_VECTOR_SUBPARTS (vectype
).to_constant ();
3834 /* Function vectorizable_simd_clone_call.
3836 Check if STMT_INFO performs a function call that can be vectorized
3837 by calling a simd clone of the function.
3838 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
3839 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
3840 Return true if STMT_INFO is vectorizable in this way. */
3843 vectorizable_simd_clone_call (vec_info
*vinfo
, stmt_vec_info stmt_info
,
3844 gimple_stmt_iterator
*gsi
,
3845 stmt_vec_info
*vec_stmt
, slp_tree slp_node
,
3846 stmt_vector_for_cost
*)
3851 tree vec_oprnd0
= NULL_TREE
;
3852 stmt_vec_info prev_stmt_info
;
3854 unsigned int nunits
;
3855 loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
);
3856 bb_vec_info bb_vinfo
= dyn_cast
<bb_vec_info
> (vinfo
);
3857 class loop
*loop
= loop_vinfo
? LOOP_VINFO_LOOP (loop_vinfo
) : NULL
;
3858 tree fndecl
, new_temp
;
3860 auto_vec
<simd_call_arg_info
> arginfo
;
3861 vec
<tree
> vargs
= vNULL
;
3863 tree lhs
, rtype
, ratype
;
3864 vec
<constructor_elt
, va_gc
> *ret_ctor_elts
= NULL
;
3866 /* Is STMT a vectorizable call? */
3867 gcall
*stmt
= dyn_cast
<gcall
*> (stmt_info
->stmt
);
3871 fndecl
= gimple_call_fndecl (stmt
);
3872 if (fndecl
== NULL_TREE
)
3875 struct cgraph_node
*node
= cgraph_node::get (fndecl
);
3876 if (node
== NULL
|| node
->simd_clones
== NULL
)
3879 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
3882 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
3886 if (gimple_call_lhs (stmt
)
3887 && TREE_CODE (gimple_call_lhs (stmt
)) != SSA_NAME
)
3890 gcc_checking_assert (!stmt_can_throw_internal (cfun
, stmt
));
3892 vectype
= STMT_VINFO_VECTYPE (stmt_info
);
3894 if (loop_vinfo
&& nested_in_vect_loop_p (loop
, stmt_info
))
3901 /* Process function arguments. */
3902 nargs
= gimple_call_num_args (stmt
);
3904 /* Bail out if the function has zero arguments. */
3908 arginfo
.reserve (nargs
, true);
3910 for (i
= 0; i
< nargs
; i
++)
3912 simd_call_arg_info thisarginfo
;
3915 thisarginfo
.linear_step
= 0;
3916 thisarginfo
.align
= 0;
3917 thisarginfo
.op
= NULL_TREE
;
3918 thisarginfo
.simd_lane_linear
= false;
3920 op
= gimple_call_arg (stmt
, i
);
3921 if (!vect_is_simple_use (op
, vinfo
, &thisarginfo
.dt
,
3922 &thisarginfo
.vectype
)
3923 || thisarginfo
.dt
== vect_uninitialized_def
)
3925 if (dump_enabled_p ())
3926 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3927 "use not simple.\n");
3931 if (thisarginfo
.dt
== vect_constant_def
3932 || thisarginfo
.dt
== vect_external_def
)
3933 gcc_assert (thisarginfo
.vectype
== NULL_TREE
);
3936 gcc_assert (thisarginfo
.vectype
!= NULL_TREE
);
3937 if (VECTOR_BOOLEAN_TYPE_P (thisarginfo
.vectype
))
3939 if (dump_enabled_p ())
3940 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3941 "vector mask arguments are not supported\n");
3946 /* For linear arguments, the analyze phase should have saved
3947 the base and step in STMT_VINFO_SIMD_CLONE_INFO. */
3948 if (i
* 3 + 4 <= STMT_VINFO_SIMD_CLONE_INFO (stmt_info
).length ()
3949 && STMT_VINFO_SIMD_CLONE_INFO (stmt_info
)[i
* 3 + 2])
3951 gcc_assert (vec_stmt
);
3952 thisarginfo
.linear_step
3953 = tree_to_shwi (STMT_VINFO_SIMD_CLONE_INFO (stmt_info
)[i
* 3 + 2]);
3955 = STMT_VINFO_SIMD_CLONE_INFO (stmt_info
)[i
* 3 + 1];
3956 thisarginfo
.simd_lane_linear
3957 = (STMT_VINFO_SIMD_CLONE_INFO (stmt_info
)[i
* 3 + 3]
3958 == boolean_true_node
);
3959 /* If loop has been peeled for alignment, we need to adjust it. */
3960 tree n1
= LOOP_VINFO_NITERS_UNCHANGED (loop_vinfo
);
3961 tree n2
= LOOP_VINFO_NITERS (loop_vinfo
);
3962 if (n1
!= n2
&& !thisarginfo
.simd_lane_linear
)
3964 tree bias
= fold_build2 (MINUS_EXPR
, TREE_TYPE (n1
), n1
, n2
);
3965 tree step
= STMT_VINFO_SIMD_CLONE_INFO (stmt_info
)[i
* 3 + 2];
3966 tree opt
= TREE_TYPE (thisarginfo
.op
);
3967 bias
= fold_convert (TREE_TYPE (step
), bias
);
3968 bias
= fold_build2 (MULT_EXPR
, TREE_TYPE (step
), bias
, step
);
3970 = fold_build2 (POINTER_TYPE_P (opt
)
3971 ? POINTER_PLUS_EXPR
: PLUS_EXPR
, opt
,
3972 thisarginfo
.op
, bias
);
3976 && thisarginfo
.dt
!= vect_constant_def
3977 && thisarginfo
.dt
!= vect_external_def
3979 && TREE_CODE (op
) == SSA_NAME
3980 && simple_iv (loop
, loop_containing_stmt (stmt
), op
,
3982 && tree_fits_shwi_p (iv
.step
))
3984 thisarginfo
.linear_step
= tree_to_shwi (iv
.step
);
3985 thisarginfo
.op
= iv
.base
;
3987 else if ((thisarginfo
.dt
== vect_constant_def
3988 || thisarginfo
.dt
== vect_external_def
)
3989 && POINTER_TYPE_P (TREE_TYPE (op
)))
3990 thisarginfo
.align
= get_pointer_alignment (op
) / BITS_PER_UNIT
;
3991 /* Addresses of array elements indexed by GOMP_SIMD_LANE are
3993 if (POINTER_TYPE_P (TREE_TYPE (op
))
3994 && !thisarginfo
.linear_step
3996 && thisarginfo
.dt
!= vect_constant_def
3997 && thisarginfo
.dt
!= vect_external_def
4000 && TREE_CODE (op
) == SSA_NAME
)
4001 vect_simd_lane_linear (op
, loop
, &thisarginfo
);
4003 arginfo
.quick_push (thisarginfo
);
4006 unsigned HOST_WIDE_INT vf
;
4007 if (!LOOP_VINFO_VECT_FACTOR (loop_vinfo
).is_constant (&vf
))
4009 if (dump_enabled_p ())
4010 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4011 "not considering SIMD clones; not yet supported"
4012 " for variable-width vectors.\n");
4016 unsigned int badness
= 0;
4017 struct cgraph_node
*bestn
= NULL
;
4018 if (STMT_VINFO_SIMD_CLONE_INFO (stmt_info
).exists ())
4019 bestn
= cgraph_node::get (STMT_VINFO_SIMD_CLONE_INFO (stmt_info
)[0]);
4021 for (struct cgraph_node
*n
= node
->simd_clones
; n
!= NULL
;
4022 n
= n
->simdclone
->next_clone
)
4024 unsigned int this_badness
= 0;
4025 if (n
->simdclone
->simdlen
> vf
4026 || n
->simdclone
->nargs
!= nargs
)
4028 if (n
->simdclone
->simdlen
< vf
)
4029 this_badness
+= (exact_log2 (vf
)
4030 - exact_log2 (n
->simdclone
->simdlen
)) * 1024;
4031 if (n
->simdclone
->inbranch
)
4032 this_badness
+= 2048;
4033 int target_badness
= targetm
.simd_clone
.usable (n
);
4034 if (target_badness
< 0)
4036 this_badness
+= target_badness
* 512;
4037 /* FORNOW: Have to add code to add the mask argument. */
4038 if (n
->simdclone
->inbranch
)
4040 for (i
= 0; i
< nargs
; i
++)
4042 switch (n
->simdclone
->args
[i
].arg_type
)
4044 case SIMD_CLONE_ARG_TYPE_VECTOR
:
4045 if (!useless_type_conversion_p
4046 (n
->simdclone
->args
[i
].orig_type
,
4047 TREE_TYPE (gimple_call_arg (stmt
, i
))))
4049 else if (arginfo
[i
].dt
== vect_constant_def
4050 || arginfo
[i
].dt
== vect_external_def
4051 || arginfo
[i
].linear_step
)
4054 case SIMD_CLONE_ARG_TYPE_UNIFORM
:
4055 if (arginfo
[i
].dt
!= vect_constant_def
4056 && arginfo
[i
].dt
!= vect_external_def
)
4059 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP
:
4060 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP
:
4061 if (arginfo
[i
].dt
== vect_constant_def
4062 || arginfo
[i
].dt
== vect_external_def
4063 || (arginfo
[i
].linear_step
4064 != n
->simdclone
->args
[i
].linear_step
))
4067 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP
:
4068 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP
:
4069 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP
:
4070 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP
:
4071 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP
:
4072 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP
:
4076 case SIMD_CLONE_ARG_TYPE_MASK
:
4079 if (i
== (size_t) -1)
4081 if (n
->simdclone
->args
[i
].alignment
> arginfo
[i
].align
)
4086 if (arginfo
[i
].align
)
4087 this_badness
+= (exact_log2 (arginfo
[i
].align
)
4088 - exact_log2 (n
->simdclone
->args
[i
].alignment
));
4090 if (i
== (size_t) -1)
4092 if (bestn
== NULL
|| this_badness
< badness
)
4095 badness
= this_badness
;
4102 for (i
= 0; i
< nargs
; i
++)
4103 if ((arginfo
[i
].dt
== vect_constant_def
4104 || arginfo
[i
].dt
== vect_external_def
)
4105 && bestn
->simdclone
->args
[i
].arg_type
== SIMD_CLONE_ARG_TYPE_VECTOR
)
4107 tree arg_type
= TREE_TYPE (gimple_call_arg (stmt
, i
));
4108 arginfo
[i
].vectype
= get_vectype_for_scalar_type (vinfo
, arg_type
,
4110 if (arginfo
[i
].vectype
== NULL
4111 || (simd_clone_subparts (arginfo
[i
].vectype
)
4112 > bestn
->simdclone
->simdlen
))
4116 fndecl
= bestn
->decl
;
4117 nunits
= bestn
->simdclone
->simdlen
;
4118 ncopies
= vf
/ nunits
;
4120 /* If the function isn't const, only allow it in simd loops where user
4121 has asserted that at least nunits consecutive iterations can be
4122 performed using SIMD instructions. */
4123 if ((loop
== NULL
|| (unsigned) loop
->safelen
< nunits
)
4124 && gimple_vuse (stmt
))
4127 /* Sanity check: make sure that at least one copy of the vectorized stmt
4128 needs to be generated. */
4129 gcc_assert (ncopies
>= 1);
4131 if (!vec_stmt
) /* transformation not required. */
4133 STMT_VINFO_SIMD_CLONE_INFO (stmt_info
).safe_push (bestn
->decl
);
4134 for (i
= 0; i
< nargs
; i
++)
4135 if ((bestn
->simdclone
->args
[i
].arg_type
4136 == SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP
)
4137 || (bestn
->simdclone
->args
[i
].arg_type
4138 == SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP
))
4140 STMT_VINFO_SIMD_CLONE_INFO (stmt_info
).safe_grow_cleared (i
* 3
4142 STMT_VINFO_SIMD_CLONE_INFO (stmt_info
).safe_push (arginfo
[i
].op
);
4143 tree lst
= POINTER_TYPE_P (TREE_TYPE (arginfo
[i
].op
))
4144 ? size_type_node
: TREE_TYPE (arginfo
[i
].op
);
4145 tree ls
= build_int_cst (lst
, arginfo
[i
].linear_step
);
4146 STMT_VINFO_SIMD_CLONE_INFO (stmt_info
).safe_push (ls
);
4147 tree sll
= arginfo
[i
].simd_lane_linear
4148 ? boolean_true_node
: boolean_false_node
;
4149 STMT_VINFO_SIMD_CLONE_INFO (stmt_info
).safe_push (sll
);
4151 STMT_VINFO_TYPE (stmt_info
) = call_simd_clone_vec_info_type
;
4152 DUMP_VECT_SCOPE ("vectorizable_simd_clone_call");
4153 /* vect_model_simple_cost (vinfo, stmt_info, ncopies,
4154 dt, slp_node, cost_vec); */
4160 if (dump_enabled_p ())
4161 dump_printf_loc (MSG_NOTE
, vect_location
, "transform call.\n");
4164 scalar_dest
= gimple_call_lhs (stmt
);
4165 vec_dest
= NULL_TREE
;
4170 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
4171 rtype
= TREE_TYPE (TREE_TYPE (fndecl
));
4172 if (TREE_CODE (rtype
) == ARRAY_TYPE
)
4175 rtype
= TREE_TYPE (ratype
);
4179 prev_stmt_info
= NULL
;
4180 for (j
= 0; j
< ncopies
; ++j
)
4182 /* Build argument list for the vectorized call. */
4184 vargs
.create (nargs
);
4188 for (i
= 0; i
< nargs
; i
++)
4190 unsigned int k
, l
, m
, o
;
4192 op
= gimple_call_arg (stmt
, i
);
4193 switch (bestn
->simdclone
->args
[i
].arg_type
)
4195 case SIMD_CLONE_ARG_TYPE_VECTOR
:
4196 atype
= bestn
->simdclone
->args
[i
].vector_type
;
4197 o
= nunits
/ simd_clone_subparts (atype
);
4198 for (m
= j
* o
; m
< (j
+ 1) * o
; m
++)
4200 if (simd_clone_subparts (atype
)
4201 < simd_clone_subparts (arginfo
[i
].vectype
))
4203 poly_uint64 prec
= GET_MODE_BITSIZE (TYPE_MODE (atype
));
4204 k
= (simd_clone_subparts (arginfo
[i
].vectype
)
4205 / simd_clone_subparts (atype
));
4206 gcc_assert ((k
& (k
- 1)) == 0);
4209 = vect_get_vec_def_for_operand (vinfo
, op
, stmt_info
);
4212 vec_oprnd0
= arginfo
[i
].op
;
4213 if ((m
& (k
- 1)) == 0)
4215 = vect_get_vec_def_for_stmt_copy (vinfo
,
4218 arginfo
[i
].op
= vec_oprnd0
;
4220 = build3 (BIT_FIELD_REF
, atype
, vec_oprnd0
,
4222 bitsize_int ((m
& (k
- 1)) * prec
));
4224 = gimple_build_assign (make_ssa_name (atype
),
4226 vect_finish_stmt_generation (vinfo
, stmt_info
,
4228 vargs
.safe_push (gimple_assign_lhs (new_stmt
));
4232 k
= (simd_clone_subparts (atype
)
4233 / simd_clone_subparts (arginfo
[i
].vectype
));
4234 gcc_assert ((k
& (k
- 1)) == 0);
4235 vec
<constructor_elt
, va_gc
> *ctor_elts
;
4237 vec_alloc (ctor_elts
, k
);
4240 for (l
= 0; l
< k
; l
++)
4242 if (m
== 0 && l
== 0)
4244 = vect_get_vec_def_for_operand (vinfo
,
4248 = vect_get_vec_def_for_stmt_copy (vinfo
,
4250 arginfo
[i
].op
= vec_oprnd0
;
4253 CONSTRUCTOR_APPEND_ELT (ctor_elts
, NULL_TREE
,
4257 vargs
.safe_push (vec_oprnd0
);
4260 vec_oprnd0
= build_constructor (atype
, ctor_elts
);
4262 = gimple_build_assign (make_ssa_name (atype
),
4264 vect_finish_stmt_generation (vinfo
, stmt_info
,
4266 vargs
.safe_push (gimple_assign_lhs (new_stmt
));
4271 case SIMD_CLONE_ARG_TYPE_UNIFORM
:
4272 vargs
.safe_push (op
);
4274 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP
:
4275 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP
:
4280 = force_gimple_operand (unshare_expr (arginfo
[i
].op
),
4281 &stmts
, true, NULL_TREE
);
4285 edge pe
= loop_preheader_edge (loop
);
4286 new_bb
= gsi_insert_seq_on_edge_immediate (pe
, stmts
);
4287 gcc_assert (!new_bb
);
4289 if (arginfo
[i
].simd_lane_linear
)
4291 vargs
.safe_push (arginfo
[i
].op
);
4294 tree phi_res
= copy_ssa_name (op
);
4295 gphi
*new_phi
= create_phi_node (phi_res
, loop
->header
);
4296 loop_vinfo
->add_stmt (new_phi
);
4297 add_phi_arg (new_phi
, arginfo
[i
].op
,
4298 loop_preheader_edge (loop
), UNKNOWN_LOCATION
);
4300 = POINTER_TYPE_P (TREE_TYPE (op
))
4301 ? POINTER_PLUS_EXPR
: PLUS_EXPR
;
4302 tree type
= POINTER_TYPE_P (TREE_TYPE (op
))
4303 ? sizetype
: TREE_TYPE (op
);
4305 = wi::mul (bestn
->simdclone
->args
[i
].linear_step
,
4307 tree tcst
= wide_int_to_tree (type
, cst
);
4308 tree phi_arg
= copy_ssa_name (op
);
4310 = gimple_build_assign (phi_arg
, code
, phi_res
, tcst
);
4311 gimple_stmt_iterator si
= gsi_after_labels (loop
->header
);
4312 gsi_insert_after (&si
, new_stmt
, GSI_NEW_STMT
);
4313 loop_vinfo
->add_stmt (new_stmt
);
4314 add_phi_arg (new_phi
, phi_arg
, loop_latch_edge (loop
),
4316 arginfo
[i
].op
= phi_res
;
4317 vargs
.safe_push (phi_res
);
4322 = POINTER_TYPE_P (TREE_TYPE (op
))
4323 ? POINTER_PLUS_EXPR
: PLUS_EXPR
;
4324 tree type
= POINTER_TYPE_P (TREE_TYPE (op
))
4325 ? sizetype
: TREE_TYPE (op
);
4327 = wi::mul (bestn
->simdclone
->args
[i
].linear_step
,
4329 tree tcst
= wide_int_to_tree (type
, cst
);
4330 new_temp
= make_ssa_name (TREE_TYPE (op
));
4332 = gimple_build_assign (new_temp
, code
,
4333 arginfo
[i
].op
, tcst
);
4334 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
4335 vargs
.safe_push (new_temp
);
4338 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP
:
4339 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP
:
4340 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP
:
4341 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP
:
4342 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP
:
4343 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP
:
4349 gcall
*new_call
= gimple_build_call_vec (fndecl
, vargs
);
4352 gcc_assert (ratype
|| simd_clone_subparts (rtype
) == nunits
);
4354 new_temp
= create_tmp_var (ratype
);
4355 else if (simd_clone_subparts (vectype
)
4356 == simd_clone_subparts (rtype
))
4357 new_temp
= make_ssa_name (vec_dest
, new_call
);
4359 new_temp
= make_ssa_name (rtype
, new_call
);
4360 gimple_call_set_lhs (new_call
, new_temp
);
4362 stmt_vec_info new_stmt_info
4363 = vect_finish_stmt_generation (vinfo
, stmt_info
, new_call
, gsi
);
4367 if (simd_clone_subparts (vectype
) < nunits
)
4370 poly_uint64 prec
= GET_MODE_BITSIZE (TYPE_MODE (vectype
));
4371 poly_uint64 bytes
= GET_MODE_SIZE (TYPE_MODE (vectype
));
4372 k
= nunits
/ simd_clone_subparts (vectype
);
4373 gcc_assert ((k
& (k
- 1)) == 0);
4374 for (l
= 0; l
< k
; l
++)
4379 t
= build_fold_addr_expr (new_temp
);
4380 t
= build2 (MEM_REF
, vectype
, t
,
4381 build_int_cst (TREE_TYPE (t
), l
* bytes
));
4384 t
= build3 (BIT_FIELD_REF
, vectype
, new_temp
,
4385 bitsize_int (prec
), bitsize_int (l
* prec
));
4387 = gimple_build_assign (make_ssa_name (vectype
), t
);
4389 = vect_finish_stmt_generation (vinfo
, stmt_info
,
4392 if (j
== 0 && l
== 0)
4393 STMT_VINFO_VEC_STMT (stmt_info
)
4394 = *vec_stmt
= new_stmt_info
;
4396 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt_info
;
4398 prev_stmt_info
= new_stmt_info
;
4402 vect_clobber_variable (vinfo
, stmt_info
, gsi
, new_temp
);
4405 else if (simd_clone_subparts (vectype
) > nunits
)
4407 unsigned int k
= (simd_clone_subparts (vectype
)
4408 / simd_clone_subparts (rtype
));
4409 gcc_assert ((k
& (k
- 1)) == 0);
4410 if ((j
& (k
- 1)) == 0)
4411 vec_alloc (ret_ctor_elts
, k
);
4414 unsigned int m
, o
= nunits
/ simd_clone_subparts (rtype
);
4415 for (m
= 0; m
< o
; m
++)
4417 tree tem
= build4 (ARRAY_REF
, rtype
, new_temp
,
4418 size_int (m
), NULL_TREE
, NULL_TREE
);
4420 = gimple_build_assign (make_ssa_name (rtype
), tem
);
4422 = vect_finish_stmt_generation (vinfo
, stmt_info
,
4424 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts
, NULL_TREE
,
4425 gimple_assign_lhs (new_stmt
));
4427 vect_clobber_variable (vinfo
, stmt_info
, gsi
, new_temp
);
4430 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts
, NULL_TREE
, new_temp
);
4431 if ((j
& (k
- 1)) != k
- 1)
4433 vec_oprnd0
= build_constructor (vectype
, ret_ctor_elts
);
4435 = gimple_build_assign (make_ssa_name (vec_dest
), vec_oprnd0
);
4437 = vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
4439 if ((unsigned) j
== k
- 1)
4440 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt_info
;
4442 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt_info
;
4444 prev_stmt_info
= new_stmt_info
;
4449 tree t
= build_fold_addr_expr (new_temp
);
4450 t
= build2 (MEM_REF
, vectype
, t
,
4451 build_int_cst (TREE_TYPE (t
), 0));
4453 = gimple_build_assign (make_ssa_name (vec_dest
), t
);
4455 = vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
4456 vect_clobber_variable (vinfo
, stmt_info
, gsi
, new_temp
);
4461 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt_info
;
4463 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt_info
;
4465 prev_stmt_info
= new_stmt_info
;
4470 /* The call in STMT might prevent it from being removed in dce.
4471 We however cannot remove it here, due to the way the ssa name
4472 it defines is mapped to the new definition. So just replace
4473 rhs of the statement with something harmless. */
4481 type
= TREE_TYPE (scalar_dest
);
4482 lhs
= gimple_call_lhs (vect_orig_stmt (stmt_info
)->stmt
);
4483 new_stmt
= gimple_build_assign (lhs
, build_zero_cst (type
));
4486 new_stmt
= gimple_build_nop ();
4487 vinfo
->replace_stmt (gsi
, vect_orig_stmt (stmt_info
), new_stmt
);
4488 unlink_stmt_vdef (stmt
);
4494 /* Function vect_gen_widened_results_half
4496 Create a vector stmt whose code, type, number of arguments, and result
4497 variable are CODE, OP_TYPE, and VEC_DEST, and its arguments are
4498 VEC_OPRND0 and VEC_OPRND1. The new vector stmt is to be inserted at GSI.
4499 In the case that CODE is a CALL_EXPR, this means that a call to DECL
4500 needs to be created (DECL is a function-decl of a target-builtin).
4501 STMT_INFO is the original scalar stmt that we are vectorizing. */
4504 vect_gen_widened_results_half (vec_info
*vinfo
, enum tree_code code
,
4505 tree vec_oprnd0
, tree vec_oprnd1
, int op_type
,
4506 tree vec_dest
, gimple_stmt_iterator
*gsi
,
4507 stmt_vec_info stmt_info
)
4512 /* Generate half of the widened result: */
4513 gcc_assert (op_type
== TREE_CODE_LENGTH (code
));
4514 if (op_type
!= binary_op
)
4516 new_stmt
= gimple_build_assign (vec_dest
, code
, vec_oprnd0
, vec_oprnd1
);
4517 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
4518 gimple_assign_set_lhs (new_stmt
, new_temp
);
4519 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
4525 /* Get vectorized definitions for loop-based vectorization of STMT_INFO.
4526 For the first operand we call vect_get_vec_def_for_operand (with OPRND
4527 containing scalar operand), and for the rest we get a copy with
4528 vect_get_vec_def_for_stmt_copy() using the previous vector definition
4529 (stored in OPRND). See vect_get_vec_def_for_stmt_copy() for details.
4530 The vectors are collected into VEC_OPRNDS. */
4533 vect_get_loop_based_defs (vec_info
*vinfo
, tree
*oprnd
, stmt_vec_info stmt_info
,
4534 vec
<tree
> *vec_oprnds
, int multi_step_cvt
)
4538 /* Get first vector operand. */
4539 /* All the vector operands except the very first one (that is scalar oprnd)
4541 if (TREE_CODE (TREE_TYPE (*oprnd
)) != VECTOR_TYPE
)
4542 vec_oprnd
= vect_get_vec_def_for_operand (vinfo
, *oprnd
, stmt_info
);
4544 vec_oprnd
= vect_get_vec_def_for_stmt_copy (vinfo
, *oprnd
);
4546 vec_oprnds
->quick_push (vec_oprnd
);
4548 /* Get second vector operand. */
4549 vec_oprnd
= vect_get_vec_def_for_stmt_copy (vinfo
, vec_oprnd
);
4550 vec_oprnds
->quick_push (vec_oprnd
);
4554 /* For conversion in multiple steps, continue to get operands
4557 vect_get_loop_based_defs (vinfo
, oprnd
, stmt_info
, vec_oprnds
,
4558 multi_step_cvt
- 1);
4562 /* Create vectorized demotion statements for vector operands from VEC_OPRNDS.
4563 For multi-step conversions store the resulting vectors and call the function
4567 vect_create_vectorized_demotion_stmts (vec_info
*vinfo
, vec
<tree
> *vec_oprnds
,
4569 stmt_vec_info stmt_info
,
4571 gimple_stmt_iterator
*gsi
,
4572 slp_tree slp_node
, enum tree_code code
,
4573 stmt_vec_info
*prev_stmt_info
)
4576 tree vop0
, vop1
, new_tmp
, vec_dest
;
4578 vec_dest
= vec_dsts
.pop ();
4580 for (i
= 0; i
< vec_oprnds
->length (); i
+= 2)
4582 /* Create demotion operation. */
4583 vop0
= (*vec_oprnds
)[i
];
4584 vop1
= (*vec_oprnds
)[i
+ 1];
4585 gassign
*new_stmt
= gimple_build_assign (vec_dest
, code
, vop0
, vop1
);
4586 new_tmp
= make_ssa_name (vec_dest
, new_stmt
);
4587 gimple_assign_set_lhs (new_stmt
, new_tmp
);
4588 stmt_vec_info new_stmt_info
4589 = vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
4592 /* Store the resulting vector for next recursive call. */
4593 (*vec_oprnds
)[i
/2] = new_tmp
;
4596 /* This is the last step of the conversion sequence. Store the
4597 vectors in SLP_NODE or in vector info of the scalar statement
4598 (or in STMT_VINFO_RELATED_STMT chain). */
4600 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt_info
);
4603 if (!*prev_stmt_info
)
4604 STMT_VINFO_VEC_STMT (stmt_info
) = new_stmt_info
;
4606 STMT_VINFO_RELATED_STMT (*prev_stmt_info
) = new_stmt_info
;
4608 *prev_stmt_info
= new_stmt_info
;
4613 /* For multi-step demotion operations we first generate demotion operations
4614 from the source type to the intermediate types, and then combine the
4615 results (stored in VEC_OPRNDS) in demotion operation to the destination
4619 /* At each level of recursion we have half of the operands we had at the
4621 vec_oprnds
->truncate ((i
+1)/2);
4622 vect_create_vectorized_demotion_stmts (vinfo
, vec_oprnds
,
4624 stmt_info
, vec_dsts
, gsi
,
4625 slp_node
, VEC_PACK_TRUNC_EXPR
,
4629 vec_dsts
.quick_push (vec_dest
);
4633 /* Create vectorized promotion statements for vector operands from VEC_OPRNDS0
4634 and VEC_OPRNDS1, for a binary operation associated with scalar statement
4635 STMT_INFO. For multi-step conversions store the resulting vectors and
4636 call the function recursively. */
4639 vect_create_vectorized_promotion_stmts (vec_info
*vinfo
,
4640 vec
<tree
> *vec_oprnds0
,
4641 vec
<tree
> *vec_oprnds1
,
4642 stmt_vec_info stmt_info
, tree vec_dest
,
4643 gimple_stmt_iterator
*gsi
,
4644 enum tree_code code1
,
4645 enum tree_code code2
, int op_type
)
4648 tree vop0
, vop1
, new_tmp1
, new_tmp2
;
4649 gimple
*new_stmt1
, *new_stmt2
;
4650 vec
<tree
> vec_tmp
= vNULL
;
4652 vec_tmp
.create (vec_oprnds0
->length () * 2);
4653 FOR_EACH_VEC_ELT (*vec_oprnds0
, i
, vop0
)
4655 if (op_type
== binary_op
)
4656 vop1
= (*vec_oprnds1
)[i
];
4660 /* Generate the two halves of promotion operation. */
4661 new_stmt1
= vect_gen_widened_results_half (vinfo
, code1
, vop0
, vop1
,
4662 op_type
, vec_dest
, gsi
,
4664 new_stmt2
= vect_gen_widened_results_half (vinfo
, code2
, vop0
, vop1
,
4665 op_type
, vec_dest
, gsi
,
4667 if (is_gimple_call (new_stmt1
))
4669 new_tmp1
= gimple_call_lhs (new_stmt1
);
4670 new_tmp2
= gimple_call_lhs (new_stmt2
);
4674 new_tmp1
= gimple_assign_lhs (new_stmt1
);
4675 new_tmp2
= gimple_assign_lhs (new_stmt2
);
4678 /* Store the results for the next step. */
4679 vec_tmp
.quick_push (new_tmp1
);
4680 vec_tmp
.quick_push (new_tmp2
);
4683 vec_oprnds0
->release ();
4684 *vec_oprnds0
= vec_tmp
;
4688 /* Check if STMT_INFO performs a conversion operation that can be vectorized.
4689 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
4690 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
4691 Return true if STMT_INFO is vectorizable in this way. */
4694 vectorizable_conversion (vec_info
*vinfo
,
4695 stmt_vec_info stmt_info
, gimple_stmt_iterator
*gsi
,
4696 stmt_vec_info
*vec_stmt
, slp_tree slp_node
,
4697 stmt_vector_for_cost
*cost_vec
)
4701 tree op0
, op1
= NULL_TREE
;
4702 tree vec_oprnd0
= NULL_TREE
, vec_oprnd1
= NULL_TREE
;
4703 loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
);
4704 enum tree_code code
, code1
= ERROR_MARK
, code2
= ERROR_MARK
;
4705 enum tree_code codecvt1
= ERROR_MARK
, codecvt2
= ERROR_MARK
;
4707 enum vect_def_type dt
[2] = {vect_unknown_def_type
, vect_unknown_def_type
};
4709 stmt_vec_info prev_stmt_info
;
4710 poly_uint64 nunits_in
;
4711 poly_uint64 nunits_out
;
4712 tree vectype_out
, vectype_in
;
4714 tree lhs_type
, rhs_type
;
4715 enum { NARROW
, NONE
, WIDEN
} modifier
;
4716 vec
<tree
> vec_oprnds0
= vNULL
;
4717 vec
<tree
> vec_oprnds1
= vNULL
;
4719 bb_vec_info bb_vinfo
= dyn_cast
<bb_vec_info
> (vinfo
);
4720 int multi_step_cvt
= 0;
4721 vec
<tree
> interm_types
= vNULL
;
4722 tree last_oprnd
, intermediate_type
, cvt_type
= NULL_TREE
;
4724 unsigned short fltsz
;
4726 /* Is STMT a vectorizable conversion? */
4728 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
4731 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
4735 gassign
*stmt
= dyn_cast
<gassign
*> (stmt_info
->stmt
);
4739 if (TREE_CODE (gimple_assign_lhs (stmt
)) != SSA_NAME
)
4742 code
= gimple_assign_rhs_code (stmt
);
4743 if (!CONVERT_EXPR_CODE_P (code
)
4744 && code
!= FIX_TRUNC_EXPR
4745 && code
!= FLOAT_EXPR
4746 && code
!= WIDEN_MULT_EXPR
4747 && code
!= WIDEN_LSHIFT_EXPR
)
4750 op_type
= TREE_CODE_LENGTH (code
);
4752 /* Check types of lhs and rhs. */
4753 scalar_dest
= gimple_assign_lhs (stmt
);
4754 lhs_type
= TREE_TYPE (scalar_dest
);
4755 vectype_out
= STMT_VINFO_VECTYPE (stmt_info
);
4757 /* Check the operands of the operation. */
4758 slp_tree slp_op0
, slp_op1
= NULL
;
4759 if (!vect_is_simple_use (vinfo
, stmt_info
, slp_node
,
4760 0, &op0
, &slp_op0
, &dt
[0], &vectype_in
))
4762 if (dump_enabled_p ())
4763 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4764 "use not simple.\n");
4768 rhs_type
= TREE_TYPE (op0
);
4769 if ((code
!= FIX_TRUNC_EXPR
&& code
!= FLOAT_EXPR
)
4770 && !((INTEGRAL_TYPE_P (lhs_type
)
4771 && INTEGRAL_TYPE_P (rhs_type
))
4772 || (SCALAR_FLOAT_TYPE_P (lhs_type
)
4773 && SCALAR_FLOAT_TYPE_P (rhs_type
))))
4776 if (!VECTOR_BOOLEAN_TYPE_P (vectype_out
)
4777 && ((INTEGRAL_TYPE_P (lhs_type
)
4778 && !type_has_mode_precision_p (lhs_type
))
4779 || (INTEGRAL_TYPE_P (rhs_type
)
4780 && !type_has_mode_precision_p (rhs_type
))))
4782 if (dump_enabled_p ())
4783 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4784 "type conversion to/from bit-precision unsupported."
4789 if (op_type
== binary_op
)
4791 gcc_assert (code
== WIDEN_MULT_EXPR
|| code
== WIDEN_LSHIFT_EXPR
);
4793 op1
= gimple_assign_rhs2 (stmt
);
4795 if (!vect_is_simple_use (vinfo
, stmt_info
, slp_node
, 1,
4796 &op1
, &slp_op1
, &dt
[1], &vectype1_in
))
4798 if (dump_enabled_p ())
4799 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4800 "use not simple.\n");
4803 /* For WIDEN_MULT_EXPR, if OP0 is a constant, use the type of
4806 vectype_in
= vectype1_in
;
4809 /* If op0 is an external or constant def, infer the vector type
4810 from the scalar type. */
4812 vectype_in
= get_vectype_for_scalar_type (vinfo
, rhs_type
, slp_node
);
4814 gcc_assert (vectype_in
);
4817 if (dump_enabled_p ())
4818 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4819 "no vectype for scalar type %T\n", rhs_type
);
4824 if (VECTOR_BOOLEAN_TYPE_P (vectype_out
)
4825 && !VECTOR_BOOLEAN_TYPE_P (vectype_in
))
4827 if (dump_enabled_p ())
4828 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4829 "can't convert between boolean and non "
4830 "boolean vectors %T\n", rhs_type
);
4835 nunits_in
= TYPE_VECTOR_SUBPARTS (vectype_in
);
4836 nunits_out
= TYPE_VECTOR_SUBPARTS (vectype_out
);
4837 if (known_eq (nunits_out
, nunits_in
))
4839 else if (multiple_p (nunits_out
, nunits_in
))
4843 gcc_checking_assert (multiple_p (nunits_in
, nunits_out
));
4847 /* Multiple types in SLP are handled by creating the appropriate number of
4848 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
4852 else if (modifier
== NARROW
)
4853 ncopies
= vect_get_num_copies (loop_vinfo
, vectype_out
);
4855 ncopies
= vect_get_num_copies (loop_vinfo
, vectype_in
);
4857 /* Sanity check: make sure that at least one copy of the vectorized stmt
4858 needs to be generated. */
4859 gcc_assert (ncopies
>= 1);
4861 bool found_mode
= false;
4862 scalar_mode lhs_mode
= SCALAR_TYPE_MODE (lhs_type
);
4863 scalar_mode rhs_mode
= SCALAR_TYPE_MODE (rhs_type
);
4864 opt_scalar_mode rhs_mode_iter
;
4866 /* Supportable by target? */
4870 if (code
!= FIX_TRUNC_EXPR
4871 && code
!= FLOAT_EXPR
4872 && !CONVERT_EXPR_CODE_P (code
))
4874 if (supportable_convert_operation (code
, vectype_out
, vectype_in
, &code1
))
4878 if (dump_enabled_p ())
4879 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4880 "conversion not supported by target.\n");
4884 if (supportable_widening_operation (vinfo
, code
, stmt_info
, vectype_out
,
4885 vectype_in
, &code1
, &code2
,
4886 &multi_step_cvt
, &interm_types
))
4888 /* Binary widening operation can only be supported directly by the
4890 gcc_assert (!(multi_step_cvt
&& op_type
== binary_op
));
4894 if (code
!= FLOAT_EXPR
4895 || GET_MODE_SIZE (lhs_mode
) <= GET_MODE_SIZE (rhs_mode
))
4898 fltsz
= GET_MODE_SIZE (lhs_mode
);
4899 FOR_EACH_2XWIDER_MODE (rhs_mode_iter
, rhs_mode
)
4901 rhs_mode
= rhs_mode_iter
.require ();
4902 if (GET_MODE_SIZE (rhs_mode
) > fltsz
)
4906 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode
), 0);
4907 cvt_type
= get_same_sized_vectype (cvt_type
, vectype_in
);
4908 if (cvt_type
== NULL_TREE
)
4911 if (GET_MODE_SIZE (rhs_mode
) == fltsz
)
4913 if (!supportable_convert_operation (code
, vectype_out
,
4914 cvt_type
, &codecvt1
))
4917 else if (!supportable_widening_operation (vinfo
, code
, stmt_info
,
4918 vectype_out
, cvt_type
,
4919 &codecvt1
, &codecvt2
,
4924 gcc_assert (multi_step_cvt
== 0);
4926 if (supportable_widening_operation (vinfo
, NOP_EXPR
, stmt_info
,
4928 vectype_in
, &code1
, &code2
,
4929 &multi_step_cvt
, &interm_types
))
4939 if (GET_MODE_SIZE (rhs_mode
) == fltsz
)
4940 codecvt2
= ERROR_MARK
;
4944 interm_types
.safe_push (cvt_type
);
4945 cvt_type
= NULL_TREE
;
4950 gcc_assert (op_type
== unary_op
);
4951 if (supportable_narrowing_operation (code
, vectype_out
, vectype_in
,
4952 &code1
, &multi_step_cvt
,
4956 if (code
!= FIX_TRUNC_EXPR
4957 || GET_MODE_SIZE (lhs_mode
) >= GET_MODE_SIZE (rhs_mode
))
4961 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode
), 0);
4962 cvt_type
= get_same_sized_vectype (cvt_type
, vectype_in
);
4963 if (cvt_type
== NULL_TREE
)
4965 if (!supportable_convert_operation (code
, cvt_type
, vectype_in
,
4968 if (supportable_narrowing_operation (NOP_EXPR
, vectype_out
, cvt_type
,
4969 &code1
, &multi_step_cvt
,
4978 if (!vec_stmt
) /* transformation not required. */
4981 && (!vect_maybe_update_slp_op_vectype (slp_op0
, vectype_in
)
4982 || !vect_maybe_update_slp_op_vectype (slp_op1
, vectype_in
)))
4984 if (dump_enabled_p ())
4985 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4986 "incompatible vector types for invariants\n");
4989 DUMP_VECT_SCOPE ("vectorizable_conversion");
4990 if (modifier
== NONE
)
4992 STMT_VINFO_TYPE (stmt_info
) = type_conversion_vec_info_type
;
4993 vect_model_simple_cost (vinfo
, stmt_info
, ncopies
, dt
, ndts
, slp_node
,
4996 else if (modifier
== NARROW
)
4998 STMT_VINFO_TYPE (stmt_info
) = type_demotion_vec_info_type
;
4999 /* The final packing step produces one vector result per copy. */
5000 unsigned int nvectors
5001 = (slp_node
? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
) : ncopies
);
5002 vect_model_promotion_demotion_cost (stmt_info
, dt
, nvectors
,
5003 multi_step_cvt
, cost_vec
);
5007 STMT_VINFO_TYPE (stmt_info
) = type_promotion_vec_info_type
;
5008 /* The initial unpacking step produces two vector results
5009 per copy. MULTI_STEP_CVT is 0 for a single conversion,
5010 so >> MULTI_STEP_CVT divides by 2^(number of steps - 1). */
5011 unsigned int nvectors
5013 ? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
) >> multi_step_cvt
5015 vect_model_promotion_demotion_cost (stmt_info
, dt
, nvectors
,
5016 multi_step_cvt
, cost_vec
);
5018 interm_types
.release ();
5023 if (dump_enabled_p ())
5024 dump_printf_loc (MSG_NOTE
, vect_location
,
5025 "transform conversion. ncopies = %d.\n", ncopies
);
5027 if (op_type
== binary_op
)
5029 if (CONSTANT_CLASS_P (op0
))
5030 op0
= fold_convert (TREE_TYPE (op1
), op0
);
5031 else if (CONSTANT_CLASS_P (op1
))
5032 op1
= fold_convert (TREE_TYPE (op0
), op1
);
5035 /* In case of multi-step conversion, we first generate conversion operations
5036 to the intermediate types, and then from that types to the final one.
5037 We create vector destinations for the intermediate type (TYPES) received
5038 from supportable_*_operation, and store them in the correct order
5039 for future use in vect_create_vectorized_*_stmts (). */
5040 auto_vec
<tree
> vec_dsts (multi_step_cvt
+ 1);
5041 vec_dest
= vect_create_destination_var (scalar_dest
,
5042 (cvt_type
&& modifier
== WIDEN
)
5043 ? cvt_type
: vectype_out
);
5044 vec_dsts
.quick_push (vec_dest
);
5048 for (i
= interm_types
.length () - 1;
5049 interm_types
.iterate (i
, &intermediate_type
); i
--)
5051 vec_dest
= vect_create_destination_var (scalar_dest
,
5053 vec_dsts
.quick_push (vec_dest
);
5058 vec_dest
= vect_create_destination_var (scalar_dest
,
5060 ? vectype_out
: cvt_type
);
5064 if (modifier
== WIDEN
)
5066 vec_oprnds0
.create (multi_step_cvt
? vect_pow2 (multi_step_cvt
) : 1);
5067 if (op_type
== binary_op
)
5068 vec_oprnds1
.create (1);
5070 else if (modifier
== NARROW
)
5071 vec_oprnds0
.create (
5072 2 * (multi_step_cvt
? vect_pow2 (multi_step_cvt
) : 1));
5074 else if (code
== WIDEN_LSHIFT_EXPR
)
5075 vec_oprnds1
.create (slp_node
->vec_stmts_size
);
5078 prev_stmt_info
= NULL
;
5082 for (j
= 0; j
< ncopies
; j
++)
5085 vect_get_vec_defs (vinfo
, op0
, NULL
, stmt_info
, &vec_oprnds0
,
5088 vect_get_vec_defs_for_stmt_copy (vinfo
, &vec_oprnds0
, NULL
);
5090 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vop0
)
5092 stmt_vec_info new_stmt_info
;
5093 /* Arguments are ready, create the new vector stmt. */
5094 gcc_assert (TREE_CODE_LENGTH (code1
) == unary_op
);
5095 gassign
*new_stmt
= gimple_build_assign (vec_dest
, code1
, vop0
);
5096 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
5097 gimple_assign_set_lhs (new_stmt
, new_temp
);
5099 = vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
5102 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt_info
);
5105 if (!prev_stmt_info
)
5106 STMT_VINFO_VEC_STMT (stmt_info
)
5107 = *vec_stmt
= new_stmt_info
;
5109 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt_info
;
5110 prev_stmt_info
= new_stmt_info
;
5117 /* In case the vectorization factor (VF) is bigger than the number
5118 of elements that we can fit in a vectype (nunits), we have to
5119 generate more than one vector stmt - i.e - we need to "unroll"
5120 the vector stmt by a factor VF/nunits. */
5121 for (j
= 0; j
< ncopies
; j
++)
5128 if (code
== WIDEN_LSHIFT_EXPR
)
5133 /* Store vec_oprnd1 for every vector stmt to be created
5134 for SLP_NODE. We check during the analysis that all
5135 the shift arguments are the same. */
5136 for (k
= 0; k
< slp_node
->vec_stmts_size
- 1; k
++)
5137 vec_oprnds1
.quick_push (vec_oprnd1
);
5139 vect_get_vec_defs (vinfo
, op0
, NULL_TREE
, stmt_info
,
5140 &vec_oprnds0
, NULL
, slp_node
);
5143 vect_get_vec_defs (vinfo
, op0
, op1
, stmt_info
, &vec_oprnds0
,
5144 &vec_oprnds1
, slp_node
);
5148 vec_oprnd0
= vect_get_vec_def_for_operand (vinfo
,
5150 vec_oprnds0
.quick_push (vec_oprnd0
);
5151 if (op_type
== binary_op
)
5153 if (code
== WIDEN_LSHIFT_EXPR
)
5157 = vect_get_vec_def_for_operand (vinfo
,
5159 vec_oprnds1
.quick_push (vec_oprnd1
);
5165 vec_oprnd0
= vect_get_vec_def_for_stmt_copy (vinfo
, vec_oprnd0
);
5166 vec_oprnds0
.truncate (0);
5167 vec_oprnds0
.quick_push (vec_oprnd0
);
5168 if (op_type
== binary_op
)
5170 if (code
== WIDEN_LSHIFT_EXPR
)
5173 vec_oprnd1
= vect_get_vec_def_for_stmt_copy (vinfo
,
5175 vec_oprnds1
.truncate (0);
5176 vec_oprnds1
.quick_push (vec_oprnd1
);
5180 /* Arguments are ready. Create the new vector stmts. */
5181 for (i
= multi_step_cvt
; i
>= 0; i
--)
5183 tree this_dest
= vec_dsts
[i
];
5184 enum tree_code c1
= code1
, c2
= code2
;
5185 if (i
== 0 && codecvt2
!= ERROR_MARK
)
5190 vect_create_vectorized_promotion_stmts (vinfo
, &vec_oprnds0
,
5191 &vec_oprnds1
, stmt_info
,
5196 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vop0
)
5198 stmt_vec_info new_stmt_info
;
5201 gcc_assert (TREE_CODE_LENGTH (codecvt1
) == unary_op
);
5202 new_temp
= make_ssa_name (vec_dest
);
5204 = gimple_build_assign (new_temp
, codecvt1
, vop0
);
5206 = vect_finish_stmt_generation (vinfo
, stmt_info
,
5210 new_stmt_info
= vinfo
->lookup_def (vop0
);
5213 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt_info
);
5216 if (!prev_stmt_info
)
5217 STMT_VINFO_VEC_STMT (stmt_info
) = new_stmt_info
;
5219 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt_info
;
5220 prev_stmt_info
= new_stmt_info
;
5225 *vec_stmt
= STMT_VINFO_VEC_STMT (stmt_info
);
5229 /* In case the vectorization factor (VF) is bigger than the number
5230 of elements that we can fit in a vectype (nunits), we have to
5231 generate more than one vector stmt - i.e - we need to "unroll"
5232 the vector stmt by a factor VF/nunits. */
5233 for (j
= 0; j
< ncopies
; j
++)
5237 vect_get_vec_defs (vinfo
, op0
, NULL_TREE
, stmt_info
, &vec_oprnds0
,
5241 vec_oprnds0
.truncate (0);
5242 vect_get_loop_based_defs (vinfo
,
5243 &last_oprnd
, stmt_info
, &vec_oprnds0
,
5244 vect_pow2 (multi_step_cvt
) - 1);
5247 /* Arguments are ready. Create the new vector stmts. */
5249 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vop0
)
5251 gcc_assert (TREE_CODE_LENGTH (codecvt1
) == unary_op
);
5252 new_temp
= make_ssa_name (vec_dest
);
5254 = gimple_build_assign (new_temp
, codecvt1
, vop0
);
5255 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
5256 vec_oprnds0
[i
] = new_temp
;
5259 vect_create_vectorized_demotion_stmts (vinfo
, &vec_oprnds0
,
5261 stmt_info
, vec_dsts
, gsi
,
5266 *vec_stmt
= STMT_VINFO_VEC_STMT (stmt_info
);
5270 vec_oprnds0
.release ();
5271 vec_oprnds1
.release ();
5272 interm_types
.release ();
5277 /* Return true if we can assume from the scalar form of STMT_INFO that
5278 neither the scalar nor the vector forms will generate code. STMT_INFO
5279 is known not to involve a data reference. */
5282 vect_nop_conversion_p (stmt_vec_info stmt_info
)
5284 gassign
*stmt
= dyn_cast
<gassign
*> (stmt_info
->stmt
);
5288 tree lhs
= gimple_assign_lhs (stmt
);
5289 tree_code code
= gimple_assign_rhs_code (stmt
);
5290 tree rhs
= gimple_assign_rhs1 (stmt
);
5292 if (code
== SSA_NAME
|| code
== VIEW_CONVERT_EXPR
)
5295 if (CONVERT_EXPR_CODE_P (code
))
5296 return tree_nop_conversion_p (TREE_TYPE (lhs
), TREE_TYPE (rhs
));
5301 /* Function vectorizable_assignment.
5303 Check if STMT_INFO performs an assignment (copy) that can be vectorized.
5304 If VEC_STMT is also passed, vectorize the STMT_INFO: create a vectorized
5305 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
5306 Return true if STMT_INFO is vectorizable in this way. */
5309 vectorizable_assignment (vec_info
*vinfo
,
5310 stmt_vec_info stmt_info
, gimple_stmt_iterator
*gsi
,
5311 stmt_vec_info
*vec_stmt
, slp_tree slp_node
,
5312 stmt_vector_for_cost
*cost_vec
)
5317 loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
);
5319 enum vect_def_type dt
[1] = {vect_unknown_def_type
};
5323 vec
<tree
> vec_oprnds
= vNULL
;
5325 bb_vec_info bb_vinfo
= dyn_cast
<bb_vec_info
> (vinfo
);
5326 stmt_vec_info prev_stmt_info
= NULL
;
5327 enum tree_code code
;
5330 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
5333 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
5337 /* Is vectorizable assignment? */
5338 gassign
*stmt
= dyn_cast
<gassign
*> (stmt_info
->stmt
);
5342 scalar_dest
= gimple_assign_lhs (stmt
);
5343 if (TREE_CODE (scalar_dest
) != SSA_NAME
)
5346 if (STMT_VINFO_DATA_REF (stmt_info
))
5349 code
= gimple_assign_rhs_code (stmt
);
5350 if (!(gimple_assign_single_p (stmt
)
5351 || code
== PAREN_EXPR
5352 || CONVERT_EXPR_CODE_P (code
)))
5355 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
5356 poly_uint64 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
5358 /* Multiple types in SLP are handled by creating the appropriate number of
5359 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
5364 ncopies
= vect_get_num_copies (loop_vinfo
, vectype
);
5366 gcc_assert (ncopies
>= 1);
5369 if (!vect_is_simple_use (vinfo
, stmt_info
, slp_node
, 0, &op
, &slp_op
,
5370 &dt
[0], &vectype_in
))
5372 if (dump_enabled_p ())
5373 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5374 "use not simple.\n");
5378 vectype_in
= get_vectype_for_scalar_type (vinfo
, TREE_TYPE (op
), slp_node
);
5380 /* We can handle NOP_EXPR conversions that do not change the number
5381 of elements or the vector size. */
5382 if ((CONVERT_EXPR_CODE_P (code
)
5383 || code
== VIEW_CONVERT_EXPR
)
5385 || maybe_ne (TYPE_VECTOR_SUBPARTS (vectype_in
), nunits
)
5386 || maybe_ne (GET_MODE_SIZE (TYPE_MODE (vectype
)),
5387 GET_MODE_SIZE (TYPE_MODE (vectype_in
)))))
5390 /* We do not handle bit-precision changes. */
5391 if ((CONVERT_EXPR_CODE_P (code
)
5392 || code
== VIEW_CONVERT_EXPR
)
5393 && INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest
))
5394 && (!type_has_mode_precision_p (TREE_TYPE (scalar_dest
))
5395 || !type_has_mode_precision_p (TREE_TYPE (op
)))
5396 /* But a conversion that does not change the bit-pattern is ok. */
5397 && !((TYPE_PRECISION (TREE_TYPE (scalar_dest
))
5398 > TYPE_PRECISION (TREE_TYPE (op
)))
5399 && TYPE_UNSIGNED (TREE_TYPE (op
)))
5400 /* Conversion between boolean types of different sizes is
5401 a simple assignment in case their vectypes are same
5403 && (!VECTOR_BOOLEAN_TYPE_P (vectype
)
5404 || !VECTOR_BOOLEAN_TYPE_P (vectype_in
)))
5406 if (dump_enabled_p ())
5407 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5408 "type conversion to/from bit-precision "
5413 if (!vec_stmt
) /* transformation not required. */
5416 && !vect_maybe_update_slp_op_vectype (slp_op
, vectype_in
))
5418 if (dump_enabled_p ())
5419 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5420 "incompatible vector types for invariants\n");
5423 STMT_VINFO_TYPE (stmt_info
) = assignment_vec_info_type
;
5424 DUMP_VECT_SCOPE ("vectorizable_assignment");
5425 if (!vect_nop_conversion_p (stmt_info
))
5426 vect_model_simple_cost (vinfo
, stmt_info
, ncopies
, dt
, ndts
, slp_node
,
5432 if (dump_enabled_p ())
5433 dump_printf_loc (MSG_NOTE
, vect_location
, "transform assignment.\n");
5436 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
5439 for (j
= 0; j
< ncopies
; j
++)
5443 vect_get_vec_defs (vinfo
, op
, NULL
, stmt_info
, &vec_oprnds
, NULL
,
5446 vect_get_vec_defs_for_stmt_copy (vinfo
, &vec_oprnds
, NULL
);
5448 /* Arguments are ready. create the new vector stmt. */
5449 stmt_vec_info new_stmt_info
= NULL
;
5450 FOR_EACH_VEC_ELT (vec_oprnds
, i
, vop
)
5452 if (CONVERT_EXPR_CODE_P (code
)
5453 || code
== VIEW_CONVERT_EXPR
)
5454 vop
= build1 (VIEW_CONVERT_EXPR
, vectype
, vop
);
5455 gassign
*new_stmt
= gimple_build_assign (vec_dest
, vop
);
5456 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
5457 gimple_assign_set_lhs (new_stmt
, new_temp
);
5459 = vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
5461 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt_info
);
5468 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt_info
;
5470 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt_info
;
5472 prev_stmt_info
= new_stmt_info
;
5475 vec_oprnds
.release ();
5480 /* Return TRUE if CODE (a shift operation) is supported for SCALAR_TYPE
5481 either as shift by a scalar or by a vector. */
5484 vect_supportable_shift (vec_info
*vinfo
, enum tree_code code
, tree scalar_type
)
5487 machine_mode vec_mode
;
  /* Determine the vector type a vectorized shift of SCALAR_TYPE would use.  */
5492 vectype
= get_vectype_for_scalar_type (vinfo
, scalar_type
);
  /* First try the form where the whole vector is shifted by one scalar
     shift amount.  */
5496 optab
= optab_for_tree_code (code
, vectype
, optab_scalar
);
5498 || optab_handler (optab
, TYPE_MODE (vectype
)) == CODE_FOR_nothing
)
  /* No vector-shifted-by-scalar support; fall back to the form where each
     element is shifted by its own amount (vector shift operand).  */
5500 optab
= optab_for_tree_code (code
, vectype
, optab_vector
);
5502 || (optab_handler (optab
, TYPE_MODE (vectype
))
5503 == CODE_FOR_nothing
))
  /* Finally confirm the chosen optab has an instruction pattern for the
     vector mode.  */
5507 vec_mode
= TYPE_MODE (vectype
);
5508 icode
= (int) optab_handler (optab
, vec_mode
);
5509 if (icode
== CODE_FOR_nothing
)
5516 /* Function vectorizable_shift.
5518 Check if STMT_INFO performs a shift operation that can be vectorized.
5519 If VEC_STMT is also passed, vectorize the STMT_INFO: create a vectorized
5520 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
5521 Return true if STMT_INFO is vectorizable in this way. */
5524 vectorizable_shift (vec_info
*vinfo
,
5525 stmt_vec_info stmt_info
, gimple_stmt_iterator
*gsi
,
5526 stmt_vec_info
*vec_stmt
, slp_tree slp_node
,
5527 stmt_vector_for_cost
*cost_vec
)
5531 tree op0
, op1
= NULL
;
5532 tree vec_oprnd1
= NULL_TREE
;
5534 loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
);
5535 enum tree_code code
;
5536 machine_mode vec_mode
;
5540 machine_mode optab_op2_mode
;
5541 enum vect_def_type dt
[2] = {vect_unknown_def_type
, vect_unknown_def_type
};
5543 stmt_vec_info prev_stmt_info
;
5544 poly_uint64 nunits_in
;
5545 poly_uint64 nunits_out
;
5550 vec
<tree
> vec_oprnds0
= vNULL
;
5551 vec
<tree
> vec_oprnds1
= vNULL
;
5554 bool scalar_shift_arg
= true;
5555 bb_vec_info bb_vinfo
= dyn_cast
<bb_vec_info
> (vinfo
);
5556 bool incompatible_op1_vectype_p
= false;
5558 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
5561 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
5562 && STMT_VINFO_DEF_TYPE (stmt_info
) != vect_nested_cycle
5566 /* Is STMT a vectorizable binary/unary operation? */
5567 gassign
*stmt
= dyn_cast
<gassign
*> (stmt_info
->stmt
);
5571 if (TREE_CODE (gimple_assign_lhs (stmt
)) != SSA_NAME
)
5574 code
= gimple_assign_rhs_code (stmt
);
5576 if (!(code
== LSHIFT_EXPR
|| code
== RSHIFT_EXPR
|| code
== LROTATE_EXPR
5577 || code
== RROTATE_EXPR
))
5580 scalar_dest
= gimple_assign_lhs (stmt
);
5581 vectype_out
= STMT_VINFO_VECTYPE (stmt_info
);
5582 if (!type_has_mode_precision_p (TREE_TYPE (scalar_dest
)))
5584 if (dump_enabled_p ())
5585 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5586 "bit-precision shifts not supported.\n");
5591 if (!vect_is_simple_use (vinfo
, stmt_info
, slp_node
,
5592 0, &op0
, &slp_op0
, &dt
[0], &vectype
))
5594 if (dump_enabled_p ())
5595 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5596 "use not simple.\n");
5599 /* If op0 is an external or constant def, infer the vector type
5600 from the scalar type. */
5602 vectype
= get_vectype_for_scalar_type (vinfo
, TREE_TYPE (op0
), slp_node
);
5604 gcc_assert (vectype
);
5607 if (dump_enabled_p ())
5608 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5609 "no vectype for scalar type\n");
5613 nunits_out
= TYPE_VECTOR_SUBPARTS (vectype_out
);
5614 nunits_in
= TYPE_VECTOR_SUBPARTS (vectype
);
5615 if (maybe_ne (nunits_out
, nunits_in
))
5618 stmt_vec_info op1_def_stmt_info
;
5620 if (!vect_is_simple_use (vinfo
, stmt_info
, slp_node
, 1, &op1
, &slp_op1
,
5621 &dt
[1], &op1_vectype
, &op1_def_stmt_info
))
5623 if (dump_enabled_p ())
5624 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5625 "use not simple.\n");
5629 /* Multiple types in SLP are handled by creating the appropriate number of
5630 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
5635 ncopies
= vect_get_num_copies (loop_vinfo
, vectype
);
5637 gcc_assert (ncopies
>= 1);
5639 /* Determine whether the shift amount is a vector, or scalar. If the
5640 shift/rotate amount is a vector, use the vector/vector shift optabs. */
5642 if ((dt
[1] == vect_internal_def
5643 || dt
[1] == vect_induction_def
5644 || dt
[1] == vect_nested_cycle
)
5646 scalar_shift_arg
= false;
5647 else if (dt
[1] == vect_constant_def
5648 || dt
[1] == vect_external_def
5649 || dt
[1] == vect_internal_def
)
5651 /* In SLP, need to check whether the shift count is the same,
5652 in loops if it is a constant or invariant, it is always
5656 vec
<stmt_vec_info
> stmts
= SLP_TREE_SCALAR_STMTS (slp_node
);
5657 stmt_vec_info slpstmt_info
;
5659 FOR_EACH_VEC_ELT (stmts
, k
, slpstmt_info
)
5661 gassign
*slpstmt
= as_a
<gassign
*> (slpstmt_info
->stmt
);
5662 if (!operand_equal_p (gimple_assign_rhs2 (slpstmt
), op1
, 0))
5663 scalar_shift_arg
= false;
5666 /* For internal SLP defs we have to make sure we see scalar stmts
5667 for all vector elements.
5668 ??? For different vectors we could resort to a different
5669 scalar shift operand but code-generation below simply always
5671 if (dt
[1] == vect_internal_def
5672 && maybe_ne (nunits_out
* SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
),
5674 scalar_shift_arg
= false;
5677 /* If the shift amount is computed by a pattern stmt we cannot
5678 use the scalar amount directly thus give up and use a vector
5680 if (op1_def_stmt_info
&& is_pattern_stmt_p (op1_def_stmt_info
))
5681 scalar_shift_arg
= false;
5685 if (dump_enabled_p ())
5686 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5687 "operand mode requires invariant argument.\n");
5691 /* Vector shifted by vector. */
5692 bool was_scalar_shift_arg
= scalar_shift_arg
;
5693 if (!scalar_shift_arg
)
5695 optab
= optab_for_tree_code (code
, vectype
, optab_vector
);
5696 if (dump_enabled_p ())
5697 dump_printf_loc (MSG_NOTE
, vect_location
,
5698 "vector/vector shift/rotate found.\n");
5701 op1_vectype
= get_vectype_for_scalar_type (vinfo
, TREE_TYPE (op1
),
5703 incompatible_op1_vectype_p
5704 = (op1_vectype
== NULL_TREE
5705 || maybe_ne (TYPE_VECTOR_SUBPARTS (op1_vectype
),
5706 TYPE_VECTOR_SUBPARTS (vectype
))
5707 || TYPE_MODE (op1_vectype
) != TYPE_MODE (vectype
));
5708 if (incompatible_op1_vectype_p
5710 || SLP_TREE_DEF_TYPE (slp_op1
) != vect_constant_def
5711 || slp_op1
->refcnt
!= 1))
5713 if (dump_enabled_p ())
5714 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5715 "unusable type for last operand in"
5716 " vector/vector shift/rotate.\n");
5720 /* See if the machine has a vector shifted by scalar insn and if not
5721 then see if it has a vector shifted by vector insn. */
5724 optab
= optab_for_tree_code (code
, vectype
, optab_scalar
);
5726 && optab_handler (optab
, TYPE_MODE (vectype
)) != CODE_FOR_nothing
)
5728 if (dump_enabled_p ())
5729 dump_printf_loc (MSG_NOTE
, vect_location
,
5730 "vector/scalar shift/rotate found.\n");
5734 optab
= optab_for_tree_code (code
, vectype
, optab_vector
);
5736 && (optab_handler (optab
, TYPE_MODE (vectype
))
5737 != CODE_FOR_nothing
))
5739 scalar_shift_arg
= false;
5741 if (dump_enabled_p ())
5742 dump_printf_loc (MSG_NOTE
, vect_location
,
5743 "vector/vector shift/rotate found.\n");
5746 op1_vectype
= get_vectype_for_scalar_type (vinfo
,
5750 /* Unlike the other binary operators, shifts/rotates have
5751 the rhs being int, instead of the same type as the lhs,
5752 so make sure the scalar is the right type if we are
5753 dealing with vectors of long long/long/short/char. */
5754 incompatible_op1_vectype_p
5756 || !tree_nop_conversion_p (TREE_TYPE (vectype
),
5762 /* Supportable by target? */
5765 if (dump_enabled_p ())
5766 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5770 vec_mode
= TYPE_MODE (vectype
);
5771 icode
= (int) optab_handler (optab
, vec_mode
);
5772 if (icode
== CODE_FOR_nothing
)
5774 if (dump_enabled_p ())
5775 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5776 "op not supported by target.\n");
5777 /* Check only during analysis. */
5778 if (maybe_ne (GET_MODE_SIZE (vec_mode
), UNITS_PER_WORD
)
5780 && !vect_worthwhile_without_simd_p (vinfo
, code
)))
5782 if (dump_enabled_p ())
5783 dump_printf_loc (MSG_NOTE
, vect_location
,
5784 "proceeding using word mode.\n");
5787 /* Worthwhile without SIMD support? Check only during analysis. */
5789 && !VECTOR_MODE_P (TYPE_MODE (vectype
))
5790 && !vect_worthwhile_without_simd_p (vinfo
, code
))
5792 if (dump_enabled_p ())
5793 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5794 "not worthwhile without SIMD support.\n");
5798 if (!vec_stmt
) /* transformation not required. */
5801 && (!vect_maybe_update_slp_op_vectype (slp_op0
, vectype
)
5802 || (!scalar_shift_arg
5803 && (!incompatible_op1_vectype_p
5804 || dt
[1] == vect_constant_def
)
5805 && !vect_maybe_update_slp_op_vectype
5807 incompatible_op1_vectype_p
? vectype
: op1_vectype
))))
5809 if (dump_enabled_p ())
5810 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5811 "incompatible vector types for invariants\n");
5814 /* Now adjust the constant shift amount in place. */
5816 && incompatible_op1_vectype_p
5817 && dt
[1] == vect_constant_def
)
5819 for (unsigned i
= 0;
5820 i
< SLP_TREE_SCALAR_OPS (slp_op1
).length (); ++i
)
5822 SLP_TREE_SCALAR_OPS (slp_op1
)[i
]
5823 = fold_convert (TREE_TYPE (vectype
),
5824 SLP_TREE_SCALAR_OPS (slp_op1
)[i
]);
5825 gcc_assert ((TREE_CODE (SLP_TREE_SCALAR_OPS (slp_op1
)[i
])
5829 STMT_VINFO_TYPE (stmt_info
) = shift_vec_info_type
;
5830 DUMP_VECT_SCOPE ("vectorizable_shift");
5831 vect_model_simple_cost (vinfo
, stmt_info
, ncopies
, dt
,
5832 scalar_shift_arg
? 1 : ndts
, slp_node
, cost_vec
);
5838 if (dump_enabled_p ())
5839 dump_printf_loc (MSG_NOTE
, vect_location
,
5840 "transform binary/unary operation.\n");
5842 if (incompatible_op1_vectype_p
&& !slp_node
)
5844 op1
= fold_convert (TREE_TYPE (vectype
), op1
);
5845 if (dt
[1] != vect_constant_def
)
5846 op1
= vect_init_vector (vinfo
, stmt_info
, op1
,
5847 TREE_TYPE (vectype
), NULL
);
5851 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
5853 prev_stmt_info
= NULL
;
5854 for (j
= 0; j
< ncopies
; j
++)
5859 if (scalar_shift_arg
)
5861 /* Vector shl and shr insn patterns can be defined with scalar
5862 operand 2 (shift operand). In this case, use constant or loop
5863 invariant op1 directly, without extending it to vector mode
5865 optab_op2_mode
= insn_data
[icode
].operand
[2].mode
;
5866 if (!VECTOR_MODE_P (optab_op2_mode
))
5868 if (dump_enabled_p ())
5869 dump_printf_loc (MSG_NOTE
, vect_location
,
5870 "operand 1 using scalar mode.\n");
5872 vec_oprnds1
.create (slp_node
? slp_node
->vec_stmts_size
: 1);
5873 vec_oprnds1
.quick_push (vec_oprnd1
);
5876 /* Store vec_oprnd1 for every vector stmt to be created
5877 for SLP_NODE. We check during the analysis that all
5878 the shift arguments are the same.
5879 TODO: Allow different constants for different vector
5880 stmts generated for an SLP instance. */
5881 for (k
= 0; k
< slp_node
->vec_stmts_size
- 1; k
++)
5882 vec_oprnds1
.quick_push (vec_oprnd1
);
5886 else if (slp_node
&& incompatible_op1_vectype_p
)
5888 if (was_scalar_shift_arg
)
5890 /* If the argument was the same in all lanes create
5891 the correctly typed vector shift amount directly. */
5892 op1
= fold_convert (TREE_TYPE (vectype
), op1
);
5893 op1
= vect_init_vector (vinfo
, stmt_info
,
5894 op1
, TREE_TYPE (vectype
),
5895 !loop_vinfo
? gsi
: NULL
);
5896 vec_oprnd1
= vect_init_vector (vinfo
, stmt_info
, op1
, vectype
,
5897 !loop_vinfo
? gsi
: NULL
);
5898 vec_oprnds1
.create (slp_node
->vec_stmts_size
);
5899 for (k
= 0; k
< slp_node
->vec_stmts_size
; k
++)
5900 vec_oprnds1
.quick_push (vec_oprnd1
);
5902 else if (dt
[1] == vect_constant_def
)
5903 /* The constant shift amount has been adjusted in place. */
5906 gcc_assert (TYPE_MODE (op1_vectype
) == TYPE_MODE (vectype
));
5909 /* vec_oprnd1 is available if operand 1 should be of a scalar-type
5910 (a special case for certain kind of vector shifts); otherwise,
5911 operand 1 should be of a vector type (the usual case). */
5913 vect_get_vec_defs (vinfo
, op0
, NULL_TREE
, stmt_info
,
5914 &vec_oprnds0
, NULL
, slp_node
);
5916 vect_get_vec_defs (vinfo
, op0
, op1
, stmt_info
,
5917 &vec_oprnds0
, &vec_oprnds1
, slp_node
);
5920 vect_get_vec_defs_for_stmt_copy (vinfo
, &vec_oprnds0
, &vec_oprnds1
);
5922 /* Arguments are ready. Create the new vector stmt. */
5923 stmt_vec_info new_stmt_info
= NULL
;
5924 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vop0
)
5926 vop1
= vec_oprnds1
[i
];
5927 gassign
*new_stmt
= gimple_build_assign (vec_dest
, code
, vop0
, vop1
);
5928 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
5929 gimple_assign_set_lhs (new_stmt
, new_temp
);
5931 = vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
5933 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt_info
);
5940 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt_info
;
5942 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt_info
;
5943 prev_stmt_info
= new_stmt_info
;
5946 vec_oprnds0
.release ();
5947 vec_oprnds1
.release ();
5953 /* Function vectorizable_operation.
5955 Check if STMT_INFO performs a binary, unary or ternary operation that can
5957 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
5958 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
5959 Return true if STMT_INFO is vectorizable in this way. */
5962 vectorizable_operation (vec_info
*vinfo
,
5963 stmt_vec_info stmt_info
, gimple_stmt_iterator
*gsi
,
5964 stmt_vec_info
*vec_stmt
, slp_tree slp_node
,
5965 stmt_vector_for_cost
*cost_vec
)
5969 tree op0
, op1
= NULL_TREE
, op2
= NULL_TREE
;
5971 loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
);
5972 enum tree_code code
, orig_code
;
5973 machine_mode vec_mode
;
5977 bool target_support_p
;
5978 enum vect_def_type dt
[3]
5979 = {vect_unknown_def_type
, vect_unknown_def_type
, vect_unknown_def_type
};
5981 stmt_vec_info prev_stmt_info
;
5982 poly_uint64 nunits_in
;
5983 poly_uint64 nunits_out
;
5985 int ncopies
, vec_num
;
5987 vec
<tree
> vec_oprnds0
= vNULL
;
5988 vec
<tree
> vec_oprnds1
= vNULL
;
5989 vec
<tree
> vec_oprnds2
= vNULL
;
5990 tree vop0
, vop1
, vop2
;
5991 bb_vec_info bb_vinfo
= dyn_cast
<bb_vec_info
> (vinfo
);
5993 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
5996 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
6000 /* Is STMT a vectorizable binary/unary operation? */
6001 gassign
*stmt
= dyn_cast
<gassign
*> (stmt_info
->stmt
);
6005 /* Loads and stores are handled in vectorizable_{load,store}. */
6006 if (STMT_VINFO_DATA_REF (stmt_info
))
6009 orig_code
= code
= gimple_assign_rhs_code (stmt
);
6011 /* Shifts are handled in vectorizable_shift. */
6012 if (code
== LSHIFT_EXPR
6013 || code
== RSHIFT_EXPR
6014 || code
== LROTATE_EXPR
6015 || code
== RROTATE_EXPR
)
6018 /* Comparisons are handled in vectorizable_comparison. */
6019 if (TREE_CODE_CLASS (code
) == tcc_comparison
)
6022 /* Conditions are handled in vectorizable_condition. */
6023 if (code
== COND_EXPR
)
6026 /* For pointer addition and subtraction, we should use the normal
6027 plus and minus for the vector operation. */
6028 if (code
== POINTER_PLUS_EXPR
)
6030 if (code
== POINTER_DIFF_EXPR
)
6033 /* Support only unary or binary operations. */
6034 op_type
= TREE_CODE_LENGTH (code
);
6035 if (op_type
!= unary_op
&& op_type
!= binary_op
&& op_type
!= ternary_op
)
6037 if (dump_enabled_p ())
6038 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6039 "num. args = %d (not unary/binary/ternary op).\n",
6044 scalar_dest
= gimple_assign_lhs (stmt
);
6045 vectype_out
= STMT_VINFO_VECTYPE (stmt_info
);
6047 /* Most operations cannot handle bit-precision types without extra
6049 bool mask_op_p
= VECTOR_BOOLEAN_TYPE_P (vectype_out
);
6051 && !type_has_mode_precision_p (TREE_TYPE (scalar_dest
))
6052 /* Exception are bitwise binary operations. */
6053 && code
!= BIT_IOR_EXPR
6054 && code
!= BIT_XOR_EXPR
6055 && code
!= BIT_AND_EXPR
)
6057 if (dump_enabled_p ())
6058 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6059 "bit-precision arithmetic not supported.\n");
6064 if (!vect_is_simple_use (vinfo
, stmt_info
, slp_node
,
6065 0, &op0
, &slp_op0
, &dt
[0], &vectype
))
6067 if (dump_enabled_p ())
6068 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6069 "use not simple.\n");
6072 /* If op0 is an external or constant def, infer the vector type
6073 from the scalar type. */
6076 /* For boolean type we cannot determine vectype by
6077 invariant value (don't know whether it is a vector
6078 of booleans or vector of integers). We use output
6079 vectype because operations on boolean don't change
6081 if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op0
)))
6083 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (scalar_dest
)))
6085 if (dump_enabled_p ())
6086 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6087 "not supported operation on bool value.\n");
6090 vectype
= vectype_out
;
6093 vectype
= get_vectype_for_scalar_type (vinfo
, TREE_TYPE (op0
),
6097 gcc_assert (vectype
);
6100 if (dump_enabled_p ())
6101 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6102 "no vectype for scalar type %T\n",
6108 nunits_out
= TYPE_VECTOR_SUBPARTS (vectype_out
);
6109 nunits_in
= TYPE_VECTOR_SUBPARTS (vectype
);
6110 if (maybe_ne (nunits_out
, nunits_in
))
6113 tree vectype2
= NULL_TREE
, vectype3
= NULL_TREE
;
6114 slp_tree slp_op1
= NULL
, slp_op2
= NULL
;
6115 if (op_type
== binary_op
|| op_type
== ternary_op
)
6117 if (!vect_is_simple_use (vinfo
, stmt_info
, slp_node
,
6118 1, &op1
, &slp_op1
, &dt
[1], &vectype2
))
6120 if (dump_enabled_p ())
6121 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6122 "use not simple.\n");
6126 if (op_type
== ternary_op
)
6128 if (!vect_is_simple_use (vinfo
, stmt_info
, slp_node
,
6129 2, &op2
, &slp_op2
, &dt
[2], &vectype3
))
6131 if (dump_enabled_p ())
6132 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6133 "use not simple.\n");
6138 /* Multiple types in SLP are handled by creating the appropriate number of
6139 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
6144 vec_num
= SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
);
6148 ncopies
= vect_get_num_copies (loop_vinfo
, vectype
);
6152 gcc_assert (ncopies
>= 1);
6154 /* Reject attempts to combine mask types with nonmask types, e.g. if
6155 we have an AND between a (nonmask) boolean loaded from memory and
6156 a (mask) boolean result of a comparison.
6158 TODO: We could easily fix these cases up using pattern statements. */
6159 if (VECTOR_BOOLEAN_TYPE_P (vectype
) != mask_op_p
6160 || (vectype2
&& VECTOR_BOOLEAN_TYPE_P (vectype2
) != mask_op_p
)
6161 || (vectype3
&& VECTOR_BOOLEAN_TYPE_P (vectype3
) != mask_op_p
))
6163 if (dump_enabled_p ())
6164 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6165 "mixed mask and nonmask vector types\n");
6169 /* Supportable by target? */
6171 vec_mode
= TYPE_MODE (vectype
);
6172 if (code
== MULT_HIGHPART_EXPR
)
6173 target_support_p
= can_mult_highpart_p (vec_mode
, TYPE_UNSIGNED (vectype
));
6176 optab
= optab_for_tree_code (code
, vectype
, optab_default
);
6179 if (dump_enabled_p ())
6180 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6184 target_support_p
= (optab_handler (optab
, vec_mode
)
6185 != CODE_FOR_nothing
);
6188 if (!target_support_p
)
6190 if (dump_enabled_p ())
6191 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6192 "op not supported by target.\n");
6193 /* Check only during analysis. */
6194 if (maybe_ne (GET_MODE_SIZE (vec_mode
), UNITS_PER_WORD
)
6195 || (!vec_stmt
&& !vect_worthwhile_without_simd_p (vinfo
, code
)))
6197 if (dump_enabled_p ())
6198 dump_printf_loc (MSG_NOTE
, vect_location
,
6199 "proceeding using word mode.\n");
6202 /* Worthwhile without SIMD support? Check only during analysis. */
6203 if (!VECTOR_MODE_P (vec_mode
)
6205 && !vect_worthwhile_without_simd_p (vinfo
, code
))
6207 if (dump_enabled_p ())
6208 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6209 "not worthwhile without SIMD support.\n");
6213 int reduc_idx
= STMT_VINFO_REDUC_IDX (stmt_info
);
6214 vec_loop_masks
*masks
= (loop_vinfo
? &LOOP_VINFO_MASKS (loop_vinfo
) : NULL
);
6215 internal_fn cond_fn
= get_conditional_internal_fn (code
);
6217 if (!vec_stmt
) /* transformation not required. */
6219 /* If this operation is part of a reduction, a fully-masked loop
6220 should only change the active lanes of the reduction chain,
6221 keeping the inactive lanes as-is. */
6223 && LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo
)
6226 if (cond_fn
== IFN_LAST
6227 || !direct_internal_fn_supported_p (cond_fn
, vectype
,
6228 OPTIMIZE_FOR_SPEED
))
6230 if (dump_enabled_p ())
6231 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6232 "can't use a fully-masked loop because no"
6233 " conditional operation is available.\n");
6234 LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo
) = false;
6237 vect_record_loop_mask (loop_vinfo
, masks
, ncopies
* vec_num
,
6241 /* Put types on constant and invariant SLP children. */
6243 && (!vect_maybe_update_slp_op_vectype (slp_op0
, vectype
)
6244 || !vect_maybe_update_slp_op_vectype (slp_op1
, vectype
)
6245 || !vect_maybe_update_slp_op_vectype (slp_op2
, vectype
)))
6247 if (dump_enabled_p ())
6248 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6249 "incompatible vector types for invariants\n");
6253 STMT_VINFO_TYPE (stmt_info
) = op_vec_info_type
;
6254 DUMP_VECT_SCOPE ("vectorizable_operation");
6255 vect_model_simple_cost (vinfo
, stmt_info
,
6256 ncopies
, dt
, ndts
, slp_node
, cost_vec
);
6262 if (dump_enabled_p ())
6263 dump_printf_loc (MSG_NOTE
, vect_location
,
6264 "transform binary/unary operation.\n");
6266 bool masked_loop_p
= loop_vinfo
&& LOOP_VINFO_FULLY_MASKED_P (loop_vinfo
);
6268 /* POINTER_DIFF_EXPR has pointer arguments which are vectorized as
6269 vectors with unsigned elements, but the result is signed. So, we
6270 need to compute the MINUS_EXPR into vectype temporary and
6271 VIEW_CONVERT_EXPR it into the final vectype_out result. */
6272 tree vec_cvt_dest
= NULL_TREE
;
6273 if (orig_code
== POINTER_DIFF_EXPR
)
6275 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
6276 vec_cvt_dest
= vect_create_destination_var (scalar_dest
, vectype_out
);
6280 vec_dest
= vect_create_destination_var (scalar_dest
, vectype_out
);
6282 /* In case the vectorization factor (VF) is bigger than the number
6283 of elements that we can fit in a vectype (nunits), we have to generate
6284 more than one vector stmt - i.e - we need to "unroll" the
6285 vector stmt by a factor VF/nunits. In doing so, we record a pointer
6286 from one copy of the vector stmt to the next, in the field
6287 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
6288 stages to find the correct vector defs to be used when vectorizing
6289 stmts that use the defs of the current stmt. The example below
6290 illustrates the vectorization process when VF=16 and nunits=4 (i.e.,
6291 we need to create 4 vectorized stmts):
6293 before vectorization:
6294 RELATED_STMT VEC_STMT
6298 step 1: vectorize stmt S1 (done in vectorizable_load. See more details
6300 RELATED_STMT VEC_STMT
6301 VS1_0: vx0 = memref0 VS1_1 -
6302 VS1_1: vx1 = memref1 VS1_2 -
6303 VS1_2: vx2 = memref2 VS1_3 -
6304 VS1_3: vx3 = memref3 - -
6305 S1: x = load - VS1_0
6308 step2: vectorize stmt S2 (done here):
6309 To vectorize stmt S2 we first need to find the relevant vector
6310 def for the first operand 'x'. This is, as usual, obtained from
6311 the vector stmt recorded in the STMT_VINFO_VEC_STMT of the stmt
6312 that defines 'x' (S1). This way we find the stmt VS1_0, and the
6313 relevant vector def 'vx0'. Having found 'vx0' we can generate
6314 the vector stmt VS2_0, and as usual, record it in the
6315 STMT_VINFO_VEC_STMT of stmt S2.
6316 When creating the second copy (VS2_1), we obtain the relevant vector
6317 def from the vector stmt recorded in the STMT_VINFO_RELATED_STMT of
6318 stmt VS1_0. This way we find the stmt VS1_1 and the relevant
6319 vector def 'vx1'. Using 'vx1' we create stmt VS2_1 and record a
6320 pointer to it in the STMT_VINFO_RELATED_STMT of the vector stmt VS2_0.
6321 Similarly when creating stmts VS2_2 and VS2_3. This is the resulting
6322 chain of stmts and pointers:
6323 RELATED_STMT VEC_STMT
6324 VS1_0: vx0 = memref0 VS1_1 -
6325 VS1_1: vx1 = memref1 VS1_2 -
6326 VS1_2: vx2 = memref2 VS1_3 -
6327 VS1_3: vx3 = memref3 - -
6328 S1: x = load - VS1_0
6329 VS2_0: vz0 = vx0 + v1 VS2_1 -
6330 VS2_1: vz1 = vx1 + v1 VS2_2 -
6331 VS2_2: vz2 = vx2 + v1 VS2_3 -
6332 VS2_3: vz3 = vx3 + v1 - -
6333 S2: z = x + 1 - VS2_0 */
6335 prev_stmt_info
= NULL
;
6336 for (j
= 0; j
< ncopies
; j
++)
6341 if (op_type
== binary_op
)
6342 vect_get_vec_defs (vinfo
, op0
, op1
, stmt_info
,
6343 &vec_oprnds0
, &vec_oprnds1
, slp_node
);
6344 else if (op_type
== ternary_op
)
6348 auto_vec
<vec
<tree
> > vec_defs(3);
6349 vect_get_slp_defs (vinfo
, slp_node
, &vec_defs
);
6350 vec_oprnds0
= vec_defs
[0];
6351 vec_oprnds1
= vec_defs
[1];
6352 vec_oprnds2
= vec_defs
[2];
6356 vect_get_vec_defs (vinfo
, op0
, op1
, stmt_info
, &vec_oprnds0
,
6357 &vec_oprnds1
, NULL
);
6358 vect_get_vec_defs (vinfo
, op2
, NULL_TREE
, stmt_info
,
6359 &vec_oprnds2
, NULL
, NULL
);
6363 vect_get_vec_defs (vinfo
, op0
, NULL_TREE
, stmt_info
, &vec_oprnds0
,
6368 vect_get_vec_defs_for_stmt_copy (vinfo
, &vec_oprnds0
, &vec_oprnds1
);
6369 if (op_type
== ternary_op
)
6371 tree vec_oprnd
= vec_oprnds2
.pop ();
6372 vec_oprnds2
.quick_push (vect_get_vec_def_for_stmt_copy (vinfo
,
6377 /* Arguments are ready. Create the new vector stmt. */
6378 stmt_vec_info new_stmt_info
= NULL
;
6379 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vop0
)
6381 vop1
= ((op_type
== binary_op
|| op_type
== ternary_op
)
6382 ? vec_oprnds1
[i
] : NULL_TREE
);
6383 vop2
= ((op_type
== ternary_op
)
6384 ? vec_oprnds2
[i
] : NULL_TREE
);
6385 if (masked_loop_p
&& reduc_idx
>= 0)
6387 /* Perform the operation on active elements only and take
6388 inactive elements from the reduction chain input. */
6390 vop2
= reduc_idx
== 1 ? vop1
: vop0
;
6391 tree mask
= vect_get_loop_mask (gsi
, masks
, vec_num
* ncopies
,
6392 vectype
, i
* ncopies
+ j
);
6393 gcall
*call
= gimple_build_call_internal (cond_fn
, 4, mask
,
6395 new_temp
= make_ssa_name (vec_dest
, call
);
6396 gimple_call_set_lhs (call
, new_temp
);
6397 gimple_call_set_nothrow (call
, true);
6399 = vect_finish_stmt_generation (vinfo
, stmt_info
, call
, gsi
);
6403 gassign
*new_stmt
= gimple_build_assign (vec_dest
, code
,
6405 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
6406 gimple_assign_set_lhs (new_stmt
, new_temp
);
6408 = vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
6411 new_temp
= build1 (VIEW_CONVERT_EXPR
, vectype_out
, new_temp
);
6413 = gimple_build_assign (vec_cvt_dest
, VIEW_CONVERT_EXPR
,
6415 new_temp
= make_ssa_name (vec_cvt_dest
, new_stmt
);
6416 gimple_assign_set_lhs (new_stmt
, new_temp
);
6417 new_stmt_info
= vect_finish_stmt_generation (vinfo
, stmt_info
,
6422 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt_info
);
6429 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt_info
;
6431 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt_info
;
6432 prev_stmt_info
= new_stmt_info
;
6435 vec_oprnds0
.release ();
6436 vec_oprnds1
.release ();
6437 vec_oprnds2
.release ();
6442 /* A helper function to ensure data reference DR_INFO's base alignment. */
6445 ensure_base_align (dr_vec_info
*dr_info
)
  /* If the misalignment was never computed there is nothing to base a
     realignment decision on.  */
6447 if (dr_info
->misalignment
== DR_MISALIGNMENT_UNINITIALIZED
)
  /* Only act when the analysis recorded that the base object itself needs
     a larger alignment.  */
6450 if (dr_info
->base_misaligned
)
6452 tree base_decl
= dr_info
->base_decl
;
6454 // We should only be able to increase the alignment of a base object if
6455 // we know what its new alignment should be at compile time.
6456 unsigned HOST_WIDE_INT align_base_to
=
6457 DR_TARGET_ALIGNMENT (dr_info
).to_constant () * BITS_PER_UNIT
;
  /* Declarations tracked in the symbol table must be realigned through the
     symtab node so every reference agrees; purely local decls can have
     their DECL_ALIGN raised directly.  */
6459 if (decl_in_symtab_p (base_decl
))
6460 symtab_node::get (base_decl
)->increase_alignment (align_base_to
);
6461 else if (DECL_ALIGN (base_decl
) < align_base_to
)
6463 SET_DECL_ALIGN (base_decl
, align_base_to
);
  /* Mark the new alignment as if the user had requested it explicitly.  */
6464 DECL_USER_ALIGN (base_decl
) = 1;
  /* Record that the base has been fixed up so we do not realign twice.  */
6466 dr_info
->base_misaligned
= false;
6471 /* Function get_group_alias_ptr_type.
6473 Return the alias type for the group starting at FIRST_STMT_INFO. */
6476 get_group_alias_ptr_type (stmt_vec_info first_stmt_info
)
6478 struct data_reference
*first_dr
, *next_dr
;
6480 first_dr
= STMT_VINFO_DATA_REF (first_stmt_info
);
6481 stmt_vec_info next_stmt_info
= DR_GROUP_NEXT_ELEMENT (first_stmt_info
);
  /* Walk the remaining members of the interleaving group: a single alias
     pointer type is only valid if every member agrees on the alias set.  */
6482 while (next_stmt_info
)
6484 next_dr
= STMT_VINFO_DATA_REF (next_stmt_info
);
6485 if (get_alias_set (DR_REF (first_dr
))
6486 != get_alias_set (DR_REF (next_dr
)))
6488 if (dump_enabled_p ())
6489 dump_printf_loc (MSG_NOTE
, vect_location
,
6490 "conflicting alias set types.\n");
  /* Alias sets differ within the group: fall back to ptr_type_node,
     which conflicts with everything.  */
6491 return ptr_type_node
;
6493 next_stmt_info
= DR_GROUP_NEXT_ELEMENT (next_stmt_info
);
  /* All members share the first reference's alias set; use its alias
     pointer type for the whole group.  */
6495 return reference_alias_ptr_type (DR_REF (first_dr
));
6499 /* Function scan_operand_equal_p.
6501 Helper function for check_scan_store. Compare two references
6502 with .GOMP_SIMD_LANE bases. */
6505 scan_operand_equal_p (tree ref1
, tree ref2
)
6507 tree ref
[2] = { ref1
, ref2
};
6508 poly_int64 bitsize
[2], bitpos
[2];
6509 tree offset
[2], base
[2];
  /* Decompose both references into base + constant bit position +
     variable offset.  */
6510 for (int i
= 0; i
< 2; ++i
)
6513 int unsignedp
, reversep
, volatilep
= 0;
6514 base
[i
] = get_inner_reference (ref
[i
], &bitsize
[i
], &bitpos
[i
],
6515 &offset
[i
], &mode
, &unsignedp
,
6516 &reversep
, &volatilep
);
  /* Reject reverse storage order, volatile accesses and anything not
     placed at bit position zero.  */
6517 if (reversep
|| volatilep
|| maybe_ne (bitpos
[i
], 0))
  /* Look through a MEM_REF whose address is an SSA name defined as
     ADDR_EXPR + SSA-name offset (POINTER_PLUS_EXPR): fold that addition
     into BASE/OFFSET, insisting the MEM_REF's own offset is zero.  */
6519 if (TREE_CODE (base
[i
]) == MEM_REF
6520 && offset
[i
] == NULL_TREE
6521 && TREE_CODE (TREE_OPERAND (base
[i
], 0)) == SSA_NAME
)
6523 gimple
*def_stmt
= SSA_NAME_DEF_STMT (TREE_OPERAND (base
[i
], 0));
6524 if (is_gimple_assign (def_stmt
)
6525 && gimple_assign_rhs_code (def_stmt
) == POINTER_PLUS_EXPR
6526 && TREE_CODE (gimple_assign_rhs1 (def_stmt
)) == ADDR_EXPR
6527 && TREE_CODE (gimple_assign_rhs2 (def_stmt
)) == SSA_NAME
)
6529 if (maybe_ne (mem_ref_offset (base
[i
]), 0))
6531 base
[i
] = TREE_OPERAND (gimple_assign_rhs1 (def_stmt
), 0);
6532 offset
[i
] = gimple_assign_rhs2 (def_stmt
);
  /* The decomposed pieces of the two references must now match.  */
6537 if (!operand_equal_p (base
[0], base
[1], 0))
6539 if (maybe_ne (bitsize
[0], bitsize
[1]))
6541 if (offset
[0] != offset
[1])
6543 if (!offset
[0] || !offset
[1])
6545 if (!operand_equal_p (offset
[0], offset
[1], 0))
  /* Split each variable offset into OFFSET * STEP (step defaults to 1),
     recognizing a MULT either as the offset's defining statement or as
     an embedded MULT_EXPR.  */
6548 for (int i
= 0; i
< 2; ++i
)
6550 step
[i
] = integer_one_node
;
6551 if (TREE_CODE (offset
[i
]) == SSA_NAME
)
6553 gimple
*def_stmt
= SSA_NAME_DEF_STMT (offset
[i
]);
6554 if (is_gimple_assign (def_stmt
)
6555 && gimple_assign_rhs_code (def_stmt
) == MULT_EXPR
6556 && (TREE_CODE (gimple_assign_rhs2 (def_stmt
))
6559 step
[i
] = gimple_assign_rhs2 (def_stmt
);
6560 offset
[i
] = gimple_assign_rhs1 (def_stmt
);
6563 else if (TREE_CODE (offset
[i
]) == MULT_EXPR
)
6565 step
[i
] = TREE_OPERAND (offset
[i
], 1);
6566 offset
[i
] = TREE_OPERAND (offset
[i
], 0);
  /* Peel an integral conversion of the offset (via its SSA def or an
     embedded CONVERT_EXPR) when it does not narrow the value, so a
     widening cast does not defeat the comparison.  */
6568 tree rhs1
= NULL_TREE
;
6569 if (TREE_CODE (offset
[i
]) == SSA_NAME
)
6571 gimple
*def_stmt
= SSA_NAME_DEF_STMT (offset
[i
]);
6572 if (gimple_assign_cast_p (def_stmt
))
6573 rhs1
= gimple_assign_rhs1 (def_stmt
);
6575 else if (CONVERT_EXPR_P (offset
[i
]))
6576 rhs1
= TREE_OPERAND (offset
[i
], 0);
6578 && INTEGRAL_TYPE_P (TREE_TYPE (rhs1
))
6579 && INTEGRAL_TYPE_P (TREE_TYPE (offset
[i
]))
6580 && (TYPE_PRECISION (TREE_TYPE (offset
[i
]))
6581 >= TYPE_PRECISION (TREE_TYPE (rhs1
))))
  /* Finally the peeled offsets and their steps must both match.  */
6584 if (!operand_equal_p (offset
[0], offset
[1], 0)
6585 || !operand_equal_p (step
[0], step
[1], 0))
/* Kinds of operations used to realize one step of an OpenMP scan-store
   reduction permutation sequence (consumed by scan_store_can_perm_p and
   vectorizable_scan_store below).
   NOTE(review): this chunk appears to be a lossy extraction -- the enum's
   closing brace is not visible here; verify against the upstream file.  */
6593 enum scan_store_kind
{
6594 /* Normal permutation. */
6595 scan_store_kind_perm
,
6597 /* Whole vector left shift permutation with zero init. */
6598 scan_store_kind_lshift_zero
,
6600 /* Whole vector left shift permutation and VEC_COND_EXPR. */
6601 scan_store_kind_lshift_cond
6604 /* Function scan_store_can_perm_p.
6606 Verify if we can perform the needed permutations or whole vector shifts.
6607 Return -1 on failure, otherwise exact log2 of vectype's nunits.
6608 USE_WHOLE_VECTOR is a vector of enum scan_store_kind which operation
6609 to do at each step.
NOTE(review): the original comment named this "check_scan_store"; renamed
here to match the function actually being documented.  This chunk is a
lossy extraction (interior lines such as returns/braces are missing), so
the comments below describe only what the visible code establishes.  */
6612 scan_store_can_perm_p (tree vectype
, tree init
,
6613 vec
<enum scan_store_kind
> *use_whole_vector
= NULL
)
/* Only constant-length vectors are supported: nunits must be a
   compile-time constant and an exact power of two.  */
6615 enum machine_mode vec_mode
= TYPE_MODE (vectype
);
6616 unsigned HOST_WIDE_INT nunits
;
6617 if (!TYPE_VECTOR_SUBPARTS (vectype
).is_constant (&nunits
))
6619 int units_log2
= exact_log2 (nunits
);
6620 if (units_log2
<= 0)
/* Try each of the units_log2 + 1 permutation steps; the last step
   (i == units_log2) broadcasts the final lane.  */
6624 enum scan_store_kind whole_vector_shift_kind
= scan_store_kind_perm
;
6625 for (i
= 0; i
<= units_log2
; ++i
)
6627 unsigned HOST_WIDE_INT j
, k
;
6628 enum scan_store_kind kind
= scan_store_kind_perm
;
6629 vec_perm_builder
sel (nunits
, nunits
, 1);
6630 sel
.quick_grow (nunits
);
6631 if (i
== units_log2
)
6633 for (j
= 0; j
< nunits
; ++j
)
6634 sel
[j
] = nunits
- 1;
6638 for (j
= 0; j
< (HOST_WIDE_INT_1U
<< i
); ++j
)
6640 for (k
= 0; j
< nunits
; ++j
, ++k
)
6641 sel
[j
] = nunits
+ k
;
6643 vec_perm_indices
indices (sel
, i
== units_log2
? 1 : 2, nunits
);
6644 if (!can_vec_perm_const_p (vec_mode
, indices
))
6646 if (i
== units_log2
)
/* Permutation not directly supported; presumably fall back to a
   whole-vector left shift (vec_shl_optab) -- TODO confirm, the
   surrounding control flow is not visible in this extraction.  */
6649 if (whole_vector_shift_kind
== scan_store_kind_perm
)
6651 if (optab_handler (vec_shl_optab
, vec_mode
) == CODE_FOR_nothing
)
6653 whole_vector_shift_kind
= scan_store_kind_lshift_zero
;
6654 /* Whole vector shifts shift in zeros, so if init is all zero
6655 constant, there is no need to do anything further. */
6656 if ((TREE_CODE (init
) != INTEGER_CST
6657 && TREE_CODE (init
) != REAL_CST
)
6658 || !initializer_zerop (init
))
/* Non-zero INIT: the shifted-in zeros must be patched via
   VEC_COND_EXPR, which the target has to support.  */
6660 tree masktype
= truth_type_for (vectype
);
6661 if (!expand_vec_cond_expr_p (vectype
, masktype
, VECTOR_CST
))
6663 whole_vector_shift_kind
= scan_store_kind_lshift_cond
;
6666 kind
= whole_vector_shift_kind
;
/* Record the chosen kind per step for the caller, growing the
   output vector lazily only once a non-perm kind appears.  */
6668 if (use_whole_vector
)
6670 if (kind
!= scan_store_kind_perm
&& use_whole_vector
->is_empty ())
6671 use_whole_vector
->safe_grow_cleared (i
);
6672 if (kind
!= scan_store_kind_perm
|| !use_whole_vector
->is_empty ())
6673 use_whole_vector
->safe_push (kind
);
6681 /* Function check_scan_store.
6683 Check magic stores for #pragma omp scan {in,ex}clusive reductions.
NOTE(review): this chunk is a lossy extraction -- many interior lines
(returns, braces, parts of conditions) are missing, so comments below are
limited to what the visible code establishes.  The function pattern-matches
the stores emitted by OpenMP lowering for inscan reductions and verifies
the target can vectorize them.  */
6686 check_scan_store (vec_info
*vinfo
, stmt_vec_info stmt_info
, tree vectype
,
6687 enum vect_def_type rhs_dt
, bool slp
, tree mask
,
6688 vect_memory_access_type memory_access_type
)
6690 loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
);
6691 dr_vec_info
*dr_info
= STMT_VINFO_DR_INFO (stmt_info
);
6694 gcc_assert (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) > 1);
/* Reject anything but a plain contiguous store to a VAR_DECL-based
   "omp simd array" in an unmasked, non-grouped loop.  */
6697 || memory_access_type
!= VMAT_CONTIGUOUS
6698 || TREE_CODE (DR_BASE_ADDRESS (dr_info
->dr
)) != ADDR_EXPR
6699 || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (dr_info
->dr
), 0))
6700 || loop_vinfo
== NULL
6701 || LOOP_VINFO_FULLY_MASKED_P (loop_vinfo
)
6702 || STMT_VINFO_GROUPED_ACCESS (stmt_info
)
6703 || !integer_zerop (get_dr_vinfo_offset (vinfo
, dr_info
))
6704 || !integer_zerop (DR_INIT (dr_info
->dr
))
6705 || !(ref_type
= reference_alias_ptr_type (DR_REF (dr_info
->dr
)))
6706 || !alias_sets_conflict_p (get_alias_set (vectype
),
6707 get_alias_set (TREE_TYPE (ref_type
))))
6709 if (dump_enabled_p ())
6710 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6711 "unsupported OpenMP scan store.\n");
6715 /* We need to pattern match code built by OpenMP lowering and simplified
6716 by following optimizations into something we can handle.
6717 #pragma omp simd reduction(inscan,+:r)
6721 #pragma omp scan inclusive (r)
6724 shall have body with:
6725 // Initialization for input phase, store the reduction initializer:
6726 _20 = .GOMP_SIMD_LANE (simduid.3_14(D), 0);
6727 _21 = .GOMP_SIMD_LANE (simduid.3_14(D), 1);
6729 // Actual input phase:
6731 r.0_5 = D.2042[_20];
6734 // Initialization for scan phase:
6735 _25 = .GOMP_SIMD_LANE (simduid.3_14(D), 2);
6741 // Actual scan phase:
6743 r.1_8 = D.2042[_20];
6745 The "omp simd array" variable D.2042 holds the privatized copy used
6746 inside of the loop and D.2043 is another one that holds copies of
6747 the current original list item. The separate GOMP_SIMD_LANE ifn
6748 kinds are there in order to allow optimizing the initializer store
6749 and combiner sequence, e.g. if it is originally some C++ish user
6750 defined reduction, but allow the vectorizer to pattern recognize it
6751 and turn into the appropriate vectorized scan.
6753 For exclusive scan, this is slightly different:
6754 #pragma omp simd reduction(inscan,+:r)
6758 #pragma omp scan exclusive (r)
6761 shall have body with:
6762 // Initialization for input phase, store the reduction initializer:
6763 _20 = .GOMP_SIMD_LANE (simduid.3_14(D), 0);
6764 _21 = .GOMP_SIMD_LANE (simduid.3_14(D), 1);
6766 // Actual input phase:
6768 r.0_5 = D.2042[_20];
6771 // Initialization for scan phase:
6772 _25 = .GOMP_SIMD_LANE (simduid.3_14(D), 3);
6778 // Actual scan phase:
6780 r.1_8 = D.2044[_20];
/* SIMD lane access kind 2 marks the initializer store: cache the
   initializer value in loop_vinfo->scan_map, keyed by the array var.  */
6783 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) == 2)
6785 /* Match the D.2042[_21] = 0; store above. Just require that
6786 it is a constant or external definition store. */
6787 if (rhs_dt
!= vect_constant_def
&& rhs_dt
!= vect_external_def
)
6790 if (dump_enabled_p ())
6791 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6792 "unsupported OpenMP scan initializer store.\n");
6796 if (! loop_vinfo
->scan_map
)
6797 loop_vinfo
->scan_map
= new hash_map
<tree
, tree
>;
6798 tree var
= TREE_OPERAND (DR_BASE_ADDRESS (dr_info
->dr
), 0);
6799 tree
&cached
= loop_vinfo
->scan_map
->get_or_insert (var
);
6802 cached
= gimple_assign_rhs1 (STMT_VINFO_STMT (stmt_info
));
6804 /* These stores can be vectorized normally. */
/* From here on: the scan-phase store.  Its rhs must be an SSA name
   defined by the reduction combiner statement.  */
6808 if (rhs_dt
!= vect_internal_def
)
6811 if (dump_enabled_p ())
6812 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6813 "unsupported OpenMP scan combiner pattern.\n");
6817 gimple
*stmt
= STMT_VINFO_STMT (stmt_info
)
;
6818 tree rhs
= gimple_assign_rhs1 (stmt
);
6819 if (TREE_CODE (rhs
) != SSA_NAME
)
6822 gimple
*other_store_stmt
= NULL
;
6823 tree var
= TREE_OPERAND (DR_BASE_ADDRESS (dr_info
->dr
), 0);
6824 bool inscan_var_store
6825 = lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var
)) != NULL
;
/* Kind 4 (exclusive scan): walk the immediate uses of RHS to locate
   the companion store into the other omp simd array.  */
6827 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) == 4)
6829 if (!inscan_var_store
)
6831 use_operand_p use_p
;
6832 imm_use_iterator iter
;
6833 FOR_EACH_IMM_USE_FAST (use_p
, iter
, rhs
)
6835 gimple
*use_stmt
= USE_STMT (use_p
);
6836 if (use_stmt
== stmt
|| is_gimple_debug (use_stmt
))
6838 if (gimple_bb (use_stmt
) != gimple_bb (stmt
)
6839 || !is_gimple_assign (use_stmt
)
6840 || gimple_assign_rhs_class (use_stmt
) != GIMPLE_BINARY_RHS
6842 || TREE_CODE (gimple_assign_lhs (use_stmt
)) != SSA_NAME
)
6844 other_store_stmt
= use_stmt
;
6846 if (other_store_stmt
== NULL
)
6848 rhs
= gimple_assign_lhs (other_store_stmt
);
6849 if (!single_imm_use (rhs
, &use_p
, &other_store_stmt
))
/* Kind 3 (inclusive scan): the other store is a direct use of RHS.  */
6853 else if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) == 3)
6855 use_operand_p use_p
;
6856 imm_use_iterator iter
;
6857 FOR_EACH_IMM_USE_FAST (use_p
, iter
, rhs
)
6859 gimple
*use_stmt
= USE_STMT (use_p
);
6860 if (use_stmt
== stmt
|| is_gimple_debug (use_stmt
))
6862 if (other_store_stmt
)
6864 other_store_stmt
= use_stmt
;
/* The defining statement of RHS must be the commutative binary
   combiner whose operands are loads from the two simd arrays.  */
6870 gimple
*def_stmt
= SSA_NAME_DEF_STMT (rhs
);
6871 if (gimple_bb (def_stmt
) != gimple_bb (stmt
)
6872 || !is_gimple_assign (def_stmt
)
6873 || gimple_assign_rhs_class (def_stmt
) != GIMPLE_BINARY_RHS
)
6876 enum tree_code code
= gimple_assign_rhs_code (def_stmt
);
6877 /* For pointer addition, we should use the normal plus for the vector
6881 case POINTER_PLUS_EXPR
:
6884 case MULT_HIGHPART_EXPR
:
6889 if (TREE_CODE_LENGTH (code
) != binary_op
|| !commutative_tree_code (code
))
6892 tree rhs1
= gimple_assign_rhs1 (def_stmt
);
6893 tree rhs2
= gimple_assign_rhs2 (def_stmt
);
6894 if (TREE_CODE (rhs1
) != SSA_NAME
|| TREE_CODE (rhs2
) != SSA_NAME
)
6897 gimple
*load1_stmt
= SSA_NAME_DEF_STMT (rhs1
);
6898 gimple
*load2_stmt
= SSA_NAME_DEF_STMT (rhs2
);
6899 if (gimple_bb (load1_stmt
) != gimple_bb (stmt
)
6900 || !gimple_assign_load_p (load1_stmt
)
6901 || gimple_bb (load2_stmt
) != gimple_bb (stmt
)
6902 || !gimple_assign_load_p (load2_stmt
))
6905 stmt_vec_info load1_stmt_info
= loop_vinfo
->lookup_stmt (load1_stmt
);
6906 stmt_vec_info load2_stmt_info
= loop_vinfo
->lookup_stmt (load2_stmt
);
6907 if (load1_stmt_info
== NULL
6908 || load2_stmt_info
== NULL
6909 || (STMT_VINFO_SIMD_LANE_ACCESS_P (load1_stmt_info
)
6910 != STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
))
6911 || (STMT_VINFO_SIMD_LANE_ACCESS_P (load2_stmt_info
)
6912 != STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
)))
6915 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) == 4 && inscan_var_store
)
6917 dr_vec_info
*load1_dr_info
= STMT_VINFO_DR_INFO (load1_stmt_info
);
6918 if (TREE_CODE (DR_BASE_ADDRESS (load1_dr_info
->dr
)) != ADDR_EXPR
6919 || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (load1_dr_info
->dr
), 0)))
6921 tree var1
= TREE_OPERAND (DR_BASE_ADDRESS (load1_dr_info
->dr
), 0);
6923 if (lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1
)))
6927 use_operand_p use_p
;
6928 imm_use_iterator iter
;
6929 FOR_EACH_IMM_USE_FAST (use_p
, iter
, lrhs
)
6931 gimple
*use_stmt
= USE_STMT (use_p
);
6932 if (use_stmt
== def_stmt
|| is_gimple_debug (use_stmt
))
6934 if (other_store_stmt
)
6936 other_store_stmt
= use_stmt
;
6940 if (other_store_stmt
== NULL
)
6942 if (gimple_bb (other_store_stmt
) != gimple_bb (stmt
)
6943 || !gimple_store_p (other_store_stmt
))
6946 stmt_vec_info other_store_stmt_info
6947 = loop_vinfo
->lookup_stmt (other_store_stmt
);
6948 if (other_store_stmt_info
== NULL
6949 || (STMT_VINFO_SIMD_LANE_ACCESS_P (other_store_stmt_info
)
6950 != STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
)))
/* Canonicalize so that stmt1 is the store matched against load1:
   for exclusive scan into the non-inscan var, the roles swap.  */
6953 gimple
*stmt1
= stmt
;
6954 gimple
*stmt2
= other_store_stmt
;
6955 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) == 4 && !inscan_var_store
)
6956 std::swap (stmt1
, stmt2
);
6957 if (scan_operand_equal_p (gimple_assign_lhs (stmt1
),
6958 gimple_assign_rhs1 (load2_stmt
)))
6960 std::swap (rhs1
, rhs2
);
6961 std::swap (load1_stmt
, load2_stmt
);
6962 std::swap (load1_stmt_info
, load2_stmt_info
);
6964 if (!scan_operand_equal_p (gimple_assign_lhs (stmt1
),
6965 gimple_assign_rhs1 (load1_stmt
)))
6968 tree var3
= NULL_TREE
;
6969 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) == 3
6970 && !scan_operand_equal_p (gimple_assign_lhs (stmt2
),
6971 gimple_assign_rhs1 (load2_stmt
)))
6973 else if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) == 4)
6975 dr_vec_info
*load2_dr_info
= STMT_VINFO_DR_INFO (load2_stmt_info
);
6976 if (TREE_CODE (DR_BASE_ADDRESS (load2_dr_info
->dr
)) != ADDR_EXPR
6977 || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (load2_dr_info
->dr
), 0)))
6979 var3
= TREE_OPERAND (DR_BASE_ADDRESS (load2_dr_info
->dr
), 0);
6980 if (!lookup_attribute ("omp simd array", DECL_ATTRIBUTES (var3
))
6981 || lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var3
))
6982 || lookup_attribute ("omp simd inscan exclusive",
6983 DECL_ATTRIBUTES (var3
)))
/* Verify the two stored-to decls are both "omp simd array"s and that
   exactly one of them carries the "omp simd inscan" attribute.  */
6987 dr_vec_info
*other_dr_info
= STMT_VINFO_DR_INFO (other_store_stmt_info
);
6988 if (TREE_CODE (DR_BASE_ADDRESS (other_dr_info
->dr
)) != ADDR_EXPR
6989 || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (other_dr_info
->dr
), 0)))
6992 tree var1
= TREE_OPERAND (DR_BASE_ADDRESS (dr_info
->dr
), 0);
6993 tree var2
= TREE_OPERAND (DR_BASE_ADDRESS (other_dr_info
->dr
), 0);
6994 if (!lookup_attribute ("omp simd array", DECL_ATTRIBUTES (var1
))
6995 || !lookup_attribute ("omp simd array", DECL_ATTRIBUTES (var2
))
6996 || (!lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1
)))
6997 == (!lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var2
))))
7000 if (lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1
)))
7001 std::swap (var1
, var2
);
7003 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) == 4)
7005 if (!lookup_attribute ("omp simd inscan exclusive",
7006 DECL_ATTRIBUTES (var1
)))
/* The matching initializer store (kind 2) must already have been seen
   and recorded in scan_map for var1.  */
7011 if (loop_vinfo
->scan_map
== NULL
)
7013 tree
*init
= loop_vinfo
->scan_map
->get (var1
);
7017 /* The IL is as expected, now check if we can actually vectorize it.
7024 should be vectorized as (where _40 is the vectorized rhs
7025 from the D.2042[_21] = 0; store):
7026 _30 = MEM <vector(8) int> [(int *)&D.2043];
7027 _31 = MEM <vector(8) int> [(int *)&D.2042];
7028 _32 = VEC_PERM_EXPR <_40, _31, { 0, 8, 9, 10, 11, 12, 13, 14 }>;
7030 // _33 = { _31[0], _31[0]+_31[1], _31[1]+_31[2], ..., _31[6]+_31[7] };
7031 _34 = VEC_PERM_EXPR <_40, _33, { 0, 1, 8, 9, 10, 11, 12, 13 }>;
7033 // _35 = { _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
7034 // _31[1]+.._31[4], ... _31[4]+.._31[7] };
7035 _36 = VEC_PERM_EXPR <_40, _35, { 0, 1, 2, 3, 8, 9, 10, 11 }>;
7037 // _37 = { _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
7038 // _31[0]+.._31[4], ... _31[0]+.._31[7] };
7040 _39 = VEC_PERM_EXPR <_38, _38, { 7, 7, 7, 7, 7, 7, 7, 7 }>;
7041 MEM <vector(8) int> [(int *)&D.2043] = _39;
7042 MEM <vector(8) int> [(int *)&D.2042] = _38;
7049 should be vectorized as (where _40 is the vectorized rhs
7050 from the D.2042[_21] = 0; store):
7051 _30 = MEM <vector(8) int> [(int *)&D.2043];
7052 _31 = MEM <vector(8) int> [(int *)&D.2042];
7053 _32 = VEC_PERM_EXPR <_40, _31, { 0, 8, 9, 10, 11, 12, 13, 14 }>;
7054 _33 = VEC_PERM_EXPR <_40, _32, { 0, 8, 9, 10, 11, 12, 13, 14 }>;
7056 // _34 = { 0, _31[0], _31[0]+_31[1], _31[1]+_31[2], _31[2]+_31[3],
7057 // _31[3]+_31[4], ... _31[5]+.._31[6] };
7058 _35 = VEC_PERM_EXPR <_40, _34, { 0, 1, 8, 9, 10, 11, 12, 13 }>;
7060 // _36 = { 0, _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
7061 // _31[1]+.._31[4], ... _31[3]+.._31[6] };
7062 _37 = VEC_PERM_EXPR <_40, _36, { 0, 1, 2, 3, 8, 9, 10, 11 }>;
7064 // _38 = { 0, _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
7065 // _31[0]+.._31[4], ... _31[0]+.._31[6] };
7068 _51 = VEC_PERM_EXPR <_50, _50, { 7, 7, 7, 7, 7, 7, 7, 7 }>;
7069 MEM <vector(8) int> [(int *)&D.2044] = _39;
7070 MEM <vector(8) int> [(int *)&D.2042] = _51; */
/* Target capability checks: the combiner operation must have a vector
   optab, and the permutation sequence must be realizable.  */
7071 enum machine_mode vec_mode
= TYPE_MODE (vectype
);
7072 optab optab
= optab_for_tree_code (code
, vectype
, optab_default
);
7073 if (!optab
|| optab_handler (optab
, vec_mode
) == CODE_FOR_nothing
)
7076 int units_log2
= scan_store_can_perm_p (vectype
, *init
);
7077 if (units_log2
== -1)
7084 /* Function vectorizable_scan_store.
7086 Helper of vectorizable_store, arguments like on vectorizable_store.
7087 Handle only the transformation, checking is done in check_scan_store.
NOTE(review): the original comment read "vectorizable_score" -- corrected
to "vectorizable_store", which is the only similarly-named function in
this file.  This chunk is a lossy extraction (interior lines are missing),
so comments below describe only what the visible code establishes.  */
7090 vectorizable_scan_store (vec_info
*vinfo
,
7091 stmt_vec_info stmt_info
, gimple_stmt_iterator
*gsi
,
7092 stmt_vec_info
*vec_stmt
, int ncopies
)
7094 loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
);
7095 dr_vec_info
*dr_info
= STMT_VINFO_DR_INFO (stmt_info
);
7096 tree ref_type
= reference_alias_ptr_type (DR_REF (dr_info
->dr
));
7097 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
7099 if (dump_enabled_p ())
7100 dump_printf_loc (MSG_NOTE
, vect_location
,
7101 "transform scan store. ncopies = %d\n", ncopies
);
7103 gimple
*stmt
= STMT_VINFO_STMT (stmt_info
);
7104 tree rhs
= gimple_assign_rhs1 (stmt
);
7105 gcc_assert (TREE_CODE (rhs
) == SSA_NAME
);
7107 tree var
= TREE_OPERAND (DR_BASE_ADDRESS (dr_info
->dr
), 0);
7108 bool inscan_var_store
7109 = lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var
)) != NULL
;
/* For exclusive scan into the non-inscan var, chase RHS through its
   non-debug use to reach the combiner result (mirrors the pattern
   match in check_scan_store).  */
7111 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) == 4 && !inscan_var_store
)
7113 use_operand_p use_p
;
7114 imm_use_iterator iter
;
7115 FOR_EACH_IMM_USE_FAST (use_p
, iter
, rhs
)
7117 gimple
*use_stmt
= USE_STMT (use_p
);
7118 if (use_stmt
== stmt
|| is_gimple_debug (use_stmt
))
7120 rhs
= gimple_assign_lhs (use_stmt
);
/* Recover the combiner statement, its operands and the two loads from
   the simd arrays; check_scan_store already validated this shape.  */
7125 gimple
*def_stmt
= SSA_NAME_DEF_STMT (rhs
);
7126 enum tree_code code
= gimple_assign_rhs_code (def_stmt
);
7127 if (code
== POINTER_PLUS_EXPR
)
7129 gcc_assert (TREE_CODE_LENGTH (code
) == binary_op
7130 && commutative_tree_code (code
));
7131 tree rhs1
= gimple_assign_rhs1 (def_stmt
);
7132 tree rhs2
= gimple_assign_rhs2 (def_stmt
);
7133 gcc_assert (TREE_CODE (rhs1
) == SSA_NAME
&& TREE_CODE (rhs2
) == SSA_NAME
);
7134 gimple
*load1_stmt
= SSA_NAME_DEF_STMT (rhs1
);
7135 gimple
*load2_stmt
= SSA_NAME_DEF_STMT (rhs2
);
7136 stmt_vec_info load1_stmt_info
= loop_vinfo
->lookup_stmt (load1_stmt
);
7137 stmt_vec_info load2_stmt_info
= loop_vinfo
->lookup_stmt (load2_stmt
);
7138 dr_vec_info
*load1_dr_info
= STMT_VINFO_DR_INFO (load1_stmt_info
);
7139 dr_vec_info
*load2_dr_info
= STMT_VINFO_DR_INFO (load2_stmt_info
);
7140 tree var1
= TREE_OPERAND (DR_BASE_ADDRESS (load1_dr_info
->dr
), 0);
7141 tree var2
= TREE_OPERAND (DR_BASE_ADDRESS (load2_dr_info
->dr
), 0);
/* Canonicalize so var1 is the non-inscan array (the one whose
   initializer was cached in scan_map).  */
7143 if (lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1
)))
7145 std::swap (rhs1
, rhs2
);
7146 std::swap (var1
, var2
);
7147 std::swap (load1_dr_info
, load2_dr_info
);
7150 tree
*init
= loop_vinfo
->scan_map
->get (var1
);
/* Rebuild the same permutation masks scan_store_can_perm_p validated;
   gcc_assert confirms the check already succeeded.  */
7153 unsigned HOST_WIDE_INT nunits
;
7154 if (!TYPE_VECTOR_SUBPARTS (vectype
).is_constant (&nunits
))
7156 auto_vec
<enum scan_store_kind
, 16> use_whole_vector
;
7157 int units_log2
= scan_store_can_perm_p (vectype
, *init
, &use_whole_vector
);
7158 gcc_assert (units_log2
> 0);
7159 auto_vec
<tree
, 16> perms
;
7160 perms
.quick_grow (units_log2
+ 1);
7161 tree zero_vec
= NULL_TREE
, masktype
= NULL_TREE
;
7162 for (int i
= 0; i
<= units_log2
; ++i
)
7164 unsigned HOST_WIDE_INT j
, k
;
7165 vec_perm_builder
sel (nunits
, nunits
, 1);
7166 sel
.quick_grow (nunits
);
7167 if (i
== units_log2
)
7168 for (j
= 0; j
< nunits
; ++j
)
7169 sel
[j
] = nunits
- 1;
7172 for (j
= 0; j
< (HOST_WIDE_INT_1U
<< i
); ++j
)
7174 for (k
= 0; j
< nunits
; ++j
, ++k
)
7175 sel
[j
] = nunits
+ k
;
7177 vec_perm_indices
indices (sel
, i
== units_log2
? 1 : 2, nunits
);
7178 if (!use_whole_vector
.is_empty ()
7179 && use_whole_vector
[i
] != scan_store_kind_perm
)
7181 if (zero_vec
== NULL_TREE
)
7182 zero_vec
= build_zero_cst (vectype
);
7183 if (masktype
== NULL_TREE
7184 && use_whole_vector
[i
] == scan_store_kind_lshift_cond
)
7185 masktype
= truth_type_for (vectype
);
7186 perms
[i
] = vect_gen_perm_mask_any (vectype
, indices
);
7189 perms
[i
] = vect_gen_perm_mask_checked (vectype
, indices
);
7192 stmt_vec_info prev_stmt_info
= NULL
;
7193 tree vec_oprnd1
= NULL_TREE
;
7194 tree vec_oprnd2
= NULL_TREE
;
7195 tree vec_oprnd3
= NULL_TREE
;
7196 tree dataref_ptr
= DR_BASE_ADDRESS (dr_info
->dr
);
7197 tree dataref_offset
= build_int_cst (ref_type
, 0);
7198 tree bump
= vect_get_data_ptr_increment (vinfo
, dr_info
,
7199 vectype
, VMAT_CONTIGUOUS
);
7200 tree ldataref_ptr
= NULL_TREE
;
7201 tree orig
= NULL_TREE
;
7202 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) == 4 && !inscan_var_store
)
7203 ldataref_ptr
= DR_BASE_ADDRESS (load1_dr_info
->dr
);
/* Main transformation loop: emit one vectorized scan sequence per
   vector copy, chaining the emitted stmts via STMT_VINFO_RELATED_STMT.  */
7204 for (int j
= 0; j
< ncopies
; j
++)
7206 stmt_vec_info new_stmt_info
;
7209 vec_oprnd1
= vect_get_vec_def_for_operand (vinfo
, *init
, stmt_info
);
7210 if (ldataref_ptr
== NULL
)
7211 vec_oprnd2
= vect_get_vec_def_for_operand (vinfo
, rhs1
, stmt_info
);
7212 vec_oprnd3
= vect_get_vec_def_for_operand (vinfo
, rhs2
, stmt_info
);
7217 vec_oprnd1
= vect_get_vec_def_for_stmt_copy (vinfo
, vec_oprnd1
);
7218 if (ldataref_ptr
== NULL
)
7219 vec_oprnd2
= vect_get_vec_def_for_stmt_copy (vinfo
, vec_oprnd2
);
7220 vec_oprnd3
= vect_get_vec_def_for_stmt_copy (vinfo
, vec_oprnd3
);
7221 if (!inscan_var_store
)
7222 dataref_offset
= int_const_binop (PLUS_EXPR
, dataref_offset
, bump
);
/* When loading from the other array, materialize vec_oprnd2 via an
   explicit vector load instead of a vectorized def.  */
7227 vec_oprnd2
= make_ssa_name (vectype
);
7228 tree data_ref
= fold_build2 (MEM_REF
, vectype
,
7229 unshare_expr (ldataref_ptr
),
7231 vect_copy_ref_info (data_ref
, DR_REF (load1_dr_info
->dr
));
7232 gimple
*g
= gimple_build_assign (vec_oprnd2
, data_ref
);
7233 new_stmt_info
= vect_finish_stmt_generation (vinfo
, stmt_info
, g
, gsi
);
7234 if (prev_stmt_info
== NULL
)
7235 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt_info
;
7237 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt_info
;
7238 prev_stmt_info
= new_stmt_info
;
/* units_log2 doubling steps: each step permutes (or whole-vector
   shifts) V and combines with CODE, building the prefix scan.  */
7241 tree v
= vec_oprnd2
;
7242 for (int i
= 0; i
< units_log2
; ++i
)
7244 tree new_temp
= make_ssa_name (vectype
);
7245 gimple
*g
= gimple_build_assign (new_temp
, VEC_PERM_EXPR
,
7247 && (use_whole_vector
[i
]
7248 != scan_store_kind_perm
))
7249 ? zero_vec
: vec_oprnd1
, v
,
7251 new_stmt_info
= vect_finish_stmt_generation (vinfo
, stmt_info
, g
, gsi
);
7252 if (prev_stmt_info
== NULL
)
7253 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt_info
;
7255 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt_info
;
7256 prev_stmt_info
= new_stmt_info
;
7258 if (zero_vec
&& use_whole_vector
[i
] == scan_store_kind_lshift_cond
)
7260 /* Whole vector shift shifted in zero bits, but if *init
7261 is not initializer_zerop, we need to replace those elements
7262 with elements from vec_oprnd1. */
7263 tree_vector_builder
vb (masktype
, nunits
, 1);
7264 for (unsigned HOST_WIDE_INT k
= 0; k
< nunits
; ++k
)
7265 vb
.quick_push (k
< (HOST_WIDE_INT_1U
<< i
)
7266 ? boolean_false_node
: boolean_true_node
);
7268 tree new_temp2
= make_ssa_name (vectype
);
7269 g
= gimple_build_assign (new_temp2
, VEC_COND_EXPR
, vb
.build (),
7270 new_temp
, vec_oprnd1
);
7271 new_stmt_info
= vect_finish_stmt_generation (vinfo
, stmt_info
,
7273 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt_info
;
7274 prev_stmt_info
= new_stmt_info
;
7275 new_temp
= new_temp2
;
7278 /* For exclusive scan, perform the perms[i] permutation once
7281 && STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) == 4
7289 tree new_temp2
= make_ssa_name (vectype
);
7290 g
= gimple_build_assign (new_temp2
, code
, v
, new_temp
);
7291 new_stmt_info
= vect_finish_stmt_generation (vinfo
, stmt_info
, g
, gsi
);
7292 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt_info
;
7293 prev_stmt_info
= new_stmt_info
;
/* Presumably only reached for later copies (orig set by a previous
   iteration): carry the running total across vector copies -- TODO
   confirm, the guarding condition is not visible in this extraction.  */
7298 tree new_temp
= make_ssa_name (vectype
);
7299 gimple
*g
= gimple_build_assign (new_temp
, code
, orig
, v
);
7300 new_stmt_info
= vect_finish_stmt_generation (vinfo
, stmt_info
, g
, gsi
);
7301 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt_info
;
7302 prev_stmt_info
= new_stmt_info
;
7304 tree last_perm_arg
= new_temp
;
7305 /* For exclusive scan, new_temp computed above is the exclusive scan
7306 prefix sum. Turn it into inclusive prefix sum for the broadcast
7307 of the last element into orig. */
7308 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) == 4)
7310 last_perm_arg
= make_ssa_name (vectype
);
7311 g
= gimple_build_assign (last_perm_arg
, code
, new_temp
, vec_oprnd2
);
7312 new_stmt_info
= vect_finish_stmt_generation (vinfo
, stmt_info
, g
, gsi
);
7313 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt_info
;
7314 prev_stmt_info
= new_stmt_info
;
/* Broadcast the last lane (perms[units_log2]) into ORIG for use by
   the next copy and the final stores.  */
7317 orig
= make_ssa_name (vectype
);
7318 g
= gimple_build_assign (orig
, VEC_PERM_EXPR
, last_perm_arg
,
7319 last_perm_arg
, perms
[units_log2
]);
7320 new_stmt_info
= vect_finish_stmt_generation (vinfo
, stmt_info
, g
, gsi
);
7321 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt_info
;
7322 prev_stmt_info
= new_stmt_info
;
7324 if (!inscan_var_store
)
7326 tree data_ref
= fold_build2 (MEM_REF
, vectype
,
7327 unshare_expr (dataref_ptr
),
7329 vect_copy_ref_info (data_ref
, DR_REF (dr_info
->dr
));
7330 g
= gimple_build_assign (data_ref
, new_temp
);
7331 new_stmt_info
= vect_finish_stmt_generation (vinfo
, stmt_info
, g
, gsi
);
7332 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt_info
;
7333 prev_stmt_info
= new_stmt_info
;
/* For the inscan variable, the per-copy stores of the broadcast value
   ORIG are emitted after the main loop, one per copy.  */
7337 if (inscan_var_store
)
7338 for (int j
= 0; j
< ncopies
; j
++)
7341 dataref_offset
= int_const_binop (PLUS_EXPR
, dataref_offset
, bump
);
7343 tree data_ref
= fold_build2 (MEM_REF
, vectype
,
7344 unshare_expr (dataref_ptr
),
7346 vect_copy_ref_info (data_ref
, DR_REF (dr_info
->dr
));
7347 gimple
*g
= gimple_build_assign (data_ref
, orig
);
7348 stmt_vec_info new_stmt_info
7349 = vect_finish_stmt_generation (vinfo
, stmt_info
, g
, gsi
);
7350 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt_info
;
7351 prev_stmt_info
= new_stmt_info
;
7357 /* Function vectorizable_store.
7359 Check if STMT_INFO defines a non scalar data-ref (array/pointer/structure)
7360 that can be vectorized.
7361 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
7362 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
7363 Return true if STMT_INFO is vectorizable in this way. */
7366 vectorizable_store (vec_info
*vinfo
,
7367 stmt_vec_info stmt_info
, gimple_stmt_iterator
*gsi
,
7368 stmt_vec_info
*vec_stmt
, slp_tree slp_node
,
7369 stmt_vector_for_cost
*cost_vec
)
7373 tree vec_oprnd
= NULL_TREE
;
7375 loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
);
7376 class loop
*loop
= NULL
;
7377 machine_mode vec_mode
;
7379 enum dr_alignment_support alignment_support_scheme
;
7380 enum vect_def_type rhs_dt
= vect_unknown_def_type
;
7381 enum vect_def_type mask_dt
= vect_unknown_def_type
;
7382 stmt_vec_info prev_stmt_info
= NULL
;
7383 tree dataref_ptr
= NULL_TREE
;
7384 tree dataref_offset
= NULL_TREE
;
7385 gimple
*ptr_incr
= NULL
;
7388 stmt_vec_info first_stmt_info
;
7390 unsigned int group_size
, i
;
7391 vec
<tree
> oprnds
= vNULL
;
7392 vec
<tree
> result_chain
= vNULL
;
7393 tree offset
= NULL_TREE
;
7394 vec
<tree
> vec_oprnds
= vNULL
;
7395 bool slp
= (slp_node
!= NULL
);
7396 unsigned int vec_num
;
7397 bb_vec_info bb_vinfo
= dyn_cast
<bb_vec_info
> (vinfo
);
7399 gather_scatter_info gs_info
;
7401 vec_load_store_type vls_type
;
7404 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
7407 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
7411 /* Is vectorizable store? */
7413 tree mask
= NULL_TREE
, mask_vectype
= NULL_TREE
;
7414 if (gassign
*assign
= dyn_cast
<gassign
*> (stmt_info
->stmt
))
7416 tree scalar_dest
= gimple_assign_lhs (assign
);
7417 if (TREE_CODE (scalar_dest
) == VIEW_CONVERT_EXPR
7418 && is_pattern_stmt_p (stmt_info
))
7419 scalar_dest
= TREE_OPERAND (scalar_dest
, 0);
7420 if (TREE_CODE (scalar_dest
) != ARRAY_REF
7421 && TREE_CODE (scalar_dest
) != BIT_FIELD_REF
7422 && TREE_CODE (scalar_dest
) != INDIRECT_REF
7423 && TREE_CODE (scalar_dest
) != COMPONENT_REF
7424 && TREE_CODE (scalar_dest
) != IMAGPART_EXPR
7425 && TREE_CODE (scalar_dest
) != REALPART_EXPR
7426 && TREE_CODE (scalar_dest
) != MEM_REF
)
7431 gcall
*call
= dyn_cast
<gcall
*> (stmt_info
->stmt
);
7432 if (!call
|| !gimple_call_internal_p (call
))
7435 internal_fn ifn
= gimple_call_internal_fn (call
);
7436 if (!internal_store_fn_p (ifn
))
7439 if (slp_node
!= NULL
)
7441 if (dump_enabled_p ())
7442 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
7443 "SLP of masked stores not supported.\n");
7447 int mask_index
= internal_fn_mask_index (ifn
);
7448 if (mask_index
>= 0)
7450 mask
= gimple_call_arg (call
, mask_index
);
7451 if (!vect_check_scalar_mask (vinfo
, stmt_info
, mask
, &mask_dt
,
7457 op
= vect_get_store_rhs (stmt_info
);
7459 /* Cannot have hybrid store SLP -- that would mean storing to the
7460 same location twice. */
7461 gcc_assert (slp
== PURE_SLP_STMT (stmt_info
));
7463 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
), rhs_vectype
= NULL_TREE
;
7464 poly_uint64 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
7468 loop
= LOOP_VINFO_LOOP (loop_vinfo
);
7469 vf
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
);
7474 /* Multiple types in SLP are handled by creating the appropriate number of
7475 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
7480 ncopies
= vect_get_num_copies (loop_vinfo
, vectype
);
7482 gcc_assert (ncopies
>= 1);
7484 /* FORNOW. This restriction should be relaxed. */
7485 if (loop
&& nested_in_vect_loop_p (loop
, stmt_info
) && ncopies
> 1)
7487 if (dump_enabled_p ())
7488 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
7489 "multiple types in nested loop.\n");
7493 if (!vect_check_store_rhs (vinfo
, stmt_info
, slp_node
,
7494 op
, &rhs_dt
, &rhs_vectype
, &vls_type
))
7497 elem_type
= TREE_TYPE (vectype
);
7498 vec_mode
= TYPE_MODE (vectype
);
7500 if (!STMT_VINFO_DATA_REF (stmt_info
))
7503 vect_memory_access_type memory_access_type
;
7504 if (!get_load_store_type (vinfo
, stmt_info
, vectype
, slp
, mask
, vls_type
,
7505 ncopies
, &memory_access_type
, &gs_info
))
7510 if (memory_access_type
== VMAT_CONTIGUOUS
)
7512 if (!VECTOR_MODE_P (vec_mode
)
7513 || !can_vec_mask_load_store_p (vec_mode
,
7514 TYPE_MODE (mask_vectype
), false))
7517 else if (memory_access_type
!= VMAT_LOAD_STORE_LANES
7518 && (memory_access_type
!= VMAT_GATHER_SCATTER
7519 || (gs_info
.decl
&& !VECTOR_BOOLEAN_TYPE_P (mask_vectype
))))
7521 if (dump_enabled_p ())
7522 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
7523 "unsupported access type for masked store.\n");
7529 /* FORNOW. In some cases can vectorize even if data-type not supported
7530 (e.g. - array initialization with 0). */
7531 if (optab_handler (mov_optab
, vec_mode
) == CODE_FOR_nothing
)
7535 dr_vec_info
*dr_info
= STMT_VINFO_DR_INFO (stmt_info
), *first_dr_info
= NULL
;
7536 grouped_store
= (STMT_VINFO_GROUPED_ACCESS (stmt_info
)
7537 && memory_access_type
!= VMAT_GATHER_SCATTER
7538 && (slp
|| memory_access_type
!= VMAT_CONTIGUOUS
));
7541 first_stmt_info
= DR_GROUP_FIRST_ELEMENT (stmt_info
);
7542 first_dr_info
= STMT_VINFO_DR_INFO (first_stmt_info
);
7543 group_size
= DR_GROUP_SIZE (first_stmt_info
);
7547 first_stmt_info
= stmt_info
;
7548 first_dr_info
= dr_info
;
7549 group_size
= vec_num
= 1;
7552 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) > 1 && !vec_stmt
)
7554 if (!check_scan_store (vinfo
, stmt_info
, vectype
, rhs_dt
, slp
, mask
,
7555 memory_access_type
))
7559 if (!vec_stmt
) /* transformation not required. */
7561 STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info
) = memory_access_type
;
7564 && LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo
))
7565 check_load_store_masking (loop_vinfo
, vectype
, vls_type
, group_size
,
7566 memory_access_type
, &gs_info
, mask
);
7569 && !vect_maybe_update_slp_op_vectype (SLP_TREE_CHILDREN (slp_node
)[0],
7572 if (dump_enabled_p ())
7573 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
7574 "incompatible vector types for invariants\n");
7578 STMT_VINFO_TYPE (stmt_info
) = store_vec_info_type
;
7579 vect_model_store_cost (vinfo
, stmt_info
, ncopies
,
7580 memory_access_type
, vls_type
, slp_node
, cost_vec
);
7583 gcc_assert (memory_access_type
== STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info
));
7587 ensure_base_align (dr_info
);
7589 if (memory_access_type
== VMAT_GATHER_SCATTER
&& gs_info
.decl
)
7591 tree vec_oprnd0
= NULL_TREE
, vec_oprnd1
= NULL_TREE
, src
;
7592 tree arglist
= TYPE_ARG_TYPES (TREE_TYPE (gs_info
.decl
));
7593 tree rettype
, srctype
, ptrtype
, idxtype
, masktype
, scaletype
;
7594 tree ptr
, var
, scale
, vec_mask
;
7595 tree mask_arg
= NULL_TREE
, mask_op
= NULL_TREE
, perm_mask
= NULL_TREE
;
7596 tree mask_halfvectype
= mask_vectype
;
7597 edge pe
= loop_preheader_edge (loop
);
7600 enum { NARROW
, NONE
, WIDEN
} modifier
;
7601 poly_uint64 scatter_off_nunits
7602 = TYPE_VECTOR_SUBPARTS (gs_info
.offset_vectype
);
7604 if (known_eq (nunits
, scatter_off_nunits
))
7606 else if (known_eq (nunits
* 2, scatter_off_nunits
))
7610 /* Currently gathers and scatters are only supported for
7611 fixed-length vectors. */
7612 unsigned int count
= scatter_off_nunits
.to_constant ();
7613 vec_perm_builder
sel (count
, count
, 1);
7614 for (i
= 0; i
< (unsigned int) count
; ++i
)
7615 sel
.quick_push (i
| (count
/ 2));
7617 vec_perm_indices
indices (sel
, 1, count
);
7618 perm_mask
= vect_gen_perm_mask_checked (gs_info
.offset_vectype
,
7620 gcc_assert (perm_mask
!= NULL_TREE
);
7622 else if (known_eq (nunits
, scatter_off_nunits
* 2))
7626 /* Currently gathers and scatters are only supported for
7627 fixed-length vectors. */
7628 unsigned int count
= nunits
.to_constant ();
7629 vec_perm_builder
sel (count
, count
, 1);
7630 for (i
= 0; i
< (unsigned int) count
; ++i
)
7631 sel
.quick_push (i
| (count
/ 2));
7633 vec_perm_indices
indices (sel
, 2, count
);
7634 perm_mask
= vect_gen_perm_mask_checked (vectype
, indices
);
7635 gcc_assert (perm_mask
!= NULL_TREE
);
7639 mask_halfvectype
= truth_type_for (gs_info
.offset_vectype
);
7644 rettype
= TREE_TYPE (TREE_TYPE (gs_info
.decl
));
7645 ptrtype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
7646 masktype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
7647 idxtype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
7648 srctype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
7649 scaletype
= TREE_VALUE (arglist
);
7651 gcc_checking_assert (TREE_CODE (masktype
) == INTEGER_TYPE
7652 && TREE_CODE (rettype
) == VOID_TYPE
);
7654 ptr
= fold_convert (ptrtype
, gs_info
.base
);
7655 if (!is_gimple_min_invariant (ptr
))
7657 ptr
= force_gimple_operand (ptr
, &seq
, true, NULL_TREE
);
7658 new_bb
= gsi_insert_seq_on_edge_immediate (pe
, seq
);
7659 gcc_assert (!new_bb
);
7662 if (mask
== NULL_TREE
)
7664 mask_arg
= build_int_cst (masktype
, -1);
7665 mask_arg
= vect_init_vector (vinfo
, stmt_info
,
7666 mask_arg
, masktype
, NULL
);
7669 scale
= build_int_cst (scaletype
, gs_info
.scale
);
7671 prev_stmt_info
= NULL
;
7672 for (j
= 0; j
< ncopies
; ++j
)
7676 src
= vec_oprnd1
= vect_get_vec_def_for_operand (vinfo
,
7678 op
= vec_oprnd0
= vect_get_vec_def_for_operand (vinfo
,
7683 tree mask_vectype
= truth_type_for (vectype
);
7685 = vect_get_vec_def_for_operand (vinfo
, mask
,
7686 stmt_info
, mask_vectype
);
7689 else if (modifier
!= NONE
&& (j
& 1))
7691 if (modifier
== WIDEN
)
7694 = vec_oprnd1
= vect_get_vec_def_for_stmt_copy (vinfo
,
7696 op
= permute_vec_elements (vinfo
, vec_oprnd0
, vec_oprnd0
,
7697 perm_mask
, stmt_info
, gsi
);
7700 = vec_mask
= vect_get_vec_def_for_stmt_copy (vinfo
,
7703 else if (modifier
== NARROW
)
7705 src
= permute_vec_elements (vinfo
, vec_oprnd1
, vec_oprnd1
,
7706 perm_mask
, stmt_info
, gsi
);
7707 op
= vec_oprnd0
= vect_get_vec_def_for_stmt_copy (vinfo
,
7715 src
= vec_oprnd1
= vect_get_vec_def_for_stmt_copy (vinfo
,
7717 op
= vec_oprnd0
= vect_get_vec_def_for_stmt_copy (vinfo
,
7720 mask_op
= vec_mask
= vect_get_vec_def_for_stmt_copy (vinfo
,
7724 if (!useless_type_conversion_p (srctype
, TREE_TYPE (src
)))
7726 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (src
)),
7727 TYPE_VECTOR_SUBPARTS (srctype
)));
7728 var
= vect_get_new_ssa_name (srctype
, vect_simple_var
);
7729 src
= build1 (VIEW_CONVERT_EXPR
, srctype
, src
);
7731 = gimple_build_assign (var
, VIEW_CONVERT_EXPR
, src
);
7732 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
7736 if (!useless_type_conversion_p (idxtype
, TREE_TYPE (op
)))
7738 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op
)),
7739 TYPE_VECTOR_SUBPARTS (idxtype
)));
7740 var
= vect_get_new_ssa_name (idxtype
, vect_simple_var
);
7741 op
= build1 (VIEW_CONVERT_EXPR
, idxtype
, op
);
7743 = gimple_build_assign (var
, VIEW_CONVERT_EXPR
, op
);
7744 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
7752 if (modifier
== NARROW
)
7754 var
= vect_get_new_ssa_name (mask_halfvectype
,
7757 = gimple_build_assign (var
, (j
& 1) ? VEC_UNPACK_HI_EXPR
7758 : VEC_UNPACK_LO_EXPR
,
7760 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
7763 tree optype
= TREE_TYPE (mask_arg
);
7764 if (TYPE_MODE (masktype
) == TYPE_MODE (optype
))
7767 utype
= lang_hooks
.types
.type_for_mode (TYPE_MODE (optype
), 1);
7768 var
= vect_get_new_ssa_name (utype
, vect_scalar_var
);
7769 mask_arg
= build1 (VIEW_CONVERT_EXPR
, utype
, mask_arg
);
7771 = gimple_build_assign (var
, VIEW_CONVERT_EXPR
, mask_arg
);
7772 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
7774 if (!useless_type_conversion_p (masktype
, utype
))
7776 gcc_assert (TYPE_PRECISION (utype
)
7777 <= TYPE_PRECISION (masktype
));
7778 var
= vect_get_new_ssa_name (masktype
, vect_scalar_var
);
7779 new_stmt
= gimple_build_assign (var
, NOP_EXPR
, mask_arg
);
7780 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
7786 = gimple_build_call (gs_info
.decl
, 5, ptr
, mask_arg
, op
, src
, scale
);
7787 stmt_vec_info new_stmt_info
7788 = vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
7790 if (prev_stmt_info
== NULL
)
7791 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt_info
;
7793 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt_info
;
7794 prev_stmt_info
= new_stmt_info
;
7798 else if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) >= 3)
7799 return vectorizable_scan_store (vinfo
, stmt_info
, gsi
, vec_stmt
, ncopies
);
7801 if (STMT_VINFO_GROUPED_ACCESS (stmt_info
))
7802 DR_GROUP_STORE_COUNT (DR_GROUP_FIRST_ELEMENT (stmt_info
))++;
7807 gcc_assert (!loop
|| !nested_in_vect_loop_p (loop
, stmt_info
));
7809 /* We vectorize all the stmts of the interleaving group when we
7810 reach the last stmt in the group. */
7811 if (DR_GROUP_STORE_COUNT (first_stmt_info
)
7812 < DR_GROUP_SIZE (first_stmt_info
)
7821 grouped_store
= false;
7822 /* VEC_NUM is the number of vect stmts to be created for this
7824 vec_num
= SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
);
7825 first_stmt_info
= SLP_TREE_SCALAR_STMTS (slp_node
)[0];
7826 gcc_assert (DR_GROUP_FIRST_ELEMENT (first_stmt_info
)
7827 == first_stmt_info
);
7828 first_dr_info
= STMT_VINFO_DR_INFO (first_stmt_info
);
7829 op
= vect_get_store_rhs (first_stmt_info
);
7832 /* VEC_NUM is the number of vect stmts to be created for this
7834 vec_num
= group_size
;
7836 ref_type
= get_group_alias_ptr_type (first_stmt_info
);
7839 ref_type
= reference_alias_ptr_type (DR_REF (first_dr_info
->dr
));
7841 if (dump_enabled_p ())
7842 dump_printf_loc (MSG_NOTE
, vect_location
,
7843 "transform store. ncopies = %d\n", ncopies
);
7845 if (memory_access_type
== VMAT_ELEMENTWISE
7846 || memory_access_type
== VMAT_STRIDED_SLP
)
7848 gimple_stmt_iterator incr_gsi
;
7854 tree stride_base
, stride_step
, alias_off
;
7858 /* Checked by get_load_store_type. */
7859 unsigned int const_nunits
= nunits
.to_constant ();
7861 gcc_assert (!LOOP_VINFO_FULLY_MASKED_P (loop_vinfo
));
7862 gcc_assert (!nested_in_vect_loop_p (loop
, stmt_info
));
7864 dr_offset
= get_dr_vinfo_offset (vinfo
, first_dr_info
);
7866 = fold_build_pointer_plus
7867 (DR_BASE_ADDRESS (first_dr_info
->dr
),
7868 size_binop (PLUS_EXPR
,
7869 convert_to_ptrofftype (dr_offset
),
7870 convert_to_ptrofftype (DR_INIT (first_dr_info
->dr
))));
7871 stride_step
= fold_convert (sizetype
, DR_STEP (first_dr_info
->dr
));
7873 /* For a store with loop-invariant (but other than power-of-2)
7874 stride (i.e. not a grouped access) like so:
7876 for (i = 0; i < n; i += stride)
7879 we generate a new induction variable and new stores from
7880 the components of the (vectorized) rhs:
7882 for (j = 0; ; j += VF*stride)
7887 array[j + stride] = tmp2;
7891 unsigned nstores
= const_nunits
;
7893 tree ltype
= elem_type
;
7894 tree lvectype
= vectype
;
7897 if (group_size
< const_nunits
7898 && const_nunits
% group_size
== 0)
7900 nstores
= const_nunits
/ group_size
;
7902 ltype
= build_vector_type (elem_type
, group_size
);
7905 /* First check if vec_extract optab doesn't support extraction
7906 of vector elts directly. */
7907 scalar_mode elmode
= SCALAR_TYPE_MODE (elem_type
);
7909 if (!VECTOR_MODE_P (TYPE_MODE (vectype
))
7910 || !related_vector_mode (TYPE_MODE (vectype
), elmode
,
7911 group_size
).exists (&vmode
)
7912 || (convert_optab_handler (vec_extract_optab
,
7913 TYPE_MODE (vectype
), vmode
)
7914 == CODE_FOR_nothing
))
7916 /* Try to avoid emitting an extract of vector elements
7917 by performing the extracts using an integer type of the
7918 same size, extracting from a vector of those and then
7919 re-interpreting it as the original vector type if
7922 = group_size
* GET_MODE_BITSIZE (elmode
);
7923 unsigned int lnunits
= const_nunits
/ group_size
;
7924 /* If we can't construct such a vector fall back to
7925 element extracts from the original vector type and
7926 element size stores. */
7927 if (int_mode_for_size (lsize
, 0).exists (&elmode
)
7928 && VECTOR_MODE_P (TYPE_MODE (vectype
))
7929 && related_vector_mode (TYPE_MODE (vectype
), elmode
,
7930 lnunits
).exists (&vmode
)
7931 && (convert_optab_handler (vec_extract_optab
,
7933 != CODE_FOR_nothing
))
7937 ltype
= build_nonstandard_integer_type (lsize
, 1);
7938 lvectype
= build_vector_type (ltype
, nstores
);
7940 /* Else fall back to vector extraction anyway.
7941 Fewer stores are more important than avoiding spilling
7942 of the vector we extract from. Compared to the
7943 construction case in vectorizable_load no store-forwarding
7944 issue exists here for reasonable archs. */
7947 else if (group_size
>= const_nunits
7948 && group_size
% const_nunits
== 0)
7951 lnel
= const_nunits
;
7955 ltype
= build_aligned_type (ltype
, TYPE_ALIGN (elem_type
));
7956 ncopies
= SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
);
7959 ivstep
= stride_step
;
7960 ivstep
= fold_build2 (MULT_EXPR
, TREE_TYPE (ivstep
), ivstep
,
7961 build_int_cst (TREE_TYPE (ivstep
), vf
));
7963 standard_iv_increment_position (loop
, &incr_gsi
, &insert_after
);
7965 stride_base
= cse_and_gimplify_to_preheader (loop_vinfo
, stride_base
);
7966 ivstep
= cse_and_gimplify_to_preheader (loop_vinfo
, ivstep
);
7967 create_iv (stride_base
, ivstep
, NULL
,
7968 loop
, &incr_gsi
, insert_after
,
7970 incr
= gsi_stmt (incr_gsi
);
7971 loop_vinfo
->add_stmt (incr
);
7973 stride_step
= cse_and_gimplify_to_preheader (loop_vinfo
, stride_step
);
7975 prev_stmt_info
= NULL
;
7976 alias_off
= build_int_cst (ref_type
, 0);
7977 stmt_vec_info next_stmt_info
= first_stmt_info
;
7978 for (g
= 0; g
< group_size
; g
++)
7980 running_off
= offvar
;
7983 tree size
= TYPE_SIZE_UNIT (ltype
);
7984 tree pos
= fold_build2 (MULT_EXPR
, sizetype
, size_int (g
),
7986 tree newoff
= copy_ssa_name (running_off
, NULL
);
7987 incr
= gimple_build_assign (newoff
, POINTER_PLUS_EXPR
,
7989 vect_finish_stmt_generation (vinfo
, stmt_info
, incr
, gsi
);
7990 running_off
= newoff
;
7992 unsigned int group_el
= 0;
7993 unsigned HOST_WIDE_INT
7994 elsz
= tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype
)));
7995 for (j
= 0; j
< ncopies
; j
++)
7997 /* We've set op and dt above, from vect_get_store_rhs,
7998 and first_stmt_info == stmt_info. */
8003 vect_get_vec_defs (vinfo
, op
, NULL_TREE
, stmt_info
,
8004 &vec_oprnds
, NULL
, slp_node
);
8005 vec_oprnd
= vec_oprnds
[0];
8009 op
= vect_get_store_rhs (next_stmt_info
);
8010 vec_oprnd
= vect_get_vec_def_for_operand
8011 (vinfo
, op
, next_stmt_info
);
8017 vec_oprnd
= vec_oprnds
[j
];
8019 vec_oprnd
= vect_get_vec_def_for_stmt_copy (vinfo
,
8022 /* Pun the vector to extract from if necessary. */
8023 if (lvectype
!= vectype
)
8025 tree tem
= make_ssa_name (lvectype
);
8027 = gimple_build_assign (tem
, build1 (VIEW_CONVERT_EXPR
,
8028 lvectype
, vec_oprnd
));
8029 vect_finish_stmt_generation (vinfo
, stmt_info
, pun
, gsi
);
8032 for (i
= 0; i
< nstores
; i
++)
8034 tree newref
, newoff
;
8035 gimple
*incr
, *assign
;
8036 tree size
= TYPE_SIZE (ltype
);
8037 /* Extract the i'th component. */
8038 tree pos
= fold_build2 (MULT_EXPR
, bitsizetype
,
8039 bitsize_int (i
), size
);
8040 tree elem
= fold_build3 (BIT_FIELD_REF
, ltype
, vec_oprnd
,
8043 elem
= force_gimple_operand_gsi (gsi
, elem
, true,
8047 tree this_off
= build_int_cst (TREE_TYPE (alias_off
),
8049 newref
= build2 (MEM_REF
, ltype
,
8050 running_off
, this_off
);
8051 vect_copy_ref_info (newref
, DR_REF (first_dr_info
->dr
));
8053 /* And store it to *running_off. */
8054 assign
= gimple_build_assign (newref
, elem
);
8055 stmt_vec_info assign_info
8056 = vect_finish_stmt_generation (vinfo
, stmt_info
,
8061 || group_el
== group_size
)
8063 newoff
= copy_ssa_name (running_off
, NULL
);
8064 incr
= gimple_build_assign (newoff
, POINTER_PLUS_EXPR
,
8065 running_off
, stride_step
);
8066 vect_finish_stmt_generation (vinfo
, stmt_info
, incr
, gsi
);
8068 running_off
= newoff
;
8071 if (g
== group_size
- 1
8074 if (j
== 0 && i
== 0)
8075 STMT_VINFO_VEC_STMT (stmt_info
)
8076 = *vec_stmt
= assign_info
;
8078 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = assign_info
;
8079 prev_stmt_info
= assign_info
;
8083 next_stmt_info
= DR_GROUP_NEXT_ELEMENT (next_stmt_info
);
8088 vec_oprnds
.release ();
8092 auto_vec
<tree
> dr_chain (group_size
);
8093 oprnds
.create (group_size
);
8095 /* Gather-scatter accesses perform only component accesses, alignment
8096 is irrelevant for them. */
8097 if (memory_access_type
== VMAT_GATHER_SCATTER
)
8098 alignment_support_scheme
= dr_unaligned_supported
;
8100 alignment_support_scheme
8101 = vect_supportable_dr_alignment (vinfo
, first_dr_info
, false);
8103 gcc_assert (alignment_support_scheme
);
8104 vec_loop_masks
*loop_masks
8105 = (loop_vinfo
&& LOOP_VINFO_FULLY_MASKED_P (loop_vinfo
)
8106 ? &LOOP_VINFO_MASKS (loop_vinfo
)
8108 /* Targets with store-lane instructions must not require explicit
8109 realignment. vect_supportable_dr_alignment always returns either
8110 dr_aligned or dr_unaligned_supported for masked operations. */
8111 gcc_assert ((memory_access_type
!= VMAT_LOAD_STORE_LANES
8114 || alignment_support_scheme
== dr_aligned
8115 || alignment_support_scheme
== dr_unaligned_supported
);
8117 if (memory_access_type
== VMAT_CONTIGUOUS_DOWN
8118 || memory_access_type
== VMAT_CONTIGUOUS_REVERSE
)
8119 offset
= size_int (-TYPE_VECTOR_SUBPARTS (vectype
) + 1);
8122 tree vec_offset
= NULL_TREE
;
8123 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info
))
8125 aggr_type
= NULL_TREE
;
8128 else if (memory_access_type
== VMAT_GATHER_SCATTER
)
8130 aggr_type
= elem_type
;
8131 vect_get_strided_load_store_ops (stmt_info
, loop_vinfo
, &gs_info
,
8132 &bump
, &vec_offset
);
8136 if (memory_access_type
== VMAT_LOAD_STORE_LANES
)
8137 aggr_type
= build_array_type_nelts (elem_type
, vec_num
* nunits
);
8139 aggr_type
= vectype
;
8140 bump
= vect_get_data_ptr_increment (vinfo
, dr_info
, aggr_type
,
8141 memory_access_type
);
8145 LOOP_VINFO_HAS_MASK_STORE (loop_vinfo
) = true;
8147 /* In case the vectorization factor (VF) is bigger than the number
8148 of elements that we can fit in a vectype (nunits), we have to generate
8149 more than one vector stmt - i.e - we need to "unroll" the
8150 vector stmt by a factor VF/nunits. For more details see documentation in
8151 vect_get_vec_def_for_copy_stmt. */
8153 /* In case of interleaving (non-unit grouped access):
8160 We create vectorized stores starting from base address (the access of the
8161 first stmt in the chain (S2 in the above example), when the last store stmt
8162 of the chain (S4) is reached:
8165 VS2: &base + vec_size*1 = vx0
8166 VS3: &base + vec_size*2 = vx1
8167 VS4: &base + vec_size*3 = vx3
8169 Then permutation statements are generated:
8171 VS5: vx5 = VEC_PERM_EXPR < vx0, vx3, {0, 8, 1, 9, 2, 10, 3, 11} >
8172 VS6: vx6 = VEC_PERM_EXPR < vx0, vx3, {4, 12, 5, 13, 6, 14, 7, 15} >
8175 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
8176 (the order of the data-refs in the output of vect_permute_store_chain
8177 corresponds to the order of scalar stmts in the interleaving chain - see
8178 the documentation of vect_permute_store_chain()).
8180 In case of both multiple types and interleaving, above vector stores and
8181 permutation stmts are created for every copy. The result vector stmts are
8182 put in STMT_VINFO_VEC_STMT for the first copy and in the corresponding
8183 STMT_VINFO_RELATED_STMT for the next copies.
8186 prev_stmt_info
= NULL
;
8187 tree vec_mask
= NULL_TREE
;
8188 for (j
= 0; j
< ncopies
; j
++)
8190 stmt_vec_info new_stmt_info
;
8195 /* Get vectorized arguments for SLP_NODE. */
8196 vect_get_vec_defs (vinfo
, op
, NULL_TREE
, stmt_info
, &vec_oprnds
,
8199 vec_oprnd
= vec_oprnds
[0];
8203 /* For interleaved stores we collect vectorized defs for all the
8204 stores in the group in DR_CHAIN and OPRNDS. DR_CHAIN is then
8205 used as an input to vect_permute_store_chain(), and OPRNDS as
8206 an input to vect_get_vec_def_for_stmt_copy() for the next copy.
8208 If the store is not grouped, DR_GROUP_SIZE is 1, and DR_CHAIN and
8209 OPRNDS are of size 1. */
8210 stmt_vec_info next_stmt_info
= first_stmt_info
;
8211 for (i
= 0; i
< group_size
; i
++)
8213 /* Since gaps are not supported for interleaved stores,
8214 DR_GROUP_SIZE is the exact number of stmts in the chain.
8215 Therefore, NEXT_STMT_INFO can't be NULL_TREE. In case
8216 that there is no interleaving, DR_GROUP_SIZE is 1,
8217 and only one iteration of the loop will be executed. */
8218 op
= vect_get_store_rhs (next_stmt_info
);
8219 vec_oprnd
= vect_get_vec_def_for_operand
8220 (vinfo
, op
, next_stmt_info
);
8221 dr_chain
.quick_push (vec_oprnd
);
8222 oprnds
.quick_push (vec_oprnd
);
8223 next_stmt_info
= DR_GROUP_NEXT_ELEMENT (next_stmt_info
);
8226 vec_mask
= vect_get_vec_def_for_operand (vinfo
, mask
, stmt_info
,
8230 /* We should have caught mismatched types earlier. */
8231 gcc_assert (useless_type_conversion_p (vectype
,
8232 TREE_TYPE (vec_oprnd
)));
8233 bool simd_lane_access_p
8234 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) != 0;
8235 if (simd_lane_access_p
8237 && TREE_CODE (DR_BASE_ADDRESS (first_dr_info
->dr
)) == ADDR_EXPR
8238 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr_info
->dr
), 0))
8239 && integer_zerop (get_dr_vinfo_offset (vinfo
, first_dr_info
))
8240 && integer_zerop (DR_INIT (first_dr_info
->dr
))
8241 && alias_sets_conflict_p (get_alias_set (aggr_type
),
8242 get_alias_set (TREE_TYPE (ref_type
))))
8244 dataref_ptr
= unshare_expr (DR_BASE_ADDRESS (first_dr_info
->dr
));
8245 dataref_offset
= build_int_cst (ref_type
, 0);
8247 else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info
))
8248 vect_get_gather_scatter_ops (vinfo
, loop
, stmt_info
, &gs_info
,
8249 &dataref_ptr
, &vec_offset
);
8252 = vect_create_data_ref_ptr (vinfo
, first_stmt_info
, aggr_type
,
8253 simd_lane_access_p
? loop
: NULL
,
8254 offset
, &dummy
, gsi
, &ptr_incr
,
8255 simd_lane_access_p
, NULL_TREE
, bump
);
8259 /* For interleaved stores we created vectorized defs for all the
8260 defs stored in OPRNDS in the previous iteration (previous copy).
8261 DR_CHAIN is then used as an input to vect_permute_store_chain(),
8262 and OPRNDS as an input to vect_get_vec_def_for_stmt_copy() for the
8264 If the store is not grouped, DR_GROUP_SIZE is 1, and DR_CHAIN and
8265 OPRNDS are of size 1. */
8266 for (i
= 0; i
< group_size
; i
++)
8269 vec_oprnd
= vect_get_vec_def_for_stmt_copy (vinfo
, op
);
8270 dr_chain
[i
] = vec_oprnd
;
8271 oprnds
[i
] = vec_oprnd
;
8274 vec_mask
= vect_get_vec_def_for_stmt_copy (vinfo
, vec_mask
);
8277 = int_const_binop (PLUS_EXPR
, dataref_offset
, bump
);
8278 else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info
))
8279 vec_offset
= vect_get_vec_def_for_stmt_copy (vinfo
, vec_offset
);
8281 dataref_ptr
= bump_vector_ptr (vinfo
, dataref_ptr
, ptr_incr
, gsi
,
8285 if (memory_access_type
== VMAT_LOAD_STORE_LANES
)
8289 /* Get an array into which we can store the individual vectors. */
8290 vec_array
= create_vector_array (vectype
, vec_num
);
8292 /* Invalidate the current contents of VEC_ARRAY. This should
8293 become an RTL clobber too, which prevents the vector registers
8294 from being upward-exposed. */
8295 vect_clobber_variable (vinfo
, stmt_info
, gsi
, vec_array
);
8297 /* Store the individual vectors into the array. */
8298 for (i
= 0; i
< vec_num
; i
++)
8300 vec_oprnd
= dr_chain
[i
];
8301 write_vector_array (vinfo
, stmt_info
,
8302 gsi
, vec_oprnd
, vec_array
, i
);
8305 tree final_mask
= NULL
;
8307 final_mask
= vect_get_loop_mask (gsi
, loop_masks
, ncopies
,
8310 final_mask
= prepare_load_store_mask (mask_vectype
, final_mask
,
8317 MASK_STORE_LANES (DATAREF_PTR, ALIAS_PTR, VEC_MASK,
8319 unsigned int align
= TYPE_ALIGN_UNIT (TREE_TYPE (vectype
));
8320 tree alias_ptr
= build_int_cst (ref_type
, align
);
8321 call
= gimple_build_call_internal (IFN_MASK_STORE_LANES
, 4,
8322 dataref_ptr
, alias_ptr
,
8323 final_mask
, vec_array
);
8328 MEM_REF[...all elements...] = STORE_LANES (VEC_ARRAY). */
8329 data_ref
= create_array_ref (aggr_type
, dataref_ptr
, ref_type
);
8330 call
= gimple_build_call_internal (IFN_STORE_LANES
, 1,
8332 gimple_call_set_lhs (call
, data_ref
);
8334 gimple_call_set_nothrow (call
, true);
8335 new_stmt_info
= vect_finish_stmt_generation (vinfo
, stmt_info
,
8338 /* Record that VEC_ARRAY is now dead. */
8339 vect_clobber_variable (vinfo
, stmt_info
, gsi
, vec_array
);
8343 new_stmt_info
= NULL
;
8347 result_chain
.create (group_size
);
8349 vect_permute_store_chain (vinfo
, dr_chain
, group_size
, stmt_info
,
8350 gsi
, &result_chain
);
8353 stmt_vec_info next_stmt_info
= first_stmt_info
;
8354 for (i
= 0; i
< vec_num
; i
++)
8357 unsigned HOST_WIDE_INT align
;
8359 tree final_mask
= NULL_TREE
;
8361 final_mask
= vect_get_loop_mask (gsi
, loop_masks
,
8363 vectype
, vec_num
* j
+ i
);
8365 final_mask
= prepare_load_store_mask (mask_vectype
, final_mask
,
8368 if (memory_access_type
== VMAT_GATHER_SCATTER
)
8370 tree scale
= size_int (gs_info
.scale
);
8373 call
= gimple_build_call_internal
8374 (IFN_MASK_SCATTER_STORE
, 5, dataref_ptr
, vec_offset
,
8375 scale
, vec_oprnd
, final_mask
);
8377 call
= gimple_build_call_internal
8378 (IFN_SCATTER_STORE
, 4, dataref_ptr
, vec_offset
,
8380 gimple_call_set_nothrow (call
, true);
8382 = vect_finish_stmt_generation (vinfo
, stmt_info
, call
, gsi
);
8387 /* Bump the vector pointer. */
8388 dataref_ptr
= bump_vector_ptr (vinfo
, dataref_ptr
, ptr_incr
,
8389 gsi
, stmt_info
, bump
);
8392 vec_oprnd
= vec_oprnds
[i
];
8393 else if (grouped_store
)
8394 /* For grouped stores vectorized defs are interleaved in
8395 vect_permute_store_chain(). */
8396 vec_oprnd
= result_chain
[i
];
8398 align
= known_alignment (DR_TARGET_ALIGNMENT (first_dr_info
));
8399 if (aligned_access_p (first_dr_info
))
8401 else if (DR_MISALIGNMENT (first_dr_info
) == -1)
8403 align
= dr_alignment (vect_dr_behavior (vinfo
, first_dr_info
));
8407 misalign
= DR_MISALIGNMENT (first_dr_info
);
8408 if (dataref_offset
== NULL_TREE
8409 && TREE_CODE (dataref_ptr
) == SSA_NAME
)
8410 set_ptr_info_alignment (get_ptr_info (dataref_ptr
), align
,
8413 if (memory_access_type
== VMAT_CONTIGUOUS_REVERSE
)
8415 tree perm_mask
= perm_mask_for_reverse (vectype
);
8416 tree perm_dest
= vect_create_destination_var
8417 (vect_get_store_rhs (stmt_info
), vectype
);
8418 tree new_temp
= make_ssa_name (perm_dest
);
8420 /* Generate the permute statement. */
8422 = gimple_build_assign (new_temp
, VEC_PERM_EXPR
, vec_oprnd
,
8423 vec_oprnd
, perm_mask
);
8424 vect_finish_stmt_generation (vinfo
, stmt_info
, perm_stmt
, gsi
);
8426 perm_stmt
= SSA_NAME_DEF_STMT (new_temp
);
8427 vec_oprnd
= new_temp
;
8430 /* Arguments are ready. Create the new vector stmt. */
8433 align
= least_bit_hwi (misalign
| align
);
8434 tree ptr
= build_int_cst (ref_type
, align
);
8436 = gimple_build_call_internal (IFN_MASK_STORE
, 4,
8438 final_mask
, vec_oprnd
);
8439 gimple_call_set_nothrow (call
, true);
8441 = vect_finish_stmt_generation (vinfo
, stmt_info
, call
, gsi
);
8445 data_ref
= fold_build2 (MEM_REF
, vectype
,
8449 : build_int_cst (ref_type
, 0));
8450 if (aligned_access_p (first_dr_info
))
8452 else if (DR_MISALIGNMENT (first_dr_info
) == -1)
8453 TREE_TYPE (data_ref
)
8454 = build_aligned_type (TREE_TYPE (data_ref
),
8455 align
* BITS_PER_UNIT
);
8457 TREE_TYPE (data_ref
)
8458 = build_aligned_type (TREE_TYPE (data_ref
),
8459 TYPE_ALIGN (elem_type
));
8460 vect_copy_ref_info (data_ref
, DR_REF (first_dr_info
->dr
));
8462 = gimple_build_assign (data_ref
, vec_oprnd
);
8464 = vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
8470 next_stmt_info
= DR_GROUP_NEXT_ELEMENT (next_stmt_info
);
8471 if (!next_stmt_info
)
8478 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt_info
;
8480 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt_info
;
8481 prev_stmt_info
= new_stmt_info
;
8486 result_chain
.release ();
8487 vec_oprnds
.release ();
8492 /* Given a vector type VECTYPE, turns permutation SEL into the equivalent
8493 VECTOR_CST mask. No checks are made that the target platform supports the
8494 mask, so callers may wish to test can_vec_perm_const_p separately, or use
8495 vect_gen_perm_mask_checked. */
8498 vect_gen_perm_mask_any (tree vectype
, const vec_perm_indices
&sel
)
8502 poly_uint64 nunits
= sel
.length ();
8503 gcc_assert (known_eq (nunits
, TYPE_VECTOR_SUBPARTS (vectype
)));
8505 mask_type
= build_vector_type (ssizetype
, nunits
);
8506 return vec_perm_indices_to_tree (mask_type
, sel
);
8509 /* Checked version of vect_gen_perm_mask_any. Asserts can_vec_perm_const_p,
8510 i.e. that the target supports the pattern _for arbitrary input vectors_. */
8513 vect_gen_perm_mask_checked (tree vectype
, const vec_perm_indices
&sel
)
8515 gcc_assert (can_vec_perm_const_p (TYPE_MODE (vectype
), sel
));
8516 return vect_gen_perm_mask_any (vectype
, sel
);
8519 /* Given a vector variable X and Y, that was generated for the scalar
8520 STMT_INFO, generate instructions to permute the vector elements of X and Y
8521 using permutation mask MASK_VEC, insert them at *GSI and return the
8522 permuted vector variable. */
8525 permute_vec_elements (vec_info
*vinfo
,
8526 tree x
, tree y
, tree mask_vec
, stmt_vec_info stmt_info
,
8527 gimple_stmt_iterator
*gsi
)
8529 tree vectype
= TREE_TYPE (x
);
8530 tree perm_dest
, data_ref
;
8533 tree scalar_dest
= gimple_get_lhs (stmt_info
->stmt
);
8534 if (scalar_dest
&& TREE_CODE (scalar_dest
) == SSA_NAME
)
8535 perm_dest
= vect_create_destination_var (scalar_dest
, vectype
);
8537 perm_dest
= vect_get_new_vect_var (vectype
, vect_simple_var
, NULL
);
8538 data_ref
= make_ssa_name (perm_dest
);
8540 /* Generate the permute statement. */
8541 perm_stmt
= gimple_build_assign (data_ref
, VEC_PERM_EXPR
, x
, y
, mask_vec
);
8542 vect_finish_stmt_generation (vinfo
, stmt_info
, perm_stmt
, gsi
);
8547 /* Hoist the definitions of all SSA uses on STMT_INFO out of the loop LOOP,
8548 inserting them on the loop's preheader edge. Returns true if we
8549 were successful in doing so (and thus STMT_INFO can be moved then),
8550 otherwise returns false. */
8553 hoist_defs_of_uses (stmt_vec_info stmt_info
, class loop
*loop
)
/* First pass: walk every SSA use of STMT_INFO and, for each use whose
   defining statement lives inside LOOP, check that it is safe to hoist
   that definition to the preheader.  */
8559 FOR_EACH_SSA_TREE_OPERAND (op
, stmt_info
->stmt
, i
, SSA_OP_USE
)
8561 gimple
*def_stmt
= SSA_NAME_DEF_STMT (op
);
8562 if (!gimple_nop_p (def_stmt
)
8563 && flow_bb_inside_loop_p (loop
, gimple_bb (def_stmt
)))
8565 /* Make sure we don't need to recurse. While we could do
8566 so in simple cases when there are more complex use webs
8567 we don't have an easy way to preserve stmt order to fulfil
8568 dependencies within them. */
/* NOTE(review): a PHI definition cannot be relocated onto an edge, so a
   PHI def presumably makes STMT_INFO non-hoistable here — confirm the
   early-return against the full source.  */
8571 if (gimple_code (def_stmt
) == GIMPLE_PHI
)
/* Reject hoisting if any operand of the in-loop definition is itself
   defined inside LOOP; moving DEF_STMT alone would then break
   dependencies (this is the "no recursion" guarantee above).  */
8573 FOR_EACH_SSA_TREE_OPERAND (op2
, def_stmt
, i2
, SSA_OP_USE
)
8575 gimple
*def_stmt2
= SSA_NAME_DEF_STMT (op2
);
8576 if (!gimple_nop_p (def_stmt2
)
8577 && flow_bb_inside_loop_p (loop
, gimple_bb (def_stmt2
)))
/* Second pass: every in-loop definition was validated above, so now
   physically move each one out of LOOP by removing it from its block
   and re-inserting it on the loop preheader edge.  */
8587 FOR_EACH_SSA_TREE_OPERAND (op
, stmt_info
->stmt
, i
, SSA_OP_USE
)
8589 gimple
*def_stmt
= SSA_NAME_DEF_STMT (op
);
8590 if (!gimple_nop_p (def_stmt
)
8591 && flow_bb_inside_loop_p (loop
, gimple_bb (def_stmt
)))
8593 gimple_stmt_iterator gsi
= gsi_for_stmt (def_stmt
);
8594 gsi_remove (&gsi
, false);
8595 gsi_insert_on_edge_immediate (loop_preheader_edge (loop
), def_stmt
);
8602 /* vectorizable_load.
8604 Check if STMT_INFO reads a non scalar data-ref (array/pointer/structure)
8605 that can be vectorized.
8606 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
8607 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
8608 Return true if STMT_INFO is vectorizable in this way. */
8611 vectorizable_load (vec_info
*vinfo
,
8612 stmt_vec_info stmt_info
, gimple_stmt_iterator
*gsi
,
8613 stmt_vec_info
*vec_stmt
, slp_tree slp_node
,
8614 stmt_vector_for_cost
*cost_vec
)
8617 tree vec_dest
= NULL
;
8618 tree data_ref
= NULL
;
8619 stmt_vec_info prev_stmt_info
;
8620 loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
);
8621 class loop
*loop
= NULL
;
8622 class loop
*containing_loop
= gimple_bb (stmt_info
->stmt
)->loop_father
;
8623 bool nested_in_vect_loop
= false;
8628 enum dr_alignment_support alignment_support_scheme
;
8629 tree dataref_ptr
= NULL_TREE
;
8630 tree dataref_offset
= NULL_TREE
;
8631 gimple
*ptr_incr
= NULL
;
8634 unsigned int group_size
;
8635 poly_uint64 group_gap_adj
;
8636 tree msq
= NULL_TREE
, lsq
;
8637 tree offset
= NULL_TREE
;
8638 tree byte_offset
= NULL_TREE
;
8639 tree realignment_token
= NULL_TREE
;
8641 vec
<tree
> dr_chain
= vNULL
;
8642 bool grouped_load
= false;
8643 stmt_vec_info first_stmt_info
;
8644 stmt_vec_info first_stmt_info_for_drptr
= NULL
;
8645 bool compute_in_loop
= false;
8646 class loop
*at_loop
;
8648 bool slp
= (slp_node
!= NULL
);
8649 bool slp_perm
= false;
8650 bb_vec_info bb_vinfo
= dyn_cast
<bb_vec_info
> (vinfo
);
8653 gather_scatter_info gs_info
;
8655 enum vect_def_type mask_dt
= vect_unknown_def_type
;
8657 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
8660 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
8664 tree mask
= NULL_TREE
, mask_vectype
= NULL_TREE
;
8665 if (gassign
*assign
= dyn_cast
<gassign
*> (stmt_info
->stmt
))
8667 scalar_dest
= gimple_assign_lhs (assign
);
8668 if (TREE_CODE (scalar_dest
) != SSA_NAME
)
8671 tree_code code
= gimple_assign_rhs_code (assign
);
8672 if (code
!= ARRAY_REF
8673 && code
!= BIT_FIELD_REF
8674 && code
!= INDIRECT_REF
8675 && code
!= COMPONENT_REF
8676 && code
!= IMAGPART_EXPR
8677 && code
!= REALPART_EXPR
8679 && TREE_CODE_CLASS (code
) != tcc_declaration
)
8684 gcall
*call
= dyn_cast
<gcall
*> (stmt_info
->stmt
);
8685 if (!call
|| !gimple_call_internal_p (call
))
8688 internal_fn ifn
= gimple_call_internal_fn (call
);
8689 if (!internal_load_fn_p (ifn
))
8692 scalar_dest
= gimple_call_lhs (call
);
8696 int mask_index
= internal_fn_mask_index (ifn
);
8697 if (mask_index
>= 0)
8699 mask
= gimple_call_arg (call
, mask_index
);
8700 if (!vect_check_scalar_mask (vinfo
, stmt_info
, mask
, &mask_dt
,
8706 if (!STMT_VINFO_DATA_REF (stmt_info
))
8709 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
8710 poly_uint64 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
8714 loop
= LOOP_VINFO_LOOP (loop_vinfo
);
8715 nested_in_vect_loop
= nested_in_vect_loop_p (loop
, stmt_info
);
8716 vf
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
);
8721 /* Multiple types in SLP are handled by creating the appropriate number of
8722 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
8727 ncopies
= vect_get_num_copies (loop_vinfo
, vectype
);
8729 gcc_assert (ncopies
>= 1);
8731 /* FORNOW. This restriction should be relaxed. */
8732 if (nested_in_vect_loop
&& ncopies
> 1)
8734 if (dump_enabled_p ())
8735 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
8736 "multiple types in nested loop.\n");
8740 /* Invalidate assumptions made by dependence analysis when vectorization
8741 on the unrolled body effectively re-orders stmts. */
8743 && STMT_VINFO_MIN_NEG_DIST (stmt_info
) != 0
8744 && maybe_gt (LOOP_VINFO_VECT_FACTOR (loop_vinfo
),
8745 STMT_VINFO_MIN_NEG_DIST (stmt_info
)))
8747 if (dump_enabled_p ())
8748 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
8749 "cannot perform implicit CSE when unrolling "
8750 "with negative dependence distance\n");
8754 elem_type
= TREE_TYPE (vectype
);
8755 mode
= TYPE_MODE (vectype
);
8757 /* FORNOW. In some cases can vectorize even if data-type not supported
8758 (e.g. - data copies). */
8759 if (optab_handler (mov_optab
, mode
) == CODE_FOR_nothing
)
8761 if (dump_enabled_p ())
8762 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
8763 "Aligned load, but unsupported type.\n");
8767 /* Check if the load is a part of an interleaving chain. */
8768 if (STMT_VINFO_GROUPED_ACCESS (stmt_info
))
8770 grouped_load
= true;
8772 gcc_assert (!nested_in_vect_loop
);
8773 gcc_assert (!STMT_VINFO_GATHER_SCATTER_P (stmt_info
));
8775 first_stmt_info
= DR_GROUP_FIRST_ELEMENT (stmt_info
);
8776 group_size
= DR_GROUP_SIZE (first_stmt_info
);
8778 /* Refuse non-SLP vectorization of SLP-only groups. */
8779 if (!slp
&& STMT_VINFO_SLP_VECT_ONLY (first_stmt_info
))
8781 if (dump_enabled_p ())
8782 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
8783 "cannot vectorize load in non-SLP mode.\n");
8787 if (slp
&& SLP_TREE_LOAD_PERMUTATION (slp_node
).exists ())
8793 /* In BB vectorization we may not actually use a loaded vector
8794 accessing elements in excess of DR_GROUP_SIZE. */
8795 stmt_vec_info group_info
= SLP_TREE_SCALAR_STMTS (slp_node
)[0];
8796 group_info
= DR_GROUP_FIRST_ELEMENT (group_info
);
8797 unsigned HOST_WIDE_INT nunits
;
8798 unsigned j
, k
, maxk
= 0;
8799 FOR_EACH_VEC_ELT (SLP_TREE_LOAD_PERMUTATION (slp_node
), j
, k
)
8802 tree vectype
= STMT_VINFO_VECTYPE (group_info
);
8803 if (!TYPE_VECTOR_SUBPARTS (vectype
).is_constant (&nunits
)
8804 || maxk
>= (DR_GROUP_SIZE (group_info
) & ~(nunits
- 1)))
8806 if (dump_enabled_p ())
8807 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
8808 "BB vectorization with gaps at the end of "
8809 "a load is not supported\n");
8816 if (!vect_transform_slp_perm_load (vinfo
, slp_node
, tem
, NULL
, vf
,
8819 if (dump_enabled_p ())
8820 dump_printf_loc (MSG_MISSED_OPTIMIZATION
,
8822 "unsupported load permutation\n");
8827 /* Invalidate assumptions made by dependence analysis when vectorization
8828 on the unrolled body effectively re-orders stmts. */
8829 if (!PURE_SLP_STMT (stmt_info
)
8830 && STMT_VINFO_MIN_NEG_DIST (stmt_info
) != 0
8831 && maybe_gt (LOOP_VINFO_VECT_FACTOR (loop_vinfo
),
8832 STMT_VINFO_MIN_NEG_DIST (stmt_info
)))
8834 if (dump_enabled_p ())
8835 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
8836 "cannot perform implicit CSE when performing "
8837 "group loads with negative dependence distance\n");
8844 vect_memory_access_type memory_access_type
;
8845 if (!get_load_store_type (vinfo
, stmt_info
, vectype
, slp
, mask
, VLS_LOAD
,
8846 ncopies
, &memory_access_type
, &gs_info
))
8851 if (memory_access_type
== VMAT_CONTIGUOUS
)
8853 machine_mode vec_mode
= TYPE_MODE (vectype
);
8854 if (!VECTOR_MODE_P (vec_mode
)
8855 || !can_vec_mask_load_store_p (vec_mode
,
8856 TYPE_MODE (mask_vectype
), true))
8859 else if (memory_access_type
!= VMAT_LOAD_STORE_LANES
8860 && memory_access_type
!= VMAT_GATHER_SCATTER
)
8862 if (dump_enabled_p ())
8863 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
8864 "unsupported access type for masked load.\n");
8869 if (!vec_stmt
) /* transformation not required. */
8872 STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info
) = memory_access_type
;
8875 && LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo
))
8876 check_load_store_masking (loop_vinfo
, vectype
, VLS_LOAD
, group_size
,
8877 memory_access_type
, &gs_info
, mask
);
8879 STMT_VINFO_TYPE (stmt_info
) = load_vec_info_type
;
8880 vect_model_load_cost (vinfo
, stmt_info
, ncopies
, vf
, memory_access_type
,
8881 slp_node
, cost_vec
);
8886 gcc_assert (memory_access_type
8887 == STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info
));
8889 if (dump_enabled_p ())
8890 dump_printf_loc (MSG_NOTE
, vect_location
,
8891 "transform load. ncopies = %d\n", ncopies
);
8895 dr_vec_info
*dr_info
= STMT_VINFO_DR_INFO (stmt_info
), *first_dr_info
= NULL
;
8896 ensure_base_align (dr_info
);
8898 if (memory_access_type
== VMAT_GATHER_SCATTER
&& gs_info
.decl
)
8900 vect_build_gather_load_calls (vinfo
,
8901 stmt_info
, gsi
, vec_stmt
, &gs_info
, mask
);
8905 if (memory_access_type
== VMAT_INVARIANT
)
8907 gcc_assert (!grouped_load
&& !mask
&& !bb_vinfo
);
8908 /* If we have versioned for aliasing or the loop doesn't
8909 have any data dependencies that would preclude this,
8910 then we are sure this is a loop invariant load and
8911 thus we can insert it on the preheader edge. */
8912 bool hoist_p
= (LOOP_VINFO_NO_DATA_DEPENDENCIES (loop_vinfo
)
8913 && !nested_in_vect_loop
8914 && hoist_defs_of_uses (stmt_info
, loop
));
8917 gassign
*stmt
= as_a
<gassign
*> (stmt_info
->stmt
);
8918 if (dump_enabled_p ())
8919 dump_printf_loc (MSG_NOTE
, vect_location
,
8920 "hoisting out of the vectorized loop: %G", stmt
);
8921 scalar_dest
= copy_ssa_name (scalar_dest
);
8922 tree rhs
= unshare_expr (gimple_assign_rhs1 (stmt
));
8923 gsi_insert_on_edge_immediate
8924 (loop_preheader_edge (loop
),
8925 gimple_build_assign (scalar_dest
, rhs
));
8927 /* These copies are all equivalent, but currently the representation
8928 requires a separate STMT_VINFO_VEC_STMT for each one. */
8929 prev_stmt_info
= NULL
;
8930 gimple_stmt_iterator gsi2
= *gsi
;
8932 for (j
= 0; j
< ncopies
; j
++)
8934 stmt_vec_info new_stmt_info
;
8937 new_temp
= vect_init_vector (vinfo
, stmt_info
, scalar_dest
,
8939 gimple
*new_stmt
= SSA_NAME_DEF_STMT (new_temp
);
8940 new_stmt_info
= vinfo
->add_stmt (new_stmt
);
8944 new_temp
= vect_init_vector (vinfo
, stmt_info
, scalar_dest
,
8946 new_stmt_info
= vinfo
->lookup_def (new_temp
);
8949 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt_info
);
8951 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt_info
;
8953 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt_info
;
8954 prev_stmt_info
= new_stmt_info
;
8959 if (memory_access_type
== VMAT_ELEMENTWISE
8960 || memory_access_type
== VMAT_STRIDED_SLP
)
8962 gimple_stmt_iterator incr_gsi
;
8968 vec
<constructor_elt
, va_gc
> *v
= NULL
;
8969 tree stride_base
, stride_step
, alias_off
;
8970 /* Checked by get_load_store_type. */
8971 unsigned int const_nunits
= nunits
.to_constant ();
8972 unsigned HOST_WIDE_INT cst_offset
= 0;
8975 gcc_assert (!LOOP_VINFO_FULLY_MASKED_P (loop_vinfo
));
8976 gcc_assert (!nested_in_vect_loop
);
8980 first_stmt_info
= DR_GROUP_FIRST_ELEMENT (stmt_info
);
8981 first_dr_info
= STMT_VINFO_DR_INFO (first_stmt_info
);
8985 first_stmt_info
= stmt_info
;
8986 first_dr_info
= dr_info
;
8988 if (slp
&& grouped_load
)
8990 group_size
= DR_GROUP_SIZE (first_stmt_info
);
8991 ref_type
= get_group_alias_ptr_type (first_stmt_info
);
8997 = (tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype
)))
8998 * vect_get_place_in_interleaving_chain (stmt_info
,
9001 ref_type
= reference_alias_ptr_type (DR_REF (dr_info
->dr
));
9004 dr_offset
= get_dr_vinfo_offset (vinfo
, first_dr_info
);
9006 = fold_build_pointer_plus
9007 (DR_BASE_ADDRESS (first_dr_info
->dr
),
9008 size_binop (PLUS_EXPR
,
9009 convert_to_ptrofftype (dr_offset
),
9010 convert_to_ptrofftype (DR_INIT (first_dr_info
->dr
))));
9011 stride_step
= fold_convert (sizetype
, DR_STEP (first_dr_info
->dr
));
9013 /* For a load with loop-invariant (but other than power-of-2)
9014 stride (i.e. not a grouped access) like so:
9016 for (i = 0; i < n; i += stride)
9019 we generate a new induction variable and new accesses to
9020 form a new vector (or vectors, depending on ncopies):
9022 for (j = 0; ; j += VF*stride)
9024 tmp2 = array[j + stride];
9026 vectemp = {tmp1, tmp2, ...}
9029 ivstep
= fold_build2 (MULT_EXPR
, TREE_TYPE (stride_step
), stride_step
,
9030 build_int_cst (TREE_TYPE (stride_step
), vf
));
9032 standard_iv_increment_position (loop
, &incr_gsi
, &insert_after
);
9034 stride_base
= cse_and_gimplify_to_preheader (loop_vinfo
, stride_base
);
9035 ivstep
= cse_and_gimplify_to_preheader (loop_vinfo
, ivstep
);
9036 create_iv (stride_base
, ivstep
, NULL
,
9037 loop
, &incr_gsi
, insert_after
,
9039 incr
= gsi_stmt (incr_gsi
);
9040 loop_vinfo
->add_stmt (incr
);
9042 stride_step
= cse_and_gimplify_to_preheader (loop_vinfo
, stride_step
);
9044 prev_stmt_info
= NULL
;
9045 running_off
= offvar
;
9046 alias_off
= build_int_cst (ref_type
, 0);
9047 int nloads
= const_nunits
;
9049 tree ltype
= TREE_TYPE (vectype
);
9050 tree lvectype
= vectype
;
9051 auto_vec
<tree
> dr_chain
;
9052 if (memory_access_type
== VMAT_STRIDED_SLP
)
9054 if (group_size
< const_nunits
)
9056 /* First check if vec_init optab supports construction from vector
9057 elts directly. Otherwise avoid emitting a constructor of
9058 vector elements by performing the loads using an integer type
9059 of the same size, constructing a vector of those and then
9060 re-interpreting it as the original vector type. This avoids a
9061 huge runtime penalty due to the general inability to perform
9062 store forwarding from smaller stores to a larger load. */
9065 = vector_vector_composition_type (vectype
,
9066 const_nunits
/ group_size
,
9068 if (vtype
!= NULL_TREE
)
9070 nloads
= const_nunits
/ group_size
;
9079 lnel
= const_nunits
;
9082 ltype
= build_aligned_type (ltype
, TYPE_ALIGN (TREE_TYPE (vectype
)));
9084 /* Load vector(1) scalar_type if it's 1 element-wise vectype. */
9085 else if (nloads
== 1)
9090 /* For SLP permutation support we need to load the whole group,
9091 not only the number of vector stmts the permutation result
9095 /* We don't yet generate SLP_TREE_LOAD_PERMUTATIONs for
9097 unsigned int const_vf
= vf
.to_constant ();
9098 ncopies
= CEIL (group_size
* const_vf
, const_nunits
);
9099 dr_chain
.create (ncopies
);
9102 ncopies
= SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
);
9104 unsigned int group_el
= 0;
9105 unsigned HOST_WIDE_INT
9106 elsz
= tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype
)));
9107 for (j
= 0; j
< ncopies
; j
++)
9110 vec_alloc (v
, nloads
);
9111 stmt_vec_info new_stmt_info
= NULL
;
9112 for (i
= 0; i
< nloads
; i
++)
9114 tree this_off
= build_int_cst (TREE_TYPE (alias_off
),
9115 group_el
* elsz
+ cst_offset
);
9116 tree data_ref
= build2 (MEM_REF
, ltype
, running_off
, this_off
);
9117 vect_copy_ref_info (data_ref
, DR_REF (first_dr_info
->dr
));
9119 = gimple_build_assign (make_ssa_name (ltype
), data_ref
);
9121 = vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
9123 CONSTRUCTOR_APPEND_ELT (v
, NULL_TREE
,
9124 gimple_assign_lhs (new_stmt
));
9128 || group_el
== group_size
)
9130 tree newoff
= copy_ssa_name (running_off
);
9131 gimple
*incr
= gimple_build_assign (newoff
, POINTER_PLUS_EXPR
,
9132 running_off
, stride_step
);
9133 vect_finish_stmt_generation (vinfo
, stmt_info
, incr
, gsi
);
9135 running_off
= newoff
;
9141 tree vec_inv
= build_constructor (lvectype
, v
);
9142 new_temp
= vect_init_vector (vinfo
, stmt_info
,
9143 vec_inv
, lvectype
, gsi
);
9144 new_stmt_info
= vinfo
->lookup_def (new_temp
);
9145 if (lvectype
!= vectype
)
9148 = gimple_build_assign (make_ssa_name (vectype
),
9150 build1 (VIEW_CONVERT_EXPR
,
9151 vectype
, new_temp
));
9153 = vect_finish_stmt_generation (vinfo
, stmt_info
,
9161 dr_chain
.quick_push (gimple_assign_lhs (new_stmt_info
->stmt
));
9163 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt_info
);
9168 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt_info
;
9170 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt_info
;
9171 prev_stmt_info
= new_stmt_info
;
9177 vect_transform_slp_perm_load (vinfo
, slp_node
, dr_chain
, gsi
, vf
,
9183 if (memory_access_type
== VMAT_GATHER_SCATTER
9184 || (!slp
&& memory_access_type
== VMAT_CONTIGUOUS
))
9185 grouped_load
= false;
9189 first_stmt_info
= DR_GROUP_FIRST_ELEMENT (stmt_info
);
9190 group_size
= DR_GROUP_SIZE (first_stmt_info
);
9191 /* For SLP vectorization we directly vectorize a subchain
9192 without permutation. */
9193 if (slp
&& ! SLP_TREE_LOAD_PERMUTATION (slp_node
).exists ())
9194 first_stmt_info
= SLP_TREE_SCALAR_STMTS (slp_node
)[0];
9195 /* For BB vectorization always use the first stmt to base
9196 the data ref pointer on. */
9198 first_stmt_info_for_drptr
= SLP_TREE_SCALAR_STMTS (slp_node
)[0];
9200 /* Check if the chain of loads is already vectorized. */
9201 if (STMT_VINFO_VEC_STMT (first_stmt_info
)
9202 /* For SLP we would need to copy over SLP_TREE_VEC_STMTS.
9203 ??? But we can only do so if there is exactly one
9204 as we have no way to get at the rest. Leave the CSE
9206 ??? With the group load eventually participating
9207 in multiple different permutations (having multiple
9208 slp nodes which refer to the same group) the CSE
9209 is even wrong code. See PR56270. */
9212 *vec_stmt
= STMT_VINFO_VEC_STMT (stmt_info
);
9215 first_dr_info
= STMT_VINFO_DR_INFO (first_stmt_info
);
9218 /* VEC_NUM is the number of vect stmts to be created for this group. */
9221 grouped_load
= false;
9222 /* If an SLP permutation is from N elements to N elements,
9223 and if one vector holds a whole number of N, we can load
9224 the inputs to the permutation in the same way as an
9225 unpermuted sequence. In other cases we need to load the
9226 whole group, not only the number of vector stmts the
9227 permutation result fits in. */
9228 unsigned scalar_lanes
= SLP_TREE_LANES (slp_node
);
9230 && (group_size
!= scalar_lanes
9231 || !multiple_p (nunits
, group_size
)))
9233 /* We don't yet generate such SLP_TREE_LOAD_PERMUTATIONs for
9234 variable VF; see vect_transform_slp_perm_load. */
9235 unsigned int const_vf
= vf
.to_constant ();
9236 unsigned int const_nunits
= nunits
.to_constant ();
9237 vec_num
= CEIL (group_size
* const_vf
, const_nunits
);
9238 group_gap_adj
= vf
* group_size
- nunits
* vec_num
;
9242 vec_num
= SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
);
9244 = group_size
- scalar_lanes
;
9248 vec_num
= group_size
;
9250 ref_type
= get_group_alias_ptr_type (first_stmt_info
);
9254 first_stmt_info
= stmt_info
;
9255 first_dr_info
= dr_info
;
9256 group_size
= vec_num
= 1;
9258 ref_type
= reference_alias_ptr_type (DR_REF (first_dr_info
->dr
));
9261 /* Gather-scatter accesses perform only component accesses, alignment
9262 is irrelevant for them. */
9263 if (memory_access_type
== VMAT_GATHER_SCATTER
)
9264 alignment_support_scheme
= dr_unaligned_supported
;
9266 alignment_support_scheme
9267 = vect_supportable_dr_alignment (vinfo
, first_dr_info
, false);
9269 gcc_assert (alignment_support_scheme
);
9270 vec_loop_masks
*loop_masks
9271 = (loop_vinfo
&& LOOP_VINFO_FULLY_MASKED_P (loop_vinfo
)
9272 ? &LOOP_VINFO_MASKS (loop_vinfo
)
9274 /* Targets with store-lane instructions must not require explicit
9275 realignment. vect_supportable_dr_alignment always returns either
9276 dr_aligned or dr_unaligned_supported for masked operations. */
9277 gcc_assert ((memory_access_type
!= VMAT_LOAD_STORE_LANES
9280 || alignment_support_scheme
== dr_aligned
9281 || alignment_support_scheme
== dr_unaligned_supported
);
9283 /* In case the vectorization factor (VF) is bigger than the number
9284 of elements that we can fit in a vectype (nunits), we have to generate
9285 more than one vector stmt - i.e - we need to "unroll" the
9286 vector stmt by a factor VF/nunits. In doing so, we record a pointer
9287 from one copy of the vector stmt to the next, in the field
9288 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
9289 stages to find the correct vector defs to be used when vectorizing
9290 stmts that use the defs of the current stmt. The example below
9291 illustrates the vectorization process when VF=16 and nunits=4 (i.e., we
9292 need to create 4 vectorized stmts):
9294 before vectorization:
9295 RELATED_STMT VEC_STMT
9299 step 1: vectorize stmt S1:
9300 We first create the vector stmt VS1_0, and, as usual, record a
9301 pointer to it in the STMT_VINFO_VEC_STMT of the scalar stmt S1.
9302 Next, we create the vector stmt VS1_1, and record a pointer to
9303 it in the STMT_VINFO_RELATED_STMT of the vector stmt VS1_0.
9304 Similarly, for VS1_2 and VS1_3. This is the resulting chain of
9306 RELATED_STMT VEC_STMT
9307 VS1_0: vx0 = memref0 VS1_1 -
9308 VS1_1: vx1 = memref1 VS1_2 -
9309 VS1_2: vx2 = memref2 VS1_3 -
9310 VS1_3: vx3 = memref3 - -
9311 S1: x = load - VS1_0
9314 See in documentation in vect_get_vec_def_for_stmt_copy for how the
9315 information we recorded in RELATED_STMT field is used to vectorize
9318 /* In case of interleaving (non-unit grouped access):
9325 Vectorized loads are created in the order of memory accesses
9326 starting from the access of the first stmt of the chain:
9329 VS2: vx1 = &base + vec_size*1
9330 VS3: vx3 = &base + vec_size*2
9331 VS4: vx4 = &base + vec_size*3
9333 Then permutation statements are generated:
9335 VS5: vx5 = VEC_PERM_EXPR < vx0, vx1, { 0, 2, ..., i*2 } >
9336 VS6: vx6 = VEC_PERM_EXPR < vx0, vx1, { 1, 3, ..., i*2+1 } >
9339 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
9340 (the order of the data-refs in the output of vect_permute_load_chain
9341 corresponds to the order of scalar stmts in the interleaving chain - see
9342 the documentation of vect_permute_load_chain()).
9343 The generation of permutation stmts and recording them in
9344 STMT_VINFO_VEC_STMT is done in vect_transform_grouped_load().
9346 In case of both multiple types and interleaving, the vector loads and
9347 permutation stmts above are created for every copy. The result vector
9348 stmts are put in STMT_VINFO_VEC_STMT for the first copy and in the
9349 corresponding STMT_VINFO_RELATED_STMT for the next copies. */
9351 /* If the data reference is aligned (dr_aligned) or potentially unaligned
9352 on a target that supports unaligned accesses (dr_unaligned_supported)
9353 we generate the following code:
9357 p = p + indx * vectype_size;
9362 Otherwise, the data reference is potentially unaligned on a target that
9363 does not support unaligned accesses (dr_explicit_realign_optimized) -
9364 then generate the following code, in which the data in each iteration is
9365 obtained by two vector loads, one from the previous iteration, and one
9366 from the current iteration:
9368 msq_init = *(floor(p1))
9369 p2 = initial_addr + VS - 1;
9370 realignment_token = call target_builtin;
9373 p2 = p2 + indx * vectype_size
9375 vec_dest = realign_load (msq, lsq, realignment_token)
9380 /* If the misalignment remains the same throughout the execution of the
9381 loop, we can create the init_addr and permutation mask at the loop
9382 preheader. Otherwise, it needs to be created inside the loop.
9383 This can only occur when vectorizing memory accesses in the inner-loop
9384 nested within an outer-loop that is being vectorized. */
9386 if (nested_in_vect_loop
9387 && !multiple_p (DR_STEP_ALIGNMENT (dr_info
->dr
),
9388 GET_MODE_SIZE (TYPE_MODE (vectype
))))
9390 gcc_assert (alignment_support_scheme
!= dr_explicit_realign_optimized
);
9391 compute_in_loop
= true;
9394 bool diff_first_stmt_info
9395 = first_stmt_info_for_drptr
&& first_stmt_info
!= first_stmt_info_for_drptr
;
9397 if ((alignment_support_scheme
== dr_explicit_realign_optimized
9398 || alignment_support_scheme
== dr_explicit_realign
)
9399 && !compute_in_loop
)
9401 /* If we have different first_stmt_info, we can't set up realignment
9402 here, since we can't guarantee first_stmt_info DR has been
9403 initialized yet, use first_stmt_info_for_drptr DR by bumping the
9404 distance from first_stmt_info DR instead as below. */
9405 if (!diff_first_stmt_info
)
9406 msq
= vect_setup_realignment (vinfo
,
9407 first_stmt_info
, gsi
, &realignment_token
,
9408 alignment_support_scheme
, NULL_TREE
,
9410 if (alignment_support_scheme
== dr_explicit_realign_optimized
)
9412 phi
= as_a
<gphi
*> (SSA_NAME_DEF_STMT (msq
));
9413 byte_offset
= size_binop (MINUS_EXPR
, TYPE_SIZE_UNIT (vectype
),
9415 gcc_assert (!first_stmt_info_for_drptr
);
9421 if (memory_access_type
== VMAT_CONTIGUOUS_REVERSE
)
9422 offset
= size_int (-TYPE_VECTOR_SUBPARTS (vectype
) + 1);
9425 tree vec_offset
= NULL_TREE
;
9426 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info
))
9428 aggr_type
= NULL_TREE
;
9431 else if (memory_access_type
== VMAT_GATHER_SCATTER
)
9433 aggr_type
= elem_type
;
9434 vect_get_strided_load_store_ops (stmt_info
, loop_vinfo
, &gs_info
,
9435 &bump
, &vec_offset
);
9439 if (memory_access_type
== VMAT_LOAD_STORE_LANES
)
9440 aggr_type
= build_array_type_nelts (elem_type
, vec_num
* nunits
);
9442 aggr_type
= vectype
;
9443 bump
= vect_get_data_ptr_increment (vinfo
, dr_info
, aggr_type
,
9444 memory_access_type
);
9447 tree vec_mask
= NULL_TREE
;
9448 prev_stmt_info
= NULL
;
9449 poly_uint64 group_elt
= 0;
9450 for (j
= 0; j
< ncopies
; j
++)
9452 stmt_vec_info new_stmt_info
= NULL
;
9453 /* 1. Create the vector or array pointer update chain. */
9456 bool simd_lane_access_p
9457 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) != 0;
9458 if (simd_lane_access_p
9459 && TREE_CODE (DR_BASE_ADDRESS (first_dr_info
->dr
)) == ADDR_EXPR
9460 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr_info
->dr
), 0))
9461 && integer_zerop (get_dr_vinfo_offset (vinfo
, first_dr_info
))
9462 && integer_zerop (DR_INIT (first_dr_info
->dr
))
9463 && alias_sets_conflict_p (get_alias_set (aggr_type
),
9464 get_alias_set (TREE_TYPE (ref_type
)))
9465 && (alignment_support_scheme
== dr_aligned
9466 || alignment_support_scheme
== dr_unaligned_supported
))
9468 dataref_ptr
= unshare_expr (DR_BASE_ADDRESS (first_dr_info
->dr
));
9469 dataref_offset
= build_int_cst (ref_type
, 0);
9471 else if (diff_first_stmt_info
)
9474 = vect_create_data_ref_ptr (vinfo
, first_stmt_info_for_drptr
,
9475 aggr_type
, at_loop
, offset
, &dummy
,
9476 gsi
, &ptr_incr
, simd_lane_access_p
,
9478 /* Adjust the pointer by the difference to first_stmt. */
9479 data_reference_p ptrdr
9480 = STMT_VINFO_DATA_REF (first_stmt_info_for_drptr
);
9482 = fold_convert (sizetype
,
9483 size_binop (MINUS_EXPR
,
9484 DR_INIT (first_dr_info
->dr
),
9486 dataref_ptr
= bump_vector_ptr (vinfo
, dataref_ptr
, ptr_incr
, gsi
,
9488 if (alignment_support_scheme
== dr_explicit_realign
)
9490 msq
= vect_setup_realignment (vinfo
,
9491 first_stmt_info_for_drptr
, gsi
,
9493 alignment_support_scheme
,
9494 dataref_ptr
, &at_loop
);
9495 gcc_assert (!compute_in_loop
);
9498 else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info
))
9499 vect_get_gather_scatter_ops (vinfo
, loop
, stmt_info
, &gs_info
,
9500 &dataref_ptr
, &vec_offset
);
9503 = vect_create_data_ref_ptr (vinfo
, first_stmt_info
, aggr_type
,
9505 offset
, &dummy
, gsi
, &ptr_incr
,
9512 auto_vec
<vec
<tree
> > vec_defs (1);
9513 vect_get_slp_defs (vinfo
, slp_node
, &vec_defs
);
9514 vec_mask
= vec_defs
[0][0];
9517 vec_mask
= vect_get_vec_def_for_operand (vinfo
, mask
, stmt_info
,
9524 dataref_offset
= int_const_binop (PLUS_EXPR
, dataref_offset
,
9526 else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info
))
9527 vec_offset
= vect_get_vec_def_for_stmt_copy (vinfo
, vec_offset
);
9529 dataref_ptr
= bump_vector_ptr (vinfo
, dataref_ptr
, ptr_incr
, gsi
,
9532 vec_mask
= vect_get_vec_def_for_stmt_copy (vinfo
, vec_mask
);
9535 if (grouped_load
|| slp_perm
)
9536 dr_chain
.create (vec_num
);
9538 if (memory_access_type
== VMAT_LOAD_STORE_LANES
)
9542 vec_array
= create_vector_array (vectype
, vec_num
);
9544 tree final_mask
= NULL_TREE
;
9546 final_mask
= vect_get_loop_mask (gsi
, loop_masks
, ncopies
,
9549 final_mask
= prepare_load_store_mask (mask_vectype
, final_mask
,
9556 VEC_ARRAY = MASK_LOAD_LANES (DATAREF_PTR, ALIAS_PTR,
9558 unsigned int align
= TYPE_ALIGN_UNIT (TREE_TYPE (vectype
));
9559 tree alias_ptr
= build_int_cst (ref_type
, align
);
9560 call
= gimple_build_call_internal (IFN_MASK_LOAD_LANES
, 3,
9561 dataref_ptr
, alias_ptr
,
9567 VEC_ARRAY = LOAD_LANES (MEM_REF[...all elements...]). */
9568 data_ref
= create_array_ref (aggr_type
, dataref_ptr
, ref_type
);
9569 call
= gimple_build_call_internal (IFN_LOAD_LANES
, 1, data_ref
);
9571 gimple_call_set_lhs (call
, vec_array
);
9572 gimple_call_set_nothrow (call
, true);
9573 new_stmt_info
= vect_finish_stmt_generation (vinfo
, stmt_info
,
9576 /* Extract each vector into an SSA_NAME. */
9577 for (i
= 0; i
< vec_num
; i
++)
9579 new_temp
= read_vector_array (vinfo
, stmt_info
, gsi
, scalar_dest
,
9581 dr_chain
.quick_push (new_temp
);
9584 /* Record the mapping between SSA_NAMEs and statements. */
9585 vect_record_grouped_load_vectors (vinfo
, stmt_info
, dr_chain
);
9587 /* Record that VEC_ARRAY is now dead. */
9588 vect_clobber_variable (vinfo
, stmt_info
, gsi
, vec_array
);
9592 for (i
= 0; i
< vec_num
; i
++)
9594 tree final_mask
= NULL_TREE
;
9596 && memory_access_type
!= VMAT_INVARIANT
)
9597 final_mask
= vect_get_loop_mask (gsi
, loop_masks
,
9599 vectype
, vec_num
* j
+ i
);
9601 final_mask
= prepare_load_store_mask (mask_vectype
, final_mask
,
9605 dataref_ptr
= bump_vector_ptr (vinfo
, dataref_ptr
, ptr_incr
,
9606 gsi
, stmt_info
, bump
);
9608 /* 2. Create the vector-load in the loop. */
9609 gimple
*new_stmt
= NULL
;
9610 switch (alignment_support_scheme
)
9613 case dr_unaligned_supported
:
9615 unsigned int misalign
;
9616 unsigned HOST_WIDE_INT align
;
9618 if (memory_access_type
== VMAT_GATHER_SCATTER
)
9620 tree zero
= build_zero_cst (vectype
);
9621 tree scale
= size_int (gs_info
.scale
);
9624 call
= gimple_build_call_internal
9625 (IFN_MASK_GATHER_LOAD
, 5, dataref_ptr
,
9626 vec_offset
, scale
, zero
, final_mask
);
9628 call
= gimple_build_call_internal
9629 (IFN_GATHER_LOAD
, 4, dataref_ptr
,
9630 vec_offset
, scale
, zero
);
9631 gimple_call_set_nothrow (call
, true);
9633 data_ref
= NULL_TREE
;
9638 known_alignment (DR_TARGET_ALIGNMENT (first_dr_info
));
9639 if (alignment_support_scheme
== dr_aligned
)
9641 gcc_assert (aligned_access_p (first_dr_info
));
9644 else if (DR_MISALIGNMENT (first_dr_info
) == -1)
9646 align
= dr_alignment
9647 (vect_dr_behavior (vinfo
, first_dr_info
));
9651 misalign
= DR_MISALIGNMENT (first_dr_info
);
9652 if (dataref_offset
== NULL_TREE
9653 && TREE_CODE (dataref_ptr
) == SSA_NAME
)
9654 set_ptr_info_alignment (get_ptr_info (dataref_ptr
),
9659 align
= least_bit_hwi (misalign
| align
);
9660 tree ptr
= build_int_cst (ref_type
, align
);
9662 = gimple_build_call_internal (IFN_MASK_LOAD
, 3,
9665 gimple_call_set_nothrow (call
, true);
9667 data_ref
= NULL_TREE
;
9671 tree ltype
= vectype
;
9672 tree new_vtype
= NULL_TREE
;
9673 unsigned HOST_WIDE_INT gap
9674 = DR_GROUP_GAP (first_stmt_info
);
9675 unsigned int vect_align
9676 = vect_known_alignment_in_bytes (first_dr_info
);
9677 unsigned int scalar_dr_size
9678 = vect_get_scalar_dr_size (first_dr_info
);
9679 /* If there's no peeling for gaps but we have a gap
9680 with slp loads then load the lower half of the
9681 vector only. See get_group_load_store_type for
9682 when we apply this optimization. */
9685 && !LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo
)
9687 && known_eq (nunits
, (group_size
- gap
) * 2)
9688 && known_eq (nunits
, group_size
)
9689 && gap
>= (vect_align
/ scalar_dr_size
))
9693 = vector_vector_composition_type (vectype
, 2,
9695 if (new_vtype
!= NULL_TREE
)
9699 = (dataref_offset
? dataref_offset
9700 : build_int_cst (ref_type
, 0));
9701 if (ltype
!= vectype
9702 && memory_access_type
== VMAT_CONTIGUOUS_REVERSE
)
9704 unsigned HOST_WIDE_INT gap_offset
9705 = gap
* tree_to_uhwi (TYPE_SIZE_UNIT (elem_type
));
9706 tree gapcst
= build_int_cst (ref_type
, gap_offset
);
9707 offset
= size_binop (PLUS_EXPR
, offset
, gapcst
);
9710 = fold_build2 (MEM_REF
, ltype
, dataref_ptr
, offset
);
9711 if (alignment_support_scheme
== dr_aligned
)
9713 else if (DR_MISALIGNMENT (first_dr_info
) == -1)
9714 TREE_TYPE (data_ref
)
9715 = build_aligned_type (TREE_TYPE (data_ref
),
9716 align
* BITS_PER_UNIT
);
9718 TREE_TYPE (data_ref
)
9719 = build_aligned_type (TREE_TYPE (data_ref
),
9720 TYPE_ALIGN (elem_type
));
9721 if (ltype
!= vectype
)
9723 vect_copy_ref_info (data_ref
,
9724 DR_REF (first_dr_info
->dr
));
9725 tree tem
= make_ssa_name (ltype
);
9726 new_stmt
= gimple_build_assign (tem
, data_ref
);
9727 vect_finish_stmt_generation (vinfo
, stmt_info
,
9730 vec
<constructor_elt
, va_gc
> *v
;
9732 if (memory_access_type
== VMAT_CONTIGUOUS_REVERSE
)
9734 CONSTRUCTOR_APPEND_ELT (v
, NULL_TREE
,
9735 build_zero_cst (ltype
));
9736 CONSTRUCTOR_APPEND_ELT (v
, NULL_TREE
, tem
);
9740 CONSTRUCTOR_APPEND_ELT (v
, NULL_TREE
, tem
);
9741 CONSTRUCTOR_APPEND_ELT (v
, NULL_TREE
,
9742 build_zero_cst (ltype
));
9744 gcc_assert (new_vtype
!= NULL_TREE
);
9745 if (new_vtype
== vectype
)
9746 new_stmt
= gimple_build_assign (
9747 vec_dest
, build_constructor (vectype
, v
));
9750 tree new_vname
= make_ssa_name (new_vtype
);
9751 new_stmt
= gimple_build_assign (
9752 new_vname
, build_constructor (new_vtype
, v
));
9753 vect_finish_stmt_generation (vinfo
, stmt_info
,
9755 new_stmt
= gimple_build_assign (
9756 vec_dest
, build1 (VIEW_CONVERT_EXPR
, vectype
,
9763 case dr_explicit_realign
:
9767 tree vs
= size_int (TYPE_VECTOR_SUBPARTS (vectype
));
9769 if (compute_in_loop
)
9770 msq
= vect_setup_realignment (vinfo
, first_stmt_info
, gsi
,
9772 dr_explicit_realign
,
9775 if (TREE_CODE (dataref_ptr
) == SSA_NAME
)
9776 ptr
= copy_ssa_name (dataref_ptr
);
9778 ptr
= make_ssa_name (TREE_TYPE (dataref_ptr
));
9779 // For explicit realign the target alignment should be
9780 // known at compile time.
9781 unsigned HOST_WIDE_INT align
=
9782 DR_TARGET_ALIGNMENT (first_dr_info
).to_constant ();
9783 new_stmt
= gimple_build_assign
9784 (ptr
, BIT_AND_EXPR
, dataref_ptr
,
9786 (TREE_TYPE (dataref_ptr
),
9787 -(HOST_WIDE_INT
) align
));
9788 vect_finish_stmt_generation (vinfo
, stmt_info
,
9791 = build2 (MEM_REF
, vectype
, ptr
,
9792 build_int_cst (ref_type
, 0));
9793 vect_copy_ref_info (data_ref
, DR_REF (first_dr_info
->dr
));
9794 vec_dest
= vect_create_destination_var (scalar_dest
,
9796 new_stmt
= gimple_build_assign (vec_dest
, data_ref
);
9797 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
9798 gimple_assign_set_lhs (new_stmt
, new_temp
);
9799 gimple_move_vops (new_stmt
, stmt_info
->stmt
);
9800 vect_finish_stmt_generation (vinfo
, stmt_info
,
9804 bump
= size_binop (MULT_EXPR
, vs
,
9805 TYPE_SIZE_UNIT (elem_type
));
9806 bump
= size_binop (MINUS_EXPR
, bump
, size_one_node
);
9807 ptr
= bump_vector_ptr (vinfo
, dataref_ptr
, NULL
, gsi
,
9809 new_stmt
= gimple_build_assign
9810 (NULL_TREE
, BIT_AND_EXPR
, ptr
,
9812 (TREE_TYPE (ptr
), -(HOST_WIDE_INT
) align
));
9813 ptr
= copy_ssa_name (ptr
, new_stmt
);
9814 gimple_assign_set_lhs (new_stmt
, ptr
);
9815 vect_finish_stmt_generation (vinfo
, stmt_info
,
9818 = build2 (MEM_REF
, vectype
, ptr
,
9819 build_int_cst (ref_type
, 0));
9822 case dr_explicit_realign_optimized
:
9824 if (TREE_CODE (dataref_ptr
) == SSA_NAME
)
9825 new_temp
= copy_ssa_name (dataref_ptr
);
9827 new_temp
= make_ssa_name (TREE_TYPE (dataref_ptr
));
9828 // We should only be doing this if we know the target
9829 // alignment at compile time.
9830 unsigned HOST_WIDE_INT align
=
9831 DR_TARGET_ALIGNMENT (first_dr_info
).to_constant ();
9832 new_stmt
= gimple_build_assign
9833 (new_temp
, BIT_AND_EXPR
, dataref_ptr
,
9834 build_int_cst (TREE_TYPE (dataref_ptr
),
9835 -(HOST_WIDE_INT
) align
));
9836 vect_finish_stmt_generation (vinfo
, stmt_info
,
9839 = build2 (MEM_REF
, vectype
, new_temp
,
9840 build_int_cst (ref_type
, 0));
9846 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
9847 /* DATA_REF is null if we've already built the statement. */
9850 vect_copy_ref_info (data_ref
, DR_REF (first_dr_info
->dr
));
9851 new_stmt
= gimple_build_assign (vec_dest
, data_ref
);
9853 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
9854 gimple_set_lhs (new_stmt
, new_temp
);
9856 = vect_finish_stmt_generation (vinfo
, stmt_info
,
9859 /* 3. Handle explicit realignment if necessary/supported.
9861 vec_dest = realign_load (msq, lsq, realignment_token) */
9862 if (alignment_support_scheme
== dr_explicit_realign_optimized
9863 || alignment_support_scheme
== dr_explicit_realign
)
9865 lsq
= gimple_assign_lhs (new_stmt
);
9866 if (!realignment_token
)
9867 realignment_token
= dataref_ptr
;
9868 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
9869 new_stmt
= gimple_build_assign (vec_dest
, REALIGN_LOAD_EXPR
,
9870 msq
, lsq
, realignment_token
);
9871 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
9872 gimple_assign_set_lhs (new_stmt
, new_temp
);
9874 = vect_finish_stmt_generation (vinfo
, stmt_info
,
9877 if (alignment_support_scheme
== dr_explicit_realign_optimized
)
9880 if (i
== vec_num
- 1 && j
== ncopies
- 1)
9881 add_phi_arg (phi
, lsq
,
9882 loop_latch_edge (containing_loop
),
9888 if (memory_access_type
== VMAT_CONTIGUOUS_REVERSE
)
9890 tree perm_mask
= perm_mask_for_reverse (vectype
);
9891 new_temp
= permute_vec_elements (vinfo
, new_temp
, new_temp
,
9892 perm_mask
, stmt_info
, gsi
);
9893 new_stmt_info
= vinfo
->lookup_def (new_temp
);
9896 /* Collect vector loads and later create their permutation in
9897 vect_transform_grouped_load (). */
9898 if (grouped_load
|| slp_perm
)
9899 dr_chain
.quick_push (new_temp
);
9901 /* Store vector loads in the corresponding SLP_NODE. */
9902 if (slp
&& !slp_perm
)
9903 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt_info
);
9905 /* With SLP permutation we load the gaps as well, without
9906 we need to skip the gaps after we manage to fully load
9907 all elements. group_gap_adj is DR_GROUP_SIZE here. */
9908 group_elt
+= nunits
;
9909 if (maybe_ne (group_gap_adj
, 0U)
9911 && known_eq (group_elt
, group_size
- group_gap_adj
))
9913 poly_wide_int bump_val
9914 = (wi::to_wide (TYPE_SIZE_UNIT (elem_type
))
9916 tree bump
= wide_int_to_tree (sizetype
, bump_val
);
9917 dataref_ptr
= bump_vector_ptr (vinfo
, dataref_ptr
, ptr_incr
,
9918 gsi
, stmt_info
, bump
);
9922 /* Bump the vector pointer to account for a gap or for excess
9923 elements loaded for a permuted SLP load. */
9924 if (maybe_ne (group_gap_adj
, 0U) && slp_perm
)
9926 poly_wide_int bump_val
9927 = (wi::to_wide (TYPE_SIZE_UNIT (elem_type
))
9929 tree bump
= wide_int_to_tree (sizetype
, bump_val
);
9930 dataref_ptr
= bump_vector_ptr (vinfo
, dataref_ptr
, ptr_incr
, gsi
,
9935 if (slp
&& !slp_perm
)
9941 bool ok
= vect_transform_slp_perm_load (vinfo
, slp_node
, dr_chain
,
9942 gsi
, vf
, false, &n_perms
);
9949 if (memory_access_type
!= VMAT_LOAD_STORE_LANES
)
9950 vect_transform_grouped_load (vinfo
, stmt_info
, dr_chain
,
9952 *vec_stmt
= STMT_VINFO_VEC_STMT (stmt_info
);
9957 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt_info
;
9959 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt_info
;
9960 prev_stmt_info
= new_stmt_info
;
9963 dr_chain
.release ();
9969 /* Function vect_is_simple_cond.
9972 LOOP - the loop that is being vectorized.
9973 COND - Condition that is checked for simple use.
9976 *COMP_VECTYPE - the vector type for the comparison.
9977 *DTS - The def types for the arguments of the comparison
9979 Returns whether a COND can be vectorized. Checks whether
9980 condition operands are supportable using vec_is_simple_use. */
9983 vect_is_simple_cond (tree cond
, vec_info
*vinfo
, stmt_vec_info stmt_info
,
9984 slp_tree slp_node
, tree
*comp_vectype
,
9985 enum vect_def_type
*dts
, tree vectype
)
9988 tree vectype1
= NULL_TREE
, vectype2
= NULL_TREE
;
9992 if (TREE_CODE (cond
) == SSA_NAME
9993 && VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (cond
)))
9995 if (!vect_is_simple_use (vinfo
, stmt_info
, slp_node
, 0, &cond
,
9996 &slp_op
, &dts
[0], comp_vectype
)
9998 || !VECTOR_BOOLEAN_TYPE_P (*comp_vectype
))
10003 if (!COMPARISON_CLASS_P (cond
))
10006 lhs
= TREE_OPERAND (cond
, 0);
10007 rhs
= TREE_OPERAND (cond
, 1);
10009 if (TREE_CODE (lhs
) == SSA_NAME
)
10011 if (!vect_is_simple_use (vinfo
, stmt_info
, slp_node
, 0,
10012 &lhs
, &slp_op
, &dts
[0], &vectype1
))
10015 else if (TREE_CODE (lhs
) == INTEGER_CST
|| TREE_CODE (lhs
) == REAL_CST
10016 || TREE_CODE (lhs
) == FIXED_CST
)
10017 dts
[0] = vect_constant_def
;
10021 if (TREE_CODE (rhs
) == SSA_NAME
)
10023 if (!vect_is_simple_use (vinfo
, stmt_info
, slp_node
, 1,
10024 &rhs
, &slp_op
, &dts
[1], &vectype2
))
10027 else if (TREE_CODE (rhs
) == INTEGER_CST
|| TREE_CODE (rhs
) == REAL_CST
10028 || TREE_CODE (rhs
) == FIXED_CST
)
10029 dts
[1] = vect_constant_def
;
10033 if (vectype1
&& vectype2
10034 && maybe_ne (TYPE_VECTOR_SUBPARTS (vectype1
),
10035 TYPE_VECTOR_SUBPARTS (vectype2
)))
10038 *comp_vectype
= vectype1
? vectype1
: vectype2
;
10039 /* Invariant comparison. */
10040 if (! *comp_vectype
)
10042 tree scalar_type
= TREE_TYPE (lhs
);
10043 if (VECT_SCALAR_BOOLEAN_TYPE_P (scalar_type
))
10044 *comp_vectype
= truth_type_for (vectype
);
10047 /* If we can widen the comparison to match vectype do so. */
10048 if (INTEGRAL_TYPE_P (scalar_type
)
10050 && tree_int_cst_lt (TYPE_SIZE (scalar_type
),
10051 TYPE_SIZE (TREE_TYPE (vectype
))))
10052 scalar_type
= build_nonstandard_integer_type
10053 (vector_element_bits (vectype
), TYPE_UNSIGNED (scalar_type
));
10054 *comp_vectype
= get_vectype_for_scalar_type (vinfo
, scalar_type
,
10062 /* vectorizable_condition.
10064 Check if STMT_INFO is conditional modify expression that can be vectorized.
10065 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
10066 stmt using VEC_COND_EXPR to replace it, put it in VEC_STMT, and insert it
10069 When STMT_INFO is vectorized as a nested cycle, for_reduction is true.
10071 Return true if STMT_INFO is vectorizable in this way. */
10074 vectorizable_condition (vec_info
*vinfo
,
10075 stmt_vec_info stmt_info
, gimple_stmt_iterator
*gsi
,
10076 stmt_vec_info
*vec_stmt
,
10077 slp_tree slp_node
, stmt_vector_for_cost
*cost_vec
)
10079 tree scalar_dest
= NULL_TREE
;
10080 tree vec_dest
= NULL_TREE
;
10081 tree cond_expr
, cond_expr0
= NULL_TREE
, cond_expr1
= NULL_TREE
;
10082 tree then_clause
, else_clause
;
10083 tree comp_vectype
= NULL_TREE
;
10084 tree vec_cond_lhs
= NULL_TREE
, vec_cond_rhs
= NULL_TREE
;
10085 tree vec_then_clause
= NULL_TREE
, vec_else_clause
= NULL_TREE
;
10088 loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
);
10089 enum vect_def_type dts
[4]
10090 = {vect_unknown_def_type
, vect_unknown_def_type
,
10091 vect_unknown_def_type
, vect_unknown_def_type
};
10095 enum tree_code code
, cond_code
, bitop1
= NOP_EXPR
, bitop2
= NOP_EXPR
;
10096 stmt_vec_info prev_stmt_info
= NULL
;
10098 bb_vec_info bb_vinfo
= dyn_cast
<bb_vec_info
> (vinfo
);
10099 vec
<tree
> vec_oprnds0
= vNULL
;
10100 vec
<tree
> vec_oprnds1
= vNULL
;
10101 vec
<tree
> vec_oprnds2
= vNULL
;
10102 vec
<tree
> vec_oprnds3
= vNULL
;
10104 bool masked
= false;
10106 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
10109 /* Is vectorizable conditional operation? */
10110 gassign
*stmt
= dyn_cast
<gassign
*> (stmt_info
->stmt
);
10114 code
= gimple_assign_rhs_code (stmt
);
10115 if (code
!= COND_EXPR
)
10118 stmt_vec_info reduc_info
= NULL
;
10119 int reduc_index
= -1;
10120 vect_reduction_type reduction_type
= TREE_CODE_REDUCTION
;
10122 = STMT_VINFO_REDUC_DEF (vect_orig_stmt (stmt_info
)) != NULL
;
10125 if (STMT_SLP_TYPE (stmt_info
))
10127 reduc_info
= info_for_reduction (vinfo
, stmt_info
);
10128 reduction_type
= STMT_VINFO_REDUC_TYPE (reduc_info
);
10129 reduc_index
= STMT_VINFO_REDUC_IDX (stmt_info
);
10130 gcc_assert (reduction_type
!= EXTRACT_LAST_REDUCTION
10131 || reduc_index
!= -1);
10135 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
)
10138 /* FORNOW: only supported as part of a reduction. */
10139 if (STMT_VINFO_LIVE_P (stmt_info
))
10141 if (dump_enabled_p ())
10142 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
10143 "value used after loop.\n");
10148 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
10149 tree vectype1
= NULL_TREE
, vectype2
= NULL_TREE
;
10154 vec_num
= SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
);
10158 ncopies
= vect_get_num_copies (loop_vinfo
, vectype
);
10162 gcc_assert (ncopies
>= 1);
10163 if (for_reduction
&& ncopies
> 1)
10164 return false; /* FORNOW */
10166 cond_expr
= gimple_assign_rhs1 (stmt
);
10168 if (!vect_is_simple_cond (cond_expr
, vinfo
, stmt_info
, slp_node
,
10169 &comp_vectype
, &dts
[0], vectype
)
10173 unsigned op_adjust
= COMPARISON_CLASS_P (cond_expr
) ? 1 : 0;
10174 slp_tree then_slp_node
, else_slp_node
;
10175 if (!vect_is_simple_use (vinfo
, stmt_info
, slp_node
, 1 + op_adjust
,
10176 &then_clause
, &then_slp_node
, &dts
[2], &vectype1
))
10178 if (!vect_is_simple_use (vinfo
, stmt_info
, slp_node
, 2 + op_adjust
,
10179 &else_clause
, &else_slp_node
, &dts
[3], &vectype2
))
10182 if (vectype1
&& !useless_type_conversion_p (vectype
, vectype1
))
10185 if (vectype2
&& !useless_type_conversion_p (vectype
, vectype2
))
10188 masked
= !COMPARISON_CLASS_P (cond_expr
);
10189 vec_cmp_type
= truth_type_for (comp_vectype
);
10191 if (vec_cmp_type
== NULL_TREE
)
10194 cond_code
= TREE_CODE (cond_expr
);
10197 cond_expr0
= TREE_OPERAND (cond_expr
, 0);
10198 cond_expr1
= TREE_OPERAND (cond_expr
, 1);
10201 /* For conditional reductions, the "then" value needs to be the candidate
10202 value calculated by this iteration while the "else" value needs to be
10203 the result carried over from previous iterations. If the COND_EXPR
10204 is the other way around, we need to swap it. */
10205 bool must_invert_cmp_result
= false;
10206 if (reduction_type
== EXTRACT_LAST_REDUCTION
&& reduc_index
== 1)
10209 must_invert_cmp_result
= true;
10212 bool honor_nans
= HONOR_NANS (TREE_TYPE (cond_expr0
));
10213 tree_code new_code
= invert_tree_comparison (cond_code
, honor_nans
);
10214 if (new_code
== ERROR_MARK
)
10215 must_invert_cmp_result
= true;
10218 cond_code
= new_code
;
10219 /* Make sure we don't accidentally use the old condition. */
10220 cond_expr
= NULL_TREE
;
10223 std::swap (then_clause
, else_clause
);
10226 if (!masked
&& VECTOR_BOOLEAN_TYPE_P (comp_vectype
))
10228 /* Boolean values may have another representation in vectors
10229 and therefore we prefer bit operations over comparison for
10230 them (which also works for scalar masks). We store opcodes
10231 to use in bitop1 and bitop2. Statement is vectorized as
10232 BITOP2 (rhs1 BITOP1 rhs2) or rhs1 BITOP2 (BITOP1 rhs2)
10233 depending on bitop1 and bitop2 arity. */
10237 bitop1
= BIT_NOT_EXPR
;
10238 bitop2
= BIT_AND_EXPR
;
10241 bitop1
= BIT_NOT_EXPR
;
10242 bitop2
= BIT_IOR_EXPR
;
10245 bitop1
= BIT_NOT_EXPR
;
10246 bitop2
= BIT_AND_EXPR
;
10247 std::swap (cond_expr0
, cond_expr1
);
10250 bitop1
= BIT_NOT_EXPR
;
10251 bitop2
= BIT_IOR_EXPR
;
10252 std::swap (cond_expr0
, cond_expr1
);
10255 bitop1
= BIT_XOR_EXPR
;
10258 bitop1
= BIT_XOR_EXPR
;
10259 bitop2
= BIT_NOT_EXPR
;
10264 cond_code
= SSA_NAME
;
10267 if (TREE_CODE_CLASS (cond_code
) == tcc_comparison
10268 && reduction_type
== EXTRACT_LAST_REDUCTION
10269 && !expand_vec_cmp_expr_p (comp_vectype
, vec_cmp_type
, cond_code
))
10271 if (dump_enabled_p ())
10272 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
10273 "reduction comparison operation not supported.\n");
10279 if (bitop1
!= NOP_EXPR
)
10281 machine_mode mode
= TYPE_MODE (comp_vectype
);
10284 optab
= optab_for_tree_code (bitop1
, comp_vectype
, optab_default
);
10285 if (!optab
|| optab_handler (optab
, mode
) == CODE_FOR_nothing
)
10288 if (bitop2
!= NOP_EXPR
)
10290 optab
= optab_for_tree_code (bitop2
, comp_vectype
,
10292 if (!optab
|| optab_handler (optab
, mode
) == CODE_FOR_nothing
)
10297 vect_cost_for_stmt kind
= vector_stmt
;
10298 if (reduction_type
== EXTRACT_LAST_REDUCTION
)
10299 /* Count one reduction-like operation per vector. */
10300 kind
= vec_to_scalar
;
10301 else if (!expand_vec_cond_expr_p (vectype
, comp_vectype
, cond_code
))
10305 && (!vect_maybe_update_slp_op_vectype
10306 (SLP_TREE_CHILDREN (slp_node
)[0], comp_vectype
)
10308 && !vect_maybe_update_slp_op_vectype
10309 (SLP_TREE_CHILDREN (slp_node
)[1], comp_vectype
))
10310 || !vect_maybe_update_slp_op_vectype (then_slp_node
, vectype
)
10311 || !vect_maybe_update_slp_op_vectype (else_slp_node
, vectype
)))
10313 if (dump_enabled_p ())
10314 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
10315 "incompatible vector types for invariants\n");
10320 && LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo
)
10321 && reduction_type
== EXTRACT_LAST_REDUCTION
)
10322 vect_record_loop_mask (loop_vinfo
, &LOOP_VINFO_MASKS (loop_vinfo
),
10323 ncopies
* vec_num
, vectype
, NULL
);
10325 STMT_VINFO_TYPE (stmt_info
) = condition_vec_info_type
;
10326 vect_model_simple_cost (vinfo
, stmt_info
, ncopies
, dts
, ndts
, slp_node
,
10335 vec_oprnds0
.create (1);
10336 vec_oprnds1
.create (1);
10337 vec_oprnds2
.create (1);
10338 vec_oprnds3
.create (1);
10342 scalar_dest
= gimple_assign_lhs (stmt
);
10343 if (reduction_type
!= EXTRACT_LAST_REDUCTION
)
10344 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
10346 /* Handle cond expr. */
10347 for (j
= 0; j
< ncopies
; j
++)
10349 bool swap_cond_operands
= false;
10351 /* See whether another part of the vectorized code applies a loop
10352 mask to the condition, or to its inverse. */
10354 vec_loop_masks
*masks
= NULL
;
10355 if (loop_vinfo
&& LOOP_VINFO_FULLY_MASKED_P (loop_vinfo
))
10357 if (reduction_type
== EXTRACT_LAST_REDUCTION
)
10358 masks
= &LOOP_VINFO_MASKS (loop_vinfo
);
10361 scalar_cond_masked_key
cond (cond_expr
, ncopies
);
10362 if (loop_vinfo
->scalar_cond_masked_set
.contains (cond
))
10363 masks
= &LOOP_VINFO_MASKS (loop_vinfo
);
10366 bool honor_nans
= HONOR_NANS (TREE_TYPE (cond
.op0
));
10367 cond
.code
= invert_tree_comparison (cond
.code
, honor_nans
);
10368 if (loop_vinfo
->scalar_cond_masked_set
.contains (cond
))
10370 masks
= &LOOP_VINFO_MASKS (loop_vinfo
);
10371 cond_code
= cond
.code
;
10372 swap_cond_operands
= true;
10378 stmt_vec_info new_stmt_info
= NULL
;
10383 auto_vec
<vec
<tree
>, 4> vec_defs
;
10384 vect_get_slp_defs (vinfo
, slp_node
, &vec_defs
);
10385 vec_oprnds3
= vec_defs
.pop ();
10386 vec_oprnds2
= vec_defs
.pop ();
10388 vec_oprnds1
= vec_defs
.pop ();
10389 vec_oprnds0
= vec_defs
.pop ();
10396 = vect_get_vec_def_for_operand (vinfo
, cond_expr
, stmt_info
,
10402 = vect_get_vec_def_for_operand (vinfo
, cond_expr0
,
10403 stmt_info
, comp_vectype
);
10405 = vect_get_vec_def_for_operand (vinfo
, cond_expr1
,
10406 stmt_info
, comp_vectype
);
10408 vec_then_clause
= vect_get_vec_def_for_operand (vinfo
,
10411 if (reduction_type
!= EXTRACT_LAST_REDUCTION
)
10412 vec_else_clause
= vect_get_vec_def_for_operand (vinfo
,
10420 = vect_get_vec_def_for_stmt_copy (vinfo
, vec_oprnds0
.pop ());
10423 = vect_get_vec_def_for_stmt_copy (vinfo
, vec_oprnds1
.pop ());
10425 vec_then_clause
= vect_get_vec_def_for_stmt_copy (vinfo
,
10426 vec_oprnds2
.pop ());
10427 vec_else_clause
= vect_get_vec_def_for_stmt_copy (vinfo
,
10428 vec_oprnds3
.pop ());
10433 vec_oprnds0
.quick_push (vec_cond_lhs
);
10435 vec_oprnds1
.quick_push (vec_cond_rhs
);
10436 vec_oprnds2
.quick_push (vec_then_clause
);
10437 vec_oprnds3
.quick_push (vec_else_clause
);
10440 /* Arguments are ready. Create the new vector stmt. */
10441 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vec_cond_lhs
)
10443 vec_then_clause
= vec_oprnds2
[i
];
10444 vec_else_clause
= vec_oprnds3
[i
];
10446 if (swap_cond_operands
)
10447 std::swap (vec_then_clause
, vec_else_clause
);
10450 vec_compare
= vec_cond_lhs
;
10453 vec_cond_rhs
= vec_oprnds1
[i
];
10454 if (bitop1
== NOP_EXPR
)
10455 vec_compare
= build2 (cond_code
, vec_cmp_type
,
10456 vec_cond_lhs
, vec_cond_rhs
);
10459 new_temp
= make_ssa_name (vec_cmp_type
);
10461 if (bitop1
== BIT_NOT_EXPR
)
10462 new_stmt
= gimple_build_assign (new_temp
, bitop1
,
10466 = gimple_build_assign (new_temp
, bitop1
, vec_cond_lhs
,
10468 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
10469 if (bitop2
== NOP_EXPR
)
10470 vec_compare
= new_temp
;
10471 else if (bitop2
== BIT_NOT_EXPR
)
10473 /* Instead of doing ~x ? y : z do x ? z : y. */
10474 vec_compare
= new_temp
;
10475 std::swap (vec_then_clause
, vec_else_clause
);
10479 vec_compare
= make_ssa_name (vec_cmp_type
);
10481 = gimple_build_assign (vec_compare
, bitop2
,
10482 vec_cond_lhs
, new_temp
);
10483 vect_finish_stmt_generation (vinfo
, stmt_info
,
10489 /* If we decided to apply a loop mask to the result of the vector
10490 comparison, AND the comparison with the mask now. Later passes
10491 should then be able to reuse the AND results between mulitple
10495 for (int i = 0; i < 100; ++i)
10496 x[i] = y[i] ? z[i] : 10;
10498 results in following optimized GIMPLE:
10500 mask__35.8_43 = vect__4.7_41 != { 0, ... };
10501 vec_mask_and_46 = loop_mask_40 & mask__35.8_43;
10502 _19 = &MEM[base: z_12(D), index: ivtmp_56, step: 4, offset: 0B];
10503 vect_iftmp.11_47 = .MASK_LOAD (_19, 4B, vec_mask_and_46);
10504 vect_iftmp.12_52 = VEC_COND_EXPR <vec_mask_and_46,
10505 vect_iftmp.11_47, { 10, ... }>;
10507 instead of using a masked and unmasked forms of
10508 vec != { 0, ... } (masked in the MASK_LOAD,
10509 unmasked in the VEC_COND_EXPR). */
10511 /* Force vec_compare to be an SSA_NAME rather than a comparison,
10512 in cases where that's necessary. */
10514 if (masks
|| reduction_type
== EXTRACT_LAST_REDUCTION
)
10516 if (!is_gimple_val (vec_compare
))
10518 tree vec_compare_name
= make_ssa_name (vec_cmp_type
);
10519 gassign
*new_stmt
= gimple_build_assign (vec_compare_name
,
10521 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
10522 vec_compare
= vec_compare_name
;
10525 if (must_invert_cmp_result
)
10527 tree vec_compare_name
= make_ssa_name (vec_cmp_type
);
10528 gassign
*new_stmt
= gimple_build_assign (vec_compare_name
,
10531 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
10532 vec_compare
= vec_compare_name
;
10537 unsigned vec_num
= vec_oprnds0
.length ();
10539 = vect_get_loop_mask (gsi
, masks
, vec_num
* ncopies
,
10540 vectype
, vec_num
* j
+ i
);
10541 tree tmp2
= make_ssa_name (vec_cmp_type
);
10543 = gimple_build_assign (tmp2
, BIT_AND_EXPR
, vec_compare
,
10545 vect_finish_stmt_generation (vinfo
, stmt_info
, g
, gsi
);
10546 vec_compare
= tmp2
;
10550 if (reduction_type
== EXTRACT_LAST_REDUCTION
)
10552 gimple
*old_stmt
= vect_orig_stmt (stmt_info
)->stmt
;
10553 tree lhs
= gimple_get_lhs (old_stmt
);
10554 gcall
*new_stmt
= gimple_build_call_internal
10555 (IFN_FOLD_EXTRACT_LAST
, 3, else_clause
, vec_compare
,
10557 gimple_call_set_lhs (new_stmt
, lhs
);
10558 SSA_NAME_DEF_STMT (lhs
) = new_stmt
;
10559 if (old_stmt
== gsi_stmt (*gsi
))
10560 new_stmt_info
= vect_finish_replace_stmt (vinfo
,
10561 stmt_info
, new_stmt
);
10564 /* In this case we're moving the definition to later in the
10565 block. That doesn't matter because the only uses of the
10566 lhs are in phi statements. */
10567 gimple_stmt_iterator old_gsi
= gsi_for_stmt (old_stmt
);
10568 gsi_remove (&old_gsi
, true);
10570 = vect_finish_stmt_generation (vinfo
, stmt_info
,
10576 new_temp
= make_ssa_name (vec_dest
);
10578 = gimple_build_assign (new_temp
, VEC_COND_EXPR
, vec_compare
,
10579 vec_then_clause
, vec_else_clause
);
10581 = vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
10584 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt_info
);
10591 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt_info
;
10593 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt_info
;
10595 prev_stmt_info
= new_stmt_info
;
10598 vec_oprnds0
.release ();
10599 vec_oprnds1
.release ();
10600 vec_oprnds2
.release ();
10601 vec_oprnds3
.release ();
10606 /* vectorizable_comparison.
10608 Check if STMT_INFO is comparison expression that can be vectorized.
10609 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
10610 comparison, put it in VEC_STMT, and insert it at GSI.
10612 Return true if STMT_INFO is vectorizable in this way. */
10615 vectorizable_comparison (vec_info
*vinfo
,
10616 stmt_vec_info stmt_info
, gimple_stmt_iterator
*gsi
,
10617 stmt_vec_info
*vec_stmt
,
10618 slp_tree slp_node
, stmt_vector_for_cost
*cost_vec
)
10620 tree lhs
, rhs1
, rhs2
;
10621 tree vectype1
= NULL_TREE
, vectype2
= NULL_TREE
;
10622 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
10623 tree vec_rhs1
= NULL_TREE
, vec_rhs2
= NULL_TREE
;
10625 loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
);
10626 enum vect_def_type dts
[2] = {vect_unknown_def_type
, vect_unknown_def_type
};
10628 poly_uint64 nunits
;
10630 enum tree_code code
, bitop1
= NOP_EXPR
, bitop2
= NOP_EXPR
;
10631 stmt_vec_info prev_stmt_info
= NULL
;
10633 bb_vec_info bb_vinfo
= dyn_cast
<bb_vec_info
> (vinfo
);
10634 vec
<tree
> vec_oprnds0
= vNULL
;
10635 vec
<tree
> vec_oprnds1
= vNULL
;
10639 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
10642 if (!vectype
|| !VECTOR_BOOLEAN_TYPE_P (vectype
))
10645 mask_type
= vectype
;
10646 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
10651 ncopies
= vect_get_num_copies (loop_vinfo
, vectype
);
10653 gcc_assert (ncopies
>= 1);
10654 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
)
10657 if (STMT_VINFO_LIVE_P (stmt_info
))
10659 if (dump_enabled_p ())
10660 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
10661 "value used after loop.\n");
10665 gassign
*stmt
= dyn_cast
<gassign
*> (stmt_info
->stmt
);
10669 code
= gimple_assign_rhs_code (stmt
);
10671 if (TREE_CODE_CLASS (code
) != tcc_comparison
)
10674 slp_tree slp_rhs1
, slp_rhs2
;
10675 if (!vect_is_simple_use (vinfo
, stmt_info
, slp_node
,
10676 0, &rhs1
, &slp_rhs1
, &dts
[0], &vectype1
))
10679 if (!vect_is_simple_use (vinfo
, stmt_info
, slp_node
,
10680 1, &rhs2
, &slp_rhs2
, &dts
[1], &vectype2
))
10683 if (vectype1
&& vectype2
10684 && maybe_ne (TYPE_VECTOR_SUBPARTS (vectype1
),
10685 TYPE_VECTOR_SUBPARTS (vectype2
)))
10688 vectype
= vectype1
? vectype1
: vectype2
;
10690 /* Invariant comparison. */
10693 if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (rhs1
)))
10694 vectype
= mask_type
;
10696 vectype
= get_vectype_for_scalar_type (vinfo
, TREE_TYPE (rhs1
),
10698 if (!vectype
|| maybe_ne (TYPE_VECTOR_SUBPARTS (vectype
), nunits
))
10701 else if (maybe_ne (nunits
, TYPE_VECTOR_SUBPARTS (vectype
)))
10704 /* Can't compare mask and non-mask types. */
10705 if (vectype1
&& vectype2
10706 && (VECTOR_BOOLEAN_TYPE_P (vectype1
) ^ VECTOR_BOOLEAN_TYPE_P (vectype2
)))
10709 /* Boolean values may have another representation in vectors
10710 and therefore we prefer bit operations over comparison for
10711 them (which also works for scalar masks). We store opcodes
10712 to use in bitop1 and bitop2. Statement is vectorized as
10713 BITOP2 (rhs1 BITOP1 rhs2) or
10714 rhs1 BITOP2 (BITOP1 rhs2)
10715 depending on bitop1 and bitop2 arity. */
10716 bool swap_p
= false;
10717 if (VECTOR_BOOLEAN_TYPE_P (vectype
))
10719 if (code
== GT_EXPR
)
10721 bitop1
= BIT_NOT_EXPR
;
10722 bitop2
= BIT_AND_EXPR
;
10724 else if (code
== GE_EXPR
)
10726 bitop1
= BIT_NOT_EXPR
;
10727 bitop2
= BIT_IOR_EXPR
;
10729 else if (code
== LT_EXPR
)
10731 bitop1
= BIT_NOT_EXPR
;
10732 bitop2
= BIT_AND_EXPR
;
10735 else if (code
== LE_EXPR
)
10737 bitop1
= BIT_NOT_EXPR
;
10738 bitop2
= BIT_IOR_EXPR
;
10743 bitop1
= BIT_XOR_EXPR
;
10744 if (code
== EQ_EXPR
)
10745 bitop2
= BIT_NOT_EXPR
;
10751 if (bitop1
== NOP_EXPR
)
10753 if (!expand_vec_cmp_expr_p (vectype
, mask_type
, code
))
10758 machine_mode mode
= TYPE_MODE (vectype
);
10761 optab
= optab_for_tree_code (bitop1
, vectype
, optab_default
);
10762 if (!optab
|| optab_handler (optab
, mode
) == CODE_FOR_nothing
)
10765 if (bitop2
!= NOP_EXPR
)
10767 optab
= optab_for_tree_code (bitop2
, vectype
, optab_default
);
10768 if (!optab
|| optab_handler (optab
, mode
) == CODE_FOR_nothing
)
10773 /* Put types on constant and invariant SLP children. */
10775 && (!vect_maybe_update_slp_op_vectype (slp_rhs1
, vectype
)
10776 || !vect_maybe_update_slp_op_vectype (slp_rhs2
, vectype
)))
10778 if (dump_enabled_p ())
10779 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
10780 "incompatible vector types for invariants\n");
10784 STMT_VINFO_TYPE (stmt_info
) = comparison_vec_info_type
;
10785 vect_model_simple_cost (vinfo
, stmt_info
,
10786 ncopies
* (1 + (bitop2
!= NOP_EXPR
)),
10787 dts
, ndts
, slp_node
, cost_vec
);
10794 vec_oprnds0
.create (1);
10795 vec_oprnds1
.create (1);
10799 lhs
= gimple_assign_lhs (stmt
);
10800 mask
= vect_create_destination_var (lhs
, mask_type
);
10802 /* Handle cmp expr. */
10803 for (j
= 0; j
< ncopies
; j
++)
10805 stmt_vec_info new_stmt_info
= NULL
;
10810 auto_vec
<vec
<tree
>, 2> vec_defs
;
10811 vect_get_slp_defs (vinfo
, slp_node
, &vec_defs
);
10812 vec_oprnds1
= vec_defs
.pop ();
10813 vec_oprnds0
= vec_defs
.pop ();
10815 std::swap (vec_oprnds0
, vec_oprnds1
);
10819 vec_rhs1
= vect_get_vec_def_for_operand (vinfo
, rhs1
, stmt_info
,
10821 vec_rhs2
= vect_get_vec_def_for_operand (vinfo
, rhs2
, stmt_info
,
10827 vec_rhs1
= vect_get_vec_def_for_stmt_copy (vinfo
,
10828 vec_oprnds0
.pop ());
10829 vec_rhs2
= vect_get_vec_def_for_stmt_copy (vinfo
,
10830 vec_oprnds1
.pop ());
10835 if (swap_p
&& j
== 0)
10836 std::swap (vec_rhs1
, vec_rhs2
);
10837 vec_oprnds0
.quick_push (vec_rhs1
);
10838 vec_oprnds1
.quick_push (vec_rhs2
);
10841 /* Arguments are ready. Create the new vector stmt. */
10842 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vec_rhs1
)
10844 vec_rhs2
= vec_oprnds1
[i
];
10846 new_temp
= make_ssa_name (mask
);
10847 if (bitop1
== NOP_EXPR
)
10849 gassign
*new_stmt
= gimple_build_assign (new_temp
, code
,
10850 vec_rhs1
, vec_rhs2
);
10852 = vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
10857 if (bitop1
== BIT_NOT_EXPR
)
10858 new_stmt
= gimple_build_assign (new_temp
, bitop1
, vec_rhs2
);
10860 new_stmt
= gimple_build_assign (new_temp
, bitop1
, vec_rhs1
,
10863 = vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
10864 if (bitop2
!= NOP_EXPR
)
10866 tree res
= make_ssa_name (mask
);
10867 if (bitop2
== BIT_NOT_EXPR
)
10868 new_stmt
= gimple_build_assign (res
, bitop2
, new_temp
);
10870 new_stmt
= gimple_build_assign (res
, bitop2
, vec_rhs1
,
10873 = vect_finish_stmt_generation (vinfo
, stmt_info
,
10878 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt_info
);
10885 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt_info
;
10887 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt_info
;
10889 prev_stmt_info
= new_stmt_info
;
10892 vec_oprnds0
.release ();
10893 vec_oprnds1
.release ();
10898 /* If SLP_NODE is nonnull, return true if vectorizable_live_operation
10899 can handle all live statements in the node. Otherwise return true
10900 if STMT_INFO is not live or if vectorizable_live_operation can handle it.
10901 GSI and VEC_STMT_P are as for vectorizable_live_operation. */
10904 can_vectorize_live_stmts (loop_vec_info loop_vinfo
,
10905 stmt_vec_info stmt_info
, gimple_stmt_iterator
*gsi
,
10906 slp_tree slp_node
, slp_instance slp_node_instance
,
10908 stmt_vector_for_cost
*cost_vec
)
10912 stmt_vec_info slp_stmt_info
;
10914 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (slp_node
), i
, slp_stmt_info
)
10916 if (STMT_VINFO_LIVE_P (slp_stmt_info
)
10917 && !vectorizable_live_operation (loop_vinfo
,
10918 slp_stmt_info
, gsi
, slp_node
,
10919 slp_node_instance
, i
,
10920 vec_stmt_p
, cost_vec
))
10924 else if (STMT_VINFO_LIVE_P (stmt_info
)
10925 && !vectorizable_live_operation (loop_vinfo
, stmt_info
, gsi
,
10926 slp_node
, slp_node_instance
, -1,
10927 vec_stmt_p
, cost_vec
))
10933 /* Make sure the statement is vectorizable. */
10936 vect_analyze_stmt (vec_info
*vinfo
,
10937 stmt_vec_info stmt_info
, bool *need_to_vectorize
,
10938 slp_tree node
, slp_instance node_instance
,
10939 stmt_vector_for_cost
*cost_vec
)
10941 bb_vec_info bb_vinfo
= dyn_cast
<bb_vec_info
> (vinfo
);
10942 enum vect_relevant relevance
= STMT_VINFO_RELEVANT (stmt_info
);
10944 gimple_seq pattern_def_seq
;
10946 if (dump_enabled_p ())
10947 dump_printf_loc (MSG_NOTE
, vect_location
, "==> examining statement: %G",
10950 if (gimple_has_volatile_ops (stmt_info
->stmt
))
10951 return opt_result::failure_at (stmt_info
->stmt
,
10953 " stmt has volatile operands: %G\n",
10956 if (STMT_VINFO_IN_PATTERN_P (stmt_info
)
10958 && (pattern_def_seq
= STMT_VINFO_PATTERN_DEF_SEQ (stmt_info
)))
10960 gimple_stmt_iterator si
;
10962 for (si
= gsi_start (pattern_def_seq
); !gsi_end_p (si
); gsi_next (&si
))
10964 stmt_vec_info pattern_def_stmt_info
10965 = vinfo
->lookup_stmt (gsi_stmt (si
));
10966 if (STMT_VINFO_RELEVANT_P (pattern_def_stmt_info
)
10967 || STMT_VINFO_LIVE_P (pattern_def_stmt_info
))
10969 /* Analyze def stmt of STMT if it's a pattern stmt. */
10970 if (dump_enabled_p ())
10971 dump_printf_loc (MSG_NOTE
, vect_location
,
10972 "==> examining pattern def statement: %G",
10973 pattern_def_stmt_info
->stmt
);
10976 = vect_analyze_stmt (vinfo
, pattern_def_stmt_info
,
10977 need_to_vectorize
, node
, node_instance
,
10985 /* Skip stmts that do not need to be vectorized. In loops this is expected
10987 - the COND_EXPR which is the loop exit condition
10988 - any LABEL_EXPRs in the loop
10989 - computations that are used only for array indexing or loop control.
10990 In basic blocks we only analyze statements that are a part of some SLP
10991 instance, therefore, all the statements are relevant.
10993 Pattern statement needs to be analyzed instead of the original statement
10994 if the original statement is not relevant. Otherwise, we analyze both
10995 statements. In basic blocks we are called from some SLP instance
10996 traversal, don't analyze pattern stmts instead, the pattern stmts
10997 already will be part of SLP instance. */
10999 stmt_vec_info pattern_stmt_info
= STMT_VINFO_RELATED_STMT (stmt_info
);
11000 if (!STMT_VINFO_RELEVANT_P (stmt_info
)
11001 && !STMT_VINFO_LIVE_P (stmt_info
))
11003 if (STMT_VINFO_IN_PATTERN_P (stmt_info
)
11004 && pattern_stmt_info
11005 && (STMT_VINFO_RELEVANT_P (pattern_stmt_info
)
11006 || STMT_VINFO_LIVE_P (pattern_stmt_info
)))
11008 /* Analyze PATTERN_STMT instead of the original stmt. */
11009 stmt_info
= pattern_stmt_info
;
11010 if (dump_enabled_p ())
11011 dump_printf_loc (MSG_NOTE
, vect_location
,
11012 "==> examining pattern statement: %G",
11017 if (dump_enabled_p ())
11018 dump_printf_loc (MSG_NOTE
, vect_location
, "irrelevant.\n");
11020 return opt_result::success ();
11023 else if (STMT_VINFO_IN_PATTERN_P (stmt_info
)
11025 && pattern_stmt_info
11026 && (STMT_VINFO_RELEVANT_P (pattern_stmt_info
)
11027 || STMT_VINFO_LIVE_P (pattern_stmt_info
)))
11029 /* Analyze PATTERN_STMT too. */
11030 if (dump_enabled_p ())
11031 dump_printf_loc (MSG_NOTE
, vect_location
,
11032 "==> examining pattern statement: %G",
11033 pattern_stmt_info
->stmt
);
11036 = vect_analyze_stmt (vinfo
, pattern_stmt_info
, need_to_vectorize
, node
,
11037 node_instance
, cost_vec
);
11042 switch (STMT_VINFO_DEF_TYPE (stmt_info
))
11044 case vect_internal_def
:
11047 case vect_reduction_def
:
11048 case vect_nested_cycle
:
11049 gcc_assert (!bb_vinfo
11050 && (relevance
== vect_used_in_outer
11051 || relevance
== vect_used_in_outer_by_reduction
11052 || relevance
== vect_used_by_reduction
11053 || relevance
== vect_unused_in_scope
11054 || relevance
== vect_used_only_live
));
11057 case vect_induction_def
:
11058 gcc_assert (!bb_vinfo
);
11061 case vect_constant_def
:
11062 case vect_external_def
:
11063 case vect_unknown_def_type
:
11065 gcc_unreachable ();
11068 if (STMT_VINFO_RELEVANT_P (stmt_info
))
11070 tree type
= gimple_expr_type (stmt_info
->stmt
);
11071 gcc_assert (!VECTOR_MODE_P (TYPE_MODE (type
)));
11072 gcall
*call
= dyn_cast
<gcall
*> (stmt_info
->stmt
);
11073 gcc_assert (STMT_VINFO_VECTYPE (stmt_info
)
11074 || (call
&& gimple_call_lhs (call
) == NULL_TREE
));
11075 *need_to_vectorize
= true;
11078 if (PURE_SLP_STMT (stmt_info
) && !node
)
11080 if (dump_enabled_p ())
11081 dump_printf_loc (MSG_NOTE
, vect_location
,
11082 "handled only by SLP analysis\n");
11083 return opt_result::success ();
11088 && (STMT_VINFO_RELEVANT_P (stmt_info
)
11089 || STMT_VINFO_DEF_TYPE (stmt_info
) == vect_reduction_def
))
11090 /* Prefer vectorizable_call over vectorizable_simd_clone_call so
11091 -mveclibabi= takes preference over library functions with
11092 the simd attribute. */
11093 ok
= (vectorizable_call (vinfo
, stmt_info
, NULL
, NULL
, node
, cost_vec
)
11094 || vectorizable_simd_clone_call (vinfo
, stmt_info
, NULL
, NULL
, node
,
11096 || vectorizable_conversion (vinfo
, stmt_info
,
11097 NULL
, NULL
, node
, cost_vec
)
11098 || vectorizable_operation (vinfo
, stmt_info
,
11099 NULL
, NULL
, node
, cost_vec
)
11100 || vectorizable_assignment (vinfo
, stmt_info
,
11101 NULL
, NULL
, node
, cost_vec
)
11102 || vectorizable_load (vinfo
, stmt_info
, NULL
, NULL
, node
, cost_vec
)
11103 || vectorizable_store (vinfo
, stmt_info
, NULL
, NULL
, node
, cost_vec
)
11104 || vectorizable_reduction (as_a
<loop_vec_info
> (vinfo
), stmt_info
,
11105 node
, node_instance
, cost_vec
)
11106 || vectorizable_induction (as_a
<loop_vec_info
> (vinfo
), stmt_info
,
11107 NULL
, NULL
, node
, cost_vec
)
11108 || vectorizable_shift (vinfo
, stmt_info
, NULL
, NULL
, node
, cost_vec
)
11109 || vectorizable_condition (vinfo
, stmt_info
,
11110 NULL
, NULL
, node
, cost_vec
)
11111 || vectorizable_comparison (vinfo
, stmt_info
, NULL
, NULL
, node
,
11113 || vectorizable_lc_phi (as_a
<loop_vec_info
> (vinfo
),
11114 stmt_info
, NULL
, node
));
11118 ok
= (vectorizable_call (vinfo
, stmt_info
, NULL
, NULL
, node
, cost_vec
)
11119 || vectorizable_simd_clone_call (vinfo
, stmt_info
,
11120 NULL
, NULL
, node
, cost_vec
)
11121 || vectorizable_conversion (vinfo
, stmt_info
, NULL
, NULL
, node
,
11123 || vectorizable_shift (vinfo
, stmt_info
,
11124 NULL
, NULL
, node
, cost_vec
)
11125 || vectorizable_operation (vinfo
, stmt_info
,
11126 NULL
, NULL
, node
, cost_vec
)
11127 || vectorizable_assignment (vinfo
, stmt_info
, NULL
, NULL
, node
,
11129 || vectorizable_load (vinfo
, stmt_info
,
11130 NULL
, NULL
, node
, cost_vec
)
11131 || vectorizable_store (vinfo
, stmt_info
,
11132 NULL
, NULL
, node
, cost_vec
)
11133 || vectorizable_condition (vinfo
, stmt_info
,
11134 NULL
, NULL
, node
, cost_vec
)
11135 || vectorizable_comparison (vinfo
, stmt_info
, NULL
, NULL
, node
,
11140 return opt_result::failure_at (stmt_info
->stmt
,
11142 " relevant stmt not supported: %G",
11145 /* Stmts that are (also) "live" (i.e. - that are used out of the loop)
11146 need extra handling, except for vectorizable reductions. */
11148 && STMT_VINFO_TYPE (stmt_info
) != reduc_vec_info_type
11149 && STMT_VINFO_TYPE (stmt_info
) != lc_phi_info_type
11150 && !can_vectorize_live_stmts (as_a
<loop_vec_info
> (vinfo
),
11151 stmt_info
, NULL
, node
, node_instance
,
11153 return opt_result::failure_at (stmt_info
->stmt
,
11155 " live stmt not supported: %G",
11158 return opt_result::success ();
11162 /* Function vect_transform_stmt.
11164 Create a vectorized stmt to replace STMT_INFO, and insert it at GSI. */
11167 vect_transform_stmt (vec_info
*vinfo
,
11168 stmt_vec_info stmt_info
, gimple_stmt_iterator
*gsi
,
11169 slp_tree slp_node
, slp_instance slp_node_instance
)
11171 bool is_store
= false;
11172 stmt_vec_info vec_stmt
= NULL
;
11175 gcc_assert (slp_node
|| !PURE_SLP_STMT (stmt_info
));
11176 stmt_vec_info old_vec_stmt_info
= STMT_VINFO_VEC_STMT (stmt_info
);
11178 loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
);
11179 bool nested_p
= (loop_vinfo
11180 && nested_in_vect_loop_p
11181 (LOOP_VINFO_LOOP (loop_vinfo
), stmt_info
));
11183 gimple
*stmt
= stmt_info
->stmt
;
11184 switch (STMT_VINFO_TYPE (stmt_info
))
11186 case type_demotion_vec_info_type
:
11187 case type_promotion_vec_info_type
:
11188 case type_conversion_vec_info_type
:
11189 done
= vectorizable_conversion (vinfo
, stmt_info
,
11190 gsi
, &vec_stmt
, slp_node
, NULL
);
11194 case induc_vec_info_type
:
11195 done
= vectorizable_induction (as_a
<loop_vec_info
> (vinfo
),
11196 stmt_info
, gsi
, &vec_stmt
, slp_node
,
11201 case shift_vec_info_type
:
11202 done
= vectorizable_shift (vinfo
, stmt_info
,
11203 gsi
, &vec_stmt
, slp_node
, NULL
);
11207 case op_vec_info_type
:
11208 done
= vectorizable_operation (vinfo
, stmt_info
, gsi
, &vec_stmt
, slp_node
,
11213 case assignment_vec_info_type
:
11214 done
= vectorizable_assignment (vinfo
, stmt_info
,
11215 gsi
, &vec_stmt
, slp_node
, NULL
);
11219 case load_vec_info_type
:
11220 done
= vectorizable_load (vinfo
, stmt_info
, gsi
, &vec_stmt
, slp_node
,
11225 case store_vec_info_type
:
11226 done
= vectorizable_store (vinfo
, stmt_info
,
11227 gsi
, &vec_stmt
, slp_node
, NULL
);
11229 if (STMT_VINFO_GROUPED_ACCESS (stmt_info
) && !slp_node
)
11231 /* In case of interleaving, the whole chain is vectorized when the
11232 last store in the chain is reached. Store stmts before the last
11233 one are skipped, and there vec_stmt_info shouldn't be freed
11235 stmt_vec_info group_info
= DR_GROUP_FIRST_ELEMENT (stmt_info
);
11236 if (DR_GROUP_STORE_COUNT (group_info
) == DR_GROUP_SIZE (group_info
))
11243 case condition_vec_info_type
:
11244 done
= vectorizable_condition (vinfo
, stmt_info
,
11245 gsi
, &vec_stmt
, slp_node
, NULL
);
11249 case comparison_vec_info_type
:
11250 done
= vectorizable_comparison (vinfo
, stmt_info
, gsi
, &vec_stmt
,
11255 case call_vec_info_type
:
11256 done
= vectorizable_call (vinfo
, stmt_info
,
11257 gsi
, &vec_stmt
, slp_node
, NULL
);
11258 stmt
= gsi_stmt (*gsi
);
11261 case call_simd_clone_vec_info_type
:
11262 done
= vectorizable_simd_clone_call (vinfo
, stmt_info
, gsi
, &vec_stmt
,
11264 stmt
= gsi_stmt (*gsi
);
11267 case reduc_vec_info_type
:
11268 done
= vect_transform_reduction (as_a
<loop_vec_info
> (vinfo
), stmt_info
,
11269 gsi
, &vec_stmt
, slp_node
);
11273 case cycle_phi_info_type
:
11274 done
= vect_transform_cycle_phi (as_a
<loop_vec_info
> (vinfo
), stmt_info
,
11275 &vec_stmt
, slp_node
, slp_node_instance
);
11279 case lc_phi_info_type
:
11280 done
= vectorizable_lc_phi (as_a
<loop_vec_info
> (vinfo
),
11281 stmt_info
, &vec_stmt
, slp_node
);
11286 if (!STMT_VINFO_LIVE_P (stmt_info
))
11288 if (dump_enabled_p ())
11289 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
11290 "stmt not supported.\n");
11291 gcc_unreachable ();
11296 /* Verify SLP vectorization doesn't mess with STMT_VINFO_VEC_STMT.
11297 This would break hybrid SLP vectorization. */
11299 gcc_assert (!vec_stmt
11300 && STMT_VINFO_VEC_STMT (stmt_info
) == old_vec_stmt_info
);
11302 /* Handle inner-loop stmts whose DEF is used in the loop-nest that
11303 is being vectorized, but outside the immediately enclosing loop. */
11306 && STMT_VINFO_TYPE (stmt_info
) != reduc_vec_info_type
11307 && (STMT_VINFO_RELEVANT (stmt_info
) == vect_used_in_outer
11308 || STMT_VINFO_RELEVANT (stmt_info
) ==
11309 vect_used_in_outer_by_reduction
))
11311 class loop
*innerloop
= LOOP_VINFO_LOOP (loop_vinfo
)->inner
;
11312 imm_use_iterator imm_iter
;
11313 use_operand_p use_p
;
11316 if (dump_enabled_p ())
11317 dump_printf_loc (MSG_NOTE
, vect_location
,
11318 "Record the vdef for outer-loop vectorization.\n");
11320 /* Find the relevant loop-exit phi-node, and reord the vec_stmt there
11321 (to be used when vectorizing outer-loop stmts that use the DEF of
11323 if (gimple_code (stmt
) == GIMPLE_PHI
)
11324 scalar_dest
= PHI_RESULT (stmt
);
11326 scalar_dest
= gimple_get_lhs (stmt
);
11328 FOR_EACH_IMM_USE_FAST (use_p
, imm_iter
, scalar_dest
)
11329 if (!flow_bb_inside_loop_p (innerloop
, gimple_bb (USE_STMT (use_p
))))
11331 stmt_vec_info exit_phi_info
11332 = vinfo
->lookup_stmt (USE_STMT (use_p
));
11333 STMT_VINFO_VEC_STMT (exit_phi_info
) = vec_stmt
;
11338 STMT_VINFO_VEC_STMT (stmt_info
) = vec_stmt
;
11340 if (STMT_VINFO_TYPE (stmt_info
) == store_vec_info_type
)
11343 /* If this stmt defines a value used on a backedge, update the
11344 vectorized PHIs. */
11345 stmt_vec_info orig_stmt_info
= vect_orig_stmt (stmt_info
);
11346 stmt_vec_info reduc_info
;
11347 if (STMT_VINFO_REDUC_DEF (orig_stmt_info
)
11348 && vect_stmt_to_vectorize (orig_stmt_info
) == stmt_info
11349 && (reduc_info
= info_for_reduction (vinfo
, orig_stmt_info
))
11350 && STMT_VINFO_REDUC_TYPE (reduc_info
) != FOLD_LEFT_REDUCTION
11351 && STMT_VINFO_REDUC_TYPE (reduc_info
) != EXTRACT_LAST_REDUCTION
)
11356 && (phi
= dyn_cast
<gphi
*>
11357 (STMT_VINFO_REDUC_DEF (orig_stmt_info
)->stmt
))
11358 && dominated_by_p (CDI_DOMINATORS
,
11359 gimple_bb (orig_stmt_info
->stmt
), gimple_bb (phi
))
11360 && (e
= loop_latch_edge (gimple_bb (phi
)->loop_father
))
11361 && (PHI_ARG_DEF_FROM_EDGE (phi
, e
)
11362 == gimple_get_lhs (orig_stmt_info
->stmt
)))
11364 stmt_vec_info phi_info
11365 = STMT_VINFO_VEC_STMT (STMT_VINFO_REDUC_DEF (orig_stmt_info
));
11366 stmt_vec_info vec_stmt
= STMT_VINFO_VEC_STMT (stmt_info
);
11369 add_phi_arg (as_a
<gphi
*> (phi_info
->stmt
),
11370 gimple_get_lhs (vec_stmt
->stmt
), e
,
11371 gimple_phi_arg_location (phi
, e
->dest_idx
));
11372 phi_info
= STMT_VINFO_RELATED_STMT (phi_info
);
11373 vec_stmt
= STMT_VINFO_RELATED_STMT (vec_stmt
);
11376 gcc_assert (!vec_stmt
);
11379 && slp_node
!= slp_node_instance
->reduc_phis
)
11381 slp_tree phi_node
= slp_node_instance
->reduc_phis
;
11382 gphi
*phi
= as_a
<gphi
*> (SLP_TREE_SCALAR_STMTS (phi_node
)[0]->stmt
);
11383 e
= loop_latch_edge (gimple_bb (phi
)->loop_father
);
11384 gcc_assert (SLP_TREE_VEC_STMTS (phi_node
).length ()
11385 == SLP_TREE_VEC_STMTS (slp_node
).length ());
11386 for (unsigned i
= 0; i
< SLP_TREE_VEC_STMTS (phi_node
).length (); ++i
)
11387 add_phi_arg (as_a
<gphi
*> (SLP_TREE_VEC_STMTS (phi_node
)[i
]->stmt
),
11388 vect_get_slp_vect_def (slp_node
, i
),
11389 e
, gimple_phi_arg_location (phi
, e
->dest_idx
));
11393 /* Handle stmts whose DEF is used outside the loop-nest that is
11394 being vectorized. */
11395 if (is_a
<loop_vec_info
> (vinfo
))
11396 done
= can_vectorize_live_stmts (as_a
<loop_vec_info
> (vinfo
),
11397 stmt_info
, gsi
, slp_node
,
11398 slp_node_instance
, true, NULL
);
11405 /* Remove a group of stores (for SLP or interleaving), free their
11409 vect_remove_stores (vec_info
*vinfo
, stmt_vec_info first_stmt_info
)
11411 stmt_vec_info next_stmt_info
= first_stmt_info
;
11413 while (next_stmt_info
)
11415 stmt_vec_info tmp
= DR_GROUP_NEXT_ELEMENT (next_stmt_info
);
11416 next_stmt_info
= vect_orig_stmt (next_stmt_info
);
11417 /* Free the attached stmt_vec_info and remove the stmt. */
11418 vinfo
->remove_stmt (next_stmt_info
);
11419 next_stmt_info
= tmp
;
11423 /* If NUNITS is nonzero, return a vector type that contains NUNITS
11424 elements of type SCALAR_TYPE, or null if the target doesn't support
11427 If NUNITS is zero, return a vector type that contains elements of
11428 type SCALAR_TYPE, choosing whichever vector size the target prefers.
11430 If PREVAILING_MODE is VOIDmode, we have not yet chosen a vector mode
11431 for this vectorization region and want to "autodetect" the best choice.
11432 Otherwise, PREVAILING_MODE is a previously-chosen vector TYPE_MODE
11433 and we want the new type to be interoperable with it. PREVAILING_MODE
11434 in this case can be a scalar integer mode or a vector mode; when it
11435 is a vector mode, the function acts like a tree-level version of
11436 related_vector_mode. */
11439 get_related_vectype_for_scalar_type (machine_mode prevailing_mode
,
11440 tree scalar_type
, poly_uint64 nunits
)
11442 tree orig_scalar_type
= scalar_type
;
11443 scalar_mode inner_mode
;
11444 machine_mode simd_mode
;
11447 if (!is_int_mode (TYPE_MODE (scalar_type
), &inner_mode
)
11448 && !is_float_mode (TYPE_MODE (scalar_type
), &inner_mode
))
11451 unsigned int nbytes
= GET_MODE_SIZE (inner_mode
);
11453 /* For vector types of elements whose mode precision doesn't
11454 match their types precision we use a element type of mode
11455 precision. The vectorization routines will have to make sure
11456 they support the proper result truncation/extension.
11457 We also make sure to build vector types with INTEGER_TYPE
11458 component type only. */
11459 if (INTEGRAL_TYPE_P (scalar_type
)
11460 && (GET_MODE_BITSIZE (inner_mode
) != TYPE_PRECISION (scalar_type
)
11461 || TREE_CODE (scalar_type
) != INTEGER_TYPE
))
11462 scalar_type
= build_nonstandard_integer_type (GET_MODE_BITSIZE (inner_mode
),
11463 TYPE_UNSIGNED (scalar_type
));
11465 /* We shouldn't end up building VECTOR_TYPEs of non-scalar components.
11466 When the component mode passes the above test simply use a type
11467 corresponding to that mode. The theory is that any use that
11468 would cause problems with this will disable vectorization anyway. */
11469 else if (!SCALAR_FLOAT_TYPE_P (scalar_type
)
11470 && !INTEGRAL_TYPE_P (scalar_type
))
11471 scalar_type
= lang_hooks
.types
.type_for_mode (inner_mode
, 1);
11473 /* We can't build a vector type of elements with alignment bigger than
11475 else if (nbytes
< TYPE_ALIGN_UNIT (scalar_type
))
11476 scalar_type
= lang_hooks
.types
.type_for_mode (inner_mode
,
11477 TYPE_UNSIGNED (scalar_type
));
11479 /* If we felt back to using the mode fail if there was
11480 no scalar type for it. */
11481 if (scalar_type
== NULL_TREE
)
11484 /* If no prevailing mode was supplied, use the mode the target prefers.
11485 Otherwise lookup a vector mode based on the prevailing mode. */
11486 if (prevailing_mode
== VOIDmode
)
11488 gcc_assert (known_eq (nunits
, 0U));
11489 simd_mode
= targetm
.vectorize
.preferred_simd_mode (inner_mode
);
11490 if (SCALAR_INT_MODE_P (simd_mode
))
11492 /* Traditional behavior is not to take the integer mode
11493 literally, but simply to use it as a way of determining
11494 the vector size. It is up to mode_for_vector to decide
11495 what the TYPE_MODE should be.
11497 Note that nunits == 1 is allowed in order to support single
11498 element vector types. */
11499 if (!multiple_p (GET_MODE_SIZE (simd_mode
), nbytes
, &nunits
)
11500 || !mode_for_vector (inner_mode
, nunits
).exists (&simd_mode
))
11504 else if (SCALAR_INT_MODE_P (prevailing_mode
)
11505 || !related_vector_mode (prevailing_mode
,
11506 inner_mode
, nunits
).exists (&simd_mode
))
11508 /* Fall back to using mode_for_vector, mostly in the hope of being
11509 able to use an integer mode. */
11510 if (known_eq (nunits
, 0U)
11511 && !multiple_p (GET_MODE_SIZE (prevailing_mode
), nbytes
, &nunits
))
11514 if (!mode_for_vector (inner_mode
, nunits
).exists (&simd_mode
))
11518 vectype
= build_vector_type_for_mode (scalar_type
, simd_mode
);
11520 /* In cases where the mode was chosen by mode_for_vector, check that
11521 the target actually supports the chosen mode, or that it at least
11522 allows the vector mode to be replaced by a like-sized integer. */
11523 if (!VECTOR_MODE_P (TYPE_MODE (vectype
))
11524 && !INTEGRAL_MODE_P (TYPE_MODE (vectype
)))
11527 /* Re-attach the address-space qualifier if we canonicalized the scalar
11529 if (TYPE_ADDR_SPACE (orig_scalar_type
) != TYPE_ADDR_SPACE (vectype
))
11530 return build_qualified_type
11531 (vectype
, KEEP_QUAL_ADDR_SPACE (TYPE_QUALS (orig_scalar_type
)));
11536 /* Function get_vectype_for_scalar_type.
11538 Returns the vector type corresponding to SCALAR_TYPE as supported
11539 by the target. If GROUP_SIZE is nonzero and we're performing BB
11540 vectorization, make sure that the number of elements in the vector
11541 is no bigger than GROUP_SIZE. */
11544 get_vectype_for_scalar_type (vec_info
*vinfo
, tree scalar_type
,
11545 unsigned int group_size
)
11547 /* For BB vectorization, we should always have a group size once we've
11548 constructed the SLP tree; the only valid uses of zero GROUP_SIZEs
11549 are tentative requests during things like early data reference
11550 analysis and pattern recognition. */
11551 if (is_a
<bb_vec_info
> (vinfo
))
11552 gcc_assert (vinfo
->slp_instances
.is_empty () || group_size
!= 0);
11556 tree vectype
= get_related_vectype_for_scalar_type (vinfo
->vector_mode
,
11558 if (vectype
&& vinfo
->vector_mode
== VOIDmode
)
11559 vinfo
->vector_mode
= TYPE_MODE (vectype
);
11561 /* Register the natural choice of vector type, before the group size
11562 has been applied. */
11564 vinfo
->used_vector_modes
.add (TYPE_MODE (vectype
));
11566 /* If the natural choice of vector type doesn't satisfy GROUP_SIZE,
11567 try again with an explicit number of elements. */
11570 && maybe_ge (TYPE_VECTOR_SUBPARTS (vectype
), group_size
))
11572 /* Start with the biggest number of units that fits within
11573 GROUP_SIZE and halve it until we find a valid vector type.
11574 Usually either the first attempt will succeed or all will
11575 fail (in the latter case because GROUP_SIZE is too small
11576 for the target), but it's possible that a target could have
11577 a hole between supported vector types.
11579 If GROUP_SIZE is not a power of 2, this has the effect of
11580 trying the largest power of 2 that fits within the group,
11581 even though the group is not a multiple of that vector size.
11582 The BB vectorizer will then try to carve up the group into
11584 unsigned int nunits
= 1 << floor_log2 (group_size
);
11587 vectype
= get_related_vectype_for_scalar_type (vinfo
->vector_mode
,
11588 scalar_type
, nunits
);
11591 while (nunits
> 1 && !vectype
);
11597 /* Return the vector type corresponding to SCALAR_TYPE as supported
11598 by the target. NODE, if nonnull, is the SLP tree node that will
11599 use the returned vector type. */
11602 get_vectype_for_scalar_type (vec_info
*vinfo
, tree scalar_type
, slp_tree node
)
11604 unsigned int group_size
= 0;
11606 group_size
= SLP_TREE_LANES (node
);
11607 return get_vectype_for_scalar_type (vinfo
, scalar_type
, group_size
);
11610 /* Function get_mask_type_for_scalar_type.
11612 Returns the mask type corresponding to a result of comparison
11613 of vectors of specified SCALAR_TYPE as supported by target.
11614 If GROUP_SIZE is nonzero and we're performing BB vectorization,
11615 make sure that the number of elements in the vector is no bigger
11616 than GROUP_SIZE. */
11619 get_mask_type_for_scalar_type (vec_info
*vinfo
, tree scalar_type
,
11620 unsigned int group_size
)
11622 tree vectype
= get_vectype_for_scalar_type (vinfo
, scalar_type
, group_size
);
11627 return truth_type_for (vectype
);
11630 /* Function get_same_sized_vectype
11632 Returns a vector type corresponding to SCALAR_TYPE of size
11633 VECTOR_TYPE if supported by the target. */
11636 get_same_sized_vectype (tree scalar_type
, tree vector_type
)
11638 if (VECT_SCALAR_BOOLEAN_TYPE_P (scalar_type
))
11639 return truth_type_for (vector_type
);
11641 poly_uint64 nunits
;
11642 if (!multiple_p (GET_MODE_SIZE (TYPE_MODE (vector_type
)),
11643 GET_MODE_SIZE (TYPE_MODE (scalar_type
)), &nunits
))
11646 return get_related_vectype_for_scalar_type (TYPE_MODE (vector_type
),
11647 scalar_type
, nunits
);
11650 /* Return true if replacing LOOP_VINFO->vector_mode with VECTOR_MODE
11651 would not change the chosen vector modes. */
11654 vect_chooses_same_modes_p (vec_info
*vinfo
, machine_mode vector_mode
)
11656 for (vec_info::mode_set::iterator i
= vinfo
->used_vector_modes
.begin ();
11657 i
!= vinfo
->used_vector_modes
.end (); ++i
)
11658 if (!VECTOR_MODE_P (*i
)
11659 || related_vector_mode (vector_mode
, GET_MODE_INNER (*i
), 0) != *i
)
11664 /* Function vect_is_simple_use.
11667 VINFO - the vect info of the loop or basic block that is being vectorized.
11668 OPERAND - operand in the loop or bb.
11670 DEF_STMT_INFO_OUT (optional) - information about the defining stmt in
11671 case OPERAND is an SSA_NAME that is defined in the vectorizable region
11672 DEF_STMT_OUT (optional) - the defining stmt in case OPERAND is an SSA_NAME;
11673 the definition could be anywhere in the function
11674 DT - the type of definition
11676 Returns whether a stmt with OPERAND can be vectorized.
11677 For loops, supportable operands are constants, loop invariants, and operands
11678 that are defined by the current iteration of the loop. Unsupportable
11679 operands are those that are defined by a previous iteration of the loop (as
11680 is the case in reduction/induction computations).
11681 For basic blocks, supportable operands are constants and bb invariants.
11682 For now, operands defined outside the basic block are not supported. */
11685 vect_is_simple_use (tree operand
, vec_info
*vinfo
, enum vect_def_type
*dt
,
11686 stmt_vec_info
*def_stmt_info_out
, gimple
**def_stmt_out
)
11688 if (def_stmt_info_out
)
11689 *def_stmt_info_out
= NULL
;
11691 *def_stmt_out
= NULL
;
11692 *dt
= vect_unknown_def_type
;
11694 if (dump_enabled_p ())
11696 dump_printf_loc (MSG_NOTE
, vect_location
,
11697 "vect_is_simple_use: operand ");
11698 if (TREE_CODE (operand
) == SSA_NAME
11699 && !SSA_NAME_IS_DEFAULT_DEF (operand
))
11700 dump_gimple_expr (MSG_NOTE
, TDF_SLIM
, SSA_NAME_DEF_STMT (operand
), 0);
11702 dump_generic_expr (MSG_NOTE
, TDF_SLIM
, operand
);
11705 if (CONSTANT_CLASS_P (operand
))
11706 *dt
= vect_constant_def
;
11707 else if (is_gimple_min_invariant (operand
))
11708 *dt
= vect_external_def
;
11709 else if (TREE_CODE (operand
) != SSA_NAME
)
11710 *dt
= vect_unknown_def_type
;
11711 else if (SSA_NAME_IS_DEFAULT_DEF (operand
))
11712 *dt
= vect_external_def
;
11715 gimple
*def_stmt
= SSA_NAME_DEF_STMT (operand
);
11716 stmt_vec_info stmt_vinfo
= vinfo
->lookup_def (operand
);
11718 *dt
= vect_external_def
;
11721 stmt_vinfo
= vect_stmt_to_vectorize (stmt_vinfo
);
11722 def_stmt
= stmt_vinfo
->stmt
;
11723 switch (gimple_code (def_stmt
))
11726 case GIMPLE_ASSIGN
:
11728 *dt
= STMT_VINFO_DEF_TYPE (stmt_vinfo
);
11731 *dt
= vect_unknown_def_type
;
11734 if (def_stmt_info_out
)
11735 *def_stmt_info_out
= stmt_vinfo
;
11738 *def_stmt_out
= def_stmt
;
11741 if (dump_enabled_p ())
11743 dump_printf (MSG_NOTE
, ", type of def: ");
11746 case vect_uninitialized_def
:
11747 dump_printf (MSG_NOTE
, "uninitialized\n");
11749 case vect_constant_def
:
11750 dump_printf (MSG_NOTE
, "constant\n");
11752 case vect_external_def
:
11753 dump_printf (MSG_NOTE
, "external\n");
11755 case vect_internal_def
:
11756 dump_printf (MSG_NOTE
, "internal\n");
11758 case vect_induction_def
:
11759 dump_printf (MSG_NOTE
, "induction\n");
11761 case vect_reduction_def
:
11762 dump_printf (MSG_NOTE
, "reduction\n");
11764 case vect_double_reduction_def
:
11765 dump_printf (MSG_NOTE
, "double reduction\n");
11767 case vect_nested_cycle
:
11768 dump_printf (MSG_NOTE
, "nested cycle\n");
11770 case vect_unknown_def_type
:
11771 dump_printf (MSG_NOTE
, "unknown\n");
11776 if (*dt
== vect_unknown_def_type
)
11778 if (dump_enabled_p ())
11779 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
11780 "Unsupported pattern.\n");
11787 /* Function vect_is_simple_use.
11789 Same as vect_is_simple_use but also determines the vector operand
11790 type of OPERAND and stores it to *VECTYPE. If the definition of
11791 OPERAND is vect_uninitialized_def, vect_constant_def or
11792 vect_external_def *VECTYPE will be set to NULL_TREE and the caller
11793 is responsible to compute the best suited vector type for the
11797 vect_is_simple_use (tree operand
, vec_info
*vinfo
, enum vect_def_type
*dt
,
11798 tree
*vectype
, stmt_vec_info
*def_stmt_info_out
,
11799 gimple
**def_stmt_out
)
11801 stmt_vec_info def_stmt_info
;
11803 if (!vect_is_simple_use (operand
, vinfo
, dt
, &def_stmt_info
, &def_stmt
))
11807 *def_stmt_out
= def_stmt
;
11808 if (def_stmt_info_out
)
11809 *def_stmt_info_out
= def_stmt_info
;
11811 /* Now get a vector type if the def is internal, otherwise supply
11812 NULL_TREE and leave it up to the caller to figure out a proper
11813 type for the use stmt. */
11814 if (*dt
== vect_internal_def
11815 || *dt
== vect_induction_def
11816 || *dt
== vect_reduction_def
11817 || *dt
== vect_double_reduction_def
11818 || *dt
== vect_nested_cycle
)
11820 *vectype
= STMT_VINFO_VECTYPE (def_stmt_info
);
11821 gcc_assert (*vectype
!= NULL_TREE
);
11822 if (dump_enabled_p ())
11823 dump_printf_loc (MSG_NOTE
, vect_location
,
11824 "vect_is_simple_use: vectype %T\n", *vectype
);
11826 else if (*dt
== vect_uninitialized_def
11827 || *dt
== vect_constant_def
11828 || *dt
== vect_external_def
)
11829 *vectype
= NULL_TREE
;
11831 gcc_unreachable ();
11836 /* Function vect_is_simple_use.
11838 Same as vect_is_simple_use but determines the operand by operand
11839 position OPERAND from either STMT or SLP_NODE, filling in *OP
11840 and *SLP_DEF (when SLP_NODE is not NULL). */
11843 vect_is_simple_use (vec_info
*vinfo
, stmt_vec_info stmt
, slp_tree slp_node
,
11844 unsigned operand
, tree
*op
, slp_tree
*slp_def
,
11845 enum vect_def_type
*dt
,
11846 tree
*vectype
, stmt_vec_info
*def_stmt_info_out
)
11850 slp_tree child
= SLP_TREE_CHILDREN (slp_node
)[operand
];
11852 if (SLP_TREE_DEF_TYPE (child
) == vect_internal_def
)
11853 *op
= gimple_get_lhs (SLP_TREE_REPRESENTATIVE (child
)->stmt
);
11856 if (def_stmt_info_out
)
11857 *def_stmt_info_out
= NULL
;
11858 *op
= SLP_TREE_SCALAR_OPS (child
)[0];
11859 *dt
= SLP_TREE_DEF_TYPE (child
);
11860 *vectype
= SLP_TREE_VECTYPE (child
);
11866 if (gassign
*ass
= dyn_cast
<gassign
*> (stmt
->stmt
))
11868 if (gimple_assign_rhs_code (ass
) == COND_EXPR
11869 && COMPARISON_CLASS_P (gimple_assign_rhs1 (ass
)))
11872 *op
= TREE_OPERAND (gimple_assign_rhs1 (ass
), operand
);
11874 *op
= gimple_op (ass
, operand
);
11876 else if (gimple_assign_rhs_code (ass
) == VIEW_CONVERT_EXPR
)
11877 *op
= TREE_OPERAND (gimple_assign_rhs1 (ass
), 0);
11879 *op
= gimple_op (ass
, operand
+ 1);
11881 else if (gcall
*call
= dyn_cast
<gcall
*> (stmt
->stmt
))
11883 if (gimple_call_internal_p (call
)
11884 && internal_store_fn_p (gimple_call_internal_fn (call
)))
11885 operand
= internal_fn_stored_value_index (gimple_call_internal_fn
11887 *op
= gimple_call_arg (call
, operand
);
11890 gcc_unreachable ();
11893 /* ??? We might want to update *vectype from *slp_def here though
11894 when sharing nodes this would prevent unsharing in the caller. */
11895 return vect_is_simple_use (*op
, vinfo
, dt
, vectype
, def_stmt_info_out
);
11898 /* If OP is not NULL and is external or constant update its vector
11899 type with VECTYPE. Returns true if successful or false if not,
11900 for example when conflicting vector types are present. */
11903 vect_maybe_update_slp_op_vectype (slp_tree op
, tree vectype
)
11905 if (!op
|| SLP_TREE_DEF_TYPE (op
) == vect_internal_def
)
11907 if (SLP_TREE_VECTYPE (op
))
11908 return types_compatible_p (SLP_TREE_VECTYPE (op
), vectype
);
11909 SLP_TREE_VECTYPE (op
) = vectype
;
11913 /* Function supportable_widening_operation
11915 Check whether an operation represented by the code CODE is a
11916 widening operation that is supported by the target platform in
11917 vector form (i.e., when operating on arguments of type VECTYPE_IN
11918 producing a result of type VECTYPE_OUT).
11920 Widening operations we currently support are NOP (CONVERT), FLOAT,
11921 FIX_TRUNC and WIDEN_MULT. This function checks if these operations
11922 are supported by the target platform either directly (via vector
11923 tree-codes), or via target builtins.
11926 - CODE1 and CODE2 are codes of vector operations to be used when
11927 vectorizing the operation, if available.
11928 - MULTI_STEP_CVT determines the number of required intermediate steps in
11929 case of multi-step conversion (like char->short->int - in that case
11930 MULTI_STEP_CVT will be 1).
11931 - INTERM_TYPES contains the intermediate type required to perform the
11932 widening operation (short in the above example). */
11935 supportable_widening_operation (vec_info
*vinfo
,
11936 enum tree_code code
, stmt_vec_info stmt_info
,
11937 tree vectype_out
, tree vectype_in
,
11938 enum tree_code
*code1
, enum tree_code
*code2
,
11939 int *multi_step_cvt
,
11940 vec
<tree
> *interm_types
)
11942 loop_vec_info loop_info
= dyn_cast
<loop_vec_info
> (vinfo
);
11943 class loop
*vect_loop
= NULL
;
11944 machine_mode vec_mode
;
11945 enum insn_code icode1
, icode2
;
11946 optab optab1
, optab2
;
11947 tree vectype
= vectype_in
;
11948 tree wide_vectype
= vectype_out
;
11949 enum tree_code c1
, c2
;
11951 tree prev_type
, intermediate_type
;
11952 machine_mode intermediate_mode
, prev_mode
;
11953 optab optab3
, optab4
;
11955 *multi_step_cvt
= 0;
11957 vect_loop
= LOOP_VINFO_LOOP (loop_info
);
11961 case WIDEN_MULT_EXPR
:
11962 /* The result of a vectorized widening operation usually requires
11963 two vectors (because the widened results do not fit into one vector).
11964 The generated vector results would normally be expected to be
11965 generated in the same order as in the original scalar computation,
11966 i.e. if 8 results are generated in each vector iteration, they are
11967 to be organized as follows:
11968 vect1: [res1,res2,res3,res4],
11969 vect2: [res5,res6,res7,res8].
11971 However, in the special case that the result of the widening
11972 operation is used in a reduction computation only, the order doesn't
11973 matter (because when vectorizing a reduction we change the order of
11974 the computation). Some targets can take advantage of this and
11975 generate more efficient code. For example, targets like Altivec,
11976 that support widen_mult using a sequence of {mult_even,mult_odd}
11977 generate the following vectors:
11978 vect1: [res1,res3,res5,res7],
11979 vect2: [res2,res4,res6,res8].
11981 When vectorizing outer-loops, we execute the inner-loop sequentially
11982 (each vectorized inner-loop iteration contributes to VF outer-loop
11983 iterations in parallel). We therefore don't allow to change the
11984 order of the computation in the inner-loop during outer-loop
11986 /* TODO: Another case in which order doesn't *really* matter is when we
11987 widen and then contract again, e.g. (short)((int)x * y >> 8).
11988 Normally, pack_trunc performs an even/odd permute, whereas the
11989 repack from an even/odd expansion would be an interleave, which
11990 would be significantly simpler for e.g. AVX2. */
11991 /* In any case, in order to avoid duplicating the code below, recurse
11992 on VEC_WIDEN_MULT_EVEN_EXPR. If it succeeds, all the return values
11993 are properly set up for the caller. If we fail, we'll continue with
11994 a VEC_WIDEN_MULT_LO/HI_EXPR check. */
11996 && STMT_VINFO_RELEVANT (stmt_info
) == vect_used_by_reduction
11997 && !nested_in_vect_loop_p (vect_loop
, stmt_info
)
11998 && supportable_widening_operation (vinfo
, VEC_WIDEN_MULT_EVEN_EXPR
,
11999 stmt_info
, vectype_out
,
12000 vectype_in
, code1
, code2
,
12001 multi_step_cvt
, interm_types
))
12003 /* Elements in a vector with vect_used_by_reduction property cannot
12004 be reordered if the use chain with this property does not have the
12005 same operation. One such an example is s += a * b, where elements
12006 in a and b cannot be reordered. Here we check if the vector defined
12007 by STMT is only directly used in the reduction statement. */
12008 tree lhs
= gimple_assign_lhs (stmt_info
->stmt
);
12009 stmt_vec_info use_stmt_info
= loop_info
->lookup_single_use (lhs
);
12011 && STMT_VINFO_DEF_TYPE (use_stmt_info
) == vect_reduction_def
)
12014 c1
= VEC_WIDEN_MULT_LO_EXPR
;
12015 c2
= VEC_WIDEN_MULT_HI_EXPR
;
12018 case DOT_PROD_EXPR
:
12019 c1
= DOT_PROD_EXPR
;
12020 c2
= DOT_PROD_EXPR
;
12028 case VEC_WIDEN_MULT_EVEN_EXPR
:
12029 /* Support the recursion induced just above. */
12030 c1
= VEC_WIDEN_MULT_EVEN_EXPR
;
12031 c2
= VEC_WIDEN_MULT_ODD_EXPR
;
12034 case WIDEN_LSHIFT_EXPR
:
12035 c1
= VEC_WIDEN_LSHIFT_LO_EXPR
;
12036 c2
= VEC_WIDEN_LSHIFT_HI_EXPR
;
12040 c1
= VEC_UNPACK_LO_EXPR
;
12041 c2
= VEC_UNPACK_HI_EXPR
;
12045 c1
= VEC_UNPACK_FLOAT_LO_EXPR
;
12046 c2
= VEC_UNPACK_FLOAT_HI_EXPR
;
12049 case FIX_TRUNC_EXPR
:
12050 c1
= VEC_UNPACK_FIX_TRUNC_LO_EXPR
;
12051 c2
= VEC_UNPACK_FIX_TRUNC_HI_EXPR
;
12055 gcc_unreachable ();
12058 if (BYTES_BIG_ENDIAN
&& c1
!= VEC_WIDEN_MULT_EVEN_EXPR
)
12059 std::swap (c1
, c2
);
12061 if (code
== FIX_TRUNC_EXPR
)
12063 /* The signedness is determined from output operand. */
12064 optab1
= optab_for_tree_code (c1
, vectype_out
, optab_default
);
12065 optab2
= optab_for_tree_code (c2
, vectype_out
, optab_default
);
12067 else if (CONVERT_EXPR_CODE_P (code
)
12068 && VECTOR_BOOLEAN_TYPE_P (wide_vectype
)
12069 && VECTOR_BOOLEAN_TYPE_P (vectype
)
12070 && TYPE_MODE (wide_vectype
) == TYPE_MODE (vectype
)
12071 && SCALAR_INT_MODE_P (TYPE_MODE (vectype
)))
12073 /* If the input and result modes are the same, a different optab
12074 is needed where we pass in the number of units in vectype. */
12075 optab1
= vec_unpacks_sbool_lo_optab
;
12076 optab2
= vec_unpacks_sbool_hi_optab
;
12080 optab1
= optab_for_tree_code (c1
, vectype
, optab_default
);
12081 optab2
= optab_for_tree_code (c2
, vectype
, optab_default
);
12084 if (!optab1
|| !optab2
)
12087 vec_mode
= TYPE_MODE (vectype
);
12088 if ((icode1
= optab_handler (optab1
, vec_mode
)) == CODE_FOR_nothing
12089 || (icode2
= optab_handler (optab2
, vec_mode
)) == CODE_FOR_nothing
)
12095 if (insn_data
[icode1
].operand
[0].mode
== TYPE_MODE (wide_vectype
)
12096 && insn_data
[icode2
].operand
[0].mode
== TYPE_MODE (wide_vectype
))
12098 if (!VECTOR_BOOLEAN_TYPE_P (vectype
))
12100 /* For scalar masks we may have different boolean
12101 vector types having the same QImode. Thus we
12102 add additional check for elements number. */
12103 if (known_eq (TYPE_VECTOR_SUBPARTS (vectype
),
12104 TYPE_VECTOR_SUBPARTS (wide_vectype
) * 2))
12108 /* Check if it's a multi-step conversion that can be done using intermediate
12111 prev_type
= vectype
;
12112 prev_mode
= vec_mode
;
12114 if (!CONVERT_EXPR_CODE_P (code
))
12117 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
12118 intermediate steps in promotion sequence. We try
12119 MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do
12121 interm_types
->create (MAX_INTERM_CVT_STEPS
);
12122 for (i
= 0; i
< MAX_INTERM_CVT_STEPS
; i
++)
12124 intermediate_mode
= insn_data
[icode1
].operand
[0].mode
;
12125 if (VECTOR_BOOLEAN_TYPE_P (prev_type
))
12127 = vect_halve_mask_nunits (prev_type
, intermediate_mode
);
12130 = lang_hooks
.types
.type_for_mode (intermediate_mode
,
12131 TYPE_UNSIGNED (prev_type
));
12133 if (VECTOR_BOOLEAN_TYPE_P (intermediate_type
)
12134 && VECTOR_BOOLEAN_TYPE_P (prev_type
)
12135 && intermediate_mode
== prev_mode
12136 && SCALAR_INT_MODE_P (prev_mode
))
12138 /* If the input and result modes are the same, a different optab
12139 is needed where we pass in the number of units in vectype. */
12140 optab3
= vec_unpacks_sbool_lo_optab
;
12141 optab4
= vec_unpacks_sbool_hi_optab
;
12145 optab3
= optab_for_tree_code (c1
, intermediate_type
, optab_default
);
12146 optab4
= optab_for_tree_code (c2
, intermediate_type
, optab_default
);
12149 if (!optab3
|| !optab4
12150 || (icode1
= optab_handler (optab1
, prev_mode
)) == CODE_FOR_nothing
12151 || insn_data
[icode1
].operand
[0].mode
!= intermediate_mode
12152 || (icode2
= optab_handler (optab2
, prev_mode
)) == CODE_FOR_nothing
12153 || insn_data
[icode2
].operand
[0].mode
!= intermediate_mode
12154 || ((icode1
= optab_handler (optab3
, intermediate_mode
))
12155 == CODE_FOR_nothing
)
12156 || ((icode2
= optab_handler (optab4
, intermediate_mode
))
12157 == CODE_FOR_nothing
))
12160 interm_types
->quick_push (intermediate_type
);
12161 (*multi_step_cvt
)++;
12163 if (insn_data
[icode1
].operand
[0].mode
== TYPE_MODE (wide_vectype
)
12164 && insn_data
[icode2
].operand
[0].mode
== TYPE_MODE (wide_vectype
))
12166 if (!VECTOR_BOOLEAN_TYPE_P (vectype
))
12168 if (known_eq (TYPE_VECTOR_SUBPARTS (intermediate_type
),
12169 TYPE_VECTOR_SUBPARTS (wide_vectype
) * 2))
12173 prev_type
= intermediate_type
;
12174 prev_mode
= intermediate_mode
;
12177 interm_types
->release ();
12182 /* Function supportable_narrowing_operation
12184 Check whether an operation represented by the code CODE is a
12185 narrowing operation that is supported by the target platform in
12186 vector form (i.e., when operating on arguments of type VECTYPE_IN
12187 and producing a result of type VECTYPE_OUT).
12189 Narrowing operations we currently support are NOP (CONVERT), FIX_TRUNC
12190 and FLOAT. This function checks if these operations are supported by
12191 the target platform directly via vector tree-codes.
12194 - CODE1 is the code of a vector operation to be used when
12195 vectorizing the operation, if available.
12196 - MULTI_STEP_CVT determines the number of required intermediate steps in
12197 case of multi-step conversion (like int->short->char - in that case
12198 MULTI_STEP_CVT will be 1).
12199 - INTERM_TYPES contains the intermediate type required to perform the
12200 narrowing operation (short in the above example). */
/* NOTE(review): this chunk is a line-mangled extraction; several original
   source lines (the return type, the switch/case framing, braces and the
   early success/failure returns) were dropped by whatever produced it.
   The comments below only describe the statements that are visible;
   anything concerning elided control flow is explicitly marked as an
   assumption to be confirmed against the upstream file.  */
12203 supportable_narrowing_operation (enum tree_code code
,
12204 tree vectype_out
, tree vectype_in
,
12205 enum tree_code
*code1
, int *multi_step_cvt
,
12206 vec
<tree
> *interm_types
)
/* Locals: the candidate insn/optab for the single-step conversion plus
   the optab used for intermediate narrowing steps.  */
12208 machine_mode vec_mode
;
12209 enum insn_code icode1
;
12210 optab optab1
, interm_optab
;
/* VECTYPE is the (wider) input type, NARROW_VECTYPE the wanted output.  */
12211 tree vectype
= vectype_in
;
12212 tree narrow_vectype
= vectype_out
;
12214 tree intermediate_type
, prev_type
;
12215 machine_mode intermediate_mode
, prev_mode
;
/* Start out assuming a direct, single-step conversion.  */
12219 *multi_step_cvt
= 0;
/* Presumably the CONVERT/NOP arm of an elided switch (code): a plain
   truncation packs two input vectors into one narrower vector.  */
12223 c1
= VEC_PACK_TRUNC_EXPR
;
/* Same-mode boolean-to-boolean narrowing needs the special sbool optab,
   which is keyed on element count rather than on the (shared) mode.  */
12224 if (VECTOR_BOOLEAN_TYPE_P (narrow_vectype
)
12225 && VECTOR_BOOLEAN_TYPE_P (vectype
)
12226 && TYPE_MODE (narrow_vectype
) == TYPE_MODE (vectype
)
12227 && SCALAR_INT_MODE_P (TYPE_MODE (vectype
)))
12228 optab1
= vec_pack_sbool_trunc_optab
;
12230 optab1
= optab_for_tree_code (c1
, vectype
, optab_default
);
/* float -> narrower int conversion.  */
12233 case FIX_TRUNC_EXPR
:
12234 c1
= VEC_PACK_FIX_TRUNC_EXPR
;
12235 /* The signedness is determined from output operand. */
12236 optab1
= optab_for_tree_code (c1
, vectype_out
, optab_default
);
/* Presumably the FLOAT_EXPR arm: int -> narrower float.  */
12240 c1
= VEC_PACK_FLOAT_EXPR
;
12241 optab1
= optab_for_tree_code (c1
, vectype
, optab_default
);
/* Any other CODE is not a narrowing operation this function handles.  */
12245 gcc_unreachable ();
/* Ask the target whether it implements the chosen optab for the input
   mode at all.  (The failure return body itself was elided here.)  */
12251 vec_mode
= TYPE_MODE (vectype
);
12252 if ((icode1
= optab_handler (optab1
, vec_mode
)) == CODE_FOR_nothing
)
/* Single-step success test: the insn's result mode must match the
   requested narrow vector type.  */
12257 if (insn_data
[icode1
].operand
[0].mode
== TYPE_MODE (narrow_vectype
))
12259 if (!VECTOR_BOOLEAN_TYPE_P (vectype
))
12261 /* For scalar masks we may have different boolean
12262 vector types having the same QImode. Thus we
12263 add additional check for elements number. */
12264 if (known_eq (TYPE_VECTOR_SUBPARTS (vectype
) * 2,
12265 TYPE_VECTOR_SUBPARTS (narrow_vectype
)))
/* FLOAT_EXPR appears to be supported only in the single-step form:
   bail out before the multi-step search below -- TODO confirm.  */
12269 if (code
== FLOAT_EXPR
)
12272 /* Check if it's a multi-step conversion that can be done using intermediate
/* Seed the multi-step walk with the original input type/mode.  */
12274 prev_mode
= vec_mode
;
12275 prev_type
= vectype
;
/* Signedness for intermediate integer types: taken from the output for
   float->int conversions, otherwise from the input.  */
12276 if (code
== FIX_TRUNC_EXPR
)
12277 uns
= TYPE_UNSIGNED (vectype_out
);
12279 uns
= TYPE_UNSIGNED (vectype
);
12281 /* For multi-step FIX_TRUNC_EXPR prefer signed floating to integer
12282 conversion over unsigned, as unsigned FIX_TRUNC_EXPR is often more
12283 costly than signed. */
12284 if (code
== FIX_TRUNC_EXPR
&& uns
)
12286 enum insn_code icode2
;
/* Build the signed counterpart of the output type and its optab.  The
   assignment targets (presumably intermediate_type and interm_optab)
   were elided by the extraction.  */
12289 = lang_hooks
.types
.type_for_mode (TYPE_MODE (vectype_out
), 0);
12291 = optab_for_tree_code (c1
, intermediate_type
, optab_default
);
/* Switch to the signed variant only when it exists and yields exactly
   the same result mode as the unsigned one.  */
12292 if (interm_optab
!= unknown_optab
12293 && (icode2
= optab_handler (optab1
, vec_mode
)) != CODE_FOR_nothing
12294 && insn_data
[icode1
].operand
[0].mode
12295 == insn_data
[icode2
].operand
[0].mode
)
12298 optab1
= interm_optab
;
12303 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
12304 intermediate steps in promotion sequence. We try
12305 MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do not. */
12306 interm_types
->create (MAX_INTERM_CVT_STEPS
);
12307 for (i
= 0; i
< MAX_INTERM_CVT_STEPS
; i
++)
/* Each iteration narrows from PREV_MODE to the result mode of the insn
   selected on the previous iteration.  */
12309 intermediate_mode
= insn_data
[icode1
].operand
[0].mode
;
/* Boolean vectors double their element count per step; other types get
   the frontend type for the intermediate mode.  (The assignment target,
   presumably intermediate_type, was elided.)  */
12310 if (VECTOR_BOOLEAN_TYPE_P (prev_type
))
12312 = vect_double_mask_nunits (prev_type
, intermediate_mode
);
12315 = lang_hooks
.types
.type_for_mode (intermediate_mode
, uns
);
/* Same-mode boolean step: use the element-count-keyed optab, exactly as
   in the single-step case above.  */
12316 if (VECTOR_BOOLEAN_TYPE_P (intermediate_type
)
12317 && VECTOR_BOOLEAN_TYPE_P (prev_type
)
12318 && intermediate_mode
== prev_mode
12319 && SCALAR_INT_MODE_P (prev_mode
))
12320 interm_optab
= vec_pack_sbool_trunc_optab
;
12323 = optab_for_tree_code (VEC_PACK_TRUNC_EXPR
, intermediate_type
,
/* Fail this step if either the previously chosen optab or this step's
   pack optab has no handler, or the result modes do not line up.  */
12326 || ((icode1
= optab_handler (optab1
, prev_mode
)) == CODE_FOR_nothing
)
12327 || insn_data
[icode1
].operand
[0].mode
!= intermediate_mode
12328 || ((icode1
= optab_handler (interm_optab
, intermediate_mode
))
12329 == CODE_FOR_nothing
))
/* Record the successful intermediate step.  */
12332 interm_types
->quick_push (intermediate_type
);
12333 (*multi_step_cvt
)++;
/* Done once this step's result mode reaches the requested narrow type
   (with the same element-count caveat for boolean vectors as above).  */
12335 if (insn_data
[icode1
].operand
[0].mode
== TYPE_MODE (narrow_vectype
))
12337 if (!VECTOR_BOOLEAN_TYPE_P (vectype
))
12339 if (known_eq (TYPE_VECTOR_SUBPARTS (intermediate_type
) * 2,
12340 TYPE_VECTOR_SUBPARTS (narrow_vectype
)))
/* Otherwise advance the walk and try a further narrowing step.  */
12344 prev_mode
= intermediate_mode
;
12345 prev_type
= intermediate_type
;
12346 optab1
= interm_optab
;
/* No sequence of at most MAX_INTERM_CVT_STEPS steps worked: discard the
   collected intermediate types before the (elided) failure return.  */
12349 interm_types
->release ();
12353 /* Generate and return a statement that sets vector mask MASK such that
12354 MASK[I] is true iff J + START_INDEX < END_INDEX for all J <= I. */
12357 vect_gen_while (tree mask
, tree start_index
, tree end_index
)
12359 tree cmp_type
= TREE_TYPE (start_index
);
12360 tree mask_type
= TREE_TYPE (mask
);
12361 gcc_checking_assert (direct_internal_fn_supported_p (IFN_WHILE_ULT
,
12362 cmp_type
, mask_type
,
12363 OPTIMIZE_FOR_SPEED
));
12364 gcall
*call
= gimple_build_call_internal (IFN_WHILE_ULT
, 3,
12365 start_index
, end_index
,
12366 build_zero_cst (mask_type
));
12367 gimple_call_set_lhs (call
, mask
);
12371 /* Generate a vector mask of type MASK_TYPE for which index I is false iff
12372 J + START_INDEX < END_INDEX for all J <= I. Add the statements to SEQ. */
12375 vect_gen_while_not (gimple_seq
*seq
, tree mask_type
, tree start_index
,
12378 tree tmp
= make_ssa_name (mask_type
);
12379 gcall
*call
= vect_gen_while (tmp
, start_index
, end_index
);
12380 gimple_seq_add_stmt (seq
, call
);
12381 return gimple_build (seq
, BIT_NOT_EXPR
, mask_type
, tmp
);
12384 /* Try to compute the vector types required to vectorize STMT_INFO,
12385 returning true on success and false if vectorization isn't possible.
12386 If GROUP_SIZE is nonzero and we're performing BB vectorization,
12387 take sure that the number of elements in the vectors is no bigger
12392 - Set *STMT_VECTYPE_OUT to:
12393 - NULL_TREE if the statement doesn't need to be vectorized;
12394 - the equivalent of STMT_VINFO_VECTYPE otherwise.
12396 - Set *NUNITS_VECTYPE_OUT to the vector type that contains the maximum
12397 number of units needed to vectorize STMT_INFO, or NULL_TREE if the
12398 statement does not help to determine the overall number of units. */
12401 vect_get_vector_types_for_stmt (vec_info
*vinfo
, stmt_vec_info stmt_info
,
12402 tree
*stmt_vectype_out
,
12403 tree
*nunits_vectype_out
,
12404 unsigned int group_size
)
12406 gimple
*stmt
= stmt_info
->stmt
;
12408 /* For BB vectorization, we should always have a group size once we've
12409 constructed the SLP tree; the only valid uses of zero GROUP_SIZEs
12410 are tentative requests during things like early data reference
12411 analysis and pattern recognition. */
12412 if (is_a
<bb_vec_info
> (vinfo
))
12413 gcc_assert (vinfo
->slp_instances
.is_empty () || group_size
!= 0);
12417 *stmt_vectype_out
= NULL_TREE
;
12418 *nunits_vectype_out
= NULL_TREE
;
12420 if (gimple_get_lhs (stmt
) == NULL_TREE
12421 /* MASK_STORE has no lhs, but is ok. */
12422 && !gimple_call_internal_p (stmt
, IFN_MASK_STORE
))
12424 if (is_a
<gcall
*> (stmt
))
12426 /* Ignore calls with no lhs. These must be calls to
12427 #pragma omp simd functions, and what vectorization factor
12428 it really needs can't be determined until
12429 vectorizable_simd_clone_call. */
12430 if (dump_enabled_p ())
12431 dump_printf_loc (MSG_NOTE
, vect_location
,
12432 "defer to SIMD clone analysis.\n");
12433 return opt_result::success ();
12436 return opt_result::failure_at (stmt
,
12437 "not vectorized: irregular stmt.%G", stmt
);
12440 if (VECTOR_MODE_P (TYPE_MODE (gimple_expr_type (stmt
))))
12441 return opt_result::failure_at (stmt
,
12442 "not vectorized: vector stmt in loop:%G",
12446 tree scalar_type
= NULL_TREE
;
12447 if (group_size
== 0 && STMT_VINFO_VECTYPE (stmt_info
))
12449 vectype
= STMT_VINFO_VECTYPE (stmt_info
);
12450 if (dump_enabled_p ())
12451 dump_printf_loc (MSG_NOTE
, vect_location
,
12452 "precomputed vectype: %T\n", vectype
);
12454 else if (vect_use_mask_type_p (stmt_info
))
12456 unsigned int precision
= stmt_info
->mask_precision
;
12457 scalar_type
= build_nonstandard_integer_type (precision
, 1);
12458 vectype
= get_mask_type_for_scalar_type (vinfo
, scalar_type
, group_size
);
12460 return opt_result::failure_at (stmt
, "not vectorized: unsupported"
12461 " data-type %T\n", scalar_type
);
12462 if (dump_enabled_p ())
12463 dump_printf_loc (MSG_NOTE
, vect_location
, "vectype: %T\n", vectype
);
12467 if (data_reference
*dr
= STMT_VINFO_DATA_REF (stmt_info
))
12468 scalar_type
= TREE_TYPE (DR_REF (dr
));
12469 else if (gimple_call_internal_p (stmt
, IFN_MASK_STORE
))
12470 scalar_type
= TREE_TYPE (gimple_call_arg (stmt
, 3));
12472 scalar_type
= TREE_TYPE (gimple_get_lhs (stmt
));
12474 if (dump_enabled_p ())
12477 dump_printf_loc (MSG_NOTE
, vect_location
,
12478 "get vectype for scalar type (group size %d):"
12479 " %T\n", group_size
, scalar_type
);
12481 dump_printf_loc (MSG_NOTE
, vect_location
,
12482 "get vectype for scalar type: %T\n", scalar_type
);
12484 vectype
= get_vectype_for_scalar_type (vinfo
, scalar_type
, group_size
);
12486 return opt_result::failure_at (stmt
,
12488 " unsupported data-type %T\n",
12491 if (dump_enabled_p ())
12492 dump_printf_loc (MSG_NOTE
, vect_location
, "vectype: %T\n", vectype
);
12494 *stmt_vectype_out
= vectype
;
12496 /* Don't try to compute scalar types if the stmt produces a boolean
12497 vector; use the existing vector type instead. */
12498 tree nunits_vectype
= vectype
;
12499 if (!VECTOR_BOOLEAN_TYPE_P (vectype
))
12501 /* The number of units is set according to the smallest scalar
12502 type (or the largest vector size, but we only support one
12503 vector size per vectorization). */
12504 HOST_WIDE_INT dummy
;
12505 scalar_type
= vect_get_smallest_scalar_type (stmt_info
, &dummy
, &dummy
);
12506 if (scalar_type
!= TREE_TYPE (vectype
))
12508 if (dump_enabled_p ())
12509 dump_printf_loc (MSG_NOTE
, vect_location
,
12510 "get vectype for smallest scalar type: %T\n",
12512 nunits_vectype
= get_vectype_for_scalar_type (vinfo
, scalar_type
,
12514 if (!nunits_vectype
)
12515 return opt_result::failure_at
12516 (stmt
, "not vectorized: unsupported data-type %T\n",
12518 if (dump_enabled_p ())
12519 dump_printf_loc (MSG_NOTE
, vect_location
, "nunits vectype: %T\n",
12524 gcc_assert (multiple_p (TYPE_VECTOR_SUBPARTS (nunits_vectype
),
12525 TYPE_VECTOR_SUBPARTS (*stmt_vectype_out
)));
12527 if (dump_enabled_p ())
12529 dump_printf_loc (MSG_NOTE
, vect_location
, "nunits = ");
12530 dump_dec (MSG_NOTE
, TYPE_VECTOR_SUBPARTS (nunits_vectype
));
12531 dump_printf (MSG_NOTE
, "\n");
12534 *nunits_vectype_out
= nunits_vectype
;
12535 return opt_result::success ();