1 /* Statement Analysis and Transformation for Vectorization
2 Copyright (C) 2003-2019 Free Software Foundation, Inc.
3 Contributed by Dorit Naishlos <dorit@il.ibm.com>
4 and Ira Rosen <irar@il.ibm.com>
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
24 #include "coretypes.h"
31 #include "optabs-tree.h"
32 #include "insn-config.h"
33 #include "recog.h" /* FIXME: for insn_data */
37 #include "fold-const.h"
38 #include "stor-layout.h"
41 #include "gimple-iterator.h"
42 #include "gimplify-me.h"
44 #include "tree-ssa-loop-manip.h"
47 #include "tree-ssa-loop.h"
48 #include "tree-scalar-evolution.h"
49 #include "tree-vectorizer.h"
51 #include "internal-fn.h"
52 #include "tree-vector-builder.h"
53 #include "vec-perm-indices.h"
54 #include "tree-ssa-loop-niter.h"
55 #include "gimple-fold.h"
59 /* For lang_hooks.types.type_for_mode. */
60 #include "langhooks.h"
62 /* Return the vectorized type for the given statement. */
65 stmt_vectype (class _stmt_vec_info
*stmt_info
)
67 return STMT_VINFO_VECTYPE (stmt_info
);
70 /* Return TRUE iff the given statement is in an inner loop relative to
71 the loop being vectorized. */
73 stmt_in_inner_loop_p (class _stmt_vec_info
*stmt_info
)
75 gimple
*stmt
= STMT_VINFO_STMT (stmt_info
);
76 basic_block bb
= gimple_bb (stmt
);
77 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
83 loop
= LOOP_VINFO_LOOP (loop_vinfo
);
85 return (bb
->loop_father
== loop
->inner
);
88 /* Record the cost of a statement, either by directly informing the
89 target model or by saving it in a vector for later processing.
90 Return a preliminary estimate of the statement's cost. */
93 record_stmt_cost (stmt_vector_for_cost
*body_cost_vec
, int count
,
94 enum vect_cost_for_stmt kind
, stmt_vec_info stmt_info
,
95 int misalign
, enum vect_cost_model_location where
)
97 if ((kind
== vector_load
|| kind
== unaligned_load
)
98 && STMT_VINFO_GATHER_SCATTER_P (stmt_info
))
99 kind
= vector_gather_load
;
100 if ((kind
== vector_store
|| kind
== unaligned_store
)
101 && STMT_VINFO_GATHER_SCATTER_P (stmt_info
))
102 kind
= vector_scatter_store
;
104 stmt_info_for_cost si
= { count
, kind
, where
, stmt_info
, misalign
};
105 body_cost_vec
->safe_push (si
);
107 tree vectype
= stmt_info
? stmt_vectype (stmt_info
) : NULL_TREE
;
109 (builtin_vectorization_cost (kind
, vectype
, misalign
) * count
);
112 /* Return a variable of type ELEM_TYPE[NELEMS]. */
115 create_vector_array (tree elem_type
, unsigned HOST_WIDE_INT nelems
)
117 return create_tmp_var (build_array_type_nelts (elem_type
, nelems
),
121 /* ARRAY is an array of vectors created by create_vector_array.
122 Return an SSA_NAME for the vector in index N. The reference
123 is part of the vectorization of STMT_INFO and the vector is associated
124 with scalar destination SCALAR_DEST. */
127 read_vector_array (stmt_vec_info stmt_info
, gimple_stmt_iterator
*gsi
,
128 tree scalar_dest
, tree array
, unsigned HOST_WIDE_INT n
)
130 tree vect_type
, vect
, vect_name
, array_ref
;
133 gcc_assert (TREE_CODE (TREE_TYPE (array
)) == ARRAY_TYPE
);
134 vect_type
= TREE_TYPE (TREE_TYPE (array
));
135 vect
= vect_create_destination_var (scalar_dest
, vect_type
);
136 array_ref
= build4 (ARRAY_REF
, vect_type
, array
,
137 build_int_cst (size_type_node
, n
),
138 NULL_TREE
, NULL_TREE
);
140 new_stmt
= gimple_build_assign (vect
, array_ref
);
141 vect_name
= make_ssa_name (vect
, new_stmt
);
142 gimple_assign_set_lhs (new_stmt
, vect_name
);
143 vect_finish_stmt_generation (stmt_info
, new_stmt
, gsi
);
148 /* ARRAY is an array of vectors created by create_vector_array.
149 Emit code to store SSA_NAME VECT in index N of the array.
150 The store is part of the vectorization of STMT_INFO. */
153 write_vector_array (stmt_vec_info stmt_info
, gimple_stmt_iterator
*gsi
,
154 tree vect
, tree array
, unsigned HOST_WIDE_INT n
)
159 array_ref
= build4 (ARRAY_REF
, TREE_TYPE (vect
), array
,
160 build_int_cst (size_type_node
, n
),
161 NULL_TREE
, NULL_TREE
);
163 new_stmt
= gimple_build_assign (array_ref
, vect
);
164 vect_finish_stmt_generation (stmt_info
, new_stmt
, gsi
);
167 /* PTR is a pointer to an array of type TYPE. Return a representation
168 of *PTR. The memory reference replaces those in FIRST_DR
172 create_array_ref (tree type
, tree ptr
, tree alias_ptr_type
)
176 mem_ref
= build2 (MEM_REF
, type
, ptr
, build_int_cst (alias_ptr_type
, 0));
177 /* Arrays have the same alignment as their type. */
178 set_ptr_info_alignment (get_ptr_info (ptr
), TYPE_ALIGN_UNIT (type
), 0);
182 /* Add a clobber of variable VAR to the vectorization of STMT_INFO.
183 Emit the clobber before *GSI. */
186 vect_clobber_variable (stmt_vec_info stmt_info
, gimple_stmt_iterator
*gsi
,
189 tree clobber
= build_clobber (TREE_TYPE (var
));
190 gimple
*new_stmt
= gimple_build_assign (var
, clobber
);
191 vect_finish_stmt_generation (stmt_info
, new_stmt
, gsi
);
194 /* Utility functions used by vect_mark_stmts_to_be_vectorized. */
196 /* Function vect_mark_relevant.
198 Mark STMT_INFO as "relevant for vectorization" and add it to WORKLIST. */
201 vect_mark_relevant (vec
<stmt_vec_info
> *worklist
, stmt_vec_info stmt_info
,
202 enum vect_relevant relevant
, bool live_p
)
204 enum vect_relevant save_relevant
= STMT_VINFO_RELEVANT (stmt_info
);
205 bool save_live_p
= STMT_VINFO_LIVE_P (stmt_info
);
207 if (dump_enabled_p ())
208 dump_printf_loc (MSG_NOTE
, vect_location
,
209 "mark relevant %d, live %d: %G", relevant
, live_p
,
212 /* If this stmt is an original stmt in a pattern, we might need to mark its
213 related pattern stmt instead of the original stmt. However, such stmts
214 may have their own uses that are not in any pattern, in such cases the
215 stmt itself should be marked. */
216 if (STMT_VINFO_IN_PATTERN_P (stmt_info
))
218 /* This is the last stmt in a sequence that was detected as a
219 pattern that can potentially be vectorized. Don't mark the stmt
220 as relevant/live because it's not going to be vectorized.
221 Instead mark the pattern-stmt that replaces it. */
223 if (dump_enabled_p ())
224 dump_printf_loc (MSG_NOTE
, vect_location
,
225 "last stmt in pattern. don't mark"
226 " relevant/live.\n");
227 stmt_vec_info old_stmt_info
= stmt_info
;
228 stmt_info
= STMT_VINFO_RELATED_STMT (stmt_info
);
229 gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info
) == old_stmt_info
);
230 save_relevant
= STMT_VINFO_RELEVANT (stmt_info
);
231 save_live_p
= STMT_VINFO_LIVE_P (stmt_info
);
234 STMT_VINFO_LIVE_P (stmt_info
) |= live_p
;
235 if (relevant
> STMT_VINFO_RELEVANT (stmt_info
))
236 STMT_VINFO_RELEVANT (stmt_info
) = relevant
;
238 if (STMT_VINFO_RELEVANT (stmt_info
) == save_relevant
239 && STMT_VINFO_LIVE_P (stmt_info
) == save_live_p
)
241 if (dump_enabled_p ())
242 dump_printf_loc (MSG_NOTE
, vect_location
,
243 "already marked relevant/live.\n");
247 worklist
->safe_push (stmt_info
);
251 /* Function is_simple_and_all_uses_invariant
253 Return true if STMT_INFO is simple and all uses of it are invariant. */
256 is_simple_and_all_uses_invariant (stmt_vec_info stmt_info
,
257 loop_vec_info loop_vinfo
)
262 gassign
*stmt
= dyn_cast
<gassign
*> (stmt_info
->stmt
);
266 FOR_EACH_SSA_TREE_OPERAND (op
, stmt
, iter
, SSA_OP_USE
)
268 enum vect_def_type dt
= vect_uninitialized_def
;
270 if (!vect_is_simple_use (op
, loop_vinfo
, &dt
))
272 if (dump_enabled_p ())
273 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
274 "use not simple.\n");
278 if (dt
!= vect_external_def
&& dt
!= vect_constant_def
)
284 /* Function vect_stmt_relevant_p.
286 Return true if STMT_INFO, in the loop that is represented by LOOP_VINFO,
287 is "relevant for vectorization".
289 A stmt is considered "relevant for vectorization" if:
290 - it has uses outside the loop.
291 - it has vdefs (it alters memory).
292 - control stmts in the loop (except for the exit condition).
294 CHECKME: what other side effects would the vectorizer allow? */
297 vect_stmt_relevant_p (stmt_vec_info stmt_info
, loop_vec_info loop_vinfo
,
298 enum vect_relevant
*relevant
, bool *live_p
)
300 class loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
302 imm_use_iterator imm_iter
;
306 *relevant
= vect_unused_in_scope
;
309 /* cond stmt other than loop exit cond. */
310 if (is_ctrl_stmt (stmt_info
->stmt
)
311 && STMT_VINFO_TYPE (stmt_info
) != loop_exit_ctrl_vec_info_type
)
312 *relevant
= vect_used_in_scope
;
314 /* changing memory. */
315 if (gimple_code (stmt_info
->stmt
) != GIMPLE_PHI
)
316 if (gimple_vdef (stmt_info
->stmt
)
317 && !gimple_clobber_p (stmt_info
->stmt
))
319 if (dump_enabled_p ())
320 dump_printf_loc (MSG_NOTE
, vect_location
,
321 "vec_stmt_relevant_p: stmt has vdefs.\n");
322 *relevant
= vect_used_in_scope
;
325 /* uses outside the loop. */
326 FOR_EACH_PHI_OR_STMT_DEF (def_p
, stmt_info
->stmt
, op_iter
, SSA_OP_DEF
)
328 FOR_EACH_IMM_USE_FAST (use_p
, imm_iter
, DEF_FROM_PTR (def_p
))
330 basic_block bb
= gimple_bb (USE_STMT (use_p
));
331 if (!flow_bb_inside_loop_p (loop
, bb
))
333 if (dump_enabled_p ())
334 dump_printf_loc (MSG_NOTE
, vect_location
,
335 "vec_stmt_relevant_p: used out of loop.\n");
337 if (is_gimple_debug (USE_STMT (use_p
)))
340 /* We expect all such uses to be in the loop exit phis
341 (because of loop closed form) */
342 gcc_assert (gimple_code (USE_STMT (use_p
)) == GIMPLE_PHI
);
343 gcc_assert (bb
== single_exit (loop
)->dest
);
350 if (*live_p
&& *relevant
== vect_unused_in_scope
351 && !is_simple_and_all_uses_invariant (stmt_info
, loop_vinfo
))
353 if (dump_enabled_p ())
354 dump_printf_loc (MSG_NOTE
, vect_location
,
355 "vec_stmt_relevant_p: stmt live but not relevant.\n");
356 *relevant
= vect_used_only_live
;
359 return (*live_p
|| *relevant
);
363 /* Function exist_non_indexing_operands_for_use_p
365 USE is one of the uses attached to STMT_INFO. Check if USE is
366 used in STMT_INFO for anything other than indexing an array. */
369 exist_non_indexing_operands_for_use_p (tree use
, stmt_vec_info stmt_info
)
373 /* USE corresponds to some operand in STMT. If there is no data
374 reference in STMT, then any operand that corresponds to USE
375 is not indexing an array. */
376 if (!STMT_VINFO_DATA_REF (stmt_info
))
379 /* STMT has a data_ref. FORNOW this means that its of one of
383 (This should have been verified in analyze_data_refs).
385 'var' in the second case corresponds to a def, not a use,
386 so USE cannot correspond to any operands that are not used
389 Therefore, all we need to check is if STMT falls into the
390 first case, and whether var corresponds to USE. */
392 gassign
*assign
= dyn_cast
<gassign
*> (stmt_info
->stmt
);
393 if (!assign
|| !gimple_assign_copy_p (assign
))
395 gcall
*call
= dyn_cast
<gcall
*> (stmt_info
->stmt
);
396 if (call
&& gimple_call_internal_p (call
))
398 internal_fn ifn
= gimple_call_internal_fn (call
);
399 int mask_index
= internal_fn_mask_index (ifn
);
401 && use
== gimple_call_arg (call
, mask_index
))
403 int stored_value_index
= internal_fn_stored_value_index (ifn
);
404 if (stored_value_index
>= 0
405 && use
== gimple_call_arg (call
, stored_value_index
))
407 if (internal_gather_scatter_fn_p (ifn
)
408 && use
== gimple_call_arg (call
, 1))
414 if (TREE_CODE (gimple_assign_lhs (assign
)) == SSA_NAME
)
416 operand
= gimple_assign_rhs1 (assign
);
417 if (TREE_CODE (operand
) != SSA_NAME
)
428 Function process_use.
431 - a USE in STMT_VINFO in a loop represented by LOOP_VINFO
432 - RELEVANT - enum value to be set in the STMT_VINFO of the stmt
433 that defined USE. This is done by calling mark_relevant and passing it
434 the WORKLIST (to add DEF_STMT to the WORKLIST in case it is relevant).
435 - FORCE is true if exist_non_indexing_operands_for_use_p check shouldn't
439 Generally, LIVE_P and RELEVANT are used to define the liveness and
440 relevance info of the DEF_STMT of this USE:
441 STMT_VINFO_LIVE_P (DEF_stmt_vinfo) <-- live_p
442 STMT_VINFO_RELEVANT (DEF_stmt_vinfo) <-- relevant
444 - case 1: If USE is used only for address computations (e.g. array indexing),
445 which does not need to be directly vectorized, then the liveness/relevance
446 of the respective DEF_STMT is left unchanged.
447 - case 2: If STMT_VINFO is a reduction phi and DEF_STMT is a reduction stmt,
448 we skip DEF_STMT cause it had already been processed.
449 - case 3: If DEF_STMT and STMT_VINFO are in different nests, then
450 "relevant" will be modified accordingly.
452 Return true if everything is as expected. Return false otherwise. */
455 process_use (stmt_vec_info stmt_vinfo
, tree use
, loop_vec_info loop_vinfo
,
456 enum vect_relevant relevant
, vec
<stmt_vec_info
> *worklist
,
459 stmt_vec_info dstmt_vinfo
;
460 basic_block bb
, def_bb
;
461 enum vect_def_type dt
;
463 /* case 1: we are only interested in uses that need to be vectorized. Uses
464 that are used for address computation are not considered relevant. */
465 if (!force
&& !exist_non_indexing_operands_for_use_p (use
, stmt_vinfo
))
466 return opt_result::success ();
468 if (!vect_is_simple_use (use
, loop_vinfo
, &dt
, &dstmt_vinfo
))
469 return opt_result::failure_at (stmt_vinfo
->stmt
,
471 " unsupported use in stmt.\n");
474 return opt_result::success ();
476 def_bb
= gimple_bb (dstmt_vinfo
->stmt
);
478 /* case 2: A reduction phi (STMT) defined by a reduction stmt (DSTMT_VINFO).
479 DSTMT_VINFO must have already been processed, because this should be the
480 only way that STMT, which is a reduction-phi, was put in the worklist,
481 as there should be no other uses for DSTMT_VINFO in the loop. So we just
482 check that everything is as expected, and we are done. */
483 bb
= gimple_bb (stmt_vinfo
->stmt
);
484 if (gimple_code (stmt_vinfo
->stmt
) == GIMPLE_PHI
485 && STMT_VINFO_DEF_TYPE (stmt_vinfo
) == vect_reduction_def
486 && gimple_code (dstmt_vinfo
->stmt
) != GIMPLE_PHI
487 && STMT_VINFO_DEF_TYPE (dstmt_vinfo
) == vect_reduction_def
488 && bb
->loop_father
== def_bb
->loop_father
)
490 if (dump_enabled_p ())
491 dump_printf_loc (MSG_NOTE
, vect_location
,
492 "reduc-stmt defining reduc-phi in the same nest.\n");
493 gcc_assert (STMT_VINFO_RELEVANT (dstmt_vinfo
) < vect_used_by_reduction
);
494 gcc_assert (STMT_VINFO_LIVE_P (dstmt_vinfo
)
495 || STMT_VINFO_RELEVANT (dstmt_vinfo
) > vect_unused_in_scope
);
496 return opt_result::success ();
499 /* case 3a: outer-loop stmt defining an inner-loop stmt:
500 outer-loop-header-bb:
506 if (flow_loop_nested_p (def_bb
->loop_father
, bb
->loop_father
))
508 if (dump_enabled_p ())
509 dump_printf_loc (MSG_NOTE
, vect_location
,
510 "outer-loop def-stmt defining inner-loop stmt.\n");
514 case vect_unused_in_scope
:
515 relevant
= (STMT_VINFO_DEF_TYPE (stmt_vinfo
) == vect_nested_cycle
) ?
516 vect_used_in_scope
: vect_unused_in_scope
;
519 case vect_used_in_outer_by_reduction
:
520 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo
) != vect_reduction_def
);
521 relevant
= vect_used_by_reduction
;
524 case vect_used_in_outer
:
525 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo
) != vect_reduction_def
);
526 relevant
= vect_used_in_scope
;
529 case vect_used_in_scope
:
537 /* case 3b: inner-loop stmt defining an outer-loop stmt:
538 outer-loop-header-bb:
542 outer-loop-tail-bb (or outer-loop-exit-bb in double reduction):
544 else if (flow_loop_nested_p (bb
->loop_father
, def_bb
->loop_father
))
546 if (dump_enabled_p ())
547 dump_printf_loc (MSG_NOTE
, vect_location
,
548 "inner-loop def-stmt defining outer-loop stmt.\n");
552 case vect_unused_in_scope
:
553 relevant
= (STMT_VINFO_DEF_TYPE (stmt_vinfo
) == vect_reduction_def
554 || STMT_VINFO_DEF_TYPE (stmt_vinfo
) == vect_double_reduction_def
) ?
555 vect_used_in_outer_by_reduction
: vect_unused_in_scope
;
558 case vect_used_by_reduction
:
559 case vect_used_only_live
:
560 relevant
= vect_used_in_outer_by_reduction
;
563 case vect_used_in_scope
:
564 relevant
= vect_used_in_outer
;
571 /* We are also not interested in uses on loop PHI backedges that are
572 inductions. Otherwise we'll needlessly vectorize the IV increment
573 and cause hybrid SLP for SLP inductions. Unless the PHI is live
575 else if (gimple_code (stmt_vinfo
->stmt
) == GIMPLE_PHI
576 && STMT_VINFO_DEF_TYPE (stmt_vinfo
) == vect_induction_def
577 && ! STMT_VINFO_LIVE_P (stmt_vinfo
)
578 && (PHI_ARG_DEF_FROM_EDGE (stmt_vinfo
->stmt
,
579 loop_latch_edge (bb
->loop_father
))
582 if (dump_enabled_p ())
583 dump_printf_loc (MSG_NOTE
, vect_location
,
584 "induction value on backedge.\n");
585 return opt_result::success ();
589 vect_mark_relevant (worklist
, dstmt_vinfo
, relevant
, false);
590 return opt_result::success ();
594 /* Function vect_mark_stmts_to_be_vectorized.
596 Not all stmts in the loop need to be vectorized. For example:
605 Stmt 1 and 3 do not need to be vectorized, because loop control and
606 addressing of vectorized data-refs are handled differently.
608 This pass detects such stmts. */
611 vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo
, bool *fatal
)
613 class loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
614 basic_block
*bbs
= LOOP_VINFO_BBS (loop_vinfo
);
615 unsigned int nbbs
= loop
->num_nodes
;
616 gimple_stmt_iterator si
;
620 enum vect_relevant relevant
;
622 DUMP_VECT_SCOPE ("vect_mark_stmts_to_be_vectorized");
624 auto_vec
<stmt_vec_info
, 64> worklist
;
626 /* 1. Init worklist. */
627 for (i
= 0; i
< nbbs
; i
++)
630 for (si
= gsi_start_phis (bb
); !gsi_end_p (si
); gsi_next (&si
))
632 stmt_vec_info phi_info
= loop_vinfo
->lookup_stmt (gsi_stmt (si
));
633 if (dump_enabled_p ())
634 dump_printf_loc (MSG_NOTE
, vect_location
, "init: phi relevant? %G",
637 if (vect_stmt_relevant_p (phi_info
, loop_vinfo
, &relevant
, &live_p
))
638 vect_mark_relevant (&worklist
, phi_info
, relevant
, live_p
);
640 for (si
= gsi_start_bb (bb
); !gsi_end_p (si
); gsi_next (&si
))
642 stmt_vec_info stmt_info
= loop_vinfo
->lookup_stmt (gsi_stmt (si
));
643 if (dump_enabled_p ())
644 dump_printf_loc (MSG_NOTE
, vect_location
,
645 "init: stmt relevant? %G", stmt_info
->stmt
);
647 if (vect_stmt_relevant_p (stmt_info
, loop_vinfo
, &relevant
, &live_p
))
648 vect_mark_relevant (&worklist
, stmt_info
, relevant
, live_p
);
652 /* 2. Process_worklist */
653 while (worklist
.length () > 0)
658 stmt_vec_info stmt_vinfo
= worklist
.pop ();
659 if (dump_enabled_p ())
660 dump_printf_loc (MSG_NOTE
, vect_location
,
661 "worklist: examine stmt: %G", stmt_vinfo
->stmt
);
663 /* Examine the USEs of STMT. For each USE, mark the stmt that defines it
664 (DEF_STMT) as relevant/irrelevant according to the relevance property
666 relevant
= STMT_VINFO_RELEVANT (stmt_vinfo
);
668 /* Generally, the relevance property of STMT (in STMT_VINFO_RELEVANT) is
669 propagated as is to the DEF_STMTs of its USEs.
671 One exception is when STMT has been identified as defining a reduction
672 variable; in this case we set the relevance to vect_used_by_reduction.
673 This is because we distinguish between two kinds of relevant stmts -
674 those that are used by a reduction computation, and those that are
675 (also) used by a regular computation. This allows us later on to
676 identify stmts that are used solely by a reduction, and therefore the
677 order of the results that they produce does not have to be kept. */
679 switch (STMT_VINFO_DEF_TYPE (stmt_vinfo
))
681 case vect_reduction_def
:
682 gcc_assert (relevant
!= vect_unused_in_scope
);
683 if (relevant
!= vect_unused_in_scope
684 && relevant
!= vect_used_in_scope
685 && relevant
!= vect_used_by_reduction
686 && relevant
!= vect_used_only_live
)
687 return opt_result::failure_at
688 (stmt_vinfo
->stmt
, "unsupported use of reduction.\n");
691 case vect_nested_cycle
:
692 if (relevant
!= vect_unused_in_scope
693 && relevant
!= vect_used_in_outer_by_reduction
694 && relevant
!= vect_used_in_outer
)
695 return opt_result::failure_at
696 (stmt_vinfo
->stmt
, "unsupported use of nested cycle.\n");
699 case vect_double_reduction_def
:
700 if (relevant
!= vect_unused_in_scope
701 && relevant
!= vect_used_by_reduction
702 && relevant
!= vect_used_only_live
)
703 return opt_result::failure_at
704 (stmt_vinfo
->stmt
, "unsupported use of double reduction.\n");
711 if (is_pattern_stmt_p (stmt_vinfo
))
713 /* Pattern statements are not inserted into the code, so
714 FOR_EACH_PHI_OR_STMT_USE optimizes their operands out, and we
715 have to scan the RHS or function arguments instead. */
716 if (gassign
*assign
= dyn_cast
<gassign
*> (stmt_vinfo
->stmt
))
718 enum tree_code rhs_code
= gimple_assign_rhs_code (assign
);
719 tree op
= gimple_assign_rhs1 (assign
);
722 if (rhs_code
== COND_EXPR
&& COMPARISON_CLASS_P (op
))
725 = process_use (stmt_vinfo
, TREE_OPERAND (op
, 0),
726 loop_vinfo
, relevant
, &worklist
, false);
729 res
= process_use (stmt_vinfo
, TREE_OPERAND (op
, 1),
730 loop_vinfo
, relevant
, &worklist
, false);
735 for (; i
< gimple_num_ops (assign
); i
++)
737 op
= gimple_op (assign
, i
);
738 if (TREE_CODE (op
) == SSA_NAME
)
741 = process_use (stmt_vinfo
, op
, loop_vinfo
, relevant
,
748 else if (gcall
*call
= dyn_cast
<gcall
*> (stmt_vinfo
->stmt
))
750 for (i
= 0; i
< gimple_call_num_args (call
); i
++)
752 tree arg
= gimple_call_arg (call
, i
);
754 = process_use (stmt_vinfo
, arg
, loop_vinfo
, relevant
,
762 FOR_EACH_PHI_OR_STMT_USE (use_p
, stmt_vinfo
->stmt
, iter
, SSA_OP_USE
)
764 tree op
= USE_FROM_PTR (use_p
);
766 = process_use (stmt_vinfo
, op
, loop_vinfo
, relevant
,
772 if (STMT_VINFO_GATHER_SCATTER_P (stmt_vinfo
))
774 gather_scatter_info gs_info
;
775 if (!vect_check_gather_scatter (stmt_vinfo
, loop_vinfo
, &gs_info
))
778 = process_use (stmt_vinfo
, gs_info
.offset
, loop_vinfo
, relevant
,
787 } /* while worklist */
789 return opt_result::success ();
792 /* Compute the prologue cost for invariant or constant operands. */
795 vect_prologue_cost_for_slp_op (slp_tree node
, stmt_vec_info stmt_info
,
796 unsigned opno
, enum vect_def_type dt
,
797 stmt_vector_for_cost
*cost_vec
)
799 vec_info
*vinfo
= stmt_info
->vinfo
;
800 gimple
*stmt
= SLP_TREE_SCALAR_STMTS (node
)[0]->stmt
;
801 tree op
= gimple_op (stmt
, opno
);
802 unsigned prologue_cost
= 0;
804 /* Without looking at the actual initializer a vector of
805 constants can be implemented as load from the constant pool.
806 When all elements are the same we can use a splat. */
807 tree vectype
= get_vectype_for_scalar_type (vinfo
, TREE_TYPE (op
));
808 unsigned group_size
= SLP_TREE_SCALAR_STMTS (node
).length ();
809 unsigned num_vects_to_check
;
810 unsigned HOST_WIDE_INT const_nunits
;
812 if (TYPE_VECTOR_SUBPARTS (vectype
).is_constant (&const_nunits
)
813 && ! multiple_p (const_nunits
, group_size
))
815 num_vects_to_check
= SLP_TREE_NUMBER_OF_VEC_STMTS (node
);
816 nelt_limit
= const_nunits
;
820 /* If either the vector has variable length or the vectors
821 are composed of repeated whole groups we only need to
822 cost construction once. All vectors will be the same. */
823 num_vects_to_check
= 1;
824 nelt_limit
= group_size
;
826 tree elt
= NULL_TREE
;
828 for (unsigned j
= 0; j
< num_vects_to_check
* nelt_limit
; ++j
)
830 unsigned si
= j
% group_size
;
832 elt
= gimple_op (SLP_TREE_SCALAR_STMTS (node
)[si
]->stmt
, opno
);
833 /* ??? We're just tracking whether all operands of a single
834 vector initializer are the same, ideally we'd check if
835 we emitted the same one already. */
836 else if (elt
!= gimple_op (SLP_TREE_SCALAR_STMTS (node
)[si
]->stmt
,
840 if (nelt
== nelt_limit
)
842 /* ??? We need to pass down stmt_info for a vector type
843 even if it points to the wrong stmt. */
844 prologue_cost
+= record_stmt_cost
846 dt
== vect_external_def
847 ? (elt
? scalar_to_vec
: vec_construct
)
849 stmt_info
, 0, vect_prologue
);
854 return prologue_cost
;
857 /* Function vect_model_simple_cost.
859 Models cost for simple operations, i.e. those that only emit ncopies of a
860 single op. Right now, this does not account for multiple insns that could
861 be generated for the single vector op. We will handle that shortly. */
864 vect_model_simple_cost (stmt_vec_info stmt_info
, int ncopies
,
865 enum vect_def_type
*dt
,
868 stmt_vector_for_cost
*cost_vec
)
870 int inside_cost
= 0, prologue_cost
= 0;
872 gcc_assert (cost_vec
!= NULL
);
874 /* ??? Somehow we need to fix this at the callers. */
876 ncopies
= SLP_TREE_NUMBER_OF_VEC_STMTS (node
);
880 /* Scan operands and account for prologue cost of constants/externals.
881 ??? This over-estimates cost for multiple uses and should be
883 gimple
*stmt
= SLP_TREE_SCALAR_STMTS (node
)[0]->stmt
;
884 tree lhs
= gimple_get_lhs (stmt
);
885 for (unsigned i
= 0; i
< gimple_num_ops (stmt
); ++i
)
887 tree op
= gimple_op (stmt
, i
);
888 enum vect_def_type dt
;
889 if (!op
|| op
== lhs
)
891 if (vect_is_simple_use (op
, stmt_info
->vinfo
, &dt
)
892 && (dt
== vect_constant_def
|| dt
== vect_external_def
))
893 prologue_cost
+= vect_prologue_cost_for_slp_op (node
, stmt_info
,
898 /* Cost the "broadcast" of a scalar operand in to a vector operand.
899 Use scalar_to_vec to cost the broadcast, as elsewhere in the vector
901 for (int i
= 0; i
< ndts
; i
++)
902 if (dt
[i
] == vect_constant_def
|| dt
[i
] == vect_external_def
)
903 prologue_cost
+= record_stmt_cost (cost_vec
, 1, scalar_to_vec
,
904 stmt_info
, 0, vect_prologue
);
906 /* Adjust for two-operator SLP nodes. */
907 if (node
&& SLP_TREE_TWO_OPERATORS (node
))
910 inside_cost
+= record_stmt_cost (cost_vec
, ncopies
, vec_perm
,
911 stmt_info
, 0, vect_body
);
914 /* Pass the inside-of-loop statements to the target-specific cost model. */
915 inside_cost
+= record_stmt_cost (cost_vec
, ncopies
, vector_stmt
,
916 stmt_info
, 0, vect_body
);
918 if (dump_enabled_p ())
919 dump_printf_loc (MSG_NOTE
, vect_location
,
920 "vect_model_simple_cost: inside_cost = %d, "
921 "prologue_cost = %d .\n", inside_cost
, prologue_cost
);
925 /* Model cost for type demotion and promotion operations. PWR is normally
926 zero for single-step promotions and demotions. It will be one if
927 two-step promotion/demotion is required, and so on. Each additional
928 step doubles the number of instructions required. */
931 vect_model_promotion_demotion_cost (stmt_vec_info stmt_info
,
932 enum vect_def_type
*dt
, int pwr
,
933 stmt_vector_for_cost
*cost_vec
)
936 int inside_cost
= 0, prologue_cost
= 0;
938 for (i
= 0; i
< pwr
+ 1; i
++)
940 tmp
= (STMT_VINFO_TYPE (stmt_info
) == type_promotion_vec_info_type
) ?
942 inside_cost
+= record_stmt_cost (cost_vec
, vect_pow2 (tmp
),
943 vec_promote_demote
, stmt_info
, 0,
947 /* FORNOW: Assuming maximum 2 args per stmts. */
948 for (i
= 0; i
< 2; i
++)
949 if (dt
[i
] == vect_constant_def
|| dt
[i
] == vect_external_def
)
950 prologue_cost
+= record_stmt_cost (cost_vec
, 1, vector_stmt
,
951 stmt_info
, 0, vect_prologue
);
953 if (dump_enabled_p ())
954 dump_printf_loc (MSG_NOTE
, vect_location
,
955 "vect_model_promotion_demotion_cost: inside_cost = %d, "
956 "prologue_cost = %d .\n", inside_cost
, prologue_cost
);
959 /* Returns true if the current function returns DECL. */
962 cfun_returns (tree decl
)
966 FOR_EACH_EDGE (e
, ei
, EXIT_BLOCK_PTR_FOR_FN (cfun
)->preds
)
968 greturn
*ret
= safe_dyn_cast
<greturn
*> (last_stmt (e
->src
));
971 if (gimple_return_retval (ret
) == decl
)
973 /* We often end up with an aggregate copy to the result decl,
974 handle that case as well. First skip intermediate clobbers
979 def
= SSA_NAME_DEF_STMT (gimple_vuse (def
));
981 while (gimple_clobber_p (def
));
982 if (is_a
<gassign
*> (def
)
983 && gimple_assign_lhs (def
) == gimple_return_retval (ret
)
984 && gimple_assign_rhs1 (def
) == decl
)
990 /* Function vect_model_store_cost
992 Models cost for stores. In the case of grouped accesses, one access
993 has the overhead of the grouped access attributed to it. */
996 vect_model_store_cost (stmt_vec_info stmt_info
, int ncopies
,
997 enum vect_def_type dt
,
998 vect_memory_access_type memory_access_type
,
999 vec_load_store_type vls_type
, slp_tree slp_node
,
1000 stmt_vector_for_cost
*cost_vec
)
1002 unsigned int inside_cost
= 0, prologue_cost
= 0;
1003 stmt_vec_info first_stmt_info
= stmt_info
;
1004 bool grouped_access_p
= STMT_VINFO_GROUPED_ACCESS (stmt_info
);
1006 /* ??? Somehow we need to fix this at the callers. */
1008 ncopies
= SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
);
1010 if (vls_type
== VLS_STORE_INVARIANT
)
1013 prologue_cost
+= vect_prologue_cost_for_slp_op (slp_node
, stmt_info
,
1016 prologue_cost
+= record_stmt_cost (cost_vec
, 1, scalar_to_vec
,
1017 stmt_info
, 0, vect_prologue
);
1020 /* Grouped stores update all elements in the group at once,
1021 so we want the DR for the first statement. */
1022 if (!slp_node
&& grouped_access_p
)
1023 first_stmt_info
= DR_GROUP_FIRST_ELEMENT (stmt_info
);
1025 /* True if we should include any once-per-group costs as well as
1026 the cost of the statement itself. For SLP we only get called
1027 once per group anyhow. */
1028 bool first_stmt_p
= (first_stmt_info
== stmt_info
);
1030 /* We assume that the cost of a single store-lanes instruction is
1031 equivalent to the cost of DR_GROUP_SIZE separate stores. If a grouped
1032 access is instead being provided by a permute-and-store operation,
1033 include the cost of the permutes. */
1035 && memory_access_type
== VMAT_CONTIGUOUS_PERMUTE
)
1037 /* Uses a high and low interleave or shuffle operations for each
1039 int group_size
= DR_GROUP_SIZE (first_stmt_info
);
1040 int nstmts
= ncopies
* ceil_log2 (group_size
) * group_size
;
1041 inside_cost
= record_stmt_cost (cost_vec
, nstmts
, vec_perm
,
1042 stmt_info
, 0, vect_body
);
1044 if (dump_enabled_p ())
1045 dump_printf_loc (MSG_NOTE
, vect_location
,
1046 "vect_model_store_cost: strided group_size = %d .\n",
1050 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
1051 /* Costs of the stores. */
1052 if (memory_access_type
== VMAT_ELEMENTWISE
1053 || memory_access_type
== VMAT_GATHER_SCATTER
)
1055 /* N scalar stores plus extracting the elements. */
1056 unsigned int assumed_nunits
= vect_nunits_for_cost (vectype
);
1057 inside_cost
+= record_stmt_cost (cost_vec
,
1058 ncopies
* assumed_nunits
,
1059 scalar_store
, stmt_info
, 0, vect_body
);
1062 vect_get_store_cost (stmt_info
, ncopies
, &inside_cost
, cost_vec
);
1064 if (memory_access_type
== VMAT_ELEMENTWISE
1065 || memory_access_type
== VMAT_STRIDED_SLP
)
1067 /* N scalar stores plus extracting the elements. */
1068 unsigned int assumed_nunits
= vect_nunits_for_cost (vectype
);
1069 inside_cost
+= record_stmt_cost (cost_vec
,
1070 ncopies
* assumed_nunits
,
1071 vec_to_scalar
, stmt_info
, 0, vect_body
);
1074 /* When vectorizing a store into the function result assign
1075 a penalty if the function returns in a multi-register location.
1076 In this case we assume we'll end up with having to spill the
1077 vector result and do piecewise loads as a conservative estimate. */
1078 tree base
= get_base_address (STMT_VINFO_DATA_REF (stmt_info
)->ref
);
1080 && (TREE_CODE (base
) == RESULT_DECL
1081 || (DECL_P (base
) && cfun_returns (base
)))
1082 && !aggregate_value_p (base
, cfun
->decl
))
1084 rtx reg
= hard_function_value (TREE_TYPE (base
), cfun
->decl
, 0, 1);
1085 /* ??? Handle PARALLEL in some way. */
1088 int nregs
= hard_regno_nregs (REGNO (reg
), GET_MODE (reg
));
1089 /* Assume that a single reg-reg move is possible and cheap,
1090 do not account for vector to gp register move cost. */
1094 prologue_cost
+= record_stmt_cost (cost_vec
, ncopies
,
1096 stmt_info
, 0, vect_epilogue
);
1098 prologue_cost
+= record_stmt_cost (cost_vec
, ncopies
* nregs
,
1100 stmt_info
, 0, vect_epilogue
);
1105 if (dump_enabled_p ())
1106 dump_printf_loc (MSG_NOTE
, vect_location
,
1107 "vect_model_store_cost: inside_cost = %d, "
1108 "prologue_cost = %d .\n", inside_cost
, prologue_cost
);
1112 /* Calculate cost of DR's memory access. */
/* Add the per-copy cost of STMT_INFO's store to *INSIDE_COST, recording
   each entry in BODY_COST_VEC.  The cost depends on the alignment
   support scheme chosen for the data reference.
   NOTE(review): this chunk appears to be a lossy extract — several
   original lines (case labels, breaks, braces) are missing here.  */
1114 vect_get_store_cost (stmt_vec_info stmt_info
, int ncopies
,
1115 unsigned int *inside_cost
,
1116 stmt_vector_for_cost
*body_cost_vec
)
1118 dr_vec_info
*dr_info
= STMT_VINFO_DR_INFO (stmt_info
);
1119 int alignment_support_scheme
1120 = vect_supportable_dr_alignment (dr_info
, false);
1122 switch (alignment_support_scheme
)
/* Aligned store: one vector_store per copy.  */
1126 *inside_cost
+= record_stmt_cost (body_cost_vec
, ncopies
,
1127 vector_store
, stmt_info
, 0,
1130 if (dump_enabled_p ())
1131 dump_printf_loc (MSG_NOTE
, vect_location
,
1132 "vect_model_store_cost: aligned.\n");
1136 case dr_unaligned_supported
:
1138 /* Here, we assign an additional cost for the unaligned store. */
/* The misalignment amount is passed so the target cost hook can
   scale the penalty.  */
1139 *inside_cost
+= record_stmt_cost (body_cost_vec
, ncopies
,
1140 unaligned_store
, stmt_info
,
1141 DR_MISALIGNMENT (dr_info
),
1143 if (dump_enabled_p ())
1144 dump_printf_loc (MSG_NOTE
, vect_location
,
1145 "vect_model_store_cost: unaligned supported by "
1150 case dr_unaligned_unsupported
:
/* Make the store look maximally expensive so that this
   vectorization choice is rejected by the cost model.  */
1152 *inside_cost
= VECT_MAX_COST
;
1154 if (dump_enabled_p ())
1155 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1156 "vect_model_store_cost: unsupported access.\n");
1166 /* Function vect_model_load_cost
1168 Models cost for loads. In the case of grouped accesses, one access has
1169 the overhead of the grouped access attributed to it. Since unaligned
1170 accesses are supported for loads, we also account for the costs of the
1171 access scheme chosen. */
/* Costs are accumulated into INSIDE_COST/PROLOGUE_COST and recorded
   in COST_VEC.  NCOPIES is the number of vector copies per scalar
   stmt; MEMORY_ACCESS_TYPE selects the load strategy.
   NOTE(review): extraction appears lossy here — some original lines
   (braces, a few statements) are missing from this view.  */
1174 vect_model_load_cost (stmt_vec_info stmt_info
, unsigned ncopies
,
1175 vect_memory_access_type memory_access_type
,
1176 slp_instance instance
,
1178 stmt_vector_for_cost
*cost_vec
)
1180 unsigned int inside_cost
= 0, prologue_cost
= 0;
1181 bool grouped_access_p
= STMT_VINFO_GROUPED_ACCESS (stmt_info
);
1183 gcc_assert (cost_vec
);
1185 /* ??? Somehow we need to fix this at the callers. */
1187 ncopies
= SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
);
1189 if (slp_node
&& SLP_TREE_LOAD_PERMUTATION (slp_node
).exists ())
1191 /* If the load is permuted then the alignment is determined by
1192 the first group element not by the first scalar stmt DR. */
1193 stmt_vec_info first_stmt_info
= DR_GROUP_FIRST_ELEMENT (stmt_info
);
1194 /* Record the cost for the permutation. */
1196 unsigned assumed_nunits
1197 = vect_nunits_for_cost (STMT_VINFO_VECTYPE (first_stmt_info
));
1198 unsigned slp_vf
= (ncopies
* assumed_nunits
) / instance
->group_size
;
/* Call with analyze-only mode (last visible arg true) to count the
   permutes without emitting code.  */
1199 vect_transform_slp_perm_load (slp_node
, vNULL
, NULL
,
1200 slp_vf
, instance
, true,
1202 inside_cost
+= record_stmt_cost (cost_vec
, n_perms
, vec_perm
,
1203 first_stmt_info
, 0, vect_body
);
1204 /* And adjust the number of loads performed. This handles
1205 redundancies as well as loads that are later dead. */
1206 auto_sbitmap
perm (DR_GROUP_SIZE (first_stmt_info
));
1207 bitmap_clear (perm
);
1208 for (unsigned i
= 0;
1209 i
< SLP_TREE_LOAD_PERMUTATION (slp_node
).length (); ++i
)
1210 bitmap_set_bit (perm
, SLP_TREE_LOAD_PERMUTATION (slp_node
)[i
]);
/* Walk the group elements one vector (assumed_nunits) at a time,
   counting vectors in which at least one element is actually used.  */
1212 bool load_seen
= false;
1213 for (unsigned i
= 0; i
< DR_GROUP_SIZE (first_stmt_info
); ++i
)
1215 if (i
% assumed_nunits
== 0)
1221 if (bitmap_bit_p (perm
, i
))
1227 <= (DR_GROUP_SIZE (first_stmt_info
)
1228 - DR_GROUP_GAP (first_stmt_info
)
1229 + assumed_nunits
- 1) / assumed_nunits
);
1232 /* Grouped loads read all elements in the group at once,
1233 so we want the DR for the first statement. */
1234 stmt_vec_info first_stmt_info
= stmt_info
;
1235 if (!slp_node
&& grouped_access_p
)
1236 first_stmt_info
= DR_GROUP_FIRST_ELEMENT (stmt_info
);
1238 /* True if we should include any once-per-group costs as well as
1239 the cost of the statement itself. For SLP we only get called
1240 once per group anyhow. */
1241 bool first_stmt_p
= (first_stmt_info
== stmt_info
);
1243 /* We assume that the cost of a single load-lanes instruction is
1244 equivalent to the cost of DR_GROUP_SIZE separate loads. If a grouped
1245 access is instead being provided by a load-and-permute operation,
1246 include the cost of the permutes. */
1248 && memory_access_type
== VMAT_CONTIGUOUS_PERMUTE
)
1250 /* Uses an even and odd extract operations or shuffle operations
1251 for each needed permute. */
1252 int group_size
= DR_GROUP_SIZE (first_stmt_info
);
1253 int nstmts
= ncopies
* ceil_log2 (group_size
) * group_size
;
1254 inside_cost
+= record_stmt_cost (cost_vec
, nstmts
, vec_perm
,
1255 stmt_info
, 0, vect_body
);
1257 if (dump_enabled_p ())
1258 dump_printf_loc (MSG_NOTE
, vect_location
,
1259 "vect_model_load_cost: strided group_size = %d .\n",
1263 /* The loads themselves. */
1264 if (memory_access_type
== VMAT_ELEMENTWISE
1265 || memory_access_type
== VMAT_GATHER_SCATTER
)
1267 /* N scalar loads plus gathering them into a vector. */
1268 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
1269 unsigned int assumed_nunits
= vect_nunits_for_cost (vectype
);
1270 inside_cost
+= record_stmt_cost (cost_vec
,
1271 ncopies
* assumed_nunits
,
1272 scalar_load
, stmt_info
, 0, vect_body
);
1275 vect_get_load_cost (stmt_info
, ncopies
, first_stmt_p
,
1276 &inside_cost
, &prologue_cost
,
1277 cost_vec
, cost_vec
, true);
/* Assembling the scalars back into a vector costs a vec_construct.  */
1278 if (memory_access_type
== VMAT_ELEMENTWISE
1279 || memory_access_type
== VMAT_STRIDED_SLP
)
1280 inside_cost
+= record_stmt_cost (cost_vec
, ncopies
, vec_construct
,
1281 stmt_info
, 0, vect_body
);
1283 if (dump_enabled_p ())
1284 dump_printf_loc (MSG_NOTE
, vect_location
,
1285 "vect_model_load_cost: inside_cost = %d, "
1286 "prologue_cost = %d .\n", inside_cost
, prologue_cost
);
1290 /* Calculate cost of DR's memory access. */
/* Add the per-copy cost of STMT_INFO's load to *INSIDE_COST (and any
   loop-invariant setup to *PROLOGUE_COST), recording entries in
   BODY_COST_VEC/PROLOGUE_COST_VEC.  ADD_REALIGN_COST requests the
   once-per-group realignment prologue; RECORD_PROLOGUE_COSTS gates
   whether prologue entries are recorded at all.  */
1292 vect_get_load_cost (stmt_vec_info stmt_info
, int ncopies
,
1293 bool add_realign_cost
, unsigned int *inside_cost
,
1294 unsigned int *prologue_cost
,
1295 stmt_vector_for_cost
*prologue_cost_vec
,
1296 stmt_vector_for_cost
*body_cost_vec
,
1297 bool record_prologue_costs
)
1299 dr_vec_info
*dr_info
= STMT_VINFO_DR_INFO (stmt_info
);
1300 int alignment_support_scheme
1301 = vect_supportable_dr_alignment (dr_info
, false);
1303 switch (alignment_support_scheme
)
/* Aligned load: one vector_load per copy.  */
1307 *inside_cost
+= record_stmt_cost (body_cost_vec
, ncopies
, vector_load
,
1308 stmt_info
, 0, vect_body
);
1310 if (dump_enabled_p ())
1311 dump_printf_loc (MSG_NOTE
, vect_location
,
1312 "vect_model_load_cost: aligned.\n");
1316 case dr_unaligned_supported
:
1318 /* Here, we assign an additional cost for the unaligned load. */
1319 *inside_cost
+= record_stmt_cost (body_cost_vec
, ncopies
,
1320 unaligned_load
, stmt_info
,
1321 DR_MISALIGNMENT (dr_info
),
1324 if (dump_enabled_p ())
1325 dump_printf_loc (MSG_NOTE
, vect_location
,
1326 "vect_model_load_cost: unaligned supported by "
1331 case dr_explicit_realign
:
/* Explicit realignment: two loads plus a permute per copy.  */
1333 *inside_cost
+= record_stmt_cost (body_cost_vec
, ncopies
* 2,
1334 vector_load
, stmt_info
, 0, vect_body
);
1335 *inside_cost
+= record_stmt_cost (body_cost_vec
, ncopies
,
1336 vec_perm
, stmt_info
, 0, vect_body
);
1338 /* FIXME: If the misalignment remains fixed across the iterations of
1339 the containing loop, the following cost should be added to the
1341 if (targetm
.vectorize
.builtin_mask_for_load
)
1342 *inside_cost
+= record_stmt_cost (body_cost_vec
, 1, vector_stmt
,
1343 stmt_info
, 0, vect_body
);
1345 if (dump_enabled_p ())
1346 dump_printf_loc (MSG_NOTE
, vect_location
,
1347 "vect_model_load_cost: explicit realign\n");
1351 case dr_explicit_realign_optimized
:
1353 if (dump_enabled_p ())
1354 dump_printf_loc (MSG_NOTE
, vect_location
,
1355 "vect_model_load_cost: unaligned software "
1358 /* Unaligned software pipeline has a load of an address, an initial
1359 load, and possibly a mask operation to "prime" the loop. However,
1360 if this is an access in a group of loads, which provide grouped
1361 access, then the above cost should only be considered for one
1362 access in the group. Inside the loop, there is a load op
1363 and a realignment op. */
1365 if (add_realign_cost
&& record_prologue_costs
)
1367 *prologue_cost
+= record_stmt_cost (prologue_cost_vec
, 2,
1368 vector_stmt
, stmt_info
,
1370 if (targetm
.vectorize
.builtin_mask_for_load
)
1371 *prologue_cost
+= record_stmt_cost (prologue_cost_vec
, 1,
1372 vector_stmt
, stmt_info
,
1376 *inside_cost
+= record_stmt_cost (body_cost_vec
, ncopies
, vector_load
,
1377 stmt_info
, 0, vect_body
);
1378 *inside_cost
+= record_stmt_cost (body_cost_vec
, ncopies
, vec_perm
,
1379 stmt_info
, 0, vect_body
);
1381 if (dump_enabled_p ())
1382 dump_printf_loc (MSG_NOTE
, vect_location
,
1383 "vect_model_load_cost: explicit realign optimized"
1389 case dr_unaligned_unsupported
:
/* Make the load look maximally expensive so this choice is rejected.  */
1391 *inside_cost
= VECT_MAX_COST
;
1393 if (dump_enabled_p ())
1394 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1395 "vect_model_load_cost: unsupported access.\n");
1404 /* Insert the new stmt NEW_STMT at *GSI or at the appropriate place in
1405 the loop preheader for the vectorized stmt STMT_VINFO. */
/* If GSI is non-null (path not fully visible here) the stmt goes at
   *GSI; otherwise it is hoisted: for loop vectorization onto the
   preheader edge, for basic-block vectorization after the labels of
   the region's entry block.  */
1408 vect_init_vector_1 (stmt_vec_info stmt_vinfo
, gimple
*new_stmt
,
1409 gimple_stmt_iterator
*gsi
)
1412 vect_finish_stmt_generation (stmt_vinfo
, new_stmt
, gsi
);
1415 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_vinfo
);
1419 class loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
/* NOTE(review): for nested stmts the relevant (outer) loop is
   presumably selected around here — some lines are missing from
   this extract; verify against the full source.  */
1423 if (nested_in_vect_loop_p (loop
, stmt_vinfo
))
1426 pe
= loop_preheader_edge (loop
);
1427 new_bb
= gsi_insert_on_edge_immediate (pe
, new_stmt
);
/* Insertion on the preheader edge must not require a new block.  */
1428 gcc_assert (!new_bb
);
1432 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_vinfo
);
1434 gimple_stmt_iterator gsi_bb_start
;
1436 gcc_assert (bb_vinfo
);
1437 bb
= BB_VINFO_BB (bb_vinfo
);
1438 gsi_bb_start
= gsi_after_labels (bb
);
1439 gsi_insert_before (&gsi_bb_start
, new_stmt
, GSI_SAME_STMT
);
1443 if (dump_enabled_p ())
1444 dump_printf_loc (MSG_NOTE
, vect_location
,
1445 "created new init_stmt: %G", new_stmt
);
1448 /* Function vect_init_vector.
1450 Insert a new stmt (INIT_STMT) that initializes a new variable of type
1451 TYPE with the value VAL. If TYPE is a vector type and VAL does not have
1452 vector type a vector with all elements equal to VAL is created first.
1453 Place the initialization at GSI if it is not NULL. Otherwise, place the
1454 initialization at the loop preheader.
1455 Return the DEF of INIT_STMT.
1456 It will be used in the vectorization of STMT_INFO. */
1459 vect_init_vector (stmt_vec_info stmt_info
, tree val
, tree type
,
1460 gimple_stmt_iterator
*gsi
)
1465 /* We abuse this function to push sth to a SSA name with initial 'val'. */
1466 if (! useless_type_conversion_p (type
, TREE_TYPE (val
)))
1468 gcc_assert (TREE_CODE (type
) == VECTOR_TYPE
);
1469 if (! types_compatible_p (TREE_TYPE (type
), TREE_TYPE (val
)))
1471 /* Scalar boolean value should be transformed into
1472 all zeros or all ones value before building a vector. */
1473 if (VECTOR_BOOLEAN_TYPE_P (type
))
1475 tree true_val
= build_all_ones_cst (TREE_TYPE (type
));
1476 tree false_val
= build_zero_cst (TREE_TYPE (type
));
1478 if (CONSTANT_CLASS_P (val
))
1479 val
= integer_zerop (val
) ? false_val
: true_val
;
/* Non-constant booleans are normalized at run time with a
   COND_EXPR selecting all-ones or all-zeros.  */
1482 new_temp
= make_ssa_name (TREE_TYPE (type
));
1483 init_stmt
= gimple_build_assign (new_temp
, COND_EXPR
,
1484 val
, true_val
, false_val
);
1485 vect_init_vector_1 (stmt_info
, init_stmt
, gsi
);
1491 gimple_seq stmts
= NULL
;
1492 if (! INTEGRAL_TYPE_P (TREE_TYPE (val
)))
1493 val
= gimple_build (&stmts
, VIEW_CONVERT_EXPR
,
1494 TREE_TYPE (type
), val
);
1496 /* ??? Condition vectorization expects us to do
1497 promotion of invariant/external defs. */
1498 val
= gimple_convert (&stmts
, TREE_TYPE (type
), val
);
/* Move each conversion stmt out of STMTS and insert it at the
   proper place via vect_init_vector_1.  */
1499 for (gimple_stmt_iterator gsi2
= gsi_start (stmts
);
1500 !gsi_end_p (gsi2
); )
1502 init_stmt
= gsi_stmt (gsi2
);
1503 gsi_remove (&gsi2
, false);
1504 vect_init_vector_1 (stmt_info
, init_stmt
, gsi
);
/* Broadcast the (now type-compatible) scalar into a vector.  */
1508 val
= build_vector_from_val (type
, val
);
1511 new_temp
= vect_get_new_ssa_name (type
, vect_simple_var
, "cst_");
1512 init_stmt
= gimple_build_assign (new_temp
, val
);
1513 vect_init_vector_1 (stmt_info
, init_stmt
, gsi
);
1517 /* Function vect_get_vec_def_for_operand_1.
1519 For a defining stmt DEF_STMT_INFO of a scalar stmt, return a vector def
1520 with type DT that will be used in the vectorized stmt. */
1523 vect_get_vec_def_for_operand_1 (stmt_vec_info def_stmt_info
,
1524 enum vect_def_type dt
)
1527 stmt_vec_info vec_stmt_info
;
1531 /* operand is a constant or a loop invariant. */
1532 case vect_constant_def
:
1533 case vect_external_def
:
1534 /* Code should use vect_get_vec_def_for_operand. */
/* NOTE(review): presumably gcc_unreachable here — the line is
   missing from this extract.  */
1537 /* Operand is defined by a loop header phi. In case of nested
1538 cycles we also may have uses of the backedge def. */
1539 case vect_reduction_def
:
1540 case vect_double_reduction_def
:
1541 case vect_nested_cycle
:
1542 case vect_induction_def
:
1543 gcc_assert (gimple_code (def_stmt_info
->stmt
) == GIMPLE_PHI
1544 || dt
== vect_nested_cycle
);
1547 /* operand is defined inside the loop. */
1548 case vect_internal_def
:
1550 /* Get the def from the vectorized stmt. */
1551 vec_stmt_info
= STMT_VINFO_VEC_STMT (def_stmt_info
);
1552 /* Get vectorized pattern statement. */
/* If the def stmt was replaced by a pattern and is not itself
   relevant, the vector def lives on the related pattern stmt.  */
1554 && STMT_VINFO_IN_PATTERN_P (def_stmt_info
)
1555 && !STMT_VINFO_RELEVANT (def_stmt_info
))
1556 vec_stmt_info
= (STMT_VINFO_VEC_STMT
1557 (STMT_VINFO_RELATED_STMT (def_stmt_info
)));
1558 gcc_assert (vec_stmt_info
);
/* PHIs carry their def in PHI_RESULT; other stmts in their lhs.  */
1559 if (gphi
*phi
= dyn_cast
<gphi
*> (vec_stmt_info
->stmt
))
1560 vec_oprnd
= PHI_RESULT (phi
);
1562 vec_oprnd
= gimple_get_lhs (vec_stmt_info
->stmt
);
1572 /* Function vect_get_vec_def_for_operand.
1574 OP is an operand in STMT_VINFO. This function returns a (vector) def
1575 that will be used in the vectorized stmt for STMT_VINFO.
1577 In the case that OP is an SSA_NAME which is defined in the loop, then
1578 STMT_VINFO_VEC_STMT of the defining stmt holds the relevant def.
1580 In case OP is an invariant or constant, a new stmt that creates a vector def
1581 needs to be introduced. VECTYPE may be used to specify a required type for
1582 vector invariant. */
1585 vect_get_vec_def_for_operand (tree op
, stmt_vec_info stmt_vinfo
, tree vectype
)
1588 enum vect_def_type dt
;
1590 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_vinfo
);
1592 if (dump_enabled_p ())
1593 dump_printf_loc (MSG_NOTE
, vect_location
,
1594 "vect_get_vec_def_for_operand: %T\n", op
);
1596 stmt_vec_info def_stmt_info
;
1597 is_simple_use
= vect_is_simple_use (op
, loop_vinfo
, &dt
,
1598 &def_stmt_info
, &def_stmt
);
/* Operands reaching this point were already validated by the
   analysis phase, so classification must succeed.  */
1599 gcc_assert (is_simple_use
);
1600 if (def_stmt
&& dump_enabled_p ())
1601 dump_printf_loc (MSG_NOTE
, vect_location
, " def_stmt = %G", def_stmt
);
/* Constants/invariants: materialize a vector def in the preheader.  */
1603 if (dt
== vect_constant_def
|| dt
== vect_external_def
)
1605 tree stmt_vectype
= STMT_VINFO_VECTYPE (stmt_vinfo
);
/* Prefer the caller-supplied VECTYPE; fall back to a boolean
   vector matching the stmt's vectype, else derive one from OP.  */
1609 vector_type
= vectype
;
1610 else if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op
))
1611 && VECTOR_BOOLEAN_TYPE_P (stmt_vectype
))
1612 vector_type
= build_same_sized_truth_vector_type (stmt_vectype
);
1614 vector_type
= get_vectype_for_scalar_type (loop_vinfo
, TREE_TYPE (op
));
1616 gcc_assert (vector_type
);
1617 return vect_init_vector (stmt_vinfo
, op
, vector_type
, NULL
);
1620 return vect_get_vec_def_for_operand_1 (def_stmt_info
, dt
);
1624 /* Function vect_get_vec_def_for_stmt_copy
1626 Return a vector-def for an operand. This function is used when the
1627 vectorized stmt to be created (by the caller to this function) is a "copy"
1628 created in case the vectorized result cannot fit in one vector, and several
1629 copies of the vector-stmt are required. In this case the vector-def is
1630 retrieved from the vector stmt recorded in the STMT_VINFO_RELATED_STMT field
1631 of the stmt that defines VEC_OPRND. VINFO describes the vectorization.
1634 In case the vectorization factor (VF) is bigger than the number
1635 of elements that can fit in a vectype (nunits), we have to generate
1636 more than one vector stmt to vectorize the scalar stmt. This situation
1637 arises when there are multiple data-types operated upon in the loop; the
1638 smallest data-type determines the VF, and as a result, when vectorizing
1639 stmts operating on wider types we need to create 'VF/nunits' "copies" of the
1640 vector stmt (each computing a vector of 'nunits' results, and together
1641 computing 'VF' results in each iteration). This function is called when
1642 vectorizing such a stmt (e.g. vectorizing S2 in the illustration below, in
1643 which VF=16 and nunits=4, so the number of copies required is 4):
1645 scalar stmt: vectorized into: STMT_VINFO_RELATED_STMT
1647 S1: x = load VS1.0: vx.0 = memref0 VS1.1
1648 VS1.1: vx.1 = memref1 VS1.2
1649 VS1.2: vx.2 = memref2 VS1.3
1650 VS1.3: vx.3 = memref3
1652 S2: z = x + ... VSnew.0: vz0 = vx.0 + ... VSnew.1
1653 VSnew.1: vz1 = vx.1 + ... VSnew.2
1654 VSnew.2: vz2 = vx.2 + ... VSnew.3
1655 VSnew.3: vz3 = vx.3 + ...
1657 The vectorization of S1 is explained in vectorizable_load.
1658 The vectorization of S2:
1659 To create the first vector-stmt out of the 4 copies - VSnew.0 -
1660 the function 'vect_get_vec_def_for_operand' is called to
1661 get the relevant vector-def for each operand of S2. For operand x it
1662 returns the vector-def 'vx.0'.
1664 To create the remaining copies of the vector-stmt (VSnew.j), this
1665 function is called to get the relevant vector-def for each operand. It is
1666 obtained from the respective VS1.j stmt, which is recorded in the
1667 STMT_VINFO_RELATED_STMT field of the stmt that defines VEC_OPRND.
1669 For example, to obtain the vector-def 'vx.1' in order to create the
1670 vector stmt 'VSnew.1', this function is called with VEC_OPRND='vx.0'.
1671 Given 'vx0' we obtain the stmt that defines it ('VS1.0'); from the
1672 STMT_VINFO_RELATED_STMT field of 'VS1.0' we obtain the next copy - 'VS1.1',
1673 and return its def ('vx.1').
1674 Overall, to create the above sequence this function will be called 3 times:
1675 vx.1 = vect_get_vec_def_for_stmt_copy (vinfo, vx.0);
1676 vx.2 = vect_get_vec_def_for_stmt_copy (vinfo, vx.1);
1677 vx.3 = vect_get_vec_def_for_stmt_copy (vinfo, vx.2); */
1680 vect_get_vec_def_for_stmt_copy (vec_info
*vinfo
, tree vec_oprnd
)
1682 stmt_vec_info def_stmt_info
= vinfo
->lookup_def (vec_oprnd
);
/* Defs without a stmt_vec_info (e.g. invariants) have no copy chain.  */
1684 /* Do nothing; can reuse same def. */
/* Advance one step along the RELATED_STMT chain of copies.  */
1687 def_stmt_info
= STMT_VINFO_RELATED_STMT (def_stmt_info
);
1688 gcc_assert (def_stmt_info
);
1689 if (gphi
*phi
= dyn_cast
<gphi
*> (def_stmt_info
->stmt
))
1690 vec_oprnd
= PHI_RESULT (phi
);
1692 vec_oprnd
= gimple_get_lhs (def_stmt_info
->stmt
);
1697 /* Get vectorized definitions for the operands to create a copy of an original
1698 stmt. See vect_get_vec_def_for_stmt_copy () for details. */
/* Replaces the single def on top of VEC_OPRNDS0 (and, if present,
   VEC_OPRNDS1) with the def of the next stmt copy in the chain.  */
1701 vect_get_vec_defs_for_stmt_copy (vec_info
*vinfo
,
1702 vec
<tree
> *vec_oprnds0
,
1703 vec
<tree
> *vec_oprnds1
)
1705 tree vec_oprnd
= vec_oprnds0
->pop ();
1707 vec_oprnd
= vect_get_vec_def_for_stmt_copy (vinfo
, vec_oprnd
);
1708 vec_oprnds0
->quick_push (vec_oprnd
);
/* The second operand vector is optional and may be empty.  */
1710 if (vec_oprnds1
&& vec_oprnds1
->length ())
1712 vec_oprnd
= vec_oprnds1
->pop ();
1713 vec_oprnd
= vect_get_vec_def_for_stmt_copy (vinfo
, vec_oprnd
);
1714 vec_oprnds1
->quick_push (vec_oprnd
);
1719 /* Get vectorized definitions for OP0 and OP1. */
/* SLP path: defs come from the SLP children; non-SLP path: one def
   per operand via vect_get_vec_def_for_operand.  OP1 may be null,
   in which case VEC_OPRNDS1 is left untouched on the non-SLP path.  */
1722 vect_get_vec_defs (tree op0
, tree op1
, stmt_vec_info stmt_info
,
1723 vec
<tree
> *vec_oprnds0
,
1724 vec
<tree
> *vec_oprnds1
,
1729 auto_vec
<vec
<tree
> > vec_defs (SLP_TREE_CHILDREN (slp_node
).length ());
1730 vect_get_slp_defs (slp_node
, &vec_defs
, op1
? 2 : 1);
1731 *vec_oprnds0
= vec_defs
[0];
1733 *vec_oprnds1
= vec_defs
[1];
1739 vec_oprnds0
->create (1);
1740 vec_oprnd
= vect_get_vec_def_for_operand (op0
, stmt_info
);
1741 vec_oprnds0
->quick_push (vec_oprnd
);
1745 vec_oprnds1
->create (1);
1746 vec_oprnd
= vect_get_vec_def_for_operand (op1
, stmt_info
);
1747 vec_oprnds1
->quick_push (vec_oprnd
);
1752 /* Helper function called by vect_finish_replace_stmt and
1753 vect_finish_stmt_generation. Set the location of the new
1754 statement and create and return a stmt_vec_info for it. */
1756 static stmt_vec_info
1757 vect_finish_stmt_generation_1 (stmt_vec_info stmt_info
, gimple
*vec_stmt
)
1759 vec_info
*vinfo
= stmt_info
->vinfo
;
1761 stmt_vec_info vec_stmt_info
= vinfo
->add_stmt (vec_stmt
);
1763 if (dump_enabled_p ())
1764 dump_printf_loc (MSG_NOTE
, vect_location
, "add new stmt: %G", vec_stmt
);
/* Inherit the scalar stmt's source location for debug info.  */
1766 gimple_set_location (vec_stmt
, gimple_location (stmt_info
->stmt
));
1768 /* While EH edges will generally prevent vectorization, stmt might
1769 e.g. be in a must-not-throw region. Ensure newly created stmts
1770 that could throw are part of the same region. */
1771 int lp_nr
= lookup_stmt_eh_lp (stmt_info
->stmt
);
1772 if (lp_nr
!= 0 && stmt_could_throw_p (cfun
, vec_stmt
))
1773 add_stmt_to_eh_lp (vec_stmt
, lp_nr
);
1775 return vec_stmt_info
;
1778 /* Replace the scalar statement STMT_INFO with a new vector statement VEC_STMT,
1779 which sets the same scalar result as STMT_INFO did. Create and return a
1780 stmt_vec_info for VEC_STMT. */
1783 vect_finish_replace_stmt (stmt_vec_info stmt_info
, gimple
*vec_stmt
)
/* In-place replacement only works when both stmts define the same lhs.  */
1785 gcc_assert (gimple_get_lhs (stmt_info
->stmt
) == gimple_get_lhs (vec_stmt
));
1787 gimple_stmt_iterator gsi
= gsi_for_stmt (stmt_info
->stmt
);
1788 gsi_replace (&gsi
, vec_stmt
, true);
1790 return vect_finish_stmt_generation_1 (stmt_info
, vec_stmt
);
1793 /* Add VEC_STMT to the vectorized implementation of STMT_INFO and insert it
1794 before *GSI. Create and return a stmt_vec_info for VEC_STMT. */
1797 vect_finish_stmt_generation (stmt_vec_info stmt_info
, gimple
*vec_stmt
,
1798 gimple_stmt_iterator
*gsi
)
1800 gcc_assert (gimple_code (stmt_info
->stmt
) != GIMPLE_LABEL
);
/* Keep virtual SSA form up to date manually so that inserting the
   stmt does not trigger a full virtual-operand renaming pass.  */
1802 if (!gsi_end_p (*gsi
)
1803 && gimple_has_mem_ops (vec_stmt
))
1805 gimple
*at_stmt
= gsi_stmt (*gsi
);
1806 tree vuse
= gimple_vuse (at_stmt
);
1807 if (vuse
&& TREE_CODE (vuse
) == SSA_NAME
)
1809 tree vdef
= gimple_vdef (at_stmt
);
1810 gimple_set_vuse (vec_stmt
, gimple_vuse (at_stmt
));
1811 /* If we have an SSA vuse and insert a store, update virtual
1812 SSA form to avoid triggering the renamer. Do so only
1813 if we can easily see all uses - which is what almost always
1814 happens with the way vectorized stmts are inserted. */
1815 if ((vdef
&& TREE_CODE (vdef
) == SSA_NAME
)
1816 && ((is_gimple_assign (vec_stmt
)
1817 && !is_gimple_reg (gimple_assign_lhs (vec_stmt
)))
1818 || (is_gimple_call (vec_stmt
)
1819 && !(gimple_call_flags (vec_stmt
)
1820 & (ECF_CONST
|ECF_PURE
|ECF_NOVOPS
)))))
/* The new store defines a fresh virtual name; rewire the
   following stmt to use it.  */
1822 tree new_vdef
= copy_ssa_name (vuse
, vec_stmt
);
1823 gimple_set_vdef (vec_stmt
, new_vdef
);
1824 SET_USE (gimple_vuse_op (at_stmt
), new_vdef
);
1828 gsi_insert_before (gsi
, vec_stmt
, GSI_SAME_STMT
);
1829 return vect_finish_stmt_generation_1 (stmt_info
, vec_stmt
);
1832 /* We want to vectorize a call to combined function CFN with function
1833 decl FNDECL, using VECTYPE_OUT as the type of the output and VECTYPE_IN
1834 as the types of all inputs. Check whether this is possible using
1835 an internal function, returning its code if so or IFN_LAST if not. */
1838 vectorizable_internal_function (combined_fn cfn
, tree fndecl
,
1839 tree vectype_out
, tree vectype_in
)
1842 if (internal_fn_p (cfn
))
1843 ifn
= as_internal_fn (cfn
);
/* Otherwise try mapping the builtin's decl to an internal fn.  */
1845 ifn
= associated_internal_fn (fndecl
);
1846 if (ifn
!= IFN_LAST
&& direct_internal_fn_p (ifn
))
1848 const direct_internal_fn_info
&info
= direct_internal_fn (ifn
);
1849 if (info
.vectorizable
)
/* type0/type1 < 0 select the output type, otherwise the input.  */
1851 tree type0
= (info
.type0
< 0 ? vectype_out
: vectype_in
);
1852 tree type1
= (info
.type1
< 0 ? vectype_out
: vectype_in
);
1853 if (direct_internal_fn_supported_p (ifn
, tree_pair (type0
, type1
),
1854 OPTIMIZE_FOR_SPEED
))
1862 static tree
permute_vec_elements (tree
, tree
, tree
, stmt_vec_info
,
1863 gimple_stmt_iterator
*);
1865 /* Check whether a load or store statement in the loop described by
1866 LOOP_VINFO is possible in a fully-masked loop. This is testing
1867 whether the vectorizer pass has the appropriate support, as well as
1868 whether the target does.
1870 VLS_TYPE says whether the statement is a load or store and VECTYPE
1871 is the type of the vector being loaded or stored. MEMORY_ACCESS_TYPE
1872 says how the load or store is going to be implemented and GROUP_SIZE
1873 is the number of load or store statements in the containing group.
1874 If the access is a gather load or scatter store, GS_INFO describes
1875 its arguments. If the load or store is conditional, SCALAR_MASK is the
1876 condition under which it occurs.
1878 Clear LOOP_VINFO_CAN_FULLY_MASK_P if a fully-masked loop is not
1879 supported, otherwise record the required mask types. */
1882 check_load_store_masking (loop_vec_info loop_vinfo
, tree vectype
,
1883 vec_load_store_type vls_type
, int group_size
,
1884 vect_memory_access_type memory_access_type
,
1885 gather_scatter_info
*gs_info
, tree scalar_mask
)
1887 /* Invariant loads need no special support. */
1888 if (memory_access_type
== VMAT_INVARIANT
)
1891 vec_loop_masks
*masks
= &LOOP_VINFO_MASKS (loop_vinfo
);
1892 machine_mode vecmode
= TYPE_MODE (vectype
);
1893 bool is_load
= (vls_type
== VLS_LOAD
);
/* Load/store-lanes: require the masked variant of the lanes optab.  */
1894 if (memory_access_type
== VMAT_LOAD_STORE_LANES
)
1897 ? !vect_load_lanes_supported (vectype
, group_size
, true)
1898 : !vect_store_lanes_supported (vectype
, group_size
, true))
1900 if (dump_enabled_p ())
1901 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1902 "can't use a fully-masked loop because the"
1903 " target doesn't have an appropriate masked"
1904 " load/store-lanes instruction.\n");
1905 LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo
) = false;
1908 unsigned int ncopies
= vect_get_num_copies (loop_vinfo
, vectype
);
1909 vect_record_loop_mask (loop_vinfo
, masks
, ncopies
, vectype
, scalar_mask
);
/* Gather/scatter: require the masked internal fn for this offset type.  */
1913 if (memory_access_type
== VMAT_GATHER_SCATTER
)
1915 internal_fn ifn
= (is_load
1916 ? IFN_MASK_GATHER_LOAD
1917 : IFN_MASK_SCATTER_STORE
);
1918 tree offset_type
= TREE_TYPE (gs_info
->offset
);
1919 if (!internal_gather_scatter_fn_supported_p (ifn
, vectype
,
1920 gs_info
->memory_type
,
1921 TYPE_SIGN (offset_type
),
1924 if (dump_enabled_p ())
1925 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1926 "can't use a fully-masked loop because the"
1927 " target doesn't have an appropriate masked"
1928 " gather load or scatter store instruction.\n");
1929 LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo
) = false;
1932 unsigned int ncopies
= vect_get_num_copies (loop_vinfo
, vectype
);
1933 vect_record_loop_mask (loop_vinfo
, masks
, ncopies
, vectype
, scalar_mask
);
1937 if (memory_access_type
!= VMAT_CONTIGUOUS
1938 && memory_access_type
!= VMAT_CONTIGUOUS_PERMUTE
)
1940 /* Element X of the data must come from iteration i * VF + X of the
1941 scalar loop. We need more work to support other mappings. */
1942 if (dump_enabled_p ())
1943 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1944 "can't use a fully-masked loop because an access"
1945 " isn't contiguous.\n");
1946 LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo
) = false;
/* Contiguous case: the target must provide a mask mode and masked
   load/store for this vector mode.  */
1950 machine_mode mask_mode
;
1951 if (!(targetm
.vectorize
.get_mask_mode
1952 (GET_MODE_NUNITS (vecmode
),
1953 GET_MODE_SIZE (vecmode
)).exists (&mask_mode
))
1954 || !can_vec_mask_load_store_p (vecmode
, mask_mode
, is_load
))
1956 if (dump_enabled_p ())
1957 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1958 "can't use a fully-masked loop because the target"
1959 " doesn't have the appropriate masked load or"
1961 LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo
) = false;
1964 /* We might load more scalars than we need for permuting SLP loads.
1965 We checked in get_group_load_store_type that the extra elements
1966 don't leak into a new vector. */
1967 poly_uint64 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
1968 poly_uint64 vf
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
);
1969 unsigned int nvectors
;
1970 if (can_div_away_from_zero_p (group_size
* vf
, nunits
, &nvectors
))
1971 vect_record_loop_mask (loop_vinfo
, masks
, nvectors
, vectype
, scalar_mask
);
1976 /* Return the mask input to a masked load or store. VEC_MASK is the vectorized
1977 form of the scalar mask condition and LOOP_MASK, if nonnull, is the mask
1978 that needs to be applied to all loads and stores in a vectorized loop.
1979 Return VEC_MASK if LOOP_MASK is null, otherwise return VEC_MASK & LOOP_MASK.
1981 MASK_TYPE is the type of both masks. If new statements are needed,
1982 insert them before GSI. */
1985 prepare_load_store_mask (tree mask_type
, tree loop_mask
, tree vec_mask
,
1986 gimple_stmt_iterator
*gsi
)
1988 gcc_assert (useless_type_conversion_p (mask_type
, TREE_TYPE (vec_mask
)));
/* Combine the loop mask with the scalar-condition mask via BIT_AND.  */
1992 gcc_assert (TREE_TYPE (loop_mask
) == mask_type
);
1993 tree and_res
= make_temp_ssa_name (mask_type
, NULL
, "vec_mask_and");
1994 gimple
*and_stmt
= gimple_build_assign (and_res
, BIT_AND_EXPR
,
1995 vec_mask
, loop_mask
);
1996 gsi_insert_before (gsi
, and_stmt
, GSI_SAME_STMT
);
2000 /* Determine whether we can use a gather load or scatter store to vectorize
2001 strided load or store STMT_INFO by truncating the current offset to a
2002 smaller width. We need to be able to construct an offset vector:
2004 { 0, X, X*2, X*3, ... }
2006 without loss of precision, where X is STMT_INFO's DR_STEP.
2008 Return true if this is possible, describing the gather load or scatter
2009 store in GS_INFO. MASKED_P is true if the load or store is conditional. */
2012 vect_truncate_gather_scatter_offset (stmt_vec_info stmt_info
,
2013 loop_vec_info loop_vinfo
, bool masked_p
,
2014 gather_scatter_info
*gs_info
)
2016 dr_vec_info
*dr_info
= STMT_VINFO_DR_INFO (stmt_info
);
2017 data_reference
*dr
= dr_info
->dr
;
2018 tree step
= DR_STEP (dr
);
2019 if (TREE_CODE (step
) != INTEGER_CST
)
2021 /* ??? Perhaps we could use range information here? */
2022 if (dump_enabled_p ())
2023 dump_printf_loc (MSG_NOTE
, vect_location
,
2024 "cannot truncate variable step.\n");
2028 /* Get the number of bits in an element. */
2029 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
2030 scalar_mode element_mode
= SCALAR_TYPE_MODE (TREE_TYPE (vectype
));
2031 unsigned int element_bits
= GET_MODE_BITSIZE (element_mode
);
2033 /* Set COUNT to the upper limit on the number of elements - 1.
2034 Start with the maximum vectorization factor. */
2035 unsigned HOST_WIDE_INT count
= vect_max_vf (loop_vinfo
) - 1;
2037 /* Try lowering COUNT to the number of scalar latch iterations. */
2038 class loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
2039 widest_int max_iters
;
2040 if (max_loop_iterations (loop
, &max_iters
)
2041 && max_iters
< count
)
2042 count
= max_iters
.to_shwi ();
2044 /* Try scales of 1 and the element size. */
2045 int scales
[] = { 1, vect_get_scalar_dr_size (dr_info
) };
2046 wi::overflow_type overflow
= wi::OVF_NONE
;
2047 for (int i
= 0; i
< 2; ++i
)
2049 int scale
= scales
[i
];
/* The step must be an exact multiple of the candidate scale.  */
2051 if (!wi::multiple_of_p (wi::to_widest (step
), scale
, SIGNED
, &factor
))
2054 /* See whether we can calculate (COUNT - 1) * STEP / SCALE
2055 in OFFSET_BITS bits. */
2056 widest_int range
= wi::mul (count
, factor
, SIGNED
, &overflow
);
/* Record precision overflow and try the next scale.  */
2059 signop sign
= range
>= 0 ? UNSIGNED
: SIGNED
;
2060 if (wi::min_precision (range
, sign
) > element_bits
)
2062 overflow
= wi::OVF_UNKNOWN
;
2066 /* See whether the target supports the operation. */
2067 tree memory_type
= TREE_TYPE (DR_REF (dr
));
2068 if (!vect_gather_scatter_fn_p (DR_IS_READ (dr
), masked_p
, vectype
,
2069 memory_type
, element_bits
, sign
, scale
,
2070 &gs_info
->ifn
, &gs_info
->element_type
))
/* Success: fill GS_INFO with the truncated-offset description.  */
2073 tree offset_type
= build_nonstandard_integer_type (element_bits
,
2076 gs_info
->decl
= NULL_TREE
;
2077 /* Logically the sum of DR_BASE_ADDRESS, DR_INIT and DR_OFFSET,
2078 but we don't need to store that here. */
2079 gs_info
->base
= NULL_TREE
;
2080 gs_info
->offset
= fold_convert (offset_type
, step
);
2081 gs_info
->offset_dt
= vect_constant_def
;
2082 gs_info
->offset_vectype
= NULL_TREE
;
2083 gs_info
->scale
= scale
;
2084 gs_info
->memory_type
= memory_type
;
2088 if (overflow
&& dump_enabled_p ())
2089 dump_printf_loc (MSG_NOTE
, vect_location
,
2090 "truncating gather/scatter offset to %d bits"
2091 " might change its value.\n", element_bits
);
2096 /* Return true if we can use gather/scatter internal functions to
2097 vectorize STMT_INFO, which is a grouped or strided load or store.
2098 MASKED_P is true if load or store is conditional. When returning
2099 true, fill in GS_INFO with the information required to perform the
2103 vect_use_strided_gather_scatters_p (stmt_vec_info stmt_info
,
2104 loop_vec_info loop_vinfo
, bool masked_p
,
2105 gather_scatter_info
*gs_info
)
2107 if (!vect_check_gather_scatter (stmt_info
, loop_vinfo
, gs_info
)
2109 return vect_truncate_gather_scatter_offset (stmt_info
, loop_vinfo
,
2112 scalar_mode element_mode
= SCALAR_TYPE_MODE (gs_info
->element_type
);
2113 unsigned int element_bits
= GET_MODE_BITSIZE (element_mode
);
2114 tree offset_type
= TREE_TYPE (gs_info
->offset
);
2115 unsigned int offset_bits
= TYPE_PRECISION (offset_type
);
2117 /* Enforced by vect_check_gather_scatter. */
2118 gcc_assert (element_bits
>= offset_bits
);
2120 /* If the elements are wider than the offset, convert the offset to the
2121 same width, without changing its sign. */
2122 if (element_bits
> offset_bits
)
2124 bool unsigned_p
= TYPE_UNSIGNED (offset_type
);
2125 offset_type
= build_nonstandard_integer_type (element_bits
, unsigned_p
);
2126 gs_info
->offset
= fold_convert (offset_type
, gs_info
->offset
);
2129 if (dump_enabled_p ())
2130 dump_printf_loc (MSG_NOTE
, vect_location
,
2131 "using gather/scatter for strided/grouped access,"
2132 " scale = %d\n", gs_info
->scale
);
2137 /* STMT_INFO is a non-strided load or store, meaning that it accesses
2138 elements with a known constant step. Return -1 if that step
2139 is negative, 0 if it is zero, and 1 if it is greater than zero. */
2142 compare_step_with_zero (stmt_vec_info stmt_info
)
2144 dr_vec_info
*dr_info
= STMT_VINFO_DR_INFO (stmt_info
);
2145 return tree_int_cst_compare (vect_dr_behavior (dr_info
)->step
,
2149 /* If the target supports a permute mask that reverses the elements in
2150 a vector of type VECTYPE, return that mask, otherwise return null. */
2153 perm_mask_for_reverse (tree vectype
)
2155 poly_uint64 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
2157 /* The encoding has a single stepped pattern. */
2158 vec_perm_builder
sel (nunits
, 1, 3);
2159 for (int i
= 0; i
< 3; ++i
)
2160 sel
.quick_push (nunits
- 1 - i
);
2162 vec_perm_indices
indices (sel
, 1, nunits
);
2163 if (!can_vec_perm_const_p (TYPE_MODE (vectype
), indices
))
2165 return vect_gen_perm_mask_checked (vectype
, indices
);
2168 /* STMT_INFO is either a masked or unconditional store. Return the value
2172 vect_get_store_rhs (stmt_vec_info stmt_info
)
2174 if (gassign
*assign
= dyn_cast
<gassign
*> (stmt_info
->stmt
))
2176 gcc_assert (gimple_assign_single_p (assign
));
2177 return gimple_assign_rhs1 (assign
);
2179 if (gcall
*call
= dyn_cast
<gcall
*> (stmt_info
->stmt
))
2181 internal_fn ifn
= gimple_call_internal_fn (call
);
2182 int index
= internal_fn_stored_value_index (ifn
);
2183 gcc_assert (index
>= 0);
2184 return gimple_call_arg (call
, index
);
/* [review] NOTE: this region has been mangled by extraction — original
   statements are split across lines, leading numbers are original line
   numbers, and several original lines (braces, else-arms, returns) are
   missing.  Comments below only describe what the visible code shows.  */
2189 /* A subroutine of get_load_store_type, with a subset of the same
2190 arguments. Handle the case where STMT_INFO is part of a grouped load
2193 For stores, the statements in the group are all consecutive
2194 and there is no gap at the end. For loads, the statements in the
2195 group might not be consecutive; there can be gaps between statements
2196 as well as at the end. */
2199 get_group_load_store_type (stmt_vec_info stmt_info
, tree vectype
, bool slp
,
2200 bool masked_p
, vec_load_store_type vls_type
,
2201 vect_memory_access_type
*memory_access_type
,
2202 gather_scatter_info
*gs_info
)
/* [review] Gather the group leader, its data reference, group size,
   gap and element count up front; all later decisions key off these.  */
2204 vec_info
*vinfo
= stmt_info
->vinfo
;
2205 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
2206 class loop
*loop
= loop_vinfo
? LOOP_VINFO_LOOP (loop_vinfo
) : NULL
;
2207 stmt_vec_info first_stmt_info
= DR_GROUP_FIRST_ELEMENT (stmt_info
);
2208 dr_vec_info
*first_dr_info
= STMT_VINFO_DR_INFO (first_stmt_info
);
2209 unsigned int group_size
= DR_GROUP_SIZE (first_stmt_info
);
2210 bool single_element_p
= (stmt_info
== first_stmt_info
2211 && !DR_GROUP_NEXT_ELEMENT (stmt_info
));
2212 unsigned HOST_WIDE_INT gap
= DR_GROUP_GAP (first_stmt_info
);
2213 poly_uint64 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
2215 /* True if the vectorized statements would access beyond the last
2216 statement in the group. */
2217 bool overrun_p
= false;
2219 /* True if we can cope with such overrun by peeling for gaps, so that
2220 there is at least one final scalar iteration after the vector loop. */
/* [review] can_overrun_p requires an unmasked load; the remaining
   conjuncts of this condition are lost to the mangling — presumably
   they restrict to loop vectorization (TODO confirm against GCC).  */
2221 bool can_overrun_p
= (!masked_p
2222 && vls_type
== VLS_LOAD
2226 /* There can only be a gap at the end of the group if the stride is
2227 known at compile time. */
2228 gcc_assert (!STMT_VINFO_STRIDED_P (first_stmt_info
) || gap
== 0);
2230 /* Stores can't yet have gaps. */
2231 gcc_assert (slp
|| vls_type
== VLS_LOAD
|| gap
== 0);
/* [review] Strided groups pick VMAT_STRIDED_SLP only when the vector
   length is a multiple of the group size; otherwise elementwise.  */
2235 if (STMT_VINFO_STRIDED_P (first_stmt_info
))
2237 /* Try to use consecutive accesses of DR_GROUP_SIZE elements,
2238 separated by the stride, until we have a complete vector.
2239 Fall back to scalar accesses if that isn't possible. */
2240 if (multiple_p (nunits
, group_size
))
2241 *memory_access_type
= VMAT_STRIDED_SLP
;
2243 *memory_access_type
= VMAT_ELEMENTWISE
;
/* [review] Non-strided (SLP) path: an end-of-group gap in a loop means
   the vector accesses overrun the group; stores with gaps are rejected.  */
2247 overrun_p
= loop_vinfo
&& gap
!= 0;
2248 if (overrun_p
&& vls_type
!= VLS_LOAD
)
2250 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2251 "Grouped store with gaps requires"
2252 " non-consecutive accesses\n");
2255 /* An overrun is fine if the trailing elements are smaller
2256 than the alignment boundary B. Every vector access will
2257 be a multiple of B and so we are guaranteed to access a
2258 non-gap element in the same B-sized block. */
2260 && gap
< (vect_known_alignment_in_bytes (first_dr_info
)
2261 / vect_get_scalar_dr_size (first_dr_info
)))
2264 /* If the gap splits the vector in half and the target
2265 can do half-vector operations avoid the epilogue peeling
2266 by simply loading half of the vector only. Usually
2267 the construction with an upper zero half will be elided. */
2268 dr_alignment_support alignment_support_scheme
;
2269 scalar_mode elmode
= SCALAR_TYPE_MODE (TREE_TYPE (vectype
));
/* [review] Half-vector shortcut: requires supportable (or unaligned-
   supported) alignment, gap == group_size/2, an existing half-width
   vector mode and a vec_init pattern to widen the half vector.  */
2273 && (((alignment_support_scheme
2274 = vect_supportable_dr_alignment (first_dr_info
, false)))
2276 || alignment_support_scheme
== dr_unaligned_supported
)
2277 && known_eq (nunits
, (group_size
- gap
) * 2)
2278 && known_eq (nunits
, group_size
)
2279 && mode_for_vector (elmode
, (group_size
- gap
)).exists (&vmode
)
2280 && VECTOR_MODE_P (vmode
)
2281 && targetm
.vector_mode_supported_p (vmode
)
2282 && (convert_optab_handler (vec_init_optab
,
2283 TYPE_MODE (vectype
), vmode
)
2284 != CODE_FOR_nothing
))
2287 if (overrun_p
&& !can_overrun_p
)
2289 if (dump_enabled_p ())
2290 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2291 "Peeling for outer loop is not supported\n");
2294 *memory_access_type
= VMAT_CONTIGUOUS
;
/* [review] Non-SLP path: start from elementwise and upgrade to
   LOAD/STORE_LANES, contiguous-permute, or gather/scatter if the
   target supports them and any end-of-group overrun is tolerable.  */
2299 /* We can always handle this case using elementwise accesses,
2300 but see if something more efficient is available. */
2301 *memory_access_type
= VMAT_ELEMENTWISE
;
2303 /* If there is a gap at the end of the group then these optimizations
2304 would access excess elements in the last iteration. */
2305 bool would_overrun_p
= (gap
!= 0);
2306 /* An overrun is fine if the trailing elements are smaller than the
2307 alignment boundary B. Every vector access will be a multiple of B
2308 and so we are guaranteed to access a non-gap element in the
2309 same B-sized block. */
2312 && gap
< (vect_known_alignment_in_bytes (first_dr_info
)
2313 / vect_get_scalar_dr_size (first_dr_info
)))
2314 would_overrun_p
= false;
2316 if (!STMT_VINFO_STRIDED_P (first_stmt_info
)
2317 && (can_overrun_p
|| !would_overrun_p
)
2318 && compare_step_with_zero (stmt_info
) > 0)
2320 /* First cope with the degenerate case of a single-element
2322 if (known_eq (TYPE_VECTOR_SUBPARTS (vectype
), 1U))
2323 *memory_access_type
= VMAT_CONTIGUOUS
;
2325 /* Otherwise try using LOAD/STORE_LANES. */
2326 if (*memory_access_type
== VMAT_ELEMENTWISE
2327 && (vls_type
== VLS_LOAD
2328 ? vect_load_lanes_supported (vectype
, group_size
, masked_p
)
2329 : vect_store_lanes_supported (vectype
, group_size
,
2332 *memory_access_type
= VMAT_LOAD_STORE_LANES
;
2333 overrun_p
= would_overrun_p
;
2336 /* If that fails, try using permuting loads. */
2337 if (*memory_access_type
== VMAT_ELEMENTWISE
2338 && (vls_type
== VLS_LOAD
2339 ? vect_grouped_load_supported (vectype
, single_element_p
,
2341 : vect_grouped_store_supported (vectype
, group_size
)))
2343 *memory_access_type
= VMAT_CONTIGUOUS_PERMUTE
;
2344 overrun_p
= would_overrun_p
;
2348 /* As a last resort, trying using a gather load or scatter store.
2350 ??? Although the code can handle all group sizes correctly,
2351 it probably isn't a win to use separate strided accesses based
2352 on nearby locations. Or, even if it's a win over scalar code,
2353 it might not be a win over vectorizing at a lower VF, if that
2354 allows us to use contiguous accesses. */
2355 if (*memory_access_type
== VMAT_ELEMENTWISE
2358 && vect_use_strided_gather_scatters_p (stmt_info
, loop_vinfo
,
2360 *memory_access_type
= VMAT_GATHER_SCATTER
;
/* [review] For a store group, the leader validates that every member's
   stored value is a simple use (constant/SSA def).  */
2363 if (vls_type
!= VLS_LOAD
&& first_stmt_info
== stmt_info
)
2365 /* STMT is the leader of the group. Check the operands of all the
2366 stmts of the group. */
2367 stmt_vec_info next_stmt_info
= DR_GROUP_NEXT_ELEMENT (stmt_info
);
2368 while (next_stmt_info
)
2370 tree op
= vect_get_store_rhs (next_stmt_info
);
2371 enum vect_def_type dt
;
2372 if (!vect_is_simple_use (op
, vinfo
, &dt
))
2374 if (dump_enabled_p ())
2375 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2376 "use not simple.\n");
2379 next_stmt_info
= DR_GROUP_NEXT_ELEMENT (next_stmt_info
);
/* [review] Surviving overrun commits the loop to peeling for gaps
   (a scalar epilogue), recorded in the loop_vec_info.  */
2385 gcc_assert (can_overrun_p
);
2386 if (dump_enabled_p ())
2387 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2388 "Data access with gaps requires scalar "
2390 LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo
) = true;
2396 /* A subroutine of get_load_store_type, with a subset of the same
2397 arguments. Handle the case where STMT_INFO is a load or store that
2398 accesses consecutive elements with a negative step. */
2400 static vect_memory_access_type
2401 get_negative_load_store_type (stmt_vec_info stmt_info
, tree vectype
,
2402 vec_load_store_type vls_type
,
2403 unsigned int ncopies
)
2405 dr_vec_info
*dr_info
= STMT_VINFO_DR_INFO (stmt_info
);
2406 dr_alignment_support alignment_support_scheme
;
2410 if (dump_enabled_p ())
2411 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2412 "multiple types with negative step.\n");
2413 return VMAT_ELEMENTWISE
;
2416 alignment_support_scheme
= vect_supportable_dr_alignment (dr_info
, false);
2417 if (alignment_support_scheme
!= dr_aligned
2418 && alignment_support_scheme
!= dr_unaligned_supported
)
2420 if (dump_enabled_p ())
2421 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2422 "negative step but alignment required.\n");
2423 return VMAT_ELEMENTWISE
;
2426 if (vls_type
== VLS_STORE_INVARIANT
)
2428 if (dump_enabled_p ())
2429 dump_printf_loc (MSG_NOTE
, vect_location
,
2430 "negative step with invariant source;"
2431 " no permute needed.\n");
2432 return VMAT_CONTIGUOUS_DOWN
;
2435 if (!perm_mask_for_reverse (vectype
))
2437 if (dump_enabled_p ())
2438 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2439 "negative step and reversing not supported.\n");
2440 return VMAT_ELEMENTWISE
;
2443 return VMAT_CONTIGUOUS_REVERSE
;
2446 /* Analyze load or store statement STMT_INFO of type VLS_TYPE. Return true
2447 if there is a memory access type that the vectorized form can use,
2448 storing it in *MEMORY_ACCESS_TYPE if so. If we decide to use gathers
2449 or scatters, fill in GS_INFO accordingly.
2451 SLP says whether we're performing SLP rather than loop vectorization.
2452 MASKED_P is true if the statement is conditional on a vectorized mask.
2453 VECTYPE is the vector type that the vectorized statements will use.
2454 NCOPIES is the number of vector statements that will be needed. */
2457 get_load_store_type (stmt_vec_info stmt_info
, tree vectype
, bool slp
,
2458 bool masked_p
, vec_load_store_type vls_type
,
2459 unsigned int ncopies
,
2460 vect_memory_access_type
*memory_access_type
,
2461 gather_scatter_info
*gs_info
)
2463 vec_info
*vinfo
= stmt_info
->vinfo
;
2464 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
2465 poly_uint64 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
2466 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info
))
2468 *memory_access_type
= VMAT_GATHER_SCATTER
;
2469 if (!vect_check_gather_scatter (stmt_info
, loop_vinfo
, gs_info
))
2471 else if (!vect_is_simple_use (gs_info
->offset
, vinfo
,
2472 &gs_info
->offset_dt
,
2473 &gs_info
->offset_vectype
))
2475 if (dump_enabled_p ())
2476 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2477 "%s index use not simple.\n",
2478 vls_type
== VLS_LOAD
? "gather" : "scatter");
2482 else if (STMT_VINFO_GROUPED_ACCESS (stmt_info
))
2484 if (!get_group_load_store_type (stmt_info
, vectype
, slp
, masked_p
,
2485 vls_type
, memory_access_type
, gs_info
))
2488 else if (STMT_VINFO_STRIDED_P (stmt_info
))
2492 && vect_use_strided_gather_scatters_p (stmt_info
, loop_vinfo
,
2494 *memory_access_type
= VMAT_GATHER_SCATTER
;
2496 *memory_access_type
= VMAT_ELEMENTWISE
;
2500 int cmp
= compare_step_with_zero (stmt_info
);
2502 *memory_access_type
= get_negative_load_store_type
2503 (stmt_info
, vectype
, vls_type
, ncopies
);
2506 gcc_assert (vls_type
== VLS_LOAD
);
2507 *memory_access_type
= VMAT_INVARIANT
;
2510 *memory_access_type
= VMAT_CONTIGUOUS
;
2513 if ((*memory_access_type
== VMAT_ELEMENTWISE
2514 || *memory_access_type
== VMAT_STRIDED_SLP
)
2515 && !nunits
.is_constant ())
2517 if (dump_enabled_p ())
2518 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2519 "Not using elementwise accesses due to variable "
2520 "vectorization factor.\n");
2524 /* FIXME: At the moment the cost model seems to underestimate the
2525 cost of using elementwise accesses. This check preserves the
2526 traditional behavior until that can be fixed. */
2527 stmt_vec_info first_stmt_info
= DR_GROUP_FIRST_ELEMENT (stmt_info
);
2528 if (!first_stmt_info
)
2529 first_stmt_info
= stmt_info
;
2530 if (*memory_access_type
== VMAT_ELEMENTWISE
2531 && !STMT_VINFO_STRIDED_P (first_stmt_info
)
2532 && !(stmt_info
== DR_GROUP_FIRST_ELEMENT (stmt_info
)
2533 && !DR_GROUP_NEXT_ELEMENT (stmt_info
)
2534 && !pow2p_hwi (DR_GROUP_SIZE (stmt_info
))))
2536 if (dump_enabled_p ())
2537 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2538 "not falling back to elementwise accesses\n");
2544 /* Return true if boolean argument MASK is suitable for vectorizing
2545 conditional load or store STMT_INFO. When returning true, store the type
2546 of the definition in *MASK_DT_OUT and the type of the vectorized mask
2547 in *MASK_VECTYPE_OUT. */
2550 vect_check_load_store_mask (stmt_vec_info stmt_info
, tree mask
,
2551 vect_def_type
*mask_dt_out
,
2552 tree
*mask_vectype_out
)
2554 vec_info
*vinfo
= stmt_info
->vinfo
;
2555 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (mask
)))
2557 if (dump_enabled_p ())
2558 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2559 "mask argument is not a boolean.\n");
2563 if (TREE_CODE (mask
) != SSA_NAME
)
2565 if (dump_enabled_p ())
2566 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2567 "mask argument is not an SSA name.\n");
2571 enum vect_def_type mask_dt
;
2573 if (!vect_is_simple_use (mask
, stmt_info
->vinfo
, &mask_dt
, &mask_vectype
))
2575 if (dump_enabled_p ())
2576 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2577 "mask use not simple.\n");
2581 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
2583 mask_vectype
= get_mask_type_for_scalar_type (vinfo
, TREE_TYPE (vectype
));
2585 if (!mask_vectype
|| !VECTOR_BOOLEAN_TYPE_P (mask_vectype
))
2587 if (dump_enabled_p ())
2588 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2589 "could not find an appropriate vector mask type.\n");
2593 if (maybe_ne (TYPE_VECTOR_SUBPARTS (mask_vectype
),
2594 TYPE_VECTOR_SUBPARTS (vectype
)))
2596 if (dump_enabled_p ())
2597 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2598 "vector mask type %T"
2599 " does not match vector data type %T.\n",
2600 mask_vectype
, vectype
);
2605 *mask_dt_out
= mask_dt
;
2606 *mask_vectype_out
= mask_vectype
;
2610 /* Return true if stored value RHS is suitable for vectorizing store
2611 statement STMT_INFO. When returning true, store the type of the
2612 definition in *RHS_DT_OUT, the type of the vectorized store value in
2613 *RHS_VECTYPE_OUT and the type of the store in *VLS_TYPE_OUT. */
2616 vect_check_store_rhs (stmt_vec_info stmt_info
, tree rhs
,
2617 vect_def_type
*rhs_dt_out
, tree
*rhs_vectype_out
,
2618 vec_load_store_type
*vls_type_out
)
2620 /* In the case this is a store from a constant make sure
2621 native_encode_expr can handle it. */
2622 if (CONSTANT_CLASS_P (rhs
) && native_encode_expr (rhs
, NULL
, 64) == 0)
2624 if (dump_enabled_p ())
2625 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2626 "cannot encode constant as a byte sequence.\n");
2630 enum vect_def_type rhs_dt
;
2632 if (!vect_is_simple_use (rhs
, stmt_info
->vinfo
, &rhs_dt
, &rhs_vectype
))
2634 if (dump_enabled_p ())
2635 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2636 "use not simple.\n");
2640 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
2641 if (rhs_vectype
&& !useless_type_conversion_p (vectype
, rhs_vectype
))
2643 if (dump_enabled_p ())
2644 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2645 "incompatible vector types.\n");
2649 *rhs_dt_out
= rhs_dt
;
2650 *rhs_vectype_out
= rhs_vectype
;
2651 if (rhs_dt
== vect_constant_def
|| rhs_dt
== vect_external_def
)
2652 *vls_type_out
= VLS_STORE_INVARIANT
;
2654 *vls_type_out
= VLS_STORE
;
2658 /* Build an all-ones vector mask of type MASKTYPE while vectorizing STMT_INFO.
2659 Note that we support masks with floating-point type, in which case the
2660 floats are interpreted as a bitmask. */
2663 vect_build_all_ones_mask (stmt_vec_info stmt_info
, tree masktype
)
2665 if (TREE_CODE (masktype
) == INTEGER_TYPE
)
2666 return build_int_cst (masktype
, -1);
2667 else if (TREE_CODE (TREE_TYPE (masktype
)) == INTEGER_TYPE
)
2669 tree mask
= build_int_cst (TREE_TYPE (masktype
), -1);
2670 mask
= build_vector_from_val (masktype
, mask
);
2671 return vect_init_vector (stmt_info
, mask
, masktype
, NULL
);
2673 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (masktype
)))
2677 for (int j
= 0; j
< 6; ++j
)
2679 real_from_target (&r
, tmp
, TYPE_MODE (TREE_TYPE (masktype
)));
2680 tree mask
= build_real (TREE_TYPE (masktype
), r
);
2681 mask
= build_vector_from_val (masktype
, mask
);
2682 return vect_init_vector (stmt_info
, mask
, masktype
, NULL
);
2687 /* Build an all-zero merge value of type VECTYPE while vectorizing
2688 STMT_INFO as a gather load. */
2691 vect_build_zero_merge_argument (stmt_vec_info stmt_info
, tree vectype
)
2694 if (TREE_CODE (TREE_TYPE (vectype
)) == INTEGER_TYPE
)
2695 merge
= build_int_cst (TREE_TYPE (vectype
), 0);
2696 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (vectype
)))
2700 for (int j
= 0; j
< 6; ++j
)
2702 real_from_target (&r
, tmp
, TYPE_MODE (TREE_TYPE (vectype
)));
2703 merge
= build_real (TREE_TYPE (vectype
), r
);
2707 merge
= build_vector_from_val (vectype
, merge
);
2708 return vect_init_vector (stmt_info
, merge
, vectype
, NULL
);
/* [review] NOTE: this region has been mangled by extraction — original
   statements are split across lines, leading numbers are original line
   numbers, and many original lines are missing.  The widen/narrow and
   mask-conversion machinery below is intricate; comments are limited to
   what the visible code shows.  */
2711 /* Build a gather load call while vectorizing STMT_INFO. Insert new
2712 instructions before GSI and add them to VEC_STMT. GS_INFO describes
2713 the gather load operation. If the load is conditional, MASK is the
2714 unvectorized condition and MASK_DT is its definition type, otherwise
2718 vect_build_gather_load_calls (stmt_vec_info stmt_info
,
2719 gimple_stmt_iterator
*gsi
,
2720 stmt_vec_info
*vec_stmt
,
2721 gather_scatter_info
*gs_info
,
2724 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
2725 class loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
2726 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
2727 poly_uint64 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
2728 int ncopies
= vect_get_num_copies (loop_vinfo
, vectype
);
2729 edge pe
= loop_preheader_edge (loop
);
2730 enum { NARROW
, NONE
, WIDEN
} modifier
;
2731 poly_uint64 gather_off_nunits
2732 = TYPE_VECTOR_SUBPARTS (gs_info
->offset_vectype
);
/* [review] Decompose the gather builtin's prototype into its argument
   types: (src, ptr, index, mask, scale).  */
2734 tree arglist
= TYPE_ARG_TYPES (TREE_TYPE (gs_info
->decl
));
2735 tree rettype
= TREE_TYPE (TREE_TYPE (gs_info
->decl
));
2736 tree srctype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
2737 tree ptrtype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
2738 tree idxtype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
2739 tree masktype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
2740 tree scaletype
= TREE_VALUE (arglist
);
2741 tree real_masktype
= masktype
;
2742 gcc_checking_assert (types_compatible_p (srctype
, rettype
)
2744 || TREE_CODE (masktype
) == INTEGER_TYPE
2745 || types_compatible_p (srctype
, masktype
)));
2746 if (mask
&& TREE_CODE (masktype
) == INTEGER_TYPE
)
2747 masktype
= build_same_sized_truth_vector_type (srctype
);
2749 tree mask_halftype
= masktype
;
2750 tree perm_mask
= NULL_TREE
;
2751 tree mask_perm_mask
= NULL_TREE
;
/* [review] Classify the copy relationship between the data vector and
   the offset vector: equal element counts (NONE), offsets twice as wide
   (WIDEN) or data twice as wide (NARROW); build the permute masks that
   reshuffle operands between copies accordingly.  */
2752 if (known_eq (nunits
, gather_off_nunits
))
2754 else if (known_eq (nunits
* 2, gather_off_nunits
))
2758 /* Currently widening gathers and scatters are only supported for
2759 fixed-length vectors. */
2760 int count
= gather_off_nunits
.to_constant ();
2761 vec_perm_builder
sel (count
, count
, 1);
2762 for (int i
= 0; i
< count
; ++i
)
2763 sel
.quick_push (i
| (count
/ 2));
2765 vec_perm_indices
indices (sel
, 1, count
);
2766 perm_mask
= vect_gen_perm_mask_checked (gs_info
->offset_vectype
,
2769 else if (known_eq (nunits
, gather_off_nunits
* 2))
2773 /* Currently narrowing gathers and scatters are only supported for
2774 fixed-length vectors. */
2775 int count
= nunits
.to_constant ();
2776 vec_perm_builder
sel (count
, count
, 1);
2777 sel
.quick_grow (count
);
2778 for (int i
= 0; i
< count
; ++i
)
2779 sel
[i
] = i
< count
/ 2 ? i
: i
+ count
/ 2;
2780 vec_perm_indices
indices (sel
, 2, count
);
2781 perm_mask
= vect_gen_perm_mask_checked (vectype
, indices
);
2785 if (mask
&& masktype
== real_masktype
)
2787 for (int i
= 0; i
< count
; ++i
)
2788 sel
[i
] = i
| (count
/ 2);
2789 indices
.new_vector (sel
, 2, count
);
2790 mask_perm_mask
= vect_gen_perm_mask_checked (masktype
, indices
);
2794 = build_same_sized_truth_vector_type (gs_info
->offset_vectype
);
/* [review] Set up the loop-invariant call operands: destination var,
   invariant base pointer (forced into the preheader if needed) and the
   constant scale argument.  */
2799 tree scalar_dest
= gimple_get_lhs (stmt_info
->stmt
);
2800 tree vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
2802 tree ptr
= fold_convert (ptrtype
, gs_info
->base
);
2803 if (!is_gimple_min_invariant (ptr
))
2806 ptr
= force_gimple_operand (ptr
, &seq
, true, NULL_TREE
);
2807 basic_block new_bb
= gsi_insert_seq_on_edge_immediate (pe
, seq
);
2808 gcc_assert (!new_bb
);
2811 tree scale
= build_int_cst (scaletype
, gs_info
->scale
);
2813 tree vec_oprnd0
= NULL_TREE
;
2814 tree vec_mask
= NULL_TREE
;
2815 tree src_op
= NULL_TREE
;
2816 tree mask_op
= NULL_TREE
;
2817 tree prev_res
= NULL_TREE
;
2818 stmt_vec_info prev_stmt_info
= NULL
;
/* [review] Unconditional gathers use a zero merge value and an
   all-ones mask.  */
2822 src_op
= vect_build_zero_merge_argument (stmt_info
, rettype
);
2823 mask_op
= vect_build_all_ones_mask (stmt_info
, masktype
);
/* [review] Main emission loop: one gather call per vector copy.  Odd
   WIDEN copies reuse the previous offset vector via perm_mask.  */
2826 for (int j
= 0; j
< ncopies
; ++j
)
2829 if (modifier
== WIDEN
&& (j
& 1))
2830 op
= permute_vec_elements (vec_oprnd0
, vec_oprnd0
,
2831 perm_mask
, stmt_info
, gsi
);
2834 = vect_get_vec_def_for_operand (gs_info
->offset
, stmt_info
);
2836 op
= vec_oprnd0
= vect_get_vec_def_for_stmt_copy (loop_vinfo
,
/* [review] VIEW_CONVERT the offset to the builtin's index type when
   the types differ only in representation.  */
2839 if (!useless_type_conversion_p (idxtype
, TREE_TYPE (op
)))
2841 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op
)),
2842 TYPE_VECTOR_SUBPARTS (idxtype
)));
2843 var
= vect_get_new_ssa_name (idxtype
, vect_simple_var
);
2844 op
= build1 (VIEW_CONVERT_EXPR
, idxtype
, op
);
2845 gassign
*new_stmt
= gimple_build_assign (var
, VIEW_CONVERT_EXPR
, op
);
2846 vect_finish_stmt_generation (stmt_info
, new_stmt
, gsi
);
/* [review] Conditional gathers: fetch/permute the vectorized mask for
   this copy and convert it to the builtin's mask type.  */
2852 if (mask_perm_mask
&& (j
& 1))
2853 mask_op
= permute_vec_elements (mask_op
, mask_op
,
2854 mask_perm_mask
, stmt_info
, gsi
);
2858 vec_mask
= vect_get_vec_def_for_operand (mask
, stmt_info
);
2859 else if (modifier
!= NARROW
|| (j
& 1) == 0)
2860 vec_mask
= vect_get_vec_def_for_stmt_copy (loop_vinfo
,
2864 if (!useless_type_conversion_p (masktype
, TREE_TYPE (vec_mask
)))
2866 poly_uint64 sub1
= TYPE_VECTOR_SUBPARTS (TREE_TYPE (mask_op
));
2867 poly_uint64 sub2
= TYPE_VECTOR_SUBPARTS (masktype
);
2868 gcc_assert (known_eq (sub1
, sub2
));
2869 var
= vect_get_new_ssa_name (masktype
, vect_simple_var
);
2870 mask_op
= build1 (VIEW_CONVERT_EXPR
, masktype
, mask_op
);
2872 = gimple_build_assign (var
, VIEW_CONVERT_EXPR
, mask_op
);
2873 vect_finish_stmt_generation (stmt_info
, new_stmt
, gsi
);
/* [review] NARROW with a widened mask type: unpack the low/high half
   of the mask on even/odd copies.  */
2877 if (modifier
== NARROW
&& masktype
!= real_masktype
)
2879 var
= vect_get_new_ssa_name (mask_halftype
, vect_simple_var
);
2881 = gimple_build_assign (var
, (j
& 1) ? VEC_UNPACK_HI_EXPR
2882 : VEC_UNPACK_LO_EXPR
,
2884 vect_finish_stmt_generation (stmt_info
, new_stmt
, gsi
);
/* [review] If the builtin wants an integer mask, view-convert the
   boolean vector through a same-mode integer type, then NOP-extend to
   the exact mask precision if needed.  */
2890 tree mask_arg
= mask_op
;
2891 if (masktype
!= real_masktype
)
2893 tree utype
, optype
= TREE_TYPE (mask_op
);
2894 if (TYPE_MODE (real_masktype
) == TYPE_MODE (optype
))
2895 utype
= real_masktype
;
2897 utype
= lang_hooks
.types
.type_for_mode (TYPE_MODE (optype
), 1);
2898 var
= vect_get_new_ssa_name (utype
, vect_scalar_var
);
2899 mask_arg
= build1 (VIEW_CONVERT_EXPR
, utype
, mask_op
);
2901 = gimple_build_assign (var
, VIEW_CONVERT_EXPR
, mask_arg
);
2902 vect_finish_stmt_generation (stmt_info
, new_stmt
, gsi
);
2904 if (!useless_type_conversion_p (real_masktype
, utype
))
2906 gcc_assert (TYPE_PRECISION (utype
)
2907 <= TYPE_PRECISION (real_masktype
));
2908 var
= vect_get_new_ssa_name (real_masktype
, vect_scalar_var
);
2909 new_stmt
= gimple_build_assign (var
, NOP_EXPR
, mask_arg
);
2910 vect_finish_stmt_generation (stmt_info
, new_stmt
, gsi
);
2913 src_op
= build_zero_cst (srctype
);
/* [review] Emit the 5-operand gather call and view-convert its result
   back to VECTYPE when the builtin's return type differs.  */
2915 gcall
*new_call
= gimple_build_call (gs_info
->decl
, 5, src_op
, ptr
, op
,
2918 stmt_vec_info new_stmt_info
;
2919 if (!useless_type_conversion_p (vectype
, rettype
))
2921 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (vectype
),
2922 TYPE_VECTOR_SUBPARTS (rettype
)));
2923 op
= vect_get_new_ssa_name (rettype
, vect_simple_var
);
2924 gimple_call_set_lhs (new_call
, op
);
2925 vect_finish_stmt_generation (stmt_info
, new_call
, gsi
);
2926 var
= make_ssa_name (vec_dest
);
2927 op
= build1 (VIEW_CONVERT_EXPR
, vectype
, op
);
2928 gassign
*new_stmt
= gimple_build_assign (var
, VIEW_CONVERT_EXPR
, op
);
2930 = vect_finish_stmt_generation (stmt_info
, new_stmt
, gsi
);
2934 var
= make_ssa_name (vec_dest
, new_call
);
2935 gimple_call_set_lhs (new_call
, var
);
2937 = vect_finish_stmt_generation (stmt_info
, new_call
, gsi
);
/* [review] NARROW combines two half-width results with perm_mask on
   odd copies; chain the resulting stmt infos via RELATED_STMT.  */
2940 if (modifier
== NARROW
)
2947 var
= permute_vec_elements (prev_res
, var
, perm_mask
,
2949 new_stmt_info
= loop_vinfo
->lookup_def (var
);
2952 if (prev_stmt_info
== NULL
)
2953 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt_info
;
2955 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt_info
;
2956 prev_stmt_info
= new_stmt_info
;
2960 /* Prepare the base and offset in GS_INFO for vectorization.
2961 Set *DATAREF_PTR to the loop-invariant base address and *VEC_OFFSET
2962 to the vectorized offset argument for the first copy of STMT_INFO.
2963 STMT_INFO is the statement described by GS_INFO and LOOP is the
2967 vect_get_gather_scatter_ops (class loop
*loop
, stmt_vec_info stmt_info
,
2968 gather_scatter_info
*gs_info
,
2969 tree
*dataref_ptr
, tree
*vec_offset
)
2971 vec_info
*vinfo
= stmt_info
->vinfo
;
2972 gimple_seq stmts
= NULL
;
2973 *dataref_ptr
= force_gimple_operand (gs_info
->base
, &stmts
, true, NULL_TREE
);
2977 edge pe
= loop_preheader_edge (loop
);
2978 new_bb
= gsi_insert_seq_on_edge_immediate (pe
, stmts
);
2979 gcc_assert (!new_bb
);
2981 tree offset_type
= TREE_TYPE (gs_info
->offset
);
2982 tree offset_vectype
= get_vectype_for_scalar_type (vinfo
, offset_type
);
2983 *vec_offset
= vect_get_vec_def_for_operand (gs_info
->offset
, stmt_info
,
2987 /* Prepare to implement a grouped or strided load or store using
2988 the gather load or scatter store operation described by GS_INFO.
2989 STMT_INFO is the load or store statement.
2991 Set *DATAREF_BUMP to the amount that should be added to the base
2992 address after each copy of the vectorized statement. Set *VEC_OFFSET
2993 to an invariant offset vector in which element I has the value
2994 I * DR_STEP / SCALE. */
2997 vect_get_strided_load_store_ops (stmt_vec_info stmt_info
,
2998 loop_vec_info loop_vinfo
,
2999 gather_scatter_info
*gs_info
,
3000 tree
*dataref_bump
, tree
*vec_offset
)
3002 struct data_reference
*dr
= STMT_VINFO_DATA_REF (stmt_info
);
3003 class loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
3004 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
3007 tree bump
= size_binop (MULT_EXPR
,
3008 fold_convert (sizetype
, DR_STEP (dr
)),
3009 size_int (TYPE_VECTOR_SUBPARTS (vectype
)));
3010 *dataref_bump
= force_gimple_operand (bump
, &stmts
, true, NULL_TREE
);
3012 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop
), stmts
);
3014 /* The offset given in GS_INFO can have pointer type, so use the element
3015 type of the vector instead. */
3016 tree offset_type
= TREE_TYPE (gs_info
->offset
);
3017 tree offset_vectype
= get_vectype_for_scalar_type (loop_vinfo
, offset_type
);
3018 offset_type
= TREE_TYPE (offset_vectype
);
3020 /* Calculate X = DR_STEP / SCALE and convert it to the appropriate type. */
3021 tree step
= size_binop (EXACT_DIV_EXPR
, DR_STEP (dr
),
3022 ssize_int (gs_info
->scale
));
3023 step
= fold_convert (offset_type
, step
);
3024 step
= force_gimple_operand (step
, &stmts
, true, NULL_TREE
);
3026 /* Create {0, X, X*2, X*3, ...}. */
3027 *vec_offset
= gimple_build (&stmts
, VEC_SERIES_EXPR
, offset_vectype
,
3028 build_zero_cst (offset_type
), step
);
3030 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop
), stmts
);
3033 /* Return the amount that should be added to a vector pointer to move
3034 to the next or previous copy of AGGR_TYPE. DR_INFO is the data reference
3035 being vectorized and MEMORY_ACCESS_TYPE describes the type of
3039 vect_get_data_ptr_increment (dr_vec_info
*dr_info
, tree aggr_type
,
3040 vect_memory_access_type memory_access_type
)
3042 if (memory_access_type
== VMAT_INVARIANT
)
3043 return size_zero_node
;
3045 tree iv_step
= TYPE_SIZE_UNIT (aggr_type
);
3046 tree step
= vect_dr_behavior (dr_info
)->step
;
3047 if (tree_int_cst_sgn (step
) == -1)
3048 iv_step
= fold_build1 (NEGATE_EXPR
, TREE_TYPE (iv_step
), iv_step
);
3052 /* Check and perform vectorization of BUILT_IN_BSWAP{16,32,64}. */
/* NOTE(review): this region is a garbled extraction -- statements are
   split across physical lines and the embedded original line numbers jump
   (e.g. 3057 -> 3060), so some lines are missing.  The comments added
   below describe only the logic that is visible here.  */
/* STMT_INFO is the bswap call; on the transform phase (VEC_STMT non-null)
   the vectorized statements are emitted at GSI.  The strategy is to model
   the byte swap as a constant byte permutation on a same-sized vector of
   chars.  */
3055 vectorizable_bswap (stmt_vec_info stmt_info
, gimple_stmt_iterator
*gsi
,
3056 stmt_vec_info
*vec_stmt
, slp_tree slp_node
,
3057 tree vectype_in
, stmt_vector_for_cost
*cost_vec
)
/* The call being vectorized plus its owning vec_info / loop info.  */
3060 gcall
*stmt
= as_a
<gcall
*> (stmt_info
->stmt
);
3061 vec_info
*vinfo
= stmt_info
->vinfo
;
3062 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
/* Single scalar operand of the bswap and the output vector type.  */
3065 op
= gimple_call_arg (stmt
, 0);
3066 vectype
= STMT_VINFO_VECTYPE (stmt_info
);
3067 poly_uint64 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
3069 /* Multiple types in SLP are handled by creating the appropriate number of
3070 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
3075 ncopies
= vect_get_num_copies (loop_vinfo
, vectype
);
3077 gcc_assert (ncopies
>= 1);
/* Reinterpret the input as a vector of bytes of the same total size.  */
3079 tree char_vectype
= get_same_sized_vectype (char_type_node
, vectype_in
);
/* Each element of VECTYPE covers WORD_BYTES bytes of CHAR_VECTYPE.  */
3083 poly_uint64 num_bytes
= TYPE_VECTOR_SUBPARTS (char_vectype
);
3084 unsigned word_bytes
;
3085 if (!constant_multiple_p (num_bytes
, nunits
, &word_bytes
))
3088 /* The encoding uses one stepped pattern for each byte in the word. */
3089 vec_perm_builder
elts (num_bytes
, word_bytes
, 3);
3090 for (unsigned i
= 0; i
< 3; ++i
)
3091 for (unsigned j
= 0; j
< word_bytes
; ++j
)
3092 elts
.quick_push ((i
+ 1) * word_bytes
- j
- 1);
/* Bail out if the target cannot do this constant byte permutation.  */
3094 vec_perm_indices
indices (elts
, 1, num_bytes
);
3095 if (!can_vec_perm_const_p (TYPE_MODE (char_vectype
), indices
))
/* Analysis-only path: record the statement kind and its costs (one
   prologue stmt for the selector, one permute per copy).  */
3100 STMT_VINFO_TYPE (stmt_info
) = call_vec_info_type
;
3101 DUMP_VECT_SCOPE ("vectorizable_bswap");
3104 record_stmt_cost (cost_vec
,
3105 1, vector_stmt
, stmt_info
, 0, vect_prologue
);
3106 record_stmt_cost (cost_vec
,
3107 ncopies
, vec_perm
, stmt_info
, 0, vect_body
);
/* Transform: materialize the permutation selector as a vector constant.  */
3112 tree bswap_vconst
= vec_perm_indices_to_tree (char_vectype
, indices
);
3115 vec
<tree
> vec_oprnds
= vNULL
;
3116 stmt_vec_info new_stmt_info
= NULL
;
3117 stmt_vec_info prev_stmt_info
= NULL
;
/* For each copy: view-convert to the char vector, permute the bytes,
   then view-convert back to the original vector type.  */
3118 for (unsigned j
= 0; j
< ncopies
; j
++)
3122 vect_get_vec_defs (op
, NULL
, stmt_info
, &vec_oprnds
, NULL
, slp_node
);
3124 vect_get_vec_defs_for_stmt_copy (vinfo
, &vec_oprnds
, NULL
);
3126 /* Arguments are ready. create the new vector stmt. */
3129 FOR_EACH_VEC_ELT (vec_oprnds
, i
, vop
)
3132 tree tem
= make_ssa_name (char_vectype
);
3133 new_stmt
= gimple_build_assign (tem
, build1 (VIEW_CONVERT_EXPR
,
3134 char_vectype
, vop
));
3135 vect_finish_stmt_generation (stmt_info
, new_stmt
, gsi
);
3136 tree tem2
= make_ssa_name (char_vectype
);
3137 new_stmt
= gimple_build_assign (tem2
, VEC_PERM_EXPR
,
3138 tem
, tem
, bswap_vconst
);
3139 vect_finish_stmt_generation (stmt_info
, new_stmt
, gsi
);
3140 tem
= make_ssa_name (vectype
);
3141 new_stmt
= gimple_build_assign (tem
, build1 (VIEW_CONVERT_EXPR
,
3144 = vect_finish_stmt_generation (stmt_info
, new_stmt
, gsi
);
/* In the SLP case the new stmts are collected on the SLP node.  */
3146 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt_info
);
/* Record the first vector stmt and chain the per-copy stmts via
   STMT_VINFO_RELATED_STMT.  */
3153 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt_info
;
3155 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt_info
;
3157 prev_stmt_info
= new_stmt_info
;
3160 vec_oprnds
.release ();
3164 /* Return true if vector types VECTYPE_IN and VECTYPE_OUT have
3165 integer elements and if we can narrow VECTYPE_IN to VECTYPE_OUT
3166 in a single step. On success, store the binary pack code in
3170 simple_integer_narrowing (vec_info
*vinfo
, tree vectype_out
, tree vectype_in
,
3171 tree_code
*convert_code
)
3173 if (!INTEGRAL_TYPE_P (TREE_TYPE (vectype_out
))
3174 || !INTEGRAL_TYPE_P (TREE_TYPE (vectype_in
)))
3178 int multi_step_cvt
= 0;
3179 auto_vec
<tree
, 8> interm_types
;
3180 if (!supportable_narrowing_operation (vinfo
, NOP_EXPR
, vectype_out
,
3181 vectype_in
, &code
, &multi_step_cvt
,
3186 *convert_code
= code
;
3190 /* Function vectorizable_call.
3192 Check if STMT_INFO performs a function call that can be vectorized.
3193 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
3194 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
3195 Return true if STMT_INFO is vectorizable in this way. */
/* NOTE(review): this region is a garbled extraction -- statements are
   split across physical lines and the embedded original line numbers jump,
   so lines (braces, declarations, returns) are missing.  The comments
   added below annotate only the logic that is visible.  */
3198 vectorizable_call (stmt_vec_info stmt_info
, gimple_stmt_iterator
*gsi
,
3199 stmt_vec_info
*vec_stmt
, slp_tree slp_node
,
3200 stmt_vector_for_cost
*cost_vec
)
/* Local state.  DT/VECTYPES track the definition kind and vector type of
   up to four call arguments; MODIFIER records whether the call narrows,
   widens or keeps the number of vector elements.  */
3206 tree vec_oprnd0
= NULL_TREE
, vec_oprnd1
= NULL_TREE
;
3207 stmt_vec_info prev_stmt_info
;
3208 tree vectype_out
, vectype_in
;
3209 poly_uint64 nunits_in
;
3210 poly_uint64 nunits_out
;
3211 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
3212 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
3213 vec_info
*vinfo
= stmt_info
->vinfo
;
3214 tree fndecl
, new_temp
, rhs_type
;
3215 enum vect_def_type dt
[4]
3216 = { vect_unknown_def_type
, vect_unknown_def_type
, vect_unknown_def_type
,
3217 vect_unknown_def_type
};
3218 tree vectypes
[ARRAY_SIZE (dt
)] = {};
3219 int ndts
= ARRAY_SIZE (dt
);
3221 auto_vec
<tree
, 8> vargs
;
3222 auto_vec
<tree
, 8> orig_vargs
;
3223 enum { NARROW
, NONE
, WIDEN
} modifier
;
/* Only relevant internal-def statements are considered.  */
3227 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
3230 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
3234 /* Is STMT_INFO a vectorizable call? */
3235 stmt
= dyn_cast
<gcall
*> (stmt_info
->stmt
);
/* Internal load/store functions are vectorized elsewhere.  */
3239 if (gimple_call_internal_p (stmt
)
3240 && (internal_load_fn_p (gimple_call_internal_fn (stmt
))
3241 || internal_store_fn_p (gimple_call_internal_fn (stmt
))))
3242 /* Handled by vectorizable_load and vectorizable_store. */
3245 if (gimple_call_lhs (stmt
) == NULL_TREE
3246 || TREE_CODE (gimple_call_lhs (stmt
)) != SSA_NAME
)
3249 gcc_checking_assert (!stmt_can_throw_internal (cfun
, stmt
));
3251 vectype_out
= STMT_VINFO_VECTYPE (stmt_info
);
3253 /* Process function arguments. */
3254 rhs_type
= NULL_TREE
;
3255 vectype_in
= NULL_TREE
;
3256 nargs
= gimple_call_num_args (stmt
);
3258 /* Bail out if the function has more than three arguments, we do not have
3259 interesting builtin functions to vectorize with more than two arguments
3260 except for fma. No arguments is also not good. */
3261 if (nargs
== 0 || nargs
> 4)
3264 /* Ignore the arguments of IFN_GOMP_SIMD_LANE, they are magic. */
3265 combined_fn cfn
= gimple_call_combined_fn (stmt
);
3266 if (cfn
== CFN_GOMP_SIMD_LANE
)
3269 rhs_type
= unsigned_type_node
;
/* MASK_OPNO is the index of the mask operand of a masked internal fn,
   or negative if there is none.  */
3273 if (internal_fn_p (cfn
))
3274 mask_opno
= internal_fn_mask_index (as_internal_fn (cfn
));
/* Check every argument is a "simple use" and that all non-mask arguments
   share one scalar type (RHS_TYPE) and one vector type (VECTYPE_IN).  */
3276 for (i
= 0; i
< nargs
; i
++)
3278 op
= gimple_call_arg (stmt
, i
);
3279 if (!vect_is_simple_use (op
, vinfo
, &dt
[i
], &vectypes
[i
]))
3281 if (dump_enabled_p ())
3282 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3283 "use not simple.\n");
3287 /* Skip the mask argument to an internal function. This operand
3288 has been converted via a pattern if necessary. */
3289 if ((int) i
== mask_opno
)
3292 /* We can only handle calls with arguments of the same type. */
3294 && !types_compatible_p (rhs_type
, TREE_TYPE (op
)))
3296 if (dump_enabled_p ())
3297 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3298 "argument types differ.\n");
3302 rhs_type
= TREE_TYPE (op
);
3305 vectype_in
= vectypes
[i
];
3306 else if (vectypes
[i
]
3307 && !types_compatible_p (vectypes
[i
], vectype_in
))
3309 if (dump_enabled_p ())
3310 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3311 "argument vector types differ.\n");
3315 /* If all arguments are external or constant defs use a vector type with
3316 the same size as the output vector type. */
3318 vectype_in
= get_same_sized_vectype (rhs_type
, vectype_out
)
;
3320 gcc_assert (vectype_in
);
3323 if (dump_enabled_p ())
3324 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3325 "no vectype for scalar type %T\n", rhs_type
);
/* Classify the call as widening, narrowing or element-preserving by
   comparing the element counts of the input and output vector types.  */
3331 nunits_in
= TYPE_VECTOR_SUBPARTS (vectype_in
);
3332 nunits_out
= TYPE_VECTOR_SUBPARTS (vectype_out
);
3333 if (known_eq (nunits_in
* 2, nunits_out
))
3335 else if (known_eq (nunits_out
, nunits_in
))
3337 else if (known_eq (nunits_out
* 2, nunits_in
))
3342 /* We only handle functions that do not read or clobber memory. */
3343 if (gimple_vuse (stmt
))
3345 if (dump_enabled_p ())
3346 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3347 "function reads from or writes to memory.\n");
3351 /* For now, we only vectorize functions if a target specific builtin
3352 is available. TODO -- in some cases, it might be profitable to
3353 insert the calls for pieces of the vector, in order to be able
3354 to vectorize other operations in the loop. */
/* Choose an implementation: first an internal function, then the target
   hooks for built-in functions.  */
3356 internal_fn ifn
= IFN_LAST
;
3357 tree callee
= gimple_call_fndecl (stmt
);
3359 /* First try using an internal function. */
3360 tree_code convert_code
= ERROR_MARK
;
3362 && (modifier
== NONE
3363 || (modifier
== NARROW
3364 && simple_integer_narrowing (vinfo
, vectype_out
, vectype_in
,
3366 ifn
= vectorizable_internal_function (cfn
, callee
, vectype_out
,
3369 /* If that fails, try asking for a target-specific built-in function. */
3370 if (ifn
== IFN_LAST
)
3372 if (cfn
!= CFN_LAST
)
3373 fndecl
= targetm
.vectorize
.builtin_vectorized_function
3374 (cfn
, vectype_out
, vectype_in
);
3375 else if (callee
&& fndecl_built_in_p (callee
, BUILT_IN_MD
))
3376 fndecl
= targetm
.vectorize
.builtin_md_vectorized_function
3377 (callee
, vectype_out
, vectype_in
);
/* No implementation found: handle the two remaining special cases
   (GOMP_SIMD_LANE, bswap) or give up.  */
3380 if (ifn
== IFN_LAST
&& !fndecl
)
3382 if (cfn
== CFN_GOMP_SIMD_LANE
3385 && LOOP_VINFO_LOOP (loop_vinfo
)->simduid
3386 && TREE_CODE (gimple_call_arg (stmt
, 0)) == SSA_NAME
3387 && LOOP_VINFO_LOOP (loop_vinfo
)->simduid
3388 == SSA_NAME_VAR (gimple_call_arg (stmt
, 0)))
3390 /* We can handle IFN_GOMP_SIMD_LANE by returning a
3391 { 0, 1, 2, ... vf - 1 } vector. */
3392 gcc_assert (nargs
== 0);
3394 else if (modifier
== NONE
3395 && (gimple_call_builtin_p (stmt
, BUILT_IN_BSWAP16
)
3396 || gimple_call_builtin_p (stmt
, BUILT_IN_BSWAP32
)
3397 || gimple_call_builtin_p (stmt
, BUILT_IN_BSWAP64
)))
3398 return vectorizable_bswap (stmt_info
, gsi
, vec_stmt
, slp_node
,
3399 vectype_in
, cost_vec
);
3402 if (dump_enabled_p ())
3403 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3404 "function is not vectorizable.\n");
/* Number of vector copies: narrowing without an internal fn is counted
   against the output type, otherwise against the input type.  */
3411 else if (modifier
== NARROW
&& ifn
== IFN_LAST
)
3412 ncopies
= vect_get_num_copies (loop_vinfo
, vectype_out
);
3414 ncopies
= vect_get_num_copies (loop_vinfo
, vectype_in
);
3416 /* Sanity check: make sure that at least one copy of the vectorized stmt
3417 needs to be generated. */
3418 gcc_assert (ncopies
>= 1);
3420 vec_loop_masks
*masks
= (loop_vinfo
? &LOOP_VINFO_MASKS (loop_vinfo
) : NULL
);
/* Analysis phase: record stmt type, costs, and any loop masks needed.  */
3421 if (!vec_stmt
) /* transformation not required. */
3423 STMT_VINFO_TYPE (stmt_info
) = call_vec_info_type
;
3424 DUMP_VECT_SCOPE ("vectorizable_call");
3425 vect_model_simple_cost (stmt_info
, ncopies
, dt
, ndts
, slp_node
, cost_vec
);
3426 if (ifn
!= IFN_LAST
&& modifier
== NARROW
&& !slp_node
)
3427 record_stmt_cost (cost_vec
, ncopies
/ 2,
3428 vec_promote_demote
, stmt_info
, 0, vect_body
);
3430 if (loop_vinfo
&& mask_opno
>= 0)
3432 unsigned int nvectors
= (slp_node
3433 ? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
)
3435 tree scalar_mask
= gimple_call_arg (stmt_info
->stmt
, mask_opno
);
3436 vect_record_loop_mask (loop_vinfo
, masks
, nvectors
,
3437 vectype_out
, scalar_mask
);
/* Transform phase.  */
3444 if (dump_enabled_p ())
3445 dump_printf_loc (MSG_NOTE
, vect_location
, "transform call.\n");
3448 scalar_dest
= gimple_call_lhs (stmt
);
3449 vec_dest
= vect_create_destination_var (scalar_dest
, vectype_out
);
3451 bool masked_loop_p
= loop_vinfo
&& LOOP_VINFO_FULLY_MASKED_P (loop_vinfo
);
3453 stmt_vec_info new_stmt_info
= NULL
;
3454 prev_stmt_info
= NULL
;
/* Case 1: element-preserving call, or an internal fn (including the
   internal-fn narrowing case, which combines pairs with CONVERT_CODE).  */
3455 if (modifier
== NONE
|| ifn
!= IFN_LAST
)
3457 tree prev_res
= NULL_TREE
;
3458 vargs
.safe_grow (nargs
);
3459 orig_vargs
.safe_grow (nargs
);
3460 for (j
= 0; j
< ncopies
; ++j
)
3462 /* Build argument list for the vectorized call. */
3465 auto_vec
<vec
<tree
> > vec_defs (nargs
);
3466 vec
<tree
> vec_oprnds0
;
3468 vect_get_slp_defs (slp_node
, &vec_defs
);
3469 vec_oprnds0
= vec_defs
[0];
3471 /* Arguments are ready. Create the new vector stmt. */
3472 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vec_oprnd0
)
3475 for (k
= 0; k
< nargs
; k
++)
3477 vec
<tree
> vec_oprndsk
= vec_defs
[k
];
3478 vargs
[k
] = vec_oprndsk
[i
];
3480 if (modifier
== NARROW
)
3482 /* We don't define any narrowing conditional functions
3484 gcc_assert (mask_opno
< 0);
3485 tree half_res
= make_ssa_name (vectype_in
);
3487 = gimple_build_call_internal_vec (ifn
, vargs
);
3488 gimple_call_set_lhs (call
, half_res
);
3489 gimple_call_set_nothrow (call
, true);
3490 vect_finish_stmt_generation (stmt_info
, call
, gsi
);
3493 prev_res
= half_res
;
3496 new_temp
= make_ssa_name (vec_dest
);
3498 = gimple_build_assign (new_temp
, convert_code
,
3499 prev_res
, half_res
);
3501 = vect_finish_stmt_generation (stmt_info
, new_stmt
,
/* For fully-masked loops, AND the call's mask argument with the
   loop mask for this copy.  */
3506 if (mask_opno
>= 0 && masked_loop_p
)
3508 unsigned int vec_num
= vec_oprnds0
.length ();
3509 /* Always true for SLP. */
3510 gcc_assert (ncopies
== 1);
3511 tree mask
= vect_get_loop_mask (gsi
, masks
, vec_num
,
3513 vargs
[mask_opno
] = prepare_load_store_mask
3514 (TREE_TYPE (mask
), mask
, vargs
[mask_opno
], gsi
);
3518 if (ifn
!= IFN_LAST
)
3519 call
= gimple_build_call_internal_vec (ifn
, vargs
);
3521 call
= gimple_build_call_vec (fndecl
, vargs
);
3522 new_temp
= make_ssa_name (vec_dest
, call
);
3523 gimple_call_set_lhs (call
, new_temp
);
3524 gimple_call_set_nothrow (call
, true);
3526 = vect_finish_stmt_generation (stmt_info
, call
, gsi
);
3528 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt_info
);
3531 for (i
= 0; i
< nargs
; i
++)
3533 vec
<tree
> vec_oprndsi
= vec_defs
[i
];
3534 vec_oprndsi
.release ();
3539 if (mask_opno
>= 0 && !vectypes
[mask_opno
])
3541 gcc_assert (modifier
!= WIDEN
);
3543 = build_same_sized_truth_vector_type (vectype_in
);
/* Non-SLP: fetch a vector def for each argument (first copy) or the
   def for the next copy of the previous vector stmt.  */
3546 for (i
= 0; i
< nargs
; i
++)
3548 op
= gimple_call_arg (stmt
, i
);
3551 = vect_get_vec_def_for_operand (op
, stmt_info
, vectypes
[i
]);
3554 = vect_get_vec_def_for_stmt_copy (vinfo
, orig_vargs
[i
]);
3556 orig_vargs
[i
] = vargs
[i
] = vec_oprnd0
;
3559 if (mask_opno
>= 0 && masked_loop_p
)
3561 tree mask
= vect_get_loop_mask (gsi
, masks
, ncopies
,
3564 = prepare_load_store_mask (TREE_TYPE (mask
), mask
,
3565 vargs
[mask_opno
], gsi
);
/* GOMP_SIMD_LANE: emit the constant { j*nunits, ..., } index vector
   instead of a real call.  */
3568 if (cfn
== CFN_GOMP_SIMD_LANE
)
3570 tree cst
= build_index_vector (vectype_out
, j
* nunits_out
, 1);
3572 = vect_get_new_ssa_name (vectype_out
, vect_simple_var
, "cst_");
3573 gimple
*init_stmt
= gimple_build_assign (new_var
, cst
);
3574 vect_init_vector_1 (stmt_info
, init_stmt
, NULL
);
3575 new_temp
= make_ssa_name (vec_dest
);
3576 gimple
*new_stmt
= gimple_build_assign (new_temp
, new_var
);
3578 = vect_finish_stmt_generation (stmt_info
, new_stmt
, gsi
);
3580 else if (modifier
== NARROW
)
3582 /* We don't define any narrowing conditional functions at
3584 gcc_assert (mask_opno
< 0);
3585 tree half_res
= make_ssa_name (vectype_in
);
3586 gcall
*call
= gimple_build_call_internal_vec (ifn
, vargs
);
3587 gimple_call_set_lhs (call
, half_res
);
3588 gimple_call_set_nothrow (call
, true);
3589 vect_finish_stmt_generation (stmt_info
, call
, gsi
);
3592 prev_res
= half_res
;
3595 new_temp
= make_ssa_name (vec_dest
);
3596 gassign
*new_stmt
= gimple_build_assign (new_temp
, convert_code
,
3597 prev_res
, half_res
);
3599 = vect_finish_stmt_generation (stmt_info
, new_stmt
, gsi
);
3604 if (ifn
!= IFN_LAST
)
3605 call
= gimple_build_call_internal_vec (ifn
, vargs
);
3607 call
= gimple_build_call_vec (fndecl
, vargs
);
3608 new_temp
= make_ssa_name (vec_dest
, call
);
3609 gimple_call_set_lhs (call
, new_temp
);
3610 gimple_call_set_nothrow (call
, true);
3612 = vect_finish_stmt_generation (stmt_info
, call
, gsi
);
/* For NARROW two input copies produce one result, so the first real
   result appears at j == 1; chain subsequent copies.  */
3615 if (j
== (modifier
== NARROW
? 1 : 0))
3616 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt_info
;
3618 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt_info
;
3620 prev_stmt_info
= new_stmt_info
;
/* Case 2: NARROW without an internal fn -- each vectorized call takes
   two input vectors per argument (a binary pack-style call).  */
3623 else if (modifier
== NARROW
)
3625 /* We don't define any narrowing conditional functions at present. */
3626 gcc_assert (mask_opno
< 0);
3627 for (j
= 0; j
< ncopies
; ++j
)
3629 /* Build argument list for the vectorized call. */
3631 vargs
.create (nargs
* 2);
3637 auto_vec
<vec
<tree
> > vec_defs (nargs
);
3638 vec
<tree
> vec_oprnds0
;
3640 vect_get_slp_defs (slp_node
, &vec_defs
);
3641 vec_oprnds0
= vec_defs
[0];
3643 /* Arguments are ready. Create the new vector stmt. */
3644 for (i
= 0; vec_oprnds0
.iterate (i
, &vec_oprnd0
); i
+= 2)
3648 for (k
= 0; k
< nargs
; k
++)
3650 vec
<tree
> vec_oprndsk
= vec_defs
[k
];
3651 vargs
.quick_push (vec_oprndsk
[i
]);
3652 vargs
.quick_push (vec_oprndsk
[i
+ 1]);
3655 if (ifn
!= IFN_LAST
)
3656 call
= gimple_build_call_internal_vec (ifn
, vargs
);
3658 call
= gimple_build_call_vec (fndecl
, vargs
);
3659 new_temp
= make_ssa_name (vec_dest
, call
);
3660 gimple_call_set_lhs (call
, new_temp
);
3661 gimple_call_set_nothrow (call
, true);
3663 = vect_finish_stmt_generation (stmt_info
, call
, gsi
);
3664 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt_info
);
3667 for (i
= 0; i
< nargs
; i
++)
3669 vec
<tree
> vec_oprndsi
= vec_defs
[i
];
3670 vec_oprndsi
.release ();
/* Non-SLP: push two vector defs per argument for every copy.  */
3675 for (i
= 0; i
< nargs
; i
++)
3677 op
= gimple_call_arg (stmt
, i
);
3681 = vect_get_vec_def_for_operand (op
, stmt_info
,
3684 = vect_get_vec_def_for_stmt_copy (vinfo
, vec_oprnd0
);
3688 vec_oprnd1
= gimple_call_arg (new_stmt_info
->stmt
,
3691 = vect_get_vec_def_for_stmt_copy (vinfo
, vec_oprnd1
);
3693 = vect_get_vec_def_for_stmt_copy (vinfo
, vec_oprnd0
);
3696 vargs
.quick_push (vec_oprnd0
);
3697 vargs
.quick_push (vec_oprnd1
);
3700 gcall
*new_stmt
= gimple_build_call_vec (fndecl
, vargs
);
3701 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
3702 gimple_call_set_lhs (new_stmt
, new_temp
);
3704 = vect_finish_stmt_generation (stmt_info
, new_stmt
, gsi
);
3707 STMT_VINFO_VEC_STMT (stmt_info
) = new_stmt_info
;
3709 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt_info
;
3711 prev_stmt_info
= new_stmt_info
;
3714 *vec_stmt
= STMT_VINFO_VEC_STMT (stmt_info
);
3717 /* No current target implements this case. */
3722 /* The call in STMT might prevent it from being removed in dce.
3723 We however cannot remove it here, due to the way the ssa name
3724 it defines is mapped to the new definition. So just replace
3725 rhs of the statement with something harmless. */
3730 stmt_info
= vect_orig_stmt (stmt_info
);
3731 lhs
= gimple_get_lhs (stmt_info
->stmt
);
3734 = gimple_build_assign (lhs
, build_zero_cst (TREE_TYPE (lhs
)));
3735 vinfo
->replace_stmt (gsi
, stmt_info
, new_stmt
);
3741 struct simd_call_arg_info
3745 HOST_WIDE_INT linear_step
;
3746 enum vect_def_type dt
;
3748 bool simd_lane_linear
;
3751 /* Helper function of vectorizable_simd_clone_call. If OP, an SSA_NAME,
3752 is linear within simd lane (but not within whole loop), note it in
3756 vect_simd_lane_linear (tree op
, class loop
*loop
,
3757 struct simd_call_arg_info
*arginfo
)
/* NOTE(review): garbled extraction -- statements span several physical
   lines and some lines (switch labels, breaks, returns) are missing.
   Comments below annotate only what is visible.  */
/* OP must be defined as BASE p+ V with an invariant BASE; otherwise it
   cannot be simd-lane linear.  */
3759 gimple
*def_stmt
= SSA_NAME_DEF_STMT (op
);
3761 if (!is_gimple_assign (def_stmt
)
3762 || gimple_assign_rhs_code (def_stmt
) != POINTER_PLUS_EXPR
3763 || !is_gimple_min_invariant (gimple_assign_rhs1 (def_stmt
)))
3766 tree base
= gimple_assign_rhs1 (def_stmt
);
3767 HOST_WIDE_INT linear_step
= 0;
3768 tree v
= gimple_assign_rhs2 (def_stmt
);
/* Walk the SSA chain computing the offset V, folding constant additions
   into BASE and picking up at most one constant multiplication as the
   per-lane LINEAR_STEP, until the GOMP_SIMD_LANE definition is found.  */
3769 while (TREE_CODE (v
) == SSA_NAME
)
3772 def_stmt
= SSA_NAME_DEF_STMT (v
);
3773 if (is_gimple_assign (def_stmt
))
3774 switch (gimple_assign_rhs_code (def_stmt
))
3777 t
= gimple_assign_rhs2 (def_stmt
);
3778 if (linear_step
|| TREE_CODE (t
) != INTEGER_CST
)
3780 base
= fold_build2 (POINTER_PLUS_EXPR
, TREE_TYPE (base
), base
, t
);
3781 v
= gimple_assign_rhs1 (def_stmt
);
3784 t
= gimple_assign_rhs2 (def_stmt
);
3785 if (linear_step
|| !tree_fits_shwi_p (t
) || integer_zerop (t
))
3787 linear_step
= tree_to_shwi (t
);
3788 v
= gimple_assign_rhs1 (def_stmt
);
/* Conversions are only looked through when they do not narrow.  */
3791 t
= gimple_assign_rhs1 (def_stmt
);
3792 if (TREE_CODE (TREE_TYPE (t
)) != INTEGER_TYPE
3793 || (TYPE_PRECISION (TREE_TYPE (v
))
< TYPE_PRECISION (TREE_TYPE (t
))))
/* Success case: V is the simd lane index of this loop's simduid.  */
3803 else if (gimple_call_internal_p (def_stmt
, IFN_GOMP_SIMD_LANE
)
3805 && TREE_CODE (gimple_call_arg (def_stmt
, 0)) == SSA_NAME
3806 && (SSA_NAME_VAR (gimple_call_arg (def_stmt
, 0))
/* Record the detected per-lane step in ARGINFO.  */
3811 arginfo
->linear_step
= linear_step
;
3813 arginfo
->simd_lane_linear
= true;
3819 /* Return the number of elements in vector type VECTYPE, which is associated
3820 with a SIMD clone. At present these vectors always have a constant
3823 static unsigned HOST_WIDE_INT
3824 simd_clone_subparts (tree vectype
)
3826 return TYPE_VECTOR_SUBPARTS (vectype
).to_constant ();
3829 /* Function vectorizable_simd_clone_call.
3831 Check if STMT_INFO performs a function call that can be vectorized
3832 by calling a simd clone of the function.
3833 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
3834 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
3835 Return true if STMT_INFO is vectorizable in this way. */
3838 vectorizable_simd_clone_call (stmt_vec_info stmt_info
,
3839 gimple_stmt_iterator
*gsi
,
3840 stmt_vec_info
*vec_stmt
, slp_tree slp_node
,
3841 stmt_vector_for_cost
*)
3846 tree vec_oprnd0
= NULL_TREE
;
3847 stmt_vec_info prev_stmt_info
;
3849 unsigned int nunits
;
3850 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
3851 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
3852 vec_info
*vinfo
= stmt_info
->vinfo
;
3853 class loop
*loop
= loop_vinfo
? LOOP_VINFO_LOOP (loop_vinfo
) : NULL
;
3854 tree fndecl
, new_temp
;
3856 auto_vec
<simd_call_arg_info
> arginfo
;
3857 vec
<tree
> vargs
= vNULL
;
3859 tree lhs
, rtype
, ratype
;
3860 vec
<constructor_elt
, va_gc
> *ret_ctor_elts
= NULL
;
3862 /* Is STMT a vectorizable call? */
3863 gcall
*stmt
= dyn_cast
<gcall
*> (stmt_info
->stmt
);
3867 fndecl
= gimple_call_fndecl (stmt
);
3868 if (fndecl
== NULL_TREE
)
3871 struct cgraph_node
*node
= cgraph_node::get (fndecl
);
3872 if (node
== NULL
|| node
->simd_clones
== NULL
)
3875 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
3878 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
3882 if (gimple_call_lhs (stmt
)
3883 && TREE_CODE (gimple_call_lhs (stmt
)) != SSA_NAME
)
3886 gcc_checking_assert (!stmt_can_throw_internal (cfun
, stmt
));
3888 vectype
= STMT_VINFO_VECTYPE (stmt_info
);
3890 if (loop_vinfo
&& nested_in_vect_loop_p (loop
, stmt_info
))
3897 /* Process function arguments. */
3898 nargs
= gimple_call_num_args (stmt
);
3900 /* Bail out if the function has zero arguments. */
3904 arginfo
.reserve (nargs
, true);
3906 for (i
= 0; i
< nargs
; i
++)
3908 simd_call_arg_info thisarginfo
;
3911 thisarginfo
.linear_step
= 0;
3912 thisarginfo
.align
= 0;
3913 thisarginfo
.op
= NULL_TREE
;
3914 thisarginfo
.simd_lane_linear
= false;
3916 op
= gimple_call_arg (stmt
, i
);
3917 if (!vect_is_simple_use (op
, vinfo
, &thisarginfo
.dt
,
3918 &thisarginfo
.vectype
)
3919 || thisarginfo
.dt
== vect_uninitialized_def
)
3921 if (dump_enabled_p ())
3922 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3923 "use not simple.\n");
3927 if (thisarginfo
.dt
== vect_constant_def
3928 || thisarginfo
.dt
== vect_external_def
)
3929 gcc_assert (thisarginfo
.vectype
== NULL_TREE
);
3931 gcc_assert (thisarginfo
.vectype
!= NULL_TREE
);
3933 /* For linear arguments, the analyze phase should have saved
3934 the base and step in STMT_VINFO_SIMD_CLONE_INFO. */
3935 if (i
* 3 + 4 <= STMT_VINFO_SIMD_CLONE_INFO (stmt_info
).length ()
3936 && STMT_VINFO_SIMD_CLONE_INFO (stmt_info
)[i
* 3 + 2])
3938 gcc_assert (vec_stmt
);
3939 thisarginfo
.linear_step
3940 = tree_to_shwi (STMT_VINFO_SIMD_CLONE_INFO (stmt_info
)[i
* 3 + 2]);
3942 = STMT_VINFO_SIMD_CLONE_INFO (stmt_info
)[i
* 3 + 1];
3943 thisarginfo
.simd_lane_linear
3944 = (STMT_VINFO_SIMD_CLONE_INFO (stmt_info
)[i
* 3 + 3]
3945 == boolean_true_node
);
3946 /* If loop has been peeled for alignment, we need to adjust it. */
3947 tree n1
= LOOP_VINFO_NITERS_UNCHANGED (loop_vinfo
);
3948 tree n2
= LOOP_VINFO_NITERS (loop_vinfo
);
3949 if (n1
!= n2
&& !thisarginfo
.simd_lane_linear
)
3951 tree bias
= fold_build2 (MINUS_EXPR
, TREE_TYPE (n1
), n1
, n2
);
3952 tree step
= STMT_VINFO_SIMD_CLONE_INFO (stmt_info
)[i
* 3 + 2];
3953 tree opt
= TREE_TYPE (thisarginfo
.op
);
3954 bias
= fold_convert (TREE_TYPE (step
), bias
);
3955 bias
= fold_build2 (MULT_EXPR
, TREE_TYPE (step
), bias
, step
);
3957 = fold_build2 (POINTER_TYPE_P (opt
)
3958 ? POINTER_PLUS_EXPR
: PLUS_EXPR
, opt
,
3959 thisarginfo
.op
, bias
);
3963 && thisarginfo
.dt
!= vect_constant_def
3964 && thisarginfo
.dt
!= vect_external_def
3966 && TREE_CODE (op
) == SSA_NAME
3967 && simple_iv (loop
, loop_containing_stmt (stmt
), op
,
3969 && tree_fits_shwi_p (iv
.step
))
3971 thisarginfo
.linear_step
= tree_to_shwi (iv
.step
);
3972 thisarginfo
.op
= iv
.base
;
3974 else if ((thisarginfo
.dt
== vect_constant_def
3975 || thisarginfo
.dt
== vect_external_def
)
3976 && POINTER_TYPE_P (TREE_TYPE (op
)))
3977 thisarginfo
.align
= get_pointer_alignment (op
) / BITS_PER_UNIT
;
3978 /* Addresses of array elements indexed by GOMP_SIMD_LANE are
3980 if (POINTER_TYPE_P (TREE_TYPE (op
))
3981 && !thisarginfo
.linear_step
3983 && thisarginfo
.dt
!= vect_constant_def
3984 && thisarginfo
.dt
!= vect_external_def
3987 && TREE_CODE (op
) == SSA_NAME
)
3988 vect_simd_lane_linear (op
, loop
, &thisarginfo
);
3990 arginfo
.quick_push (thisarginfo
);
3993 unsigned HOST_WIDE_INT vf
;
3994 if (!LOOP_VINFO_VECT_FACTOR (loop_vinfo
).is_constant (&vf
))
3996 if (dump_enabled_p ())
3997 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3998 "not considering SIMD clones; not yet supported"
3999 " for variable-width vectors.\n");
4003 unsigned int badness
= 0;
4004 struct cgraph_node
*bestn
= NULL
;
4005 if (STMT_VINFO_SIMD_CLONE_INFO (stmt_info
).exists ())
4006 bestn
= cgraph_node::get (STMT_VINFO_SIMD_CLONE_INFO (stmt_info
)[0]);
4008 for (struct cgraph_node
*n
= node
->simd_clones
; n
!= NULL
;
4009 n
= n
->simdclone
->next_clone
)
4011 unsigned int this_badness
= 0;
4012 if (n
->simdclone
->simdlen
> vf
4013 || n
->simdclone
->nargs
!= nargs
)
4015 if (n
->simdclone
->simdlen
< vf
)
4016 this_badness
+= (exact_log2 (vf
)
4017 - exact_log2 (n
->simdclone
->simdlen
)) * 1024;
4018 if (n
->simdclone
->inbranch
)
4019 this_badness
+= 2048;
4020 int target_badness
= targetm
.simd_clone
.usable (n
);
4021 if (target_badness
< 0)
4023 this_badness
+= target_badness
* 512;
4024 /* FORNOW: Have to add code to add the mask argument. */
4025 if (n
->simdclone
->inbranch
)
4027 for (i
= 0; i
< nargs
; i
++)
4029 switch (n
->simdclone
->args
[i
].arg_type
)
4031 case SIMD_CLONE_ARG_TYPE_VECTOR
:
4032 if (!useless_type_conversion_p
4033 (n
->simdclone
->args
[i
].orig_type
,
4034 TREE_TYPE (gimple_call_arg (stmt
, i
))))
4036 else if (arginfo
[i
].dt
== vect_constant_def
4037 || arginfo
[i
].dt
== vect_external_def
4038 || arginfo
[i
].linear_step
)
4041 case SIMD_CLONE_ARG_TYPE_UNIFORM
:
4042 if (arginfo
[i
].dt
!= vect_constant_def
4043 && arginfo
[i
].dt
!= vect_external_def
)
4046 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP
:
4047 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP
:
4048 if (arginfo
[i
].dt
== vect_constant_def
4049 || arginfo
[i
].dt
== vect_external_def
4050 || (arginfo
[i
].linear_step
4051 != n
->simdclone
->args
[i
].linear_step
))
4054 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP
:
4055 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP
:
4056 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP
:
4057 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP
:
4058 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP
:
4059 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP
:
4063 case SIMD_CLONE_ARG_TYPE_MASK
:
4066 if (i
== (size_t) -1)
4068 if (n
->simdclone
->args
[i
].alignment
> arginfo
[i
].align
)
4073 if (arginfo
[i
].align
)
4074 this_badness
+= (exact_log2 (arginfo
[i
].align
)
4075 - exact_log2 (n
->simdclone
->args
[i
].alignment
));
4077 if (i
== (size_t) -1)
4079 if (bestn
== NULL
|| this_badness
< badness
)
4082 badness
= this_badness
;
4089 for (i
= 0; i
< nargs
; i
++)
4090 if ((arginfo
[i
].dt
== vect_constant_def
4091 || arginfo
[i
].dt
== vect_external_def
)
4092 && bestn
->simdclone
->args
[i
].arg_type
== SIMD_CLONE_ARG_TYPE_VECTOR
)
4094 tree arg_type
= TREE_TYPE (gimple_call_arg (stmt
, i
));
4095 arginfo
[i
].vectype
= get_vectype_for_scalar_type (vinfo
, arg_type
);
4096 if (arginfo
[i
].vectype
== NULL
4097 || (simd_clone_subparts (arginfo
[i
].vectype
)
4098 > bestn
->simdclone
->simdlen
))
4102 fndecl
= bestn
->decl
;
4103 nunits
= bestn
->simdclone
->simdlen
;
4104 ncopies
= vf
/ nunits
;
4106 /* If the function isn't const, only allow it in simd loops where user
4107 has asserted that at least nunits consecutive iterations can be
4108 performed using SIMD instructions. */
4109 if ((loop
== NULL
|| (unsigned) loop
->safelen
< nunits
)
4110 && gimple_vuse (stmt
))
4113 /* Sanity check: make sure that at least one copy of the vectorized stmt
4114 needs to be generated. */
4115 gcc_assert (ncopies
>= 1);
4117 if (!vec_stmt
) /* transformation not required. */
4119 STMT_VINFO_SIMD_CLONE_INFO (stmt_info
).safe_push (bestn
->decl
);
4120 for (i
= 0; i
< nargs
; i
++)
4121 if ((bestn
->simdclone
->args
[i
].arg_type
4122 == SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP
)
4123 || (bestn
->simdclone
->args
[i
].arg_type
4124 == SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP
))
4126 STMT_VINFO_SIMD_CLONE_INFO (stmt_info
).safe_grow_cleared (i
* 3
4128 STMT_VINFO_SIMD_CLONE_INFO (stmt_info
).safe_push (arginfo
[i
].op
);
4129 tree lst
= POINTER_TYPE_P (TREE_TYPE (arginfo
[i
].op
))
4130 ? size_type_node
: TREE_TYPE (arginfo
[i
].op
);
4131 tree ls
= build_int_cst (lst
, arginfo
[i
].linear_step
);
4132 STMT_VINFO_SIMD_CLONE_INFO (stmt_info
).safe_push (ls
);
4133 tree sll
= arginfo
[i
].simd_lane_linear
4134 ? boolean_true_node
: boolean_false_node
;
4135 STMT_VINFO_SIMD_CLONE_INFO (stmt_info
).safe_push (sll
);
4137 STMT_VINFO_TYPE (stmt_info
) = call_simd_clone_vec_info_type
;
4138 DUMP_VECT_SCOPE ("vectorizable_simd_clone_call");
4139 /* vect_model_simple_cost (stmt_info, ncopies, dt, slp_node, cost_vec); */
4145 if (dump_enabled_p ())
4146 dump_printf_loc (MSG_NOTE
, vect_location
, "transform call.\n");
4149 scalar_dest
= gimple_call_lhs (stmt
);
4150 vec_dest
= NULL_TREE
;
4155 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
4156 rtype
= TREE_TYPE (TREE_TYPE (fndecl
));
4157 if (TREE_CODE (rtype
) == ARRAY_TYPE
)
4160 rtype
= TREE_TYPE (ratype
);
4164 prev_stmt_info
= NULL
;
4165 for (j
= 0; j
< ncopies
; ++j
)
4167 /* Build argument list for the vectorized call. */
4169 vargs
.create (nargs
);
4173 for (i
= 0; i
< nargs
; i
++)
4175 unsigned int k
, l
, m
, o
;
4177 op
= gimple_call_arg (stmt
, i
);
4178 switch (bestn
->simdclone
->args
[i
].arg_type
)
4180 case SIMD_CLONE_ARG_TYPE_VECTOR
:
4181 atype
= bestn
->simdclone
->args
[i
].vector_type
;
4182 o
= nunits
/ simd_clone_subparts (atype
);
4183 for (m
= j
* o
; m
< (j
+ 1) * o
; m
++)
4185 if (simd_clone_subparts (atype
)
4186 < simd_clone_subparts (arginfo
[i
].vectype
))
4188 poly_uint64 prec
= GET_MODE_BITSIZE (TYPE_MODE (atype
));
4189 k
= (simd_clone_subparts (arginfo
[i
].vectype
)
4190 / simd_clone_subparts (atype
));
4191 gcc_assert ((k
& (k
- 1)) == 0);
4194 = vect_get_vec_def_for_operand (op
, stmt_info
);
4197 vec_oprnd0
= arginfo
[i
].op
;
4198 if ((m
& (k
- 1)) == 0)
4200 = vect_get_vec_def_for_stmt_copy (vinfo
,
4203 arginfo
[i
].op
= vec_oprnd0
;
4205 = build3 (BIT_FIELD_REF
, atype
, vec_oprnd0
,
4207 bitsize_int ((m
& (k
- 1)) * prec
));
4209 = gimple_build_assign (make_ssa_name (atype
),
4211 vect_finish_stmt_generation (stmt_info
, new_stmt
, gsi
);
4212 vargs
.safe_push (gimple_assign_lhs (new_stmt
));
4216 k
= (simd_clone_subparts (atype
)
4217 / simd_clone_subparts (arginfo
[i
].vectype
));
4218 gcc_assert ((k
& (k
- 1)) == 0);
4219 vec
<constructor_elt
, va_gc
> *ctor_elts
;
4221 vec_alloc (ctor_elts
, k
);
4224 for (l
= 0; l
< k
; l
++)
4226 if (m
== 0 && l
== 0)
4228 = vect_get_vec_def_for_operand (op
, stmt_info
);
4231 = vect_get_vec_def_for_stmt_copy (vinfo
,
4233 arginfo
[i
].op
= vec_oprnd0
;
4236 CONSTRUCTOR_APPEND_ELT (ctor_elts
, NULL_TREE
,
4240 vargs
.safe_push (vec_oprnd0
);
4243 vec_oprnd0
= build_constructor (atype
, ctor_elts
);
4245 = gimple_build_assign (make_ssa_name (atype
),
4247 vect_finish_stmt_generation (stmt_info
, new_stmt
,
4249 vargs
.safe_push (gimple_assign_lhs (new_stmt
));
4254 case SIMD_CLONE_ARG_TYPE_UNIFORM
:
4255 vargs
.safe_push (op
);
4257 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP
:
4258 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP
:
4263 = force_gimple_operand (arginfo
[i
].op
, &stmts
, true,
4268 edge pe
= loop_preheader_edge (loop
);
4269 new_bb
= gsi_insert_seq_on_edge_immediate (pe
, stmts
);
4270 gcc_assert (!new_bb
);
4272 if (arginfo
[i
].simd_lane_linear
)
4274 vargs
.safe_push (arginfo
[i
].op
);
4277 tree phi_res
= copy_ssa_name (op
);
4278 gphi
*new_phi
= create_phi_node (phi_res
, loop
->header
);
4279 loop_vinfo
->add_stmt (new_phi
);
4280 add_phi_arg (new_phi
, arginfo
[i
].op
,
4281 loop_preheader_edge (loop
), UNKNOWN_LOCATION
);
4283 = POINTER_TYPE_P (TREE_TYPE (op
))
4284 ? POINTER_PLUS_EXPR
: PLUS_EXPR
;
4285 tree type
= POINTER_TYPE_P (TREE_TYPE (op
))
4286 ? sizetype
: TREE_TYPE (op
);
4288 = wi::mul (bestn
->simdclone
->args
[i
].linear_step
,
4290 tree tcst
= wide_int_to_tree (type
, cst
);
4291 tree phi_arg
= copy_ssa_name (op
);
4293 = gimple_build_assign (phi_arg
, code
, phi_res
, tcst
);
4294 gimple_stmt_iterator si
= gsi_after_labels (loop
->header
);
4295 gsi_insert_after (&si
, new_stmt
, GSI_NEW_STMT
);
4296 loop_vinfo
->add_stmt (new_stmt
);
4297 add_phi_arg (new_phi
, phi_arg
, loop_latch_edge (loop
),
4299 arginfo
[i
].op
= phi_res
;
4300 vargs
.safe_push (phi_res
);
4305 = POINTER_TYPE_P (TREE_TYPE (op
))
4306 ? POINTER_PLUS_EXPR
: PLUS_EXPR
;
4307 tree type
= POINTER_TYPE_P (TREE_TYPE (op
))
4308 ? sizetype
: TREE_TYPE (op
);
4310 = wi::mul (bestn
->simdclone
->args
[i
].linear_step
,
4312 tree tcst
= wide_int_to_tree (type
, cst
);
4313 new_temp
= make_ssa_name (TREE_TYPE (op
));
4315 = gimple_build_assign (new_temp
, code
,
4316 arginfo
[i
].op
, tcst
);
4317 vect_finish_stmt_generation (stmt_info
, new_stmt
, gsi
);
4318 vargs
.safe_push (new_temp
);
4321 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP
:
4322 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP
:
4323 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP
:
4324 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP
:
4325 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP
:
4326 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP
:
4332 gcall
*new_call
= gimple_build_call_vec (fndecl
, vargs
);
4335 gcc_assert (ratype
|| simd_clone_subparts (rtype
) == nunits
);
4337 new_temp
= create_tmp_var (ratype
);
4338 else if (simd_clone_subparts (vectype
)
4339 == simd_clone_subparts (rtype
))
4340 new_temp
= make_ssa_name (vec_dest
, new_call
);
4342 new_temp
= make_ssa_name (rtype
, new_call
);
4343 gimple_call_set_lhs (new_call
, new_temp
);
4345 stmt_vec_info new_stmt_info
4346 = vect_finish_stmt_generation (stmt_info
, new_call
, gsi
);
4350 if (simd_clone_subparts (vectype
) < nunits
)
4353 poly_uint64 prec
= GET_MODE_BITSIZE (TYPE_MODE (vectype
));
4354 poly_uint64 bytes
= GET_MODE_SIZE (TYPE_MODE (vectype
));
4355 k
= nunits
/ simd_clone_subparts (vectype
);
4356 gcc_assert ((k
& (k
- 1)) == 0);
4357 for (l
= 0; l
< k
; l
++)
4362 t
= build_fold_addr_expr (new_temp
);
4363 t
= build2 (MEM_REF
, vectype
, t
,
4364 build_int_cst (TREE_TYPE (t
), l
* bytes
));
4367 t
= build3 (BIT_FIELD_REF
, vectype
, new_temp
,
4368 bitsize_int (prec
), bitsize_int (l
* prec
));
4370 = gimple_build_assign (make_ssa_name (vectype
), t
);
4372 = vect_finish_stmt_generation (stmt_info
, new_stmt
, gsi
);
4374 if (j
== 0 && l
== 0)
4375 STMT_VINFO_VEC_STMT (stmt_info
)
4376 = *vec_stmt
= new_stmt_info
;
4378 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt_info
;
4380 prev_stmt_info
= new_stmt_info
;
4384 vect_clobber_variable (stmt_info
, gsi
, new_temp
);
4387 else if (simd_clone_subparts (vectype
) > nunits
)
4389 unsigned int k
= (simd_clone_subparts (vectype
)
4390 / simd_clone_subparts (rtype
));
4391 gcc_assert ((k
& (k
- 1)) == 0);
4392 if ((j
& (k
- 1)) == 0)
4393 vec_alloc (ret_ctor_elts
, k
);
4396 unsigned int m
, o
= nunits
/ simd_clone_subparts (rtype
);
4397 for (m
= 0; m
< o
; m
++)
4399 tree tem
= build4 (ARRAY_REF
, rtype
, new_temp
,
4400 size_int (m
), NULL_TREE
, NULL_TREE
);
4402 = gimple_build_assign (make_ssa_name (rtype
), tem
);
4404 = vect_finish_stmt_generation (stmt_info
, new_stmt
,
4406 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts
, NULL_TREE
,
4407 gimple_assign_lhs (new_stmt
));
4409 vect_clobber_variable (stmt_info
, gsi
, new_temp
);
4412 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts
, NULL_TREE
, new_temp
);
4413 if ((j
& (k
- 1)) != k
- 1)
4415 vec_oprnd0
= build_constructor (vectype
, ret_ctor_elts
);
4417 = gimple_build_assign (make_ssa_name (vec_dest
), vec_oprnd0
);
4419 = vect_finish_stmt_generation (stmt_info
, new_stmt
, gsi
);
4421 if ((unsigned) j
== k
- 1)
4422 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt_info
;
4424 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt_info
;
4426 prev_stmt_info
= new_stmt_info
;
4431 tree t
= build_fold_addr_expr (new_temp
);
4432 t
= build2 (MEM_REF
, vectype
, t
,
4433 build_int_cst (TREE_TYPE (t
), 0));
4435 = gimple_build_assign (make_ssa_name (vec_dest
), t
);
4437 = vect_finish_stmt_generation (stmt_info
, new_stmt
, gsi
);
4438 vect_clobber_variable (stmt_info
, gsi
, new_temp
);
4443 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt_info
;
4445 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt_info
;
4447 prev_stmt_info
= new_stmt_info
;
4452 /* The call in STMT might prevent it from being removed in dce.
4453 We however cannot remove it here, due to the way the ssa name
4454 it defines is mapped to the new definition. So just replace
4455 rhs of the statement with something harmless. */
4463 type
= TREE_TYPE (scalar_dest
);
4464 lhs
= gimple_call_lhs (vect_orig_stmt (stmt_info
)->stmt
);
4465 new_stmt
= gimple_build_assign (lhs
, build_zero_cst (type
));
4468 new_stmt
= gimple_build_nop ();
4469 vinfo
->replace_stmt (gsi
, vect_orig_stmt (stmt_info
), new_stmt
);
4470 unlink_stmt_vdef (stmt
);
4476 /* Function vect_gen_widened_results_half
4478 Create a vector stmt whose code, type, number of arguments, and result
4479 variable are CODE, OP_TYPE, and VEC_DEST, and its arguments are
4480 VEC_OPRND0 and VEC_OPRND1. The new vector stmt is to be inserted at GSI.
4481 In the case that CODE is a CALL_EXPR, this means that a call to DECL
4482 needs to be created (DECL is a function-decl of a target-builtin).
4483 STMT_INFO is the original scalar stmt that we are vectorizing. */
4486 vect_gen_widened_results_half (enum tree_code code
,
4488 tree vec_oprnd0
, tree vec_oprnd1
, int op_type
,
4489 tree vec_dest
, gimple_stmt_iterator
*gsi
,
4490 stmt_vec_info stmt_info
)
4495 /* Generate half of the widened result: */
4496 if (code
== CALL_EXPR
)
4498 /* Target specific support */
4499 if (op_type
== binary_op
)
4500 new_stmt
= gimple_build_call (decl
, 2, vec_oprnd0
, vec_oprnd1
);
4502 new_stmt
= gimple_build_call (decl
, 1, vec_oprnd0
);
4503 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
4504 gimple_call_set_lhs (new_stmt
, new_temp
);
4508 /* Generic support */
4509 gcc_assert (op_type
== TREE_CODE_LENGTH (code
));
4510 if (op_type
!= binary_op
)
4512 new_stmt
= gimple_build_assign (vec_dest
, code
, vec_oprnd0
, vec_oprnd1
);
4513 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
4514 gimple_assign_set_lhs (new_stmt
, new_temp
);
4516 vect_finish_stmt_generation (stmt_info
, new_stmt
, gsi
);
4522 /* Get vectorized definitions for loop-based vectorization of STMT_INFO.
4523 For the first operand we call vect_get_vec_def_for_operand (with OPRND
4524 containing scalar operand), and for the rest we get a copy with
4525 vect_get_vec_def_for_stmt_copy() using the previous vector definition
4526 (stored in OPRND). See vect_get_vec_def_for_stmt_copy() for details.
4527 The vectors are collected into VEC_OPRNDS. */
4530 vect_get_loop_based_defs (tree
*oprnd
, stmt_vec_info stmt_info
,
4531 vec
<tree
> *vec_oprnds
, int multi_step_cvt
)
4533 vec_info
*vinfo
= stmt_info
->vinfo
;
4536 /* Get first vector operand. */
4537 /* All the vector operands except the very first one (that is scalar oprnd)
4539 if (TREE_CODE (TREE_TYPE (*oprnd
)) != VECTOR_TYPE
)
4540 vec_oprnd
= vect_get_vec_def_for_operand (*oprnd
, stmt_info
);
4542 vec_oprnd
= vect_get_vec_def_for_stmt_copy (vinfo
, *oprnd
);
4544 vec_oprnds
->quick_push (vec_oprnd
);
4546 /* Get second vector operand. */
4547 vec_oprnd
= vect_get_vec_def_for_stmt_copy (vinfo
, vec_oprnd
);
4548 vec_oprnds
->quick_push (vec_oprnd
);
4552 /* For conversion in multiple steps, continue to get operands
4555 vect_get_loop_based_defs (oprnd
, stmt_info
, vec_oprnds
,
4556 multi_step_cvt
- 1);
4560 /* Create vectorized demotion statements for vector operands from VEC_OPRNDS.
4561 For multi-step conversions store the resulting vectors and call the function
4565 vect_create_vectorized_demotion_stmts (vec
<tree
> *vec_oprnds
,
4567 stmt_vec_info stmt_info
,
4569 gimple_stmt_iterator
*gsi
,
4570 slp_tree slp_node
, enum tree_code code
,
4571 stmt_vec_info
*prev_stmt_info
)
4574 tree vop0
, vop1
, new_tmp
, vec_dest
;
4576 vec_dest
= vec_dsts
.pop ();
4578 for (i
= 0; i
< vec_oprnds
->length (); i
+= 2)
4580 /* Create demotion operation. */
4581 vop0
= (*vec_oprnds
)[i
];
4582 vop1
= (*vec_oprnds
)[i
+ 1];
4583 gassign
*new_stmt
= gimple_build_assign (vec_dest
, code
, vop0
, vop1
);
4584 new_tmp
= make_ssa_name (vec_dest
, new_stmt
);
4585 gimple_assign_set_lhs (new_stmt
, new_tmp
);
4586 stmt_vec_info new_stmt_info
4587 = vect_finish_stmt_generation (stmt_info
, new_stmt
, gsi
);
4590 /* Store the resulting vector for next recursive call. */
4591 (*vec_oprnds
)[i
/2] = new_tmp
;
4594 /* This is the last step of the conversion sequence. Store the
4595 vectors in SLP_NODE or in vector info of the scalar statement
4596 (or in STMT_VINFO_RELATED_STMT chain). */
4598 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt_info
);
4601 if (!*prev_stmt_info
)
4602 STMT_VINFO_VEC_STMT (stmt_info
) = new_stmt_info
;
4604 STMT_VINFO_RELATED_STMT (*prev_stmt_info
) = new_stmt_info
;
4606 *prev_stmt_info
= new_stmt_info
;
4611 /* For multi-step demotion operations we first generate demotion operations
4612 from the source type to the intermediate types, and then combine the
4613 results (stored in VEC_OPRNDS) in demotion operation to the destination
4617 /* At each level of recursion we have half of the operands we had at the
4619 vec_oprnds
->truncate ((i
+1)/2);
4620 vect_create_vectorized_demotion_stmts (vec_oprnds
, multi_step_cvt
- 1,
4621 stmt_info
, vec_dsts
, gsi
,
4622 slp_node
, VEC_PACK_TRUNC_EXPR
,
4626 vec_dsts
.quick_push (vec_dest
);
4630 /* Create vectorized promotion statements for vector operands from VEC_OPRNDS0
4631 and VEC_OPRNDS1, for a binary operation associated with scalar statement
4632 STMT_INFO. For multi-step conversions store the resulting vectors and
4633 call the function recursively. */
4636 vect_create_vectorized_promotion_stmts (vec
<tree
> *vec_oprnds0
,
4637 vec
<tree
> *vec_oprnds1
,
4638 stmt_vec_info stmt_info
, tree vec_dest
,
4639 gimple_stmt_iterator
*gsi
,
4640 enum tree_code code1
,
4641 enum tree_code code2
, tree decl1
,
4642 tree decl2
, int op_type
)
4645 tree vop0
, vop1
, new_tmp1
, new_tmp2
;
4646 gimple
*new_stmt1
, *new_stmt2
;
4647 vec
<tree
> vec_tmp
= vNULL
;
4649 vec_tmp
.create (vec_oprnds0
->length () * 2);
4650 FOR_EACH_VEC_ELT (*vec_oprnds0
, i
, vop0
)
4652 if (op_type
== binary_op
)
4653 vop1
= (*vec_oprnds1
)[i
];
4657 /* Generate the two halves of promotion operation. */
4658 new_stmt1
= vect_gen_widened_results_half (code1
, decl1
, vop0
, vop1
,
4659 op_type
, vec_dest
, gsi
,
4661 new_stmt2
= vect_gen_widened_results_half (code2
, decl2
, vop0
, vop1
,
4662 op_type
, vec_dest
, gsi
,
4664 if (is_gimple_call (new_stmt1
))
4666 new_tmp1
= gimple_call_lhs (new_stmt1
);
4667 new_tmp2
= gimple_call_lhs (new_stmt2
);
4671 new_tmp1
= gimple_assign_lhs (new_stmt1
);
4672 new_tmp2
= gimple_assign_lhs (new_stmt2
);
4675 /* Store the results for the next step. */
4676 vec_tmp
.quick_push (new_tmp1
);
4677 vec_tmp
.quick_push (new_tmp2
);
4680 vec_oprnds0
->release ();
4681 *vec_oprnds0
= vec_tmp
;
4685 /* Check if STMT_INFO performs a conversion operation that can be vectorized.
4686 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
4687 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
4688 Return true if STMT_INFO is vectorizable in this way. */
4691 vectorizable_conversion (stmt_vec_info stmt_info
, gimple_stmt_iterator
*gsi
,
4692 stmt_vec_info
*vec_stmt
, slp_tree slp_node
,
4693 stmt_vector_for_cost
*cost_vec
)
4697 tree op0
, op1
= NULL_TREE
;
4698 tree vec_oprnd0
= NULL_TREE
, vec_oprnd1
= NULL_TREE
;
4699 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
4700 enum tree_code code
, code1
= ERROR_MARK
, code2
= ERROR_MARK
;
4701 enum tree_code codecvt1
= ERROR_MARK
, codecvt2
= ERROR_MARK
;
4702 tree decl1
= NULL_TREE
, decl2
= NULL_TREE
;
4704 enum vect_def_type dt
[2] = {vect_unknown_def_type
, vect_unknown_def_type
};
4706 stmt_vec_info prev_stmt_info
;
4707 poly_uint64 nunits_in
;
4708 poly_uint64 nunits_out
;
4709 tree vectype_out
, vectype_in
;
4711 tree lhs_type
, rhs_type
;
4712 enum { NARROW
, NONE
, WIDEN
} modifier
;
4713 vec
<tree
> vec_oprnds0
= vNULL
;
4714 vec
<tree
> vec_oprnds1
= vNULL
;
4716 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
4717 vec_info
*vinfo
= stmt_info
->vinfo
;
4718 int multi_step_cvt
= 0;
4719 vec
<tree
> interm_types
= vNULL
;
4720 tree last_oprnd
, intermediate_type
, cvt_type
= NULL_TREE
;
4722 unsigned short fltsz
;
4724 /* Is STMT a vectorizable conversion? */
4726 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
4729 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
4733 gassign
*stmt
= dyn_cast
<gassign
*> (stmt_info
->stmt
);
4737 if (TREE_CODE (gimple_assign_lhs (stmt
)) != SSA_NAME
)
4740 code
= gimple_assign_rhs_code (stmt
);
4741 if (!CONVERT_EXPR_CODE_P (code
)
4742 && code
!= FIX_TRUNC_EXPR
4743 && code
!= FLOAT_EXPR
4744 && code
!= WIDEN_MULT_EXPR
4745 && code
!= WIDEN_LSHIFT_EXPR
)
4748 op_type
= TREE_CODE_LENGTH (code
);
4750 /* Check types of lhs and rhs. */
4751 scalar_dest
= gimple_assign_lhs (stmt
);
4752 lhs_type
= TREE_TYPE (scalar_dest
);
4753 vectype_out
= STMT_VINFO_VECTYPE (stmt_info
);
4755 op0
= gimple_assign_rhs1 (stmt
);
4756 rhs_type
= TREE_TYPE (op0
);
4758 if ((code
!= FIX_TRUNC_EXPR
&& code
!= FLOAT_EXPR
)
4759 && !((INTEGRAL_TYPE_P (lhs_type
)
4760 && INTEGRAL_TYPE_P (rhs_type
))
4761 || (SCALAR_FLOAT_TYPE_P (lhs_type
)
4762 && SCALAR_FLOAT_TYPE_P (rhs_type
))))
4765 if (!VECTOR_BOOLEAN_TYPE_P (vectype_out
)
4766 && ((INTEGRAL_TYPE_P (lhs_type
)
4767 && !type_has_mode_precision_p (lhs_type
))
4768 || (INTEGRAL_TYPE_P (rhs_type
)
4769 && !type_has_mode_precision_p (rhs_type
))))
4771 if (dump_enabled_p ())
4772 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4773 "type conversion to/from bit-precision unsupported."
4778 /* Check the operands of the operation. */
4779 if (!vect_is_simple_use (op0
, vinfo
, &dt
[0], &vectype_in
))
4781 if (dump_enabled_p ())
4782 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4783 "use not simple.\n");
4786 if (op_type
== binary_op
)
4790 op1
= gimple_assign_rhs2 (stmt
);
4791 gcc_assert (code
== WIDEN_MULT_EXPR
|| code
== WIDEN_LSHIFT_EXPR
);
4792 /* For WIDEN_MULT_EXPR, if OP0 is a constant, use the type of
4794 if (CONSTANT_CLASS_P (op0
))
4795 ok
= vect_is_simple_use (op1
, vinfo
, &dt
[1], &vectype_in
);
4797 ok
= vect_is_simple_use (op1
, vinfo
, &dt
[1]);
4801 if (dump_enabled_p ())
4802 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4803 "use not simple.\n");
4808 /* If op0 is an external or constant defs use a vector type of
4809 the same size as the output vector type. */
4811 vectype_in
= get_same_sized_vectype (rhs_type
, vectype_out
);
4813 gcc_assert (vectype_in
);
4816 if (dump_enabled_p ())
4817 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4818 "no vectype for scalar type %T\n", rhs_type
);
4823 if (VECTOR_BOOLEAN_TYPE_P (vectype_out
)
4824 && !VECTOR_BOOLEAN_TYPE_P (vectype_in
))
4826 if (dump_enabled_p ())
4827 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4828 "can't convert between boolean and non "
4829 "boolean vectors %T\n", rhs_type
);
4834 nunits_in
= TYPE_VECTOR_SUBPARTS (vectype_in
);
4835 nunits_out
= TYPE_VECTOR_SUBPARTS (vectype_out
);
4836 if (known_eq (nunits_out
, nunits_in
))
4838 else if (multiple_p (nunits_out
, nunits_in
))
4842 gcc_checking_assert (multiple_p (nunits_in
, nunits_out
));
4846 /* Multiple types in SLP are handled by creating the appropriate number of
4847 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
4851 else if (modifier
== NARROW
)
4852 ncopies
= vect_get_num_copies (loop_vinfo
, vectype_out
);
4854 ncopies
= vect_get_num_copies (loop_vinfo
, vectype_in
);
4856 /* Sanity check: make sure that at least one copy of the vectorized stmt
4857 needs to be generated. */
4858 gcc_assert (ncopies
>= 1);
4860 bool found_mode
= false;
4861 scalar_mode lhs_mode
= SCALAR_TYPE_MODE (lhs_type
);
4862 scalar_mode rhs_mode
= SCALAR_TYPE_MODE (rhs_type
);
4863 opt_scalar_mode rhs_mode_iter
;
4865 /* Supportable by target? */
4869 if (code
!= FIX_TRUNC_EXPR
&& code
!= FLOAT_EXPR
)
4871 if (supportable_convert_operation (code
, vectype_out
, vectype_in
,
4876 if (dump_enabled_p ())
4877 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4878 "conversion not supported by target.\n");
4882 if (supportable_widening_operation (code
, stmt_info
, vectype_out
,
4883 vectype_in
, &code1
, &code2
,
4884 &multi_step_cvt
, &interm_types
))
4886 /* Binary widening operation can only be supported directly by the
4888 gcc_assert (!(multi_step_cvt
&& op_type
== binary_op
));
4892 if (code
!= FLOAT_EXPR
4893 || GET_MODE_SIZE (lhs_mode
) <= GET_MODE_SIZE (rhs_mode
))
4896 fltsz
= GET_MODE_SIZE (lhs_mode
);
4897 FOR_EACH_2XWIDER_MODE (rhs_mode_iter
, rhs_mode
)
4899 rhs_mode
= rhs_mode_iter
.require ();
4900 if (GET_MODE_SIZE (rhs_mode
) > fltsz
)
4904 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode
), 0);
4905 cvt_type
= get_same_sized_vectype (cvt_type
, vectype_in
);
4906 if (cvt_type
== NULL_TREE
)
4909 if (GET_MODE_SIZE (rhs_mode
) == fltsz
)
4911 if (!supportable_convert_operation (code
, vectype_out
,
4912 cvt_type
, &decl1
, &codecvt1
))
4915 else if (!supportable_widening_operation (code
, stmt_info
,
4916 vectype_out
, cvt_type
,
4917 &codecvt1
, &codecvt2
,
4922 gcc_assert (multi_step_cvt
== 0);
4924 if (supportable_widening_operation (NOP_EXPR
, stmt_info
, cvt_type
,
4925 vectype_in
, &code1
, &code2
,
4926 &multi_step_cvt
, &interm_types
))
4936 if (GET_MODE_SIZE (rhs_mode
) == fltsz
)
4937 codecvt2
= ERROR_MARK
;
4941 interm_types
.safe_push (cvt_type
);
4942 cvt_type
= NULL_TREE
;
4947 gcc_assert (op_type
== unary_op
);
4948 if (supportable_narrowing_operation (vinfo
, code
, vectype_out
,
4949 vectype_in
, &code1
, &multi_step_cvt
,
4953 if (code
!= FIX_TRUNC_EXPR
4954 || GET_MODE_SIZE (lhs_mode
) >= GET_MODE_SIZE (rhs_mode
))
4958 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode
), 0);
4959 cvt_type
= get_same_sized_vectype (cvt_type
, vectype_in
);
4960 if (cvt_type
== NULL_TREE
)
4962 if (!supportable_convert_operation (code
, cvt_type
, vectype_in
,
4965 if (supportable_narrowing_operation (vinfo
, NOP_EXPR
, vectype_out
,
4966 cvt_type
, &code1
, &multi_step_cvt
,
4975 if (!vec_stmt
) /* transformation not required. */
4977 DUMP_VECT_SCOPE ("vectorizable_conversion");
4978 if (code
== FIX_TRUNC_EXPR
|| code
== FLOAT_EXPR
)
4980 STMT_VINFO_TYPE (stmt_info
) = type_conversion_vec_info_type
;
4981 vect_model_simple_cost (stmt_info
, ncopies
, dt
, ndts
, slp_node
,
4984 else if (modifier
== NARROW
)
4986 STMT_VINFO_TYPE (stmt_info
) = type_demotion_vec_info_type
;
4987 vect_model_promotion_demotion_cost (stmt_info
, dt
, multi_step_cvt
,
4992 STMT_VINFO_TYPE (stmt_info
) = type_promotion_vec_info_type
;
4993 vect_model_promotion_demotion_cost (stmt_info
, dt
, multi_step_cvt
,
4996 interm_types
.release ();
5001 if (dump_enabled_p ())
5002 dump_printf_loc (MSG_NOTE
, vect_location
,
5003 "transform conversion. ncopies = %d.\n", ncopies
);
5005 if (op_type
== binary_op
)
5007 if (CONSTANT_CLASS_P (op0
))
5008 op0
= fold_convert (TREE_TYPE (op1
), op0
);
5009 else if (CONSTANT_CLASS_P (op1
))
5010 op1
= fold_convert (TREE_TYPE (op0
), op1
);
5013 /* In case of multi-step conversion, we first generate conversion operations
5014 to the intermediate types, and then from that types to the final one.
5015 We create vector destinations for the intermediate type (TYPES) received
5016 from supportable_*_operation, and store them in the correct order
5017 for future use in vect_create_vectorized_*_stmts (). */
5018 auto_vec
<tree
> vec_dsts (multi_step_cvt
+ 1);
5019 vec_dest
= vect_create_destination_var (scalar_dest
,
5020 (cvt_type
&& modifier
== WIDEN
)
5021 ? cvt_type
: vectype_out
);
5022 vec_dsts
.quick_push (vec_dest
);
5026 for (i
= interm_types
.length () - 1;
5027 interm_types
.iterate (i
, &intermediate_type
); i
--)
5029 vec_dest
= vect_create_destination_var (scalar_dest
,
5031 vec_dsts
.quick_push (vec_dest
);
5036 vec_dest
= vect_create_destination_var (scalar_dest
,
5038 ? vectype_out
: cvt_type
);
5042 if (modifier
== WIDEN
)
5044 vec_oprnds0
.create (multi_step_cvt
? vect_pow2 (multi_step_cvt
) : 1);
5045 if (op_type
== binary_op
)
5046 vec_oprnds1
.create (1);
5048 else if (modifier
== NARROW
)
5049 vec_oprnds0
.create (
5050 2 * (multi_step_cvt
? vect_pow2 (multi_step_cvt
) : 1));
5052 else if (code
== WIDEN_LSHIFT_EXPR
)
5053 vec_oprnds1
.create (slp_node
->vec_stmts_size
);
5056 prev_stmt_info
= NULL
;
5060 for (j
= 0; j
< ncopies
; j
++)
5063 vect_get_vec_defs (op0
, NULL
, stmt_info
, &vec_oprnds0
,
5066 vect_get_vec_defs_for_stmt_copy (vinfo
, &vec_oprnds0
, NULL
);
5068 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vop0
)
5070 stmt_vec_info new_stmt_info
;
5071 /* Arguments are ready, create the new vector stmt. */
5072 if (code1
== CALL_EXPR
)
5074 gcall
*new_stmt
= gimple_build_call (decl1
, 1, vop0
);
5075 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
5076 gimple_call_set_lhs (new_stmt
, new_temp
);
5078 = vect_finish_stmt_generation (stmt_info
, new_stmt
, gsi
);
5082 gcc_assert (TREE_CODE_LENGTH (code1
) == unary_op
);
5084 = gimple_build_assign (vec_dest
, code1
, vop0
);
5085 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
5086 gimple_assign_set_lhs (new_stmt
, new_temp
);
5088 = vect_finish_stmt_generation (stmt_info
, new_stmt
, gsi
);
5092 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt_info
);
5095 if (!prev_stmt_info
)
5096 STMT_VINFO_VEC_STMT (stmt_info
)
5097 = *vec_stmt
= new_stmt_info
;
5099 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt_info
;
5100 prev_stmt_info
= new_stmt_info
;
5107 /* In case the vectorization factor (VF) is bigger than the number
5108 of elements that we can fit in a vectype (nunits), we have to
5109 generate more than one vector stmt - i.e - we need to "unroll"
5110 the vector stmt by a factor VF/nunits. */
5111 for (j
= 0; j
< ncopies
; j
++)
5118 if (code
== WIDEN_LSHIFT_EXPR
)
5123 /* Store vec_oprnd1 for every vector stmt to be created
5124 for SLP_NODE. We check during the analysis that all
5125 the shift arguments are the same. */
5126 for (k
= 0; k
< slp_node
->vec_stmts_size
- 1; k
++)
5127 vec_oprnds1
.quick_push (vec_oprnd1
);
5129 vect_get_vec_defs (op0
, NULL_TREE
, stmt_info
,
5130 &vec_oprnds0
, NULL
, slp_node
);
5133 vect_get_vec_defs (op0
, op1
, stmt_info
, &vec_oprnds0
,
5134 &vec_oprnds1
, slp_node
);
5138 vec_oprnd0
= vect_get_vec_def_for_operand (op0
, stmt_info
);
5139 vec_oprnds0
.quick_push (vec_oprnd0
);
5140 if (op_type
== binary_op
)
5142 if (code
== WIDEN_LSHIFT_EXPR
)
5146 = vect_get_vec_def_for_operand (op1
, stmt_info
);
5147 vec_oprnds1
.quick_push (vec_oprnd1
);
5153 vec_oprnd0
= vect_get_vec_def_for_stmt_copy (vinfo
, vec_oprnd0
);
5154 vec_oprnds0
.truncate (0);
5155 vec_oprnds0
.quick_push (vec_oprnd0
);
5156 if (op_type
== binary_op
)
5158 if (code
== WIDEN_LSHIFT_EXPR
)
5161 vec_oprnd1
= vect_get_vec_def_for_stmt_copy (vinfo
,
5163 vec_oprnds1
.truncate (0);
5164 vec_oprnds1
.quick_push (vec_oprnd1
);
5168 /* Arguments are ready. Create the new vector stmts. */
5169 for (i
= multi_step_cvt
; i
>= 0; i
--)
5171 tree this_dest
= vec_dsts
[i
];
5172 enum tree_code c1
= code1
, c2
= code2
;
5173 if (i
== 0 && codecvt2
!= ERROR_MARK
)
5178 vect_create_vectorized_promotion_stmts (&vec_oprnds0
,
5179 &vec_oprnds1
, stmt_info
,
5181 c1
, c2
, decl1
, decl2
,
5185 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vop0
)
5187 stmt_vec_info new_stmt_info
;
5190 if (codecvt1
== CALL_EXPR
)
5192 gcall
*new_stmt
= gimple_build_call (decl1
, 1, vop0
);
5193 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
5194 gimple_call_set_lhs (new_stmt
, new_temp
);
5196 = vect_finish_stmt_generation (stmt_info
, new_stmt
,
5201 gcc_assert (TREE_CODE_LENGTH (codecvt1
) == unary_op
);
5202 new_temp
= make_ssa_name (vec_dest
);
5204 = gimple_build_assign (new_temp
, codecvt1
, vop0
);
5206 = vect_finish_stmt_generation (stmt_info
, new_stmt
,
5211 new_stmt_info
= vinfo
->lookup_def (vop0
);
5214 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt_info
);
5217 if (!prev_stmt_info
)
5218 STMT_VINFO_VEC_STMT (stmt_info
) = new_stmt_info
;
5220 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt_info
;
5221 prev_stmt_info
= new_stmt_info
;
5226 *vec_stmt
= STMT_VINFO_VEC_STMT (stmt_info
);
5230 /* In case the vectorization factor (VF) is bigger than the number
5231 of elements that we can fit in a vectype (nunits), we have to
5232 generate more than one vector stmt - i.e - we need to "unroll"
5233 the vector stmt by a factor VF/nunits. */
5234 for (j
= 0; j
< ncopies
; j
++)
5238 vect_get_vec_defs (op0
, NULL_TREE
, stmt_info
, &vec_oprnds0
, NULL
,
5242 vec_oprnds0
.truncate (0);
5243 vect_get_loop_based_defs (&last_oprnd
, stmt_info
, &vec_oprnds0
,
5244 vect_pow2 (multi_step_cvt
) - 1);
5247 /* Arguments are ready. Create the new vector stmts. */
5249 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vop0
)
5251 if (codecvt1
== CALL_EXPR
)
5253 gcall
*new_stmt
= gimple_build_call (decl1
, 1, vop0
);
5254 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
5255 gimple_call_set_lhs (new_stmt
, new_temp
);
5256 vect_finish_stmt_generation (stmt_info
, new_stmt
, gsi
);
5260 gcc_assert (TREE_CODE_LENGTH (codecvt1
) == unary_op
);
5261 new_temp
= make_ssa_name (vec_dest
);
5263 = gimple_build_assign (new_temp
, codecvt1
, vop0
);
5264 vect_finish_stmt_generation (stmt_info
, new_stmt
, gsi
);
5267 vec_oprnds0
[i
] = new_temp
;
5270 vect_create_vectorized_demotion_stmts (&vec_oprnds0
, multi_step_cvt
,
5271 stmt_info
, vec_dsts
, gsi
,
5276 *vec_stmt
= STMT_VINFO_VEC_STMT (stmt_info
);
5280 vec_oprnds0
.release ();
5281 vec_oprnds1
.release ();
5282 interm_types
.release ();
5288 /* Function vectorizable_assignment.
5290 Check if STMT_INFO performs an assignment (copy) that can be vectorized.
5291 If VEC_STMT is also passed, vectorize the STMT_INFO: create a vectorized
5292 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
5293 Return true if STMT_INFO is vectorizable in this way. */
5296 vectorizable_assignment (stmt_vec_info stmt_info
, gimple_stmt_iterator
*gsi
,
5297 stmt_vec_info
*vec_stmt
, slp_tree slp_node
,
5298 stmt_vector_for_cost
*cost_vec
)
5303 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
5305 enum vect_def_type dt
[1] = {vect_unknown_def_type
};
5309 vec
<tree
> vec_oprnds
= vNULL
;
5311 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
5312 vec_info
*vinfo
= stmt_info
->vinfo
;
5313 stmt_vec_info prev_stmt_info
= NULL
;
5314 enum tree_code code
;
5317 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
5320 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
5324 /* Is vectorizable assignment? */
5325 gassign
*stmt
= dyn_cast
<gassign
*> (stmt_info
->stmt
);
5329 scalar_dest
= gimple_assign_lhs (stmt
);
5330 if (TREE_CODE (scalar_dest
) != SSA_NAME
)
5333 code
= gimple_assign_rhs_code (stmt
);
5334 if (gimple_assign_single_p (stmt
)
5335 || code
== PAREN_EXPR
5336 || CONVERT_EXPR_CODE_P (code
))
5337 op
= gimple_assign_rhs1 (stmt
);
5341 if (code
== VIEW_CONVERT_EXPR
)
5342 op
= TREE_OPERAND (op
, 0);
5344 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
5345 poly_uint64 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
5347 /* Multiple types in SLP are handled by creating the appropriate number of
5348 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
5353 ncopies
= vect_get_num_copies (loop_vinfo
, vectype
);
5355 gcc_assert (ncopies
>= 1);
5357 if (!vect_is_simple_use (op
, vinfo
, &dt
[0], &vectype_in
))
5359 if (dump_enabled_p ())
5360 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5361 "use not simple.\n");
5365 /* We can handle NOP_EXPR conversions that do not change the number
5366 of elements or the vector size. */
5367 if ((CONVERT_EXPR_CODE_P (code
)
5368 || code
== VIEW_CONVERT_EXPR
)
5370 || maybe_ne (TYPE_VECTOR_SUBPARTS (vectype_in
), nunits
)
5371 || maybe_ne (GET_MODE_SIZE (TYPE_MODE (vectype
)),
5372 GET_MODE_SIZE (TYPE_MODE (vectype_in
)))))
5375 /* We do not handle bit-precision changes. */
5376 if ((CONVERT_EXPR_CODE_P (code
)
5377 || code
== VIEW_CONVERT_EXPR
)
5378 && INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest
))
5379 && (!type_has_mode_precision_p (TREE_TYPE (scalar_dest
))
5380 || !type_has_mode_precision_p (TREE_TYPE (op
)))
5381 /* But a conversion that does not change the bit-pattern is ok. */
5382 && !((TYPE_PRECISION (TREE_TYPE (scalar_dest
))
5383 > TYPE_PRECISION (TREE_TYPE (op
)))
5384 && TYPE_UNSIGNED (TREE_TYPE (op
)))
5385 /* Conversion between boolean types of different sizes is
5386 a simple assignment in case their vectypes are same
5388 && (!VECTOR_BOOLEAN_TYPE_P (vectype
)
5389 || !VECTOR_BOOLEAN_TYPE_P (vectype_in
)))
5391 if (dump_enabled_p ())
5392 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5393 "type conversion to/from bit-precision "
5398 if (!vec_stmt
) /* transformation not required. */
5400 STMT_VINFO_TYPE (stmt_info
) = assignment_vec_info_type
;
5401 DUMP_VECT_SCOPE ("vectorizable_assignment");
5402 vect_model_simple_cost (stmt_info
, ncopies
, dt
, ndts
, slp_node
, cost_vec
);
5407 if (dump_enabled_p ())
5408 dump_printf_loc (MSG_NOTE
, vect_location
, "transform assignment.\n");
5411 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
5414 for (j
= 0; j
< ncopies
; j
++)
5418 vect_get_vec_defs (op
, NULL
, stmt_info
, &vec_oprnds
, NULL
, slp_node
);
5420 vect_get_vec_defs_for_stmt_copy (vinfo
, &vec_oprnds
, NULL
);
5422 /* Arguments are ready. create the new vector stmt. */
5423 stmt_vec_info new_stmt_info
= NULL
;
5424 FOR_EACH_VEC_ELT (vec_oprnds
, i
, vop
)
5426 if (CONVERT_EXPR_CODE_P (code
)
5427 || code
== VIEW_CONVERT_EXPR
)
5428 vop
= build1 (VIEW_CONVERT_EXPR
, vectype
, vop
);
5429 gassign
*new_stmt
= gimple_build_assign (vec_dest
, vop
);
5430 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
5431 gimple_assign_set_lhs (new_stmt
, new_temp
);
5433 = vect_finish_stmt_generation (stmt_info
, new_stmt
, gsi
);
5435 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt_info
);
5442 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt_info
;
5444 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt_info
;
5446 prev_stmt_info
= new_stmt_info
;
5449 vec_oprnds
.release ();
5454 /* Return TRUE if CODE (a shift operation) is supported for SCALAR_TYPE
5455 either as shift by a scalar or by a vector. */
5458 vect_supportable_shift (vec_info
*vinfo
, enum tree_code code
, tree scalar_type
)
5461 machine_mode vec_mode
;
5466 vectype
= get_vectype_for_scalar_type (vinfo
, scalar_type
);
5470 optab
= optab_for_tree_code (code
, vectype
, optab_scalar
);
5472 || optab_handler (optab
, TYPE_MODE (vectype
)) == CODE_FOR_nothing
)
5474 optab
= optab_for_tree_code (code
, vectype
, optab_vector
);
5476 || (optab_handler (optab
, TYPE_MODE (vectype
))
5477 == CODE_FOR_nothing
))
5481 vec_mode
= TYPE_MODE (vectype
);
5482 icode
= (int) optab_handler (optab
, vec_mode
);
5483 if (icode
== CODE_FOR_nothing
)
5490 /* Function vectorizable_shift.
5492 Check if STMT_INFO performs a shift operation that can be vectorized.
5493 If VEC_STMT is also passed, vectorize the STMT_INFO: create a vectorized
5494 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
5495 Return true if STMT_INFO is vectorizable in this way. */
/* NOTE(review): lossy line-split extraction -- leading numbers are original
   source line numbers; some lines (braces, returns, conditions) are missing
   from this view.  Added comments describe only what is visible.  */
5498 vectorizable_shift (stmt_vec_info stmt_info
, gimple_stmt_iterator
*gsi
,
5499 stmt_vec_info
*vec_stmt
, slp_tree slp_node
,
5500 stmt_vector_for_cost
*cost_vec
)
/* Local state: operands, vector types, def kinds for the two operands,
   and operand vectors reused across the NCOPIES transform loop.  */
5504 tree op0
, op1
= NULL
;
5505 tree vec_oprnd1
= NULL_TREE
;
5507 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
5508 enum tree_code code
;
5509 machine_mode vec_mode
;
5513 machine_mode optab_op2_mode
;
5514 enum vect_def_type dt
[2] = {vect_unknown_def_type
, vect_unknown_def_type
};
5516 stmt_vec_info prev_stmt_info
;
5517 poly_uint64 nunits_in
;
5518 poly_uint64 nunits_out
;
5523 vec
<tree
> vec_oprnds0
= vNULL
;
5524 vec
<tree
> vec_oprnds1
= vNULL
;
/* Assume a scalar (invariant) shift amount until proven otherwise.  */
5527 bool scalar_shift_arg
= true;
5528 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
5529 vec_info
*vinfo
= stmt_info
->vinfo
;
/* Analysis gate: stmt must be relevant (or we are doing BB SLP) and an
   internal/nested-cycle def.  */
5531 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
5534 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
5535 && STMT_VINFO_DEF_TYPE (stmt_info
) != vect_nested_cycle
5539 /* Is STMT a vectorizable binary/unary operation? */
5540 gassign
*stmt
= dyn_cast
<gassign
*> (stmt_info
->stmt
)
;
5544 if (TREE_CODE (gimple_assign_lhs (stmt
)) != SSA_NAME
)
/* Only shift/rotate tree codes are handled here.  */
5547 code
= gimple_assign_rhs_code (stmt
);
5549 if (!(code
== LSHIFT_EXPR
|| code
== RSHIFT_EXPR
|| code
== LROTATE_EXPR
5550 || code
== RROTATE_EXPR
))
5553 scalar_dest
= gimple_assign_lhs (stmt
);
5554 vectype_out
= STMT_VINFO_VECTYPE (stmt_info
);
5555 if (!type_has_mode_precision_p (TREE_TYPE (scalar_dest
)))
5557 if (dump_enabled_p ())
5558 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5559 "bit-precision shifts not supported.\n");
/* Validate operand 0 (the shifted value) and derive its vectype.  */
5563 op0
= gimple_assign_rhs1 (stmt
);
5564 if (!vect_is_simple_use (op0
, vinfo
, &dt
[0], &vectype
))
5566 if (dump_enabled_p ())
5567 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5568 "use not simple.\n");
5571 /* If op0 is an external or constant def use a vector type with
5572 the same size as the output vector type. */
5574 vectype
= get_same_sized_vectype (TREE_TYPE (op0
), vectype_out
);
5576 gcc_assert (vectype
);
5579 if (dump_enabled_p ())
5580 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5581 "no vectype for scalar type\n");
/* Input and output vectors must have the same number of lanes.  */
5585 nunits_out
= TYPE_VECTOR_SUBPARTS (vectype_out
);
5586 nunits_in
= TYPE_VECTOR_SUBPARTS (vectype
);
5587 if (maybe_ne (nunits_out
, nunits_in
))
/* Validate operand 1 (the shift amount), also recording its defining
   stmt so pattern-stmt defs can be detected below.  */
5590 op1
= gimple_assign_rhs2 (stmt
);
5591 stmt_vec_info op1_def_stmt_info
;
5592 if (!vect_is_simple_use (op1
, vinfo
, &dt
[1], &op1_vectype
,
5593 &op1_def_stmt_info
))
5595 if (dump_enabled_p ())
5596 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5597 "use not simple.\n");
5601 /* Multiple types in SLP are handled by creating the appropriate number of
5602 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
5607 ncopies
= vect_get_num_copies (loop_vinfo
, vectype
);
5609 gcc_assert (ncopies
>= 1);
5611 /* Determine whether the shift amount is a vector, or scalar. If the
5612 shift/rotate amount is a vector, use the vector/vector shift optabs. */
5614 if ((dt
[1] == vect_internal_def
5615 || dt
[1] == vect_induction_def
5616 || dt
[1] == vect_nested_cycle
)
5618 scalar_shift_arg
= false;
5619 else if (dt
[1] == vect_constant_def
5620 || dt
[1] == vect_external_def
5621 || dt
[1] == vect_internal_def
)
5623 /* In SLP, need to check whether the shift count is the same,
5624 in loops if it is a constant or invariant, it is always
5628 vec
<stmt_vec_info
> stmts
= SLP_TREE_SCALAR_STMTS (slp_node
);
5629 stmt_vec_info slpstmt_info
;
/* Every scalar stmt in the SLP node must use the same shift amount,
   otherwise a single scalar operand cannot be shared.  */
5631 FOR_EACH_VEC_ELT (stmts
, k
, slpstmt_info
)
5633 gassign
*slpstmt
= as_a
<gassign
*> (slpstmt_info
->stmt
);
5634 if (!operand_equal_p (gimple_assign_rhs2 (slpstmt
), op1
, 0))
5635 scalar_shift_arg
= false;
5638 /* For internal SLP defs we have to make sure we see scalar stmts
5639 for all vector elements.
5640 ??? For different vectors we could resort to a different
5641 scalar shift operand but code-generation below simply always
5643 if (dt
[1] == vect_internal_def
5644 && maybe_ne (nunits_out
* SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
),
5646 scalar_shift_arg
= false;
5649 /* If the shift amount is computed by a pattern stmt we cannot
5650 use the scalar amount directly thus give up and use a vector
5652 if (op1_def_stmt_info
&& is_pattern_stmt_p (op1_def_stmt_info
))
5653 scalar_shift_arg
= false;
5657 if (dump_enabled_p ())
5658 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5659 "operand mode requires invariant argument.\n");
5663 /* Vector shifted by vector. */
/* Remember the pre-optab decision; used at transform time to decide how
   to materialize the vector shift amount.  */
5664 bool was_scalar_shift_arg
= scalar_shift_arg
;
5665 if (!scalar_shift_arg
)
5667 optab
= optab_for_tree_code (code
, vectype
, optab_vector
);
5668 if (dump_enabled_p ())
5669 dump_printf_loc (MSG_NOTE
, vect_location
,
5670 "vector/vector shift/rotate found.\n");
/* The shift-amount vector type must share the mode of VECTYPE.  */
5673 op1_vectype
= get_same_sized_vectype (TREE_TYPE (op1
), vectype_out
);
5674 if ((op1_vectype
== NULL_TREE
5675 || TYPE_MODE (op1_vectype
) != TYPE_MODE (vectype
))
5677 || SLP_TREE_DEF_TYPE
5678 (SLP_TREE_CHILDREN (slp_node
)[1]) != vect_constant_def
))
5680 if (dump_enabled_p ())
5681 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5682 "unusable type for last operand in"
5683 " vector/vector shift/rotate.\n");
5687 /* See if the machine has a vector shifted by scalar insn and if not
5688 then see if it has a vector shifted by vector insn. */
5691 optab
= optab_for_tree_code (code
, vectype
, optab_scalar
);
5693 && optab_handler (optab
, TYPE_MODE (vectype
)) != CODE_FOR_nothing
)
5695 if (dump_enabled_p ())
5696 dump_printf_loc (MSG_NOTE
, vect_location
,
5697 "vector/scalar shift/rotate found.\n");
/* No vector/scalar insn -- retry with the vector/vector optab and
   demote SCALAR_SHIFT_ARG accordingly.  */
5701 optab
= optab_for_tree_code (code
, vectype
, optab_vector
);
5703 && (optab_handler (optab
, TYPE_MODE (vectype
))
5704 != CODE_FOR_nothing
))
5706 scalar_shift_arg
= false;
5708 if (dump_enabled_p ())
5709 dump_printf_loc (MSG_NOTE
, vect_location
,
5710 "vector/vector shift/rotate found.\n");
5712 /* Unlike the other binary operators, shifts/rotates have
5713 the rhs being int, instead of the same type as the lhs,
5714 so make sure the scalar is the right type if we are
5715 dealing with vectors of long long/long/short/char. */
5716 if (dt
[1] == vect_constant_def
)
5719 op1
= fold_convert (TREE_TYPE (vectype
), op1
);
5721 else if (!useless_type_conversion_p (TREE_TYPE (vectype
),
5724 if (vec_stmt
&& !slp_node
)
5726 op1
= fold_convert (TREE_TYPE (vectype
), op1
);
5727 op1
= vect_init_vector (stmt_info
, op1
,
5728 TREE_TYPE (vectype
), NULL
);
5735 /* Supportable by target? */
5738 if (dump_enabled_p ())
5739 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
/* Probe the selected optab for an insn code in the vector mode.  */
5743 vec_mode
= TYPE_MODE (vectype
);
5744 icode
= (int) optab_handler (optab
, vec_mode
);
5745 if (icode
== CODE_FOR_nothing
)
5747 if (dump_enabled_p ())
5748 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5749 "op not supported by target.\n");
5750 /* Check only during analysis. */
5751 if (maybe_ne (GET_MODE_SIZE (vec_mode
), UNITS_PER_WORD
)
5753 && !vect_worthwhile_without_simd_p (vinfo
, code
)))
5755 if (dump_enabled_p ())
5756 dump_printf_loc (MSG_NOTE
, vect_location
,
5757 "proceeding using word mode.\n");
5760 /* Worthwhile without SIMD support? Check only during analysis. */
5762 && !VECTOR_MODE_P (TYPE_MODE (vectype
))
5763 && !vect_worthwhile_without_simd_p (vinfo
, code
))
5765 if (dump_enabled_p ())
5766 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5767 "not worthwhile without SIMD support.\n");
/* Analysis-only path: record the stmt kind and its cost, no codegen.  */
5771 if (!vec_stmt
) /* transformation not required. */
5773 STMT_VINFO_TYPE (stmt_info
) = shift_vec_info_type
;
5774 DUMP_VECT_SCOPE ("vectorizable_shift");
5775 vect_model_simple_cost (stmt_info
, ncopies
, dt
, ndts
, slp_node
, cost_vec
);
/* Transform phase starts here.  */
5781 if (dump_enabled_p ())
5782 dump_printf_loc (MSG_NOTE
, vect_location
,
5783 "transform binary/unary operation.\n");
5786 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
/* Emit NCOPIES vector stmts, chaining copies via RELATED_STMT.  */
5788 prev_stmt_info
= NULL
;
5789 for (j
= 0; j
< ncopies
; j
++)
5794 if (scalar_shift_arg
)
5796 /* Vector shl and shr insn patterns can be defined with scalar
5797 operand 2 (shift operand). In this case, use constant or loop
5798 invariant op1 directly, without extending it to vector mode
5800 optab_op2_mode
= insn_data
[icode
].operand
[2].mode
;
5801 if (!VECTOR_MODE_P (optab_op2_mode
))
5803 if (dump_enabled_p ())
5804 dump_printf_loc (MSG_NOTE
, vect_location
,
5805 "operand 1 using scalar mode.\n");
5807 vec_oprnds1
.create (slp_node
? slp_node
->vec_stmts_size
: 1);
5808 vec_oprnds1
.quick_push (vec_oprnd1
);
5811 /* Store vec_oprnd1 for every vector stmt to be created
5812 for SLP_NODE. We check during the analysis that all
5813 the shift arguments are the same.
5814 TODO: Allow different constants for different vector
5815 stmts generated for an SLP instance. */
5816 for (k
= 0; k
< slp_node
->vec_stmts_size
- 1; k
++)
5817 vec_oprnds1
.quick_push (vec_oprnd1
);
5822 && !useless_type_conversion_p (TREE_TYPE (vectype
),
5825 if (was_scalar_shift_arg
)
5827 /* If the argument was the same in all lanes create
5828 the correctly typed vector shift amount directly. */
5829 op1
= fold_convert (TREE_TYPE (vectype
), op1
);
5830 op1
= vect_init_vector (stmt_info
, op1
, TREE_TYPE (vectype
),
5831 !loop_vinfo
? gsi
: NULL
);
5832 vec_oprnd1
= vect_init_vector (stmt_info
, op1
, vectype
,
5833 !loop_vinfo
? gsi
: NULL
);
5834 vec_oprnds1
.create (slp_node
->vec_stmts_size
);
5835 for (k
= 0; k
< slp_node
->vec_stmts_size
; k
++)
5836 vec_oprnds1
.quick_push (vec_oprnd1
);
5838 else if (dt
[1] == vect_constant_def
)
5840 /* Convert the scalar constant shift amounts in-place. */
5841 slp_tree shift
= SLP_TREE_CHILDREN (slp_node
)[1];
5842 gcc_assert (SLP_TREE_DEF_TYPE (shift
) == vect_constant_def
);
5843 for (unsigned i
= 0;
5844 i
< SLP_TREE_SCALAR_OPS (shift
).length (); ++i
)
5846 SLP_TREE_SCALAR_OPS (shift
)[i
]
5847 = fold_convert (TREE_TYPE (vectype
),
5848 SLP_TREE_SCALAR_OPS (shift
)[i
]);
5849 gcc_assert ((TREE_CODE (SLP_TREE_SCALAR_OPS (shift
)[i
])
5854 gcc_assert (TYPE_MODE (op1_vectype
) == TYPE_MODE (vectype
));
5857 /* vec_oprnd1 is available if operand 1 should be of a scalar-type
5858 (a special case for certain kind of vector shifts); otherwise,
5859 operand 1 should be of a vector type (the usual case). */
5861 vect_get_vec_defs (op0
, NULL_TREE
, stmt_info
, &vec_oprnds0
, NULL
,
5864 vect_get_vec_defs (op0
, op1
, stmt_info
, &vec_oprnds0
, &vec_oprnds1
,
5868 vect_get_vec_defs_for_stmt_copy (vinfo
, &vec_oprnds0
, &vec_oprnds1
);
5870 /* Arguments are ready. Create the new vector stmt. */
5871 stmt_vec_info new_stmt_info
= NULL
;
5872 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vop0
)
5874 vop1
= vec_oprnds1
[i
];
5875 gassign
*new_stmt
= gimple_build_assign (vec_dest
, code
, vop0
, vop1
);
5876 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
5877 gimple_assign_set_lhs (new_stmt
, new_temp
);
5879 = vect_finish_stmt_generation (stmt_info
, new_stmt
, gsi
);
5881 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt_info
);
/* Chain the generated copies: first one goes into VEC_STMT, the rest
   hang off the previous copy's RELATED_STMT.  */
5888 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt_info
;
5890 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt_info
;
5891 prev_stmt_info
= new_stmt_info
;
5894 vec_oprnds0
.release ();
5895 vec_oprnds1
.release ();
5901 /* Function vectorizable_operation.
5903 Check if STMT_INFO performs a binary, unary or ternary operation that can
5905 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
5906 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
5907 Return true if STMT_INFO is vectorizable in this way. */
/* NOTE(review): lossy line-split extraction -- leading numbers are original
   source line numbers; some lines are missing from this view.  Added
   comments describe only what is visible.  */
5910 vectorizable_operation (stmt_vec_info stmt_info
, gimple_stmt_iterator
*gsi
,
5911 stmt_vec_info
*vec_stmt
, slp_tree slp_node
,
5912 stmt_vector_for_cost
*cost_vec
)
/* Local state: up to three operands (unary/binary/ternary), their def
   kinds, and per-operand vectors reused across the copies loop.  */
5916 tree op0
, op1
= NULL_TREE
, op2
= NULL_TREE
;
5918 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
5919 enum tree_code code
, orig_code
;
5920 machine_mode vec_mode
;
5924 bool target_support_p
;
5925 enum vect_def_type dt
[3]
5926 = {vect_unknown_def_type
, vect_unknown_def_type
, vect_unknown_def_type
};
5928 stmt_vec_info prev_stmt_info
;
5929 poly_uint64 nunits_in
;
5930 poly_uint64 nunits_out
;
5932 int ncopies
, vec_num
;
5934 vec
<tree
> vec_oprnds0
= vNULL
;
5935 vec
<tree
> vec_oprnds1
= vNULL
;
5936 vec
<tree
> vec_oprnds2
= vNULL
;
5937 tree vop0
, vop1
, vop2
;
5938 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
5939 vec_info
*vinfo
= stmt_info
->vinfo
;
/* Analysis gate: relevance / def-kind checks as in the other
   vectorizable_* routines.  */
5941 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
5944 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
5948 /* Is STMT a vectorizable binary/unary operation? */
5949 gassign
*stmt
= dyn_cast
<gassign
*> (stmt_info
->stmt
)
;
5953 if (TREE_CODE (gimple_assign_lhs (stmt
)) != SSA_NAME
)
/* ORIG_CODE keeps the untranslated code (needed for POINTER_DIFF_EXPR
   special-casing at transform time below).  */
5956 orig_code
= code
= gimple_assign_rhs_code (stmt
);
5958 /* For pointer addition and subtraction, we should use the normal
5959 plus and minus for the vector operation. */
5960 if (code
== POINTER_PLUS_EXPR
)
5962 if (code
== POINTER_DIFF_EXPR
)
5965 /* Support only unary or binary operations. */
5966 op_type
= TREE_CODE_LENGTH (code
);
5967 if (op_type
!= unary_op
&& op_type
!= binary_op
&& op_type
!= ternary_op
)
5969 if (dump_enabled_p ())
5970 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5971 "num. args = %d (not unary/binary/ternary op).\n",
5976 scalar_dest
= gimple_assign_lhs (stmt
);
5977 vectype_out
= STMT_VINFO_VECTYPE (stmt_info
);
5979 /* Most operations cannot handle bit-precision types without extra
5981 if (!VECTOR_BOOLEAN_TYPE_P (vectype_out
)
5982 && !type_has_mode_precision_p (TREE_TYPE (scalar_dest
))
5983 /* Exception are bitwise binary operations. */
5984 && code
!= BIT_IOR_EXPR
5985 && code
!= BIT_XOR_EXPR
5986 && code
!= BIT_AND_EXPR
)
5988 if (dump_enabled_p ())
5989 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5990 "bit-precision arithmetic not supported.\n");
/* Validate operand 0 and derive the input vectype from it.  */
5994 op0
= gimple_assign_rhs1 (stmt
);
5995 if (!vect_is_simple_use (op0
, vinfo
, &dt
[0], &vectype
))
5997 if (dump_enabled_p ())
5998 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5999 "use not simple.\n");
6002 /* If op0 is an external or constant def use a vector type with
6003 the same size as the output vector type. */
6006 /* For boolean type we cannot determine vectype by
6007 invariant value (don't know whether it is a vector
6008 of booleans or vector of integers). We use output
6009 vectype because operations on boolean don't change
6011 if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op0
)))
6013 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (scalar_dest
)))
6015 if (dump_enabled_p ())
6016 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6017 "not supported operation on bool value.\n");
6020 vectype
= vectype_out
;
6023 vectype
= get_same_sized_vectype (TREE_TYPE (op0
), vectype_out
);
6026 gcc_assert (vectype
);
6029 if (dump_enabled_p ())
6030 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6031 "no vectype for scalar type %T\n",
/* Lane counts of input and output vector types must match.  */
6037 nunits_out
= TYPE_VECTOR_SUBPARTS (vectype_out
);
6038 nunits_in
= TYPE_VECTOR_SUBPARTS (vectype
);
6039 if (maybe_ne (nunits_out
, nunits_in
))
/* Validate the second and (for ternary ops) third operand.  */
6042 if (op_type
== binary_op
|| op_type
== ternary_op
)
6044 op1
= gimple_assign_rhs2 (stmt
);
6045 if (!vect_is_simple_use (op1
, vinfo
, &dt
[1]))
6047 if (dump_enabled_p ())
6048 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6049 "use not simple.\n");
6053 if (op_type
== ternary_op
)
6055 op2
= gimple_assign_rhs3 (stmt
);
6056 if (!vect_is_simple_use (op2
, vinfo
, &dt
[2]))
6058 if (dump_enabled_p ())
6059 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6060 "use not simple.\n");
6065 /* Multiple types in SLP are handled by creating the appropriate number of
6066 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
6071 vec_num
= SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
);
6075 ncopies
= vect_get_num_copies (loop_vinfo
, vectype
);
6079 gcc_assert (ncopies
>= 1);
6081 /* Shifts are handled in vectorizable_shift (). */
6082 if (code
== LSHIFT_EXPR
|| code
== RSHIFT_EXPR
|| code
== LROTATE_EXPR
6083 || code
== RROTATE_EXPR
)
6086 /* Supportable by target? */
6088 vec_mode
= TYPE_MODE (vectype
);
/* MULT_HIGHPART has a dedicated target query; everything else goes
   through the generic optab lookup.  */
6089 if (code
== MULT_HIGHPART_EXPR
)
6090 target_support_p
= can_mult_highpart_p (vec_mode
, TYPE_UNSIGNED (vectype
));
6093 optab
= optab_for_tree_code (code
, vectype
, optab_default
);
6096 if (dump_enabled_p ())
6097 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6101 target_support_p
= (optab_handler (optab
, vec_mode
)
6102 != CODE_FOR_nothing
);
6105 if (!target_support_p
)
6107 if (dump_enabled_p ())
6108 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6109 "op not supported by target.\n");
6110 /* Check only during analysis. */
6111 if (maybe_ne (GET_MODE_SIZE (vec_mode
), UNITS_PER_WORD
)
6112 || (!vec_stmt
&& !vect_worthwhile_without_simd_p (vinfo
, code
)))
6114 if (dump_enabled_p ())
6115 dump_printf_loc (MSG_NOTE
, vect_location
,
6116 "proceeding using word mode.\n");
6119 /* Worthwhile without SIMD support? Check only during analysis. */
6120 if (!VECTOR_MODE_P (vec_mode
)
6122 && !vect_worthwhile_without_simd_p (vinfo
, code
))
6124 if (dump_enabled_p ())
6125 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6126 "not worthwhile without SIMD support.\n");
/* Masked-loop support: look up the conditional internal fn for CODE so
   a fully-masked loop can keep inactive reduction lanes unchanged.  */
6130 int reduc_idx
= STMT_VINFO_REDUC_IDX (stmt_info
);
6131 vec_loop_masks
*masks
= (loop_vinfo
? &LOOP_VINFO_MASKS (loop_vinfo
) : NULL
);
6132 internal_fn cond_fn
= get_conditional_internal_fn (code
);
6134 if (!vec_stmt
) /* transformation not required. */
6136 /* If this operation is part of a reduction, a fully-masked loop
6137 should only change the active lanes of the reduction chain,
6138 keeping the inactive lanes as-is. */
6140 && LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo
)
6143 if (cond_fn
== IFN_LAST
6144 || !direct_internal_fn_supported_p (cond_fn
, vectype
,
6145 OPTIMIZE_FOR_SPEED
))
6147 if (dump_enabled_p ())
6148 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6149 "can't use a fully-masked loop because no"
6150 " conditional operation is available.\n");
6151 LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo
) = false;
6154 vect_record_loop_mask (loop_vinfo
, masks
, ncopies
* vec_num
,
6158 STMT_VINFO_TYPE (stmt_info
) = op_vec_info_type
;
6159 DUMP_VECT_SCOPE ("vectorizable_operation");
6160 vect_model_simple_cost (stmt_info
, ncopies
, dt
, ndts
, slp_node
, cost_vec
);
/* Transform phase starts here.  */
6166 if (dump_enabled_p ())
6167 dump_printf_loc (MSG_NOTE
, vect_location
,
6168 "transform binary/unary operation.\n");
6170 bool masked_loop_p
= loop_vinfo
&& LOOP_VINFO_FULLY_MASKED_P (loop_vinfo
);
6172 /* POINTER_DIFF_EXPR has pointer arguments which are vectorized as
6173 vectors with unsigned elements, but the result is signed. So, we
6174 need to compute the MINUS_EXPR into vectype temporary and
6175 VIEW_CONVERT_EXPR it into the final vectype_out result. */
6176 tree vec_cvt_dest
= NULL_TREE
;
6177 if (orig_code
== POINTER_DIFF_EXPR
)
6179 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
6180 vec_cvt_dest
= vect_create_destination_var (scalar_dest
, vectype_out
);
6184 vec_dest
= vect_create_destination_var (scalar_dest
, vectype_out
);
6186 /* In case the vectorization factor (VF) is bigger than the number
6187 of elements that we can fit in a vectype (nunits), we have to generate
6188 more than one vector stmt - i.e - we need to "unroll" the
6189 vector stmt by a factor VF/nunits. In doing so, we record a pointer
6190 from one copy of the vector stmt to the next, in the field
6191 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
6192 stages to find the correct vector defs to be used when vectorizing
6193 stmts that use the defs of the current stmt. The example below
6194 illustrates the vectorization process when VF=16 and nunits=4 (i.e.,
6195 we need to create 4 vectorized stmts):
6197 before vectorization:
6198 RELATED_STMT VEC_STMT
6202 step 1: vectorize stmt S1 (done in vectorizable_load. See more details
6204 RELATED_STMT VEC_STMT
6205 VS1_0: vx0 = memref0 VS1_1 -
6206 VS1_1: vx1 = memref1 VS1_2 -
6207 VS1_2: vx2 = memref2 VS1_3 -
6208 VS1_3: vx3 = memref3 - -
6209 S1: x = load - VS1_0
6212 step2: vectorize stmt S2 (done here):
6213 To vectorize stmt S2 we first need to find the relevant vector
6214 def for the first operand 'x'. This is, as usual, obtained from
6215 the vector stmt recorded in the STMT_VINFO_VEC_STMT of the stmt
6216 that defines 'x' (S1). This way we find the stmt VS1_0, and the
6217 relevant vector def 'vx0'. Having found 'vx0' we can generate
6218 the vector stmt VS2_0, and as usual, record it in the
6219 STMT_VINFO_VEC_STMT of stmt S2.
6220 When creating the second copy (VS2_1), we obtain the relevant vector
6221 def from the vector stmt recorded in the STMT_VINFO_RELATED_STMT of
6222 stmt VS1_0. This way we find the stmt VS1_1 and the relevant
6223 vector def 'vx1'. Using 'vx1' we create stmt VS2_1 and record a
6224 pointer to it in the STMT_VINFO_RELATED_STMT of the vector stmt VS2_0.
6225 Similarly when creating stmts VS2_2 and VS2_3. This is the resulting
6226 chain of stmts and pointers:
6227 RELATED_STMT VEC_STMT
6228 VS1_0: vx0 = memref0 VS1_1 -
6229 VS1_1: vx1 = memref1 VS1_2 -
6230 VS1_2: vx2 = memref2 VS1_3 -
6231 VS1_3: vx3 = memref3 - -
6232 S1: x = load - VS1_0
6233 VS2_0: vz0 = vx0 + v1 VS2_1 -
6234 VS2_1: vz1 = vx1 + v1 VS2_2 -
6235 VS2_2: vz2 = vx2 + v1 VS2_3 -
6236 VS2_3: vz3 = vx3 + v1 - -
6237 S2: z = x + 1 - VS2_0 */
6239 prev_stmt_info
= NULL
;
6240 for (j
= 0; j
< ncopies
; j
++)
/* First iteration: gather the vector defs for each operand (SLP or
   non-SLP paths); later iterations reuse/advance them.  */
6245 if (op_type
== binary_op
)
6246 vect_get_vec_defs (op0
, op1
, stmt_info
, &vec_oprnds0
, &vec_oprnds1
,
6248 else if (op_type
== ternary_op
)
6252 auto_vec
<vec
<tree
> > vec_defs(3);
6253 vect_get_slp_defs (slp_node
, &vec_defs
);
6254 vec_oprnds0
= vec_defs
[0];
6255 vec_oprnds1
= vec_defs
[1];
6256 vec_oprnds2
= vec_defs
[2];
6260 vect_get_vec_defs (op0
, op1
, stmt_info
, &vec_oprnds0
,
6261 &vec_oprnds1
, NULL
);
6262 vect_get_vec_defs (op2
, NULL_TREE
, stmt_info
, &vec_oprnds2
,
6267 vect_get_vec_defs (op0
, NULL_TREE
, stmt_info
, &vec_oprnds0
, NULL
,
6272 vect_get_vec_defs_for_stmt_copy (vinfo
, &vec_oprnds0
, &vec_oprnds1
);
6273 if (op_type
== ternary_op
)
6275 tree vec_oprnd
= vec_oprnds2
.pop ();
6276 vec_oprnds2
.quick_push (vect_get_vec_def_for_stmt_copy (vinfo
,
6281 /* Arguments are ready. Create the new vector stmt. */
6282 stmt_vec_info new_stmt_info
= NULL
;
6283 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vop0
)
6285 vop1
= ((op_type
== binary_op
|| op_type
== ternary_op
)
6286 ? vec_oprnds1
[i
] : NULL_TREE
);
6287 vop2
= ((op_type
== ternary_op
)
6288 ? vec_oprnds2
[i
] : NULL_TREE
);
6289 if (masked_loop_p
&& reduc_idx
>= 0)
6291 /* Perform the operation on active elements only and take
6292 inactive elements from the reduction chain input. */
6294 vop2
= reduc_idx
== 1 ? vop1
: vop0
;
6295 tree mask
= vect_get_loop_mask (gsi
, masks
, vec_num
* ncopies
,
6296 vectype
, i
* ncopies
+ j
);
6297 gcall
*call
= gimple_build_call_internal (cond_fn
, 4, mask
,
6299 new_temp
= make_ssa_name (vec_dest
, call
);
6300 gimple_call_set_lhs (call
, new_temp
);
6301 gimple_call_set_nothrow (call
, true);
6303 = vect_finish_stmt_generation (stmt_info
, call
, gsi
);
/* Unmasked path: plain vector assign; POINTER_DIFF results are then
   VIEW_CONVERTed from the unsigned vectype to vectype_out.  */
6307 gassign
*new_stmt
= gimple_build_assign (vec_dest
, code
,
6309 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
6310 gimple_assign_set_lhs (new_stmt
, new_temp
);
6312 = vect_finish_stmt_generation (stmt_info
, new_stmt
, gsi
);
6315 new_temp
= build1 (VIEW_CONVERT_EXPR
, vectype_out
, new_temp
);
6317 = gimple_build_assign (vec_cvt_dest
, VIEW_CONVERT_EXPR
,
6319 new_temp
= make_ssa_name (vec_cvt_dest
, new_stmt
);
6320 gimple_assign_set_lhs (new_stmt
, new_temp
);
6322 = vect_finish_stmt_generation (stmt_info
, new_stmt
, gsi
);
6326 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt_info
);
/* Chain the copies via VEC_STMT / RELATED_STMT as documented above.  */
6333 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt_info
;
6335 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt_info
;
6336 prev_stmt_info
= new_stmt_info
;
6339 vec_oprnds0
.release ();
6340 vec_oprnds1
.release ();
6341 vec_oprnds2
.release ();
6346 /* A helper function to ensure data reference DR_INFO's base alignment. */
/* NOTE(review): lossy line-split extraction -- leading numbers are original
   source line numbers; some lines are missing from this view.  */
6349 ensure_base_align (dr_vec_info
*dr_info
)
/* Nothing to do if the misalignment was never computed.  */
6351 if (dr_info
->misalignment
== DR_MISALIGNMENT_UNINITIALIZED
)
6354 if (dr_info
->base_misaligned
)
6356 tree base_decl
= dr_info
->base_decl
;
6358 // We should only be able to increase the alignment of a base object if
6359 // we know what its new alignment should be at compile time.
6360 unsigned HOST_WIDE_INT align_base_to
=
6361 DR_TARGET_ALIGNMENT (dr_info
).to_constant () * BITS_PER_UNIT
;
/* Symtab-visible decls go through the symbol table so the increased
   alignment is propagated; local decls are realigned directly.  */
6363 if (decl_in_symtab_p (base_decl
))
6364 symtab_node::get (base_decl
)->increase_alignment (align_base_to
);
6365 else if (DECL_ALIGN (base_decl
) < align_base_to
)
6367 SET_DECL_ALIGN (base_decl
, align_base_to
);
/* Mark as user-aligned so later passes don't lower it again.  */
6368 DECL_USER_ALIGN (base_decl
) = 1;
/* Alignment has been fixed; clear the flag so this is done only once.  */
6370 dr_info
->base_misaligned
= false;
6375 /* Function get_group_alias_ptr_type.
6377 Return the alias type for the group starting at FIRST_STMT_INFO. */
/* NOTE(review): lossy line-split extraction -- leading numbers are original
   source line numbers; some lines are missing from this view.  */
6380 get_group_alias_ptr_type (stmt_vec_info first_stmt_info
)
6382 struct data_reference
*first_dr
, *next_dr
;
6384 first_dr
= STMT_VINFO_DATA_REF (first_stmt_info
);
6385 stmt_vec_info next_stmt_info
= DR_GROUP_NEXT_ELEMENT (first_stmt_info
);
/* Walk all group members; if any member's alias set differs from the
   first one's, fall back to the conservative ptr_type_node.  */
6386 while (next_stmt_info
)
6388 next_dr
= STMT_VINFO_DATA_REF (next_stmt_info
);
6389 if (get_alias_set (DR_REF (first_dr
))
6390 != get_alias_set (DR_REF (next_dr
)))
6392 if (dump_enabled_p ())
6393 dump_printf_loc (MSG_NOTE
, vect_location
,
6394 "conflicting alias set types.\n");
6395 return ptr_type_node
;
6397 next_stmt_info
= DR_GROUP_NEXT_ELEMENT (next_stmt_info
);
/* All members agree -- use the first reference's alias pointer type.  */
6399 return reference_alias_ptr_type (DR_REF (first_dr
));
6403 /* Function scan_operand_equal_p.
6405 Helper function for check_scan_store. Compare two references
6406 with .GOMP_SIMD_LANE bases. */
/* NOTE(review): lossy line-split extraction -- leading numbers are original
   source line numbers; some lines (returns, braces) are missing from this
   view.  Added comments describe only what is visible.  */
6409 scan_operand_equal_p (tree ref1
, tree ref2
)
/* Decompose both references into base / offset / bit position.  */
6411 tree ref
[2] = { ref1
, ref2
};
6412 poly_int64 bitsize
[2], bitpos
[2];
6413 tree offset
[2], base
[2];
6414 for (int i
= 0; i
< 2; ++i
)
6417 int unsignedp
, reversep
, volatilep
= 0;
6418 base
[i
] = get_inner_reference (ref
[i
], &bitsize
[i
], &bitpos
[i
],
6419 &offset
[i
], &mode
, &unsignedp
,
6420 &reversep
, &volatilep
);
/* Reverse storage order, volatile accesses, or a non-zero bit position
   disqualify the reference.  */
6421 if (reversep
|| volatilep
|| maybe_ne (bitpos
[i
], 0))
/* Strip a MEM_REF whose SSA base is defined as &decl p+ idx, folding
   that POINTER_PLUS into base/offset form.  */
6423 if (TREE_CODE (base
[i
]) == MEM_REF
6424 && offset
[i
] == NULL_TREE
6425 && TREE_CODE (TREE_OPERAND (base
[i
], 0)) == SSA_NAME
)
6427 gimple
*def_stmt
= SSA_NAME_DEF_STMT (TREE_OPERAND (base
[i
], 0));
6428 if (is_gimple_assign (def_stmt
)
6429 && gimple_assign_rhs_code (def_stmt
) == POINTER_PLUS_EXPR
6430 && TREE_CODE (gimple_assign_rhs1 (def_stmt
)) == ADDR_EXPR
6431 && TREE_CODE (gimple_assign_rhs2 (def_stmt
)) == SSA_NAME
)
6433 if (maybe_ne (mem_ref_offset (base
[i
]), 0))
6435 base
[i
] = TREE_OPERAND (gimple_assign_rhs1 (def_stmt
), 0);
6436 offset
[i
] = gimple_assign_rhs2 (def_stmt
);
/* Bases, bit sizes, and offsets of the two references must match.  */
6441 if (!operand_equal_p (base
[0], base
[1], 0))
6443 if (maybe_ne (bitsize
[0], bitsize
[1]))
6445 if (offset
[0] != offset
[1])
6447 if (!offset
[0] || !offset
[1])
6449 if (!operand_equal_p (offset
[0], offset
[1], 0))
/* Normalize each offset into (offset, step) form, peeling MULT_EXPRs
   (explicit or via SSA defs) and widening casts.  */
6452 for (int i
= 0; i
< 2; ++i
)
6454 step
[i
] = integer_one_node
;
6455 if (TREE_CODE (offset
[i
]) == SSA_NAME
)
6457 gimple
*def_stmt
= SSA_NAME_DEF_STMT (offset
[i
]);
6458 if (is_gimple_assign (def_stmt
)
6459 && gimple_assign_rhs_code (def_stmt
) == MULT_EXPR
6460 && (TREE_CODE (gimple_assign_rhs2 (def_stmt
))
6463 step
[i
] = gimple_assign_rhs2 (def_stmt
);
6464 offset
[i
] = gimple_assign_rhs1 (def_stmt
);
6467 else if (TREE_CODE (offset
[i
]) == MULT_EXPR
)
6469 step
[i
] = TREE_OPERAND (offset
[i
], 1);
6470 offset
[i
] = TREE_OPERAND (offset
[i
], 0);
/* Look through integral conversions that do not lose precision.  */
6472 tree rhs1
= NULL_TREE
;
6473 if (TREE_CODE (offset
[i
]) == SSA_NAME
)
6475 gimple
*def_stmt
= SSA_NAME_DEF_STMT (offset
[i
]);
6476 if (gimple_assign_cast_p (def_stmt
))
6477 rhs1
= gimple_assign_rhs1 (def_stmt
);
6479 else if (CONVERT_EXPR_P (offset
[i
]))
6480 rhs1
= TREE_OPERAND (offset
[i
], 0);
6482 && INTEGRAL_TYPE_P (TREE_TYPE (rhs1
))
6483 && INTEGRAL_TYPE_P (TREE_TYPE (offset
[i
]))
6484 && (TYPE_PRECISION (TREE_TYPE (offset
[i
]))
6485 >= TYPE_PRECISION (TREE_TYPE (rhs1
))))
/* Final check: normalized offsets and steps must both match.  */
6488 if (!operand_equal_p (offset
[0], offset
[1], 0)
6489 || !operand_equal_p (step
[0], step
[1], 0))
/* Strategies for emitting one step of an OpenMP scan-store sequence;
   chosen per step by scan_store_can_perm_p below.  */
6497 enum scan_store_kind
{
6498 /* Normal permutation. */
6499 scan_store_kind_perm
,
6501 /* Whole vector left shift permutation with zero init. */
6502 scan_store_kind_lshift_zero
,
6504 /* Whole vector left shift permutation and VEC_COND_EXPR. */
6505 scan_store_kind_lshift_cond
6508 /* Function scan_store_can_perm_p.
6510 Verify if we can perform the needed permutations or whole vector shifts.
6511 Return -1 on failure, otherwise exact log2 of vectype's nunits.
6512 USE_WHOLE_VECTOR is a vector of enum scan_store_kind which operation
6513 to do at each step. */
/* NOTE(review): the original comment named this "check_scan_store" -- a
   stale copy from the function below; the function defined here is
   scan_store_can_perm_p.  Also: lossy line-split extraction; leading
   numbers are original source line numbers, some lines are missing.  */
6516 scan_store_can_perm_p (tree vectype
, tree init
,
6517 vec
<enum scan_store_kind
> *use_whole_vector
= NULL
)
/* Only constant-lane-count vector types with a power-of-two number of
   units are supported.  */
6519 enum machine_mode vec_mode
= TYPE_MODE (vectype
);
6520 unsigned HOST_WIDE_INT nunits
;
6521 if (!TYPE_VECTOR_SUBPARTS (vectype
).is_constant (&nunits
))
6523 int units_log2
= exact_log2 (nunits
);
6524 if (units_log2
<= 0)
6528 enum scan_store_kind whole_vector_shift_kind
= scan_store_kind_perm
;
/* One iteration per scan step (log2(nunits) shifts) plus a final
   broadcast-last-lane permutation at i == units_log2.  */
6529 for (i
= 0; i
<= units_log2
; ++i
)
6531 unsigned HOST_WIDE_INT j
, k
;
6532 enum scan_store_kind kind
= scan_store_kind_perm
;
6533 vec_perm_builder
sel (nunits
, nunits
, 1);
6534 sel
.quick_grow (nunits
);
6535 if (i
== units_log2
)
/* Final step: every lane selects the last element.  */
6537 for (j
= 0; j
< nunits
; ++j
)
6538 sel
[j
] = nunits
- 1;
/* Step i: shift the second input left by 2^i lanes.  */
6542 for (j
= 0; j
< (HOST_WIDE_INT_1U
<< i
); ++j
)
6544 for (k
= 0; j
< nunits
; ++j
, ++k
)
6545 sel
[j
] = nunits
+ k
;
6547 vec_perm_indices
indices (sel
, i
== units_log2
? 1 : 2, nunits
);
6548 if (!can_vec_perm_const_p (vec_mode
, indices
))
/* The constant permutation is unavailable; for the shift steps try a
   whole-vector shift (vec_shl) instead, possibly with a VEC_COND to
   patch in the init value.  */
6550 if (i
== units_log2
)
6553 if (whole_vector_shift_kind
== scan_store_kind_perm
)
6555 if (optab_handler (vec_shl_optab
, vec_mode
) == CODE_FOR_nothing
)
6557 whole_vector_shift_kind
= scan_store_kind_lshift_zero
;
6558 /* Whole vector shifts shift in zeros, so if init is all zero
6559 constant, there is no need to do anything further. */
6560 if ((TREE_CODE (init
) != INTEGER_CST
6561 && TREE_CODE (init
) != REAL_CST
)
6562 || !initializer_zerop (init
))
6564 tree masktype
= build_same_sized_truth_vector_type (vectype
);
6565 if (!expand_vec_cond_expr_p (vectype
, masktype
, VECTOR_CST
))
6567 whole_vector_shift_kind
= scan_store_kind_lshift_cond
;
6570 kind
= whole_vector_shift_kind
;
/* Record the chosen strategy for this step, lazily growing the output
   vector the first time a non-perm kind appears.  */
6572 if (use_whole_vector
)
6574 if (kind
!= scan_store_kind_perm
&& use_whole_vector
->is_empty ())
6575 use_whole_vector
->safe_grow_cleared (i
);
6576 if (kind
!= scan_store_kind_perm
|| !use_whole_vector
->is_empty ())
6577 use_whole_vector
->safe_push (kind
);
/* NOTE(review): this listing is a lossy extract of GCC's tree-vect-stmts.c.
   Only lines containing identifiers survived extraction; braces,
   keyword-only lines (e.g. "return false;") and original indentation are
   missing, and each original line is split across several physical lines.
   The comments added below annotate only what is visible here; confirm
   against the upstream file before making code changes.  */
6585 /* Function check_scan_store.
6587    Check magic stores for #pragma omp scan {in,ex}clusive reductions.  */
/* Parameters (per the visible signature): STMT_INFO is the candidate store,
   VECTYPE its vector type, RHS_DT the definition kind of the stored value,
   SLP whether this is an SLP store, MASK a conditional-store mask (if any),
   MEMORY_ACCESS_TYPE the classified access.  Presumably returns bool
   (true = supported scan store) — the return statements did not survive
   extraction; TODO confirm upstream.  */
6590 check_scan_store (stmt_vec_info stmt_info
, tree vectype
,
6591 enum vect_def_type rhs_dt
, bool slp
, tree mask
,
6592 vect_memory_access_type memory_access_type
)
6594 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
6595 dr_vec_info
*dr_info
= STMT_VINFO_DR_INFO (stmt_info
);
/* Only "magic" simd-lane accesses (> 1) reach this function.  */
6598 gcc_assert (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) > 1);
/* Reject anything that is not a plain contiguous, unmasked, non-grouped
   store into a VAR_DECL base at offset 0 inside a (non-fully-masked)
   vectorized loop, with a usable alias type.  (The leading condition of
   this `if` was lost in extraction — presumably `slp || mask` or similar;
   TODO confirm.)  */
6601 || memory_access_type
!= VMAT_CONTIGUOUS
6602 || TREE_CODE (DR_BASE_ADDRESS (dr_info
->dr
)) != ADDR_EXPR
6603 || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (dr_info
->dr
), 0))
6604 || loop_vinfo
== NULL
6605 || LOOP_VINFO_FULLY_MASKED_P (loop_vinfo
)
6606 || STMT_VINFO_GROUPED_ACCESS (stmt_info
)
6607 || !integer_zerop (DR_OFFSET (dr_info
->dr
))
6608 || !integer_zerop (DR_INIT (dr_info
->dr
))
6609 || !(ref_type
= reference_alias_ptr_type (DR_REF (dr_info
->dr
)))
6610 || !alias_sets_conflict_p (get_alias_set (vectype
),
6611 get_alias_set (TREE_TYPE (ref_type
))))
6613 if (dump_enabled_p ())
6614 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6615 "unsupported OpenMP scan store.\n");
6619 /* We need to pattern match code built by OpenMP lowering and simplified
6620 by following optimizations into something we can handle.
6621 #pragma omp simd reduction(inscan,+:r)
6625 #pragma omp scan inclusive (r)
6628 shall have body with:
6629 // Initialization for input phase, store the reduction initializer:
6630 _20 = .GOMP_SIMD_LANE (simduid.3_14(D), 0);
6631 _21 = .GOMP_SIMD_LANE (simduid.3_14(D), 1);
6633 // Actual input phase:
6635 r.0_5 = D.2042[_20];
6638 // Initialization for scan phase:
6639 _25 = .GOMP_SIMD_LANE (simduid.3_14(D), 2);
6645 // Actual scan phase:
6647 r.1_8 = D.2042[_20];
6649 The "omp simd array" variable D.2042 holds the privatized copy used
6650 inside of the loop and D.2043 is another one that holds copies of
6651 the current original list item. The separate GOMP_SIMD_LANE ifn
6652 kinds are there in order to allow optimizing the initializer store
6653 and combiner sequence, e.g. if it is originally some C++ish user
6654 defined reduction, but allow the vectorizer to pattern recognize it
6655 and turn into the appropriate vectorized scan.
6657 For exclusive scan, this is slightly different:
6658 #pragma omp simd reduction(inscan,+:r)
6662 #pragma omp scan exclusive (r)
6665 shall have body with:
6666 // Initialization for input phase, store the reduction initializer:
6667 _20 = .GOMP_SIMD_LANE (simduid.3_14(D), 0);
6668 _21 = .GOMP_SIMD_LANE (simduid.3_14(D), 1);
6670 // Actual input phase:
6672 r.0_5 = D.2042[_20];
6675 // Initialization for scan phase:
6676 _25 = .GOMP_SIMD_LANE (simduid.3_14(D), 3);
6682 // Actual scan phase:
6684 r.1_8 = D.2044[_20];
/* Simd-lane-access kind 2 marks the reduction-initializer store shown in
   the big comment above; its rhs is cached in loop_vinfo->scan_map keyed
   by the "omp simd array" variable so the combiner handling below can
   find the vectorized initializer later.  */
6687 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) == 2)
6689 /* Match the D.2042[_21] = 0; store above. Just require that
6690 it is a constant or external definition store. */
6691 if (rhs_dt
!= vect_constant_def
&& rhs_dt
!= vect_external_def
)
6694 if (dump_enabled_p ())
6695 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6696 "unsupported OpenMP scan initializer store.\n");
/* Lazily allocate the per-loop var -> initializer-rhs cache.  */
6700 if (! loop_vinfo
->scan_map
)
6701 loop_vinfo
->scan_map
= new hash_map
<tree
, tree
>;
6702 tree var
= TREE_OPERAND (DR_BASE_ADDRESS (dr_info
->dr
), 0);
6703 tree
&cached
= loop_vinfo
->scan_map
->get_or_insert (var
);
6706 cached
= gimple_assign_rhs1 (STMT_VINFO_STMT (stmt_info
));
6708 /* These stores can be vectorized normally. */
/* From here on: the scan combiner store (kinds 3/4).  Its rhs must be an
   SSA name defined inside the loop.  */
6712 if (rhs_dt
!= vect_internal_def
)
6715 if (dump_enabled_p ())
6716 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6717 "unsupported OpenMP scan combiner pattern.\n");
6721 gimple
*stmt
= STMT_VINFO_STMT (stmt_info
);
6722 tree rhs
= gimple_assign_rhs1 (stmt
);
6723 if (TREE_CODE (rhs
) != SSA_NAME
)
/* Locate the second ("other") store of the scan pair by walking the
   immediate uses of RHS; which walk is taken depends on whether this is
   the exclusive (kind 4) or inclusive (kind 3) form and whether VAR is
   the inscan variable's simd array.  */
6726 gimple
*other_store_stmt
= NULL
;
6727 tree var
= TREE_OPERAND (DR_BASE_ADDRESS (dr_info
->dr
), 0);
6728 bool inscan_var_store
6729 = lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var
)) != NULL
;
6731 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) == 4)
6733 if (!inscan_var_store
)
6735 use_operand_p use_p
;
6736 imm_use_iterator iter
;
6737 FOR_EACH_IMM_USE_FAST (use_p
, iter
, rhs
)
6739 gimple
*use_stmt
= USE_STMT (use_p
);
6740 if (use_stmt
== stmt
|| is_gimple_debug (use_stmt
))
6742 if (gimple_bb (use_stmt
) != gimple_bb (stmt
)
6743 || !is_gimple_assign (use_stmt
)
6744 || gimple_assign_rhs_class (use_stmt
) != GIMPLE_BINARY_RHS
6746 || TREE_CODE (gimple_assign_lhs (use_stmt
)) != SSA_NAME
)
6748 other_store_stmt
= use_stmt
;
6750 if (other_store_stmt
== NULL
)
6752 rhs
= gimple_assign_lhs (other_store_stmt
);
6753 if (!single_imm_use (rhs
, &use_p
, &other_store_stmt
))
6757 else if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) == 3)
6759 use_operand_p use_p
;
6760 imm_use_iterator iter
;
6761 FOR_EACH_IMM_USE_FAST (use_p
, iter
, rhs
)
6763 gimple
*use_stmt
= USE_STMT (use_p
);
6764 if (use_stmt
== stmt
|| is_gimple_debug (use_stmt
))
6766 if (other_store_stmt
)
6768 other_store_stmt
= use_stmt
;
/* The value being stored must come from a commutative binary operation
   (the scan combiner) in the same basic block; both of its operands must
   be SSA names defined by loads from matching "omp simd array"s.  */
6774 gimple
*def_stmt
= SSA_NAME_DEF_STMT (rhs
);
6775 if (gimple_bb (def_stmt
) != gimple_bb (stmt
)
6776 || !is_gimple_assign (def_stmt
)
6777 || gimple_assign_rhs_class (def_stmt
) != GIMPLE_BINARY_RHS
)
6780 enum tree_code code
= gimple_assign_rhs_code (def_stmt
);
6781 /* For pointer addition, we should use the normal plus for the vector
6785 case POINTER_PLUS_EXPR
:
6788 case MULT_HIGHPART_EXPR
:
6793 if (TREE_CODE_LENGTH (code
) != binary_op
|| !commutative_tree_code (code
))
6796 tree rhs1
= gimple_assign_rhs1 (def_stmt
);
6797 tree rhs2
= gimple_assign_rhs2 (def_stmt
);
6798 if (TREE_CODE (rhs1
) != SSA_NAME
|| TREE_CODE (rhs2
) != SSA_NAME
)
6801 gimple
*load1_stmt
= SSA_NAME_DEF_STMT (rhs1
);
6802 gimple
*load2_stmt
= SSA_NAME_DEF_STMT (rhs2
);
6803 if (gimple_bb (load1_stmt
) != gimple_bb (stmt
)
6804 || !gimple_assign_load_p (load1_stmt
)
6805 || gimple_bb (load2_stmt
) != gimple_bb (stmt
)
6806 || !gimple_assign_load_p (load2_stmt
))
6809 stmt_vec_info load1_stmt_info
= loop_vinfo
->lookup_stmt (load1_stmt
);
6810 stmt_vec_info load2_stmt_info
= loop_vinfo
->lookup_stmt (load2_stmt
);
6811 if (load1_stmt_info
== NULL
6812 || load2_stmt_info
== NULL
6813 || (STMT_VINFO_SIMD_LANE_ACCESS_P (load1_stmt_info
)
6814 != STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
))
6815 || (STMT_VINFO_SIMD_LANE_ACCESS_P (load2_stmt_info
)
6816 != STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
)))
6819 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) == 4 && inscan_var_store
)
6821 dr_vec_info
*load1_dr_info
= STMT_VINFO_DR_INFO (load1_stmt_info
);
6822 if (TREE_CODE (DR_BASE_ADDRESS (load1_dr_info
->dr
)) != ADDR_EXPR
6823 || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (load1_dr_info
->dr
), 0)))
6825 tree var1
= TREE_OPERAND (DR_BASE_ADDRESS (load1_dr_info
->dr
), 0);
6827 if (lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1
)))
/* NOTE(review): `lrhs` used below is defined on a line lost in the
   extraction — presumably the lhs/rhs of one of the loads above;
   confirm upstream.  */
6831 use_operand_p use_p
;
6832 imm_use_iterator iter
;
6833 FOR_EACH_IMM_USE_FAST (use_p
, iter
, lrhs
)
6835 gimple
*use_stmt
= USE_STMT (use_p
);
6836 if (use_stmt
== def_stmt
|| is_gimple_debug (use_stmt
))
6838 if (other_store_stmt
)
6840 other_store_stmt
= use_stmt
;
6844 if (other_store_stmt
== NULL
)
6846 if (gimple_bb (other_store_stmt
) != gimple_bb (stmt
)
6847 || !gimple_store_p (other_store_stmt
))
6850 stmt_vec_info other_store_stmt_info
6851 = loop_vinfo
->lookup_stmt (other_store_stmt
);
6852 if (other_store_stmt_info
== NULL
6853 || (STMT_VINFO_SIMD_LANE_ACCESS_P (other_store_stmt_info
)
6854 != STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
)))
/* Canonicalize the pair of stores/loads so stmt1 consumes load1; swap
   the operands when the order turned out reversed (the combiner is
   required to be commutative above, so this is safe).  */
6857 gimple
*stmt1
= stmt
;
6858 gimple
*stmt2
= other_store_stmt
;
6859 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) == 4 && !inscan_var_store
)
6860 std::swap (stmt1
, stmt2
);
6861 if (scan_operand_equal_p (gimple_assign_lhs (stmt1
),
6862 gimple_assign_rhs1 (load2_stmt
)))
6864 std::swap (rhs1
, rhs2
);
6865 std::swap (load1_stmt
, load2_stmt
);
6866 std::swap (load1_stmt_info
, load2_stmt_info
);
6868 if (!scan_operand_equal_p (gimple_assign_lhs (stmt1
),
6869 gimple_assign_rhs1 (load1_stmt
)))
6872 tree var3
= NULL_TREE
;
6873 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) == 3
6874 && !scan_operand_equal_p (gimple_assign_lhs (stmt2
),
6875 gimple_assign_rhs1 (load2_stmt
)))
6877 else if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) == 4)
6879 dr_vec_info
*load2_dr_info
= STMT_VINFO_DR_INFO (load2_stmt_info
);
6880 if (TREE_CODE (DR_BASE_ADDRESS (load2_dr_info
->dr
)) != ADDR_EXPR
6881 || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (load2_dr_info
->dr
), 0)))
6883 var3
= TREE_OPERAND (DR_BASE_ADDRESS (load2_dr_info
->dr
), 0);
6884 if (!lookup_attribute ("omp simd array", DECL_ATTRIBUTES (var3
))
6885 || lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var3
))
6886 || lookup_attribute ("omp simd inscan exclusive",
6887 DECL_ATTRIBUTES (var3
)))
/* Both stores must target "omp simd array" variables, exactly one of
   which carries the "omp simd inscan" attribute; order var1/var2 so
   var1 is the inscan one after the swap below.  */
6891 dr_vec_info
*other_dr_info
= STMT_VINFO_DR_INFO (other_store_stmt_info
);
6892 if (TREE_CODE (DR_BASE_ADDRESS (other_dr_info
->dr
)) != ADDR_EXPR
6893 || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (other_dr_info
->dr
), 0)))
6896 tree var1
= TREE_OPERAND (DR_BASE_ADDRESS (dr_info
->dr
), 0);
6897 tree var2
= TREE_OPERAND (DR_BASE_ADDRESS (other_dr_info
->dr
), 0);
6898 if (!lookup_attribute ("omp simd array", DECL_ATTRIBUTES (var1
))
6899 || !lookup_attribute ("omp simd array", DECL_ATTRIBUTES (var2
))
6900 || (!lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1
)))
6901 == (!lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var2
))))
6904 if (lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1
)))
6905 std::swap (var1
, var2
);
6907 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) == 4)
6909 if (!lookup_attribute ("omp simd inscan exclusive",
6910 DECL_ATTRIBUTES (var1
)))
/* The initializer store (kind 2, handled earlier) must have been seen
   already: its rhs is looked up from the scan_map cache here.  */
6915 if (loop_vinfo
->scan_map
== NULL
)
6917 tree
*init
= loop_vinfo
->scan_map
->get (var1
)
;
6921 /* The IL is as expected, now check if we can actually vectorize it.
6928 should be vectorized as (where _40 is the vectorized rhs
6929 from the D.2042[_21] = 0; store):
6930 _30 = MEM <vector(8) int> [(int *)&D.2043];
6931 _31 = MEM <vector(8) int> [(int *)&D.2042];
6932 _32 = VEC_PERM_EXPR <_40, _31, { 0, 8, 9, 10, 11, 12, 13, 14 }>;
6934 // _33 = { _31[0], _31[0]+_31[1], _31[1]+_31[2], ..., _31[6]+_31[7] };
6935 _34 = VEC_PERM_EXPR <_40, _33, { 0, 1, 8, 9, 10, 11, 12, 13 }>;
6937 // _35 = { _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
6938 // _31[1]+.._31[4], ... _31[4]+.._31[7] };
6939 _36 = VEC_PERM_EXPR <_40, _35, { 0, 1, 2, 3, 8, 9, 10, 11 }>;
6941 // _37 = { _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
6942 // _31[0]+.._31[4], ... _31[0]+.._31[7] };
6944 _39 = VEC_PERM_EXPR <_38, _38, { 7, 7, 7, 7, 7, 7, 7, 7 }>;
6945 MEM <vector(8) int> [(int *)&D.2043] = _39;
6946 MEM <vector(8) int> [(int *)&D.2042] = _38;
6953 should be vectorized as (where _40 is the vectorized rhs
6954 from the D.2042[_21] = 0; store):
6955 _30 = MEM <vector(8) int> [(int *)&D.2043];
6956 _31 = MEM <vector(8) int> [(int *)&D.2042];
6957 _32 = VEC_PERM_EXPR <_40, _31, { 0, 8, 9, 10, 11, 12, 13, 14 }>;
6958 _33 = VEC_PERM_EXPR <_40, _32, { 0, 8, 9, 10, 11, 12, 13, 14 }>;
6960 // _34 = { 0, _31[0], _31[0]+_31[1], _31[1]+_31[2], _31[2]+_31[3],
6961 // _31[3]+_31[4], ... _31[5]+.._31[6] };
6962 _35 = VEC_PERM_EXPR <_40, _34, { 0, 1, 8, 9, 10, 11, 12, 13 }>;
6964 // _36 = { 0, _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
6965 // _31[1]+.._31[4], ... _31[3]+.._31[6] };
6966 _37 = VEC_PERM_EXPR <_40, _36, { 0, 1, 2, 3, 8, 9, 10, 11 }>;
6968 // _38 = { 0, _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
6969 // _31[0]+.._31[4], ... _31[0]+.._31[6] };
6972 _51 = VEC_PERM_EXPR <_50, _50, { 7, 7, 7, 7, 7, 7, 7, 7 }>;
6973 MEM <vector(8) int> [(int *)&D.2044] = _39;
6974 MEM <vector(8) int> [(int *)&D.2042] = _51; */
/* Finally verify the target supports the combiner operation on VECTYPE
   and the permutations required by the scan expansion.  */
6975 enum machine_mode vec_mode
= TYPE_MODE (vectype
);
6976 optab optab
= optab_for_tree_code (code
, vectype
, optab_default
);
6977 if (!optab
|| optab_handler (optab
, vec_mode
) == CODE_FOR_nothing
)
6980 int units_log2
= scan_store_can_perm_p (vectype
, *init
);
6981 if (units_log2
== -1)
/* NOTE(review): this listing is a lossy extract of GCC's tree-vect-stmts.c.
   Only identifier-bearing lines survived extraction; braces, keyword-only
   lines and original formatting are missing, and each original line is
   split across several physical lines.  Comments added below annotate only
   the visible logic; confirm against the upstream file before editing.  */
6988 /* Function vectorizable_scan_store.
6990 Helper of vectorizable_store, arguments like on vectorizable_store.
6991 Handle only the transformation, checking is done in check_scan_store. */
/* Emits the vectorized prefix-scan sequence (loads, log2(nunits)
   whole-vector-shift/permute + combine steps, final broadcast and stores)
   for the scan stores validated by check_scan_store above.  */
6994 vectorizable_scan_store (stmt_vec_info stmt_info
, gimple_stmt_iterator
*gsi
,
6995 stmt_vec_info
*vec_stmt
, int ncopies
)
6997 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
6998 dr_vec_info
*dr_info
= STMT_VINFO_DR_INFO (stmt_info
);
6999 tree ref_type
= reference_alias_ptr_type (DR_REF (dr_info
->dr
));
7000 vec_info
*vinfo
= stmt_info
->vinfo
;
7001 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
7003 if (dump_enabled_p ())
7004 dump_printf_loc (MSG_NOTE
, vect_location
,
7005 "transform scan store. ncopies = %d\n", ncopies
);
7007 gimple
*stmt
= STMT_VINFO_STMT (stmt_info
);
7008 tree rhs
= gimple_assign_rhs1 (stmt
);
/* check_scan_store already guaranteed this shape; hence the asserts
   rather than bail-outs throughout this function.  */
7009 gcc_assert (TREE_CODE (rhs
) == SSA_NAME
);
7011 tree var
= TREE_OPERAND (DR_BASE_ADDRESS (dr_info
->dr
), 0);
7012 bool inscan_var_store
7013 = lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var
)) != NULL
;
/* Exclusive scan, non-inscan store: follow RHS one use further to reach
   the actual combiner (mirrors the walk in check_scan_store).  */
7015 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) == 4 && !inscan_var_store
)
7017 use_operand_p use_p
;
7018 imm_use_iterator iter
;
7019 FOR_EACH_IMM_USE_FAST (use_p
, iter
, rhs
)
7021 gimple
*use_stmt
= USE_STMT (use_p
);
7022 if (use_stmt
== stmt
|| is_gimple_debug (use_stmt
))
7024 rhs
= gimple_assign_lhs (use_stmt
);
7029 gimple
*def_stmt
= SSA_NAME_DEF_STMT (rhs
);
7030 enum tree_code code
= gimple_assign_rhs_code (def_stmt
);
7031 if (code
== POINTER_PLUS_EXPR
)
7033 gcc_assert (TREE_CODE_LENGTH (code
) == binary_op
7034 && commutative_tree_code (code
));
7035 tree rhs1
= gimple_assign_rhs1 (def_stmt
);
7036 tree rhs2
= gimple_assign_rhs2 (def_stmt
);
7037 gcc_assert (TREE_CODE (rhs1
) == SSA_NAME
&& TREE_CODE (rhs2
) == SSA_NAME
);
7038 gimple
*load1_stmt
= SSA_NAME_DEF_STMT (rhs1
);
7039 gimple
*load2_stmt
= SSA_NAME_DEF_STMT (rhs2
);
7040 stmt_vec_info load1_stmt_info
= loop_vinfo
->lookup_stmt (load1_stmt
);
7041 stmt_vec_info load2_stmt_info
= loop_vinfo
->lookup_stmt (load2_stmt
);
7042 dr_vec_info
*load1_dr_info
= STMT_VINFO_DR_INFO (load1_stmt_info
);
7043 dr_vec_info
*load2_dr_info
= STMT_VINFO_DR_INFO (load2_stmt_info
);
7044 tree var1
= TREE_OPERAND (DR_BASE_ADDRESS (load1_dr_info
->dr
), 0);
7045 tree var2
= TREE_OPERAND (DR_BASE_ADDRESS (load2_dr_info
->dr
), 0);
/* Canonicalize so var1/rhs1/load1 refer to the inscan simd array.  */
7047 if (lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1
)))
7049 std::swap (rhs1
, rhs2
);
7050 std::swap (var1
, var2
);
7051 std::swap (load1_dr_info
, load2_dr_info
);
/* Vectorized rhs of the kind-2 initializer store, cached by
   check_scan_store in loop_vinfo->scan_map.  */
7054 tree
*init
= loop_vinfo
->scan_map
->get (var1
);
7057 unsigned HOST_WIDE_INT nunits
;
7058 if (!TYPE_VECTOR_SUBPARTS (vectype
).is_constant (&nunits
))
7060 auto_vec
<enum scan_store_kind
, 16> use_whole_vector
;
7061 int units_log2
= scan_store_can_perm_p (vectype
, *init
, &use_whole_vector
);
7062 gcc_assert (units_log2
> 0);
/* Build the units_log2 shift/permute masks used by the scan steps plus
   one final mask (index units_log2) that broadcasts the last element.  */
7063 auto_vec
<tree
, 16> perms
;
7064 perms
.quick_grow (units_log2
+ 1);
7065 tree zero_vec
= NULL_TREE
, masktype
= NULL_TREE
;
7066 for (int i
= 0; i
<= units_log2
; ++i
)
7068 unsigned HOST_WIDE_INT j
, k
;
7069 vec_perm_builder
sel (nunits
, nunits
, 1);
7070 sel
.quick_grow (nunits
);
7071 if (i
== units_log2
)
7072 for (j
= 0; j
< nunits
; ++j
)
7073 sel
[j
] = nunits
- 1;
7076 for (j
= 0; j
< (HOST_WIDE_INT_1U
<< i
); ++j
)
7078 for (k
= 0; j
< nunits
; ++j
, ++k
)
7079 sel
[j
] = nunits
+ k
;
7081 vec_perm_indices
indices (sel
, i
== units_log2
? 1 : 2, nunits
);
7082 if (!use_whole_vector
.is_empty ()
7083 && use_whole_vector
[i
] != scan_store_kind_perm
)
7085 if (zero_vec
== NULL_TREE
)
7086 zero_vec
= build_zero_cst (vectype
);
7087 if (masktype
== NULL_TREE
7088 && use_whole_vector
[i
] == scan_store_kind_lshift_cond
)
7089 masktype
= build_same_sized_truth_vector_type (vectype
);
7090 perms
[i
] = vect_gen_perm_mask_any (vectype
, indices
);
7093 perms
[i
] = vect_gen_perm_mask_checked (vectype
, indices
);
/* Main transformation loop: one iteration per vector copy.  */
7096 stmt_vec_info prev_stmt_info
= NULL
;
7097 tree vec_oprnd1
= NULL_TREE
;
7098 tree vec_oprnd2
= NULL_TREE
;
7099 tree vec_oprnd3
= NULL_TREE
;
7100 tree dataref_ptr
= DR_BASE_ADDRESS (dr_info
->dr
);
7101 tree dataref_offset
= build_int_cst (ref_type
, 0);
7102 tree bump
= vect_get_data_ptr_increment (dr_info
, vectype
, VMAT_CONTIGUOUS
);
7103 tree ldataref_ptr
= NULL_TREE
;
7104 tree orig
= NULL_TREE
;
7105 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) == 4 && !inscan_var_store
)
7106 ldataref_ptr
= DR_BASE_ADDRESS (load1_dr_info
->dr
);
7107 for (int j
= 0; j
< ncopies
; j
++)
7109 stmt_vec_info new_stmt_info
;
/* First copy: fetch vector defs for the initializer and the combiner
   operands; later copies reuse them via vect_get_vec_def_for_stmt_copy.  */
7112 vec_oprnd1
= vect_get_vec_def_for_operand (*init
, stmt_info
);
7113 if (ldataref_ptr
== NULL
)
7114 vec_oprnd2
= vect_get_vec_def_for_operand (rhs1
, stmt_info
);
7115 vec_oprnd3
= vect_get_vec_def_for_operand (rhs2
, stmt_info
);
7120 vec_oprnd1
= vect_get_vec_def_for_stmt_copy (vinfo
, vec_oprnd1
);
7121 if (ldataref_ptr
== NULL
)
7122 vec_oprnd2
= vect_get_vec_def_for_stmt_copy (vinfo
, vec_oprnd2
);
7123 vec_oprnd3
= vect_get_vec_def_for_stmt_copy (vinfo
, vec_oprnd3
);
7124 if (!inscan_var_store
)
7125 dataref_offset
= int_const_binop (PLUS_EXPR
, dataref_offset
, bump
);
/* When reading through ldataref_ptr, emit an explicit vector load of
   the inscan array for this copy.  */
7130 vec_oprnd2
= make_ssa_name (vectype
);
7131 tree data_ref
= fold_build2 (MEM_REF
, vectype
,
7132 unshare_expr (ldataref_ptr
),
7134 vect_copy_ref_info (data_ref
, DR_REF (load1_dr_info
->dr
));
7135 gimple
*g
= gimple_build_assign (vec_oprnd2
, data_ref
);
7136 new_stmt_info
= vect_finish_stmt_generation (stmt_info
, g
, gsi
);
7137 if (prev_stmt_info
== NULL
)
7138 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt_info
;
7140 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt_info
;
7141 prev_stmt_info
= new_stmt_info
;
/* The log2(nunits) scan steps: shift/permute V by 2^i lanes, then
   combine with CODE (see the big IL comment in check_scan_store).  */
7144 tree v
= vec_oprnd2
;
7145 for (int i
= 0; i
< units_log2
; ++i
)
7147 tree new_temp
= make_ssa_name (vectype
);
7148 gimple
*g
= gimple_build_assign (new_temp
, VEC_PERM_EXPR
,
7150 && (use_whole_vector
[i
]
7151 != scan_store_kind_perm
))
7152 ? zero_vec
: vec_oprnd1
, v
,
7154 new_stmt_info
= vect_finish_stmt_generation (stmt_info
, g
, gsi
);
7155 if (prev_stmt_info
== NULL
)
7156 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt_info
;
7158 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt_info
;
7159 prev_stmt_info
= new_stmt_info
;
7161 if (zero_vec
&& use_whole_vector
[i
] == scan_store_kind_lshift_cond
)
7163 /* Whole vector shift shifted in zero bits, but if *init
7164 is not initializer_zerop, we need to replace those elements
7165 with elements from vec_oprnd1. */
7166 tree_vector_builder
vb (masktype
, nunits
, 1);
7167 for (unsigned HOST_WIDE_INT k
= 0; k
< nunits
; ++k
)
7168 vb
.quick_push (k
< (HOST_WIDE_INT_1U
<< i
)
7169 ? boolean_false_node
: boolean_true_node
);
7171 tree new_temp2
= make_ssa_name (vectype
);
7172 g
= gimple_build_assign (new_temp2
, VEC_COND_EXPR
, vb
.build (),
7173 new_temp
, vec_oprnd1
);
7174 new_stmt_info
= vect_finish_stmt_generation (stmt_info
, g
, gsi
);
7175 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt_info
;
7176 prev_stmt_info
= new_stmt_info
;
7177 new_temp
= new_temp2
;
7180 /* For exclusive scan, perform the perms[i] permutation once
7183 && STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) == 4
7191 tree new_temp2
= make_ssa_name (vectype
);
7192 g
= gimple_build_assign (new_temp2
, code
, v
, new_temp
);
7193 new_stmt_info
= vect_finish_stmt_generation (stmt_info
, g
, gsi
);
7194 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt_info
;
7195 prev_stmt_info
= new_stmt_info
;
/* Combine the running value with ORIG, the carry-in broadcast from the
   previous vector copy.  */
7200 tree new_temp
= make_ssa_name (vectype
);
7201 gimple
*g
= gimple_build_assign (new_temp
, code
, orig
, v
);
7202 new_stmt_info
= vect_finish_stmt_generation (stmt_info
, g
, gsi
);
7203 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt_info
;
7204 prev_stmt_info
= new_stmt_info
;
7206 tree last_perm_arg
= new_temp
;
7207 /* For exclusive scan, new_temp computed above is the exclusive scan
7208 prefix sum. Turn it into inclusive prefix sum for the broadcast
7209 of the last element into orig. */
7210 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) == 4)
7212 last_perm_arg
= make_ssa_name (vectype
);
7213 g
= gimple_build_assign (last_perm_arg
, code
, new_temp
, vec_oprnd2
);
7214 new_stmt_info
= vect_finish_stmt_generation (stmt_info
, g
, gsi
);
7215 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt_info
;
7216 prev_stmt_info
= new_stmt_info
;
/* Broadcast the last lane into ORIG via perms[units_log2] for the next
   vector copy's carry-in.  */
7219 orig
= make_ssa_name (vectype
);
7220 g
= gimple_build_assign (orig
, VEC_PERM_EXPR
, last_perm_arg
,
7221 last_perm_arg
, perms
[units_log2
]);
7222 new_stmt_info
= vect_finish_stmt_generation (stmt_info
, g
, gsi
);
7223 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt_info
;
7224 prev_stmt_info
= new_stmt_info
;
7226 if (!inscan_var_store
)
7228 tree data_ref
= fold_build2 (MEM_REF
, vectype
,
7229 unshare_expr (dataref_ptr
),
7231 vect_copy_ref_info (data_ref
, DR_REF (dr_info
->dr
));
7232 g
= gimple_build_assign (data_ref
, new_temp
);
7233 new_stmt_info
= vect_finish_stmt_generation (stmt_info
, g
, gsi
);
7234 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt_info
;
7235 prev_stmt_info
= new_stmt_info
;
/* For the inscan variable itself, all copies of the broadcast result
   ORIG are stored in a second pass after the scan loop above.  */
7239 if (inscan_var_store
)
7240 for (int j
= 0; j
< ncopies
; j
++)
7243 dataref_offset
= int_const_binop (PLUS_EXPR
, dataref_offset
, bump
);
7245 tree data_ref
= fold_build2 (MEM_REF
, vectype
,
7246 unshare_expr (dataref_ptr
),
7248 vect_copy_ref_info (data_ref
, DR_REF (dr_info
->dr
));
7249 gimple
*g
= gimple_build_assign (data_ref
, orig
);
7250 stmt_vec_info new_stmt_info
7251 = vect_finish_stmt_generation (stmt_info
, g
, gsi
);
7252 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt_info
;
7253 prev_stmt_info
= new_stmt_info
;
7259 /* Function vectorizable_store.
7261 Check if STMT_INFO defines a non scalar data-ref (array/pointer/structure)
7262 that can be vectorized.
7263 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
7264 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
7265 Return true if STMT_INFO is vectorizable in this way. */
7268 vectorizable_store (stmt_vec_info stmt_info
, gimple_stmt_iterator
*gsi
,
7269 stmt_vec_info
*vec_stmt
, slp_tree slp_node
,
7270 stmt_vector_for_cost
*cost_vec
)
7274 tree vec_oprnd
= NULL_TREE
;
7276 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
7277 class loop
*loop
= NULL
;
7278 machine_mode vec_mode
;
7280 enum dr_alignment_support alignment_support_scheme
;
7281 enum vect_def_type rhs_dt
= vect_unknown_def_type
;
7282 enum vect_def_type mask_dt
= vect_unknown_def_type
;
7283 stmt_vec_info prev_stmt_info
= NULL
;
7284 tree dataref_ptr
= NULL_TREE
;
7285 tree dataref_offset
= NULL_TREE
;
7286 gimple
*ptr_incr
= NULL
;
7289 stmt_vec_info first_stmt_info
;
7291 unsigned int group_size
, i
;
7292 vec
<tree
> oprnds
= vNULL
;
7293 vec
<tree
> result_chain
= vNULL
;
7294 tree offset
= NULL_TREE
;
7295 vec
<tree
> vec_oprnds
= vNULL
;
7296 bool slp
= (slp_node
!= NULL
);
7297 unsigned int vec_num
;
7298 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
7299 vec_info
*vinfo
= stmt_info
->vinfo
;
7301 gather_scatter_info gs_info
;
7303 vec_load_store_type vls_type
;
7306 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
7309 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
7313 /* Is vectorizable store? */
7315 tree mask
= NULL_TREE
, mask_vectype
= NULL_TREE
;
7316 if (gassign
*assign
= dyn_cast
<gassign
*> (stmt_info
->stmt
))
7318 tree scalar_dest
= gimple_assign_lhs (assign
);
7319 if (TREE_CODE (scalar_dest
) == VIEW_CONVERT_EXPR
7320 && is_pattern_stmt_p (stmt_info
))
7321 scalar_dest
= TREE_OPERAND (scalar_dest
, 0);
7322 if (TREE_CODE (scalar_dest
) != ARRAY_REF
7323 && TREE_CODE (scalar_dest
) != BIT_FIELD_REF
7324 && TREE_CODE (scalar_dest
) != INDIRECT_REF
7325 && TREE_CODE (scalar_dest
) != COMPONENT_REF
7326 && TREE_CODE (scalar_dest
) != IMAGPART_EXPR
7327 && TREE_CODE (scalar_dest
) != REALPART_EXPR
7328 && TREE_CODE (scalar_dest
) != MEM_REF
)
7333 gcall
*call
= dyn_cast
<gcall
*> (stmt_info
->stmt
);
7334 if (!call
|| !gimple_call_internal_p (call
))
7337 internal_fn ifn
= gimple_call_internal_fn (call
);
7338 if (!internal_store_fn_p (ifn
))
7341 if (slp_node
!= NULL
)
7343 if (dump_enabled_p ())
7344 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
7345 "SLP of masked stores not supported.\n");
7349 int mask_index
= internal_fn_mask_index (ifn
);
7350 if (mask_index
>= 0)
7352 mask
= gimple_call_arg (call
, mask_index
);
7353 if (!vect_check_load_store_mask (stmt_info
, mask
, &mask_dt
,
7359 op
= vect_get_store_rhs (stmt_info
);
7361 /* Cannot have hybrid store SLP -- that would mean storing to the
7362 same location twice. */
7363 gcc_assert (slp
== PURE_SLP_STMT (stmt_info
));
7365 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
), rhs_vectype
= NULL_TREE
;
7366 poly_uint64 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
7370 loop
= LOOP_VINFO_LOOP (loop_vinfo
);
7371 vf
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
);
7376 /* Multiple types in SLP are handled by creating the appropriate number of
7377 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
7382 ncopies
= vect_get_num_copies (loop_vinfo
, vectype
);
7384 gcc_assert (ncopies
>= 1);
7386 /* FORNOW. This restriction should be relaxed. */
7387 if (loop
&& nested_in_vect_loop_p (loop
, stmt_info
) && ncopies
> 1)
7389 if (dump_enabled_p ())
7390 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
7391 "multiple types in nested loop.\n");
7395 if (!vect_check_store_rhs (stmt_info
, op
, &rhs_dt
, &rhs_vectype
, &vls_type
))
7398 elem_type
= TREE_TYPE (vectype
);
7399 vec_mode
= TYPE_MODE (vectype
);
7401 if (!STMT_VINFO_DATA_REF (stmt_info
))
7404 vect_memory_access_type memory_access_type
;
7405 if (!get_load_store_type (stmt_info
, vectype
, slp
, mask
, vls_type
, ncopies
,
7406 &memory_access_type
, &gs_info
))
7411 if (memory_access_type
== VMAT_CONTIGUOUS
)
7413 if (!VECTOR_MODE_P (vec_mode
)
7414 || !can_vec_mask_load_store_p (vec_mode
,
7415 TYPE_MODE (mask_vectype
), false))
7418 else if (memory_access_type
!= VMAT_LOAD_STORE_LANES
7419 && (memory_access_type
!= VMAT_GATHER_SCATTER
7420 || (gs_info
.decl
&& !VECTOR_BOOLEAN_TYPE_P (mask_vectype
))))
7422 if (dump_enabled_p ())
7423 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
7424 "unsupported access type for masked store.\n");
7430 /* FORNOW. In some cases can vectorize even if data-type not supported
7431 (e.g. - array initialization with 0). */
7432 if (optab_handler (mov_optab
, vec_mode
) == CODE_FOR_nothing
)
7436 dr_vec_info
*dr_info
= STMT_VINFO_DR_INFO (stmt_info
), *first_dr_info
= NULL
;
7437 grouped_store
= (STMT_VINFO_GROUPED_ACCESS (stmt_info
)
7438 && memory_access_type
!= VMAT_GATHER_SCATTER
7439 && (slp
|| memory_access_type
!= VMAT_CONTIGUOUS
));
7442 first_stmt_info
= DR_GROUP_FIRST_ELEMENT (stmt_info
);
7443 first_dr_info
= STMT_VINFO_DR_INFO (first_stmt_info
);
7444 group_size
= DR_GROUP_SIZE (first_stmt_info
);
7448 first_stmt_info
= stmt_info
;
7449 first_dr_info
= dr_info
;
7450 group_size
= vec_num
= 1;
7453 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) > 1 && !vec_stmt
)
7455 if (!check_scan_store (stmt_info
, vectype
, rhs_dt
, slp
, mask
,
7456 memory_access_type
))
7460 if (!vec_stmt
) /* transformation not required. */
7462 STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info
) = memory_access_type
;
7465 && LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo
))
7466 check_load_store_masking (loop_vinfo
, vectype
, vls_type
, group_size
,
7467 memory_access_type
, &gs_info
, mask
);
7469 STMT_VINFO_TYPE (stmt_info
) = store_vec_info_type
;
7470 vect_model_store_cost (stmt_info
, ncopies
, rhs_dt
, memory_access_type
,
7471 vls_type
, slp_node
, cost_vec
);
7474 gcc_assert (memory_access_type
== STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info
));
7478 ensure_base_align (dr_info
);
7480 if (memory_access_type
== VMAT_GATHER_SCATTER
&& gs_info
.decl
)
7482 tree vec_oprnd0
= NULL_TREE
, vec_oprnd1
= NULL_TREE
, src
;
7483 tree arglist
= TYPE_ARG_TYPES (TREE_TYPE (gs_info
.decl
));
7484 tree rettype
, srctype
, ptrtype
, idxtype
, masktype
, scaletype
;
7485 tree ptr
, var
, scale
, vec_mask
;
7486 tree mask_arg
= NULL_TREE
, mask_op
= NULL_TREE
, perm_mask
= NULL_TREE
;
7487 tree mask_halfvectype
= mask_vectype
;
7488 edge pe
= loop_preheader_edge (loop
);
7491 enum { NARROW
, NONE
, WIDEN
} modifier
;
7492 poly_uint64 scatter_off_nunits
7493 = TYPE_VECTOR_SUBPARTS (gs_info
.offset_vectype
);
7495 if (known_eq (nunits
, scatter_off_nunits
))
7497 else if (known_eq (nunits
* 2, scatter_off_nunits
))
7501 /* Currently gathers and scatters are only supported for
7502 fixed-length vectors. */
7503 unsigned int count
= scatter_off_nunits
.to_constant ();
7504 vec_perm_builder
sel (count
, count
, 1);
7505 for (i
= 0; i
< (unsigned int) count
; ++i
)
7506 sel
.quick_push (i
| (count
/ 2));
7508 vec_perm_indices
indices (sel
, 1, count
);
7509 perm_mask
= vect_gen_perm_mask_checked (gs_info
.offset_vectype
,
7511 gcc_assert (perm_mask
!= NULL_TREE
);
7513 else if (known_eq (nunits
, scatter_off_nunits
* 2))
7517 /* Currently gathers and scatters are only supported for
7518 fixed-length vectors. */
7519 unsigned int count
= nunits
.to_constant ();
7520 vec_perm_builder
sel (count
, count
, 1);
7521 for (i
= 0; i
< (unsigned int) count
; ++i
)
7522 sel
.quick_push (i
| (count
/ 2));
7524 vec_perm_indices
indices (sel
, 2, count
);
7525 perm_mask
= vect_gen_perm_mask_checked (vectype
, indices
);
7526 gcc_assert (perm_mask
!= NULL_TREE
);
7531 = build_same_sized_truth_vector_type (gs_info
.offset_vectype
);
7536 rettype
= TREE_TYPE (TREE_TYPE (gs_info
.decl
));
7537 ptrtype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
7538 masktype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
7539 idxtype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
7540 srctype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
7541 scaletype
= TREE_VALUE (arglist
);
7543 gcc_checking_assert (TREE_CODE (masktype
) == INTEGER_TYPE
7544 && TREE_CODE (rettype
) == VOID_TYPE
);
7546 ptr
= fold_convert (ptrtype
, gs_info
.base
);
7547 if (!is_gimple_min_invariant (ptr
))
7549 ptr
= force_gimple_operand (ptr
, &seq
, true, NULL_TREE
);
7550 new_bb
= gsi_insert_seq_on_edge_immediate (pe
, seq
);
7551 gcc_assert (!new_bb
);
7554 if (mask
== NULL_TREE
)
7556 mask_arg
= build_int_cst (masktype
, -1);
7557 mask_arg
= vect_init_vector (stmt_info
, mask_arg
, masktype
, NULL
);
7560 scale
= build_int_cst (scaletype
, gs_info
.scale
);
7562 prev_stmt_info
= NULL
;
7563 for (j
= 0; j
< ncopies
; ++j
)
7567 src
= vec_oprnd1
= vect_get_vec_def_for_operand (op
, stmt_info
);
7568 op
= vec_oprnd0
= vect_get_vec_def_for_operand (gs_info
.offset
,
7571 mask_op
= vec_mask
= vect_get_vec_def_for_operand (mask
,
7574 else if (modifier
!= NONE
&& (j
& 1))
7576 if (modifier
== WIDEN
)
7579 = vec_oprnd1
= vect_get_vec_def_for_stmt_copy (vinfo
,
7581 op
= permute_vec_elements (vec_oprnd0
, vec_oprnd0
, perm_mask
,
7585 = vec_mask
= vect_get_vec_def_for_stmt_copy (vinfo
,
7588 else if (modifier
== NARROW
)
7590 src
= permute_vec_elements (vec_oprnd1
, vec_oprnd1
, perm_mask
,
7592 op
= vec_oprnd0
= vect_get_vec_def_for_stmt_copy (vinfo
,
7600 src
= vec_oprnd1
= vect_get_vec_def_for_stmt_copy (vinfo
,
7602 op
= vec_oprnd0
= vect_get_vec_def_for_stmt_copy (vinfo
,
7605 mask_op
= vec_mask
= vect_get_vec_def_for_stmt_copy (vinfo
,
7609 if (!useless_type_conversion_p (srctype
, TREE_TYPE (src
)))
7611 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (src
)),
7612 TYPE_VECTOR_SUBPARTS (srctype
)));
7613 var
= vect_get_new_ssa_name (srctype
, vect_simple_var
);
7614 src
= build1 (VIEW_CONVERT_EXPR
, srctype
, src
);
7616 = gimple_build_assign (var
, VIEW_CONVERT_EXPR
, src
);
7617 vect_finish_stmt_generation (stmt_info
, new_stmt
, gsi
);
7621 if (!useless_type_conversion_p (idxtype
, TREE_TYPE (op
)))
7623 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op
)),
7624 TYPE_VECTOR_SUBPARTS (idxtype
)));
7625 var
= vect_get_new_ssa_name (idxtype
, vect_simple_var
);
7626 op
= build1 (VIEW_CONVERT_EXPR
, idxtype
, op
);
7628 = gimple_build_assign (var
, VIEW_CONVERT_EXPR
, op
);
7629 vect_finish_stmt_generation (stmt_info
, new_stmt
, gsi
);
7637 if (modifier
== NARROW
)
7639 var
= vect_get_new_ssa_name (mask_halfvectype
,
7642 = gimple_build_assign (var
, (j
& 1) ? VEC_UNPACK_HI_EXPR
7643 : VEC_UNPACK_LO_EXPR
,
7645 vect_finish_stmt_generation (stmt_info
, new_stmt
, gsi
);
7648 tree optype
= TREE_TYPE (mask_arg
);
7649 if (TYPE_MODE (masktype
) == TYPE_MODE (optype
))
7652 utype
= lang_hooks
.types
.type_for_mode (TYPE_MODE (optype
), 1);
7653 var
= vect_get_new_ssa_name (utype
, vect_scalar_var
);
7654 mask_arg
= build1 (VIEW_CONVERT_EXPR
, utype
, mask_arg
);
7656 = gimple_build_assign (var
, VIEW_CONVERT_EXPR
, mask_arg
);
7657 vect_finish_stmt_generation (stmt_info
, new_stmt
, gsi
);
7659 if (!useless_type_conversion_p (masktype
, utype
))
7661 gcc_assert (TYPE_PRECISION (utype
)
7662 <= TYPE_PRECISION (masktype
));
7663 var
= vect_get_new_ssa_name (masktype
, vect_scalar_var
);
7664 new_stmt
= gimple_build_assign (var
, NOP_EXPR
, mask_arg
);
7665 vect_finish_stmt_generation (stmt_info
, new_stmt
, gsi
);
7671 = gimple_build_call (gs_info
.decl
, 5, ptr
, mask_arg
, op
, src
, scale
);
7672 stmt_vec_info new_stmt_info
7673 = vect_finish_stmt_generation (stmt_info
, new_stmt
, gsi
);
7675 if (prev_stmt_info
== NULL
)
7676 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt_info
;
7678 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt_info
;
7679 prev_stmt_info
= new_stmt_info
;
7683 else if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) >= 3)
7684 return vectorizable_scan_store (stmt_info
, gsi
, vec_stmt
, ncopies
);
7686 if (STMT_VINFO_GROUPED_ACCESS (stmt_info
))
7687 DR_GROUP_STORE_COUNT (DR_GROUP_FIRST_ELEMENT (stmt_info
))++;
7692 gcc_assert (!loop
|| !nested_in_vect_loop_p (loop
, stmt_info
));
7694 /* We vectorize all the stmts of the interleaving group when we
7695 reach the last stmt in the group. */
7696 if (DR_GROUP_STORE_COUNT (first_stmt_info
)
7697 < DR_GROUP_SIZE (first_stmt_info
)
7706 grouped_store
= false;
7707 /* VEC_NUM is the number of vect stmts to be created for this
7709 vec_num
= SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
);
7710 first_stmt_info
= SLP_TREE_SCALAR_STMTS (slp_node
)[0];
7711 gcc_assert (DR_GROUP_FIRST_ELEMENT (first_stmt_info
)
7712 == first_stmt_info
);
7713 first_dr_info
= STMT_VINFO_DR_INFO (first_stmt_info
);
7714 op
= vect_get_store_rhs (first_stmt_info
);
7717 /* VEC_NUM is the number of vect stmts to be created for this
7719 vec_num
= group_size
;
7721 ref_type
= get_group_alias_ptr_type (first_stmt_info
);
7724 ref_type
= reference_alias_ptr_type (DR_REF (first_dr_info
->dr
));
7726 if (dump_enabled_p ())
7727 dump_printf_loc (MSG_NOTE
, vect_location
,
7728 "transform store. ncopies = %d\n", ncopies
);
7730 if (memory_access_type
== VMAT_ELEMENTWISE
7731 || memory_access_type
== VMAT_STRIDED_SLP
)
7733 gimple_stmt_iterator incr_gsi
;
7739 tree stride_base
, stride_step
, alias_off
;
7742 /* Checked by get_load_store_type. */
7743 unsigned int const_nunits
= nunits
.to_constant ();
7745 gcc_assert (!LOOP_VINFO_FULLY_MASKED_P (loop_vinfo
));
7746 gcc_assert (!nested_in_vect_loop_p (loop
, stmt_info
));
7749 = fold_build_pointer_plus
7750 (DR_BASE_ADDRESS (first_dr_info
->dr
),
7751 size_binop (PLUS_EXPR
,
7752 convert_to_ptrofftype (DR_OFFSET (first_dr_info
->dr
)),
7753 convert_to_ptrofftype (DR_INIT (first_dr_info
->dr
))));
7754 stride_step
= fold_convert (sizetype
, DR_STEP (first_dr_info
->dr
));
7756 /* For a store with loop-invariant (but other than power-of-2)
7757 stride (i.e. not a grouped access) like so:
7759 for (i = 0; i < n; i += stride)
7762 we generate a new induction variable and new stores from
7763 the components of the (vectorized) rhs:
7765 for (j = 0; ; j += VF*stride)
7770 array[j + stride] = tmp2;
7774 unsigned nstores
= const_nunits
;
7776 tree ltype
= elem_type
;
7777 tree lvectype
= vectype
;
7780 if (group_size
< const_nunits
7781 && const_nunits
% group_size
== 0)
7783 nstores
= const_nunits
/ group_size
;
7785 ltype
= build_vector_type (elem_type
, group_size
);
7788 /* First check if vec_extract optab doesn't support extraction
7789 of vector elts directly. */
7790 scalar_mode elmode
= SCALAR_TYPE_MODE (elem_type
);
7792 if (!mode_for_vector (elmode
, group_size
).exists (&vmode
)
7793 || !VECTOR_MODE_P (vmode
)
7794 || !targetm
.vector_mode_supported_p (vmode
)
7795 || (convert_optab_handler (vec_extract_optab
,
7796 TYPE_MODE (vectype
), vmode
)
7797 == CODE_FOR_nothing
))
7799 /* Try to avoid emitting an extract of vector elements
7800 by performing the extracts using an integer type of the
7801 same size, extracting from a vector of those and then
7802 re-interpreting it as the original vector type if
7805 = group_size
* GET_MODE_BITSIZE (elmode
);
7806 unsigned int lnunits
= const_nunits
/ group_size
;
7807 /* If we can't construct such a vector fall back to
7808 element extracts from the original vector type and
7809 element size stores. */
7810 if (int_mode_for_size (lsize
, 0).exists (&elmode
)
7811 && mode_for_vector (elmode
, lnunits
).exists (&vmode
)
7812 && VECTOR_MODE_P (vmode
)
7813 && targetm
.vector_mode_supported_p (vmode
)
7814 && (convert_optab_handler (vec_extract_optab
,
7816 != CODE_FOR_nothing
))
7820 ltype
= build_nonstandard_integer_type (lsize
, 1);
7821 lvectype
= build_vector_type (ltype
, nstores
);
7823 /* Else fall back to vector extraction anyway.
7824 Fewer stores are more important than avoiding spilling
7825 of the vector we extract from. Compared to the
7826 construction case in vectorizable_load no store-forwarding
7827 issue exists here for reasonable archs. */
7830 else if (group_size
>= const_nunits
7831 && group_size
% const_nunits
== 0)
7834 lnel
= const_nunits
;
7838 ltype
= build_aligned_type (ltype
, TYPE_ALIGN (elem_type
));
7839 ncopies
= SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
);
7842 ivstep
= stride_step
;
7843 ivstep
= fold_build2 (MULT_EXPR
, TREE_TYPE (ivstep
), ivstep
,
7844 build_int_cst (TREE_TYPE (ivstep
), vf
));
7846 standard_iv_increment_position (loop
, &incr_gsi
, &insert_after
);
7848 stride_base
= cse_and_gimplify_to_preheader (loop_vinfo
, stride_base
);
7849 ivstep
= cse_and_gimplify_to_preheader (loop_vinfo
, ivstep
);
7850 create_iv (stride_base
, ivstep
, NULL
,
7851 loop
, &incr_gsi
, insert_after
,
7853 incr
= gsi_stmt (incr_gsi
);
7854 loop_vinfo
->add_stmt (incr
);
7856 stride_step
= cse_and_gimplify_to_preheader (loop_vinfo
, stride_step
);
7858 prev_stmt_info
= NULL
;
7859 alias_off
= build_int_cst (ref_type
, 0);
7860 stmt_vec_info next_stmt_info
= first_stmt_info
;
7861 for (g
= 0; g
< group_size
; g
++)
7863 running_off
= offvar
;
7866 tree size
= TYPE_SIZE_UNIT (ltype
);
7867 tree pos
= fold_build2 (MULT_EXPR
, sizetype
, size_int (g
),
7869 tree newoff
= copy_ssa_name (running_off
, NULL
);
7870 incr
= gimple_build_assign (newoff
, POINTER_PLUS_EXPR
,
7872 vect_finish_stmt_generation (stmt_info
, incr
, gsi
);
7873 running_off
= newoff
;
7875 unsigned int group_el
= 0;
7876 unsigned HOST_WIDE_INT
7877 elsz
= tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype
)));
7878 for (j
= 0; j
< ncopies
; j
++)
7880 /* We've set op and dt above, from vect_get_store_rhs,
7881 and first_stmt_info == stmt_info. */
7886 vect_get_vec_defs (op
, NULL_TREE
, stmt_info
,
7887 &vec_oprnds
, NULL
, slp_node
);
7888 vec_oprnd
= vec_oprnds
[0];
7892 op
= vect_get_store_rhs (next_stmt_info
);
7893 vec_oprnd
= vect_get_vec_def_for_operand
7894 (op
, next_stmt_info
);
7900 vec_oprnd
= vec_oprnds
[j
];
7902 vec_oprnd
= vect_get_vec_def_for_stmt_copy (vinfo
,
7905 /* Pun the vector to extract from if necessary. */
7906 if (lvectype
!= vectype
)
7908 tree tem
= make_ssa_name (lvectype
);
7910 = gimple_build_assign (tem
, build1 (VIEW_CONVERT_EXPR
,
7911 lvectype
, vec_oprnd
));
7912 vect_finish_stmt_generation (stmt_info
, pun
, gsi
);
7915 for (i
= 0; i
< nstores
; i
++)
7917 tree newref
, newoff
;
7918 gimple
*incr
, *assign
;
7919 tree size
= TYPE_SIZE (ltype
);
7920 /* Extract the i'th component. */
7921 tree pos
= fold_build2 (MULT_EXPR
, bitsizetype
,
7922 bitsize_int (i
), size
);
7923 tree elem
= fold_build3 (BIT_FIELD_REF
, ltype
, vec_oprnd
,
7926 elem
= force_gimple_operand_gsi (gsi
, elem
, true,
7930 tree this_off
= build_int_cst (TREE_TYPE (alias_off
),
7932 newref
= build2 (MEM_REF
, ltype
,
7933 running_off
, this_off
);
7934 vect_copy_ref_info (newref
, DR_REF (first_dr_info
->dr
));
7936 /* And store it to *running_off. */
7937 assign
= gimple_build_assign (newref
, elem
);
7938 stmt_vec_info assign_info
7939 = vect_finish_stmt_generation (stmt_info
, assign
, gsi
);
7943 || group_el
== group_size
)
7945 newoff
= copy_ssa_name (running_off
, NULL
);
7946 incr
= gimple_build_assign (newoff
, POINTER_PLUS_EXPR
,
7947 running_off
, stride_step
);
7948 vect_finish_stmt_generation (stmt_info
, incr
, gsi
);
7950 running_off
= newoff
;
7953 if (g
== group_size
- 1
7956 if (j
== 0 && i
== 0)
7957 STMT_VINFO_VEC_STMT (stmt_info
)
7958 = *vec_stmt
= assign_info
;
7960 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = assign_info
;
7961 prev_stmt_info
= assign_info
;
7965 next_stmt_info
= DR_GROUP_NEXT_ELEMENT (next_stmt_info
);
7970 vec_oprnds
.release ();
7974 auto_vec
<tree
> dr_chain (group_size
);
7975 oprnds
.create (group_size
);
7977 alignment_support_scheme
7978 = vect_supportable_dr_alignment (first_dr_info
, false);
7979 gcc_assert (alignment_support_scheme
);
7980 vec_loop_masks
*loop_masks
7981 = (loop_vinfo
&& LOOP_VINFO_FULLY_MASKED_P (loop_vinfo
)
7982 ? &LOOP_VINFO_MASKS (loop_vinfo
)
7984 /* Targets with store-lane instructions must not require explicit
7985 realignment. vect_supportable_dr_alignment always returns either
7986 dr_aligned or dr_unaligned_supported for masked operations. */
7987 gcc_assert ((memory_access_type
!= VMAT_LOAD_STORE_LANES
7990 || alignment_support_scheme
== dr_aligned
7991 || alignment_support_scheme
== dr_unaligned_supported
);
7993 if (memory_access_type
== VMAT_CONTIGUOUS_DOWN
7994 || memory_access_type
== VMAT_CONTIGUOUS_REVERSE
)
7995 offset
= size_int (-TYPE_VECTOR_SUBPARTS (vectype
) + 1);
7998 tree vec_offset
= NULL_TREE
;
7999 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info
))
8001 aggr_type
= NULL_TREE
;
8004 else if (memory_access_type
== VMAT_GATHER_SCATTER
)
8006 aggr_type
= elem_type
;
8007 vect_get_strided_load_store_ops (stmt_info
, loop_vinfo
, &gs_info
,
8008 &bump
, &vec_offset
);
8012 if (memory_access_type
== VMAT_LOAD_STORE_LANES
)
8013 aggr_type
= build_array_type_nelts (elem_type
, vec_num
* nunits
);
8015 aggr_type
= vectype
;
8016 bump
= vect_get_data_ptr_increment (dr_info
, aggr_type
,
8017 memory_access_type
);
8021 LOOP_VINFO_HAS_MASK_STORE (loop_vinfo
) = true;
8023 /* In case the vectorization factor (VF) is bigger than the number
8024 of elements that we can fit in a vectype (nunits), we have to generate
8025 more than one vector stmt - i.e - we need to "unroll" the
8026 vector stmt by a factor VF/nunits. For more details see documentation in
8027 vect_get_vec_def_for_copy_stmt. */
8029 /* In case of interleaving (non-unit grouped access):
8036 We create vectorized stores starting from base address (the access of the
8037 first stmt in the chain (S2 in the above example), when the last store stmt
8038 of the chain (S4) is reached:
8041 VS2: &base + vec_size*1 = vx0
8042 VS3: &base + vec_size*2 = vx1
8043 VS4: &base + vec_size*3 = vx3
8045 Then permutation statements are generated:
8047 VS5: vx5 = VEC_PERM_EXPR < vx0, vx3, {0, 8, 1, 9, 2, 10, 3, 11} >
8048 VS6: vx6 = VEC_PERM_EXPR < vx0, vx3, {4, 12, 5, 13, 6, 14, 7, 15} >
8051 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
8052 (the order of the data-refs in the output of vect_permute_store_chain
8053 corresponds to the order of scalar stmts in the interleaving chain - see
8054 the documentation of vect_permute_store_chain()).
8056 In case of both multiple types and interleaving, above vector stores and
8057 permutation stmts are created for every copy. The result vector stmts are
8058 put in STMT_VINFO_VEC_STMT for the first copy and in the corresponding
8059 STMT_VINFO_RELATED_STMT for the next copies.
8062 prev_stmt_info
= NULL
;
8063 tree vec_mask
= NULL_TREE
;
8064 for (j
= 0; j
< ncopies
; j
++)
8066 stmt_vec_info new_stmt_info
;
8071 /* Get vectorized arguments for SLP_NODE. */
8072 vect_get_vec_defs (op
, NULL_TREE
, stmt_info
, &vec_oprnds
,
8075 vec_oprnd
= vec_oprnds
[0];
8079 /* For interleaved stores we collect vectorized defs for all the
8080 stores in the group in DR_CHAIN and OPRNDS. DR_CHAIN is then
8081 used as an input to vect_permute_store_chain(), and OPRNDS as
8082 an input to vect_get_vec_def_for_stmt_copy() for the next copy.
8084 If the store is not grouped, DR_GROUP_SIZE is 1, and DR_CHAIN and
8085 OPRNDS are of size 1. */
8086 stmt_vec_info next_stmt_info
= first_stmt_info
;
8087 for (i
= 0; i
< group_size
; i
++)
8089 /* Since gaps are not supported for interleaved stores,
8090 DR_GROUP_SIZE is the exact number of stmts in the chain.
8091 Therefore, NEXT_STMT_INFO can't be NULL_TREE. In case
8092 that there is no interleaving, DR_GROUP_SIZE is 1,
8093 and only one iteration of the loop will be executed. */
8094 op
= vect_get_store_rhs (next_stmt_info
);
8095 vec_oprnd
= vect_get_vec_def_for_operand
8096 (op
, next_stmt_info
);
8097 dr_chain
.quick_push (vec_oprnd
);
8098 oprnds
.quick_push (vec_oprnd
);
8099 next_stmt_info
= DR_GROUP_NEXT_ELEMENT (next_stmt_info
);
8102 vec_mask
= vect_get_vec_def_for_operand (mask
, stmt_info
,
8106 /* We should have catched mismatched types earlier. */
8107 gcc_assert (useless_type_conversion_p (vectype
,
8108 TREE_TYPE (vec_oprnd
)));
8109 bool simd_lane_access_p
8110 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) != 0;
8111 if (simd_lane_access_p
8113 && TREE_CODE (DR_BASE_ADDRESS (first_dr_info
->dr
)) == ADDR_EXPR
8114 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr_info
->dr
), 0))
8115 && integer_zerop (DR_OFFSET (first_dr_info
->dr
))
8116 && integer_zerop (DR_INIT (first_dr_info
->dr
))
8117 && alias_sets_conflict_p (get_alias_set (aggr_type
),
8118 get_alias_set (TREE_TYPE (ref_type
))))
8120 dataref_ptr
= unshare_expr (DR_BASE_ADDRESS (first_dr_info
->dr
));
8121 dataref_offset
= build_int_cst (ref_type
, 0);
8123 else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info
))
8124 vect_get_gather_scatter_ops (loop
, stmt_info
, &gs_info
,
8125 &dataref_ptr
, &vec_offset
);
8128 = vect_create_data_ref_ptr (first_stmt_info
, aggr_type
,
8129 simd_lane_access_p
? loop
: NULL
,
8130 offset
, &dummy
, gsi
, &ptr_incr
,
8131 simd_lane_access_p
, NULL_TREE
, bump
);
8135 /* For interleaved stores we created vectorized defs for all the
8136 defs stored in OPRNDS in the previous iteration (previous copy).
8137 DR_CHAIN is then used as an input to vect_permute_store_chain(),
8138 and OPRNDS as an input to vect_get_vec_def_for_stmt_copy() for the
8140 If the store is not grouped, DR_GROUP_SIZE is 1, and DR_CHAIN and
8141 OPRNDS are of size 1. */
8142 for (i
= 0; i
< group_size
; i
++)
8145 vec_oprnd
= vect_get_vec_def_for_stmt_copy (vinfo
, op
);
8146 dr_chain
[i
] = vec_oprnd
;
8147 oprnds
[i
] = vec_oprnd
;
8150 vec_mask
= vect_get_vec_def_for_stmt_copy (vinfo
, vec_mask
);
8153 = int_const_binop (PLUS_EXPR
, dataref_offset
, bump
);
8154 else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info
))
8155 vec_offset
= vect_get_vec_def_for_stmt_copy (vinfo
, vec_offset
);
8157 dataref_ptr
= bump_vector_ptr (dataref_ptr
, ptr_incr
, gsi
,
8161 if (memory_access_type
== VMAT_LOAD_STORE_LANES
)
8165 /* Get an array into which we can store the individual vectors. */
8166 vec_array
= create_vector_array (vectype
, vec_num
);
8168 /* Invalidate the current contents of VEC_ARRAY. This should
8169 become an RTL clobber too, which prevents the vector registers
8170 from being upward-exposed. */
8171 vect_clobber_variable (stmt_info
, gsi
, vec_array
);
8173 /* Store the individual vectors into the array. */
8174 for (i
= 0; i
< vec_num
; i
++)
8176 vec_oprnd
= dr_chain
[i
];
8177 write_vector_array (stmt_info
, gsi
, vec_oprnd
, vec_array
, i
);
8180 tree final_mask
= NULL
;
8182 final_mask
= vect_get_loop_mask (gsi
, loop_masks
, ncopies
,
8185 final_mask
= prepare_load_store_mask (mask_vectype
, final_mask
,
8192 MASK_STORE_LANES (DATAREF_PTR, ALIAS_PTR, VEC_MASK,
8194 unsigned int align
= TYPE_ALIGN_UNIT (TREE_TYPE (vectype
));
8195 tree alias_ptr
= build_int_cst (ref_type
, align
);
8196 call
= gimple_build_call_internal (IFN_MASK_STORE_LANES
, 4,
8197 dataref_ptr
, alias_ptr
,
8198 final_mask
, vec_array
);
8203 MEM_REF[...all elements...] = STORE_LANES (VEC_ARRAY). */
8204 data_ref
= create_array_ref (aggr_type
, dataref_ptr
, ref_type
);
8205 call
= gimple_build_call_internal (IFN_STORE_LANES
, 1,
8207 gimple_call_set_lhs (call
, data_ref
);
8209 gimple_call_set_nothrow (call
, true);
8210 new_stmt_info
= vect_finish_stmt_generation (stmt_info
, call
, gsi
);
8212 /* Record that VEC_ARRAY is now dead. */
8213 vect_clobber_variable (stmt_info
, gsi
, vec_array
);
8217 new_stmt_info
= NULL
;
8221 result_chain
.create (group_size
);
8223 vect_permute_store_chain (dr_chain
, group_size
, stmt_info
, gsi
,
8227 stmt_vec_info next_stmt_info
= first_stmt_info
;
8228 for (i
= 0; i
< vec_num
; i
++)
8231 unsigned HOST_WIDE_INT align
;
8233 tree final_mask
= NULL_TREE
;
8235 final_mask
= vect_get_loop_mask (gsi
, loop_masks
,
8237 vectype
, vec_num
* j
+ i
);
8239 final_mask
= prepare_load_store_mask (mask_vectype
, final_mask
,
8242 if (memory_access_type
== VMAT_GATHER_SCATTER
)
8244 tree scale
= size_int (gs_info
.scale
);
8247 call
= gimple_build_call_internal
8248 (IFN_MASK_SCATTER_STORE
, 5, dataref_ptr
, vec_offset
,
8249 scale
, vec_oprnd
, final_mask
);
8251 call
= gimple_build_call_internal
8252 (IFN_SCATTER_STORE
, 4, dataref_ptr
, vec_offset
,
8254 gimple_call_set_nothrow (call
, true);
8256 = vect_finish_stmt_generation (stmt_info
, call
, gsi
);
8261 /* Bump the vector pointer. */
8262 dataref_ptr
= bump_vector_ptr (dataref_ptr
, ptr_incr
, gsi
,
8266 vec_oprnd
= vec_oprnds
[i
];
8267 else if (grouped_store
)
8268 /* For grouped stores vectorized defs are interleaved in
8269 vect_permute_store_chain(). */
8270 vec_oprnd
= result_chain
[i
];
8272 align
= known_alignment (DR_TARGET_ALIGNMENT (first_dr_info
));
8273 if (aligned_access_p (first_dr_info
))
8275 else if (DR_MISALIGNMENT (first_dr_info
) == -1)
8277 align
= dr_alignment (vect_dr_behavior (first_dr_info
));
8281 misalign
= DR_MISALIGNMENT (first_dr_info
);
8282 if (dataref_offset
== NULL_TREE
8283 && TREE_CODE (dataref_ptr
) == SSA_NAME
)
8284 set_ptr_info_alignment (get_ptr_info (dataref_ptr
), align
,
8287 if (memory_access_type
== VMAT_CONTIGUOUS_REVERSE
)
8289 tree perm_mask
= perm_mask_for_reverse (vectype
);
8290 tree perm_dest
= vect_create_destination_var
8291 (vect_get_store_rhs (stmt_info
), vectype
);
8292 tree new_temp
= make_ssa_name (perm_dest
);
8294 /* Generate the permute statement. */
8296 = gimple_build_assign (new_temp
, VEC_PERM_EXPR
, vec_oprnd
,
8297 vec_oprnd
, perm_mask
);
8298 vect_finish_stmt_generation (stmt_info
, perm_stmt
, gsi
);
8300 perm_stmt
= SSA_NAME_DEF_STMT (new_temp
);
8301 vec_oprnd
= new_temp
;
8304 /* Arguments are ready. Create the new vector stmt. */
8307 align
= least_bit_hwi (misalign
| align
);
8308 tree ptr
= build_int_cst (ref_type
, align
);
8310 = gimple_build_call_internal (IFN_MASK_STORE
, 4,
8312 final_mask
, vec_oprnd
);
8313 gimple_call_set_nothrow (call
, true);
8315 = vect_finish_stmt_generation (stmt_info
, call
, gsi
);
8319 data_ref
= fold_build2 (MEM_REF
, vectype
,
8323 : build_int_cst (ref_type
, 0));
8324 if (aligned_access_p (first_dr_info
))
8326 else if (DR_MISALIGNMENT (first_dr_info
) == -1)
8327 TREE_TYPE (data_ref
)
8328 = build_aligned_type (TREE_TYPE (data_ref
),
8329 align
* BITS_PER_UNIT
);
8331 TREE_TYPE (data_ref
)
8332 = build_aligned_type (TREE_TYPE (data_ref
),
8333 TYPE_ALIGN (elem_type
));
8334 vect_copy_ref_info (data_ref
, DR_REF (first_dr_info
->dr
));
8336 = gimple_build_assign (data_ref
, vec_oprnd
);
8338 = vect_finish_stmt_generation (stmt_info
, new_stmt
, gsi
);
8344 next_stmt_info
= DR_GROUP_NEXT_ELEMENT (next_stmt_info
);
8345 if (!next_stmt_info
)
8352 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt_info
;
8354 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt_info
;
8355 prev_stmt_info
= new_stmt_info
;
8360 result_chain
.release ();
8361 vec_oprnds
.release ();
8366 /* Given a vector type VECTYPE, turns permutation SEL into the equivalent
8367 VECTOR_CST mask. No checks are made that the target platform supports the
8368 mask, so callers may wish to test can_vec_perm_const_p separately, or use
8369 vect_gen_perm_mask_checked. */
8372 vect_gen_perm_mask_any (tree vectype
, const vec_perm_indices
&sel
)
8376 poly_uint64 nunits
= sel
.length ();
8377 gcc_assert (known_eq (nunits
, TYPE_VECTOR_SUBPARTS (vectype
)));
8379 mask_type
= build_vector_type (ssizetype
, nunits
);
8380 return vec_perm_indices_to_tree (mask_type
, sel
);
8383 /* Checked version of vect_gen_perm_mask_any. Asserts can_vec_perm_const_p,
8384 i.e. that the target supports the pattern _for arbitrary input vectors_. */
8387 vect_gen_perm_mask_checked (tree vectype
, const vec_perm_indices
&sel
)
8389 gcc_assert (can_vec_perm_const_p (TYPE_MODE (vectype
), sel
));
8390 return vect_gen_perm_mask_any (vectype
, sel
);
8393 /* Given a vector variable X and Y, that was generated for the scalar
8394 STMT_INFO, generate instructions to permute the vector elements of X and Y
8395 using permutation mask MASK_VEC, insert them at *GSI and return the
8396 permuted vector variable. */
8399 permute_vec_elements (tree x
, tree y
, tree mask_vec
, stmt_vec_info stmt_info
,
8400 gimple_stmt_iterator
*gsi
)
8402 tree vectype
= TREE_TYPE (x
);
8403 tree perm_dest
, data_ref
;
8406 tree scalar_dest
= gimple_get_lhs (stmt_info
->stmt
);
8407 if (scalar_dest
&& TREE_CODE (scalar_dest
) == SSA_NAME
)
8408 perm_dest
= vect_create_destination_var (scalar_dest
, vectype
);
8410 perm_dest
= vect_get_new_vect_var (vectype
, vect_simple_var
, NULL
);
8411 data_ref
= make_ssa_name (perm_dest
);
8413 /* Generate the permute statement. */
8414 perm_stmt
= gimple_build_assign (data_ref
, VEC_PERM_EXPR
, x
, y
, mask_vec
);
8415 vect_finish_stmt_generation (stmt_info
, perm_stmt
, gsi
);
8420 /* Hoist the definitions of all SSA uses on STMT_INFO out of the loop LOOP,
8421 inserting them on the loops preheader edge. Returns true if we
8422 were successful in doing so (and thus STMT_INFO can be moved then),
8423 otherwise returns false. */
8426 hoist_defs_of_uses (stmt_vec_info stmt_info
, class loop
*loop
)
8432 FOR_EACH_SSA_TREE_OPERAND (op
, stmt_info
->stmt
, i
, SSA_OP_USE
)
8434 gimple
*def_stmt
= SSA_NAME_DEF_STMT (op
);
8435 if (!gimple_nop_p (def_stmt
)
8436 && flow_bb_inside_loop_p (loop
, gimple_bb (def_stmt
)))
8438 /* Make sure we don't need to recurse. While we could do
8439 so in simple cases when there are more complex use webs
8440 we don't have an easy way to preserve stmt order to fulfil
8441 dependencies within them. */
8444 if (gimple_code (def_stmt
) == GIMPLE_PHI
)
8446 FOR_EACH_SSA_TREE_OPERAND (op2
, def_stmt
, i2
, SSA_OP_USE
)
8448 gimple
*def_stmt2
= SSA_NAME_DEF_STMT (op2
);
8449 if (!gimple_nop_p (def_stmt2
)
8450 && flow_bb_inside_loop_p (loop
, gimple_bb (def_stmt2
)))
8460 FOR_EACH_SSA_TREE_OPERAND (op
, stmt_info
->stmt
, i
, SSA_OP_USE
)
8462 gimple
*def_stmt
= SSA_NAME_DEF_STMT (op
);
8463 if (!gimple_nop_p (def_stmt
)
8464 && flow_bb_inside_loop_p (loop
, gimple_bb (def_stmt
)))
8466 gimple_stmt_iterator gsi
= gsi_for_stmt (def_stmt
);
8467 gsi_remove (&gsi
, false);
8468 gsi_insert_on_edge_immediate (loop_preheader_edge (loop
), def_stmt
);
8475 /* vectorizable_load.
8477 Check if STMT_INFO reads a non scalar data-ref (array/pointer/structure)
8478 that can be vectorized.
8479 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
8480 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
8481 Return true if STMT_INFO is vectorizable in this way. */
8484 vectorizable_load (stmt_vec_info stmt_info
, gimple_stmt_iterator
*gsi
,
8485 stmt_vec_info
*vec_stmt
, slp_tree slp_node
,
8486 slp_instance slp_node_instance
,
8487 stmt_vector_for_cost
*cost_vec
)
8490 tree vec_dest
= NULL
;
8491 tree data_ref
= NULL
;
8492 stmt_vec_info prev_stmt_info
;
8493 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
8494 class loop
*loop
= NULL
;
8495 class loop
*containing_loop
= gimple_bb (stmt_info
->stmt
)->loop_father
;
8496 bool nested_in_vect_loop
= false;
8501 enum dr_alignment_support alignment_support_scheme
;
8502 tree dataref_ptr
= NULL_TREE
;
8503 tree dataref_offset
= NULL_TREE
;
8504 gimple
*ptr_incr
= NULL
;
8507 unsigned int group_size
;
8508 poly_uint64 group_gap_adj
;
8509 tree msq
= NULL_TREE
, lsq
;
8510 tree offset
= NULL_TREE
;
8511 tree byte_offset
= NULL_TREE
;
8512 tree realignment_token
= NULL_TREE
;
8514 vec
<tree
> dr_chain
= vNULL
;
8515 bool grouped_load
= false;
8516 stmt_vec_info first_stmt_info
;
8517 stmt_vec_info first_stmt_info_for_drptr
= NULL
;
8518 bool compute_in_loop
= false;
8519 class loop
*at_loop
;
8521 bool slp
= (slp_node
!= NULL
);
8522 bool slp_perm
= false;
8523 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
8526 gather_scatter_info gs_info
;
8527 vec_info
*vinfo
= stmt_info
->vinfo
;
8529 enum vect_def_type mask_dt
= vect_unknown_def_type
;
8531 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
8534 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
8538 tree mask
= NULL_TREE
, mask_vectype
= NULL_TREE
;
8539 if (gassign
*assign
= dyn_cast
<gassign
*> (stmt_info
->stmt
))
8541 scalar_dest
= gimple_assign_lhs (assign
);
8542 if (TREE_CODE (scalar_dest
) != SSA_NAME
)
8545 tree_code code
= gimple_assign_rhs_code (assign
);
8546 if (code
!= ARRAY_REF
8547 && code
!= BIT_FIELD_REF
8548 && code
!= INDIRECT_REF
8549 && code
!= COMPONENT_REF
8550 && code
!= IMAGPART_EXPR
8551 && code
!= REALPART_EXPR
8553 && TREE_CODE_CLASS (code
) != tcc_declaration
)
8558 gcall
*call
= dyn_cast
<gcall
*> (stmt_info
->stmt
);
8559 if (!call
|| !gimple_call_internal_p (call
))
8562 internal_fn ifn
= gimple_call_internal_fn (call
);
8563 if (!internal_load_fn_p (ifn
))
8566 scalar_dest
= gimple_call_lhs (call
);
8570 int mask_index
= internal_fn_mask_index (ifn
);
8571 if (mask_index
>= 0)
8573 mask
= gimple_call_arg (call
, mask_index
);
8574 if (!vect_check_load_store_mask (stmt_info
, mask
, &mask_dt
,
8580 if (!STMT_VINFO_DATA_REF (stmt_info
))
8583 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
8584 poly_uint64 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
8588 loop
= LOOP_VINFO_LOOP (loop_vinfo
);
8589 nested_in_vect_loop
= nested_in_vect_loop_p (loop
, stmt_info
);
8590 vf
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
);
8595 /* Multiple types in SLP are handled by creating the appropriate number of
8596 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
8601 ncopies
= vect_get_num_copies (loop_vinfo
, vectype
);
8603 gcc_assert (ncopies
>= 1);
8605 /* FORNOW. This restriction should be relaxed. */
8606 if (nested_in_vect_loop
&& ncopies
> 1)
8608 if (dump_enabled_p ())
8609 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
8610 "multiple types in nested loop.\n");
8614 /* Invalidate assumptions made by dependence analysis when vectorization
8615 on the unrolled body effectively re-orders stmts. */
8617 && STMT_VINFO_MIN_NEG_DIST (stmt_info
) != 0
8618 && maybe_gt (LOOP_VINFO_VECT_FACTOR (loop_vinfo
),
8619 STMT_VINFO_MIN_NEG_DIST (stmt_info
)))
8621 if (dump_enabled_p ())
8622 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
8623 "cannot perform implicit CSE when unrolling "
8624 "with negative dependence distance\n");
8628 elem_type
= TREE_TYPE (vectype
);
8629 mode
= TYPE_MODE (vectype
);
8631 /* FORNOW. In some cases can vectorize even if data-type not supported
8632 (e.g. - data copies). */
8633 if (optab_handler (mov_optab
, mode
) == CODE_FOR_nothing
)
8635 if (dump_enabled_p ())
8636 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
8637 "Aligned load, but unsupported type.\n");
8641 /* Check if the load is a part of an interleaving chain. */
8642 if (STMT_VINFO_GROUPED_ACCESS (stmt_info
))
8644 grouped_load
= true;
8646 gcc_assert (!nested_in_vect_loop
);
8647 gcc_assert (!STMT_VINFO_GATHER_SCATTER_P (stmt_info
));
8649 first_stmt_info
= DR_GROUP_FIRST_ELEMENT (stmt_info
);
8650 group_size
= DR_GROUP_SIZE (first_stmt_info
);
8652 /* Refuse non-SLP vectorization of SLP-only groups. */
8653 if (!slp
&& STMT_VINFO_SLP_VECT_ONLY (first_stmt_info
))
8655 if (dump_enabled_p ())
8656 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
8657 "cannot vectorize load in non-SLP mode.\n");
8661 if (slp
&& SLP_TREE_LOAD_PERMUTATION (slp_node
).exists ())
8664 /* Invalidate assumptions made by dependence analysis when vectorization
8665 on the unrolled body effectively re-orders stmts. */
8666 if (!PURE_SLP_STMT (stmt_info
)
8667 && STMT_VINFO_MIN_NEG_DIST (stmt_info
) != 0
8668 && maybe_gt (LOOP_VINFO_VECT_FACTOR (loop_vinfo
),
8669 STMT_VINFO_MIN_NEG_DIST (stmt_info
)))
8671 if (dump_enabled_p ())
8672 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
8673 "cannot perform implicit CSE when performing "
8674 "group loads with negative dependence distance\n");
8681 vect_memory_access_type memory_access_type
;
8682 if (!get_load_store_type (stmt_info
, vectype
, slp
, mask
, VLS_LOAD
, ncopies
,
8683 &memory_access_type
, &gs_info
))
8688 if (memory_access_type
== VMAT_CONTIGUOUS
)
8690 machine_mode vec_mode
= TYPE_MODE (vectype
);
8691 if (!VECTOR_MODE_P (vec_mode
)
8692 || !can_vec_mask_load_store_p (vec_mode
,
8693 TYPE_MODE (mask_vectype
), true))
8696 else if (memory_access_type
!= VMAT_LOAD_STORE_LANES
8697 && memory_access_type
!= VMAT_GATHER_SCATTER
)
8699 if (dump_enabled_p ())
8700 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
8701 "unsupported access type for masked load.\n");
8706 if (!vec_stmt
) /* transformation not required. */
8709 STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info
) = memory_access_type
;
8712 && LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo
))
8713 check_load_store_masking (loop_vinfo
, vectype
, VLS_LOAD
, group_size
,
8714 memory_access_type
, &gs_info
, mask
);
8716 STMT_VINFO_TYPE (stmt_info
) = load_vec_info_type
;
8717 vect_model_load_cost (stmt_info
, ncopies
, memory_access_type
,
8718 slp_node_instance
, slp_node
, cost_vec
);
8723 gcc_assert (memory_access_type
8724 == STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info
));
8726 if (dump_enabled_p ())
8727 dump_printf_loc (MSG_NOTE
, vect_location
,
8728 "transform load. ncopies = %d\n", ncopies
);
8732 dr_vec_info
*dr_info
= STMT_VINFO_DR_INFO (stmt_info
), *first_dr_info
= NULL
;
8733 ensure_base_align (dr_info
);
8735 if (memory_access_type
== VMAT_GATHER_SCATTER
&& gs_info
.decl
)
8737 vect_build_gather_load_calls (stmt_info
, gsi
, vec_stmt
, &gs_info
, mask
);
8741 if (memory_access_type
== VMAT_INVARIANT
)
8743 gcc_assert (!grouped_load
&& !mask
&& !bb_vinfo
);
8744 /* If we have versioned for aliasing or the loop doesn't
8745 have any data dependencies that would preclude this,
8746 then we are sure this is a loop invariant load and
8747 thus we can insert it on the preheader edge. */
8748 bool hoist_p
= (LOOP_VINFO_NO_DATA_DEPENDENCIES (loop_vinfo
)
8749 && !nested_in_vect_loop
8750 && hoist_defs_of_uses (stmt_info
, loop
));
8753 gassign
*stmt
= as_a
<gassign
*> (stmt_info
->stmt
);
8754 if (dump_enabled_p ())
8755 dump_printf_loc (MSG_NOTE
, vect_location
,
8756 "hoisting out of the vectorized loop: %G", stmt
);
8757 scalar_dest
= copy_ssa_name (scalar_dest
);
8758 tree rhs
= unshare_expr (gimple_assign_rhs1 (stmt
));
8759 gsi_insert_on_edge_immediate
8760 (loop_preheader_edge (loop
),
8761 gimple_build_assign (scalar_dest
, rhs
));
8763 /* These copies are all equivalent, but currently the representation
8764 requires a separate STMT_VINFO_VEC_STMT for each one. */
8765 prev_stmt_info
= NULL
;
8766 gimple_stmt_iterator gsi2
= *gsi
;
8768 for (j
= 0; j
< ncopies
; j
++)
8770 stmt_vec_info new_stmt_info
;
8773 new_temp
= vect_init_vector (stmt_info
, scalar_dest
,
8775 gimple
*new_stmt
= SSA_NAME_DEF_STMT (new_temp
);
8776 new_stmt_info
= vinfo
->add_stmt (new_stmt
);
8780 new_temp
= vect_init_vector (stmt_info
, scalar_dest
,
8782 new_stmt_info
= vinfo
->lookup_def (new_temp
);
8785 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt_info
);
8787 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt_info
;
8789 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt_info
;
8790 prev_stmt_info
= new_stmt_info
;
8795 if (memory_access_type
== VMAT_ELEMENTWISE
8796 || memory_access_type
== VMAT_STRIDED_SLP
)
8798 gimple_stmt_iterator incr_gsi
;
8804 vec
<constructor_elt
, va_gc
> *v
= NULL
;
8805 tree stride_base
, stride_step
, alias_off
;
8806 /* Checked by get_load_store_type. */
8807 unsigned int const_nunits
= nunits
.to_constant ();
8808 unsigned HOST_WIDE_INT cst_offset
= 0;
8810 gcc_assert (!LOOP_VINFO_FULLY_MASKED_P (loop_vinfo
));
8811 gcc_assert (!nested_in_vect_loop
);
8815 first_stmt_info
= DR_GROUP_FIRST_ELEMENT (stmt_info
);
8816 first_dr_info
= STMT_VINFO_DR_INFO (first_stmt_info
);
8820 first_stmt_info
= stmt_info
;
8821 first_dr_info
= dr_info
;
8823 if (slp
&& grouped_load
)
8825 group_size
= DR_GROUP_SIZE (first_stmt_info
);
8826 ref_type
= get_group_alias_ptr_type (first_stmt_info
);
8832 = (tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype
)))
8833 * vect_get_place_in_interleaving_chain (stmt_info
,
8836 ref_type
= reference_alias_ptr_type (DR_REF (dr_info
->dr
));
8840 = fold_build_pointer_plus
8841 (DR_BASE_ADDRESS (first_dr_info
->dr
),
8842 size_binop (PLUS_EXPR
,
8843 convert_to_ptrofftype (DR_OFFSET (first_dr_info
->dr
)),
8844 convert_to_ptrofftype (DR_INIT (first_dr_info
->dr
))));
8845 stride_step
= fold_convert (sizetype
, DR_STEP (first_dr_info
->dr
));
8847 /* For a load with loop-invariant (but other than power-of-2)
8848 stride (i.e. not a grouped access) like so:
8850 for (i = 0; i < n; i += stride)
8853 we generate a new induction variable and new accesses to
8854 form a new vector (or vectors, depending on ncopies):
8856 for (j = 0; ; j += VF*stride)
8858 tmp2 = array[j + stride];
8860 vectemp = {tmp1, tmp2, ...}
8863 ivstep
= fold_build2 (MULT_EXPR
, TREE_TYPE (stride_step
), stride_step
,
8864 build_int_cst (TREE_TYPE (stride_step
), vf
));
8866 standard_iv_increment_position (loop
, &incr_gsi
, &insert_after
);
8868 stride_base
= cse_and_gimplify_to_preheader (loop_vinfo
, stride_base
);
8869 ivstep
= cse_and_gimplify_to_preheader (loop_vinfo
, ivstep
);
8870 create_iv (stride_base
, ivstep
, NULL
,
8871 loop
, &incr_gsi
, insert_after
,
8873 incr
= gsi_stmt (incr_gsi
);
8874 loop_vinfo
->add_stmt (incr
);
8876 stride_step
= cse_and_gimplify_to_preheader (loop_vinfo
, stride_step
);
8878 prev_stmt_info
= NULL
;
8879 running_off
= offvar
;
8880 alias_off
= build_int_cst (ref_type
, 0);
8881 int nloads
= const_nunits
;
8883 tree ltype
= TREE_TYPE (vectype
);
8884 tree lvectype
= vectype
;
8885 auto_vec
<tree
> dr_chain
;
8886 if (memory_access_type
== VMAT_STRIDED_SLP
)
8888 if (group_size
< const_nunits
)
8890 /* First check if vec_init optab supports construction from
8891 vector elts directly. */
8892 scalar_mode elmode
= SCALAR_TYPE_MODE (TREE_TYPE (vectype
));
8894 if (mode_for_vector (elmode
, group_size
).exists (&vmode
)
8895 && VECTOR_MODE_P (vmode
)
8896 && targetm
.vector_mode_supported_p (vmode
)
8897 && (convert_optab_handler (vec_init_optab
,
8898 TYPE_MODE (vectype
), vmode
)
8899 != CODE_FOR_nothing
))
8901 nloads
= const_nunits
/ group_size
;
8903 ltype
= build_vector_type (TREE_TYPE (vectype
), group_size
);
8907 /* Otherwise avoid emitting a constructor of vector elements
8908 by performing the loads using an integer type of the same
8909 size, constructing a vector of those and then
8910 re-interpreting it as the original vector type.
8911 This avoids a huge runtime penalty due to the general
8912 inability to perform store forwarding from smaller stores
8913 to a larger load. */
8915 = group_size
* TYPE_PRECISION (TREE_TYPE (vectype
));
8916 unsigned int lnunits
= const_nunits
/ group_size
;
8917 /* If we can't construct such a vector fall back to
8918 element loads of the original vector type. */
8919 if (int_mode_for_size (lsize
, 0).exists (&elmode
)
8920 && mode_for_vector (elmode
, lnunits
).exists (&vmode
)
8921 && VECTOR_MODE_P (vmode
)
8922 && targetm
.vector_mode_supported_p (vmode
)
8923 && (convert_optab_handler (vec_init_optab
, vmode
, elmode
)
8924 != CODE_FOR_nothing
))
8928 ltype
= build_nonstandard_integer_type (lsize
, 1);
8929 lvectype
= build_vector_type (ltype
, nloads
);
8936 lnel
= const_nunits
;
8939 ltype
= build_aligned_type (ltype
, TYPE_ALIGN (TREE_TYPE (vectype
)));
8941 /* Load vector(1) scalar_type if it's 1 element-wise vectype. */
8942 else if (nloads
== 1)
8947 /* For SLP permutation support we need to load the whole group,
8948 not only the number of vector stmts the permutation result
8952 /* We don't yet generate SLP_TREE_LOAD_PERMUTATIONs for
8954 unsigned int const_vf
= vf
.to_constant ();
8955 ncopies
= CEIL (group_size
* const_vf
, const_nunits
);
8956 dr_chain
.create (ncopies
);
8959 ncopies
= SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
);
8961 unsigned int group_el
= 0;
8962 unsigned HOST_WIDE_INT
8963 elsz
= tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype
)));
8964 for (j
= 0; j
< ncopies
; j
++)
8967 vec_alloc (v
, nloads
);
8968 stmt_vec_info new_stmt_info
= NULL
;
8969 for (i
= 0; i
< nloads
; i
++)
8971 tree this_off
= build_int_cst (TREE_TYPE (alias_off
),
8972 group_el
* elsz
+ cst_offset
);
8973 tree data_ref
= build2 (MEM_REF
, ltype
, running_off
, this_off
);
8974 vect_copy_ref_info (data_ref
, DR_REF (first_dr_info
->dr
));
8976 = gimple_build_assign (make_ssa_name (ltype
), data_ref
);
8978 = vect_finish_stmt_generation (stmt_info
, new_stmt
, gsi
);
8980 CONSTRUCTOR_APPEND_ELT (v
, NULL_TREE
,
8981 gimple_assign_lhs (new_stmt
));
8985 || group_el
== group_size
)
8987 tree newoff
= copy_ssa_name (running_off
);
8988 gimple
*incr
= gimple_build_assign (newoff
, POINTER_PLUS_EXPR
,
8989 running_off
, stride_step
);
8990 vect_finish_stmt_generation (stmt_info
, incr
, gsi
);
8992 running_off
= newoff
;
8998 tree vec_inv
= build_constructor (lvectype
, v
);
8999 new_temp
= vect_init_vector (stmt_info
, vec_inv
, lvectype
, gsi
);
9000 new_stmt_info
= vinfo
->lookup_def (new_temp
);
9001 if (lvectype
!= vectype
)
9004 = gimple_build_assign (make_ssa_name (vectype
),
9006 build1 (VIEW_CONVERT_EXPR
,
9007 vectype
, new_temp
));
9009 = vect_finish_stmt_generation (stmt_info
, new_stmt
, gsi
);
9016 dr_chain
.quick_push (gimple_assign_lhs (new_stmt_info
->stmt
));
9018 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt_info
);
9023 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt_info
;
9025 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt_info
;
9026 prev_stmt_info
= new_stmt_info
;
9032 vect_transform_slp_perm_load (slp_node
, dr_chain
, gsi
, vf
,
9033 slp_node_instance
, false, &n_perms
);
9038 if (memory_access_type
== VMAT_GATHER_SCATTER
9039 || (!slp
&& memory_access_type
== VMAT_CONTIGUOUS
))
9040 grouped_load
= false;
9044 first_stmt_info
= DR_GROUP_FIRST_ELEMENT (stmt_info
);
9045 group_size
= DR_GROUP_SIZE (first_stmt_info
);
9046 /* For SLP vectorization we directly vectorize a subchain
9047 without permutation. */
9048 if (slp
&& ! SLP_TREE_LOAD_PERMUTATION (slp_node
).exists ())
9049 first_stmt_info
= SLP_TREE_SCALAR_STMTS (slp_node
)[0];
9050 /* For BB vectorization always use the first stmt to base
9051 the data ref pointer on. */
9053 first_stmt_info_for_drptr
= SLP_TREE_SCALAR_STMTS (slp_node
)[0];
9055 /* Check if the chain of loads is already vectorized. */
9056 if (STMT_VINFO_VEC_STMT (first_stmt_info
)
9057 /* For SLP we would need to copy over SLP_TREE_VEC_STMTS.
9058 ??? But we can only do so if there is exactly one
9059 as we have no way to get at the rest. Leave the CSE
9061 ??? With the group load eventually participating
9062 in multiple different permutations (having multiple
9063 slp nodes which refer to the same group) the CSE
9064 is even wrong code. See PR56270. */
9067 *vec_stmt
= STMT_VINFO_VEC_STMT (stmt_info
);
9070 first_dr_info
= STMT_VINFO_DR_INFO (first_stmt_info
);
9073 /* VEC_NUM is the number of vect stmts to be created for this group. */
9076 grouped_load
= false;
9077 /* If an SLP permutation is from N elements to N elements,
9078 and if one vector holds a whole number of N, we can load
9079 the inputs to the permutation in the same way as an
9080 unpermuted sequence. In other cases we need to load the
9081 whole group, not only the number of vector stmts the
9082 permutation result fits in. */
9084 && (group_size
!= SLP_INSTANCE_GROUP_SIZE (slp_node_instance
)
9085 || !multiple_p (nunits
, group_size
)))
9087 /* We don't yet generate such SLP_TREE_LOAD_PERMUTATIONs for
9088 variable VF; see vect_transform_slp_perm_load. */
9089 unsigned int const_vf
= vf
.to_constant ();
9090 unsigned int const_nunits
= nunits
.to_constant ();
9091 vec_num
= CEIL (group_size
* const_vf
, const_nunits
);
9092 group_gap_adj
= vf
* group_size
- nunits
* vec_num
;
9096 vec_num
= SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
);
9098 = group_size
- SLP_INSTANCE_GROUP_SIZE (slp_node_instance
);
9102 vec_num
= group_size
;
9104 ref_type
= get_group_alias_ptr_type (first_stmt_info
);
9108 first_stmt_info
= stmt_info
;
9109 first_dr_info
= dr_info
;
9110 group_size
= vec_num
= 1;
9112 ref_type
= reference_alias_ptr_type (DR_REF (first_dr_info
->dr
));
9115 alignment_support_scheme
9116 = vect_supportable_dr_alignment (first_dr_info
, false);
9117 gcc_assert (alignment_support_scheme
);
9118 vec_loop_masks
*loop_masks
9119 = (loop_vinfo
&& LOOP_VINFO_FULLY_MASKED_P (loop_vinfo
)
9120 ? &LOOP_VINFO_MASKS (loop_vinfo
)
9122 /* Targets with store-lane instructions must not require explicit
9123 realignment. vect_supportable_dr_alignment always returns either
9124 dr_aligned or dr_unaligned_supported for masked operations. */
9125 gcc_assert ((memory_access_type
!= VMAT_LOAD_STORE_LANES
9128 || alignment_support_scheme
== dr_aligned
9129 || alignment_support_scheme
== dr_unaligned_supported
);
9131 /* In case the vectorization factor (VF) is bigger than the number
9132 of elements that we can fit in a vectype (nunits), we have to generate
9133 more than one vector stmt - i.e - we need to "unroll" the
9134 vector stmt by a factor VF/nunits. In doing so, we record a pointer
9135 from one copy of the vector stmt to the next, in the field
9136 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
9137 stages to find the correct vector defs to be used when vectorizing
9138 stmts that use the defs of the current stmt. The example below
9139 illustrates the vectorization process when VF=16 and nunits=4 (i.e., we
9140 need to create 4 vectorized stmts):
9142 before vectorization:
9143 RELATED_STMT VEC_STMT
9147 step 1: vectorize stmt S1:
9148 We first create the vector stmt VS1_0, and, as usual, record a
9149 pointer to it in the STMT_VINFO_VEC_STMT of the scalar stmt S1.
9150 Next, we create the vector stmt VS1_1, and record a pointer to
9151 it in the STMT_VINFO_RELATED_STMT of the vector stmt VS1_0.
9152 Similarly, for VS1_2 and VS1_3. This is the resulting chain of
9154 RELATED_STMT VEC_STMT
9155 VS1_0: vx0 = memref0 VS1_1 -
9156 VS1_1: vx1 = memref1 VS1_2 -
9157 VS1_2: vx2 = memref2 VS1_3 -
9158 VS1_3: vx3 = memref3 - -
9159 S1: x = load - VS1_0
9162 See in documentation in vect_get_vec_def_for_stmt_copy for how the
9163 information we recorded in RELATED_STMT field is used to vectorize
9166 /* In case of interleaving (non-unit grouped access):
9173 Vectorized loads are created in the order of memory accesses
9174 starting from the access of the first stmt of the chain:
9177 VS2: vx1 = &base + vec_size*1
9178 VS3: vx3 = &base + vec_size*2
9179 VS4: vx4 = &base + vec_size*3
9181 Then permutation statements are generated:
9183 VS5: vx5 = VEC_PERM_EXPR < vx0, vx1, { 0, 2, ..., i*2 } >
9184 VS6: vx6 = VEC_PERM_EXPR < vx0, vx1, { 1, 3, ..., i*2+1 } >
9187 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
9188 (the order of the data-refs in the output of vect_permute_load_chain
9189 corresponds to the order of scalar stmts in the interleaving chain - see
9190 the documentation of vect_permute_load_chain()).
9191 The generation of permutation stmts and recording them in
9192 STMT_VINFO_VEC_STMT is done in vect_transform_grouped_load().
9194 In case of both multiple types and interleaving, the vector loads and
9195 permutation stmts above are created for every copy. The result vector
9196 stmts are put in STMT_VINFO_VEC_STMT for the first copy and in the
9197 corresponding STMT_VINFO_RELATED_STMT for the next copies. */
9199 /* If the data reference is aligned (dr_aligned) or potentially unaligned
9200 on a target that supports unaligned accesses (dr_unaligned_supported)
9201 we generate the following code:
9205 p = p + indx * vectype_size;
9210 Otherwise, the data reference is potentially unaligned on a target that
9211 does not support unaligned accesses (dr_explicit_realign_optimized) -
9212 then generate the following code, in which the data in each iteration is
9213 obtained by two vector loads, one from the previous iteration, and one
9214 from the current iteration:
9216 msq_init = *(floor(p1))
9217 p2 = initial_addr + VS - 1;
9218 realignment_token = call target_builtin;
9221 p2 = p2 + indx * vectype_size
9223 vec_dest = realign_load (msq, lsq, realignment_token)
9228 /* If the misalignment remains the same throughout the execution of the
9229 loop, we can create the init_addr and permutation mask at the loop
9230 preheader. Otherwise, it needs to be created inside the loop.
9231 This can only occur when vectorizing memory accesses in the inner-loop
9232 nested within an outer-loop that is being vectorized. */
9234 if (nested_in_vect_loop
9235 && !multiple_p (DR_STEP_ALIGNMENT (dr_info
->dr
),
9236 GET_MODE_SIZE (TYPE_MODE (vectype
))))
9238 gcc_assert (alignment_support_scheme
!= dr_explicit_realign_optimized
);
9239 compute_in_loop
= true;
9242 if ((alignment_support_scheme
== dr_explicit_realign_optimized
9243 || alignment_support_scheme
== dr_explicit_realign
)
9244 && !compute_in_loop
)
9246 msq
= vect_setup_realignment (first_stmt_info_for_drptr
9247 ? first_stmt_info_for_drptr
9248 : first_stmt_info
, gsi
, &realignment_token
,
9249 alignment_support_scheme
, NULL_TREE
,
9251 if (alignment_support_scheme
== dr_explicit_realign_optimized
)
9253 phi
= as_a
<gphi
*> (SSA_NAME_DEF_STMT (msq
));
9254 byte_offset
= size_binop (MINUS_EXPR
, TYPE_SIZE_UNIT (vectype
),
9261 if (memory_access_type
== VMAT_CONTIGUOUS_REVERSE
)
9262 offset
= size_int (-TYPE_VECTOR_SUBPARTS (vectype
) + 1);
9265 tree vec_offset
= NULL_TREE
;
9266 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info
))
9268 aggr_type
= NULL_TREE
;
9271 else if (memory_access_type
== VMAT_GATHER_SCATTER
)
9273 aggr_type
= elem_type
;
9274 vect_get_strided_load_store_ops (stmt_info
, loop_vinfo
, &gs_info
,
9275 &bump
, &vec_offset
);
9279 if (memory_access_type
== VMAT_LOAD_STORE_LANES
)
9280 aggr_type
= build_array_type_nelts (elem_type
, vec_num
* nunits
);
9282 aggr_type
= vectype
;
9283 bump
= vect_get_data_ptr_increment (dr_info
, aggr_type
,
9284 memory_access_type
);
9287 tree vec_mask
= NULL_TREE
;
9288 prev_stmt_info
= NULL
;
9289 poly_uint64 group_elt
= 0;
9290 for (j
= 0; j
< ncopies
; j
++)
9292 stmt_vec_info new_stmt_info
= NULL
;
9293 /* 1. Create the vector or array pointer update chain. */
9296 bool simd_lane_access_p
9297 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) != 0;
9298 if (simd_lane_access_p
9299 && TREE_CODE (DR_BASE_ADDRESS (first_dr_info
->dr
)) == ADDR_EXPR
9300 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr_info
->dr
), 0))
9301 && integer_zerop (DR_OFFSET (first_dr_info
->dr
))
9302 && integer_zerop (DR_INIT (first_dr_info
->dr
))
9303 && alias_sets_conflict_p (get_alias_set (aggr_type
),
9304 get_alias_set (TREE_TYPE (ref_type
)))
9305 && (alignment_support_scheme
== dr_aligned
9306 || alignment_support_scheme
== dr_unaligned_supported
))
9308 dataref_ptr
= unshare_expr (DR_BASE_ADDRESS (first_dr_info
->dr
));
9309 dataref_offset
= build_int_cst (ref_type
, 0);
9311 else if (first_stmt_info_for_drptr
9312 && first_stmt_info
!= first_stmt_info_for_drptr
)
9315 = vect_create_data_ref_ptr (first_stmt_info_for_drptr
,
9316 aggr_type
, at_loop
, offset
, &dummy
,
9317 gsi
, &ptr_incr
, simd_lane_access_p
,
9319 /* Adjust the pointer by the difference to first_stmt. */
9320 data_reference_p ptrdr
9321 = STMT_VINFO_DATA_REF (first_stmt_info_for_drptr
);
9323 = fold_convert (sizetype
,
9324 size_binop (MINUS_EXPR
,
9325 DR_INIT (first_dr_info
->dr
),
9327 dataref_ptr
= bump_vector_ptr (dataref_ptr
, ptr_incr
, gsi
,
9330 else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info
))
9331 vect_get_gather_scatter_ops (loop
, stmt_info
, &gs_info
,
9332 &dataref_ptr
, &vec_offset
);
9335 = vect_create_data_ref_ptr (first_stmt_info
, aggr_type
, at_loop
,
9336 offset
, &dummy
, gsi
, &ptr_incr
,
9343 auto_vec
<vec
<tree
> > vec_defs (1);
9344 vect_get_slp_defs (slp_node
, &vec_defs
);
9345 vec_mask
= vec_defs
[0][0];
9348 vec_mask
= vect_get_vec_def_for_operand (mask
, stmt_info
,
9355 dataref_offset
= int_const_binop (PLUS_EXPR
, dataref_offset
,
9357 else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info
))
9358 vec_offset
= vect_get_vec_def_for_stmt_copy (vinfo
, vec_offset
);
9360 dataref_ptr
= bump_vector_ptr (dataref_ptr
, ptr_incr
, gsi
,
9363 vec_mask
= vect_get_vec_def_for_stmt_copy (vinfo
, vec_mask
);
9366 if (grouped_load
|| slp_perm
)
9367 dr_chain
.create (vec_num
);
9369 if (memory_access_type
== VMAT_LOAD_STORE_LANES
)
9373 vec_array
= create_vector_array (vectype
, vec_num
);
9375 tree final_mask
= NULL_TREE
;
9377 final_mask
= vect_get_loop_mask (gsi
, loop_masks
, ncopies
,
9380 final_mask
= prepare_load_store_mask (mask_vectype
, final_mask
,
9387 VEC_ARRAY = MASK_LOAD_LANES (DATAREF_PTR, ALIAS_PTR,
9389 unsigned int align
= TYPE_ALIGN_UNIT (TREE_TYPE (vectype
));
9390 tree alias_ptr
= build_int_cst (ref_type
, align
);
9391 call
= gimple_build_call_internal (IFN_MASK_LOAD_LANES
, 3,
9392 dataref_ptr
, alias_ptr
,
9398 VEC_ARRAY = LOAD_LANES (MEM_REF[...all elements...]). */
9399 data_ref
= create_array_ref (aggr_type
, dataref_ptr
, ref_type
);
9400 call
= gimple_build_call_internal (IFN_LOAD_LANES
, 1, data_ref
);
9402 gimple_call_set_lhs (call
, vec_array
);
9403 gimple_call_set_nothrow (call
, true);
9404 new_stmt_info
= vect_finish_stmt_generation (stmt_info
, call
, gsi
);
9406 /* Extract each vector into an SSA_NAME. */
9407 for (i
= 0; i
< vec_num
; i
++)
9409 new_temp
= read_vector_array (stmt_info
, gsi
, scalar_dest
,
9411 dr_chain
.quick_push (new_temp
);
9414 /* Record the mapping between SSA_NAMEs and statements. */
9415 vect_record_grouped_load_vectors (stmt_info
, dr_chain
);
9417 /* Record that VEC_ARRAY is now dead. */
9418 vect_clobber_variable (stmt_info
, gsi
, vec_array
);
9422 for (i
= 0; i
< vec_num
; i
++)
9424 tree final_mask
= NULL_TREE
;
9426 && memory_access_type
!= VMAT_INVARIANT
)
9427 final_mask
= vect_get_loop_mask (gsi
, loop_masks
,
9429 vectype
, vec_num
* j
+ i
);
9431 final_mask
= prepare_load_store_mask (mask_vectype
, final_mask
,
9435 dataref_ptr
= bump_vector_ptr (dataref_ptr
, ptr_incr
, gsi
,
9438 /* 2. Create the vector-load in the loop. */
9439 gimple
*new_stmt
= NULL
;
9440 switch (alignment_support_scheme
)
9443 case dr_unaligned_supported
:
9445 unsigned int misalign
;
9446 unsigned HOST_WIDE_INT align
;
9448 if (memory_access_type
== VMAT_GATHER_SCATTER
)
9450 tree scale
= size_int (gs_info
.scale
);
9453 call
= gimple_build_call_internal
9454 (IFN_MASK_GATHER_LOAD
, 4, dataref_ptr
,
9455 vec_offset
, scale
, final_mask
);
9457 call
= gimple_build_call_internal
9458 (IFN_GATHER_LOAD
, 3, dataref_ptr
,
9460 gimple_call_set_nothrow (call
, true);
9462 data_ref
= NULL_TREE
;
9467 known_alignment (DR_TARGET_ALIGNMENT (first_dr_info
));
9468 if (alignment_support_scheme
== dr_aligned
)
9470 gcc_assert (aligned_access_p (first_dr_info
));
9473 else if (DR_MISALIGNMENT (first_dr_info
) == -1)
9475 align
= dr_alignment
9476 (vect_dr_behavior (first_dr_info
));
9480 misalign
= DR_MISALIGNMENT (first_dr_info
);
9481 if (dataref_offset
== NULL_TREE
9482 && TREE_CODE (dataref_ptr
) == SSA_NAME
)
9483 set_ptr_info_alignment (get_ptr_info (dataref_ptr
),
9488 align
= least_bit_hwi (misalign
| align
);
9489 tree ptr
= build_int_cst (ref_type
, align
);
9491 = gimple_build_call_internal (IFN_MASK_LOAD
, 3,
9494 gimple_call_set_nothrow (call
, true);
9496 data_ref
= NULL_TREE
;
9500 tree ltype
= vectype
;
9501 /* If there's no peeling for gaps but we have a gap
9502 with slp loads then load the lower half of the
9503 vector only. See get_group_load_store_type for
9504 when we apply this optimization. */
9507 && !LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo
)
9508 && DR_GROUP_GAP (first_stmt_info
) != 0
9509 && known_eq (nunits
,
9511 - DR_GROUP_GAP (first_stmt_info
)) * 2)
9512 && known_eq (nunits
, group_size
))
9513 ltype
= build_vector_type (TREE_TYPE (vectype
),
9516 (first_stmt_info
)));
9518 = fold_build2 (MEM_REF
, ltype
, dataref_ptr
,
9521 : build_int_cst (ref_type
, 0));
9522 if (alignment_support_scheme
== dr_aligned
)
9524 else if (DR_MISALIGNMENT (first_dr_info
) == -1)
9525 TREE_TYPE (data_ref
)
9526 = build_aligned_type (TREE_TYPE (data_ref
),
9527 align
* BITS_PER_UNIT
);
9529 TREE_TYPE (data_ref
)
9530 = build_aligned_type (TREE_TYPE (data_ref
),
9531 TYPE_ALIGN (elem_type
));
9532 if (ltype
!= vectype
)
9534 vect_copy_ref_info (data_ref
, DR_REF (first_dr_info
->dr
));
9535 tree tem
= make_ssa_name (ltype
);
9536 new_stmt
= gimple_build_assign (tem
, data_ref
);
9537 vect_finish_stmt_generation (stmt_info
, new_stmt
, gsi
);
9539 vec
<constructor_elt
, va_gc
> *v
;
9541 CONSTRUCTOR_APPEND_ELT (v
, NULL_TREE
, tem
);
9542 CONSTRUCTOR_APPEND_ELT (v
, NULL_TREE
,
9543 build_zero_cst (ltype
));
9545 = gimple_build_assign (vec_dest
,
9552 case dr_explicit_realign
:
9556 tree vs
= size_int (TYPE_VECTOR_SUBPARTS (vectype
));
9558 if (compute_in_loop
)
9559 msq
= vect_setup_realignment (first_stmt_info
, gsi
,
9561 dr_explicit_realign
,
9564 if (TREE_CODE (dataref_ptr
) == SSA_NAME
)
9565 ptr
= copy_ssa_name (dataref_ptr
);
9567 ptr
= make_ssa_name (TREE_TYPE (dataref_ptr
));
9568 // For explicit realign the target alignment should be
9569 // known at compile time.
9570 unsigned HOST_WIDE_INT align
=
9571 DR_TARGET_ALIGNMENT (first_dr_info
).to_constant ();
9572 new_stmt
= gimple_build_assign
9573 (ptr
, BIT_AND_EXPR
, dataref_ptr
,
9575 (TREE_TYPE (dataref_ptr
),
9576 -(HOST_WIDE_INT
) align
));
9577 vect_finish_stmt_generation (stmt_info
, new_stmt
, gsi
);
9579 = build2 (MEM_REF
, vectype
, ptr
,
9580 build_int_cst (ref_type
, 0));
9581 vect_copy_ref_info (data_ref
, DR_REF (first_dr_info
->dr
));
9582 vec_dest
= vect_create_destination_var (scalar_dest
,
9584 new_stmt
= gimple_build_assign (vec_dest
, data_ref
);
9585 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
9586 gimple_assign_set_lhs (new_stmt
, new_temp
);
9587 gimple_move_vops (new_stmt
, stmt_info
->stmt
);
9588 vect_finish_stmt_generation (stmt_info
, new_stmt
, gsi
);
9591 bump
= size_binop (MULT_EXPR
, vs
,
9592 TYPE_SIZE_UNIT (elem_type
));
9593 bump
= size_binop (MINUS_EXPR
, bump
, size_one_node
);
9594 ptr
= bump_vector_ptr (dataref_ptr
, NULL
, gsi
,
9596 new_stmt
= gimple_build_assign
9597 (NULL_TREE
, BIT_AND_EXPR
, ptr
,
9599 (TREE_TYPE (ptr
), -(HOST_WIDE_INT
) align
));
9600 ptr
= copy_ssa_name (ptr
, new_stmt
);
9601 gimple_assign_set_lhs (new_stmt
, ptr
);
9602 vect_finish_stmt_generation (stmt_info
, new_stmt
, gsi
);
9604 = build2 (MEM_REF
, vectype
, ptr
,
9605 build_int_cst (ref_type
, 0));
9608 case dr_explicit_realign_optimized
:
9610 if (TREE_CODE (dataref_ptr
) == SSA_NAME
)
9611 new_temp
= copy_ssa_name (dataref_ptr
);
9613 new_temp
= make_ssa_name (TREE_TYPE (dataref_ptr
));
9614 // We should only be doing this if we know the target
9615 // alignment at compile time.
9616 unsigned HOST_WIDE_INT align
=
9617 DR_TARGET_ALIGNMENT (first_dr_info
).to_constant ();
9618 new_stmt
= gimple_build_assign
9619 (new_temp
, BIT_AND_EXPR
, dataref_ptr
,
9620 build_int_cst (TREE_TYPE (dataref_ptr
),
9621 -(HOST_WIDE_INT
) align
));
9622 vect_finish_stmt_generation (stmt_info
, new_stmt
, gsi
);
9624 = build2 (MEM_REF
, vectype
, new_temp
,
9625 build_int_cst (ref_type
, 0));
9631 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
9632 /* DATA_REF is null if we've already built the statement. */
9635 vect_copy_ref_info (data_ref
, DR_REF (first_dr_info
->dr
));
9636 new_stmt
= gimple_build_assign (vec_dest
, data_ref
);
9638 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
9639 gimple_set_lhs (new_stmt
, new_temp
);
9641 = vect_finish_stmt_generation (stmt_info
, new_stmt
, gsi
);
9643 /* 3. Handle explicit realignment if necessary/supported.
9645 vec_dest = realign_load (msq, lsq, realignment_token) */
9646 if (alignment_support_scheme
== dr_explicit_realign_optimized
9647 || alignment_support_scheme
== dr_explicit_realign
)
9649 lsq
= gimple_assign_lhs (new_stmt
);
9650 if (!realignment_token
)
9651 realignment_token
= dataref_ptr
;
9652 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
9653 new_stmt
= gimple_build_assign (vec_dest
, REALIGN_LOAD_EXPR
,
9654 msq
, lsq
, realignment_token
);
9655 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
9656 gimple_assign_set_lhs (new_stmt
, new_temp
);
9658 = vect_finish_stmt_generation (stmt_info
, new_stmt
, gsi
);
9660 if (alignment_support_scheme
== dr_explicit_realign_optimized
)
9663 if (i
== vec_num
- 1 && j
== ncopies
- 1)
9664 add_phi_arg (phi
, lsq
,
9665 loop_latch_edge (containing_loop
),
9671 if (memory_access_type
== VMAT_CONTIGUOUS_REVERSE
)
9673 tree perm_mask
= perm_mask_for_reverse (vectype
);
9674 new_temp
= permute_vec_elements (new_temp
, new_temp
,
9675 perm_mask
, stmt_info
, gsi
);
9676 new_stmt_info
= vinfo
->lookup_def (new_temp
);
9679 /* Collect vector loads and later create their permutation in
9680 vect_transform_grouped_load (). */
9681 if (grouped_load
|| slp_perm
)
9682 dr_chain
.quick_push (new_temp
);
9684 /* Store vector loads in the corresponding SLP_NODE. */
9685 if (slp
&& !slp_perm
)
9686 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt_info
);
9688 /* With SLP permutation we load the gaps as well, without
9689 we need to skip the gaps after we manage to fully load
9690 all elements. group_gap_adj is DR_GROUP_SIZE here. */
9691 group_elt
+= nunits
;
9692 if (maybe_ne (group_gap_adj
, 0U)
9694 && known_eq (group_elt
, group_size
- group_gap_adj
))
9696 poly_wide_int bump_val
9697 = (wi::to_wide (TYPE_SIZE_UNIT (elem_type
))
9699 tree bump
= wide_int_to_tree (sizetype
, bump_val
);
9700 dataref_ptr
= bump_vector_ptr (dataref_ptr
, ptr_incr
, gsi
,
9705 /* Bump the vector pointer to account for a gap or for excess
9706 elements loaded for a permuted SLP load. */
9707 if (maybe_ne (group_gap_adj
, 0U) && slp_perm
)
9709 poly_wide_int bump_val
9710 = (wi::to_wide (TYPE_SIZE_UNIT (elem_type
))
9712 tree bump
= wide_int_to_tree (sizetype
, bump_val
);
9713 dataref_ptr
= bump_vector_ptr (dataref_ptr
, ptr_incr
, gsi
,
9718 if (slp
&& !slp_perm
)
9724 if (!vect_transform_slp_perm_load (slp_node
, dr_chain
, gsi
, vf
,
9725 slp_node_instance
, false,
9728 dr_chain
.release ();
9736 if (memory_access_type
!= VMAT_LOAD_STORE_LANES
)
9737 vect_transform_grouped_load (stmt_info
, dr_chain
,
9739 *vec_stmt
= STMT_VINFO_VEC_STMT (stmt_info
);
9744 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt_info
;
9746 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt_info
;
9747 prev_stmt_info
= new_stmt_info
;
9750 dr_chain
.release ();
9756 /* Function vect_is_simple_cond.
9759 LOOP - the loop that is being vectorized.
9760 COND - Condition that is checked for simple use.
9763 *COMP_VECTYPE - the vector type for the comparison.
9764 *DTS - The def types for the arguments of the comparison
9766 Returns whether a COND can be vectorized. Checks whether
9767 condition operands are supportable using vec_is_simple_use. */
9770 vect_is_simple_cond (tree cond
, vec_info
*vinfo
,
9771 tree
*comp_vectype
, enum vect_def_type
*dts
,
9775 tree vectype1
= NULL_TREE
, vectype2
= NULL_TREE
;
9778 if (TREE_CODE (cond
) == SSA_NAME
9779 && VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (cond
)))
9781 if (!vect_is_simple_use (cond
, vinfo
, &dts
[0], comp_vectype
)
9783 || !VECTOR_BOOLEAN_TYPE_P (*comp_vectype
))
9788 if (!COMPARISON_CLASS_P (cond
))
9791 lhs
= TREE_OPERAND (cond
, 0);
9792 rhs
= TREE_OPERAND (cond
, 1);
9794 if (TREE_CODE (lhs
) == SSA_NAME
)
9796 if (!vect_is_simple_use (lhs
, vinfo
, &dts
[0], &vectype1
))
9799 else if (TREE_CODE (lhs
) == INTEGER_CST
|| TREE_CODE (lhs
) == REAL_CST
9800 || TREE_CODE (lhs
) == FIXED_CST
)
9801 dts
[0] = vect_constant_def
;
9805 if (TREE_CODE (rhs
) == SSA_NAME
)
9807 if (!vect_is_simple_use (rhs
, vinfo
, &dts
[1], &vectype2
))
9810 else if (TREE_CODE (rhs
) == INTEGER_CST
|| TREE_CODE (rhs
) == REAL_CST
9811 || TREE_CODE (rhs
) == FIXED_CST
)
9812 dts
[1] = vect_constant_def
;
9816 if (vectype1
&& vectype2
9817 && maybe_ne (TYPE_VECTOR_SUBPARTS (vectype1
),
9818 TYPE_VECTOR_SUBPARTS (vectype2
)))
9821 *comp_vectype
= vectype1
? vectype1
: vectype2
;
9822 /* Invariant comparison. */
9823 if (! *comp_vectype
)
9825 tree scalar_type
= TREE_TYPE (lhs
);
9826 /* If we can widen the comparison to match vectype do so. */
9827 if (INTEGRAL_TYPE_P (scalar_type
)
9829 && tree_int_cst_lt (TYPE_SIZE (scalar_type
),
9830 TYPE_SIZE (TREE_TYPE (vectype
))))
9831 scalar_type
= build_nonstandard_integer_type
9832 (tree_to_uhwi (TYPE_SIZE (TREE_TYPE (vectype
))),
9833 TYPE_UNSIGNED (scalar_type
));
9834 *comp_vectype
= get_vectype_for_scalar_type (vinfo
, scalar_type
);
9840 /* vectorizable_condition.
9842 Check if STMT_INFO is conditional modify expression that can be vectorized.
9843 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
9844 stmt using VEC_COND_EXPR to replace it, put it in VEC_STMT, and insert it
9847 When STMT_INFO is vectorized as a nested cycle, for_reduction is true.
9849 Return true if STMT_INFO is vectorizable in this way. */
9852 vectorizable_condition (stmt_vec_info stmt_info
, gimple_stmt_iterator
*gsi
,
9853 stmt_vec_info
*vec_stmt
,
9854 slp_tree slp_node
, stmt_vector_for_cost
*cost_vec
)
9856 vec_info
*vinfo
= stmt_info
->vinfo
;
9857 tree scalar_dest
= NULL_TREE
;
9858 tree vec_dest
= NULL_TREE
;
9859 tree cond_expr
, cond_expr0
= NULL_TREE
, cond_expr1
= NULL_TREE
;
9860 tree then_clause
, else_clause
;
9861 tree comp_vectype
= NULL_TREE
;
9862 tree vec_cond_lhs
= NULL_TREE
, vec_cond_rhs
= NULL_TREE
;
9863 tree vec_then_clause
= NULL_TREE
, vec_else_clause
= NULL_TREE
;
9866 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
9867 enum vect_def_type dts
[4]
9868 = {vect_unknown_def_type
, vect_unknown_def_type
,
9869 vect_unknown_def_type
, vect_unknown_def_type
};
9872 enum tree_code code
, cond_code
, bitop1
= NOP_EXPR
, bitop2
= NOP_EXPR
;
9873 stmt_vec_info prev_stmt_info
= NULL
;
9875 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
9876 vec
<tree
> vec_oprnds0
= vNULL
;
9877 vec
<tree
> vec_oprnds1
= vNULL
;
9878 vec
<tree
> vec_oprnds2
= vNULL
;
9879 vec
<tree
> vec_oprnds3
= vNULL
;
9881 bool masked
= false;
9883 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
9886 /* Is vectorizable conditional operation? */
9887 gassign
*stmt
= dyn_cast
<gassign
*> (stmt_info
->stmt
);
9891 code
= gimple_assign_rhs_code (stmt
);
9892 if (code
!= COND_EXPR
)
9895 stmt_vec_info reduc_info
= NULL
;
9896 int reduc_index
= -1;
9897 vect_reduction_type reduction_type
= TREE_CODE_REDUCTION
;
9899 = STMT_VINFO_REDUC_DEF (vect_orig_stmt (stmt_info
)) != NULL
;
9902 if (STMT_SLP_TYPE (stmt_info
))
9904 reduc_info
= info_for_reduction (stmt_info
);
9905 reduction_type
= STMT_VINFO_REDUC_TYPE (reduc_info
);
9906 reduc_index
= STMT_VINFO_REDUC_IDX (reduc_info
);
9907 gcc_assert (reduction_type
!= EXTRACT_LAST_REDUCTION
9908 || reduc_index
!= -1);
9912 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
)
9915 /* FORNOW: only supported as part of a reduction. */
9916 if (STMT_VINFO_LIVE_P (stmt_info
))
9918 if (dump_enabled_p ())
9919 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
9920 "value used after loop.\n");
9925 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
9926 tree vectype1
= NULL_TREE
, vectype2
= NULL_TREE
;
9931 ncopies
= vect_get_num_copies (loop_vinfo
, vectype
);
9933 gcc_assert (ncopies
>= 1);
9934 if (for_reduction
&& ncopies
> 1)
9935 return false; /* FORNOW */
9937 cond_expr
= gimple_assign_rhs1 (stmt
);
9938 then_clause
= gimple_assign_rhs2 (stmt
);
9939 else_clause
= gimple_assign_rhs3 (stmt
);
9941 if (!vect_is_simple_cond (cond_expr
, stmt_info
->vinfo
,
9942 &comp_vectype
, &dts
[0], slp_node
? NULL
: vectype
)
9946 if (!vect_is_simple_use (then_clause
, stmt_info
->vinfo
, &dts
[2], &vectype1
))
9948 if (!vect_is_simple_use (else_clause
, stmt_info
->vinfo
, &dts
[3], &vectype2
))
9951 if (vectype1
&& !useless_type_conversion_p (vectype
, vectype1
))
9954 if (vectype2
&& !useless_type_conversion_p (vectype
, vectype2
))
9957 masked
= !COMPARISON_CLASS_P (cond_expr
);
9958 vec_cmp_type
= build_same_sized_truth_vector_type (comp_vectype
);
9960 if (vec_cmp_type
== NULL_TREE
)
9963 cond_code
= TREE_CODE (cond_expr
);
9966 cond_expr0
= TREE_OPERAND (cond_expr
, 0);
9967 cond_expr1
= TREE_OPERAND (cond_expr
, 1);
9970 /* For conditional reductions, the "then" value needs to be the candidate
9971 value calculated by this iteration while the "else" value needs to be
9972 the result carried over from previous iterations. If the COND_EXPR
9973 is the other way around, we need to swap it. */
9974 bool must_invert_cmp_result
= false;
9975 if (reduction_type
== EXTRACT_LAST_REDUCTION
&& reduc_index
== 1)
9978 must_invert_cmp_result
= true;
9981 bool honor_nans
= HONOR_NANS (TREE_TYPE (cond_expr0
));
9982 tree_code new_code
= invert_tree_comparison (cond_code
, honor_nans
);
9983 if (new_code
== ERROR_MARK
)
9984 must_invert_cmp_result
= true;
9986 cond_code
= new_code
;
9988 /* Make sure we don't accidentally use the old condition. */
9989 cond_expr
= NULL_TREE
;
9990 std::swap (then_clause
, else_clause
);
9993 if (!masked
&& VECTOR_BOOLEAN_TYPE_P (comp_vectype
))
9995 /* Boolean values may have another representation in vectors
9996 and therefore we prefer bit operations over comparison for
9997 them (which also works for scalar masks). We store opcodes
9998 to use in bitop1 and bitop2. Statement is vectorized as
9999 BITOP2 (rhs1 BITOP1 rhs2) or rhs1 BITOP2 (BITOP1 rhs2)
10000 depending on bitop1 and bitop2 arity. */
10004 bitop1
= BIT_NOT_EXPR
;
10005 bitop2
= BIT_AND_EXPR
;
10008 bitop1
= BIT_NOT_EXPR
;
10009 bitop2
= BIT_IOR_EXPR
;
10012 bitop1
= BIT_NOT_EXPR
;
10013 bitop2
= BIT_AND_EXPR
;
10014 std::swap (cond_expr0
, cond_expr1
);
10017 bitop1
= BIT_NOT_EXPR
;
10018 bitop2
= BIT_IOR_EXPR
;
10019 std::swap (cond_expr0
, cond_expr1
);
10022 bitop1
= BIT_XOR_EXPR
;
10025 bitop1
= BIT_XOR_EXPR
;
10026 bitop2
= BIT_NOT_EXPR
;
10031 cond_code
= SSA_NAME
;
10036 if (bitop1
!= NOP_EXPR
)
10038 machine_mode mode
= TYPE_MODE (comp_vectype
);
10041 optab
= optab_for_tree_code (bitop1
, comp_vectype
, optab_default
);
10042 if (!optab
|| optab_handler (optab
, mode
) == CODE_FOR_nothing
)
10045 if (bitop2
!= NOP_EXPR
)
10047 optab
= optab_for_tree_code (bitop2
, comp_vectype
,
10049 if (!optab
|| optab_handler (optab
, mode
) == CODE_FOR_nothing
)
10053 if (expand_vec_cond_expr_p (vectype
, comp_vectype
,
10056 STMT_VINFO_TYPE (stmt_info
) = condition_vec_info_type
;
10057 vect_model_simple_cost (stmt_info
, ncopies
, dts
, ndts
, slp_node
,
10068 vec_oprnds0
.create (1);
10069 vec_oprnds1
.create (1);
10070 vec_oprnds2
.create (1);
10071 vec_oprnds3
.create (1);
10075 scalar_dest
= gimple_assign_lhs (stmt
);
10076 if (reduction_type
!= EXTRACT_LAST_REDUCTION
)
10077 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
10079 /* Handle cond expr. */
10080 for (j
= 0; j
< ncopies
; j
++)
10082 bool swap_cond_operands
= false;
10084 /* See whether another part of the vectorized code applies a loop
10085 mask to the condition, or to its inverse. */
10087 vec_loop_masks
*masks
= NULL
;
10088 if (loop_vinfo
&& LOOP_VINFO_FULLY_MASKED_P (loop_vinfo
))
10090 if (reduction_type
== EXTRACT_LAST_REDUCTION
)
10091 masks
= &LOOP_VINFO_MASKS (loop_vinfo
);
10094 scalar_cond_masked_key
cond (cond_expr
, ncopies
);
10095 if (loop_vinfo
->scalar_cond_masked_set
.contains (cond
))
10096 masks
= &LOOP_VINFO_MASKS (loop_vinfo
);
10099 bool honor_nans
= HONOR_NANS (TREE_TYPE (cond
.op0
));
10100 cond
.code
= invert_tree_comparison (cond
.code
, honor_nans
);
10101 if (loop_vinfo
->scalar_cond_masked_set
.contains (cond
))
10103 masks
= &LOOP_VINFO_MASKS (loop_vinfo
);
10104 cond_code
= cond
.code
;
10105 swap_cond_operands
= true;
10111 stmt_vec_info new_stmt_info
= NULL
;
10116 auto_vec
<vec
<tree
>, 4> vec_defs
;
10117 vect_get_slp_defs (slp_node
, &vec_defs
);
10118 vec_oprnds3
= vec_defs
.pop ();
10119 vec_oprnds2
= vec_defs
.pop ();
10121 vec_oprnds1
= vec_defs
.pop ();
10122 vec_oprnds0
= vec_defs
.pop ();
10129 = vect_get_vec_def_for_operand (cond_expr
, stmt_info
,
10135 = vect_get_vec_def_for_operand (cond_expr0
,
10136 stmt_info
, comp_vectype
);
10138 = vect_get_vec_def_for_operand (cond_expr1
,
10139 stmt_info
, comp_vectype
);
10141 vec_then_clause
= vect_get_vec_def_for_operand (then_clause
,
10143 if (reduction_type
!= EXTRACT_LAST_REDUCTION
)
10144 vec_else_clause
= vect_get_vec_def_for_operand (else_clause
,
10151 = vect_get_vec_def_for_stmt_copy (vinfo
, vec_oprnds0
.pop ());
10154 = vect_get_vec_def_for_stmt_copy (vinfo
, vec_oprnds1
.pop ());
10156 vec_then_clause
= vect_get_vec_def_for_stmt_copy (vinfo
,
10157 vec_oprnds2
.pop ());
10158 vec_else_clause
= vect_get_vec_def_for_stmt_copy (vinfo
,
10159 vec_oprnds3
.pop ());
10164 vec_oprnds0
.quick_push (vec_cond_lhs
);
10166 vec_oprnds1
.quick_push (vec_cond_rhs
);
10167 vec_oprnds2
.quick_push (vec_then_clause
);
10168 vec_oprnds3
.quick_push (vec_else_clause
);
10171 /* Arguments are ready. Create the new vector stmt. */
10172 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vec_cond_lhs
)
10174 vec_then_clause
= vec_oprnds2
[i
];
10175 vec_else_clause
= vec_oprnds3
[i
];
10177 if (swap_cond_operands
)
10178 std::swap (vec_then_clause
, vec_else_clause
);
10181 vec_compare
= vec_cond_lhs
;
10184 vec_cond_rhs
= vec_oprnds1
[i
];
10185 if (bitop1
== NOP_EXPR
)
10186 vec_compare
= build2 (cond_code
, vec_cmp_type
,
10187 vec_cond_lhs
, vec_cond_rhs
);
10190 new_temp
= make_ssa_name (vec_cmp_type
);
10192 if (bitop1
== BIT_NOT_EXPR
)
10193 new_stmt
= gimple_build_assign (new_temp
, bitop1
,
10197 = gimple_build_assign (new_temp
, bitop1
, vec_cond_lhs
,
10199 vect_finish_stmt_generation (stmt_info
, new_stmt
, gsi
);
10200 if (bitop2
== NOP_EXPR
)
10201 vec_compare
= new_temp
;
10202 else if (bitop2
== BIT_NOT_EXPR
)
10204 /* Instead of doing ~x ? y : z do x ? z : y. */
10205 vec_compare
= new_temp
;
10206 std::swap (vec_then_clause
, vec_else_clause
);
10210 vec_compare
= make_ssa_name (vec_cmp_type
);
10212 = gimple_build_assign (vec_compare
, bitop2
,
10213 vec_cond_lhs
, new_temp
);
10214 vect_finish_stmt_generation (stmt_info
, new_stmt
, gsi
);
10219 /* If we decided to apply a loop mask to the result of the vector
10220 comparison, AND the comparison with the mask now. Later passes
10221 should then be able to reuse the AND results between mulitple
10225 for (int i = 0; i < 100; ++i)
10226 x[i] = y[i] ? z[i] : 10;
10228 results in following optimized GIMPLE:
10230 mask__35.8_43 = vect__4.7_41 != { 0, ... };
10231 vec_mask_and_46 = loop_mask_40 & mask__35.8_43;
10232 _19 = &MEM[base: z_12(D), index: ivtmp_56, step: 4, offset: 0B];
10233 vect_iftmp.11_47 = .MASK_LOAD (_19, 4B, vec_mask_and_46);
10234 vect_iftmp.12_52 = VEC_COND_EXPR <vec_mask_and_46,
10235 vect_iftmp.11_47, { 10, ... }>;
10237 instead of using a masked and unmasked forms of
10238 vec != { 0, ... } (masked in the MASK_LOAD,
10239 unmasked in the VEC_COND_EXPR). */
10241 /* Force vec_compare to be an SSA_NAME rather than a comparison,
10242 in cases where that's necessary. */
10244 if (masks
|| reduction_type
== EXTRACT_LAST_REDUCTION
)
10246 if (!is_gimple_val (vec_compare
))
10248 tree vec_compare_name
= make_ssa_name (vec_cmp_type
);
10249 gassign
*new_stmt
= gimple_build_assign (vec_compare_name
,
10251 vect_finish_stmt_generation (stmt_info
, new_stmt
, gsi
);
10252 vec_compare
= vec_compare_name
;
10255 if (must_invert_cmp_result
)
10257 tree vec_compare_name
= make_ssa_name (vec_cmp_type
);
10258 gassign
*new_stmt
= gimple_build_assign (vec_compare_name
,
10261 vect_finish_stmt_generation (stmt_info
, new_stmt
, gsi
);
10262 vec_compare
= vec_compare_name
;
10267 unsigned vec_num
= vec_oprnds0
.length ();
10269 = vect_get_loop_mask (gsi
, masks
, vec_num
* ncopies
,
10270 vectype
, vec_num
* j
+ i
);
10271 tree tmp2
= make_ssa_name (vec_cmp_type
);
10273 = gimple_build_assign (tmp2
, BIT_AND_EXPR
, vec_compare
,
10275 vect_finish_stmt_generation (stmt_info
, g
, gsi
);
10276 vec_compare
= tmp2
;
10280 if (reduction_type
== EXTRACT_LAST_REDUCTION
)
10282 gcall
*new_stmt
= gimple_build_call_internal
10283 (IFN_FOLD_EXTRACT_LAST
, 3, else_clause
, vec_compare
,
10285 gimple_call_set_lhs (new_stmt
, scalar_dest
);
10286 SSA_NAME_DEF_STMT (scalar_dest
) = new_stmt
;
10287 if (stmt_info
->stmt
== gsi_stmt (*gsi
))
10288 new_stmt_info
= vect_finish_replace_stmt (stmt_info
, new_stmt
);
10291 /* In this case we're moving the definition to later in the
10292 block. That doesn't matter because the only uses of the
10293 lhs are in phi statements. */
10294 gimple_stmt_iterator old_gsi
10295 = gsi_for_stmt (stmt_info
->stmt
);
10296 gsi_remove (&old_gsi
, true);
10298 = vect_finish_stmt_generation (stmt_info
, new_stmt
, gsi
);
10303 new_temp
= make_ssa_name (vec_dest
);
10305 = gimple_build_assign (new_temp
, VEC_COND_EXPR
, vec_compare
,
10306 vec_then_clause
, vec_else_clause
);
10308 = vect_finish_stmt_generation (stmt_info
, new_stmt
, gsi
);
10311 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt_info
);
10318 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt_info
;
10320 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt_info
;
10322 prev_stmt_info
= new_stmt_info
;
10325 vec_oprnds0
.release ();
10326 vec_oprnds1
.release ();
10327 vec_oprnds2
.release ();
10328 vec_oprnds3
.release ();
10333 /* vectorizable_comparison.
10335 Check if STMT_INFO is comparison expression that can be vectorized.
10336 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
10337 comparison, put it in VEC_STMT, and insert it at GSI.
10339 Return true if STMT_INFO is vectorizable in this way. */
/* NOTE(review): this chunk is a damaged extraction -- each statement is
   split across physical lines and some original lines are missing
   (gaps in the embedded line numbers, e.g. 10413-10414).  Verify
   against upstream GCC tree-vect-stmts.c before editing.  */
10342 vectorizable_comparison (stmt_vec_info stmt_info
, gimple_stmt_iterator
*gsi
,
10343 stmt_vec_info
*vec_stmt
,
10344 slp_tree slp_node
, stmt_vector_for_cost
*cost_vec
)
/* Local state: operand types/defs, the bit-op lowering codes, and the
   per-copy operand vectors used during transformation.  */
10346 vec_info
*vinfo
= stmt_info
->vinfo
;
10347 tree lhs
, rhs1
, rhs2
;
10348 tree vectype1
= NULL_TREE
, vectype2
= NULL_TREE
;
10349 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
10350 tree vec_rhs1
= NULL_TREE
, vec_rhs2
= NULL_TREE
;
10352 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
10353 enum vect_def_type dts
[2] = {vect_unknown_def_type
, vect_unknown_def_type
};
10355 poly_uint64 nunits
;
10357 enum tree_code code
, bitop1
= NOP_EXPR
, bitop2
= NOP_EXPR
;
10358 stmt_vec_info prev_stmt_info
= NULL
;
10360 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
10361 vec
<tree
> vec_oprnds0
= vNULL
;
10362 vec
<tree
> vec_oprnds1
= vNULL
;
/* Early-out checks: stmt must be relevant (or part of a BB vinfo),
   the result vectype a boolean vector, the def internal, and the
   stmt a gassign whose rhs code is a tcc_comparison.  */
10366 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
10369 if (!vectype
|| !VECTOR_BOOLEAN_TYPE_P (vectype
))
10372 mask_type
= vectype
;
10373 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
10378 ncopies
= vect_get_num_copies (loop_vinfo
, vectype
);
10380 gcc_assert (ncopies
>= 1);
10381 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
)
10384 if (STMT_VINFO_LIVE_P (stmt_info
))
10386 if (dump_enabled_p ())
10387 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
10388 "value used after loop.\n");
10392 gassign
*stmt
= dyn_cast
<gassign
*> (stmt_info
->stmt
);
10396 code
= gimple_assign_rhs_code (stmt
);
10398 if (TREE_CODE_CLASS (code
) != tcc_comparison
)
10401 rhs1
= gimple_assign_rhs1 (stmt
);
10402 rhs2
= gimple_assign_rhs2 (stmt
);
/* Both comparison operands must be simple uses and, when both have a
   vector type, those types must agree in subparts and maskness.  */
10404 if (!vect_is_simple_use (rhs1
, stmt_info
->vinfo
, &dts
[0], &vectype1
))
10407 if (!vect_is_simple_use (rhs2
, stmt_info
->vinfo
, &dts
[1], &vectype2
))
10410 if (vectype1
&& vectype2
10411 && maybe_ne (TYPE_VECTOR_SUBPARTS (vectype1
),
10412 TYPE_VECTOR_SUBPARTS (vectype2
)))
10415 vectype
= vectype1
? vectype1
: vectype2
;
10417 /* Invariant comparison. */
10420 vectype
= get_vectype_for_scalar_type (vinfo
, TREE_TYPE (rhs1
));
10421 if (maybe_ne (TYPE_VECTOR_SUBPARTS (vectype
), nunits
))
10424 else if (maybe_ne (nunits
, TYPE_VECTOR_SUBPARTS (vectype
)))
10427 /* Can't compare mask and non-mask types. */
10428 if (vectype1
&& vectype2
10429 && (VECTOR_BOOLEAN_TYPE_P (vectype1
) ^ VECTOR_BOOLEAN_TYPE_P (vectype2
)))
10432 /* Boolean values may have another representation in vectors
10433 and therefore we prefer bit operations over comparison for
10434 them (which also works for scalar masks). We store opcodes
10435 to use in bitop1 and bitop2. Statement is vectorized as
10436 BITOP2 (rhs1 BITOP1 rhs2) or
10437 rhs1 BITOP2 (BITOP1 rhs2)
10438 depending on bitop1 and bitop2 arity. */
10439 bool swap_p
= false;
10440 if (VECTOR_BOOLEAN_TYPE_P (vectype
))
10442 if (code
== GT_EXPR
)
10444 bitop1
= BIT_NOT_EXPR
;
10445 bitop2
= BIT_AND_EXPR
;
10447 else if (code
== GE_EXPR
)
10449 bitop1
= BIT_NOT_EXPR
;
10450 bitop2
= BIT_IOR_EXPR
;
10452 else if (code
== LT_EXPR
)
10454 bitop1
= BIT_NOT_EXPR
;
10455 bitop2
= BIT_AND_EXPR
;
10458 else if (code
== LE_EXPR
)
10460 bitop1
= BIT_NOT_EXPR
;
10461 bitop2
= BIT_IOR_EXPR
;
10466 bitop1
= BIT_XOR_EXPR
;
10467 if (code
== EQ_EXPR
)
10468 bitop2
= BIT_NOT_EXPR
;
/* Check target support: either a direct vector-compare expander, or
   optab support for the chosen bit ops.  */
10474 if (bitop1
== NOP_EXPR
)
10476 if (!expand_vec_cmp_expr_p (vectype
, mask_type
, code
))
10481 machine_mode mode
= TYPE_MODE (vectype
);
10484 optab
= optab_for_tree_code (bitop1
, vectype
, optab_default
);
10485 if (!optab
|| optab_handler (optab
, mode
) == CODE_FOR_nothing
)
10488 if (bitop2
!= NOP_EXPR
)
10490 optab
= optab_for_tree_code (bitop2
, vectype
, optab_default
);
10491 if (!optab
|| optab_handler (optab
, mode
) == CODE_FOR_nothing
)
/* Analysis phase ends here: record the stmt type and its cost.  */
10496 STMT_VINFO_TYPE (stmt_info
) = comparison_vec_info_type
;
10497 vect_model_simple_cost (stmt_info
, ncopies
* (1 + (bitop2
!= NOP_EXPR
)),
10498 dts
, ndts
, slp_node
, cost_vec
);
/* Transformation phase: build vector defs and emit the comparison.  */
10505 vec_oprnds0
.create (1);
10506 vec_oprnds1
.create (1);
10510 lhs
= gimple_assign_lhs (stmt
);
10511 mask
= vect_create_destination_var (lhs
, mask_type
);
10513 /* Handle cmp expr. */
10514 for (j
= 0; j
< ncopies
; j
++)
10516 stmt_vec_info new_stmt_info
= NULL
;
10521 auto_vec
<vec
<tree
>, 2> vec_defs
;
10522 vect_get_slp_defs (slp_node
, &vec_defs
);
10523 vec_oprnds1
= vec_defs
.pop ();
10524 vec_oprnds0
= vec_defs
.pop ();
10526 std::swap (vec_oprnds0
, vec_oprnds1
);
10530 vec_rhs1
= vect_get_vec_def_for_operand (rhs1
, stmt_info
,
10532 vec_rhs2
= vect_get_vec_def_for_operand (rhs2
, stmt_info
,
10538 vec_rhs1
= vect_get_vec_def_for_stmt_copy (vinfo
,
10539 vec_oprnds0
.pop ());
10540 vec_rhs2
= vect_get_vec_def_for_stmt_copy (vinfo
,
10541 vec_oprnds1
.pop ());
10546 if (swap_p
&& j
== 0)
10547 std::swap (vec_rhs1
, vec_rhs2
);
10548 vec_oprnds0
.quick_push (vec_rhs1
);
10549 vec_oprnds1
.quick_push (vec_rhs2
);
10552 /* Arguments are ready. Create the new vector stmt. */
10553 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vec_rhs1
)
10555 vec_rhs2
= vec_oprnds1
[i
];
10557 new_temp
= make_ssa_name (mask
);
10558 if (bitop1
== NOP_EXPR
)
10560 gassign
*new_stmt
= gimple_build_assign (new_temp
, code
,
10561 vec_rhs1
, vec_rhs2
);
10563 = vect_finish_stmt_generation (stmt_info
, new_stmt
, gsi
);
10568 if (bitop1
== BIT_NOT_EXPR
)
10569 new_stmt
= gimple_build_assign (new_temp
, bitop1
, vec_rhs2
);
10571 new_stmt
= gimple_build_assign (new_temp
, bitop1
, vec_rhs1
,
10574 = vect_finish_stmt_generation (stmt_info
, new_stmt
, gsi
);
10575 if (bitop2
!= NOP_EXPR
)
10577 tree res
= make_ssa_name (mask
);
10578 if (bitop2
== BIT_NOT_EXPR
)
10579 new_stmt
= gimple_build_assign (res
, bitop2
, new_temp
);
10581 new_stmt
= gimple_build_assign (res
, bitop2
, vec_rhs1
,
10584 = vect_finish_stmt_generation (stmt_info
, new_stmt
, gsi
);
/* Chain the per-copy vector stmts via RELATED_STMT (non-SLP case).  */
10588 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt_info
);
10595 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt_info
;
10597 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt_info
;
10599 prev_stmt_info
= new_stmt_info
;
/* Release the temporary operand vectors.  */
10602 vec_oprnds0
.release ();
10603 vec_oprnds1
.release ();
10608 /* If SLP_NODE is nonnull, return true if vectorizable_live_operation
10609 can handle all live statements in the node. Otherwise return true
10610 if STMT_INFO is not live or if vectorizable_live_operation can handle it.
10611 GSI and VEC_STMT_P are as for vectorizable_live_operation. */
/* NOTE(review): damaged extraction -- the return type, braces and the
   `if (slp_node)` guard line are among the missing original lines
   (gaps 10612-10613, 10618-10620 etc.); verify against upstream.  */
10614 can_vectorize_live_stmts (stmt_vec_info stmt_info
, gimple_stmt_iterator
*gsi
,
10615 slp_tree slp_node
, slp_instance slp_node_instance
,
10617 stmt_vector_for_cost
*cost_vec
)
10621 stmt_vec_info slp_stmt_info
;
/* SLP branch: every live scalar stmt of the node must be handled.  */
10623 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (slp_node
), i
, slp_stmt_info
)
10625 if (STMT_VINFO_LIVE_P (slp_stmt_info
)
10626 && !vectorizable_live_operation (slp_stmt_info
, gsi
, slp_node
,
10627 slp_node_instance
, i
,
10628 vec_stmt_p
, cost_vec
))
/* Non-SLP branch: check STMT_INFO itself (slp index -1).  */
10632 else if (STMT_VINFO_LIVE_P (stmt_info
)
10633 && !vectorizable_live_operation (stmt_info
, gsi
, slp_node
,
10634 slp_node_instance
, -1,
10635 vec_stmt_p
, cost_vec
))
10641 /* Make sure the statement is vectorizable. */
/* NOTE(review): damaged extraction -- statements are split across
   physical lines and several original lines are missing (e.g. the
   function's return type, opening brace, and some guard conditions).
   Verify against upstream GCC tree-vect-stmts.c before editing.
   Purpose (from the visible code): analyze one stmt (and its pattern
   def-seq / pattern stmt) and dispatch to the vectorizable_* analyzers,
   returning an opt_result.  */
10644 vect_analyze_stmt (stmt_vec_info stmt_info
, bool *need_to_vectorize
,
10645 slp_tree node
, slp_instance node_instance
,
10646 stmt_vector_for_cost
*cost_vec
)
10648 vec_info
*vinfo
= stmt_info
->vinfo
;
10649 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
10650 enum vect_relevant relevance
= STMT_VINFO_RELEVANT (stmt_info
);
10652 gimple_seq pattern_def_seq
;
10654 if (dump_enabled_p ())
10655 dump_printf_loc (MSG_NOTE
, vect_location
, "==> examining statement: %G",
/* Volatile operands are never vectorized.  */
10658 if (gimple_has_volatile_ops (stmt_info
->stmt
))
10659 return opt_result::failure_at (stmt_info
->stmt
,
10661 " stmt has volatile operands: %G\n",
/* Recurse into any pattern def sequence attached to a pattern stmt.  */
10664 if (STMT_VINFO_IN_PATTERN_P (stmt_info
)
10666 && (pattern_def_seq
= STMT_VINFO_PATTERN_DEF_SEQ (stmt_info
)))
10668 gimple_stmt_iterator si
;
10670 for (si
= gsi_start (pattern_def_seq
); !gsi_end_p (si
); gsi_next (&si
))
10672 stmt_vec_info pattern_def_stmt_info
10673 = vinfo
->lookup_stmt (gsi_stmt (si
));
10674 if (STMT_VINFO_RELEVANT_P (pattern_def_stmt_info
)
10675 || STMT_VINFO_LIVE_P (pattern_def_stmt_info
))
10677 /* Analyze def stmt of STMT if it's a pattern stmt. */
10678 if (dump_enabled_p ())
10679 dump_printf_loc (MSG_NOTE
, vect_location
,
10680 "==> examining pattern def statement: %G",
10681 pattern_def_stmt_info
->stmt
);
10684 = vect_analyze_stmt (pattern_def_stmt_info
,
10685 need_to_vectorize
, node
, node_instance
,
10693 /* Skip stmts that do not need to be vectorized. In loops this is expected
10695 - the COND_EXPR which is the loop exit condition
10696 - any LABEL_EXPRs in the loop
10697 - computations that are used only for array indexing or loop control.
10698 In basic blocks we only analyze statements that are a part of some SLP
10699 instance, therefore, all the statements are relevant.
10701 Pattern statement needs to be analyzed instead of the original statement
10702 if the original statement is not relevant. Otherwise, we analyze both
10703 statements. In basic blocks we are called from some SLP instance
10704 traversal, don't analyze pattern stmts instead, the pattern stmts
10705 already will be part of SLP instance. */
10707 stmt_vec_info pattern_stmt_info
= STMT_VINFO_RELATED_STMT (stmt_info
);
10708 if (!STMT_VINFO_RELEVANT_P (stmt_info
)
10709 && !STMT_VINFO_LIVE_P (stmt_info
))
10711 if (STMT_VINFO_IN_PATTERN_P (stmt_info
)
10712 && pattern_stmt_info
10713 && (STMT_VINFO_RELEVANT_P (pattern_stmt_info
)
10714 || STMT_VINFO_LIVE_P (pattern_stmt_info
)))
10716 /* Analyze PATTERN_STMT instead of the original stmt. */
10717 stmt_info
= pattern_stmt_info
;
10718 if (dump_enabled_p ())
10719 dump_printf_loc (MSG_NOTE
, vect_location
,
10720 "==> examining pattern statement: %G",
10725 if (dump_enabled_p ())
10726 dump_printf_loc (MSG_NOTE
, vect_location
, "irrelevant.\n");
10728 return opt_result::success ();
10731 else if (STMT_VINFO_IN_PATTERN_P (stmt_info
)
10733 && pattern_stmt_info
10734 && (STMT_VINFO_RELEVANT_P (pattern_stmt_info
)
10735 || STMT_VINFO_LIVE_P (pattern_stmt_info
)))
10737 /* Analyze PATTERN_STMT too. */
10738 if (dump_enabled_p ())
10739 dump_printf_loc (MSG_NOTE
, vect_location
,
10740 "==> examining pattern statement: %G",
10741 pattern_stmt_info
->stmt
);
10744 = vect_analyze_stmt (pattern_stmt_info
, need_to_vectorize
, node
,
10745 node_instance
, cost_vec
);
/* Sanity-check the recorded def type against the stmt's relevance
   (the gcc_asserts below encode the legal combinations).  */
10750 switch (STMT_VINFO_DEF_TYPE (stmt_info
))
10752 case vect_internal_def
:
10755 case vect_reduction_def
:
10756 case vect_nested_cycle
:
10757 gcc_assert (!bb_vinfo
10758 && (relevance
== vect_used_in_outer
10759 || relevance
== vect_used_in_outer_by_reduction
10760 || relevance
== vect_used_by_reduction
10761 || relevance
== vect_unused_in_scope
10762 || relevance
== vect_used_only_live
));
10765 case vect_induction_def
:
10766 gcc_assert (!bb_vinfo
);
10769 case vect_constant_def
:
10770 case vect_external_def
:
10771 case vect_unknown_def_type
:
10773 gcc_unreachable ();
10776 if (STMT_VINFO_RELEVANT_P (stmt_info
))
10778 tree type
= gimple_expr_type (stmt_info
->stmt
);
10779 gcc_assert (!VECTOR_MODE_P (TYPE_MODE (type
)));
10780 gcall
*call
= dyn_cast
<gcall
*> (stmt_info
->stmt
);
10781 gcc_assert (STMT_VINFO_VECTYPE (stmt_info
)
10782 || (call
&& gimple_call_lhs (call
) == NULL_TREE
));
10783 *need_to_vectorize
= true;
10786 if (PURE_SLP_STMT (stmt_info
) && !node
)
10788 if (dump_enabled_p ())
10789 dump_printf_loc (MSG_NOTE
, vect_location
,
10790 "handled only by SLP analysis\n");
10791 return opt_result::success ();
/* First analyzer chain -- presumably the loop-vinfo case (the guarding
   condition line is missing from this extraction; TODO confirm).  */
10796 && (STMT_VINFO_RELEVANT_P (stmt_info
)
10797 || STMT_VINFO_DEF_TYPE (stmt_info
) == vect_reduction_def
))
10798 /* Prefer vectorizable_call over vectorizable_simd_clone_call so
10799 -mveclibabi= takes preference over library functions with
10800 the simd attribute. */
10801 ok
= (vectorizable_call (stmt_info
, NULL
, NULL
, node
, cost_vec
)
10802 || vectorizable_simd_clone_call (stmt_info
, NULL
, NULL
, node
,
10804 || vectorizable_conversion (stmt_info
, NULL
, NULL
, node
, cost_vec
)
10805 || vectorizable_operation (stmt_info
, NULL
, NULL
, node
, cost_vec
)
10806 || vectorizable_assignment (stmt_info
, NULL
, NULL
, node
, cost_vec
)
10807 || vectorizable_load (stmt_info
, NULL
, NULL
, node
, node_instance
,
10809 || vectorizable_store (stmt_info
, NULL
, NULL
, node
, cost_vec
)
10810 || vectorizable_reduction (stmt_info
, node
, node_instance
, cost_vec
)
10811 || vectorizable_induction (stmt_info
, NULL
, NULL
, node
, cost_vec
)
10812 || vectorizable_shift (stmt_info
, NULL
, NULL
, node
, cost_vec
)
10813 || vectorizable_condition (stmt_info
, NULL
, NULL
, node
, cost_vec
)
10814 || vectorizable_comparison (stmt_info
, NULL
, NULL
, node
,
10816 || vectorizable_lc_phi (stmt_info
, NULL
, node
));
/* Second analyzer chain -- presumably the basic-block (SLP) case;
   note it omits reduction/induction/lc-phi analyzers.  */
10820 ok
= (vectorizable_call (stmt_info
, NULL
, NULL
, node
, cost_vec
)
10821 || vectorizable_simd_clone_call (stmt_info
, NULL
, NULL
, node
,
10823 || vectorizable_conversion (stmt_info
, NULL
, NULL
, node
,
10825 || vectorizable_shift (stmt_info
, NULL
, NULL
, node
, cost_vec
)
10826 || vectorizable_operation (stmt_info
, NULL
, NULL
, node
, cost_vec
)
10827 || vectorizable_assignment (stmt_info
, NULL
, NULL
, node
,
10829 || vectorizable_load (stmt_info
, NULL
, NULL
, node
, node_instance
,
10831 || vectorizable_store (stmt_info
, NULL
, NULL
, node
, cost_vec
)
10832 || vectorizable_condition (stmt_info
, NULL
, NULL
, node
, cost_vec
)
10833 || vectorizable_comparison (stmt_info
, NULL
, NULL
, node
,
10838 return opt_result::failure_at (stmt_info
->stmt
,
10840 " relevant stmt not supported: %G",
10843 /* Stmts that are (also) "live" (i.e. - that are used out of the loop)
10844 need extra handling, except for vectorizable reductions. */
10846 && STMT_VINFO_TYPE (stmt_info
) != reduc_vec_info_type
10847 && STMT_VINFO_TYPE (stmt_info
) != lc_phi_info_type
10848 && !can_vectorize_live_stmts (stmt_info
, NULL
, node
, node_instance
,
10850 return opt_result::failure_at (stmt_info
->stmt
,
10852 " live stmt not supported: %G",
10855 return opt_result::success ();
10859 /* Function vect_transform_stmt.
10861 Create a vectorized stmt to replace STMT_INFO, and insert it at GSI. */
/* NOTE(review): damaged extraction -- statements are split across
   physical lines and several original lines are missing (return type,
   braces, some `break;` and condition lines).  Verify against upstream
   GCC tree-vect-stmts.c before editing.  */
10864 vect_transform_stmt (stmt_vec_info stmt_info
, gimple_stmt_iterator
*gsi
,
10865 slp_tree slp_node
, slp_instance slp_node_instance
)
10867 vec_info
*vinfo
= stmt_info
->vinfo
;
10868 bool is_store
= false;
10869 stmt_vec_info vec_stmt
= NULL
;
10872 gcc_assert (slp_node
|| !PURE_SLP_STMT (stmt_info
));
/* Remember the previous vector stmt so the hybrid-SLP assert below can
   verify SLP transformation did not clobber it.  */
10873 stmt_vec_info old_vec_stmt_info
= STMT_VINFO_VEC_STMT (stmt_info
);
10875 bool nested_p
= (STMT_VINFO_LOOP_VINFO (stmt_info
)
10876 && nested_in_vect_loop_p
10877 (LOOP_VINFO_LOOP (STMT_VINFO_LOOP_VINFO (stmt_info
)),
10880 gimple
*stmt
= stmt_info
->stmt
;
/* Dispatch on the stmt type recorded during the analysis phase.  */
10881 switch (STMT_VINFO_TYPE (stmt_info
))
10883 case type_demotion_vec_info_type
:
10884 case type_promotion_vec_info_type
:
10885 case type_conversion_vec_info_type
:
10886 done
= vectorizable_conversion (stmt_info
, gsi
, &vec_stmt
, slp_node
,
10891 case induc_vec_info_type
:
10892 done
= vectorizable_induction (stmt_info
, gsi
, &vec_stmt
, slp_node
,
10897 case shift_vec_info_type
:
10898 done
= vectorizable_shift (stmt_info
, gsi
, &vec_stmt
, slp_node
, NULL
);
10902 case op_vec_info_type
:
10903 done
= vectorizable_operation (stmt_info
, gsi
, &vec_stmt
, slp_node
,
10908 case assignment_vec_info_type
:
10909 done
= vectorizable_assignment (stmt_info
, gsi
, &vec_stmt
, slp_node
,
10914 case load_vec_info_type
:
10915 done
= vectorizable_load (stmt_info
, gsi
, &vec_stmt
, slp_node
,
10916 slp_node_instance
, NULL
);
10920 case store_vec_info_type
:
10921 done
= vectorizable_store (stmt_info
, gsi
, &vec_stmt
, slp_node
, NULL
);
10923 if (STMT_VINFO_GROUPED_ACCESS (stmt_info
) && !slp_node
)
10925 /* In case of interleaving, the whole chain is vectorized when the
10926 last store in the chain is reached. Store stmts before the last
10927 one are skipped, and there vec_stmt_info shouldn't be freed
10929 stmt_vec_info group_info
= DR_GROUP_FIRST_ELEMENT (stmt_info
);
10930 if (DR_GROUP_STORE_COUNT (group_info
) == DR_GROUP_SIZE (group_info
))
10937 case condition_vec_info_type
:
10938 done
= vectorizable_condition (stmt_info
, gsi
, &vec_stmt
, slp_node
, NULL
);
10942 case comparison_vec_info_type
:
10943 done
= vectorizable_comparison (stmt_info
, gsi
, &vec_stmt
,
10948 case call_vec_info_type
:
10949 done
= vectorizable_call (stmt_info
, gsi
, &vec_stmt
, slp_node
, NULL
);
/* Re-read the stmt: the call may have been replaced at GSI.  */
10950 stmt
= gsi_stmt (*gsi
);
10953 case call_simd_clone_vec_info_type
:
10954 done
= vectorizable_simd_clone_call (stmt_info
, gsi
, &vec_stmt
,
10956 stmt
= gsi_stmt (*gsi
);
10959 case reduc_vec_info_type
:
10960 done
= vect_transform_reduction (stmt_info
, gsi
, &vec_stmt
, slp_node
);
10964 case cycle_phi_info_type
:
10965 done
= vect_transform_cycle_phi (stmt_info
, &vec_stmt
, slp_node
,
10966 slp_node_instance
);
10970 case lc_phi_info_type
:
10971 done
= vectorizable_lc_phi (stmt_info
, &vec_stmt
, slp_node
);
10976 if (!STMT_VINFO_LIVE_P (stmt_info
))
10978 if (dump_enabled_p ())
10979 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
10980 "stmt not supported.\n");
10981 gcc_unreachable ();
10985 /* Verify SLP vectorization doesn't mess with STMT_VINFO_VEC_STMT.
10986 This would break hybrid SLP vectorization. */
10988 gcc_assert (!vec_stmt
10989 && STMT_VINFO_VEC_STMT (stmt_info
) == old_vec_stmt_info
);
10991 /* Handle inner-loop stmts whose DEF is used in the loop-nest that
10992 is being vectorized, but outside the immediately enclosing loop. */
10995 && STMT_VINFO_TYPE (stmt_info
) != reduc_vec_info_type
10996 && (STMT_VINFO_RELEVANT (stmt_info
) == vect_used_in_outer
10997 || STMT_VINFO_RELEVANT (stmt_info
) ==
10998 vect_used_in_outer_by_reduction
))
11000 class loop
*innerloop
= LOOP_VINFO_LOOP (
11001 STMT_VINFO_LOOP_VINFO (stmt_info
))->inner
;
11002 imm_use_iterator imm_iter
;
11003 use_operand_p use_p
;
11006 if (dump_enabled_p ())
11007 dump_printf_loc (MSG_NOTE
, vect_location
,
11008 "Record the vdef for outer-loop vectorization.\n");
11010 /* Find the relevant loop-exit phi-node, and reord the vec_stmt there
11011 (to be used when vectorizing outer-loop stmts that use the DEF of
11013 if (gimple_code (stmt
) == GIMPLE_PHI
)
11014 scalar_dest
= PHI_RESULT (stmt
);
11016 scalar_dest
= gimple_get_lhs (stmt
);
11018 FOR_EACH_IMM_USE_FAST (use_p
, imm_iter
, scalar_dest
)
11019 if (!flow_bb_inside_loop_p (innerloop
, gimple_bb (USE_STMT (use_p
))))
11021 stmt_vec_info exit_phi_info
11022 = vinfo
->lookup_stmt (USE_STMT (use_p
));
11023 STMT_VINFO_VEC_STMT (exit_phi_info
) = vec_stmt
;
11028 STMT_VINFO_VEC_STMT (stmt_info
) = vec_stmt
;
11030 if (STMT_VINFO_TYPE (stmt_info
) == store_vec_info_type
)
11033 /* If this stmt defines a value used on a backedge, update the
11034 vectorized PHIs. */
11035 stmt_vec_info orig_stmt_info
= vect_orig_stmt (stmt_info
);
11036 stmt_vec_info reduc_info
;
11037 if (STMT_VINFO_REDUC_DEF (orig_stmt_info
)
11038 && vect_stmt_to_vectorize (orig_stmt_info
) == stmt_info
11039 && (reduc_info
= info_for_reduction (orig_stmt_info
))
11040 && STMT_VINFO_REDUC_TYPE (reduc_info
) != FOLD_LEFT_REDUCTION
11041 && STMT_VINFO_REDUC_TYPE (reduc_info
) != EXTRACT_LAST_REDUCTION
)
11046 && (phi
= dyn_cast
<gphi
*>
11047 (STMT_VINFO_REDUC_DEF (orig_stmt_info
)->stmt
))
11048 && dominated_by_p (CDI_DOMINATORS
,
11049 gimple_bb (orig_stmt_info
->stmt
), gimple_bb (phi
))
11050 && (e
= loop_latch_edge (gimple_bb (phi
)->loop_father
))
11051 && (PHI_ARG_DEF_FROM_EDGE (phi
, e
)
11052 == gimple_get_lhs (orig_stmt_info
->stmt
)))
/* Walk the chain of vectorized copies, adding each as the latch
   argument of the corresponding vectorized PHI.  */
11054 stmt_vec_info phi_info
11055 = STMT_VINFO_VEC_STMT (STMT_VINFO_REDUC_DEF (orig_stmt_info
));
11056 stmt_vec_info vec_stmt
= STMT_VINFO_VEC_STMT (stmt_info
);
11059 add_phi_arg (as_a
<gphi
*> (phi_info
->stmt
),
11060 gimple_get_lhs (vec_stmt
->stmt
), e
,
11061 gimple_phi_arg_location (phi
, e
->dest_idx
));
11062 phi_info
= STMT_VINFO_RELATED_STMT (phi_info
);
11063 vec_stmt
= STMT_VINFO_RELATED_STMT (vec_stmt
);
11066 gcc_assert (!vec_stmt
);
/* SLP variant of the same backedge fixup, pairing the vector stmts of
   SLP_NODE with those of the instance's reduc_phis node.  */
11069 && slp_node
!= slp_node_instance
->reduc_phis
)
11071 slp_tree phi_node
= slp_node_instance
->reduc_phis
;
11072 gphi
*phi
= as_a
<gphi
*> (SLP_TREE_SCALAR_STMTS (phi_node
)[0]->stmt
);
11073 e
= loop_latch_edge (gimple_bb (phi
)->loop_father
);
11074 gcc_assert (SLP_TREE_VEC_STMTS (phi_node
).length ()
11075 == SLP_TREE_VEC_STMTS (slp_node
).length ());
11076 for (unsigned i
= 0; i
< SLP_TREE_VEC_STMTS (phi_node
).length (); ++i
)
11077 add_phi_arg (as_a
<gphi
*> (SLP_TREE_VEC_STMTS (phi_node
)[i
]->stmt
),
11078 gimple_get_lhs (SLP_TREE_VEC_STMTS (slp_node
)[i
]->stmt
),
11079 e
, gimple_phi_arg_location (phi
, e
->dest_idx
));
11083 /* Handle stmts whose DEF is used outside the loop-nest that is
11084 being vectorized. */
11085 done
= can_vectorize_live_stmts (stmt_info
, gsi
, slp_node
,
11086 slp_node_instance
, true, NULL
);
11093 /* Remove a group of stores (for SLP or interleaving), free their
/* NOTE(review): the remainder of the original doc comment and the
   function's return type / opening brace are among the lines lost in
   this extraction (gaps 11094-11096, 11098); verify against upstream.
   Visible behavior: walk the store group via DR_GROUP_NEXT_ELEMENT,
   mapping each member back to its original stmt and removing it
   (with its stmt_vec_info) through vinfo->remove_stmt.  */
11097 vect_remove_stores (stmt_vec_info first_stmt_info
)
11099 vec_info
*vinfo
= first_stmt_info
->vinfo
;
11100 stmt_vec_info next_stmt_info
= first_stmt_info
;
11102 while (next_stmt_info
)
/* Grab the group successor before removal invalidates the info.  */
11104 stmt_vec_info tmp
= DR_GROUP_NEXT_ELEMENT (next_stmt_info
);
11105 next_stmt_info
= vect_orig_stmt (next_stmt_info
);
11106 /* Free the attached stmt_vec_info and remove the stmt. */
11107 vinfo
->remove_stmt (next_stmt_info
);
11108 next_stmt_info
= tmp
;
11112 /* Function get_vectype_for_scalar_type_and_size.
11114 Returns the vector type corresponding to SCALAR_TYPE and SIZE as supported
11118 get_vectype_for_scalar_type_and_size (tree scalar_type
, poly_uint64 size
)
11120 tree orig_scalar_type
= scalar_type
;
11121 scalar_mode inner_mode
;
11122 machine_mode simd_mode
;
11123 poly_uint64 nunits
;
11126 if (!is_int_mode (TYPE_MODE (scalar_type
), &inner_mode
)
11127 && !is_float_mode (TYPE_MODE (scalar_type
), &inner_mode
))
11130 unsigned int nbytes
= GET_MODE_SIZE (inner_mode
);
11132 /* For vector types of elements whose mode precision doesn't
11133 match their types precision we use a element type of mode
11134 precision. The vectorization routines will have to make sure
11135 they support the proper result truncation/extension.
11136 We also make sure to build vector types with INTEGER_TYPE
11137 component type only. */
11138 if (INTEGRAL_TYPE_P (scalar_type
)
11139 && (GET_MODE_BITSIZE (inner_mode
) != TYPE_PRECISION (scalar_type
)
11140 || TREE_CODE (scalar_type
) != INTEGER_TYPE
))
11141 scalar_type
= build_nonstandard_integer_type (GET_MODE_BITSIZE (inner_mode
),
11142 TYPE_UNSIGNED (scalar_type
));
11144 /* We shouldn't end up building VECTOR_TYPEs of non-scalar components.
11145 When the component mode passes the above test simply use a type
11146 corresponding to that mode. The theory is that any use that
11147 would cause problems with this will disable vectorization anyway. */
11148 else if (!SCALAR_FLOAT_TYPE_P (scalar_type
)
11149 && !INTEGRAL_TYPE_P (scalar_type
))
11150 scalar_type
= lang_hooks
.types
.type_for_mode (inner_mode
, 1);
11152 /* We can't build a vector type of elements with alignment bigger than
11154 else if (nbytes
< TYPE_ALIGN_UNIT (scalar_type
))
11155 scalar_type
= lang_hooks
.types
.type_for_mode (inner_mode
,
11156 TYPE_UNSIGNED (scalar_type
));
11158 /* If we felt back to using the mode fail if there was
11159 no scalar type for it. */
11160 if (scalar_type
== NULL_TREE
)
11163 /* If no size was supplied use the mode the target prefers. Otherwise
11164 lookup a vector mode of the specified size. */
11165 if (known_eq (size
, 0U))
11166 simd_mode
= targetm
.vectorize
.preferred_simd_mode (inner_mode
);
11167 else if (!multiple_p (size
, nbytes
, &nunits
)
11168 || !mode_for_vector (inner_mode
, nunits
).exists (&simd_mode
))
11170 /* NOTE: nunits == 1 is allowed to support single element vector types. */
11171 if (!multiple_p (GET_MODE_SIZE (simd_mode
), nbytes
, &nunits
))
11174 vectype
= build_vector_type (scalar_type
, nunits
);
11176 if (!VECTOR_MODE_P (TYPE_MODE (vectype
))
11177 && !INTEGRAL_MODE_P (TYPE_MODE (vectype
)))
11180 /* Re-attach the address-space qualifier if we canonicalized the scalar
11182 if (TYPE_ADDR_SPACE (orig_scalar_type
) != TYPE_ADDR_SPACE (vectype
))
11183 return build_qualified_type
11184 (vectype
, KEEP_QUAL_ADDR_SPACE (TYPE_QUALS (orig_scalar_type
)));
11189 /* Function get_vectype_for_scalar_type.
11191 Returns the vector type corresponding to SCALAR_TYPE as supported
11195 get_vectype_for_scalar_type (vec_info
*vinfo
, tree scalar_type
)
11198 vectype
= get_vectype_for_scalar_type_and_size (scalar_type
,
11199 vinfo
->vector_size
);
11201 && known_eq (vinfo
->vector_size
, 0U))
11202 vinfo
->vector_size
= GET_MODE_SIZE (TYPE_MODE (vectype
));
11206 /* Function get_mask_type_for_scalar_type.
11208 Returns the mask type corresponding to a result of comparison
11209 of vectors of specified SCALAR_TYPE as supported by target. */
11212 get_mask_type_for_scalar_type (vec_info
*vinfo
, tree scalar_type
)
11214 tree vectype
= get_vectype_for_scalar_type (vinfo
, scalar_type
);
11219 return build_truth_vector_type (TYPE_VECTOR_SUBPARTS (vectype
),
11220 vinfo
->vector_size
);
11223 /* Function get_same_sized_vectype
11225 Returns a vector type corresponding to SCALAR_TYPE of size
11226 VECTOR_TYPE if supported by the target. */
11229 get_same_sized_vectype (tree scalar_type
, tree vector_type
)
11231 if (VECT_SCALAR_BOOLEAN_TYPE_P (scalar_type
))
11232 return build_same_sized_truth_vector_type (vector_type
);
11234 return get_vectype_for_scalar_type_and_size
11235 (scalar_type
, GET_MODE_SIZE (TYPE_MODE (vector_type
)));
11238 /* Function vect_is_simple_use.
11241 VINFO - the vect info of the loop or basic block that is being vectorized.
11242 OPERAND - operand in the loop or bb.
11244 DEF_STMT_INFO_OUT (optional) - information about the defining stmt in
11245 case OPERAND is an SSA_NAME that is defined in the vectorizable region
11246 DEF_STMT_OUT (optional) - the defining stmt in case OPERAND is an SSA_NAME;
11247 the definition could be anywhere in the function
11248 DT - the type of definition
11250 Returns whether a stmt with OPERAND can be vectorized.
11251 For loops, supportable operands are constants, loop invariants, and operands
11252 that are defined by the current iteration of the loop. Unsupportable
11253 operands are those that are defined by a previous iteration of the loop (as
11254 is the case in reduction/induction computations).
11255 For basic blocks, supportable operands are constants and bb invariants.
11256 For now, operands defined outside the basic block are not supported. */
11259 vect_is_simple_use (tree operand
, vec_info
*vinfo
, enum vect_def_type
*dt
,
11260 stmt_vec_info
*def_stmt_info_out
, gimple
**def_stmt_out
)
11262 if (def_stmt_info_out
)
11263 *def_stmt_info_out
= NULL
;
11265 *def_stmt_out
= NULL
;
11266 *dt
= vect_unknown_def_type
;
11268 if (dump_enabled_p ())
11270 dump_printf_loc (MSG_NOTE
, vect_location
,
11271 "vect_is_simple_use: operand ");
11272 if (TREE_CODE (operand
) == SSA_NAME
11273 && !SSA_NAME_IS_DEFAULT_DEF (operand
))
11274 dump_gimple_expr (MSG_NOTE
, TDF_SLIM
, SSA_NAME_DEF_STMT (operand
), 0);
11276 dump_generic_expr (MSG_NOTE
, TDF_SLIM
, operand
);
11279 if (CONSTANT_CLASS_P (operand
))
11280 *dt
= vect_constant_def
;
11281 else if (is_gimple_min_invariant (operand
))
11282 *dt
= vect_external_def
;
11283 else if (TREE_CODE (operand
) != SSA_NAME
)
11284 *dt
= vect_unknown_def_type
;
11285 else if (SSA_NAME_IS_DEFAULT_DEF (operand
))
11286 *dt
= vect_external_def
;
11289 gimple
*def_stmt
= SSA_NAME_DEF_STMT (operand
);
11290 stmt_vec_info stmt_vinfo
= vinfo
->lookup_def (operand
);
11292 *dt
= vect_external_def
;
11295 stmt_vinfo
= vect_stmt_to_vectorize (stmt_vinfo
);
11296 def_stmt
= stmt_vinfo
->stmt
;
11297 switch (gimple_code (def_stmt
))
11300 case GIMPLE_ASSIGN
:
11302 *dt
= STMT_VINFO_DEF_TYPE (stmt_vinfo
);
11305 *dt
= vect_unknown_def_type
;
11308 if (def_stmt_info_out
)
11309 *def_stmt_info_out
= stmt_vinfo
;
11312 *def_stmt_out
= def_stmt
;
11315 if (dump_enabled_p ())
11317 dump_printf (MSG_NOTE
, ", type of def: ");
11320 case vect_uninitialized_def
:
11321 dump_printf (MSG_NOTE
, "uninitialized\n");
11323 case vect_constant_def
:
11324 dump_printf (MSG_NOTE
, "constant\n");
11326 case vect_external_def
:
11327 dump_printf (MSG_NOTE
, "external\n");
11329 case vect_internal_def
:
11330 dump_printf (MSG_NOTE
, "internal\n");
11332 case vect_induction_def
:
11333 dump_printf (MSG_NOTE
, "induction\n");
11335 case vect_reduction_def
:
11336 dump_printf (MSG_NOTE
, "reduction\n");
11338 case vect_double_reduction_def
:
11339 dump_printf (MSG_NOTE
, "double reduction\n");
11341 case vect_nested_cycle
:
11342 dump_printf (MSG_NOTE
, "nested cycle\n");
11344 case vect_unknown_def_type
:
11345 dump_printf (MSG_NOTE
, "unknown\n");
11350 if (*dt
== vect_unknown_def_type
)
11352 if (dump_enabled_p ())
11353 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
11354 "Unsupported pattern.\n");
11361 /* Function vect_is_simple_use.
11363 Same as vect_is_simple_use but also determines the vector operand
11364 type of OPERAND and stores it to *VECTYPE. If the definition of
11365 OPERAND is vect_uninitialized_def, vect_constant_def or
11366 vect_external_def *VECTYPE will be set to NULL_TREE and the caller
11367 is responsible to compute the best suited vector type for the
11371 vect_is_simple_use (tree operand
, vec_info
*vinfo
, enum vect_def_type
*dt
,
11372 tree
*vectype
, stmt_vec_info
*def_stmt_info_out
,
11373 gimple
**def_stmt_out
)
11375 stmt_vec_info def_stmt_info
;
11377 if (!vect_is_simple_use (operand
, vinfo
, dt
, &def_stmt_info
, &def_stmt
))
11381 *def_stmt_out
= def_stmt
;
11382 if (def_stmt_info_out
)
11383 *def_stmt_info_out
= def_stmt_info
;
11385 /* Now get a vector type if the def is internal, otherwise supply
11386 NULL_TREE and leave it up to the caller to figure out a proper
11387 type for the use stmt. */
11388 if (*dt
== vect_internal_def
11389 || *dt
== vect_induction_def
11390 || *dt
== vect_reduction_def
11391 || *dt
== vect_double_reduction_def
11392 || *dt
== vect_nested_cycle
)
11394 *vectype
= STMT_VINFO_VECTYPE (def_stmt_info
);
11395 gcc_assert (*vectype
!= NULL_TREE
);
11396 if (dump_enabled_p ())
11397 dump_printf_loc (MSG_NOTE
, vect_location
,
11398 "vect_is_simple_use: vectype %T\n", *vectype
);
11400 else if (*dt
== vect_uninitialized_def
11401 || *dt
== vect_constant_def
11402 || *dt
== vect_external_def
)
11403 *vectype
= NULL_TREE
;
11405 gcc_unreachable ();
11411 /* Function supportable_widening_operation
11413 Check whether an operation represented by the code CODE is a
11414 widening operation that is supported by the target platform in
11415 vector form (i.e., when operating on arguments of type VECTYPE_IN
11416 producing a result of type VECTYPE_OUT).
11418 Widening operations we currently support are NOP (CONVERT), FLOAT,
11419 FIX_TRUNC and WIDEN_MULT. This function checks if these operations
11420 are supported by the target platform either directly (via vector
11421 tree-codes), or via target builtins.
11424 - CODE1 and CODE2 are codes of vector operations to be used when
11425 vectorizing the operation, if available.
11426 - MULTI_STEP_CVT determines the number of required intermediate steps in
11427 case of multi-step conversion (like char->short->int - in that case
11428 MULTI_STEP_CVT will be 1).
11429 - INTERM_TYPES contains the intermediate type required to perform the
11430 widening operation (short in the above example). */
11433 supportable_widening_operation (enum tree_code code
, stmt_vec_info stmt_info
,
11434 tree vectype_out
, tree vectype_in
,
11435 enum tree_code
*code1
, enum tree_code
*code2
,
11436 int *multi_step_cvt
,
11437 vec
<tree
> *interm_types
)
11439 vec_info
*vinfo
= stmt_info
->vinfo
;
11440 loop_vec_info loop_info
= STMT_VINFO_LOOP_VINFO (stmt_info
);
11441 class loop
*vect_loop
= NULL
;
11442 machine_mode vec_mode
;
11443 enum insn_code icode1
, icode2
;
11444 optab optab1
, optab2
;
11445 tree vectype
= vectype_in
;
11446 tree wide_vectype
= vectype_out
;
11447 enum tree_code c1
, c2
;
11449 tree prev_type
, intermediate_type
;
11450 machine_mode intermediate_mode
, prev_mode
;
11451 optab optab3
, optab4
;
11453 *multi_step_cvt
= 0;
11455 vect_loop
= LOOP_VINFO_LOOP (loop_info
);
11459 case WIDEN_MULT_EXPR
:
11460 /* The result of a vectorized widening operation usually requires
11461 two vectors (because the widened results do not fit into one vector).
11462 The generated vector results would normally be expected to be
11463 generated in the same order as in the original scalar computation,
11464 i.e. if 8 results are generated in each vector iteration, they are
11465 to be organized as follows:
11466 vect1: [res1,res2,res3,res4],
11467 vect2: [res5,res6,res7,res8].
11469 However, in the special case that the result of the widening
11470 operation is used in a reduction computation only, the order doesn't
11471 matter (because when vectorizing a reduction we change the order of
11472 the computation). Some targets can take advantage of this and
11473 generate more efficient code. For example, targets like Altivec,
11474 that support widen_mult using a sequence of {mult_even,mult_odd}
11475 generate the following vectors:
11476 vect1: [res1,res3,res5,res7],
11477 vect2: [res2,res4,res6,res8].
11479 When vectorizing outer-loops, we execute the inner-loop sequentially
11480 (each vectorized inner-loop iteration contributes to VF outer-loop
11481 iterations in parallel). We therefore don't allow to change the
11482 order of the computation in the inner-loop during outer-loop
11484 /* TODO: Another case in which order doesn't *really* matter is when we
11485 widen and then contract again, e.g. (short)((int)x * y >> 8).
11486 Normally, pack_trunc performs an even/odd permute, whereas the
11487 repack from an even/odd expansion would be an interleave, which
11488 would be significantly simpler for e.g. AVX2. */
11489 /* In any case, in order to avoid duplicating the code below, recurse
11490 on VEC_WIDEN_MULT_EVEN_EXPR. If it succeeds, all the return values
11491 are properly set up for the caller. If we fail, we'll continue with
11492 a VEC_WIDEN_MULT_LO/HI_EXPR check. */
11494 && STMT_VINFO_RELEVANT (stmt_info
) == vect_used_by_reduction
11495 && !nested_in_vect_loop_p (vect_loop
, stmt_info
)
11496 && supportable_widening_operation (VEC_WIDEN_MULT_EVEN_EXPR
,
11497 stmt_info
, vectype_out
,
11498 vectype_in
, code1
, code2
,
11499 multi_step_cvt
, interm_types
))
11501 /* Elements in a vector with vect_used_by_reduction property cannot
11502 be reordered if the use chain with this property does not have the
11503 same operation. One such an example is s += a * b, where elements
11504 in a and b cannot be reordered. Here we check if the vector defined
11505 by STMT is only directly used in the reduction statement. */
11506 tree lhs
= gimple_assign_lhs (stmt_info
->stmt
);
11507 stmt_vec_info use_stmt_info
= loop_info
->lookup_single_use (lhs
);
11509 && STMT_VINFO_DEF_TYPE (use_stmt_info
) == vect_reduction_def
)
11512 c1
= VEC_WIDEN_MULT_LO_EXPR
;
11513 c2
= VEC_WIDEN_MULT_HI_EXPR
;
11516 case DOT_PROD_EXPR
:
11517 c1
= DOT_PROD_EXPR
;
11518 c2
= DOT_PROD_EXPR
;
11526 case VEC_WIDEN_MULT_EVEN_EXPR
:
11527 /* Support the recursion induced just above. */
11528 c1
= VEC_WIDEN_MULT_EVEN_EXPR
;
11529 c2
= VEC_WIDEN_MULT_ODD_EXPR
;
11532 case WIDEN_LSHIFT_EXPR
:
11533 c1
= VEC_WIDEN_LSHIFT_LO_EXPR
;
11534 c2
= VEC_WIDEN_LSHIFT_HI_EXPR
;
11538 c1
= VEC_UNPACK_LO_EXPR
;
11539 c2
= VEC_UNPACK_HI_EXPR
;
11543 c1
= VEC_UNPACK_FLOAT_LO_EXPR
;
11544 c2
= VEC_UNPACK_FLOAT_HI_EXPR
;
11547 case FIX_TRUNC_EXPR
:
11548 c1
= VEC_UNPACK_FIX_TRUNC_LO_EXPR
;
11549 c2
= VEC_UNPACK_FIX_TRUNC_HI_EXPR
;
11553 gcc_unreachable ();
11556 if (BYTES_BIG_ENDIAN
&& c1
!= VEC_WIDEN_MULT_EVEN_EXPR
)
11557 std::swap (c1
, c2
);
11559 if (code
== FIX_TRUNC_EXPR
)
11561 /* The signedness is determined from output operand. */
11562 optab1
= optab_for_tree_code (c1
, vectype_out
, optab_default
);
11563 optab2
= optab_for_tree_code (c2
, vectype_out
, optab_default
);
11565 else if (CONVERT_EXPR_CODE_P (code
)
11566 && VECTOR_BOOLEAN_TYPE_P (wide_vectype
)
11567 && VECTOR_BOOLEAN_TYPE_P (vectype
)
11568 && TYPE_MODE (wide_vectype
) == TYPE_MODE (vectype
)
11569 && SCALAR_INT_MODE_P (TYPE_MODE (vectype
)))
11571 /* If the input and result modes are the same, a different optab
11572 is needed where we pass in the number of units in vectype. */
11573 optab1
= vec_unpacks_sbool_lo_optab
;
11574 optab2
= vec_unpacks_sbool_hi_optab
;
11578 optab1
= optab_for_tree_code (c1
, vectype
, optab_default
);
11579 optab2
= optab_for_tree_code (c2
, vectype
, optab_default
);
11582 if (!optab1
|| !optab2
)
11585 vec_mode
= TYPE_MODE (vectype
);
11586 if ((icode1
= optab_handler (optab1
, vec_mode
)) == CODE_FOR_nothing
11587 || (icode2
= optab_handler (optab2
, vec_mode
)) == CODE_FOR_nothing
)
11593 if (insn_data
[icode1
].operand
[0].mode
== TYPE_MODE (wide_vectype
)
11594 && insn_data
[icode2
].operand
[0].mode
== TYPE_MODE (wide_vectype
))
11596 if (!VECTOR_BOOLEAN_TYPE_P (vectype
))
11598 /* For scalar masks we may have different boolean
11599 vector types having the same QImode. Thus we
11600 add additional check for elements number. */
11601 if (known_eq (TYPE_VECTOR_SUBPARTS (vectype
),
11602 TYPE_VECTOR_SUBPARTS (wide_vectype
) * 2))
11606 /* Check if it's a multi-step conversion that can be done using intermediate
11609 prev_type
= vectype
;
11610 prev_mode
= vec_mode
;
11612 if (!CONVERT_EXPR_CODE_P (code
))
11615 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
11616 intermediate steps in promotion sequence. We try
11617 MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do
11619 interm_types
->create (MAX_INTERM_CVT_STEPS
);
11620 for (i
= 0; i
< MAX_INTERM_CVT_STEPS
; i
++)
11622 intermediate_mode
= insn_data
[icode1
].operand
[0].mode
;
11623 if (VECTOR_BOOLEAN_TYPE_P (prev_type
))
11625 intermediate_type
= vect_halve_mask_nunits (vinfo
, prev_type
);
11626 if (intermediate_mode
!= TYPE_MODE (intermediate_type
))
11631 = lang_hooks
.types
.type_for_mode (intermediate_mode
,
11632 TYPE_UNSIGNED (prev_type
));
11634 if (VECTOR_BOOLEAN_TYPE_P (intermediate_type
)
11635 && VECTOR_BOOLEAN_TYPE_P (prev_type
)
11636 && intermediate_mode
== prev_mode
11637 && SCALAR_INT_MODE_P (prev_mode
))
11639 /* If the input and result modes are the same, a different optab
11640 is needed where we pass in the number of units in vectype. */
11641 optab3
= vec_unpacks_sbool_lo_optab
;
11642 optab4
= vec_unpacks_sbool_hi_optab
;
11646 optab3
= optab_for_tree_code (c1
, intermediate_type
, optab_default
);
11647 optab4
= optab_for_tree_code (c2
, intermediate_type
, optab_default
);
11650 if (!optab3
|| !optab4
11651 || (icode1
= optab_handler (optab1
, prev_mode
)) == CODE_FOR_nothing
11652 || insn_data
[icode1
].operand
[0].mode
!= intermediate_mode
11653 || (icode2
= optab_handler (optab2
, prev_mode
)) == CODE_FOR_nothing
11654 || insn_data
[icode2
].operand
[0].mode
!= intermediate_mode
11655 || ((icode1
= optab_handler (optab3
, intermediate_mode
))
11656 == CODE_FOR_nothing
)
11657 || ((icode2
= optab_handler (optab4
, intermediate_mode
))
11658 == CODE_FOR_nothing
))
11661 interm_types
->quick_push (intermediate_type
);
11662 (*multi_step_cvt
)++;
11664 if (insn_data
[icode1
].operand
[0].mode
== TYPE_MODE (wide_vectype
)
11665 && insn_data
[icode2
].operand
[0].mode
== TYPE_MODE (wide_vectype
))
11667 if (!VECTOR_BOOLEAN_TYPE_P (vectype
))
11669 if (known_eq (TYPE_VECTOR_SUBPARTS (intermediate_type
),
11670 TYPE_VECTOR_SUBPARTS (wide_vectype
) * 2))
11674 prev_type
= intermediate_type
;
11675 prev_mode
= intermediate_mode
;
11678 interm_types
->release ();
11683 /* Function supportable_narrowing_operation
11685 Check whether an operation represented by the code CODE is a
11686 narrowing operation that is supported by the target platform in
11687 vector form (i.e., when operating on arguments of type VECTYPE_IN
11688 and producing a result of type VECTYPE_OUT).
11690 Narrowing operations we currently support are NOP (CONVERT), FIX_TRUNC
11691 and FLOAT. This function checks if these operations are supported by
11692 the target platform directly via vector tree-codes.
11695 - CODE1 is the code of a vector operation to be used when
11696 vectorizing the operation, if available.
11697 - MULTI_STEP_CVT determines the number of required intermediate steps in
11698 case of multi-step conversion (like int->short->char - in that case
11699 MULTI_STEP_CVT will be 1).
11700 - INTERM_TYPES contains the intermediate type required to perform the
11701 narrowing operation (short in the above example). */
11704 supportable_narrowing_operation (vec_info
*vinfo
, enum tree_code code
,
11705 tree vectype_out
, tree vectype_in
,
11706 enum tree_code
*code1
, int *multi_step_cvt
,
11707 vec
<tree
> *interm_types
)
11709 machine_mode vec_mode
;
11710 enum insn_code icode1
;
11711 optab optab1
, interm_optab
;
11712 tree vectype
= vectype_in
;
11713 tree narrow_vectype
= vectype_out
;
11715 tree intermediate_type
, prev_type
;
11716 machine_mode intermediate_mode
, prev_mode
;
11720 *multi_step_cvt
= 0;
11724 c1
= VEC_PACK_TRUNC_EXPR
;
11725 if (VECTOR_BOOLEAN_TYPE_P (narrow_vectype
)
11726 && VECTOR_BOOLEAN_TYPE_P (vectype
)
11727 && TYPE_MODE (narrow_vectype
) == TYPE_MODE (vectype
)
11728 && SCALAR_INT_MODE_P (TYPE_MODE (vectype
)))
11729 optab1
= vec_pack_sbool_trunc_optab
;
11731 optab1
= optab_for_tree_code (c1
, vectype
, optab_default
);
11734 case FIX_TRUNC_EXPR
:
11735 c1
= VEC_PACK_FIX_TRUNC_EXPR
;
11736 /* The signedness is determined from output operand. */
11737 optab1
= optab_for_tree_code (c1
, vectype_out
, optab_default
);
11741 c1
= VEC_PACK_FLOAT_EXPR
;
11742 optab1
= optab_for_tree_code (c1
, vectype
, optab_default
);
11746 gcc_unreachable ();
11752 vec_mode
= TYPE_MODE (vectype
);
11753 if ((icode1
= optab_handler (optab1
, vec_mode
)) == CODE_FOR_nothing
)
11758 if (insn_data
[icode1
].operand
[0].mode
== TYPE_MODE (narrow_vectype
))
11760 if (!VECTOR_BOOLEAN_TYPE_P (vectype
))
11762 /* For scalar masks we may have different boolean
11763 vector types having the same QImode. Thus we
11764 add additional check for elements number. */
11765 if (known_eq (TYPE_VECTOR_SUBPARTS (vectype
) * 2,
11766 TYPE_VECTOR_SUBPARTS (narrow_vectype
)))
11770 if (code
== FLOAT_EXPR
)
11773 /* Check if it's a multi-step conversion that can be done using intermediate
11775 prev_mode
= vec_mode
;
11776 prev_type
= vectype
;
11777 if (code
== FIX_TRUNC_EXPR
)
11778 uns
= TYPE_UNSIGNED (vectype_out
);
11780 uns
= TYPE_UNSIGNED (vectype
);
11782 /* For multi-step FIX_TRUNC_EXPR prefer signed floating to integer
11783 conversion over unsigned, as unsigned FIX_TRUNC_EXPR is often more
11784 costly than signed. */
11785 if (code
== FIX_TRUNC_EXPR
&& uns
)
11787 enum insn_code icode2
;
11790 = lang_hooks
.types
.type_for_mode (TYPE_MODE (vectype_out
), 0);
11792 = optab_for_tree_code (c1
, intermediate_type
, optab_default
);
11793 if (interm_optab
!= unknown_optab
11794 && (icode2
= optab_handler (optab1
, vec_mode
)) != CODE_FOR_nothing
11795 && insn_data
[icode1
].operand
[0].mode
11796 == insn_data
[icode2
].operand
[0].mode
)
11799 optab1
= interm_optab
;
11804 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
11805 intermediate steps in promotion sequence. We try
11806 MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do not. */
11807 interm_types
->create (MAX_INTERM_CVT_STEPS
);
11808 for (i
= 0; i
< MAX_INTERM_CVT_STEPS
; i
++)
11810 intermediate_mode
= insn_data
[icode1
].operand
[0].mode
;
11811 if (VECTOR_BOOLEAN_TYPE_P (prev_type
))
11813 intermediate_type
= vect_double_mask_nunits (vinfo
, prev_type
);
11814 if (intermediate_mode
!= TYPE_MODE (intermediate_type
))
11819 = lang_hooks
.types
.type_for_mode (intermediate_mode
, uns
);
11820 if (VECTOR_BOOLEAN_TYPE_P (intermediate_type
)
11821 && VECTOR_BOOLEAN_TYPE_P (prev_type
)
11822 && intermediate_mode
== prev_mode
11823 && SCALAR_INT_MODE_P (prev_mode
))
11824 interm_optab
= vec_pack_sbool_trunc_optab
;
11827 = optab_for_tree_code (VEC_PACK_TRUNC_EXPR
, intermediate_type
,
11830 || ((icode1
= optab_handler (optab1
, prev_mode
)) == CODE_FOR_nothing
)
11831 || insn_data
[icode1
].operand
[0].mode
!= intermediate_mode
11832 || ((icode1
= optab_handler (interm_optab
, intermediate_mode
))
11833 == CODE_FOR_nothing
))
11836 interm_types
->quick_push (intermediate_type
);
11837 (*multi_step_cvt
)++;
11839 if (insn_data
[icode1
].operand
[0].mode
== TYPE_MODE (narrow_vectype
))
11841 if (!VECTOR_BOOLEAN_TYPE_P (vectype
))
11843 if (known_eq (TYPE_VECTOR_SUBPARTS (intermediate_type
) * 2,
11844 TYPE_VECTOR_SUBPARTS (narrow_vectype
)))
11848 prev_mode
= intermediate_mode
;
11849 prev_type
= intermediate_type
;
11850 optab1
= interm_optab
;
11853 interm_types
->release ();
11857 /* Generate and return a statement that sets vector mask MASK such that
11858 MASK[I] is true iff J + START_INDEX < END_INDEX for all J <= I. */
11861 vect_gen_while (tree mask
, tree start_index
, tree end_index
)
11863 tree cmp_type
= TREE_TYPE (start_index
);
11864 tree mask_type
= TREE_TYPE (mask
);
11865 gcc_checking_assert (direct_internal_fn_supported_p (IFN_WHILE_ULT
,
11866 cmp_type
, mask_type
,
11867 OPTIMIZE_FOR_SPEED
));
11868 gcall
*call
= gimple_build_call_internal (IFN_WHILE_ULT
, 3,
11869 start_index
, end_index
,
11870 build_zero_cst (mask_type
));
11871 gimple_call_set_lhs (call
, mask
);
11875 /* Generate a vector mask of type MASK_TYPE for which index I is false iff
11876 J + START_INDEX < END_INDEX for all J <= I. Add the statements to SEQ. */
11879 vect_gen_while_not (gimple_seq
*seq
, tree mask_type
, tree start_index
,
11882 tree tmp
= make_ssa_name (mask_type
);
11883 gcall
*call
= vect_gen_while (tmp
, start_index
, end_index
);
11884 gimple_seq_add_stmt (seq
, call
);
11885 return gimple_build (seq
, BIT_NOT_EXPR
, mask_type
, tmp
);
11888 /* Try to compute the vector types required to vectorize STMT_INFO,
11889 returning true on success and false if vectorization isn't possible.
11893 - Set *STMT_VECTYPE_OUT to:
11894 - NULL_TREE if the statement doesn't need to be vectorized;
11895 - boolean_type_node if the statement is a boolean operation whose
11896 vector type can only be determined once all the other vector types
11898 - the equivalent of STMT_VINFO_VECTYPE otherwise.
11900 - Set *NUNITS_VECTYPE_OUT to the vector type that contains the maximum
11901 number of units needed to vectorize STMT_INFO, or NULL_TREE if the
11902 statement does not help to determine the overall number of units. */
11905 vect_get_vector_types_for_stmt (stmt_vec_info stmt_info
,
11906 tree
*stmt_vectype_out
,
11907 tree
*nunits_vectype_out
)
11909 vec_info
*vinfo
= stmt_info
->vinfo
;
11910 gimple
*stmt
= stmt_info
->stmt
;
11912 *stmt_vectype_out
= NULL_TREE
;
11913 *nunits_vectype_out
= NULL_TREE
;
11915 if (gimple_get_lhs (stmt
) == NULL_TREE
11916 /* MASK_STORE has no lhs, but is ok. */
11917 && !gimple_call_internal_p (stmt
, IFN_MASK_STORE
))
11919 if (is_a
<gcall
*> (stmt
))
11921 /* Ignore calls with no lhs. These must be calls to
11922 #pragma omp simd functions, and what vectorization factor
11923 it really needs can't be determined until
11924 vectorizable_simd_clone_call. */
11925 if (dump_enabled_p ())
11926 dump_printf_loc (MSG_NOTE
, vect_location
,
11927 "defer to SIMD clone analysis.\n");
11928 return opt_result::success ();
11931 return opt_result::failure_at (stmt
,
11932 "not vectorized: irregular stmt.%G", stmt
);
11935 if (VECTOR_MODE_P (TYPE_MODE (gimple_expr_type (stmt
))))
11936 return opt_result::failure_at (stmt
,
11937 "not vectorized: vector stmt in loop:%G",
11941 tree scalar_type
= NULL_TREE
;
11942 if (STMT_VINFO_VECTYPE (stmt_info
))
11943 *stmt_vectype_out
= vectype
= STMT_VINFO_VECTYPE (stmt_info
);
11946 gcc_assert (!STMT_VINFO_DATA_REF (stmt_info
));
11947 if (gimple_call_internal_p (stmt
, IFN_MASK_STORE
))
11948 scalar_type
= TREE_TYPE (gimple_call_arg (stmt
, 3));
11950 scalar_type
= TREE_TYPE (gimple_get_lhs (stmt
));
11952 /* Pure bool ops don't participate in number-of-units computation.
11953 For comparisons use the types being compared. */
11954 if (VECT_SCALAR_BOOLEAN_TYPE_P (scalar_type
)
11955 && is_gimple_assign (stmt
)
11956 && gimple_assign_rhs_code (stmt
) != COND_EXPR
)
11958 *stmt_vectype_out
= boolean_type_node
;
11960 tree rhs1
= gimple_assign_rhs1 (stmt
);
11961 if (TREE_CODE_CLASS (gimple_assign_rhs_code (stmt
)) == tcc_comparison
11962 && !VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (rhs1
)))
11963 scalar_type
= TREE_TYPE (rhs1
);
11966 if (dump_enabled_p ())
11967 dump_printf_loc (MSG_NOTE
, vect_location
,
11968 "pure bool operation.\n");
11969 return opt_result::success ();
11973 if (dump_enabled_p ())
11974 dump_printf_loc (MSG_NOTE
, vect_location
,
11975 "get vectype for scalar type: %T\n", scalar_type
);
11976 vectype
= get_vectype_for_scalar_type (vinfo
, scalar_type
);
11978 return opt_result::failure_at (stmt
,
11980 " unsupported data-type %T\n",
11983 if (!*stmt_vectype_out
)
11984 *stmt_vectype_out
= vectype
;
11986 if (dump_enabled_p ())
11987 dump_printf_loc (MSG_NOTE
, vect_location
, "vectype: %T\n", vectype
);
11990 /* Don't try to compute scalar types if the stmt produces a boolean
11991 vector; use the existing vector type instead. */
11992 tree nunits_vectype
;
11993 if (VECTOR_BOOLEAN_TYPE_P (vectype
))
11994 nunits_vectype
= vectype
;
11997 /* The number of units is set according to the smallest scalar
11998 type (or the largest vector size, but we only support one
11999 vector size per vectorization). */
12000 if (*stmt_vectype_out
!= boolean_type_node
)
12002 HOST_WIDE_INT dummy
;
12003 scalar_type
= vect_get_smallest_scalar_type (stmt_info
,
12006 if (dump_enabled_p ())
12007 dump_printf_loc (MSG_NOTE
, vect_location
,
12008 "get vectype for scalar type: %T\n", scalar_type
);
12009 nunits_vectype
= get_vectype_for_scalar_type (vinfo
, scalar_type
);
12011 if (!nunits_vectype
)
12012 return opt_result::failure_at (stmt
,
12013 "not vectorized: unsupported data-type %T\n",
12016 if (maybe_ne (GET_MODE_SIZE (TYPE_MODE (vectype
)),
12017 GET_MODE_SIZE (TYPE_MODE (nunits_vectype
))))
12018 return opt_result::failure_at (stmt
,
12019 "not vectorized: different sized vector "
12020 "types in statement, %T and %T\n",
12021 vectype
, nunits_vectype
);
12023 if (dump_enabled_p ())
12025 dump_printf_loc (MSG_NOTE
, vect_location
, "vectype: %T\n",
12028 dump_printf_loc (MSG_NOTE
, vect_location
, "nunits = ");
12029 dump_dec (MSG_NOTE
, TYPE_VECTOR_SUBPARTS (nunits_vectype
));
12030 dump_printf (MSG_NOTE
, "\n");
12033 *nunits_vectype_out
= nunits_vectype
;
12034 return opt_result::success ();
12037 /* Try to determine the correct vector type for STMT_INFO, which is a
12038 statement that produces a scalar boolean result. Return the vector
12039 type on success, otherwise return NULL_TREE. */
12042 vect_get_mask_type_for_stmt (stmt_vec_info stmt_info
)
12044 vec_info
*vinfo
= stmt_info
->vinfo
;
12045 gimple
*stmt
= stmt_info
->stmt
;
12046 tree mask_type
= NULL
;
12047 tree vectype
, scalar_type
;
12049 if (is_gimple_assign (stmt
)
12050 && TREE_CODE_CLASS (gimple_assign_rhs_code (stmt
)) == tcc_comparison
12051 && !VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (gimple_assign_rhs1 (stmt
))))
12053 scalar_type
= TREE_TYPE (gimple_assign_rhs1 (stmt
));
12054 mask_type
= get_mask_type_for_scalar_type (vinfo
, scalar_type
);
12057 return opt_tree::failure_at (stmt
,
12058 "not vectorized: unsupported mask\n");
12064 enum vect_def_type dt
;
12066 FOR_EACH_SSA_TREE_OPERAND (rhs
, stmt
, iter
, SSA_OP_USE
)
12068 if (!vect_is_simple_use (rhs
, stmt_info
->vinfo
, &dt
, &vectype
))
12069 return opt_tree::failure_at (stmt
,
12070 "not vectorized:can't compute mask"
12071 " type for statement, %G", stmt
);
12073 /* No vectype probably means external definition.
12074 Allow it in case there is another operand which
12075 allows to determine mask type. */
12080 mask_type
= vectype
;
12081 else if (maybe_ne (TYPE_VECTOR_SUBPARTS (mask_type
),
12082 TYPE_VECTOR_SUBPARTS (vectype
)))
12083 return opt_tree::failure_at (stmt
,
12084 "not vectorized: different sized mask"
12085 " types in statement, %T and %T\n",
12086 mask_type
, vectype
);
12087 else if (VECTOR_BOOLEAN_TYPE_P (mask_type
)
12088 != VECTOR_BOOLEAN_TYPE_P (vectype
))
12089 return opt_tree::failure_at (stmt
,
12090 "not vectorized: mixed mask and "
12091 "nonmask vector types in statement, "
12093 mask_type
, vectype
);
12096 /* We may compare boolean value loaded as vector of integers.
12097 Fix mask_type in such case. */
12099 && !VECTOR_BOOLEAN_TYPE_P (mask_type
)
12100 && gimple_code (stmt
) == GIMPLE_ASSIGN
12101 && TREE_CODE_CLASS (gimple_assign_rhs_code (stmt
)) == tcc_comparison
)
12102 mask_type
= build_same_sized_truth_vector_type (mask_type
);
12105 /* No mask_type should mean loop invariant predicate.
12106 This is probably a subject for optimization in if-conversion. */
12108 return opt_tree::failure_at (stmt
,
12109 "not vectorized: can't compute mask type "
12110 "for statement: %G", stmt
);
12112 return opt_tree::success (mask_type
);