1 /* Statement Analysis and Transformation for Vectorization
2 Copyright (C) 2003-2020 Free Software Foundation, Inc.
3 Contributed by Dorit Naishlos <dorit@il.ibm.com>
4 and Ira Rosen <irar@il.ibm.com>
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
24 #include "coretypes.h"
31 #include "optabs-tree.h"
32 #include "insn-config.h"
33 #include "recog.h" /* FIXME: for insn_data */
37 #include "fold-const.h"
38 #include "stor-layout.h"
41 #include "gimple-iterator.h"
42 #include "gimplify-me.h"
44 #include "tree-ssa-loop-manip.h"
47 #include "tree-ssa-loop.h"
48 #include "tree-scalar-evolution.h"
49 #include "tree-vectorizer.h"
51 #include "internal-fn.h"
52 #include "tree-vector-builder.h"
53 #include "vec-perm-indices.h"
54 #include "tree-ssa-loop-niter.h"
55 #include "gimple-fold.h"
59 /* For lang_hooks.types.type_for_mode. */
60 #include "langhooks.h"
62 /* Return the vectorized type for the given statement. */
65 stmt_vectype (class _stmt_vec_info
*stmt_info
)
67 return STMT_VINFO_VECTYPE (stmt_info
);
70 /* Return TRUE iff the given statement is in an inner loop relative to
71 the loop being vectorized. */
73 stmt_in_inner_loop_p (vec_info
*vinfo
, class _stmt_vec_info
*stmt_info
)
75 gimple
*stmt
= STMT_VINFO_STMT (stmt_info
);
76 basic_block bb
= gimple_bb (stmt
);
77 loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
);
83 loop
= LOOP_VINFO_LOOP (loop_vinfo
);
85 return (bb
->loop_father
== loop
->inner
);
88 /* Record the cost of a statement, either by directly informing the
89 target model or by saving it in a vector for later processing.
90 Return a preliminary estimate of the statement's cost. */
93 record_stmt_cost (stmt_vector_for_cost
*body_cost_vec
, int count
,
94 enum vect_cost_for_stmt kind
, stmt_vec_info stmt_info
,
95 tree vectype
, int misalign
,
96 enum vect_cost_model_location where
)
98 if ((kind
== vector_load
|| kind
== unaligned_load
)
99 && (stmt_info
&& STMT_VINFO_GATHER_SCATTER_P (stmt_info
)))
100 kind
= vector_gather_load
;
101 if ((kind
== vector_store
|| kind
== unaligned_store
)
102 && (stmt_info
&& STMT_VINFO_GATHER_SCATTER_P (stmt_info
)))
103 kind
= vector_scatter_store
;
105 stmt_info_for_cost si
= { count
, kind
, where
, stmt_info
, vectype
, misalign
};
106 body_cost_vec
->safe_push (si
);
109 (builtin_vectorization_cost (kind
, vectype
, misalign
) * count
);
112 /* Return a variable of type ELEM_TYPE[NELEMS]. */
115 create_vector_array (tree elem_type
, unsigned HOST_WIDE_INT nelems
)
117 return create_tmp_var (build_array_type_nelts (elem_type
, nelems
),
121 /* ARRAY is an array of vectors created by create_vector_array.
122 Return an SSA_NAME for the vector in index N. The reference
123 is part of the vectorization of STMT_INFO and the vector is associated
124 with scalar destination SCALAR_DEST. */
127 read_vector_array (vec_info
*vinfo
,
128 stmt_vec_info stmt_info
, gimple_stmt_iterator
*gsi
,
129 tree scalar_dest
, tree array
, unsigned HOST_WIDE_INT n
)
131 tree vect_type
, vect
, vect_name
, array_ref
;
134 gcc_assert (TREE_CODE (TREE_TYPE (array
)) == ARRAY_TYPE
);
135 vect_type
= TREE_TYPE (TREE_TYPE (array
));
136 vect
= vect_create_destination_var (scalar_dest
, vect_type
);
137 array_ref
= build4 (ARRAY_REF
, vect_type
, array
,
138 build_int_cst (size_type_node
, n
),
139 NULL_TREE
, NULL_TREE
);
141 new_stmt
= gimple_build_assign (vect
, array_ref
);
142 vect_name
= make_ssa_name (vect
, new_stmt
);
143 gimple_assign_set_lhs (new_stmt
, vect_name
);
144 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
149 /* ARRAY is an array of vectors created by create_vector_array.
150 Emit code to store SSA_NAME VECT in index N of the array.
151 The store is part of the vectorization of STMT_INFO. */
154 write_vector_array (vec_info
*vinfo
,
155 stmt_vec_info stmt_info
, gimple_stmt_iterator
*gsi
,
156 tree vect
, tree array
, unsigned HOST_WIDE_INT n
)
161 array_ref
= build4 (ARRAY_REF
, TREE_TYPE (vect
), array
,
162 build_int_cst (size_type_node
, n
),
163 NULL_TREE
, NULL_TREE
);
165 new_stmt
= gimple_build_assign (array_ref
, vect
);
166 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
169 /* PTR is a pointer to an array of type TYPE. Return a representation
170 of *PTR. The memory reference replaces those in FIRST_DR
174 create_array_ref (tree type
, tree ptr
, tree alias_ptr_type
)
178 mem_ref
= build2 (MEM_REF
, type
, ptr
, build_int_cst (alias_ptr_type
, 0));
179 /* Arrays have the same alignment as their type. */
180 set_ptr_info_alignment (get_ptr_info (ptr
), TYPE_ALIGN_UNIT (type
), 0);
184 /* Add a clobber of variable VAR to the vectorization of STMT_INFO.
185 Emit the clobber before *GSI. */
188 vect_clobber_variable (vec_info
*vinfo
, stmt_vec_info stmt_info
,
189 gimple_stmt_iterator
*gsi
, tree var
)
191 tree clobber
= build_clobber (TREE_TYPE (var
));
192 gimple
*new_stmt
= gimple_build_assign (var
, clobber
);
193 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
196 /* Utility functions used by vect_mark_stmts_to_be_vectorized. */
198 /* Function vect_mark_relevant.
200 Mark STMT_INFO as "relevant for vectorization" and add it to WORKLIST. */
203 vect_mark_relevant (vec
<stmt_vec_info
> *worklist
, stmt_vec_info stmt_info
,
204 enum vect_relevant relevant
, bool live_p
)
206 enum vect_relevant save_relevant
= STMT_VINFO_RELEVANT (stmt_info
);
207 bool save_live_p
= STMT_VINFO_LIVE_P (stmt_info
);
209 if (dump_enabled_p ())
210 dump_printf_loc (MSG_NOTE
, vect_location
,
211 "mark relevant %d, live %d: %G", relevant
, live_p
,
214 /* If this stmt is an original stmt in a pattern, we might need to mark its
215 related pattern stmt instead of the original stmt. However, such stmts
216 may have their own uses that are not in any pattern, in such cases the
217 stmt itself should be marked. */
218 if (STMT_VINFO_IN_PATTERN_P (stmt_info
))
220 /* This is the last stmt in a sequence that was detected as a
221 pattern that can potentially be vectorized. Don't mark the stmt
222 as relevant/live because it's not going to be vectorized.
223 Instead mark the pattern-stmt that replaces it. */
225 if (dump_enabled_p ())
226 dump_printf_loc (MSG_NOTE
, vect_location
,
227 "last stmt in pattern. don't mark"
228 " relevant/live.\n");
229 stmt_vec_info old_stmt_info
= stmt_info
;
230 stmt_info
= STMT_VINFO_RELATED_STMT (stmt_info
);
231 gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info
) == old_stmt_info
);
232 save_relevant
= STMT_VINFO_RELEVANT (stmt_info
);
233 save_live_p
= STMT_VINFO_LIVE_P (stmt_info
);
236 STMT_VINFO_LIVE_P (stmt_info
) |= live_p
;
237 if (relevant
> STMT_VINFO_RELEVANT (stmt_info
))
238 STMT_VINFO_RELEVANT (stmt_info
) = relevant
;
240 if (STMT_VINFO_RELEVANT (stmt_info
) == save_relevant
241 && STMT_VINFO_LIVE_P (stmt_info
) == save_live_p
)
243 if (dump_enabled_p ())
244 dump_printf_loc (MSG_NOTE
, vect_location
,
245 "already marked relevant/live.\n");
249 worklist
->safe_push (stmt_info
);
253 /* Function is_simple_and_all_uses_invariant
255 Return true if STMT_INFO is simple and all uses of it are invariant. */
258 is_simple_and_all_uses_invariant (stmt_vec_info stmt_info
,
259 loop_vec_info loop_vinfo
)
264 gassign
*stmt
= dyn_cast
<gassign
*> (stmt_info
->stmt
);
268 FOR_EACH_SSA_TREE_OPERAND (op
, stmt
, iter
, SSA_OP_USE
)
270 enum vect_def_type dt
= vect_uninitialized_def
;
272 if (!vect_is_simple_use (op
, loop_vinfo
, &dt
))
274 if (dump_enabled_p ())
275 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
276 "use not simple.\n");
280 if (dt
!= vect_external_def
&& dt
!= vect_constant_def
)
286 /* Function vect_stmt_relevant_p.
288 Return true if STMT_INFO, in the loop that is represented by LOOP_VINFO,
289 is "relevant for vectorization".
291 A stmt is considered "relevant for vectorization" if:
292 - it has uses outside the loop.
293 - it has vdefs (it alters memory).
294 - control stmts in the loop (except for the exit condition).
296 CHECKME: what other side effects would the vectorizer allow? */
299 vect_stmt_relevant_p (stmt_vec_info stmt_info
, loop_vec_info loop_vinfo
,
300 enum vect_relevant
*relevant
, bool *live_p
)
302 class loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
304 imm_use_iterator imm_iter
;
308 *relevant
= vect_unused_in_scope
;
311 /* cond stmt other than loop exit cond. */
312 if (is_ctrl_stmt (stmt_info
->stmt
)
313 && STMT_VINFO_TYPE (stmt_info
) != loop_exit_ctrl_vec_info_type
)
314 *relevant
= vect_used_in_scope
;
316 /* changing memory. */
317 if (gimple_code (stmt_info
->stmt
) != GIMPLE_PHI
)
318 if (gimple_vdef (stmt_info
->stmt
)
319 && !gimple_clobber_p (stmt_info
->stmt
))
321 if (dump_enabled_p ())
322 dump_printf_loc (MSG_NOTE
, vect_location
,
323 "vec_stmt_relevant_p: stmt has vdefs.\n");
324 *relevant
= vect_used_in_scope
;
327 /* uses outside the loop. */
328 FOR_EACH_PHI_OR_STMT_DEF (def_p
, stmt_info
->stmt
, op_iter
, SSA_OP_DEF
)
330 FOR_EACH_IMM_USE_FAST (use_p
, imm_iter
, DEF_FROM_PTR (def_p
))
332 basic_block bb
= gimple_bb (USE_STMT (use_p
));
333 if (!flow_bb_inside_loop_p (loop
, bb
))
335 if (is_gimple_debug (USE_STMT (use_p
)))
338 if (dump_enabled_p ())
339 dump_printf_loc (MSG_NOTE
, vect_location
,
340 "vec_stmt_relevant_p: used out of loop.\n");
342 /* We expect all such uses to be in the loop exit phis
343 (because of loop closed form) */
344 gcc_assert (gimple_code (USE_STMT (use_p
)) == GIMPLE_PHI
);
345 gcc_assert (bb
== single_exit (loop
)->dest
);
352 if (*live_p
&& *relevant
== vect_unused_in_scope
353 && !is_simple_and_all_uses_invariant (stmt_info
, loop_vinfo
))
355 if (dump_enabled_p ())
356 dump_printf_loc (MSG_NOTE
, vect_location
,
357 "vec_stmt_relevant_p: stmt live but not relevant.\n");
358 *relevant
= vect_used_only_live
;
361 return (*live_p
|| *relevant
);
365 /* Function exist_non_indexing_operands_for_use_p
367 USE is one of the uses attached to STMT_INFO. Check if USE is
368 used in STMT_INFO for anything other than indexing an array. */
371 exist_non_indexing_operands_for_use_p (tree use
, stmt_vec_info stmt_info
)
375 /* USE corresponds to some operand in STMT. If there is no data
376 reference in STMT, then any operand that corresponds to USE
377 is not indexing an array. */
378 if (!STMT_VINFO_DATA_REF (stmt_info
))
381 /* STMT has a data_ref. FORNOW this means that its of one of
385 (This should have been verified in analyze_data_refs).
387 'var' in the second case corresponds to a def, not a use,
388 so USE cannot correspond to any operands that are not used
391 Therefore, all we need to check is if STMT falls into the
392 first case, and whether var corresponds to USE. */
394 gassign
*assign
= dyn_cast
<gassign
*> (stmt_info
->stmt
);
395 if (!assign
|| !gimple_assign_copy_p (assign
))
397 gcall
*call
= dyn_cast
<gcall
*> (stmt_info
->stmt
);
398 if (call
&& gimple_call_internal_p (call
))
400 internal_fn ifn
= gimple_call_internal_fn (call
);
401 int mask_index
= internal_fn_mask_index (ifn
);
403 && use
== gimple_call_arg (call
, mask_index
))
405 int stored_value_index
= internal_fn_stored_value_index (ifn
);
406 if (stored_value_index
>= 0
407 && use
== gimple_call_arg (call
, stored_value_index
))
409 if (internal_gather_scatter_fn_p (ifn
)
410 && use
== gimple_call_arg (call
, 1))
416 if (TREE_CODE (gimple_assign_lhs (assign
)) == SSA_NAME
)
418 operand
= gimple_assign_rhs1 (assign
);
419 if (TREE_CODE (operand
) != SSA_NAME
)
430 Function process_use.
433 - a USE in STMT_VINFO in a loop represented by LOOP_VINFO
434 - RELEVANT - enum value to be set in the STMT_VINFO of the stmt
435 that defined USE. This is done by calling mark_relevant and passing it
436 the WORKLIST (to add DEF_STMT to the WORKLIST in case it is relevant).
437 - FORCE is true if exist_non_indexing_operands_for_use_p check shouldn't
441 Generally, LIVE_P and RELEVANT are used to define the liveness and
442 relevance info of the DEF_STMT of this USE:
443 STMT_VINFO_LIVE_P (DEF_stmt_vinfo) <-- live_p
444 STMT_VINFO_RELEVANT (DEF_stmt_vinfo) <-- relevant
446 - case 1: If USE is used only for address computations (e.g. array indexing),
447 which does not need to be directly vectorized, then the liveness/relevance
448 of the respective DEF_STMT is left unchanged.
449 - case 2: If STMT_VINFO is a reduction phi and DEF_STMT is a reduction stmt,
450 we skip DEF_STMT cause it had already been processed.
451 - case 3: If DEF_STMT and STMT_VINFO are in different nests, then
452 "relevant" will be modified accordingly.
454 Return true if everything is as expected. Return false otherwise. */
457 process_use (stmt_vec_info stmt_vinfo
, tree use
, loop_vec_info loop_vinfo
,
458 enum vect_relevant relevant
, vec
<stmt_vec_info
> *worklist
,
461 stmt_vec_info dstmt_vinfo
;
462 enum vect_def_type dt
;
464 /* case 1: we are only interested in uses that need to be vectorized. Uses
465 that are used for address computation are not considered relevant. */
466 if (!force
&& !exist_non_indexing_operands_for_use_p (use
, stmt_vinfo
))
467 return opt_result::success ();
469 if (!vect_is_simple_use (use
, loop_vinfo
, &dt
, &dstmt_vinfo
))
470 return opt_result::failure_at (stmt_vinfo
->stmt
,
472 " unsupported use in stmt.\n");
475 return opt_result::success ();
477 basic_block def_bb
= gimple_bb (dstmt_vinfo
->stmt
);
478 basic_block bb
= gimple_bb (stmt_vinfo
->stmt
);
480 /* case 2: A reduction phi (STMT) defined by a reduction stmt (DSTMT_VINFO).
481 We have to force the stmt live since the epilogue loop needs it to
482 continue computing the reduction. */
483 if (gimple_code (stmt_vinfo
->stmt
) == GIMPLE_PHI
484 && STMT_VINFO_DEF_TYPE (stmt_vinfo
) == vect_reduction_def
485 && gimple_code (dstmt_vinfo
->stmt
) != GIMPLE_PHI
486 && STMT_VINFO_DEF_TYPE (dstmt_vinfo
) == vect_reduction_def
487 && bb
->loop_father
== def_bb
->loop_father
)
489 if (dump_enabled_p ())
490 dump_printf_loc (MSG_NOTE
, vect_location
,
491 "reduc-stmt defining reduc-phi in the same nest.\n");
492 vect_mark_relevant (worklist
, dstmt_vinfo
, relevant
, true);
493 return opt_result::success ();
496 /* case 3a: outer-loop stmt defining an inner-loop stmt:
497 outer-loop-header-bb:
503 if (flow_loop_nested_p (def_bb
->loop_father
, bb
->loop_father
))
505 if (dump_enabled_p ())
506 dump_printf_loc (MSG_NOTE
, vect_location
,
507 "outer-loop def-stmt defining inner-loop stmt.\n");
511 case vect_unused_in_scope
:
512 relevant
= (STMT_VINFO_DEF_TYPE (stmt_vinfo
) == vect_nested_cycle
) ?
513 vect_used_in_scope
: vect_unused_in_scope
;
516 case vect_used_in_outer_by_reduction
:
517 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo
) != vect_reduction_def
);
518 relevant
= vect_used_by_reduction
;
521 case vect_used_in_outer
:
522 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo
) != vect_reduction_def
);
523 relevant
= vect_used_in_scope
;
526 case vect_used_in_scope
:
534 /* case 3b: inner-loop stmt defining an outer-loop stmt:
535 outer-loop-header-bb:
539 outer-loop-tail-bb (or outer-loop-exit-bb in double reduction):
541 else if (flow_loop_nested_p (bb
->loop_father
, def_bb
->loop_father
))
543 if (dump_enabled_p ())
544 dump_printf_loc (MSG_NOTE
, vect_location
,
545 "inner-loop def-stmt defining outer-loop stmt.\n");
549 case vect_unused_in_scope
:
550 relevant
= (STMT_VINFO_DEF_TYPE (stmt_vinfo
) == vect_reduction_def
551 || STMT_VINFO_DEF_TYPE (stmt_vinfo
) == vect_double_reduction_def
) ?
552 vect_used_in_outer_by_reduction
: vect_unused_in_scope
;
555 case vect_used_by_reduction
:
556 case vect_used_only_live
:
557 relevant
= vect_used_in_outer_by_reduction
;
560 case vect_used_in_scope
:
561 relevant
= vect_used_in_outer
;
568 /* We are also not interested in uses on loop PHI backedges that are
569 inductions. Otherwise we'll needlessly vectorize the IV increment
570 and cause hybrid SLP for SLP inductions. Unless the PHI is live
572 else if (gimple_code (stmt_vinfo
->stmt
) == GIMPLE_PHI
573 && STMT_VINFO_DEF_TYPE (stmt_vinfo
) == vect_induction_def
574 && ! STMT_VINFO_LIVE_P (stmt_vinfo
)
575 && (PHI_ARG_DEF_FROM_EDGE (stmt_vinfo
->stmt
,
576 loop_latch_edge (bb
->loop_father
))
579 if (dump_enabled_p ())
580 dump_printf_loc (MSG_NOTE
, vect_location
,
581 "induction value on backedge.\n");
582 return opt_result::success ();
586 vect_mark_relevant (worklist
, dstmt_vinfo
, relevant
, false);
587 return opt_result::success ();
591 /* Function vect_mark_stmts_to_be_vectorized.
593 Not all stmts in the loop need to be vectorized. For example:
602 Stmt 1 and 3 do not need to be vectorized, because loop control and
603 addressing of vectorized data-refs are handled differently.
605 This pass detects such stmts. */
608 vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo
, bool *fatal
)
610 class loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
611 basic_block
*bbs
= LOOP_VINFO_BBS (loop_vinfo
);
612 unsigned int nbbs
= loop
->num_nodes
;
613 gimple_stmt_iterator si
;
617 enum vect_relevant relevant
;
619 DUMP_VECT_SCOPE ("vect_mark_stmts_to_be_vectorized");
621 auto_vec
<stmt_vec_info
, 64> worklist
;
623 /* 1. Init worklist. */
624 for (i
= 0; i
< nbbs
; i
++)
627 for (si
= gsi_start_phis (bb
); !gsi_end_p (si
); gsi_next (&si
))
629 stmt_vec_info phi_info
= loop_vinfo
->lookup_stmt (gsi_stmt (si
));
630 if (dump_enabled_p ())
631 dump_printf_loc (MSG_NOTE
, vect_location
, "init: phi relevant? %G",
634 if (vect_stmt_relevant_p (phi_info
, loop_vinfo
, &relevant
, &live_p
))
635 vect_mark_relevant (&worklist
, phi_info
, relevant
, live_p
);
637 for (si
= gsi_start_bb (bb
); !gsi_end_p (si
); gsi_next (&si
))
639 stmt_vec_info stmt_info
= loop_vinfo
->lookup_stmt (gsi_stmt (si
));
640 if (dump_enabled_p ())
641 dump_printf_loc (MSG_NOTE
, vect_location
,
642 "init: stmt relevant? %G", stmt_info
->stmt
);
644 if (vect_stmt_relevant_p (stmt_info
, loop_vinfo
, &relevant
, &live_p
))
645 vect_mark_relevant (&worklist
, stmt_info
, relevant
, live_p
);
649 /* 2. Process_worklist */
650 while (worklist
.length () > 0)
655 stmt_vec_info stmt_vinfo
= worklist
.pop ();
656 if (dump_enabled_p ())
657 dump_printf_loc (MSG_NOTE
, vect_location
,
658 "worklist: examine stmt: %G", stmt_vinfo
->stmt
);
660 /* Examine the USEs of STMT. For each USE, mark the stmt that defines it
661 (DEF_STMT) as relevant/irrelevant according to the relevance property
663 relevant
= STMT_VINFO_RELEVANT (stmt_vinfo
);
665 /* Generally, the relevance property of STMT (in STMT_VINFO_RELEVANT) is
666 propagated as is to the DEF_STMTs of its USEs.
668 One exception is when STMT has been identified as defining a reduction
669 variable; in this case we set the relevance to vect_used_by_reduction.
670 This is because we distinguish between two kinds of relevant stmts -
671 those that are used by a reduction computation, and those that are
672 (also) used by a regular computation. This allows us later on to
673 identify stmts that are used solely by a reduction, and therefore the
674 order of the results that they produce does not have to be kept. */
676 switch (STMT_VINFO_DEF_TYPE (stmt_vinfo
))
678 case vect_reduction_def
:
679 gcc_assert (relevant
!= vect_unused_in_scope
);
680 if (relevant
!= vect_unused_in_scope
681 && relevant
!= vect_used_in_scope
682 && relevant
!= vect_used_by_reduction
683 && relevant
!= vect_used_only_live
)
684 return opt_result::failure_at
685 (stmt_vinfo
->stmt
, "unsupported use of reduction.\n");
688 case vect_nested_cycle
:
689 if (relevant
!= vect_unused_in_scope
690 && relevant
!= vect_used_in_outer_by_reduction
691 && relevant
!= vect_used_in_outer
)
692 return opt_result::failure_at
693 (stmt_vinfo
->stmt
, "unsupported use of nested cycle.\n");
696 case vect_double_reduction_def
:
697 if (relevant
!= vect_unused_in_scope
698 && relevant
!= vect_used_by_reduction
699 && relevant
!= vect_used_only_live
)
700 return opt_result::failure_at
701 (stmt_vinfo
->stmt
, "unsupported use of double reduction.\n");
708 if (is_pattern_stmt_p (stmt_vinfo
))
710 /* Pattern statements are not inserted into the code, so
711 FOR_EACH_PHI_OR_STMT_USE optimizes their operands out, and we
712 have to scan the RHS or function arguments instead. */
713 if (gassign
*assign
= dyn_cast
<gassign
*> (stmt_vinfo
->stmt
))
715 enum tree_code rhs_code
= gimple_assign_rhs_code (assign
);
716 tree op
= gimple_assign_rhs1 (assign
);
719 if (rhs_code
== COND_EXPR
&& COMPARISON_CLASS_P (op
))
722 = process_use (stmt_vinfo
, TREE_OPERAND (op
, 0),
723 loop_vinfo
, relevant
, &worklist
, false);
726 res
= process_use (stmt_vinfo
, TREE_OPERAND (op
, 1),
727 loop_vinfo
, relevant
, &worklist
, false);
732 for (; i
< gimple_num_ops (assign
); i
++)
734 op
= gimple_op (assign
, i
);
735 if (TREE_CODE (op
) == SSA_NAME
)
738 = process_use (stmt_vinfo
, op
, loop_vinfo
, relevant
,
745 else if (gcall
*call
= dyn_cast
<gcall
*> (stmt_vinfo
->stmt
))
747 for (i
= 0; i
< gimple_call_num_args (call
); i
++)
749 tree arg
= gimple_call_arg (call
, i
);
751 = process_use (stmt_vinfo
, arg
, loop_vinfo
, relevant
,
759 FOR_EACH_PHI_OR_STMT_USE (use_p
, stmt_vinfo
->stmt
, iter
, SSA_OP_USE
)
761 tree op
= USE_FROM_PTR (use_p
);
763 = process_use (stmt_vinfo
, op
, loop_vinfo
, relevant
,
769 if (STMT_VINFO_GATHER_SCATTER_P (stmt_vinfo
))
771 gather_scatter_info gs_info
;
772 if (!vect_check_gather_scatter (stmt_vinfo
, loop_vinfo
, &gs_info
))
775 = process_use (stmt_vinfo
, gs_info
.offset
, loop_vinfo
, relevant
,
784 } /* while worklist */
786 return opt_result::success ();
789 /* Function vect_model_simple_cost.
791 Models cost for simple operations, i.e. those that only emit ncopies of a
792 single op. Right now, this does not account for multiple insns that could
793 be generated for the single vector op. We will handle that shortly. */
796 vect_model_simple_cost (vec_info
*,
797 stmt_vec_info stmt_info
, int ncopies
,
798 enum vect_def_type
*dt
,
801 stmt_vector_for_cost
*cost_vec
,
802 vect_cost_for_stmt kind
= vector_stmt
)
804 int inside_cost
= 0, prologue_cost
= 0;
806 gcc_assert (cost_vec
!= NULL
);
808 /* ??? Somehow we need to fix this at the callers. */
810 ncopies
= SLP_TREE_NUMBER_OF_VEC_STMTS (node
);
813 /* Cost the "broadcast" of a scalar operand in to a vector operand.
814 Use scalar_to_vec to cost the broadcast, as elsewhere in the vector
816 for (int i
= 0; i
< ndts
; i
++)
817 if (dt
[i
] == vect_constant_def
|| dt
[i
] == vect_external_def
)
818 prologue_cost
+= record_stmt_cost (cost_vec
, 1, scalar_to_vec
,
819 stmt_info
, 0, vect_prologue
);
821 /* Adjust for two-operator SLP nodes. */
822 if (node
&& SLP_TREE_TWO_OPERATORS (node
))
825 inside_cost
+= record_stmt_cost (cost_vec
, ncopies
, vec_perm
,
826 stmt_info
, 0, vect_body
);
829 /* Pass the inside-of-loop statements to the target-specific cost model. */
830 inside_cost
+= record_stmt_cost (cost_vec
, ncopies
, kind
,
831 stmt_info
, 0, vect_body
);
833 if (dump_enabled_p ())
834 dump_printf_loc (MSG_NOTE
, vect_location
,
835 "vect_model_simple_cost: inside_cost = %d, "
836 "prologue_cost = %d .\n", inside_cost
, prologue_cost
);
840 /* Model cost for type demotion and promotion operations. PWR is
841 normally zero for single-step promotions and demotions. It will be
842 one if two-step promotion/demotion is required, and so on. NCOPIES
843 is the number of vector results (and thus number of instructions)
844 for the narrowest end of the operation chain. Each additional
845 step doubles the number of instructions required. */
848 vect_model_promotion_demotion_cost (stmt_vec_info stmt_info
,
849 enum vect_def_type
*dt
,
850 unsigned int ncopies
, int pwr
,
851 stmt_vector_for_cost
*cost_vec
)
854 int inside_cost
= 0, prologue_cost
= 0;
856 for (i
= 0; i
< pwr
+ 1; i
++)
858 inside_cost
+= record_stmt_cost (cost_vec
, ncopies
, vec_promote_demote
,
859 stmt_info
, 0, vect_body
);
863 /* FORNOW: Assuming maximum 2 args per stmts. */
864 for (i
= 0; i
< 2; i
++)
865 if (dt
[i
] == vect_constant_def
|| dt
[i
] == vect_external_def
)
866 prologue_cost
+= record_stmt_cost (cost_vec
, 1, vector_stmt
,
867 stmt_info
, 0, vect_prologue
);
869 if (dump_enabled_p ())
870 dump_printf_loc (MSG_NOTE
, vect_location
,
871 "vect_model_promotion_demotion_cost: inside_cost = %d, "
872 "prologue_cost = %d .\n", inside_cost
, prologue_cost
);
875 /* Returns true if the current function returns DECL. */
878 cfun_returns (tree decl
)
882 FOR_EACH_EDGE (e
, ei
, EXIT_BLOCK_PTR_FOR_FN (cfun
)->preds
)
884 greturn
*ret
= safe_dyn_cast
<greturn
*> (last_stmt (e
->src
));
887 if (gimple_return_retval (ret
) == decl
)
889 /* We often end up with an aggregate copy to the result decl,
890 handle that case as well. First skip intermediate clobbers
895 def
= SSA_NAME_DEF_STMT (gimple_vuse (def
));
897 while (gimple_clobber_p (def
));
898 if (is_a
<gassign
*> (def
)
899 && gimple_assign_lhs (def
) == gimple_return_retval (ret
)
900 && gimple_assign_rhs1 (def
) == decl
)
906 /* Function vect_model_store_cost
908 Models cost for stores. In the case of grouped accesses, one access
909 has the overhead of the grouped access attributed to it. */
912 vect_model_store_cost (vec_info
*vinfo
, stmt_vec_info stmt_info
, int ncopies
,
913 vect_memory_access_type memory_access_type
,
914 vec_load_store_type vls_type
, slp_tree slp_node
,
915 stmt_vector_for_cost
*cost_vec
)
917 unsigned int inside_cost
= 0, prologue_cost
= 0;
918 stmt_vec_info first_stmt_info
= stmt_info
;
919 bool grouped_access_p
= STMT_VINFO_GROUPED_ACCESS (stmt_info
);
921 /* ??? Somehow we need to fix this at the callers. */
923 ncopies
= SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
);
925 if (vls_type
== VLS_STORE_INVARIANT
)
928 prologue_cost
+= record_stmt_cost (cost_vec
, 1, scalar_to_vec
,
929 stmt_info
, 0, vect_prologue
);
932 /* Grouped stores update all elements in the group at once,
933 so we want the DR for the first statement. */
934 if (!slp_node
&& grouped_access_p
)
935 first_stmt_info
= DR_GROUP_FIRST_ELEMENT (stmt_info
);
937 /* True if we should include any once-per-group costs as well as
938 the cost of the statement itself. For SLP we only get called
939 once per group anyhow. */
940 bool first_stmt_p
= (first_stmt_info
== stmt_info
);
942 /* We assume that the cost of a single store-lanes instruction is
943 equivalent to the cost of DR_GROUP_SIZE separate stores. If a grouped
944 access is instead being provided by a permute-and-store operation,
945 include the cost of the permutes. */
947 && memory_access_type
== VMAT_CONTIGUOUS_PERMUTE
)
949 /* Uses a high and low interleave or shuffle operations for each
951 int group_size
= DR_GROUP_SIZE (first_stmt_info
);
952 int nstmts
= ncopies
* ceil_log2 (group_size
) * group_size
;
953 inside_cost
= record_stmt_cost (cost_vec
, nstmts
, vec_perm
,
954 stmt_info
, 0, vect_body
);
956 if (dump_enabled_p ())
957 dump_printf_loc (MSG_NOTE
, vect_location
,
958 "vect_model_store_cost: strided group_size = %d .\n",
962 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
963 /* Costs of the stores. */
964 if (memory_access_type
== VMAT_ELEMENTWISE
965 || memory_access_type
== VMAT_GATHER_SCATTER
)
967 /* N scalar stores plus extracting the elements. */
968 unsigned int assumed_nunits
= vect_nunits_for_cost (vectype
);
969 inside_cost
+= record_stmt_cost (cost_vec
,
970 ncopies
* assumed_nunits
,
971 scalar_store
, stmt_info
, 0, vect_body
);
974 vect_get_store_cost (vinfo
, stmt_info
, ncopies
, &inside_cost
, cost_vec
);
976 if (memory_access_type
== VMAT_ELEMENTWISE
977 || memory_access_type
== VMAT_STRIDED_SLP
)
979 /* N scalar stores plus extracting the elements. */
980 unsigned int assumed_nunits
= vect_nunits_for_cost (vectype
);
981 inside_cost
+= record_stmt_cost (cost_vec
,
982 ncopies
* assumed_nunits
,
983 vec_to_scalar
, stmt_info
, 0, vect_body
);
986 /* When vectorizing a store into the function result assign
987 a penalty if the function returns in a multi-register location.
988 In this case we assume we'll end up with having to spill the
989 vector result and do piecewise loads as a conservative estimate. */
990 tree base
= get_base_address (STMT_VINFO_DATA_REF (stmt_info
)->ref
);
992 && (TREE_CODE (base
) == RESULT_DECL
993 || (DECL_P (base
) && cfun_returns (base
)))
994 && !aggregate_value_p (base
, cfun
->decl
))
996 rtx reg
= hard_function_value (TREE_TYPE (base
), cfun
->decl
, 0, 1);
997 /* ??? Handle PARALLEL in some way. */
1000 int nregs
= hard_regno_nregs (REGNO (reg
), GET_MODE (reg
));
1001 /* Assume that a single reg-reg move is possible and cheap,
1002 do not account for vector to gp register move cost. */
1006 prologue_cost
+= record_stmt_cost (cost_vec
, ncopies
,
1008 stmt_info
, 0, vect_epilogue
);
1010 prologue_cost
+= record_stmt_cost (cost_vec
, ncopies
* nregs
,
1012 stmt_info
, 0, vect_epilogue
);
1017 if (dump_enabled_p ())
1018 dump_printf_loc (MSG_NOTE
, vect_location
,
1019 "vect_model_store_cost: inside_cost = %d, "
1020 "prologue_cost = %d .\n", inside_cost
, prologue_cost
);
1024 /* Calculate cost of DR's memory access. */
1026 vect_get_store_cost (vec_info
*vinfo
, stmt_vec_info stmt_info
, int ncopies
,
1027 unsigned int *inside_cost
,
1028 stmt_vector_for_cost
*body_cost_vec
)
1030 dr_vec_info
*dr_info
= STMT_VINFO_DR_INFO (stmt_info
);
1031 int alignment_support_scheme
1032 = vect_supportable_dr_alignment (vinfo
, dr_info
, false);
1034 switch (alignment_support_scheme
)
1038 *inside_cost
+= record_stmt_cost (body_cost_vec
, ncopies
,
1039 vector_store
, stmt_info
, 0,
1042 if (dump_enabled_p ())
1043 dump_printf_loc (MSG_NOTE
, vect_location
,
1044 "vect_model_store_cost: aligned.\n");
1048 case dr_unaligned_supported
:
1050 /* Here, we assign an additional cost for the unaligned store. */
1051 *inside_cost
+= record_stmt_cost (body_cost_vec
, ncopies
,
1052 unaligned_store
, stmt_info
,
1053 DR_MISALIGNMENT (dr_info
),
1055 if (dump_enabled_p ())
1056 dump_printf_loc (MSG_NOTE
, vect_location
,
1057 "vect_model_store_cost: unaligned supported by "
1062 case dr_unaligned_unsupported
:
1064 *inside_cost
= VECT_MAX_COST
;
1066 if (dump_enabled_p ())
1067 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1068 "vect_model_store_cost: unsupported access.\n");
1078 /* Function vect_model_load_cost
1080 Models cost for loads. In the case of grouped accesses, one access has
1081 the overhead of the grouped access attributed to it. Since unaligned
1082 accesses are supported for loads, we also account for the costs of the
1083 access scheme chosen. */
1086 vect_model_load_cost (vec_info
*vinfo
,
1087 stmt_vec_info stmt_info
, unsigned ncopies
, poly_uint64 vf
,
1088 vect_memory_access_type memory_access_type
,
1090 stmt_vector_for_cost
*cost_vec
)
1092 unsigned int inside_cost
= 0, prologue_cost
= 0;
1093 bool grouped_access_p
= STMT_VINFO_GROUPED_ACCESS (stmt_info
);
1095 gcc_assert (cost_vec
);
1097 /* ??? Somehow we need to fix this at the callers. */
1099 ncopies
= SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
);
1101 if (slp_node
&& SLP_TREE_LOAD_PERMUTATION (slp_node
).exists ())
1103 /* If the load is permuted then the alignment is determined by
1104 the first group element not by the first scalar stmt DR. */
1105 stmt_vec_info first_stmt_info
= DR_GROUP_FIRST_ELEMENT (stmt_info
);
1106 /* Record the cost for the permutation. */
1108 unsigned assumed_nunits
1109 = vect_nunits_for_cost (STMT_VINFO_VECTYPE (first_stmt_info
));
1110 vect_transform_slp_perm_load (vinfo
, slp_node
, vNULL
, NULL
,
1111 vf
, true, &n_perms
);
1112 inside_cost
+= record_stmt_cost (cost_vec
, n_perms
, vec_perm
,
1113 first_stmt_info
, 0, vect_body
);
1114 /* And adjust the number of loads performed. This handles
1115 redundancies as well as loads that are later dead. */
1116 auto_sbitmap
perm (DR_GROUP_SIZE (first_stmt_info
));
1117 bitmap_clear (perm
);
1118 for (unsigned i
= 0;
1119 i
< SLP_TREE_LOAD_PERMUTATION (slp_node
).length (); ++i
)
1120 bitmap_set_bit (perm
, SLP_TREE_LOAD_PERMUTATION (slp_node
)[i
]);
1122 bool load_seen
= false;
1123 for (unsigned i
= 0; i
< DR_GROUP_SIZE (first_stmt_info
); ++i
)
1125 if (i
% assumed_nunits
== 0)
1131 if (bitmap_bit_p (perm
, i
))
1137 <= (DR_GROUP_SIZE (first_stmt_info
)
1138 - DR_GROUP_GAP (first_stmt_info
)
1139 + assumed_nunits
- 1) / assumed_nunits
);
1142 /* Grouped loads read all elements in the group at once,
1143 so we want the DR for the first statement. */
1144 stmt_vec_info first_stmt_info
= stmt_info
;
1145 if (!slp_node
&& grouped_access_p
)
1146 first_stmt_info
= DR_GROUP_FIRST_ELEMENT (stmt_info
);
1148 /* True if we should include any once-per-group costs as well as
1149 the cost of the statement itself. For SLP we only get called
1150 once per group anyhow. */
1151 bool first_stmt_p
= (first_stmt_info
== stmt_info
);
1153 /* We assume that the cost of a single load-lanes instruction is
1154 equivalent to the cost of DR_GROUP_SIZE separate loads. If a grouped
1155 access is instead being provided by a load-and-permute operation,
1156 include the cost of the permutes. */
1158 && memory_access_type
== VMAT_CONTIGUOUS_PERMUTE
)
1160 /* Uses an even and odd extract operations or shuffle operations
1161 for each needed permute. */
1162 int group_size
= DR_GROUP_SIZE (first_stmt_info
);
1163 int nstmts
= ncopies
* ceil_log2 (group_size
) * group_size
;
1164 inside_cost
+= record_stmt_cost (cost_vec
, nstmts
, vec_perm
,
1165 stmt_info
, 0, vect_body
);
1167 if (dump_enabled_p ())
1168 dump_printf_loc (MSG_NOTE
, vect_location
,
1169 "vect_model_load_cost: strided group_size = %d .\n",
1173 /* The loads themselves. */
1174 if (memory_access_type
== VMAT_ELEMENTWISE
1175 || memory_access_type
== VMAT_GATHER_SCATTER
)
1177 /* N scalar loads plus gathering them into a vector. */
1178 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
1179 unsigned int assumed_nunits
= vect_nunits_for_cost (vectype
);
1180 inside_cost
+= record_stmt_cost (cost_vec
,
1181 ncopies
* assumed_nunits
,
1182 scalar_load
, stmt_info
, 0, vect_body
);
1185 vect_get_load_cost (vinfo
, stmt_info
, ncopies
, first_stmt_p
,
1186 &inside_cost
, &prologue_cost
,
1187 cost_vec
, cost_vec
, true);
1188 if (memory_access_type
== VMAT_ELEMENTWISE
1189 || memory_access_type
== VMAT_STRIDED_SLP
)
1190 inside_cost
+= record_stmt_cost (cost_vec
, ncopies
, vec_construct
,
1191 stmt_info
, 0, vect_body
);
1193 if (dump_enabled_p ())
1194 dump_printf_loc (MSG_NOTE
, vect_location
,
1195 "vect_model_load_cost: inside_cost = %d, "
1196 "prologue_cost = %d .\n", inside_cost
, prologue_cost
);
1200 /* Calculate cost of DR's memory access. */
1202 vect_get_load_cost (vec_info
*vinfo
, stmt_vec_info stmt_info
, int ncopies
,
1203 bool add_realign_cost
, unsigned int *inside_cost
,
1204 unsigned int *prologue_cost
,
1205 stmt_vector_for_cost
*prologue_cost_vec
,
1206 stmt_vector_for_cost
*body_cost_vec
,
1207 bool record_prologue_costs
)
1209 dr_vec_info
*dr_info
= STMT_VINFO_DR_INFO (stmt_info
);
1210 int alignment_support_scheme
1211 = vect_supportable_dr_alignment (vinfo
, dr_info
, false);
1213 switch (alignment_support_scheme
)
1217 *inside_cost
+= record_stmt_cost (body_cost_vec
, ncopies
, vector_load
,
1218 stmt_info
, 0, vect_body
);
1220 if (dump_enabled_p ())
1221 dump_printf_loc (MSG_NOTE
, vect_location
,
1222 "vect_model_load_cost: aligned.\n");
1226 case dr_unaligned_supported
:
1228 /* Here, we assign an additional cost for the unaligned load. */
1229 *inside_cost
+= record_stmt_cost (body_cost_vec
, ncopies
,
1230 unaligned_load
, stmt_info
,
1231 DR_MISALIGNMENT (dr_info
),
1234 if (dump_enabled_p ())
1235 dump_printf_loc (MSG_NOTE
, vect_location
,
1236 "vect_model_load_cost: unaligned supported by "
1241 case dr_explicit_realign
:
1243 *inside_cost
+= record_stmt_cost (body_cost_vec
, ncopies
* 2,
1244 vector_load
, stmt_info
, 0, vect_body
);
1245 *inside_cost
+= record_stmt_cost (body_cost_vec
, ncopies
,
1246 vec_perm
, stmt_info
, 0, vect_body
);
1248 /* FIXME: If the misalignment remains fixed across the iterations of
1249 the containing loop, the following cost should be added to the
1251 if (targetm
.vectorize
.builtin_mask_for_load
)
1252 *inside_cost
+= record_stmt_cost (body_cost_vec
, 1, vector_stmt
,
1253 stmt_info
, 0, vect_body
);
1255 if (dump_enabled_p ())
1256 dump_printf_loc (MSG_NOTE
, vect_location
,
1257 "vect_model_load_cost: explicit realign\n");
1261 case dr_explicit_realign_optimized
:
1263 if (dump_enabled_p ())
1264 dump_printf_loc (MSG_NOTE
, vect_location
,
1265 "vect_model_load_cost: unaligned software "
1268 /* Unaligned software pipeline has a load of an address, an initial
1269 load, and possibly a mask operation to "prime" the loop. However,
1270 if this is an access in a group of loads, which provide grouped
1271 access, then the above cost should only be considered for one
1272 access in the group. Inside the loop, there is a load op
1273 and a realignment op. */
1275 if (add_realign_cost
&& record_prologue_costs
)
1277 *prologue_cost
+= record_stmt_cost (prologue_cost_vec
, 2,
1278 vector_stmt
, stmt_info
,
1280 if (targetm
.vectorize
.builtin_mask_for_load
)
1281 *prologue_cost
+= record_stmt_cost (prologue_cost_vec
, 1,
1282 vector_stmt
, stmt_info
,
1286 *inside_cost
+= record_stmt_cost (body_cost_vec
, ncopies
, vector_load
,
1287 stmt_info
, 0, vect_body
);
1288 *inside_cost
+= record_stmt_cost (body_cost_vec
, ncopies
, vec_perm
,
1289 stmt_info
, 0, vect_body
);
1291 if (dump_enabled_p ())
1292 dump_printf_loc (MSG_NOTE
, vect_location
,
1293 "vect_model_load_cost: explicit realign optimized"
1299 case dr_unaligned_unsupported
:
1301 *inside_cost
= VECT_MAX_COST
;
1303 if (dump_enabled_p ())
1304 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1305 "vect_model_load_cost: unsupported access.\n");
1314 /* Insert the new stmt NEW_STMT at *GSI or at the appropriate place in
1315 the loop preheader for the vectorized stmt STMT_VINFO. */
1318 vect_init_vector_1 (vec_info
*vinfo
, stmt_vec_info stmt_vinfo
, gimple
*new_stmt
,
1319 gimple_stmt_iterator
*gsi
)
1322 vect_finish_stmt_generation (vinfo
, stmt_vinfo
, new_stmt
, gsi
);
1325 loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
);
1329 class loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
1333 if (stmt_vinfo
&& nested_in_vect_loop_p (loop
, stmt_vinfo
))
1336 pe
= loop_preheader_edge (loop
);
1337 new_bb
= gsi_insert_on_edge_immediate (pe
, new_stmt
);
1338 gcc_assert (!new_bb
);
1342 bb_vec_info bb_vinfo
= dyn_cast
<bb_vec_info
> (vinfo
);
1343 gimple_stmt_iterator gsi_region_begin
= bb_vinfo
->region_begin
;
1344 gsi_insert_before (&gsi_region_begin
, new_stmt
, GSI_SAME_STMT
);
1348 if (dump_enabled_p ())
1349 dump_printf_loc (MSG_NOTE
, vect_location
,
1350 "created new init_stmt: %G", new_stmt
);
1353 /* Function vect_init_vector.
1355 Insert a new stmt (INIT_STMT) that initializes a new variable of type
1356 TYPE with the value VAL. If TYPE is a vector type and VAL does not have
1357 vector type a vector with all elements equal to VAL is created first.
1358 Place the initialization at GSI if it is not NULL. Otherwise, place the
1359 initialization at the loop preheader.
1360 Return the DEF of INIT_STMT.
1361 It will be used in the vectorization of STMT_INFO. */
1364 vect_init_vector (vec_info
*vinfo
, stmt_vec_info stmt_info
, tree val
, tree type
,
1365 gimple_stmt_iterator
*gsi
)
1370 /* We abuse this function to push sth to a SSA name with initial 'val'. */
1371 if (! useless_type_conversion_p (type
, TREE_TYPE (val
)))
1373 gcc_assert (TREE_CODE (type
) == VECTOR_TYPE
);
1374 if (! types_compatible_p (TREE_TYPE (type
), TREE_TYPE (val
)))
1376 /* Scalar boolean value should be transformed into
1377 all zeros or all ones value before building a vector. */
1378 if (VECTOR_BOOLEAN_TYPE_P (type
))
1380 tree true_val
= build_all_ones_cst (TREE_TYPE (type
));
1381 tree false_val
= build_zero_cst (TREE_TYPE (type
));
1383 if (CONSTANT_CLASS_P (val
))
1384 val
= integer_zerop (val
) ? false_val
: true_val
;
1387 new_temp
= make_ssa_name (TREE_TYPE (type
));
1388 init_stmt
= gimple_build_assign (new_temp
, COND_EXPR
,
1389 val
, true_val
, false_val
);
1390 vect_init_vector_1 (vinfo
, stmt_info
, init_stmt
, gsi
);
1396 gimple_seq stmts
= NULL
;
1397 if (! INTEGRAL_TYPE_P (TREE_TYPE (val
)))
1398 val
= gimple_build (&stmts
, VIEW_CONVERT_EXPR
,
1399 TREE_TYPE (type
), val
);
1401 /* ??? Condition vectorization expects us to do
1402 promotion of invariant/external defs. */
1403 val
= gimple_convert (&stmts
, TREE_TYPE (type
), val
);
1404 for (gimple_stmt_iterator gsi2
= gsi_start (stmts
);
1405 !gsi_end_p (gsi2
); )
1407 init_stmt
= gsi_stmt (gsi2
);
1408 gsi_remove (&gsi2
, false);
1409 vect_init_vector_1 (vinfo
, stmt_info
, init_stmt
, gsi
);
1413 val
= build_vector_from_val (type
, val
);
1416 new_temp
= vect_get_new_ssa_name (type
, vect_simple_var
, "cst_");
1417 init_stmt
= gimple_build_assign (new_temp
, val
);
1418 vect_init_vector_1 (vinfo
, stmt_info
, init_stmt
, gsi
);
1422 /* Function vect_get_vec_def_for_operand_1.
1424 For a defining stmt DEF_STMT_INFO of a scalar stmt, return a vector def
1425 with type DT that will be used in the vectorized stmt. */
1428 vect_get_vec_def_for_operand_1 (stmt_vec_info def_stmt_info
,
1429 enum vect_def_type dt
)
1432 stmt_vec_info vec_stmt_info
;
1436 /* operand is a constant or a loop invariant. */
1437 case vect_constant_def
:
1438 case vect_external_def
:
1439 /* Code should use vect_get_vec_def_for_operand. */
1442 /* Operand is defined by a loop header phi. In case of nested
1443 cycles we also may have uses of the backedge def. */
1444 case vect_reduction_def
:
1445 case vect_double_reduction_def
:
1446 case vect_nested_cycle
:
1447 case vect_induction_def
:
1448 gcc_assert (gimple_code (def_stmt_info
->stmt
) == GIMPLE_PHI
1449 || dt
== vect_nested_cycle
);
1452 /* operand is defined inside the loop. */
1453 case vect_internal_def
:
1455 /* Get the def from the vectorized stmt. */
1456 vec_stmt_info
= STMT_VINFO_VEC_STMT (def_stmt_info
);
1457 /* Get vectorized pattern statement. */
1459 && STMT_VINFO_IN_PATTERN_P (def_stmt_info
)
1460 && !STMT_VINFO_RELEVANT (def_stmt_info
))
1461 vec_stmt_info
= (STMT_VINFO_VEC_STMT
1462 (STMT_VINFO_RELATED_STMT (def_stmt_info
)));
1463 gcc_assert (vec_stmt_info
);
1464 if (gphi
*phi
= dyn_cast
<gphi
*> (vec_stmt_info
->stmt
))
1465 vec_oprnd
= PHI_RESULT (phi
);
1467 vec_oprnd
= gimple_get_lhs (vec_stmt_info
->stmt
);
1477 /* Function vect_get_vec_def_for_operand.
1479 OP is an operand in STMT_VINFO. This function returns a (vector) def
1480 that will be used in the vectorized stmt for STMT_VINFO.
1482 In the case that OP is an SSA_NAME which is defined in the loop, then
1483 STMT_VINFO_VEC_STMT of the defining stmt holds the relevant def.
1485 In case OP is an invariant or constant, a new stmt that creates a vector def
1486 needs to be introduced. VECTYPE may be used to specify a required type for
1487 vector invariant. */
1490 vect_get_vec_def_for_operand (vec_info
*vinfo
,
1491 tree op
, stmt_vec_info stmt_vinfo
, tree vectype
)
1494 enum vect_def_type dt
;
1496 loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
);
1498 if (dump_enabled_p ())
1499 dump_printf_loc (MSG_NOTE
, vect_location
,
1500 "vect_get_vec_def_for_operand: %T\n", op
);
1502 stmt_vec_info def_stmt_info
;
1503 is_simple_use
= vect_is_simple_use (op
, loop_vinfo
, &dt
,
1504 &def_stmt_info
, &def_stmt
);
1505 gcc_assert (is_simple_use
);
1506 if (def_stmt
&& dump_enabled_p ())
1507 dump_printf_loc (MSG_NOTE
, vect_location
, " def_stmt = %G", def_stmt
);
1509 if (dt
== vect_constant_def
|| dt
== vect_external_def
)
1511 tree stmt_vectype
= STMT_VINFO_VECTYPE (stmt_vinfo
);
1515 vector_type
= vectype
;
1516 else if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op
))
1517 && VECTOR_BOOLEAN_TYPE_P (stmt_vectype
))
1518 vector_type
= truth_type_for (stmt_vectype
);
1520 vector_type
= get_vectype_for_scalar_type (loop_vinfo
, TREE_TYPE (op
));
1522 gcc_assert (vector_type
);
1523 return vect_init_vector (vinfo
, stmt_vinfo
, op
, vector_type
, NULL
);
1526 return vect_get_vec_def_for_operand_1 (def_stmt_info
, dt
);
1530 /* Function vect_get_vec_def_for_stmt_copy
1532 Return a vector-def for an operand. This function is used when the
1533 vectorized stmt to be created (by the caller to this function) is a "copy"
1534 created in case the vectorized result cannot fit in one vector, and several
1535 copies of the vector-stmt are required. In this case the vector-def is
1536 retrieved from the vector stmt recorded in the STMT_VINFO_RELATED_STMT field
1537 of the stmt that defines VEC_OPRND. VINFO describes the vectorization.
1540 In case the vectorization factor (VF) is bigger than the number
1541 of elements that can fit in a vectype (nunits), we have to generate
1542 more than one vector stmt to vectorize the scalar stmt. This situation
1543 arises when there are multiple data-types operated upon in the loop; the
1544 smallest data-type determines the VF, and as a result, when vectorizing
1545 stmts operating on wider types we need to create 'VF/nunits' "copies" of the
1546 vector stmt (each computing a vector of 'nunits' results, and together
1547 computing 'VF' results in each iteration). This function is called when
1548 vectorizing such a stmt (e.g. vectorizing S2 in the illustration below, in
1549 which VF=16 and nunits=4, so the number of copies required is 4):
1551 scalar stmt: vectorized into: STMT_VINFO_RELATED_STMT
1553 S1: x = load VS1.0: vx.0 = memref0 VS1.1
1554 VS1.1: vx.1 = memref1 VS1.2
1555 VS1.2: vx.2 = memref2 VS1.3
1556 VS1.3: vx.3 = memref3
1558 S2: z = x + ... VSnew.0: vz0 = vx.0 + ... VSnew.1
1559 VSnew.1: vz1 = vx.1 + ... VSnew.2
1560 VSnew.2: vz2 = vx.2 + ... VSnew.3
1561 VSnew.3: vz3 = vx.3 + ...
1563 The vectorization of S1 is explained in vectorizable_load.
1564 The vectorization of S2:
1565 To create the first vector-stmt out of the 4 copies - VSnew.0 -
1566 the function 'vect_get_vec_def_for_operand' is called to
1567 get the relevant vector-def for each operand of S2. For operand x it
1568 returns the vector-def 'vx.0'.
1570 To create the remaining copies of the vector-stmt (VSnew.j), this
1571 function is called to get the relevant vector-def for each operand. It is
1572 obtained from the respective VS1.j stmt, which is recorded in the
1573 STMT_VINFO_RELATED_STMT field of the stmt that defines VEC_OPRND.
1575 For example, to obtain the vector-def 'vx.1' in order to create the
1576 vector stmt 'VSnew.1', this function is called with VEC_OPRND='vx.0'.
1577 Given 'vx0' we obtain the stmt that defines it ('VS1.0'); from the
1578 STMT_VINFO_RELATED_STMT field of 'VS1.0' we obtain the next copy - 'VS1.1',
1579 and return its def ('vx.1').
1580 Overall, to create the above sequence this function will be called 3 times:
1581 vx.1 = vect_get_vec_def_for_stmt_copy (vinfo, vx.0);
1582 vx.2 = vect_get_vec_def_for_stmt_copy (vinfo, vx.1);
1583 vx.3 = vect_get_vec_def_for_stmt_copy (vinfo, vx.2); */
1586 vect_get_vec_def_for_stmt_copy (vec_info
*vinfo
, tree vec_oprnd
)
1588 stmt_vec_info def_stmt_info
= vinfo
->lookup_def (vec_oprnd
);
1590 /* Do nothing; can reuse same def. */
1593 def_stmt_info
= STMT_VINFO_RELATED_STMT (def_stmt_info
);
1594 gcc_assert (def_stmt_info
);
1595 if (gphi
*phi
= dyn_cast
<gphi
*> (def_stmt_info
->stmt
))
1596 vec_oprnd
= PHI_RESULT (phi
);
1598 vec_oprnd
= gimple_get_lhs (def_stmt_info
->stmt
);
1603 /* Get vectorized definitions for the operands to create a copy of an original
1604 stmt. See vect_get_vec_def_for_stmt_copy () for details. */
1607 vect_get_vec_defs_for_stmt_copy (vec_info
*vinfo
,
1608 vec
<tree
> *vec_oprnds0
,
1609 vec
<tree
> *vec_oprnds1
)
1611 tree vec_oprnd
= vec_oprnds0
->pop ();
1613 vec_oprnd
= vect_get_vec_def_for_stmt_copy (vinfo
, vec_oprnd
);
1614 vec_oprnds0
->quick_push (vec_oprnd
);
1616 if (vec_oprnds1
&& vec_oprnds1
->length ())
1618 vec_oprnd
= vec_oprnds1
->pop ();
1619 vec_oprnd
= vect_get_vec_def_for_stmt_copy (vinfo
, vec_oprnd
);
1620 vec_oprnds1
->quick_push (vec_oprnd
);
1625 /* Get vectorized definitions for OP0 and OP1. */
1628 vect_get_vec_defs (vec_info
*vinfo
, tree op0
, tree op1
, stmt_vec_info stmt_info
,
1629 vec
<tree
> *vec_oprnds0
,
1630 vec
<tree
> *vec_oprnds1
,
1635 auto_vec
<vec
<tree
> > vec_defs (SLP_TREE_CHILDREN (slp_node
).length ());
1636 vect_get_slp_defs (vinfo
, slp_node
, &vec_defs
, op1
? 2 : 1);
1637 *vec_oprnds0
= vec_defs
[0];
1639 *vec_oprnds1
= vec_defs
[1];
1645 vec_oprnds0
->create (1);
1646 vec_oprnd
= vect_get_vec_def_for_operand (vinfo
, op0
, stmt_info
);
1647 vec_oprnds0
->quick_push (vec_oprnd
);
1651 vec_oprnds1
->create (1);
1652 vec_oprnd
= vect_get_vec_def_for_operand (vinfo
, op1
, stmt_info
);
1653 vec_oprnds1
->quick_push (vec_oprnd
);
1658 /* Helper function called by vect_finish_replace_stmt and
1659 vect_finish_stmt_generation. Set the location of the new
1660 statement and create and return a stmt_vec_info for it. */
1662 static stmt_vec_info
1663 vect_finish_stmt_generation_1 (vec_info
*vinfo
,
1664 stmt_vec_info stmt_info
, gimple
*vec_stmt
)
1666 stmt_vec_info vec_stmt_info
= vinfo
->add_stmt (vec_stmt
);
1668 if (dump_enabled_p ())
1669 dump_printf_loc (MSG_NOTE
, vect_location
, "add new stmt: %G", vec_stmt
);
1673 gimple_set_location (vec_stmt
, gimple_location (stmt_info
->stmt
));
1675 /* While EH edges will generally prevent vectorization, stmt might
1676 e.g. be in a must-not-throw region. Ensure newly created stmts
1677 that could throw are part of the same region. */
1678 int lp_nr
= lookup_stmt_eh_lp (stmt_info
->stmt
);
1679 if (lp_nr
!= 0 && stmt_could_throw_p (cfun
, vec_stmt
))
1680 add_stmt_to_eh_lp (vec_stmt
, lp_nr
);
1683 gcc_assert (!stmt_could_throw_p (cfun
, vec_stmt
));
1685 return vec_stmt_info
;
1688 /* Replace the scalar statement STMT_INFO with a new vector statement VEC_STMT,
1689 which sets the same scalar result as STMT_INFO did. Create and return a
1690 stmt_vec_info for VEC_STMT. */
1693 vect_finish_replace_stmt (vec_info
*vinfo
,
1694 stmt_vec_info stmt_info
, gimple
*vec_stmt
)
1696 gimple
*scalar_stmt
= vect_orig_stmt (stmt_info
)->stmt
;
1697 gcc_assert (gimple_get_lhs (scalar_stmt
) == gimple_get_lhs (vec_stmt
));
1699 gimple_stmt_iterator gsi
= gsi_for_stmt (scalar_stmt
);
1700 gsi_replace (&gsi
, vec_stmt
, true);
1702 return vect_finish_stmt_generation_1 (vinfo
, stmt_info
, vec_stmt
);
1705 /* Add VEC_STMT to the vectorized implementation of STMT_INFO and insert it
1706 before *GSI. Create and return a stmt_vec_info for VEC_STMT. */
1709 vect_finish_stmt_generation (vec_info
*vinfo
,
1710 stmt_vec_info stmt_info
, gimple
*vec_stmt
,
1711 gimple_stmt_iterator
*gsi
)
1713 gcc_assert (!stmt_info
|| gimple_code (stmt_info
->stmt
) != GIMPLE_LABEL
);
1715 if (!gsi_end_p (*gsi
)
1716 && gimple_has_mem_ops (vec_stmt
))
1718 gimple
*at_stmt
= gsi_stmt (*gsi
);
1719 tree vuse
= gimple_vuse (at_stmt
);
1720 if (vuse
&& TREE_CODE (vuse
) == SSA_NAME
)
1722 tree vdef
= gimple_vdef (at_stmt
);
1723 gimple_set_vuse (vec_stmt
, gimple_vuse (at_stmt
));
1724 /* If we have an SSA vuse and insert a store, update virtual
1725 SSA form to avoid triggering the renamer. Do so only
1726 if we can easily see all uses - which is what almost always
1727 happens with the way vectorized stmts are inserted. */
1728 if ((vdef
&& TREE_CODE (vdef
) == SSA_NAME
)
1729 && ((is_gimple_assign (vec_stmt
)
1730 && !is_gimple_reg (gimple_assign_lhs (vec_stmt
)))
1731 || (is_gimple_call (vec_stmt
)
1732 && !(gimple_call_flags (vec_stmt
)
1733 & (ECF_CONST
|ECF_PURE
|ECF_NOVOPS
)))))
1735 tree new_vdef
= copy_ssa_name (vuse
, vec_stmt
);
1736 gimple_set_vdef (vec_stmt
, new_vdef
);
1737 SET_USE (gimple_vuse_op (at_stmt
), new_vdef
);
1741 gsi_insert_before (gsi
, vec_stmt
, GSI_SAME_STMT
);
1742 return vect_finish_stmt_generation_1 (vinfo
, stmt_info
, vec_stmt
);
1745 /* We want to vectorize a call to combined function CFN with function
1746 decl FNDECL, using VECTYPE_OUT as the type of the output and VECTYPE_IN
1747 as the types of all inputs. Check whether this is possible using
1748 an internal function, returning its code if so or IFN_LAST if not. */
1751 vectorizable_internal_function (combined_fn cfn
, tree fndecl
,
1752 tree vectype_out
, tree vectype_in
)
1755 if (internal_fn_p (cfn
))
1756 ifn
= as_internal_fn (cfn
);
1758 ifn
= associated_internal_fn (fndecl
);
1759 if (ifn
!= IFN_LAST
&& direct_internal_fn_p (ifn
))
1761 const direct_internal_fn_info
&info
= direct_internal_fn (ifn
);
1762 if (info
.vectorizable
)
1764 tree type0
= (info
.type0
< 0 ? vectype_out
: vectype_in
);
1765 tree type1
= (info
.type1
< 0 ? vectype_out
: vectype_in
);
1766 if (direct_internal_fn_supported_p (ifn
, tree_pair (type0
, type1
),
1767 OPTIMIZE_FOR_SPEED
))
1775 static tree
permute_vec_elements (vec_info
*, tree
, tree
, tree
, stmt_vec_info
,
1776 gimple_stmt_iterator
*);
1778 /* Check whether a load or store statement in the loop described by
1779 LOOP_VINFO is possible in a fully-masked loop. This is testing
1780 whether the vectorizer pass has the appropriate support, as well as
1781 whether the target does.
1783 VLS_TYPE says whether the statement is a load or store and VECTYPE
1784 is the type of the vector being loaded or stored. MEMORY_ACCESS_TYPE
1785 says how the load or store is going to be implemented and GROUP_SIZE
1786 is the number of load or store statements in the containing group.
1787 If the access is a gather load or scatter store, GS_INFO describes
1788 its arguments. If the load or store is conditional, SCALAR_MASK is the
1789 condition under which it occurs.
1791 Clear LOOP_VINFO_CAN_FULLY_MASK_P if a fully-masked loop is not
1792 supported, otherwise record the required mask types. */
1795 check_load_store_masking (loop_vec_info loop_vinfo
, tree vectype
,
1796 vec_load_store_type vls_type
, int group_size
,
1797 vect_memory_access_type memory_access_type
,
1798 gather_scatter_info
*gs_info
, tree scalar_mask
)
1800 /* Invariant loads need no special support. */
1801 if (memory_access_type
== VMAT_INVARIANT
)
1804 vec_loop_masks
*masks
= &LOOP_VINFO_MASKS (loop_vinfo
);
1805 machine_mode vecmode
= TYPE_MODE (vectype
);
1806 bool is_load
= (vls_type
== VLS_LOAD
);
1807 if (memory_access_type
== VMAT_LOAD_STORE_LANES
)
1810 ? !vect_load_lanes_supported (vectype
, group_size
, true)
1811 : !vect_store_lanes_supported (vectype
, group_size
, true))
1813 if (dump_enabled_p ())
1814 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1815 "can't use a fully-masked loop because the"
1816 " target doesn't have an appropriate masked"
1817 " load/store-lanes instruction.\n");
1818 LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo
) = false;
1821 unsigned int ncopies
= vect_get_num_copies (loop_vinfo
, vectype
);
1822 vect_record_loop_mask (loop_vinfo
, masks
, ncopies
, vectype
, scalar_mask
);
1826 if (memory_access_type
== VMAT_GATHER_SCATTER
)
1828 internal_fn ifn
= (is_load
1829 ? IFN_MASK_GATHER_LOAD
1830 : IFN_MASK_SCATTER_STORE
);
1831 if (!internal_gather_scatter_fn_supported_p (ifn
, vectype
,
1832 gs_info
->memory_type
,
1833 gs_info
->offset_vectype
,
1836 if (dump_enabled_p ())
1837 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1838 "can't use a fully-masked loop because the"
1839 " target doesn't have an appropriate masked"
1840 " gather load or scatter store instruction.\n");
1841 LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo
) = false;
1844 unsigned int ncopies
= vect_get_num_copies (loop_vinfo
, vectype
);
1845 vect_record_loop_mask (loop_vinfo
, masks
, ncopies
, vectype
, scalar_mask
);
1849 if (memory_access_type
!= VMAT_CONTIGUOUS
1850 && memory_access_type
!= VMAT_CONTIGUOUS_PERMUTE
)
1852 /* Element X of the data must come from iteration i * VF + X of the
1853 scalar loop. We need more work to support other mappings. */
1854 if (dump_enabled_p ())
1855 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1856 "can't use a fully-masked loop because an access"
1857 " isn't contiguous.\n");
1858 LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo
) = false;
1862 machine_mode mask_mode
;
1863 if (!VECTOR_MODE_P (vecmode
)
1864 || !targetm
.vectorize
.get_mask_mode (vecmode
).exists (&mask_mode
)
1865 || !can_vec_mask_load_store_p (vecmode
, mask_mode
, is_load
))
1867 if (dump_enabled_p ())
1868 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1869 "can't use a fully-masked loop because the target"
1870 " doesn't have the appropriate masked load or"
1872 LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo
) = false;
1875 /* We might load more scalars than we need for permuting SLP loads.
1876 We checked in get_group_load_store_type that the extra elements
1877 don't leak into a new vector. */
1878 poly_uint64 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
1879 poly_uint64 vf
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
);
1880 unsigned int nvectors
;
1881 if (can_div_away_from_zero_p (group_size
* vf
, nunits
, &nvectors
))
1882 vect_record_loop_mask (loop_vinfo
, masks
, nvectors
, vectype
, scalar_mask
);
/* Return the mask input to a masked load or store.  VEC_MASK is the
   vectorized form of the scalar mask condition and LOOP_MASK, if nonnull,
   is the mask that needs to be applied to all loads and stores in a
   vectorized loop.  Return VEC_MASK if LOOP_MASK is null, otherwise
   return VEC_MASK & LOOP_MASK.

   MASK_TYPE is the type of both masks.  If new statements are needed,
   insert them before GSI.  */
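/* For instance, if LOOP_MASK is the mask {1, 1, 1, 0} produced for a
   partial final iteration and VEC_MASK is the vectorized condition
   {1, 0, 1, 1}, the value returned below is the element-wise AND
   {1, 0, 1, 0}.  (Purely illustrative mask values.)  */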
prepare_load_store_mask (tree mask_type, tree loop_mask, tree vec_mask,
			 gimple_stmt_iterator *gsi)
1899 gcc_assert (useless_type_conversion_p (mask_type
, TREE_TYPE (vec_mask
)));
1903 gcc_assert (TREE_TYPE (loop_mask
) == mask_type
);
1904 tree and_res
= make_temp_ssa_name (mask_type
, NULL
, "vec_mask_and");
1905 gimple
*and_stmt
= gimple_build_assign (and_res
, BIT_AND_EXPR
,
1906 vec_mask
, loop_mask
);
1907 gsi_insert_before (gsi
, and_stmt
, GSI_SAME_STMT
);
/* Determine whether we can use a gather load or scatter store to vectorize
   strided load or store STMT_INFO by truncating the current offset to a
   smaller width.  We need to be able to construct an offset vector:

     { 0, X, X*2, X*3, ... }

   without loss of precision, where X is STMT_INFO's DR_STEP.

   Return true if this is possible, describing the gather load or scatter
   store in GS_INFO.  MASKED_P is true if the load or store is conditional.  */
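/* Illustrative example: if DR_STEP is 4 and the loop is known to run at
   most 250 scalar iterations, the largest offset needed is 4 * 249 = 996,
   which fits in 16 bits, so a 16-bit offset type can be tried (and with
   scale 4, i.e. the element size, an 8-bit offset {0, 1, 2, ...} would
   also do).  (Illustrative step and iteration count.)  */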
vect_truncate_gather_scatter_offset (stmt_vec_info stmt_info,
				     loop_vec_info loop_vinfo, bool masked_p,
				     gather_scatter_info *gs_info)
1927 dr_vec_info
*dr_info
= STMT_VINFO_DR_INFO (stmt_info
);
1928 data_reference
*dr
= dr_info
->dr
;
1929 tree step
= DR_STEP (dr
);
1930 if (TREE_CODE (step
) != INTEGER_CST
)
1932 /* ??? Perhaps we could use range information here? */
1933 if (dump_enabled_p ())
1934 dump_printf_loc (MSG_NOTE
, vect_location
,
1935 "cannot truncate variable step.\n");
1939 /* Get the number of bits in an element. */
1940 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
1941 scalar_mode element_mode
= SCALAR_TYPE_MODE (TREE_TYPE (vectype
));
1942 unsigned int element_bits
= GET_MODE_BITSIZE (element_mode
);
1944 /* Set COUNT to the upper limit on the number of elements - 1.
1945 Start with the maximum vectorization factor. */
1946 unsigned HOST_WIDE_INT count
= vect_max_vf (loop_vinfo
) - 1;
1948 /* Try lowering COUNT to the number of scalar latch iterations. */
1949 class loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
1950 widest_int max_iters
;
1951 if (max_loop_iterations (loop
, &max_iters
)
1952 && max_iters
< count
)
1953 count
= max_iters
.to_shwi ();
1955 /* Try scales of 1 and the element size. */
1956 int scales
[] = { 1, vect_get_scalar_dr_size (dr_info
) };
1957 wi::overflow_type overflow
= wi::OVF_NONE
;
1958 for (int i
= 0; i
< 2; ++i
)
1960 int scale
= scales
[i
];
1962 if (!wi::multiple_of_p (wi::to_widest (step
), scale
, SIGNED
, &factor
))
1965 /* Determine the minimum precision of (COUNT - 1) * STEP / SCALE. */
1966 widest_int range
= wi::mul (count
, factor
, SIGNED
, &overflow
);
1969 signop sign
= range
>= 0 ? UNSIGNED
: SIGNED
;
1970 unsigned int min_offset_bits
= wi::min_precision (range
, sign
);
1972 /* Find the narrowest viable offset type. */
1973 unsigned int offset_bits
= 1U << ceil_log2 (min_offset_bits
);
1974 tree offset_type
= build_nonstandard_integer_type (offset_bits
,
1977 /* See whether the target supports the operation with an offset
1978 no narrower than OFFSET_TYPE. */
1979 tree memory_type
= TREE_TYPE (DR_REF (dr
));
1980 if (!vect_gather_scatter_fn_p (loop_vinfo
, DR_IS_READ (dr
), masked_p
,
1981 vectype
, memory_type
, offset_type
, scale
,
1982 &gs_info
->ifn
, &gs_info
->offset_vectype
))
1985 gs_info
->decl
= NULL_TREE
;
1986 /* Logically the sum of DR_BASE_ADDRESS, DR_INIT and DR_OFFSET,
1987 but we don't need to store that here. */
1988 gs_info
->base
= NULL_TREE
;
1989 gs_info
->element_type
= TREE_TYPE (vectype
);
1990 gs_info
->offset
= fold_convert (offset_type
, step
);
1991 gs_info
->offset_dt
= vect_constant_def
;
1992 gs_info
->scale
= scale
;
1993 gs_info
->memory_type
= memory_type
;
1997 if (overflow
&& dump_enabled_p ())
1998 dump_printf_loc (MSG_NOTE
, vect_location
,
1999 "truncating gather/scatter offset to %d bits"
2000 " might change its value.\n", element_bits
);
/* Return true if we can use gather/scatter internal functions to
   vectorize STMT_INFO, which is a grouped or strided load or store.
   MASKED_P is true if load or store is conditional.  When returning
   true, fill in GS_INFO with the information required to perform the
   operation.  */
vect_use_strided_gather_scatters_p (stmt_vec_info stmt_info,
				    loop_vec_info loop_vinfo, bool masked_p,
				    gather_scatter_info *gs_info)
2016 if (!vect_check_gather_scatter (stmt_info
, loop_vinfo
, gs_info
)
2018 return vect_truncate_gather_scatter_offset (stmt_info
, loop_vinfo
,
2021 tree old_offset_type
= TREE_TYPE (gs_info
->offset
);
2022 tree new_offset_type
= TREE_TYPE (gs_info
->offset_vectype
);
2024 gcc_assert (TYPE_PRECISION (new_offset_type
)
2025 >= TYPE_PRECISION (old_offset_type
));
2026 gs_info
->offset
= fold_convert (new_offset_type
, gs_info
->offset
);
2028 if (dump_enabled_p ())
2029 dump_printf_loc (MSG_NOTE
, vect_location
,
2030 "using gather/scatter for strided/grouped access,"
2031 " scale = %d\n", gs_info
->scale
);
/* STMT_INFO is a non-strided load or store, meaning that it accesses
   elements with a known constant step.  Return -1 if that step
   is negative, 0 if it is zero, and 1 if it is greater than zero.  */
static int
compare_step_with_zero (vec_info *vinfo, stmt_vec_info stmt_info)
{
  dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
  return tree_int_cst_compare (vect_dr_behavior (vinfo, dr_info)->step,
			       size_zero_node);
}
/* If the target supports a permute mask that reverses the elements in
   a vector of type VECTYPE, return that mask, otherwise return null.  */
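/* For example, for a 4-element vector the required permutation selects
   elements {3, 2, 1, 0}; the builder below encodes this as a single
   stepped pattern starting at NUNITS - 1.  */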
static tree
perm_mask_for_reverse (tree vectype)
{
  poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);

  /* The encoding has a single stepped pattern.  */
  vec_perm_builder sel (nunits, 1, 3);
  for (int i = 0; i < 3; ++i)
    sel.quick_push (nunits - 1 - i);

  vec_perm_indices indices (sel, 1, nunits);
  if (!can_vec_perm_const_p (TYPE_MODE (vectype), indices))
    return NULL_TREE;
  return vect_gen_perm_mask_checked (vectype, indices);
}
/* A subroutine of get_load_store_type, with a subset of the same
   arguments.  Handle the case where STMT_INFO is a load or store that
   accesses consecutive elements with a negative step.  */
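/* E.g. a load of 4-byte elements with a DR_STEP of -4 reads consecutive
   memory in reverse order; if the target can reverse a vector we can use
   a contiguous access followed by a permute (VMAT_CONTIGUOUS_REVERSE),
   otherwise the code below falls back to VMAT_ELEMENTWISE.  */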
static vect_memory_access_type
get_negative_load_store_type (vec_info *vinfo,
			      stmt_vec_info stmt_info, tree vectype,
			      vec_load_store_type vls_type,
			      unsigned int ncopies)
2077 dr_vec_info
*dr_info
= STMT_VINFO_DR_INFO (stmt_info
);
2078 dr_alignment_support alignment_support_scheme
;
2082 if (dump_enabled_p ())
2083 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2084 "multiple types with negative step.\n");
2085 return VMAT_ELEMENTWISE
;
2088 alignment_support_scheme
= vect_supportable_dr_alignment (vinfo
,
2090 if (alignment_support_scheme
!= dr_aligned
2091 && alignment_support_scheme
!= dr_unaligned_supported
)
2093 if (dump_enabled_p ())
2094 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2095 "negative step but alignment required.\n");
2096 return VMAT_ELEMENTWISE
;
2099 if (vls_type
== VLS_STORE_INVARIANT
)
2101 if (dump_enabled_p ())
2102 dump_printf_loc (MSG_NOTE
, vect_location
,
2103 "negative step with invariant source;"
2104 " no permute needed.\n");
2105 return VMAT_CONTIGUOUS_DOWN
;
2108 if (!perm_mask_for_reverse (vectype
))
2110 if (dump_enabled_p ())
2111 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2112 "negative step and reversing not supported.\n");
2113 return VMAT_ELEMENTWISE
;
2116 return VMAT_CONTIGUOUS_REVERSE
;
/* STMT_INFO is either a masked or unconditional store.  Return the value
   being stored.  */

vect_get_store_rhs (stmt_vec_info stmt_info)
2125 if (gassign
*assign
= dyn_cast
<gassign
*> (stmt_info
->stmt
))
2127 gcc_assert (gimple_assign_single_p (assign
));
2128 return gimple_assign_rhs1 (assign
);
2130 if (gcall
*call
= dyn_cast
<gcall
*> (stmt_info
->stmt
))
2132 internal_fn ifn
= gimple_call_internal_fn (call
);
2133 int index
= internal_fn_stored_value_index (ifn
);
2134 gcc_assert (index
>= 0);
2135 return gimple_call_arg (call
, index
);
/* Function VECTOR_VECTOR_COMPOSITION_TYPE

   This function returns a vector type which can be composed from NELTS
   pieces, whose type is recorded in PTYPE.  VTYPE should be a vector type,
   and has the same vector size as the returned vector.  It first checks
   whether the target supports constructing the vector directly from
   pieces-sized vectors; if not, it then checks whether a pieces-sized
   scalar (integer) mode can be used instead.  It returns NULL_TREE if
   no suitable composition can be found.

   For example, for (vtype=V16QI, nelts=4), we can probably get:
     - V16QI with PTYPE V4QI.
     - V4SI with PTYPE SI.  */
vector_vector_composition_type (tree vtype, poly_uint64 nelts, tree *ptype)
2157 gcc_assert (VECTOR_TYPE_P (vtype
));
2158 gcc_assert (known_gt (nelts
, 0U));
2160 machine_mode vmode
= TYPE_MODE (vtype
);
2161 if (!VECTOR_MODE_P (vmode
))
2164 poly_uint64 vbsize
= GET_MODE_BITSIZE (vmode
);
2165 unsigned int pbsize
;
2166 if (constant_multiple_p (vbsize
, nelts
, &pbsize
))
2168 /* First check if vec_init optab supports construction from
2169 vector pieces directly. */
2170 scalar_mode elmode
= SCALAR_TYPE_MODE (TREE_TYPE (vtype
));
2171 poly_uint64 inelts
= pbsize
/ GET_MODE_BITSIZE (elmode
);
2173 if (related_vector_mode (vmode
, elmode
, inelts
).exists (&rmode
)
2174 && (convert_optab_handler (vec_init_optab
, vmode
, rmode
)
2175 != CODE_FOR_nothing
))
2177 *ptype
= build_vector_type (TREE_TYPE (vtype
), inelts
);
2181 /* Otherwise check if exists an integer type of the same piece size and
2182 if vec_init optab supports construction from it directly. */
2183 if (int_mode_for_size (pbsize
, 0).exists (&elmode
)
2184 && related_vector_mode (vmode
, elmode
, nelts
).exists (&rmode
)
2185 && (convert_optab_handler (vec_init_optab
, rmode
, elmode
)
2186 != CODE_FOR_nothing
))
2188 *ptype
= build_nonstandard_integer_type (pbsize
, 1);
2189 return build_vector_type (*ptype
, nelts
);
/* A subroutine of get_load_store_type, with a subset of the same
   arguments.  Handle the case where STMT_INFO is part of a grouped load
   or store.

   For stores, the statements in the group are all consecutive
   and there is no gap at the end.  For loads, the statements in the
   group might not be consecutive; there can be gaps between statements
   as well as at the end.  */
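/* For example, loads of a[4*i] and a[4*i + 1] in a loop form a group with
   DR_GROUP_SIZE 4 and a gap of 2, since two elements of each 4-element
   chunk are never accessed.  (Illustrative array and indices.)  */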
get_group_load_store_type (vec_info *vinfo, stmt_vec_info stmt_info,
			   tree vectype, bool slp,
			   bool masked_p, vec_load_store_type vls_type,
			   vect_memory_access_type *memory_access_type,
			   gather_scatter_info *gs_info)
2212 loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
);
2213 class loop
*loop
= loop_vinfo
? LOOP_VINFO_LOOP (loop_vinfo
) : NULL
;
2214 stmt_vec_info first_stmt_info
= DR_GROUP_FIRST_ELEMENT (stmt_info
);
2215 dr_vec_info
*first_dr_info
= STMT_VINFO_DR_INFO (first_stmt_info
);
2216 unsigned int group_size
= DR_GROUP_SIZE (first_stmt_info
);
2217 bool single_element_p
= (stmt_info
== first_stmt_info
2218 && !DR_GROUP_NEXT_ELEMENT (stmt_info
));
2219 unsigned HOST_WIDE_INT gap
= DR_GROUP_GAP (first_stmt_info
);
2220 poly_uint64 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
2222 /* True if the vectorized statements would access beyond the last
2223 statement in the group. */
2224 bool overrun_p
= false;
2226 /* True if we can cope with such overrun by peeling for gaps, so that
2227 there is at least one final scalar iteration after the vector loop. */
2228 bool can_overrun_p
= (!masked_p
2229 && vls_type
== VLS_LOAD
2233 /* There can only be a gap at the end of the group if the stride is
2234 known at compile time. */
2235 gcc_assert (!STMT_VINFO_STRIDED_P (first_stmt_info
) || gap
== 0);
2237 /* Stores can't yet have gaps. */
2238 gcc_assert (slp
|| vls_type
== VLS_LOAD
|| gap
== 0);
2242 if (STMT_VINFO_STRIDED_P (first_stmt_info
))
2244 /* Try to use consecutive accesses of DR_GROUP_SIZE elements,
2245 separated by the stride, until we have a complete vector.
2246 Fall back to scalar accesses if that isn't possible. */
2247 if (multiple_p (nunits
, group_size
))
2248 *memory_access_type
= VMAT_STRIDED_SLP
;
2250 *memory_access_type
= VMAT_ELEMENTWISE
;
2254 overrun_p
= loop_vinfo
&& gap
!= 0;
2255 if (overrun_p
&& vls_type
!= VLS_LOAD
)
2257 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2258 "Grouped store with gaps requires"
2259 " non-consecutive accesses\n");
2262 /* An overrun is fine if the trailing elements are smaller
2263 than the alignment boundary B. Every vector access will
2264 be a multiple of B and so we are guaranteed to access a
2265 non-gap element in the same B-sized block. */
2267 && gap
< (vect_known_alignment_in_bytes (first_dr_info
)
2268 / vect_get_scalar_dr_size (first_dr_info
)))
2271 /* If the gap splits the vector in half and the target
2272 can do half-vector operations avoid the epilogue peeling
2273 by simply loading half of the vector only. Usually
2274 the construction with an upper zero half will be elided. */
2275 dr_alignment_support alignment_support_scheme
;
2279 && (((alignment_support_scheme
2280 = vect_supportable_dr_alignment (vinfo
,
2281 first_dr_info
, false)))
2283 || alignment_support_scheme
== dr_unaligned_supported
)
2284 && known_eq (nunits
, (group_size
- gap
) * 2)
2285 && known_eq (nunits
, group_size
)
2286 && (vector_vector_composition_type (vectype
, 2, &half_vtype
)
2290 if (overrun_p
&& !can_overrun_p
)
2292 if (dump_enabled_p ())
2293 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2294 "Peeling for outer loop is not supported\n");
2297 int cmp
= compare_step_with_zero (vinfo
, stmt_info
);
2299 *memory_access_type
= get_negative_load_store_type
2300 (vinfo
, stmt_info
, vectype
, vls_type
, 1);
2303 gcc_assert (!loop_vinfo
|| cmp
> 0);
2304 *memory_access_type
= VMAT_CONTIGUOUS
;
2310 /* We can always handle this case using elementwise accesses,
2311 but see if something more efficient is available. */
2312 *memory_access_type
= VMAT_ELEMENTWISE
;
2314 /* If there is a gap at the end of the group then these optimizations
2315 would access excess elements in the last iteration. */
2316 bool would_overrun_p
= (gap
!= 0);
2317 /* An overrun is fine if the trailing elements are smaller than the
2318 alignment boundary B. Every vector access will be a multiple of B
2319 and so we are guaranteed to access a non-gap element in the
2320 same B-sized block. */
2323 && gap
< (vect_known_alignment_in_bytes (first_dr_info
)
2324 / vect_get_scalar_dr_size (first_dr_info
)))
2325 would_overrun_p
= false;
2327 if (!STMT_VINFO_STRIDED_P (first_stmt_info
)
2328 && (can_overrun_p
|| !would_overrun_p
)
2329 && compare_step_with_zero (vinfo
, stmt_info
) > 0)
2331 /* First cope with the degenerate case of a single-element
2333 if (known_eq (TYPE_VECTOR_SUBPARTS (vectype
), 1U))
2334 *memory_access_type
= VMAT_CONTIGUOUS
;
2336 /* Otherwise try using LOAD/STORE_LANES. */
2337 if (*memory_access_type
== VMAT_ELEMENTWISE
2338 && (vls_type
== VLS_LOAD
2339 ? vect_load_lanes_supported (vectype
, group_size
, masked_p
)
2340 : vect_store_lanes_supported (vectype
, group_size
,
2343 *memory_access_type
= VMAT_LOAD_STORE_LANES
;
2344 overrun_p
= would_overrun_p
;
2347 /* If that fails, try using permuting loads. */
2348 if (*memory_access_type
== VMAT_ELEMENTWISE
2349 && (vls_type
== VLS_LOAD
2350 ? vect_grouped_load_supported (vectype
, single_element_p
,
2352 : vect_grouped_store_supported (vectype
, group_size
)))
2354 *memory_access_type
= VMAT_CONTIGUOUS_PERMUTE
;
2355 overrun_p
= would_overrun_p
;
      /* As a last resort, try using a gather load or scatter store.

	 ??? Although the code can handle all group sizes correctly,
	 it probably isn't a win to use separate strided accesses based
	 on nearby locations.  Or, even if it's a win over scalar code,
	 it might not be a win over vectorizing at a lower VF, if that
	 allows us to use contiguous accesses.  */
2366 if (*memory_access_type
== VMAT_ELEMENTWISE
2369 && vect_use_strided_gather_scatters_p (stmt_info
, loop_vinfo
,
2371 *memory_access_type
= VMAT_GATHER_SCATTER
;
2374 if (vls_type
!= VLS_LOAD
&& first_stmt_info
== stmt_info
)
2376 /* STMT is the leader of the group. Check the operands of all the
2377 stmts of the group. */
2378 stmt_vec_info next_stmt_info
= DR_GROUP_NEXT_ELEMENT (stmt_info
);
2379 while (next_stmt_info
)
2381 tree op
= vect_get_store_rhs (next_stmt_info
);
2382 enum vect_def_type dt
;
2383 if (!vect_is_simple_use (op
, vinfo
, &dt
))
2385 if (dump_enabled_p ())
2386 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2387 "use not simple.\n");
2390 next_stmt_info
= DR_GROUP_NEXT_ELEMENT (next_stmt_info
);
2396 gcc_assert (can_overrun_p
);
2397 if (dump_enabled_p ())
2398 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2399 "Data access with gaps requires scalar "
2401 LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo
) = true;
/* Analyze load or store statement STMT_INFO of type VLS_TYPE.  Return true
   if there is a memory access type that the vectorized form can use,
   storing it in *MEMORY_ACCESS_TYPE if so.  If we decide to use gathers
   or scatters, fill in GS_INFO accordingly.

   SLP says whether we're performing SLP rather than loop vectorization.
   MASKED_P is true if the statement is conditional on a vectorized mask.
   VECTYPE is the vector type that the vectorized statements will use.
   NCOPIES is the number of vector statements that will be needed.  */
get_load_store_type (vec_info *vinfo, stmt_vec_info stmt_info,
		     tree vectype, bool slp,
		     bool masked_p, vec_load_store_type vls_type,
		     unsigned int ncopies,
		     vect_memory_access_type *memory_access_type,
		     gather_scatter_info *gs_info)
2425 loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
);
2426 poly_uint64 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
2427 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info
))
2429 *memory_access_type
= VMAT_GATHER_SCATTER
;
2430 if (!vect_check_gather_scatter (stmt_info
, loop_vinfo
, gs_info
))
2432 else if (!vect_is_simple_use (gs_info
->offset
, vinfo
,
2433 &gs_info
->offset_dt
,
2434 &gs_info
->offset_vectype
))
2436 if (dump_enabled_p ())
2437 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2438 "%s index use not simple.\n",
2439 vls_type
== VLS_LOAD
? "gather" : "scatter");
2443 else if (STMT_VINFO_GROUPED_ACCESS (stmt_info
))
2445 if (!get_group_load_store_type (vinfo
, stmt_info
, vectype
, slp
, masked_p
,
2446 vls_type
, memory_access_type
, gs_info
))
2449 else if (STMT_VINFO_STRIDED_P (stmt_info
))
2453 && vect_use_strided_gather_scatters_p (stmt_info
, loop_vinfo
,
2455 *memory_access_type
= VMAT_GATHER_SCATTER
;
2457 *memory_access_type
= VMAT_ELEMENTWISE
;
2461 int cmp
= compare_step_with_zero (vinfo
, stmt_info
);
2463 *memory_access_type
= get_negative_load_store_type
2464 (vinfo
, stmt_info
, vectype
, vls_type
, ncopies
);
2467 gcc_assert (vls_type
== VLS_LOAD
);
2468 *memory_access_type
= VMAT_INVARIANT
;
2471 *memory_access_type
= VMAT_CONTIGUOUS
;
2474 if ((*memory_access_type
== VMAT_ELEMENTWISE
2475 || *memory_access_type
== VMAT_STRIDED_SLP
)
2476 && !nunits
.is_constant ())
2478 if (dump_enabled_p ())
2479 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2480 "Not using elementwise accesses due to variable "
2481 "vectorization factor.\n");
2485 /* FIXME: At the moment the cost model seems to underestimate the
2486 cost of using elementwise accesses. This check preserves the
2487 traditional behavior until that can be fixed. */
2488 stmt_vec_info first_stmt_info
= DR_GROUP_FIRST_ELEMENT (stmt_info
);
2489 if (!first_stmt_info
)
2490 first_stmt_info
= stmt_info
;
2491 if (*memory_access_type
== VMAT_ELEMENTWISE
2492 && !STMT_VINFO_STRIDED_P (first_stmt_info
)
2493 && !(stmt_info
== DR_GROUP_FIRST_ELEMENT (stmt_info
)
2494 && !DR_GROUP_NEXT_ELEMENT (stmt_info
)
2495 && !pow2p_hwi (DR_GROUP_SIZE (stmt_info
))))
2497 if (dump_enabled_p ())
2498 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2499 "not falling back to elementwise accesses\n");
/* Return true if boolean argument MASK is suitable for vectorizing
   conditional operation STMT_INFO.  When returning true, store the type
   of the definition in *MASK_DT_OUT and the type of the vectorized mask
   in *MASK_VECTYPE_OUT.  */

vect_check_scalar_mask (vec_info *vinfo, stmt_vec_info stmt_info, tree mask,
			vect_def_type *mask_dt_out,
			tree *mask_vectype_out)
2515 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (mask
)))
2517 if (dump_enabled_p ())
2518 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2519 "mask argument is not a boolean.\n");
2523 if (TREE_CODE (mask
) != SSA_NAME
)
2525 if (dump_enabled_p ())
2526 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2527 "mask argument is not an SSA name.\n");
2531 enum vect_def_type mask_dt
;
2533 if (!vect_is_simple_use (mask
, vinfo
, &mask_dt
, &mask_vectype
))
2535 if (dump_enabled_p ())
2536 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2537 "mask use not simple.\n");
2541 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
2543 mask_vectype
= get_mask_type_for_scalar_type (vinfo
, TREE_TYPE (vectype
));
2545 if (!mask_vectype
|| !VECTOR_BOOLEAN_TYPE_P (mask_vectype
))
2547 if (dump_enabled_p ())
2548 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2549 "could not find an appropriate vector mask type.\n");
2553 if (maybe_ne (TYPE_VECTOR_SUBPARTS (mask_vectype
),
2554 TYPE_VECTOR_SUBPARTS (vectype
)))
2556 if (dump_enabled_p ())
2557 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2558 "vector mask type %T"
2559 " does not match vector data type %T.\n",
2560 mask_vectype
, vectype
);
2565 *mask_dt_out
= mask_dt
;
2566 *mask_vectype_out
= mask_vectype
;
/* Return true if stored value RHS is suitable for vectorizing store
   statement STMT_INFO.  When returning true, store the type of the
   definition in *RHS_DT_OUT, the type of the vectorized store value in
   *RHS_VECTYPE_OUT and the type of the store in *VLS_TYPE_OUT.  */

vect_check_store_rhs (vec_info *vinfo, stmt_vec_info stmt_info, tree rhs,
		      vect_def_type *rhs_dt_out, tree *rhs_vectype_out,
		      vec_load_store_type *vls_type_out)
2580 /* In the case this is a store from a constant make sure
2581 native_encode_expr can handle it. */
2582 if (CONSTANT_CLASS_P (rhs
) && native_encode_expr (rhs
, NULL
, 64) == 0)
2584 if (dump_enabled_p ())
2585 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2586 "cannot encode constant as a byte sequence.\n");
2590 enum vect_def_type rhs_dt
;
2592 if (!vect_is_simple_use (rhs
, vinfo
, &rhs_dt
, &rhs_vectype
))
2594 if (dump_enabled_p ())
2595 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2596 "use not simple.\n");
2600 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
2601 if (rhs_vectype
&& !useless_type_conversion_p (vectype
, rhs_vectype
))
2603 if (dump_enabled_p ())
2604 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2605 "incompatible vector types.\n");
2609 *rhs_dt_out
= rhs_dt
;
2610 *rhs_vectype_out
= rhs_vectype
;
2611 if (rhs_dt
== vect_constant_def
|| rhs_dt
== vect_external_def
)
2612 *vls_type_out
= VLS_STORE_INVARIANT
;
2614 *vls_type_out
= VLS_STORE
;
/* Build an all-ones vector mask of type MASKTYPE while vectorizing STMT_INFO.
   Note that we support masks with floating-point type, in which case the
   floats are interpreted as a bitmask.  */
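/* E.g. for a V4SF mask type this builds a vector of four floats whose bit
   pattern is all ones (via real_from_target below), which the masked
   gather/scatter builtins that take such masks treat as "all lanes
   active".  (Illustrative mask type.)  */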
vect_build_all_ones_mask (vec_info *vinfo,
			  stmt_vec_info stmt_info, tree masktype)
2626 if (TREE_CODE (masktype
) == INTEGER_TYPE
)
2627 return build_int_cst (masktype
, -1);
2628 else if (TREE_CODE (TREE_TYPE (masktype
)) == INTEGER_TYPE
)
2630 tree mask
= build_int_cst (TREE_TYPE (masktype
), -1);
2631 mask
= build_vector_from_val (masktype
, mask
);
2632 return vect_init_vector (vinfo
, stmt_info
, mask
, masktype
, NULL
);
2634 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (masktype
)))
2638 for (int j
= 0; j
< 6; ++j
)
2640 real_from_target (&r
, tmp
, TYPE_MODE (TREE_TYPE (masktype
)));
2641 tree mask
= build_real (TREE_TYPE (masktype
), r
);
2642 mask
= build_vector_from_val (masktype
, mask
);
2643 return vect_init_vector (vinfo
, stmt_info
, mask
, masktype
, NULL
);
/* Build an all-zero merge value of type VECTYPE while vectorizing
   STMT_INFO as a gather load.  */

vect_build_zero_merge_argument (vec_info *vinfo,
				stmt_vec_info stmt_info, tree vectype)
2656 if (TREE_CODE (TREE_TYPE (vectype
)) == INTEGER_TYPE
)
2657 merge
= build_int_cst (TREE_TYPE (vectype
), 0);
2658 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (vectype
)))
2662 for (int j
= 0; j
< 6; ++j
)
2664 real_from_target (&r
, tmp
, TYPE_MODE (TREE_TYPE (vectype
)));
2665 merge
= build_real (TREE_TYPE (vectype
), r
);
2669 merge
= build_vector_from_val (vectype
, merge
);
2670 return vect_init_vector (vinfo
, stmt_info
, merge
, vectype
, NULL
);
/* Build a gather load call while vectorizing STMT_INFO.  Insert new
   instructions before GSI and add them to VEC_STMT.  GS_INFO describes
   the gather load operation.  If the load is conditional, MASK is the
   unvectorized condition and MASK_DT is its definition type, otherwise
   MASK is null.  */
2680 vect_build_gather_load_calls (vec_info
*vinfo
, stmt_vec_info stmt_info
,
2681 gimple_stmt_iterator
*gsi
,
2682 stmt_vec_info
*vec_stmt
,
2683 gather_scatter_info
*gs_info
,
2686 loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
);
2687 class loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
2688 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
2689 poly_uint64 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
2690 int ncopies
= vect_get_num_copies (loop_vinfo
, vectype
);
2691 edge pe
= loop_preheader_edge (loop
);
2692 enum { NARROW
, NONE
, WIDEN
} modifier
;
2693 poly_uint64 gather_off_nunits
2694 = TYPE_VECTOR_SUBPARTS (gs_info
->offset_vectype
);
2696 tree arglist
= TYPE_ARG_TYPES (TREE_TYPE (gs_info
->decl
));
2697 tree rettype
= TREE_TYPE (TREE_TYPE (gs_info
->decl
));
2698 tree srctype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
2699 tree ptrtype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
2700 tree idxtype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
2701 tree masktype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
2702 tree scaletype
= TREE_VALUE (arglist
);
2703 tree real_masktype
= masktype
;
2704 gcc_checking_assert (types_compatible_p (srctype
, rettype
)
2706 || TREE_CODE (masktype
) == INTEGER_TYPE
2707 || types_compatible_p (srctype
, masktype
)));
2708 if (mask
&& TREE_CODE (masktype
) == INTEGER_TYPE
)
2709 masktype
= truth_type_for (srctype
);
2711 tree mask_halftype
= masktype
;
2712 tree perm_mask
= NULL_TREE
;
2713 tree mask_perm_mask
= NULL_TREE
;
2714 if (known_eq (nunits
, gather_off_nunits
))
2716 else if (known_eq (nunits
* 2, gather_off_nunits
))
2720 /* Currently widening gathers and scatters are only supported for
2721 fixed-length vectors. */
2722 int count
= gather_off_nunits
.to_constant ();
2723 vec_perm_builder
sel (count
, count
, 1);
2724 for (int i
= 0; i
< count
; ++i
)
2725 sel
.quick_push (i
| (count
/ 2));
2727 vec_perm_indices
indices (sel
, 1, count
);
2728 perm_mask
= vect_gen_perm_mask_checked (gs_info
->offset_vectype
,
2731 else if (known_eq (nunits
, gather_off_nunits
* 2))
2735 /* Currently narrowing gathers and scatters are only supported for
2736 fixed-length vectors. */
2737 int count
= nunits
.to_constant ();
2738 vec_perm_builder
sel (count
, count
, 1);
2739 sel
.quick_grow (count
);
2740 for (int i
= 0; i
< count
; ++i
)
2741 sel
[i
] = i
< count
/ 2 ? i
: i
+ count
/ 2;
2742 vec_perm_indices
indices (sel
, 2, count
);
2743 perm_mask
= vect_gen_perm_mask_checked (vectype
, indices
);
2747 if (mask
&& masktype
== real_masktype
)
2749 for (int i
= 0; i
< count
; ++i
)
2750 sel
[i
] = i
| (count
/ 2);
2751 indices
.new_vector (sel
, 2, count
);
2752 mask_perm_mask
= vect_gen_perm_mask_checked (masktype
, indices
);
2755 mask_halftype
= truth_type_for (gs_info
->offset_vectype
);
2760 tree scalar_dest
= gimple_get_lhs (stmt_info
->stmt
);
2761 tree vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
2763 tree ptr
= fold_convert (ptrtype
, gs_info
->base
);
2764 if (!is_gimple_min_invariant (ptr
))
2767 ptr
= force_gimple_operand (ptr
, &seq
, true, NULL_TREE
);
2768 basic_block new_bb
= gsi_insert_seq_on_edge_immediate (pe
, seq
);
2769 gcc_assert (!new_bb
);
2772 tree scale
= build_int_cst (scaletype
, gs_info
->scale
);
2774 tree vec_oprnd0
= NULL_TREE
;
2775 tree vec_mask
= NULL_TREE
;
2776 tree src_op
= NULL_TREE
;
2777 tree mask_op
= NULL_TREE
;
2778 tree prev_res
= NULL_TREE
;
2779 stmt_vec_info prev_stmt_info
= NULL
;
2783 src_op
= vect_build_zero_merge_argument (vinfo
, stmt_info
, rettype
);
2784 mask_op
= vect_build_all_ones_mask (vinfo
, stmt_info
, masktype
);
2787 for (int j
= 0; j
< ncopies
; ++j
)
2790 if (modifier
== WIDEN
&& (j
& 1))
2791 op
= permute_vec_elements (vinfo
, vec_oprnd0
, vec_oprnd0
,
2792 perm_mask
, stmt_info
, gsi
);
2795 = vect_get_vec_def_for_operand (vinfo
, gs_info
->offset
, stmt_info
);
2797 op
= vec_oprnd0
= vect_get_vec_def_for_stmt_copy (loop_vinfo
,
2800 if (!useless_type_conversion_p (idxtype
, TREE_TYPE (op
)))
2802 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op
)),
2803 TYPE_VECTOR_SUBPARTS (idxtype
)));
2804 var
= vect_get_new_ssa_name (idxtype
, vect_simple_var
);
2805 op
= build1 (VIEW_CONVERT_EXPR
, idxtype
, op
);
2806 gassign
*new_stmt
= gimple_build_assign (var
, VIEW_CONVERT_EXPR
, op
);
2807 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
2813 if (mask_perm_mask
&& (j
& 1))
2814 mask_op
= permute_vec_elements (vinfo
, mask_op
, mask_op
,
2815 mask_perm_mask
, stmt_info
, gsi
);
2819 vec_mask
= vect_get_vec_def_for_operand (vinfo
, mask
, stmt_info
);
2820 else if (modifier
!= NARROW
|| (j
& 1) == 0)
2821 vec_mask
= vect_get_vec_def_for_stmt_copy (loop_vinfo
,
2825 if (!useless_type_conversion_p (masktype
, TREE_TYPE (vec_mask
)))
2827 poly_uint64 sub1
= TYPE_VECTOR_SUBPARTS (TREE_TYPE (mask_op
));
2828 poly_uint64 sub2
= TYPE_VECTOR_SUBPARTS (masktype
);
2829 gcc_assert (known_eq (sub1
, sub2
));
2830 var
= vect_get_new_ssa_name (masktype
, vect_simple_var
);
2831 mask_op
= build1 (VIEW_CONVERT_EXPR
, masktype
, mask_op
);
2833 = gimple_build_assign (var
, VIEW_CONVERT_EXPR
, mask_op
);
2834 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
2838 if (modifier
== NARROW
&& masktype
!= real_masktype
)
2840 var
= vect_get_new_ssa_name (mask_halftype
, vect_simple_var
);
2842 = gimple_build_assign (var
, (j
& 1) ? VEC_UNPACK_HI_EXPR
2843 : VEC_UNPACK_LO_EXPR
,
2845 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
2851 tree mask_arg
= mask_op
;
2852 if (masktype
!= real_masktype
)
2854 tree utype
, optype
= TREE_TYPE (mask_op
);
2855 if (TYPE_MODE (real_masktype
) == TYPE_MODE (optype
))
2856 utype
= real_masktype
;
2858 utype
= lang_hooks
.types
.type_for_mode (TYPE_MODE (optype
), 1);
2859 var
= vect_get_new_ssa_name (utype
, vect_scalar_var
);
2860 mask_arg
= build1 (VIEW_CONVERT_EXPR
, utype
, mask_op
);
2862 = gimple_build_assign (var
, VIEW_CONVERT_EXPR
, mask_arg
);
2863 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
2865 if (!useless_type_conversion_p (real_masktype
, utype
))
2867 gcc_assert (TYPE_PRECISION (utype
)
2868 <= TYPE_PRECISION (real_masktype
));
2869 var
= vect_get_new_ssa_name (real_masktype
, vect_scalar_var
);
2870 new_stmt
= gimple_build_assign (var
, NOP_EXPR
, mask_arg
);
2871 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
2874 src_op
= build_zero_cst (srctype
);
2876 gcall
*new_call
= gimple_build_call (gs_info
->decl
, 5, src_op
, ptr
, op
,
2879 stmt_vec_info new_stmt_info
;
2880 if (!useless_type_conversion_p (vectype
, rettype
))
2882 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (vectype
),
2883 TYPE_VECTOR_SUBPARTS (rettype
)));
2884 op
= vect_get_new_ssa_name (rettype
, vect_simple_var
);
2885 gimple_call_set_lhs (new_call
, op
);
2886 vect_finish_stmt_generation (vinfo
, stmt_info
, new_call
, gsi
);
2887 var
= make_ssa_name (vec_dest
);
2888 op
= build1 (VIEW_CONVERT_EXPR
, vectype
, op
);
2889 gassign
*new_stmt
= gimple_build_assign (var
, VIEW_CONVERT_EXPR
, op
);
2891 = vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
2895 var
= make_ssa_name (vec_dest
, new_call
);
2896 gimple_call_set_lhs (new_call
, var
);
2898 = vect_finish_stmt_generation (vinfo
, stmt_info
, new_call
, gsi
);
2901 if (modifier
== NARROW
)
2908 var
= permute_vec_elements (vinfo
, prev_res
, var
, perm_mask
,
2910 new_stmt_info
= loop_vinfo
->lookup_def (var
);
2913 if (prev_stmt_info
== NULL
)
2914 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt_info
;
2916 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt_info
;
2917 prev_stmt_info
= new_stmt_info
;
/* Prepare the base and offset in GS_INFO for vectorization.
   Set *DATAREF_PTR to the loop-invariant base address and *VEC_OFFSET
   to the vectorized offset argument for the first copy of STMT_INFO.
   STMT_INFO is the statement described by GS_INFO and LOOP is the
   containing loop.  */

vect_get_gather_scatter_ops (vec_info *vinfo,
			     class loop *loop, stmt_vec_info stmt_info,
			     gather_scatter_info *gs_info,
			     tree *dataref_ptr, tree *vec_offset)
2933 gimple_seq stmts
= NULL
;
2934 *dataref_ptr
= force_gimple_operand (gs_info
->base
, &stmts
, true, NULL_TREE
);
2938 edge pe
= loop_preheader_edge (loop
);
2939 new_bb
= gsi_insert_seq_on_edge_immediate (pe
, stmts
);
2940 gcc_assert (!new_bb
);
2942 *vec_offset
= vect_get_vec_def_for_operand (vinfo
, gs_info
->offset
, stmt_info
,
2943 gs_info
->offset_vectype
);
/* Prepare to implement a grouped or strided load or store using
   the gather load or scatter store operation described by GS_INFO.
   STMT_INFO is the load or store statement.

   Set *DATAREF_BUMP to the amount that should be added to the base
   address after each copy of the vectorized statement.  Set *VEC_OFFSET
   to an invariant offset vector in which element I has the value
   I * DR_STEP / SCALE.  */
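/* Worked example: with a 4-element VECTYPE, DR_STEP 8 and SCALE 4,
   *DATAREF_BUMP is 8 * 4 = 32 bytes per copy and *VEC_OFFSET is the
   invariant vector {0, 2, 4, 6}, since element I is I * 8 / 4.
   (Illustrative step and scale values.)  */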
vect_get_strided_load_store_ops (stmt_vec_info stmt_info,
				 loop_vec_info loop_vinfo,
				 gather_scatter_info *gs_info,
				 tree *dataref_bump, tree *vec_offset)
2961 struct data_reference
*dr
= STMT_VINFO_DATA_REF (stmt_info
);
2962 class loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
2963 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
2966 tree bump
= size_binop (MULT_EXPR
,
2967 fold_convert (sizetype
, unshare_expr (DR_STEP (dr
))),
2968 size_int (TYPE_VECTOR_SUBPARTS (vectype
)));
2969 *dataref_bump
= force_gimple_operand (bump
, &stmts
, true, NULL_TREE
);
2971 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop
), stmts
);
2973 /* The offset given in GS_INFO can have pointer type, so use the element
2974 type of the vector instead. */
2975 tree offset_type
= TREE_TYPE (gs_info
->offset
);
2976 offset_type
= TREE_TYPE (gs_info
->offset_vectype
);
2978 /* Calculate X = DR_STEP / SCALE and convert it to the appropriate type. */
2979 tree step
= size_binop (EXACT_DIV_EXPR
, unshare_expr (DR_STEP (dr
)),
2980 ssize_int (gs_info
->scale
));
2981 step
= fold_convert (offset_type
, step
);
2982 step
= force_gimple_operand (step
, &stmts
, true, NULL_TREE
);
2984 /* Create {0, X, X*2, X*3, ...}. */
2985 *vec_offset
= gimple_build (&stmts
, VEC_SERIES_EXPR
, gs_info
->offset_vectype
,
2986 build_zero_cst (offset_type
), step
);
2988 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop
), stmts
);
/* Return the amount that should be added to a vector pointer to move
   to the next or previous copy of AGGR_TYPE.  DR_INFO is the data reference
   being vectorized and MEMORY_ACCESS_TYPE describes the type of
   vectorization.  */
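/* E.g. for a contiguous access with a 16-byte AGGR_TYPE the increment is
   16 bytes, or -16 when the scalar step is negative; invariant accesses
   need no increment at all.  (Illustrative sizes.)  */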
vect_get_data_ptr_increment (vec_info *vinfo,
			     dr_vec_info *dr_info, tree aggr_type,
			     vect_memory_access_type memory_access_type)
3001 if (memory_access_type
== VMAT_INVARIANT
)
3002 return size_zero_node
;
3004 tree iv_step
= TYPE_SIZE_UNIT (aggr_type
);
3005 tree step
= vect_dr_behavior (vinfo
, dr_info
)->step
;
3006 if (tree_int_cst_sgn (step
) == -1)
3007 iv_step
= fold_build1 (NEGATE_EXPR
, TREE_TYPE (iv_step
), iv_step
);
3011 /* Check and perform vectorization of BUILT_IN_BSWAP{16,32,64,128}. */
3014 vectorizable_bswap (vec_info
*vinfo
,
3015 stmt_vec_info stmt_info
, gimple_stmt_iterator
*gsi
,
3016 stmt_vec_info
*vec_stmt
, slp_tree slp_node
,
3018 tree vectype_in
, stmt_vector_for_cost
*cost_vec
)
3021 gcall
*stmt
= as_a
<gcall
*> (stmt_info
->stmt
);
3022 loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
);
3025 op
= gimple_call_arg (stmt
, 0);
3026 vectype
= STMT_VINFO_VECTYPE (stmt_info
);
3027 poly_uint64 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
  /* Multiple types in SLP are handled by creating the appropriate number of
     vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
     case of SLP.  */
3035 ncopies
= vect_get_num_copies (loop_vinfo
, vectype
);
3037 gcc_assert (ncopies
>= 1);
3039 tree char_vectype
= get_same_sized_vectype (char_type_node
, vectype_in
);
3043 poly_uint64 num_bytes
= TYPE_VECTOR_SUBPARTS (char_vectype
);
3044 unsigned word_bytes
;
3045 if (!constant_multiple_p (num_bytes
, nunits
, &word_bytes
))
  /* The encoding uses one stepped pattern for each byte in the word.  */
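  /* E.g. for a bswap32 on V16QI data (WORD_BYTES == 4) the permutation
     built below selects bytes {3,2,1,0, 7,6,5,4, 11,10,9,8, ...},
     reversing each 4-byte word in place.  (Illustrative modes.)  */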
3049 vec_perm_builder
elts (num_bytes
, word_bytes
, 3);
3050 for (unsigned i
= 0; i
< 3; ++i
)
3051 for (unsigned j
= 0; j
< word_bytes
; ++j
)
3052 elts
.quick_push ((i
+ 1) * word_bytes
- j
- 1);
3054 vec_perm_indices
indices (elts
, 1, num_bytes
);
3055 if (!can_vec_perm_const_p (TYPE_MODE (char_vectype
), indices
))
3061 && !vect_maybe_update_slp_op_vectype (slp_op
[0], vectype_in
))
3063 if (dump_enabled_p ())
3064 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3065 "incompatible vector types for invariants\n");
3069 STMT_VINFO_TYPE (stmt_info
) = call_vec_info_type
;
3070 DUMP_VECT_SCOPE ("vectorizable_bswap");
3073 record_stmt_cost (cost_vec
,
3074 1, vector_stmt
, stmt_info
, 0, vect_prologue
);
3075 record_stmt_cost (cost_vec
,
3076 ncopies
, vec_perm
, stmt_info
, 0, vect_body
);
3081 tree bswap_vconst
= vec_perm_indices_to_tree (char_vectype
, indices
);
3084 vec
<tree
> vec_oprnds
= vNULL
;
3085 stmt_vec_info new_stmt_info
= NULL
;
3086 stmt_vec_info prev_stmt_info
= NULL
;
3087 for (unsigned j
= 0; j
< ncopies
; j
++)
3091 vect_get_vec_defs (vinfo
, op
, NULL
, stmt_info
, &vec_oprnds
, NULL
,
3094 vect_get_vec_defs_for_stmt_copy (vinfo
, &vec_oprnds
, NULL
);
      /* Arguments are ready.  Create the new vector stmt.  */
3099 FOR_EACH_VEC_ELT (vec_oprnds
, i
, vop
)
3102 tree tem
= make_ssa_name (char_vectype
);
3103 new_stmt
= gimple_build_assign (tem
, build1 (VIEW_CONVERT_EXPR
,
3104 char_vectype
, vop
));
3105 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
3106 tree tem2
= make_ssa_name (char_vectype
);
3107 new_stmt
= gimple_build_assign (tem2
, VEC_PERM_EXPR
,
3108 tem
, tem
, bswap_vconst
);
3109 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
3110 tem
= make_ssa_name (vectype
);
3111 new_stmt
= gimple_build_assign (tem
, build1 (VIEW_CONVERT_EXPR
,
3114 = vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
3116 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt_info
);
3123 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt_info
;
3125 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt_info
;
3127 prev_stmt_info
= new_stmt_info
;
3130 vec_oprnds
.release ();
/* Return true if vector types VECTYPE_IN and VECTYPE_OUT have
   integer elements and if we can narrow VECTYPE_IN to VECTYPE_OUT
   in a single step.  On success, store the binary pack code in
   *CONVERT_CODE.  */
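/* For instance, narrowing from a vector of 32-bit integers to a vector of
   16-bit integers in one step would typically use VEC_PACK_TRUNC_EXPR as
   the pack code, provided the target supports that operation for the
   modes involved.  (Illustrative element widths.)  */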
simple_integer_narrowing (tree vectype_out, tree vectype_in,
			  tree_code *convert_code)
3143 if (!INTEGRAL_TYPE_P (TREE_TYPE (vectype_out
))
3144 || !INTEGRAL_TYPE_P (TREE_TYPE (vectype_in
)))
3148 int multi_step_cvt
= 0;
3149 auto_vec
<tree
, 8> interm_types
;
3150 if (!supportable_narrowing_operation (NOP_EXPR
, vectype_out
, vectype_in
,
3151 &code
, &multi_step_cvt
, &interm_types
)
3155 *convert_code
= code
;
/* Function vectorizable_call.

   Check if STMT_INFO performs a function call that can be vectorized.
   If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at GSI.
   Return true if STMT_INFO is vectorizable in this way.  */

vectorizable_call (vec_info *vinfo,
		   stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
		   stmt_vec_info *vec_stmt, slp_tree slp_node,
		   stmt_vector_for_cost *cost_vec)
3176 tree vec_oprnd0
= NULL_TREE
, vec_oprnd1
= NULL_TREE
;
3177 stmt_vec_info prev_stmt_info
;
3178 tree vectype_out
, vectype_in
;
3179 poly_uint64 nunits_in
;
3180 poly_uint64 nunits_out
;
3181 loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
);
3182 bb_vec_info bb_vinfo
= dyn_cast
<bb_vec_info
> (vinfo
);
3183 tree fndecl
, new_temp
, rhs_type
;
3184 enum vect_def_type dt
[4]
3185 = { vect_unknown_def_type
, vect_unknown_def_type
, vect_unknown_def_type
,
3186 vect_unknown_def_type
};
3187 tree vectypes
[ARRAY_SIZE (dt
)] = {};
3188 slp_tree slp_op
[ARRAY_SIZE (dt
)] = {};
3189 int ndts
= ARRAY_SIZE (dt
);
3191 auto_vec
<tree
, 8> vargs
;
3192 auto_vec
<tree
, 8> orig_vargs
;
3193 enum { NARROW
, NONE
, WIDEN
} modifier
;
3197 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
3200 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
3204 /* Is STMT_INFO a vectorizable call? */
3205 stmt
= dyn_cast
<gcall
*> (stmt_info
->stmt
);
3209 if (gimple_call_internal_p (stmt
)
3210 && (internal_load_fn_p (gimple_call_internal_fn (stmt
))
3211 || internal_store_fn_p (gimple_call_internal_fn (stmt
))))
3212 /* Handled by vectorizable_load and vectorizable_store. */
3215 if (gimple_call_lhs (stmt
) == NULL_TREE
3216 || TREE_CODE (gimple_call_lhs (stmt
)) != SSA_NAME
)
3219 gcc_checking_assert (!stmt_can_throw_internal (cfun
, stmt
));
3221 vectype_out
= STMT_VINFO_VECTYPE (stmt_info
);
3223 /* Process function arguments. */
3224 rhs_type
= NULL_TREE
;
3225 vectype_in
= NULL_TREE
;
3226 nargs
= gimple_call_num_args (stmt
);
  /* Bail out if the function has more than four arguments; we do not have
     interesting builtin functions to vectorize with more than two arguments
     except for fma.  No arguments is also not good.  */
3231 if (nargs
== 0 || nargs
> 4)
3234 /* Ignore the arguments of IFN_GOMP_SIMD_LANE, they are magic. */
3235 combined_fn cfn
= gimple_call_combined_fn (stmt
);
3236 if (cfn
== CFN_GOMP_SIMD_LANE
)
3239 rhs_type
= unsigned_type_node
;
3243 if (internal_fn_p (cfn
))
3244 mask_opno
= internal_fn_mask_index (as_internal_fn (cfn
));
3246 for (i
= 0; i
< nargs
; i
++)
3248 if ((int) i
== mask_opno
)
3250 op
= gimple_call_arg (stmt
, i
);
3251 if (!vect_check_scalar_mask (vinfo
,
3252 stmt_info
, op
, &dt
[i
], &vectypes
[i
]))
3257 if (!vect_is_simple_use (vinfo
, stmt_info
, slp_node
,
3258 i
, &op
, &slp_op
[i
], &dt
[i
], &vectypes
[i
]))
3260 if (dump_enabled_p ())
3261 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3262 "use not simple.\n");
3266 /* We can only handle calls with arguments of the same type. */
3268 && !types_compatible_p (rhs_type
, TREE_TYPE (op
)))
3270 if (dump_enabled_p ())
3271 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3272 "argument types differ.\n");
3276 rhs_type
= TREE_TYPE (op
);
3279 vectype_in
= vectypes
[i
];
3280 else if (vectypes
[i
]
3281 && !types_compatible_p (vectypes
[i
], vectype_in
))
3283 if (dump_enabled_p ())
3284 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3285 "argument vector types differ.\n");
3289 /* If all arguments are external or constant defs, infer the vector type
3290 from the scalar type. */
3292 vectype_in
= get_vectype_for_scalar_type (vinfo
, rhs_type
, slp_node
);
3294 gcc_assert (vectype_in
);
3297 if (dump_enabled_p ())
3298 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3299 "no vectype for scalar type %T\n", rhs_type
);
3303 /* FORNOW: we don't yet support mixtures of vector sizes for calls,
3304 just mixtures of nunits. E.g. DI->SI versions of __builtin_ctz*
3305 are traditionally vectorized as two VnDI->VnDI IFN_CTZs followed
3306 by a pack of the two vectors into an SI vector. We would need
3307 separate code to handle direct VnDI->VnSI IFN_CTZs. */
3308 if (TYPE_SIZE (vectype_in
) != TYPE_SIZE (vectype_out
))
3310 if (dump_enabled_p ())
3311 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3312 "mismatched vector sizes %T and %T\n",
3313 vectype_in
, vectype_out
);
3317 if (VECTOR_BOOLEAN_TYPE_P (vectype_out
)
3318 != VECTOR_BOOLEAN_TYPE_P (vectype_in
))
3320 if (dump_enabled_p ())
3321 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3322 "mixed mask and nonmask vector types\n");
3327 nunits_in
= TYPE_VECTOR_SUBPARTS (vectype_in
);
3328 nunits_out
= TYPE_VECTOR_SUBPARTS (vectype_out
);
3329 if (known_eq (nunits_in
* 2, nunits_out
))
3331 else if (known_eq (nunits_out
, nunits_in
))
3333 else if (known_eq (nunits_out
* 2, nunits_in
))
3338 /* We only handle functions that do not read or clobber memory. */
3339 if (gimple_vuse (stmt
))
3341 if (dump_enabled_p ())
3342 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3343 "function reads from or writes to memory.\n");
3347 /* For now, we only vectorize functions if a target specific builtin
3348 is available. TODO -- in some cases, it might be profitable to
3349 insert the calls for pieces of the vector, in order to be able
3350 to vectorize other operations in the loop. */
3352 internal_fn ifn
= IFN_LAST
;
3353 tree callee
= gimple_call_fndecl (stmt
);
3355 /* First try using an internal function. */
3356 tree_code convert_code
= ERROR_MARK
;
3358 && (modifier
== NONE
3359 || (modifier
== NARROW
3360 && simple_integer_narrowing (vectype_out
, vectype_in
,
3362 ifn
= vectorizable_internal_function (cfn
, callee
, vectype_out
,
3365 /* If that fails, try asking for a target-specific built-in function. */
3366 if (ifn
== IFN_LAST
)
3368 if (cfn
!= CFN_LAST
)
3369 fndecl
= targetm
.vectorize
.builtin_vectorized_function
3370 (cfn
, vectype_out
, vectype_in
);
3371 else if (callee
&& fndecl_built_in_p (callee
, BUILT_IN_MD
))
3372 fndecl
= targetm
.vectorize
.builtin_md_vectorized_function
3373 (callee
, vectype_out
, vectype_in
);
3376 if (ifn
== IFN_LAST
&& !fndecl
)
3378 if (cfn
== CFN_GOMP_SIMD_LANE
3381 && LOOP_VINFO_LOOP (loop_vinfo
)->simduid
3382 && TREE_CODE (gimple_call_arg (stmt
, 0)) == SSA_NAME
3383 && LOOP_VINFO_LOOP (loop_vinfo
)->simduid
3384 == SSA_NAME_VAR (gimple_call_arg (stmt
, 0)))
3386 /* We can handle IFN_GOMP_SIMD_LANE by returning a
3387 { 0, 1, 2, ... vf - 1 } vector. */
3388 gcc_assert (nargs
== 0);
3390 else if (modifier
== NONE
3391 && (gimple_call_builtin_p (stmt
, BUILT_IN_BSWAP16
)
3392 || gimple_call_builtin_p (stmt
, BUILT_IN_BSWAP32
)
3393 || gimple_call_builtin_p (stmt
, BUILT_IN_BSWAP64
)
3394 || gimple_call_builtin_p (stmt
, BUILT_IN_BSWAP128
)))
3395 return vectorizable_bswap (vinfo
, stmt_info
, gsi
, vec_stmt
, slp_node
,
3396 slp_op
, vectype_in
, cost_vec
);
3399 if (dump_enabled_p ())
3400 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3401 "function is not vectorizable.\n");
3408 else if (modifier
== NARROW
&& ifn
== IFN_LAST
)
3409 ncopies
= vect_get_num_copies (loop_vinfo
, vectype_out
);
3411 ncopies
= vect_get_num_copies (loop_vinfo
, vectype_in
);
3413 /* Sanity check: make sure that at least one copy of the vectorized stmt
3414 needs to be generated. */
3415 gcc_assert (ncopies
>= 1);
3417 vec_loop_masks
*masks
= (loop_vinfo
? &LOOP_VINFO_MASKS (loop_vinfo
) : NULL
);
3418 if (!vec_stmt
) /* transformation not required. */
3421 for (i
= 0; i
< nargs
; ++i
)
3422 if (!vect_maybe_update_slp_op_vectype (slp_op
[i
], vectype_in
))
3424 if (dump_enabled_p ())
3425 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3426 "incompatible vector types for invariants\n");
3429 STMT_VINFO_TYPE (stmt_info
) = call_vec_info_type
;
3430 DUMP_VECT_SCOPE ("vectorizable_call");
3431 vect_model_simple_cost (vinfo
, stmt_info
,
3432 ncopies
, dt
, ndts
, slp_node
, cost_vec
);
3433 if (ifn
!= IFN_LAST
&& modifier
== NARROW
&& !slp_node
)
3434 record_stmt_cost (cost_vec
, ncopies
/ 2,
3435 vec_promote_demote
, stmt_info
, 0, vect_body
);
3437 if (loop_vinfo
&& mask_opno
>= 0)
3439 unsigned int nvectors
= (slp_node
3440 ? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
)
3442 tree scalar_mask
= gimple_call_arg (stmt_info
->stmt
, mask_opno
);
3443 vect_record_loop_mask (loop_vinfo
, masks
, nvectors
,
3444 vectype_out
, scalar_mask
);
3451 if (dump_enabled_p ())
3452 dump_printf_loc (MSG_NOTE
, vect_location
, "transform call.\n");
3455 scalar_dest
= gimple_call_lhs (stmt
);
3456 vec_dest
= vect_create_destination_var (scalar_dest
, vectype_out
);
3458 bool masked_loop_p
= loop_vinfo
&& LOOP_VINFO_FULLY_MASKED_P (loop_vinfo
);
3460 stmt_vec_info new_stmt_info
= NULL
;
3461 prev_stmt_info
= NULL
;
3462 if (modifier
== NONE
|| ifn
!= IFN_LAST
)
3464 tree prev_res
= NULL_TREE
;
3465 vargs
.safe_grow (nargs
);
3466 orig_vargs
.safe_grow (nargs
);
3467 for (j
= 0; j
< ncopies
; ++j
)
3469 /* Build argument list for the vectorized call. */
3472 auto_vec
<vec
<tree
> > vec_defs (nargs
);
3473 vec
<tree
> vec_oprnds0
;
3475 vect_get_slp_defs (vinfo
, slp_node
, &vec_defs
);
3476 vec_oprnds0
= vec_defs
[0];
3478 /* Arguments are ready. Create the new vector stmt. */
3479 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vec_oprnd0
)
3482 for (k
= 0; k
< nargs
; k
++)
3484 vec
<tree
> vec_oprndsk
= vec_defs
[k
];
3485 vargs
[k
] = vec_oprndsk
[i
];
3487 if (modifier
== NARROW
)
3489 /* We don't define any narrowing conditional functions
3491 gcc_assert (mask_opno
< 0);
3492 tree half_res
= make_ssa_name (vectype_in
);
3494 = gimple_build_call_internal_vec (ifn
, vargs
);
3495 gimple_call_set_lhs (call
, half_res
);
3496 gimple_call_set_nothrow (call
, true);
3497 vect_finish_stmt_generation (vinfo
, stmt_info
, call
, gsi
);
3500 prev_res
= half_res
;
3503 new_temp
= make_ssa_name (vec_dest
);
3505 = gimple_build_assign (new_temp
, convert_code
,
3506 prev_res
, half_res
);
3508 = vect_finish_stmt_generation (vinfo
, stmt_info
,
3513 if (mask_opno
>= 0 && masked_loop_p
)
3515 unsigned int vec_num
= vec_oprnds0
.length ();
3516 /* Always true for SLP. */
3517 gcc_assert (ncopies
== 1);
3518 tree mask
= vect_get_loop_mask (gsi
, masks
, vec_num
,
3520 vargs
[mask_opno
] = prepare_load_store_mask
3521 (TREE_TYPE (mask
), mask
, vargs
[mask_opno
], gsi
);
3525 if (ifn
!= IFN_LAST
)
3526 call
= gimple_build_call_internal_vec (ifn
, vargs
);
3528 call
= gimple_build_call_vec (fndecl
, vargs
);
3529 new_temp
= make_ssa_name (vec_dest
, call
);
3530 gimple_call_set_lhs (call
, new_temp
);
3531 gimple_call_set_nothrow (call
, true);
3533 = vect_finish_stmt_generation (vinfo
, stmt_info
,
3536 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt_info
);
3539 for (i
= 0; i
< nargs
; i
++)
3541 vec
<tree
> vec_oprndsi
= vec_defs
[i
];
3542 vec_oprndsi
.release ();
3547 for (i
= 0; i
< nargs
; i
++)
3549 op
= gimple_call_arg (stmt
, i
);
3552 = vect_get_vec_def_for_operand (vinfo
,
3553 op
, stmt_info
, vectypes
[i
]);
3556 = vect_get_vec_def_for_stmt_copy (vinfo
, orig_vargs
[i
]);
3558 orig_vargs
[i
] = vargs
[i
] = vec_oprnd0
;
3561 if (mask_opno
>= 0 && masked_loop_p
)
3563 tree mask
= vect_get_loop_mask (gsi
, masks
, ncopies
,
3566 = prepare_load_store_mask (TREE_TYPE (mask
), mask
,
3567 vargs
[mask_opno
], gsi
);
3570 if (cfn
== CFN_GOMP_SIMD_LANE
)
3572 tree cst
= build_index_vector (vectype_out
, j
* nunits_out
, 1);
3574 = vect_get_new_ssa_name (vectype_out
, vect_simple_var
, "cst_");
3575 gimple
*init_stmt
= gimple_build_assign (new_var
, cst
);
3576 vect_init_vector_1 (vinfo
, stmt_info
, init_stmt
, NULL
);
3577 new_temp
= make_ssa_name (vec_dest
);
3578 gimple
*new_stmt
= gimple_build_assign (new_temp
, new_var
);
3580 = vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
3582 else if (modifier
== NARROW
)
3584 /* We don't define any narrowing conditional functions at
3586 gcc_assert (mask_opno
< 0);
3587 tree half_res
= make_ssa_name (vectype_in
);
3588 gcall
*call
	  call = gimple_build_call_internal_vec (ifn, vargs);
	  gimple_call_set_lhs (call, half_res);
	  gimple_call_set_nothrow (call, true);
	  vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
	  prev_res = half_res;
	  new_temp = make_ssa_name (vec_dest);
	  gassign *new_stmt = gimple_build_assign (new_temp, convert_code,
						   prev_res, half_res);
	  new_stmt_info
	    = vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);

	  if (ifn != IFN_LAST)
	    call = gimple_build_call_internal_vec (ifn, vargs);
	  else
	    call = gimple_build_call_vec (fndecl, vargs);
	  new_temp = make_ssa_name (vec_dest, call);
	  gimple_call_set_lhs (call, new_temp);
	  gimple_call_set_nothrow (call, true);
	  new_stmt_info
	    = vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);

	  if (j == (modifier == NARROW ? 1 : 0))
	    STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
	  else
	    STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;

	  prev_stmt_info = new_stmt_info;
	}
    }
  else if (modifier == NARROW)
    {
      /* We don't define any narrowing conditional functions at present.  */
      gcc_assert (mask_opno < 0);
      for (j = 0; j < ncopies; ++j)
	{
	  /* Build argument list for the vectorized call.  */
	  vargs.create (nargs * 2);

	  if (slp_node)
	    {
	      auto_vec<vec<tree> > vec_defs (nargs);
	      vec<tree> vec_oprnds0;

	      vect_get_slp_defs (vinfo, slp_node, &vec_defs);
	      vec_oprnds0 = vec_defs[0];

	      /* Arguments are ready.  Create the new vector stmt.  */
	      for (i = 0; vec_oprnds0.iterate (i, &vec_oprnd0); i += 2)
		{
		  for (k = 0; k < nargs; k++)
		    {
		      vec<tree> vec_oprndsk = vec_defs[k];
		      vargs.quick_push (vec_oprndsk[i]);
		      vargs.quick_push (vec_oprndsk[i + 1]);
		    }
		  if (ifn != IFN_LAST)
		    call = gimple_build_call_internal_vec (ifn, vargs);
		  else
		    call = gimple_build_call_vec (fndecl, vargs);
		  new_temp = make_ssa_name (vec_dest, call);
		  gimple_call_set_lhs (call, new_temp);
		  gimple_call_set_nothrow (call, true);
		  new_stmt_info
		    = vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
		  SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
		}

	      for (i = 0; i < nargs; i++)
		{
		  vec<tree> vec_oprndsi = vec_defs[i];
		  vec_oprndsi.release ();
		}
	      continue;
	    }

	  for (i = 0; i < nargs; i++)
	    {
	      op = gimple_call_arg (stmt, i);
	      vec_oprnd0
		= vect_get_vec_def_for_operand (vinfo, op, stmt_info, /* ... */);
	      vec_oprnd1
		= vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnd0);
	      vec_oprnd1 = gimple_call_arg (new_stmt_info->stmt, /* ... */);
	      vec_oprnd0
		= vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnd1);
	      vec_oprnd1
		= vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnd0);

	      vargs.quick_push (vec_oprnd0);
	      vargs.quick_push (vec_oprnd1);
	    }

	  gcall *new_stmt = gimple_build_call_vec (fndecl, vargs);
	  new_temp = make_ssa_name (vec_dest, new_stmt);
	  gimple_call_set_lhs (new_stmt, new_temp);
	  new_stmt_info
	    = vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);

	  if (j == 0)
	    STMT_VINFO_VEC_STMT (stmt_info) = new_stmt_info;
	  else
	    STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;

	  prev_stmt_info = new_stmt_info;
	}

      *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
    }
  else
    /* No current target implements this case.  */
    return false;

  vargs.release ();

  /* The call in STMT might prevent it from being removed in dce.
     We however cannot remove it here, due to the way the ssa name
     it defines is mapped to the new definition.  So just replace
     rhs of the statement with something harmless.  */

  stmt_info = vect_orig_stmt (stmt_info);
  lhs = gimple_get_lhs (stmt_info->stmt);

  gassign *new_stmt
    = gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs)));
  vinfo->replace_stmt (gsi, stmt_info, new_stmt);

  return true;
}
struct simd_call_arg_info
{
  tree vectype;
  tree op;
  HOST_WIDE_INT linear_step;
  enum vect_def_type dt;
  unsigned int align;
  bool simd_lane_linear;
};
/* Helper function of vectorizable_simd_clone_call.  If OP, an SSA_NAME,
   is linear within simd lane (but not within whole loop), note it in
   ARGINFO.  */

static void
vect_simd_lane_linear (tree op, class loop *loop,
		       struct simd_call_arg_info *arginfo)
{
  gimple *def_stmt = SSA_NAME_DEF_STMT (op);

  if (!is_gimple_assign (def_stmt)
      || gimple_assign_rhs_code (def_stmt) != POINTER_PLUS_EXPR
      || !is_gimple_min_invariant (gimple_assign_rhs1 (def_stmt)))
    return;

  tree base = gimple_assign_rhs1 (def_stmt);
  HOST_WIDE_INT linear_step = 0;
  tree v = gimple_assign_rhs2 (def_stmt);
  while (TREE_CODE (v) == SSA_NAME)
    {
      tree t;
      def_stmt = SSA_NAME_DEF_STMT (v);
      if (is_gimple_assign (def_stmt))
	switch (gimple_assign_rhs_code (def_stmt))
	  {
	  case PLUS_EXPR:
	    t = gimple_assign_rhs2 (def_stmt);
	    if (linear_step || TREE_CODE (t) != INTEGER_CST)
	      return;
	    base = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (base), base, t);
	    v = gimple_assign_rhs1 (def_stmt);
	    continue;
	  case MULT_EXPR:
	    t = gimple_assign_rhs2 (def_stmt);
	    if (linear_step || !tree_fits_shwi_p (t) || integer_zerop (t))
	      return;
	    linear_step = tree_to_shwi (t);
	    v = gimple_assign_rhs1 (def_stmt);
	    continue;
	  CASE_CONVERT:
	    t = gimple_assign_rhs1 (def_stmt);
	    if (TREE_CODE (TREE_TYPE (t)) != INTEGER_TYPE
		|| (TYPE_PRECISION (TREE_TYPE (v))
		    < TYPE_PRECISION (TREE_TYPE (t))))
	      return;
	    v = t;
	    continue;
	  default:
	    return;
	  }
      else if (gimple_call_internal_p (def_stmt, IFN_GOMP_SIMD_LANE)
	       && TREE_CODE (gimple_call_arg (def_stmt, 0)) == SSA_NAME
	       && (SSA_NAME_VAR (gimple_call_arg (def_stmt, 0))
		   == /* ... */))
	{
	  arginfo->linear_step = linear_step;
	  arginfo->op = base;
	  arginfo->simd_lane_linear = true;
	  return;
	}
      return;
    }
}
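/* As an illustration of what vect_simd_lane_linear recognizes (example made
   up for exposition, not taken from the original sources): given

       _1 = GOMP_SIMD_LANE (simduid.0);
       _2 = _1 * 4;
       p_3 = &a + _2;

   the pointer p_3 is linear within a single simd lane with step 4 even
   though it is not a simple induction over the whole loop, so ARGINFO is
   marked simd_lane_linear with op = &a and linear_step = 4.  */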
/* Return the number of elements in vector type VECTYPE, which is associated
   with a SIMD clone.  At present these vectors always have a constant
   number of elements.  */

static unsigned HOST_WIDE_INT
simd_clone_subparts (tree vectype)
{
  return TYPE_VECTOR_SUBPARTS (vectype).to_constant ();
}
/* Function vectorizable_simd_clone_call.

   Check if STMT_INFO performs a function call that can be vectorized
   by calling a simd clone of the function.
   If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at GSI.
   Return true if STMT_INFO is vectorizable in this way.  */
static bool
vectorizable_simd_clone_call (vec_info *vinfo, stmt_vec_info stmt_info,
			      gimple_stmt_iterator *gsi,
			      stmt_vec_info *vec_stmt, slp_tree slp_node,
			      stmt_vector_for_cost *)
{
  tree vec_oprnd0 = NULL_TREE;
  stmt_vec_info prev_stmt_info;
  unsigned int nunits;
  loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
  bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
  class loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL;
  tree fndecl, new_temp;
  auto_vec<simd_call_arg_info> arginfo;
  vec<tree> vargs = vNULL;
  tree lhs, rtype, ratype;
  vec<constructor_elt, va_gc> *ret_ctor_elts = NULL;

  /* Is STMT a vectorizable call?  */
  gcall *stmt = dyn_cast <gcall *> (stmt_info->stmt);
  if (!stmt)
    return false;

  fndecl = gimple_call_fndecl (stmt);
  if (fndecl == NULL_TREE)
    return false;

  struct cgraph_node *node = cgraph_node::get (fndecl);
  if (node == NULL || node->simd_clones == NULL)
    return false;

  if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
    return false;

  if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
      /* ... */)
    return false;

  if (gimple_call_lhs (stmt)
      && TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
    return false;

  gcc_checking_assert (!stmt_can_throw_internal (cfun, stmt));

  vectype = STMT_VINFO_VECTYPE (stmt_info);

  if (loop_vinfo && nested_in_vect_loop_p (loop, stmt_info))
    return false;
  /* Process function arguments.  */
  nargs = gimple_call_num_args (stmt);

  /* Bail out if the function has zero arguments.  */
  if (nargs == 0)
    return false;

  arginfo.reserve (nargs, true);

  for (i = 0; i < nargs; i++)
    {
      simd_call_arg_info thisarginfo;
      affine_iv iv;

      thisarginfo.linear_step = 0;
      thisarginfo.align = 0;
      thisarginfo.op = NULL_TREE;
      thisarginfo.simd_lane_linear = false;

      op = gimple_call_arg (stmt, i);
      if (!vect_is_simple_use (op, vinfo, &thisarginfo.dt,
			       &thisarginfo.vectype)
	  || thisarginfo.dt == vect_uninitialized_def)
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			     "use not simple.\n");
	  return false;
	}

      if (thisarginfo.dt == vect_constant_def
	  || thisarginfo.dt == vect_external_def)
	gcc_assert (thisarginfo.vectype == NULL_TREE);
      else
	{
	  gcc_assert (thisarginfo.vectype != NULL_TREE);
	  if (VECTOR_BOOLEAN_TYPE_P (thisarginfo.vectype))
	    {
	      if (dump_enabled_p ())
		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				 "vector mask arguments are not supported\n");
	      return false;
	    }
	}

      /* For linear arguments, the analyze phase should have saved
	 the base and step in STMT_VINFO_SIMD_CLONE_INFO.  */
      if (i * 3 + 4 <= STMT_VINFO_SIMD_CLONE_INFO (stmt_info).length ()
	  && STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2])
	{
	  gcc_assert (vec_stmt);
	  thisarginfo.linear_step
	    = tree_to_shwi (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2]);
	  thisarginfo.op
	    = STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 1];
	  thisarginfo.simd_lane_linear
	    = (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 3]
	       == boolean_true_node);
	  /* If loop has been peeled for alignment, we need to adjust it.  */
	  tree n1 = LOOP_VINFO_NITERS_UNCHANGED (loop_vinfo);
	  tree n2 = LOOP_VINFO_NITERS (loop_vinfo);
	  if (n1 != n2 && !thisarginfo.simd_lane_linear)
	    {
	      tree bias = fold_build2 (MINUS_EXPR, TREE_TYPE (n1), n1, n2);
	      tree step = STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2];
	      tree opt = TREE_TYPE (thisarginfo.op);
	      bias = fold_convert (TREE_TYPE (step), bias);
	      bias = fold_build2 (MULT_EXPR, TREE_TYPE (step), bias, step);
	      thisarginfo.op
		= fold_build2 (POINTER_TYPE_P (opt)
			       ? POINTER_PLUS_EXPR : PLUS_EXPR, opt,
			       thisarginfo.op, bias);
	    }
	}
      else if (thisarginfo.dt != vect_constant_def
	       && thisarginfo.dt != vect_external_def
	       && TREE_CODE (op) == SSA_NAME
	       && simple_iv (loop, loop_containing_stmt (stmt), op,
			     &iv, false)
	       && tree_fits_shwi_p (iv.step))
	{
	  thisarginfo.linear_step = tree_to_shwi (iv.step);
	  thisarginfo.op = iv.base;
	}
      else if ((thisarginfo.dt == vect_constant_def
		|| thisarginfo.dt == vect_external_def)
	       && POINTER_TYPE_P (TREE_TYPE (op)))
	thisarginfo.align = get_pointer_alignment (op) / BITS_PER_UNIT;
      /* Addresses of array elements indexed by GOMP_SIMD_LANE are
	 linear too.  */
      if (POINTER_TYPE_P (TREE_TYPE (op))
	  && !thisarginfo.linear_step
	  && thisarginfo.dt != vect_constant_def
	  && thisarginfo.dt != vect_external_def
	  && TREE_CODE (op) == SSA_NAME)
	vect_simd_lane_linear (op, loop, &thisarginfo);

      arginfo.quick_push (thisarginfo);
    }
  unsigned HOST_WIDE_INT vf;
  if (!LOOP_VINFO_VECT_FACTOR (loop_vinfo).is_constant (&vf))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "not considering SIMD clones; not yet supported"
			 " for variable-width vectors.\n");
      return false;
    }
  unsigned int badness = 0;
  struct cgraph_node *bestn = NULL;
  if (STMT_VINFO_SIMD_CLONE_INFO (stmt_info).exists ())
    bestn = cgraph_node::get (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[0]);
  else
    for (struct cgraph_node *n = node->simd_clones; n != NULL;
	 n = n->simdclone->next_clone)
      {
	unsigned int this_badness = 0;
	if (n->simdclone->simdlen > vf
	    || n->simdclone->nargs != nargs)
	  continue;
	if (n->simdclone->simdlen < vf)
	  this_badness += (exact_log2 (vf)
			   - exact_log2 (n->simdclone->simdlen)) * 1024;
	if (n->simdclone->inbranch)
	  this_badness += 2048;
	int target_badness = targetm.simd_clone.usable (n);
	if (target_badness < 0)
	  continue;
	this_badness += target_badness * 512;
	/* FORNOW: Have to add code to add the mask argument.  */
	if (n->simdclone->inbranch)
	  continue;
	for (i = 0; i < nargs; i++)
	  {
	    switch (n->simdclone->args[i].arg_type)
	      {
	      case SIMD_CLONE_ARG_TYPE_VECTOR:
		if (!useless_type_conversion_p
		       (n->simdclone->args[i].orig_type,
			TREE_TYPE (gimple_call_arg (stmt, i))))
		  i = -1;
		else if (arginfo[i].dt == vect_constant_def
			 || arginfo[i].dt == vect_external_def
			 || arginfo[i].linear_step)
		  this_badness += 64;
		break;
	      case SIMD_CLONE_ARG_TYPE_UNIFORM:
		if (arginfo[i].dt != vect_constant_def
		    && arginfo[i].dt != vect_external_def)
		  i = -1;
		break;
	      case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
	      case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP:
		if (arginfo[i].dt == vect_constant_def
		    || arginfo[i].dt == vect_external_def
		    || (arginfo[i].linear_step
			!= n->simdclone->args[i].linear_step))
		  i = -1;
		break;
	      case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP:
	      case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP:
	      case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP:
	      case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP:
	      case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP:
	      case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP:
		i = -1;
		break;
	      case SIMD_CLONE_ARG_TYPE_MASK:
		/* ... */
		break;
	      }
	    if (i == (size_t) -1)
	      break;
	    if (n->simdclone->args[i].alignment > arginfo[i].align)
	      {
		i = -1;
		break;
	      }
	    if (arginfo[i].align)
	      this_badness += (exact_log2 (arginfo[i].align)
			       - exact_log2 (n->simdclone->args[i].alignment));
	  }
	if (i == (size_t) -1)
	  continue;
	if (bestn == NULL || this_badness < badness)
	  {
	    bestn = n;
	    badness = this_badness;
	  }
      }
  for (i = 0; i < nargs; i++)
    if ((arginfo[i].dt == vect_constant_def
	 || arginfo[i].dt == vect_external_def)
	&& bestn->simdclone->args[i].arg_type == SIMD_CLONE_ARG_TYPE_VECTOR)
      {
	tree arg_type = TREE_TYPE (gimple_call_arg (stmt, i));
	arginfo[i].vectype = get_vectype_for_scalar_type (vinfo, arg_type,
							  slp_node);
	if (arginfo[i].vectype == NULL
	    || (simd_clone_subparts (arginfo[i].vectype)
		> bestn->simdclone->simdlen))
	  return false;
      }

  fndecl = bestn->decl;
  nunits = bestn->simdclone->simdlen;
  ncopies = vf / nunits;

  /* If the function isn't const, only allow it in simd loops where user
     has asserted that at least nunits consecutive iterations can be
     performed using SIMD instructions.  */
  if ((loop == NULL || (unsigned) loop->safelen < nunits)
      && gimple_vuse (stmt))
    return false;

  /* Sanity check: make sure that at least one copy of the vectorized stmt
     needs to be generated.  */
  gcc_assert (ncopies >= 1);
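  /* Illustrative example (not from the original sources): with a
     vectorization factor of 8 and a chosen clone whose simdlen is 4,
     ncopies is 2, i.e. every scalar call is replaced by two calls to the
     four-lane clone per vectorized iteration.  */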
  if (!vec_stmt) /* transformation not required.  */
    {
      STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (bestn->decl);
      for (i = 0; i < nargs; i++)
	if ((bestn->simdclone->args[i].arg_type
	     == SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP)
	    || (bestn->simdclone->args[i].arg_type
		== SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP))
	  {
	    STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_grow_cleared (i * 3
								      + 1);
	    STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (arginfo[i].op);
	    tree lst = POINTER_TYPE_P (TREE_TYPE (arginfo[i].op))
		       ? size_type_node : TREE_TYPE (arginfo[i].op);
	    tree ls = build_int_cst (lst, arginfo[i].linear_step);
	    STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (ls);
	    tree sll = arginfo[i].simd_lane_linear
		       ? boolean_true_node : boolean_false_node;
	    STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (sll);
	  }
      STMT_VINFO_TYPE (stmt_info) = call_simd_clone_vec_info_type;
      DUMP_VECT_SCOPE ("vectorizable_simd_clone_call");
      /* vect_model_simple_cost (vinfo, stmt_info, ncopies,
				 dt, slp_node, cost_vec); */
      return true;
    }

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location, "transform call.\n");
4161 scalar_dest
= gimple_call_lhs (stmt
);
4162 vec_dest
= NULL_TREE
;
4167 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
4168 rtype
= TREE_TYPE (TREE_TYPE (fndecl
));
4169 if (TREE_CODE (rtype
) == ARRAY_TYPE
)
4172 rtype
= TREE_TYPE (ratype
);
4176 prev_stmt_info
= NULL
;
4177 for (j
= 0; j
< ncopies
; ++j
)
4179 /* Build argument list for the vectorized call. */
4181 vargs
.create (nargs
);
4185 for (i
= 0; i
< nargs
; i
++)
4187 unsigned int k
, l
, m
, o
;
4189 op
= gimple_call_arg (stmt
, i
);
4190 switch (bestn
->simdclone
->args
[i
].arg_type
)
4192 case SIMD_CLONE_ARG_TYPE_VECTOR
:
4193 atype
= bestn
->simdclone
->args
[i
].vector_type
;
4194 o
= nunits
/ simd_clone_subparts (atype
);
4195 for (m
= j
* o
; m
< (j
+ 1) * o
; m
++)
4197 if (simd_clone_subparts (atype
)
4198 < simd_clone_subparts (arginfo
[i
].vectype
))
4200 poly_uint64 prec
= GET_MODE_BITSIZE (TYPE_MODE (atype
));
4201 k
= (simd_clone_subparts (arginfo
[i
].vectype
)
4202 / simd_clone_subparts (atype
));
4203 gcc_assert ((k
& (k
- 1)) == 0);
4206 = vect_get_vec_def_for_operand (vinfo
, op
, stmt_info
);
4209 vec_oprnd0
= arginfo
[i
].op
;
4210 if ((m
& (k
- 1)) == 0)
4212 = vect_get_vec_def_for_stmt_copy (vinfo
,
4215 arginfo
[i
].op
= vec_oprnd0
;
4217 = build3 (BIT_FIELD_REF
, atype
, vec_oprnd0
,
4219 bitsize_int ((m
& (k
- 1)) * prec
));
4221 = gimple_build_assign (make_ssa_name (atype
),
4223 vect_finish_stmt_generation (vinfo
, stmt_info
,
4225 vargs
.safe_push (gimple_assign_lhs (new_stmt
));
4229 k
= (simd_clone_subparts (atype
)
4230 / simd_clone_subparts (arginfo
[i
].vectype
));
4231 gcc_assert ((k
& (k
- 1)) == 0);
4232 vec
<constructor_elt
, va_gc
> *ctor_elts
;
4234 vec_alloc (ctor_elts
, k
);
4237 for (l
= 0; l
< k
; l
++)
4239 if (m
== 0 && l
== 0)
4241 = vect_get_vec_def_for_operand (vinfo
,
4245 = vect_get_vec_def_for_stmt_copy (vinfo
,
4247 arginfo
[i
].op
= vec_oprnd0
;
4250 CONSTRUCTOR_APPEND_ELT (ctor_elts
, NULL_TREE
,
4254 vargs
.safe_push (vec_oprnd0
);
4257 vec_oprnd0
= build_constructor (atype
, ctor_elts
);
4259 = gimple_build_assign (make_ssa_name (atype
),
4261 vect_finish_stmt_generation (vinfo
, stmt_info
,
4263 vargs
.safe_push (gimple_assign_lhs (new_stmt
));
4268 case SIMD_CLONE_ARG_TYPE_UNIFORM
:
4269 vargs
.safe_push (op
);
4271 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP
:
4272 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP
:
4277 = force_gimple_operand (unshare_expr (arginfo
[i
].op
),
4278 &stmts
, true, NULL_TREE
);
4282 edge pe
= loop_preheader_edge (loop
);
4283 new_bb
= gsi_insert_seq_on_edge_immediate (pe
, stmts
);
4284 gcc_assert (!new_bb
);
4286 if (arginfo
[i
].simd_lane_linear
)
4288 vargs
.safe_push (arginfo
[i
].op
);
4291 tree phi_res
= copy_ssa_name (op
);
4292 gphi
*new_phi
= create_phi_node (phi_res
, loop
->header
);
4293 loop_vinfo
->add_stmt (new_phi
);
4294 add_phi_arg (new_phi
, arginfo
[i
].op
,
4295 loop_preheader_edge (loop
), UNKNOWN_LOCATION
);
4297 = POINTER_TYPE_P (TREE_TYPE (op
))
4298 ? POINTER_PLUS_EXPR
: PLUS_EXPR
;
4299 tree type
= POINTER_TYPE_P (TREE_TYPE (op
))
4300 ? sizetype
: TREE_TYPE (op
);
4302 = wi::mul (bestn
->simdclone
->args
[i
].linear_step
,
4304 tree tcst
= wide_int_to_tree (type
, cst
);
4305 tree phi_arg
= copy_ssa_name (op
);
4307 = gimple_build_assign (phi_arg
, code
, phi_res
, tcst
);
4308 gimple_stmt_iterator si
= gsi_after_labels (loop
->header
);
4309 gsi_insert_after (&si
, new_stmt
, GSI_NEW_STMT
);
4310 loop_vinfo
->add_stmt (new_stmt
);
4311 add_phi_arg (new_phi
, phi_arg
, loop_latch_edge (loop
),
4313 arginfo
[i
].op
= phi_res
;
4314 vargs
.safe_push (phi_res
);
4319 = POINTER_TYPE_P (TREE_TYPE (op
))
4320 ? POINTER_PLUS_EXPR
: PLUS_EXPR
;
4321 tree type
= POINTER_TYPE_P (TREE_TYPE (op
))
4322 ? sizetype
: TREE_TYPE (op
);
4324 = wi::mul (bestn
->simdclone
->args
[i
].linear_step
,
4326 tree tcst
= wide_int_to_tree (type
, cst
);
4327 new_temp
= make_ssa_name (TREE_TYPE (op
));
4329 = gimple_build_assign (new_temp
, code
,
4330 arginfo
[i
].op
, tcst
);
4331 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
4332 vargs
.safe_push (new_temp
);
4335 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP
:
4336 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP
:
4337 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP
:
4338 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP
:
4339 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP
:
4340 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP
:
4346 gcall
*new_call
= gimple_build_call_vec (fndecl
, vargs
);
4349 gcc_assert (ratype
|| simd_clone_subparts (rtype
) == nunits
);
4351 new_temp
= create_tmp_var (ratype
);
4352 else if (simd_clone_subparts (vectype
)
4353 == simd_clone_subparts (rtype
))
4354 new_temp
= make_ssa_name (vec_dest
, new_call
);
4356 new_temp
= make_ssa_name (rtype
, new_call
);
4357 gimple_call_set_lhs (new_call
, new_temp
);
4359 stmt_vec_info new_stmt_info
4360 = vect_finish_stmt_generation (vinfo
, stmt_info
, new_call
, gsi
);
4364 if (simd_clone_subparts (vectype
) < nunits
)
4367 poly_uint64 prec
= GET_MODE_BITSIZE (TYPE_MODE (vectype
));
4368 poly_uint64 bytes
= GET_MODE_SIZE (TYPE_MODE (vectype
));
4369 k
= nunits
/ simd_clone_subparts (vectype
);
4370 gcc_assert ((k
& (k
- 1)) == 0);
4371 for (l
= 0; l
< k
; l
++)
4376 t
= build_fold_addr_expr (new_temp
);
4377 t
= build2 (MEM_REF
, vectype
, t
,
4378 build_int_cst (TREE_TYPE (t
), l
* bytes
));
4381 t
= build3 (BIT_FIELD_REF
, vectype
, new_temp
,
4382 bitsize_int (prec
), bitsize_int (l
* prec
));
4384 = gimple_build_assign (make_ssa_name (vectype
), t
);
4386 = vect_finish_stmt_generation (vinfo
, stmt_info
,
4389 if (j
== 0 && l
== 0)
4390 STMT_VINFO_VEC_STMT (stmt_info
)
4391 = *vec_stmt
= new_stmt_info
;
4393 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt_info
;
4395 prev_stmt_info
= new_stmt_info
;
4399 vect_clobber_variable (vinfo
, stmt_info
, gsi
, new_temp
);
4402 else if (simd_clone_subparts (vectype
) > nunits
)
4404 unsigned int k
= (simd_clone_subparts (vectype
)
4405 / simd_clone_subparts (rtype
));
4406 gcc_assert ((k
& (k
- 1)) == 0);
4407 if ((j
& (k
- 1)) == 0)
4408 vec_alloc (ret_ctor_elts
, k
);
4411 unsigned int m
, o
= nunits
/ simd_clone_subparts (rtype
);
4412 for (m
= 0; m
< o
; m
++)
4414 tree tem
= build4 (ARRAY_REF
, rtype
, new_temp
,
4415 size_int (m
), NULL_TREE
, NULL_TREE
);
4417 = gimple_build_assign (make_ssa_name (rtype
), tem
);
4419 = vect_finish_stmt_generation (vinfo
, stmt_info
,
4421 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts
, NULL_TREE
,
4422 gimple_assign_lhs (new_stmt
));
4424 vect_clobber_variable (vinfo
, stmt_info
, gsi
, new_temp
);
4427 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts
, NULL_TREE
, new_temp
);
4428 if ((j
& (k
- 1)) != k
- 1)
4430 vec_oprnd0
= build_constructor (vectype
, ret_ctor_elts
);
4432 = gimple_build_assign (make_ssa_name (vec_dest
), vec_oprnd0
);
4434 = vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
4436 if ((unsigned) j
== k
- 1)
4437 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt_info
;
4439 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt_info
;
4441 prev_stmt_info
= new_stmt_info
;
4446 tree t
= build_fold_addr_expr (new_temp
);
4447 t
= build2 (MEM_REF
, vectype
, t
,
4448 build_int_cst (TREE_TYPE (t
), 0));
4450 = gimple_build_assign (make_ssa_name (vec_dest
), t
);
4452 = vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
4453 vect_clobber_variable (vinfo
, stmt_info
, gsi
, new_temp
);
4458 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt_info
;
4460 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt_info
;
4462 prev_stmt_info
= new_stmt_info
;
4467 /* The call in STMT might prevent it from being removed in dce.
4468 We however cannot remove it here, due to the way the ssa name
4469 it defines is mapped to the new definition. So just replace
4470 rhs of the statement with something harmless. */
4478 type
= TREE_TYPE (scalar_dest
);
4479 lhs
= gimple_call_lhs (vect_orig_stmt (stmt_info
)->stmt
);
4480 new_stmt
= gimple_build_assign (lhs
, build_zero_cst (type
));
4483 new_stmt
= gimple_build_nop ();
4484 vinfo
->replace_stmt (gsi
, vect_orig_stmt (stmt_info
), new_stmt
);
4485 unlink_stmt_vdef (stmt
);
/* Function vect_gen_widened_results_half

   Create a vector stmt whose code, type, number of arguments, and result
   variable are CODE, OP_TYPE, and VEC_DEST, and its arguments are
   VEC_OPRND0 and VEC_OPRND1.  The new vector stmt is to be inserted at GSI.
   In the case that CODE is a CALL_EXPR, this means that a call to DECL
   needs to be created (DECL is a function-decl of a target-builtin).
   STMT_INFO is the original scalar stmt that we are vectorizing.  */

static gimple *
vect_gen_widened_results_half (vec_info *vinfo, enum tree_code code,
			       tree vec_oprnd0, tree vec_oprnd1, int op_type,
			       tree vec_dest, gimple_stmt_iterator *gsi,
			       stmt_vec_info stmt_info)
{
  gimple *new_stmt;
  tree new_temp;

  /* Generate half of the widened result:  */
  gcc_assert (op_type == TREE_CODE_LENGTH (code));
  if (op_type != binary_op)
    vec_oprnd1 = NULL;
  new_stmt = gimple_build_assign (vec_dest, code, vec_oprnd0, vec_oprnd1);
  new_temp = make_ssa_name (vec_dest, new_stmt);
  gimple_assign_set_lhs (new_stmt, new_temp);
  vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);

  return new_stmt;
}
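/* The caller invokes this helper twice per input vector, once with the
   "lo" tree code and once with the "hi" tree code (e.g. VEC_UNPACK_LO_EXPR
   and VEC_UNPACK_HI_EXPR for a widening NOP conversion), and collects both
   results, so a widening operation doubles the number of vector stmts.  */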
/* Get vectorized definitions for loop-based vectorization of STMT_INFO.
   For the first operand we call vect_get_vec_def_for_operand (with OPRND
   containing scalar operand), and for the rest we get a copy with
   vect_get_vec_def_for_stmt_copy() using the previous vector definition
   (stored in OPRND).  See vect_get_vec_def_for_stmt_copy() for details.
   The vectors are collected into VEC_OPRNDS.  */

static void
vect_get_loop_based_defs (vec_info *vinfo, tree *oprnd, stmt_vec_info stmt_info,
			  vec<tree> *vec_oprnds, int multi_step_cvt)
{
  tree vec_oprnd;

  /* Get first vector operand.  */
  /* All the vector operands except the very first one (that is scalar oprnd)
     are stmt copies.  */
  if (TREE_CODE (TREE_TYPE (*oprnd)) != VECTOR_TYPE)
    vec_oprnd = vect_get_vec_def_for_operand (vinfo, *oprnd, stmt_info);
  else
    vec_oprnd = vect_get_vec_def_for_stmt_copy (vinfo, *oprnd);

  vec_oprnds->quick_push (vec_oprnd);

  /* Get second vector operand.  */
  vec_oprnd = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnd);
  vec_oprnds->quick_push (vec_oprnd);

  *oprnd = vec_oprnd;

  /* For conversion in multiple steps, continue to get operands
     recursively.  */
  if (multi_step_cvt)
    vect_get_loop_based_defs (vinfo, oprnd, stmt_info, vec_oprnds,
			      multi_step_cvt - 1);
}
/* Create vectorized demotion statements for vector operands from VEC_OPRNDS.
   For multi-step conversions store the resulting vectors and call the function
   recursively.  */

static void
vect_create_vectorized_demotion_stmts (vec_info *vinfo, vec<tree> *vec_oprnds,
				       int multi_step_cvt,
				       stmt_vec_info stmt_info,
				       vec<tree> vec_dsts,
				       gimple_stmt_iterator *gsi,
				       slp_tree slp_node, enum tree_code code,
				       stmt_vec_info *prev_stmt_info)
{
  unsigned int i;
  tree vop0, vop1, new_tmp, vec_dest;

  vec_dest = vec_dsts.pop ();

  for (i = 0; i < vec_oprnds->length (); i += 2)
    {
      /* Create demotion operation.  */
      vop0 = (*vec_oprnds)[i];
      vop1 = (*vec_oprnds)[i + 1];
      gassign *new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1);
      new_tmp = make_ssa_name (vec_dest, new_stmt);
      gimple_assign_set_lhs (new_stmt, new_tmp);
      stmt_vec_info new_stmt_info
	= vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);

      if (multi_step_cvt)
	/* Store the resulting vector for next recursive call.  */
	(*vec_oprnds)[i/2] = new_tmp;
      else
	{
	  /* This is the last step of the conversion sequence.  Store the
	     vectors in SLP_NODE or in vector info of the scalar statement
	     (or in STMT_VINFO_RELATED_STMT chain).  */
	  if (slp_node)
	    SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
	  else
	    {
	      if (!*prev_stmt_info)
		STMT_VINFO_VEC_STMT (stmt_info) = new_stmt_info;
	      else
		STMT_VINFO_RELATED_STMT (*prev_stmt_info) = new_stmt_info;

	      *prev_stmt_info = new_stmt_info;
	    }
	}
    }

  /* For multi-step demotion operations we first generate demotion operations
     from the source type to the intermediate types, and then combine the
     results (stored in VEC_OPRNDS) in demotion operation to the destination
     type.  */
  if (multi_step_cvt)
    {
      /* At each level of recursion we have half of the operands we had at the
	 previous level.  */
      vec_oprnds->truncate ((i+1)/2);
      vect_create_vectorized_demotion_stmts (vinfo, vec_oprnds,
					     multi_step_cvt - 1,
					     stmt_info, vec_dsts, gsi,
					     slp_node, VEC_PACK_TRUNC_EXPR,
					     prev_stmt_info);
    }

  vec_dsts.quick_push (vec_dest);
}
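/* For instance, a two-step demotion from int to char elements first packs
   pairs of V4SI vectors into V8HI vectors and then packs pairs of those
   into V16QI, so each recursion level consumes twice as many input vectors
   as it produces (the concrete modes depend on the target).  */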
/* Create vectorized promotion statements for vector operands from VEC_OPRNDS0
   and VEC_OPRNDS1, for a binary operation associated with scalar statement
   STMT_INFO.  For multi-step conversions store the resulting vectors and
   call the function recursively.  */

static void
vect_create_vectorized_promotion_stmts (vec_info *vinfo,
					vec<tree> *vec_oprnds0,
					vec<tree> *vec_oprnds1,
					stmt_vec_info stmt_info, tree vec_dest,
					gimple_stmt_iterator *gsi,
					enum tree_code code1,
					enum tree_code code2, int op_type)
{
  int i;
  tree vop0, vop1, new_tmp1, new_tmp2;
  gimple *new_stmt1, *new_stmt2;
  vec<tree> vec_tmp = vNULL;

  vec_tmp.create (vec_oprnds0->length () * 2);
  FOR_EACH_VEC_ELT (*vec_oprnds0, i, vop0)
    {
      if (op_type == binary_op)
	vop1 = (*vec_oprnds1)[i];
      else
	vop1 = NULL_TREE;

      /* Generate the two halves of promotion operation.  */
      new_stmt1 = vect_gen_widened_results_half (vinfo, code1, vop0, vop1,
						 op_type, vec_dest, gsi,
						 stmt_info);
      new_stmt2 = vect_gen_widened_results_half (vinfo, code2, vop0, vop1,
						 op_type, vec_dest, gsi,
						 stmt_info);
      if (is_gimple_call (new_stmt1))
	{
	  new_tmp1 = gimple_call_lhs (new_stmt1);
	  new_tmp2 = gimple_call_lhs (new_stmt2);
	}
      else
	{
	  new_tmp1 = gimple_assign_lhs (new_stmt1);
	  new_tmp2 = gimple_assign_lhs (new_stmt2);
	}

      /* Store the results for the next step.  */
      vec_tmp.quick_push (new_tmp1);
      vec_tmp.quick_push (new_tmp2);
    }

  vec_oprnds0->release ();
  *vec_oprnds0 = vec_tmp;
}
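/* Each input vector therefore yields two result vectors (the low and high
   halves of the widened elements), which is why VEC_TMP is created with
   twice the length of VEC_OPRNDS0 and then replaces it for the next
   conversion step.  */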
/* Check if STMT_INFO performs a conversion operation that can be vectorized.
   If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at GSI.
   Return true if STMT_INFO is vectorizable in this way.  */

vectorizable_conversion (vec_info *vinfo,
			 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
			 stmt_vec_info *vec_stmt, slp_tree slp_node,
			 stmt_vector_for_cost *cost_vec)
{
  tree op0, op1 = NULL_TREE;
  tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
  loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
  enum tree_code code, code1 = ERROR_MARK, code2 = ERROR_MARK;
  enum tree_code codecvt1 = ERROR_MARK, codecvt2 = ERROR_MARK;
  enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
  stmt_vec_info prev_stmt_info;
  poly_uint64 nunits_in;
  poly_uint64 nunits_out;
  tree vectype_out, vectype_in;
  tree lhs_type, rhs_type;
  enum { NARROW, NONE, WIDEN } modifier;
  vec<tree> vec_oprnds0 = vNULL;
  vec<tree> vec_oprnds1 = vNULL;
  bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
  int multi_step_cvt = 0;
  vec<tree> interm_types = vNULL;
  tree last_oprnd, intermediate_type, cvt_type = NULL_TREE;
  unsigned short fltsz;
  /* Is STMT a vectorizable conversion?   */

  if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
    return false;

  if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
      /* ... */)
    return false;

  gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
  if (!stmt)
    return false;

  if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
    return false;

  code = gimple_assign_rhs_code (stmt);
  if (!CONVERT_EXPR_CODE_P (code)
      && code != FIX_TRUNC_EXPR
      && code != FLOAT_EXPR
      && code != WIDEN_MULT_EXPR
      && code != WIDEN_LSHIFT_EXPR)
    return false;

  op_type = TREE_CODE_LENGTH (code);

  /* Check types of lhs and rhs.  */
  scalar_dest = gimple_assign_lhs (stmt);
  lhs_type = TREE_TYPE (scalar_dest);
  vectype_out = STMT_VINFO_VECTYPE (stmt_info);

  /* Check the operands of the operation.  */
  slp_tree slp_op0, slp_op1 = NULL;
  if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
			   0, &op0, &slp_op0, &dt[0], &vectype_in))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "use not simple.\n");
      return false;
    }

  rhs_type = TREE_TYPE (op0);
  if ((code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
      && !((INTEGRAL_TYPE_P (lhs_type)
	    && INTEGRAL_TYPE_P (rhs_type))
	   || (SCALAR_FLOAT_TYPE_P (lhs_type)
	       && SCALAR_FLOAT_TYPE_P (rhs_type))))
    return false;

  if (!VECTOR_BOOLEAN_TYPE_P (vectype_out)
      && ((INTEGRAL_TYPE_P (lhs_type)
	   && !type_has_mode_precision_p (lhs_type))
	  || (INTEGRAL_TYPE_P (rhs_type)
	      && !type_has_mode_precision_p (rhs_type))))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "type conversion to/from bit-precision unsupported."
			 "\n");
      return false;
    }
  if (op_type == binary_op)
    {
      gcc_assert (code == WIDEN_MULT_EXPR || code == WIDEN_LSHIFT_EXPR);

      op1 = gimple_assign_rhs2 (stmt);
      tree vectype1_in;
      if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 1,
			       &op1, &slp_op1, &dt[1], &vectype1_in))
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			     "use not simple.\n");
	  return false;
	}
      /* For WIDEN_MULT_EXPR, if OP0 is a constant, use the type of
	 OP1.  */
      if (!vectype_in)
	vectype_in = vectype1_in;
    }

  /* If op0 is an external or constant def, infer the vector type
     from the scalar type.  */
  if (!vectype_in)
    vectype_in = get_vectype_for_scalar_type (vinfo, rhs_type, slp_node);
  if (vec_stmt)
    gcc_assert (vectype_in);
  if (!vectype_in)
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "no vectype for scalar type %T\n", rhs_type);
      return false;
    }

  if (VECTOR_BOOLEAN_TYPE_P (vectype_out)
      && !VECTOR_BOOLEAN_TYPE_P (vectype_in))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "can't convert between boolean and non "
			 "boolean vectors %T\n", rhs_type);
      return false;
    }

  nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
  nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
  if (known_eq (nunits_out, nunits_in))
    modifier = NONE;
  else if (multiple_p (nunits_out, nunits_in))
    modifier = NARROW;
  else
    {
      gcc_checking_assert (multiple_p (nunits_in, nunits_out));
      modifier = WIDEN;
    }

  /* Multiple types in SLP are handled by creating the appropriate number of
     vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
     case of SLP.  */
  if (slp_node)
    ncopies = 1;
  else if (modifier == NARROW)
    ncopies = vect_get_num_copies (loop_vinfo, vectype_out);
  else
    ncopies = vect_get_num_copies (loop_vinfo, vectype_in);

  /* Sanity check: make sure that at least one copy of the vectorized stmt
     needs to be generated.  */
  gcc_assert (ncopies >= 1);
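  /* As an illustration: for a conversion from short to int elements,
     vectype_in might be V8HI and vectype_out V4SI, so nunits_out (4) is
     smaller than nunits_in (8) and the conversion is a WIDEN (each input
     vector is unpacked into two output vectors); the opposite direction is
     a NARROW, where two input vectors are packed into one.  The concrete
     modes depend on the target.  */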
4858 bool found_mode
= false;
4859 scalar_mode lhs_mode
= SCALAR_TYPE_MODE (lhs_type
);
4860 scalar_mode rhs_mode
= SCALAR_TYPE_MODE (rhs_type
);
4861 opt_scalar_mode rhs_mode_iter
;
4863 /* Supportable by target? */
4867 if (code
!= FIX_TRUNC_EXPR
4868 && code
!= FLOAT_EXPR
4869 && !CONVERT_EXPR_CODE_P (code
))
4871 if (supportable_convert_operation (code
, vectype_out
, vectype_in
, &code1
))
4875 if (dump_enabled_p ())
4876 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4877 "conversion not supported by target.\n");
4881 if (supportable_widening_operation (vinfo
, code
, stmt_info
, vectype_out
,
4882 vectype_in
, &code1
, &code2
,
4883 &multi_step_cvt
, &interm_types
))
4885 /* Binary widening operation can only be supported directly by the
4887 gcc_assert (!(multi_step_cvt
&& op_type
== binary_op
));
4891 if (code
!= FLOAT_EXPR
4892 || GET_MODE_SIZE (lhs_mode
) <= GET_MODE_SIZE (rhs_mode
))
4895 fltsz
= GET_MODE_SIZE (lhs_mode
);
4896 FOR_EACH_2XWIDER_MODE (rhs_mode_iter
, rhs_mode
)
4898 rhs_mode
= rhs_mode_iter
.require ();
4899 if (GET_MODE_SIZE (rhs_mode
) > fltsz
)
4903 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode
), 0);
4904 cvt_type
= get_same_sized_vectype (cvt_type
, vectype_in
);
4905 if (cvt_type
== NULL_TREE
)
4908 if (GET_MODE_SIZE (rhs_mode
) == fltsz
)
4910 if (!supportable_convert_operation (code
, vectype_out
,
4911 cvt_type
, &codecvt1
))
4914 else if (!supportable_widening_operation (vinfo
, code
, stmt_info
,
4915 vectype_out
, cvt_type
,
4916 &codecvt1
, &codecvt2
,
4921 gcc_assert (multi_step_cvt
== 0);
4923 if (supportable_widening_operation (vinfo
, NOP_EXPR
, stmt_info
,
4925 vectype_in
, &code1
, &code2
,
4926 &multi_step_cvt
, &interm_types
))
4936 if (GET_MODE_SIZE (rhs_mode
) == fltsz
)
4937 codecvt2
= ERROR_MARK
;
4941 interm_types
.safe_push (cvt_type
);
4942 cvt_type
= NULL_TREE
;
4947 gcc_assert (op_type
== unary_op
);
4948 if (supportable_narrowing_operation (code
, vectype_out
, vectype_in
,
4949 &code1
, &multi_step_cvt
,
4953 if (code
!= FIX_TRUNC_EXPR
4954 || GET_MODE_SIZE (lhs_mode
) >= GET_MODE_SIZE (rhs_mode
))
4958 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode
), 0);
4959 cvt_type
= get_same_sized_vectype (cvt_type
, vectype_in
);
4960 if (cvt_type
== NULL_TREE
)
4962 if (!supportable_convert_operation (code
, cvt_type
, vectype_in
,
4965 if (supportable_narrowing_operation (NOP_EXPR
, vectype_out
, cvt_type
,
4966 &code1
, &multi_step_cvt
,
4975 if (!vec_stmt
) /* transformation not required. */
4978 && (!vect_maybe_update_slp_op_vectype (slp_op0
, vectype_in
)
4979 || !vect_maybe_update_slp_op_vectype (slp_op1
, vectype_in
)))
4981 if (dump_enabled_p ())
4982 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4983 "incompatible vector types for invariants\n");
4986 DUMP_VECT_SCOPE ("vectorizable_conversion");
4987 if (modifier
== NONE
)
4989 STMT_VINFO_TYPE (stmt_info
) = type_conversion_vec_info_type
;
4990 vect_model_simple_cost (vinfo
, stmt_info
, ncopies
, dt
, ndts
, slp_node
,
4993 else if (modifier
== NARROW
)
4995 STMT_VINFO_TYPE (stmt_info
) = type_demotion_vec_info_type
;
4996 /* The final packing step produces one vector result per copy. */
4997 unsigned int nvectors
4998 = (slp_node
? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
) : ncopies
);
4999 vect_model_promotion_demotion_cost (stmt_info
, dt
, nvectors
,
5000 multi_step_cvt
, cost_vec
);
5004 STMT_VINFO_TYPE (stmt_info
) = type_promotion_vec_info_type
;
5005 /* The initial unpacking step produces two vector results
5006 per copy. MULTI_STEP_CVT is 0 for a single conversion,
5007 so >> MULTI_STEP_CVT divides by 2^(number of steps - 1). */
5008 unsigned int nvectors
5010 ? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
) >> multi_step_cvt
5012 vect_model_promotion_demotion_cost (stmt_info
, dt
, nvectors
,
5013 multi_step_cvt
, cost_vec
);
5015 interm_types
.release ();
5020 if (dump_enabled_p ())
5021 dump_printf_loc (MSG_NOTE
, vect_location
,
5022 "transform conversion. ncopies = %d.\n", ncopies
);
5024 if (op_type
== binary_op
)
5026 if (CONSTANT_CLASS_P (op0
))
5027 op0
= fold_convert (TREE_TYPE (op1
), op0
);
5028 else if (CONSTANT_CLASS_P (op1
))
5029 op1
= fold_convert (TREE_TYPE (op0
), op1
);
5032 /* In case of multi-step conversion, we first generate conversion operations
5033 to the intermediate types, and then from that types to the final one.
5034 We create vector destinations for the intermediate type (TYPES) received
5035 from supportable_*_operation, and store them in the correct order
5036 for future use in vect_create_vectorized_*_stmts (). */
5037 auto_vec
<tree
> vec_dsts (multi_step_cvt
+ 1);
5038 vec_dest
= vect_create_destination_var (scalar_dest
,
5039 (cvt_type
&& modifier
== WIDEN
)
5040 ? cvt_type
: vectype_out
);
5041 vec_dsts
.quick_push (vec_dest
);
5045 for (i
= interm_types
.length () - 1;
5046 interm_types
.iterate (i
, &intermediate_type
); i
--)
5048 vec_dest
= vect_create_destination_var (scalar_dest
,
5050 vec_dsts
.quick_push (vec_dest
);
5055 vec_dest
= vect_create_destination_var (scalar_dest
,
5057 ? vectype_out
: cvt_type
);
5061 if (modifier
== WIDEN
)
5063 vec_oprnds0
.create (multi_step_cvt
? vect_pow2 (multi_step_cvt
) : 1);
5064 if (op_type
== binary_op
)
5065 vec_oprnds1
.create (1);
5067 else if (modifier
== NARROW
)
5068 vec_oprnds0
.create (
5069 2 * (multi_step_cvt
? vect_pow2 (multi_step_cvt
) : 1));
5071 else if (code
== WIDEN_LSHIFT_EXPR
)
5072 vec_oprnds1
.create (slp_node
->vec_stmts_size
);
5075 prev_stmt_info
= NULL
;
5079 for (j
= 0; j
< ncopies
; j
++)
5082 vect_get_vec_defs (vinfo
, op0
, NULL
, stmt_info
, &vec_oprnds0
,
5085 vect_get_vec_defs_for_stmt_copy (vinfo
, &vec_oprnds0
, NULL
);
5087 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vop0
)
5089 stmt_vec_info new_stmt_info
;
5090 /* Arguments are ready, create the new vector stmt. */
5091 gcc_assert (TREE_CODE_LENGTH (code1
) == unary_op
);
5092 gassign
*new_stmt
= gimple_build_assign (vec_dest
, code1
, vop0
);
5093 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
5094 gimple_assign_set_lhs (new_stmt
, new_temp
);
5096 = vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
5099 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt_info
);
5102 if (!prev_stmt_info
)
5103 STMT_VINFO_VEC_STMT (stmt_info
)
5104 = *vec_stmt
= new_stmt_info
;
5106 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt_info
;
5107 prev_stmt_info
= new_stmt_info
;
5114 /* In case the vectorization factor (VF) is bigger than the number
5115 of elements that we can fit in a vectype (nunits), we have to
5116 generate more than one vector stmt - i.e - we need to "unroll"
5117 the vector stmt by a factor VF/nunits. */
5118 for (j
= 0; j
< ncopies
; j
++)
5125 if (code
== WIDEN_LSHIFT_EXPR
)
5130 /* Store vec_oprnd1 for every vector stmt to be created
5131 for SLP_NODE. We check during the analysis that all
5132 the shift arguments are the same. */
5133 for (k
= 0; k
< slp_node
->vec_stmts_size
- 1; k
++)
5134 vec_oprnds1
.quick_push (vec_oprnd1
);
5136 vect_get_vec_defs (vinfo
, op0
, NULL_TREE
, stmt_info
,
5137 &vec_oprnds0
, NULL
, slp_node
);
5140 vect_get_vec_defs (vinfo
, op0
, op1
, stmt_info
, &vec_oprnds0
,
5141 &vec_oprnds1
, slp_node
);
5145 vec_oprnd0
= vect_get_vec_def_for_operand (vinfo
,
5147 vec_oprnds0
.quick_push (vec_oprnd0
);
5148 if (op_type
== binary_op
)
5150 if (code
== WIDEN_LSHIFT_EXPR
)
5154 = vect_get_vec_def_for_operand (vinfo
,
5156 vec_oprnds1
.quick_push (vec_oprnd1
);
5162 vec_oprnd0
= vect_get_vec_def_for_stmt_copy (vinfo
, vec_oprnd0
);
5163 vec_oprnds0
.truncate (0);
5164 vec_oprnds0
.quick_push (vec_oprnd0
);
5165 if (op_type
== binary_op
)
5167 if (code
== WIDEN_LSHIFT_EXPR
)
5170 vec_oprnd1
= vect_get_vec_def_for_stmt_copy (vinfo
,
5172 vec_oprnds1
.truncate (0);
5173 vec_oprnds1
.quick_push (vec_oprnd1
);
5177 /* Arguments are ready. Create the new vector stmts. */
5178 for (i
= multi_step_cvt
; i
>= 0; i
--)
5180 tree this_dest
= vec_dsts
[i
];
5181 enum tree_code c1
= code1
, c2
= code2
;
5182 if (i
== 0 && codecvt2
!= ERROR_MARK
)
5187 vect_create_vectorized_promotion_stmts (vinfo
, &vec_oprnds0
,
5188 &vec_oprnds1
, stmt_info
,
5193 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vop0
)
5195 stmt_vec_info new_stmt_info
;
5198 gcc_assert (TREE_CODE_LENGTH (codecvt1
) == unary_op
);
5199 new_temp
= make_ssa_name (vec_dest
);
5201 = gimple_build_assign (new_temp
, codecvt1
, vop0
);
5203 = vect_finish_stmt_generation (vinfo
, stmt_info
,
5207 new_stmt_info
= vinfo
->lookup_def (vop0
);
5210 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt_info
);
5213 if (!prev_stmt_info
)
5214 STMT_VINFO_VEC_STMT (stmt_info
) = new_stmt_info
;
5216 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt_info
;
5217 prev_stmt_info
= new_stmt_info
;
5222 *vec_stmt
= STMT_VINFO_VEC_STMT (stmt_info
);
5226 /* In case the vectorization factor (VF) is bigger than the number
5227 of elements that we can fit in a vectype (nunits), we have to
5228 generate more than one vector stmt - i.e - we need to "unroll"
5229 the vector stmt by a factor VF/nunits. */
5230 for (j
= 0; j
< ncopies
; j
++)
5234 vect_get_vec_defs (vinfo
, op0
, NULL_TREE
, stmt_info
, &vec_oprnds0
,
5238 vec_oprnds0
.truncate (0);
5239 vect_get_loop_based_defs (vinfo
,
5240 &last_oprnd
, stmt_info
, &vec_oprnds0
,
5241 vect_pow2 (multi_step_cvt
) - 1);
5244 /* Arguments are ready. Create the new vector stmts. */
5246 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vop0
)
5248 gcc_assert (TREE_CODE_LENGTH (codecvt1
) == unary_op
);
5249 new_temp
= make_ssa_name (vec_dest
);
5251 = gimple_build_assign (new_temp
, codecvt1
, vop0
);
5252 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
5253 vec_oprnds0
[i
] = new_temp
;
5256 vect_create_vectorized_demotion_stmts (vinfo
, &vec_oprnds0
,
5258 stmt_info
, vec_dsts
, gsi
,
5263 *vec_stmt
= STMT_VINFO_VEC_STMT (stmt_info
);
5267 vec_oprnds0
.release ();
5268 vec_oprnds1
.release ();
5269 interm_types
.release ();
/* Return true if we can assume from the scalar form of STMT_INFO that
   neither the scalar nor the vector forms will generate code.  STMT_INFO
   is known not to involve a data reference.  */

bool
vect_nop_conversion_p (stmt_vec_info stmt_info)
{
  gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
  if (!stmt)
    return false;

  tree lhs = gimple_assign_lhs (stmt);
  tree_code code = gimple_assign_rhs_code (stmt);
  tree rhs = gimple_assign_rhs1 (stmt);

  if (code == SSA_NAME || code == VIEW_CONVERT_EXPR)
    return true;

  if (CONVERT_EXPR_CODE_P (code))
    return tree_nop_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs));

  return false;
}
/* Function vectorizable_assignment.

   Check if STMT_INFO performs an assignment (copy) that can be vectorized.
   If VEC_STMT is also passed, vectorize the STMT_INFO: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at GSI.
   Return true if STMT_INFO is vectorizable in this way.  */

static bool
vectorizable_assignment (vec_info *vinfo,
			 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
			 stmt_vec_info *vec_stmt, slp_tree slp_node,
			 stmt_vector_for_cost *cost_vec)
{
  loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
  enum vect_def_type dt[1] = {vect_unknown_def_type};
  vec<tree> vec_oprnds = vNULL;
  bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
  stmt_vec_info prev_stmt_info = NULL;
  enum tree_code code;

  if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
    return false;

  if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
      /* ... */)
    return false;

  /* Is vectorizable assignment?  */
  gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
  if (!stmt)
    return false;

  scalar_dest = gimple_assign_lhs (stmt);
  if (TREE_CODE (scalar_dest) != SSA_NAME)
    return false;

  if (STMT_VINFO_DATA_REF (stmt_info))
    return false;

  code = gimple_assign_rhs_code (stmt);
  if (!(gimple_assign_single_p (stmt)
	|| code == PAREN_EXPR
	|| CONVERT_EXPR_CODE_P (code)))
    return false;

  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);

  /* Multiple types in SLP are handled by creating the appropriate number of
     vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
     case of SLP.  */
  if (slp_node)
    ncopies = 1;
  else
    ncopies = vect_get_num_copies (loop_vinfo, vectype);

  gcc_assert (ncopies >= 1);

  if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 0, &op, &slp_op,
			   &dt[0], &vectype_in))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "use not simple.\n");
      return false;
    }
  if (!vectype_in)
    vectype_in = get_vectype_for_scalar_type (vinfo, TREE_TYPE (op), slp_node);

  /* We can handle NOP_EXPR conversions that do not change the number
     of elements or the vector size.  */
  if ((CONVERT_EXPR_CODE_P (code)
       || code == VIEW_CONVERT_EXPR)
      && (!vectype_in
	  || maybe_ne (TYPE_VECTOR_SUBPARTS (vectype_in), nunits)
	  || maybe_ne (GET_MODE_SIZE (TYPE_MODE (vectype)),
		       GET_MODE_SIZE (TYPE_MODE (vectype_in)))))
    return false;

  /* We do not handle bit-precision changes.  */
  if ((CONVERT_EXPR_CODE_P (code)
       || code == VIEW_CONVERT_EXPR)
      && INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest))
      && (!type_has_mode_precision_p (TREE_TYPE (scalar_dest))
	  || !type_has_mode_precision_p (TREE_TYPE (op)))
      /* But a conversion that does not change the bit-pattern is ok.  */
      && !((TYPE_PRECISION (TREE_TYPE (scalar_dest))
	    > TYPE_PRECISION (TREE_TYPE (op)))
	   && TYPE_UNSIGNED (TREE_TYPE (op)))
      /* Conversion between boolean types of different sizes is
	 a simple assignment in case their vectypes are same
	 boolean vectors.  */
      && (!VECTOR_BOOLEAN_TYPE_P (vectype)
	  || !VECTOR_BOOLEAN_TYPE_P (vectype_in)))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "type conversion to/from bit-precision "
			 "unsupported.\n");
      return false;
    }

  if (!vec_stmt) /* transformation not required.  */
    {
      if (slp_node
	  && !vect_maybe_update_slp_op_vectype (slp_op, vectype_in))
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			     "incompatible vector types for invariants\n");
	  return false;
	}
      STMT_VINFO_TYPE (stmt_info) = assignment_vec_info_type;
      DUMP_VECT_SCOPE ("vectorizable_assignment");
      if (!vect_nop_conversion_p (stmt_info))
	vect_model_simple_cost (vinfo, stmt_info, ncopies, dt, ndts, slp_node,
				cost_vec);
      return true;
    }

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location, "transform assignment.\n");

  vec_dest = vect_create_destination_var (scalar_dest, vectype);

  for (j = 0; j < ncopies; j++)
    {
      if (j == 0)
	vect_get_vec_defs (vinfo, op, NULL, stmt_info, &vec_oprnds, NULL,
			   slp_node);
      else
	vect_get_vec_defs_for_stmt_copy (vinfo, &vec_oprnds, NULL);

      /* Arguments are ready. create the new vector stmt.  */
      stmt_vec_info new_stmt_info = NULL;
      FOR_EACH_VEC_ELT (vec_oprnds, i, vop)
	{
	  if (CONVERT_EXPR_CODE_P (code)
	      || code == VIEW_CONVERT_EXPR)
	    vop = build1 (VIEW_CONVERT_EXPR, vectype, vop);
	  gassign *new_stmt = gimple_build_assign (vec_dest, vop);
	  new_temp = make_ssa_name (vec_dest, new_stmt);
	  gimple_assign_set_lhs (new_stmt, new_temp);
	  new_stmt_info
	    = vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
	  if (slp_node)
	    SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
	}

      if (slp_node)
	continue;

      if (j == 0)
	STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
      else
	STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;

      prev_stmt_info = new_stmt_info;
    }

  vec_oprnds.release ();
  return true;
}
/* Return TRUE if CODE (a shift operation) is supported for SCALAR_TYPE
   either as shift by a scalar or by a vector.  */

bool
vect_supportable_shift (vec_info *vinfo, enum tree_code code, tree scalar_type)
{
  machine_mode vec_mode;
  optab optab;
  int icode;
  tree vectype;

  vectype = get_vectype_for_scalar_type (vinfo, scalar_type);
  if (!vectype)
    return false;

  optab = optab_for_tree_code (code, vectype, optab_scalar);
  if (!optab
      || optab_handler (optab, TYPE_MODE (vectype)) == CODE_FOR_nothing)
    {
      optab = optab_for_tree_code (code, vectype, optab_vector);
      if (!optab
	  || (optab_handler (optab, TYPE_MODE (vectype))
	      == CODE_FOR_nothing))
	return false;
    }

  vec_mode = TYPE_MODE (vectype);
  icode = (int) optab_handler (optab, vec_mode);
  if (icode == CODE_FOR_nothing)
    return false;

  return true;
}
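/* Note that a target may provide a vector-shift-by-scalar optab, a
   vector-shift-by-vector optab, or both; this helper only asks whether some
   form of the shift is supported for SCALAR_TYPE.  It is used, for
   instance, when synthesizing shifts during pattern recognition,
   independently of which form vectorizable_shift later selects.  */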
/* Function vectorizable_shift.

   Check if STMT_INFO performs a shift operation that can be vectorized.
   If VEC_STMT is also passed, vectorize the STMT_INFO: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at GSI.
   Return true if STMT_INFO is vectorizable in this way.  */

static bool
vectorizable_shift (vec_info *vinfo,
		    stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
		    stmt_vec_info *vec_stmt, slp_tree slp_node,
		    stmt_vector_for_cost *cost_vec)
{
5528 tree op0
, op1
= NULL
;
5529 tree vec_oprnd1
= NULL_TREE
;
5531 loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
);
5532 enum tree_code code
;
5533 machine_mode vec_mode
;
5537 machine_mode optab_op2_mode
;
5538 enum vect_def_type dt
[2] = {vect_unknown_def_type
, vect_unknown_def_type
};
5540 stmt_vec_info prev_stmt_info
;
5541 poly_uint64 nunits_in
;
5542 poly_uint64 nunits_out
;
5547 vec
<tree
> vec_oprnds0
= vNULL
;
5548 vec
<tree
> vec_oprnds1
= vNULL
;
5551 bool scalar_shift_arg
= true;
5552 bb_vec_info bb_vinfo
= dyn_cast
<bb_vec_info
> (vinfo
);
5553 bool incompatible_op1_vectype_p
= false;
5555 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
5558 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
5559 && STMT_VINFO_DEF_TYPE (stmt_info
) != vect_nested_cycle
5563 /* Is STMT a vectorizable binary/unary operation? */
5564 gassign
*stmt
= dyn_cast
<gassign
*> (stmt_info
->stmt
);
5568 if (TREE_CODE (gimple_assign_lhs (stmt
)) != SSA_NAME
)
5571 code
= gimple_assign_rhs_code (stmt
);
5573 if (!(code
== LSHIFT_EXPR
|| code
== RSHIFT_EXPR
|| code
== LROTATE_EXPR
5574 || code
== RROTATE_EXPR
))
5577 scalar_dest
= gimple_assign_lhs (stmt
);
5578 vectype_out
= STMT_VINFO_VECTYPE (stmt_info
);
5579 if (!type_has_mode_precision_p (TREE_TYPE (scalar_dest
)))
5581 if (dump_enabled_p ())
5582 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5583 "bit-precision shifts not supported.\n");
5588 if (!vect_is_simple_use (vinfo
, stmt_info
, slp_node
,
5589 0, &op0
, &slp_op0
, &dt
[0], &vectype
))
5591 if (dump_enabled_p ())
5592 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5593 "use not simple.\n");
5596 /* If op0 is an external or constant def, infer the vector type
5597 from the scalar type. */
5599 vectype
= get_vectype_for_scalar_type (vinfo
, TREE_TYPE (op0
), slp_node
);
5601 gcc_assert (vectype
);
5604 if (dump_enabled_p ())
5605 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5606 "no vectype for scalar type\n");
5610 nunits_out
= TYPE_VECTOR_SUBPARTS (vectype_out
);
5611 nunits_in
= TYPE_VECTOR_SUBPARTS (vectype
);
5612 if (maybe_ne (nunits_out
, nunits_in
))
5615 stmt_vec_info op1_def_stmt_info
;
5617 if (!vect_is_simple_use (vinfo
, stmt_info
, slp_node
, 1, &op1
, &slp_op1
,
5618 &dt
[1], &op1_vectype
, &op1_def_stmt_info
))
5620 if (dump_enabled_p ())
5621 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5622 "use not simple.\n");
5626 /* Multiple types in SLP are handled by creating the appropriate number of
5627 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
5632 ncopies
= vect_get_num_copies (loop_vinfo
, vectype
);
5634 gcc_assert (ncopies
>= 1);
5636 /* Determine whether the shift amount is a vector, or scalar. If the
5637 shift/rotate amount is a vector, use the vector/vector shift optabs. */
5639 if ((dt
[1] == vect_internal_def
5640 || dt
[1] == vect_induction_def
5641 || dt
[1] == vect_nested_cycle
)
5643 scalar_shift_arg
= false;
5644 else if (dt
[1] == vect_constant_def
5645 || dt
[1] == vect_external_def
5646 || dt
[1] == vect_internal_def
)
5648 /* In SLP, need to check whether the shift count is the same,
5649 in loops if it is a constant or invariant, it is always
5653 vec
<stmt_vec_info
> stmts
= SLP_TREE_SCALAR_STMTS (slp_node
);
5654 stmt_vec_info slpstmt_info
;
5656 FOR_EACH_VEC_ELT (stmts
, k
, slpstmt_info
)
5658 gassign
*slpstmt
= as_a
<gassign
*> (slpstmt_info
->stmt
);
5659 if (!operand_equal_p (gimple_assign_rhs2 (slpstmt
), op1
, 0))
5660 scalar_shift_arg
= false;
5663 /* For internal SLP defs we have to make sure we see scalar stmts
5664 for all vector elements.
5665 ??? For different vectors we could resort to a different
5666 scalar shift operand but code-generation below simply always
5668 if (dt
[1] == vect_internal_def
5669 && maybe_ne (nunits_out
* SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
),
5671 scalar_shift_arg
= false;
5674 /* If the shift amount is computed by a pattern stmt we cannot
5675 use the scalar amount directly thus give up and use a vector
5677 if (op1_def_stmt_info
&& is_pattern_stmt_p (op1_def_stmt_info
))
5678 scalar_shift_arg
= false;
5682 if (dump_enabled_p ())
5683 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5684 "operand mode requires invariant argument.\n");
5688 /* Vector shifted by vector. */
5689 bool was_scalar_shift_arg
= scalar_shift_arg
;
5690 if (!scalar_shift_arg
)
5692 optab
= optab_for_tree_code (code
, vectype
, optab_vector
);
5693 if (dump_enabled_p ())
5694 dump_printf_loc (MSG_NOTE
, vect_location
,
5695 "vector/vector shift/rotate found.\n");
5698 op1_vectype
= get_vectype_for_scalar_type (vinfo
, TREE_TYPE (op1
),
5700 incompatible_op1_vectype_p
5701 = (op1_vectype
== NULL_TREE
5702 || maybe_ne (TYPE_VECTOR_SUBPARTS (op1_vectype
),
5703 TYPE_VECTOR_SUBPARTS (vectype
))
5704 || TYPE_MODE (op1_vectype
) != TYPE_MODE (vectype
));
5705 if (incompatible_op1_vectype_p
5707 || SLP_TREE_DEF_TYPE (slp_op1
) != vect_constant_def
5708 || slp_op1
->refcnt
!= 1))
5710 if (dump_enabled_p ())
5711 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5712 "unusable type for last operand in"
5713 " vector/vector shift/rotate.\n");
5717 /* See if the machine has a vector shifted by scalar insn and if not
5718 then see if it has a vector shifted by vector insn. */
5721 optab
= optab_for_tree_code (code
, vectype
, optab_scalar
);
5723 && optab_handler (optab
, TYPE_MODE (vectype
)) != CODE_FOR_nothing
)
5725 if (dump_enabled_p ())
5726 dump_printf_loc (MSG_NOTE
, vect_location
,
5727 "vector/scalar shift/rotate found.\n");
5731 optab
= optab_for_tree_code (code
, vectype
, optab_vector
);
5733 && (optab_handler (optab
, TYPE_MODE (vectype
))
5734 != CODE_FOR_nothing
))
5736 scalar_shift_arg
= false;
5738 if (dump_enabled_p ())
5739 dump_printf_loc (MSG_NOTE
, vect_location
,
5740 "vector/vector shift/rotate found.\n");
5743 op1_vectype
= get_vectype_for_scalar_type (vinfo
,
5747 /* Unlike the other binary operators, shifts/rotates have
5748 the rhs being int, instead of the same type as the lhs,
5749 so make sure the scalar is the right type if we are
5750 dealing with vectors of long long/long/short/char. */
5751 incompatible_op1_vectype_p
5753 || !tree_nop_conversion_p (TREE_TYPE (vectype
),
5759 /* Supportable by target? */
5762 if (dump_enabled_p ())
5763 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5767 vec_mode
= TYPE_MODE (vectype
);
5768 icode
= (int) optab_handler (optab
, vec_mode
);
5769 if (icode
== CODE_FOR_nothing
)
5771 if (dump_enabled_p ())
5772 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5773 "op not supported by target.\n");
5774 /* Check only during analysis. */
5775 if (maybe_ne (GET_MODE_SIZE (vec_mode
), UNITS_PER_WORD
)
5777 && !vect_worthwhile_without_simd_p (vinfo
, code
)))
5779 if (dump_enabled_p ())
5780 dump_printf_loc (MSG_NOTE
, vect_location
,
5781 "proceeding using word mode.\n");
5784 /* Worthwhile without SIMD support? Check only during analysis. */
5786 && !VECTOR_MODE_P (TYPE_MODE (vectype
))
5787 && !vect_worthwhile_without_simd_p (vinfo
, code
))
5789 if (dump_enabled_p ())
5790 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5791 "not worthwhile without SIMD support.\n");
5795 if (!vec_stmt
) /* transformation not required. */
5798 && (!vect_maybe_update_slp_op_vectype (slp_op0
, vectype
)
5799 || (!scalar_shift_arg
5800 && (!incompatible_op1_vectype_p
5801 || dt
[1] == vect_constant_def
)
5802 && !vect_maybe_update_slp_op_vectype
5804 incompatible_op1_vectype_p
? vectype
: op1_vectype
))))
5806 if (dump_enabled_p ())
5807 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5808 "incompatible vector types for invariants\n");
5811 /* Now adjust the constant shift amount in place. */
5813 && incompatible_op1_vectype_p
5814 && dt
[1] == vect_constant_def
)
5816 for (unsigned i
= 0;
5817 i
< SLP_TREE_SCALAR_OPS (slp_op1
).length (); ++i
)
5819 SLP_TREE_SCALAR_OPS (slp_op1
)[i
]
5820 = fold_convert (TREE_TYPE (vectype
),
5821 SLP_TREE_SCALAR_OPS (slp_op1
)[i
]);
5822 gcc_assert ((TREE_CODE (SLP_TREE_SCALAR_OPS (slp_op1
)[i
])
5826 STMT_VINFO_TYPE (stmt_info
) = shift_vec_info_type
;
5827 DUMP_VECT_SCOPE ("vectorizable_shift");
5828 vect_model_simple_cost (vinfo
, stmt_info
, ncopies
, dt
,
5829 scalar_shift_arg
? 1 : ndts
, slp_node
, cost_vec
);
5835 if (dump_enabled_p ())
5836 dump_printf_loc (MSG_NOTE
, vect_location
,
5837 "transform binary/unary operation.\n");
5839 if (incompatible_op1_vectype_p
&& !slp_node
)
5841 op1
= fold_convert (TREE_TYPE (vectype
), op1
);
5842 if (dt
[1] != vect_constant_def
)
5843 op1
= vect_init_vector (vinfo
, stmt_info
, op1
,
5844 TREE_TYPE (vectype
), NULL
);
5848 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
5850 prev_stmt_info
= NULL
;
5851 for (j
= 0; j
< ncopies
; j
++)
5856 if (scalar_shift_arg
)
5858 /* Vector shl and shr insn patterns can be defined with scalar
5859 operand 2 (shift operand). In this case, use constant or loop
5860 invariant op1 directly, without extending it to vector mode
5862 optab_op2_mode
= insn_data
[icode
].operand
[2].mode
;
5863 if (!VECTOR_MODE_P (optab_op2_mode
))
5865 if (dump_enabled_p ())
5866 dump_printf_loc (MSG_NOTE
, vect_location
,
5867 "operand 1 using scalar mode.\n");
5869 vec_oprnds1
.create (slp_node
? slp_node
->vec_stmts_size
: 1);
5870 vec_oprnds1
.quick_push (vec_oprnd1
);
	      /* Store vec_oprnd1 for every vector stmt to be created
		 for SLP_NODE.  We check during the analysis that all
		 the shift arguments are the same.
		 TODO: Allow different constants for different vector
		 stmts generated for an SLP instance.  */
5878 for (k
= 0; k
< slp_node
->vec_stmts_size
- 1; k
++)
5879 vec_oprnds1
.quick_push (vec_oprnd1
);
5883 else if (slp_node
&& incompatible_op1_vectype_p
)
5885 if (was_scalar_shift_arg
)
	  /* If the argument was the same in all lanes create
	     the correctly typed vector shift amount directly.  */
5889 op1
= fold_convert (TREE_TYPE (vectype
), op1
);
5890 op1
= vect_init_vector (vinfo
, stmt_info
,
5891 op1
, TREE_TYPE (vectype
),
5892 !loop_vinfo
? gsi
: NULL
);
5893 vec_oprnd1
= vect_init_vector (vinfo
, stmt_info
, op1
, vectype
,
5894 !loop_vinfo
? gsi
: NULL
);
5895 vec_oprnds1
.create (slp_node
->vec_stmts_size
);
5896 for (k
= 0; k
< slp_node
->vec_stmts_size
; k
++)
5897 vec_oprnds1
.quick_push (vec_oprnd1
);
5899 else if (dt
[1] == vect_constant_def
)
5900 /* The constant shift amount has been adjusted in place. */
5903 gcc_assert (TYPE_MODE (op1_vectype
) == TYPE_MODE (vectype
));
      /* vec_oprnd1 is available if operand 1 should be of a scalar-type
	 (a special case for certain kind of vector shifts); otherwise,
	 operand 1 should be of a vector type (the usual case).  */
5910 vect_get_vec_defs (vinfo
, op0
, NULL_TREE
, stmt_info
,
5911 &vec_oprnds0
, NULL
, slp_node
);
5913 vect_get_vec_defs (vinfo
, op0
, op1
, stmt_info
,
5914 &vec_oprnds0
, &vec_oprnds1
, slp_node
);
5917 vect_get_vec_defs_for_stmt_copy (vinfo
, &vec_oprnds0
, &vec_oprnds1
);
5919 /* Arguments are ready. Create the new vector stmt. */
5920 stmt_vec_info new_stmt_info
= NULL
;
5921 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vop0
)
5923 vop1
= vec_oprnds1
[i
];
5924 gassign
*new_stmt
= gimple_build_assign (vec_dest
, code
, vop0
, vop1
);
5925 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
5926 gimple_assign_set_lhs (new_stmt
, new_temp
);
5928 = vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
5930 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt_info
);
5937 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt_info
;
5939 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt_info
;
5940 prev_stmt_info
= new_stmt_info
;
5943 vec_oprnds0
.release ();
5944 vec_oprnds1
.release ();
/* Function vectorizable_operation.

   Check if STMT_INFO performs a binary, unary or ternary operation that can
   be vectorized.
   If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at GSI.
   Return true if STMT_INFO is vectorizable in this way.  */
5959 vectorizable_operation (vec_info
*vinfo
,
5960 stmt_vec_info stmt_info
, gimple_stmt_iterator
*gsi
,
5961 stmt_vec_info
*vec_stmt
, slp_tree slp_node
,
5962 stmt_vector_for_cost
*cost_vec
)
5966 tree op0
, op1
= NULL_TREE
, op2
= NULL_TREE
;
5968 loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
);
5969 enum tree_code code
, orig_code
;
5970 machine_mode vec_mode
;
5974 bool target_support_p
;
5975 enum vect_def_type dt
[3]
5976 = {vect_unknown_def_type
, vect_unknown_def_type
, vect_unknown_def_type
};
5978 stmt_vec_info prev_stmt_info
;
5979 poly_uint64 nunits_in
;
5980 poly_uint64 nunits_out
;
5982 int ncopies
, vec_num
;
5984 vec
<tree
> vec_oprnds0
= vNULL
;
5985 vec
<tree
> vec_oprnds1
= vNULL
;
5986 vec
<tree
> vec_oprnds2
= vNULL
;
5987 tree vop0
, vop1
, vop2
;
5988 bb_vec_info bb_vinfo
= dyn_cast
<bb_vec_info
> (vinfo
);
5990 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
5993 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
5997 /* Is STMT a vectorizable binary/unary operation? */
5998 gassign
*stmt
= dyn_cast
<gassign
*> (stmt_info
->stmt
);
6002 /* Loads and stores are handled in vectorizable_{load,store}. */
6003 if (STMT_VINFO_DATA_REF (stmt_info
))
6006 orig_code
= code
= gimple_assign_rhs_code (stmt
);
6008 /* Shifts are handled in vectorizable_shift. */
6009 if (code
== LSHIFT_EXPR
6010 || code
== RSHIFT_EXPR
6011 || code
== LROTATE_EXPR
6012 || code
== RROTATE_EXPR
)
6015 /* Comparisons are handled in vectorizable_comparison. */
6016 if (TREE_CODE_CLASS (code
) == tcc_comparison
)
6019 /* Conditions are handled in vectorizable_condition. */
6020 if (code
== COND_EXPR
)
6023 /* For pointer addition and subtraction, we should use the normal
6024 plus and minus for the vector operation. */
6025 if (code
== POINTER_PLUS_EXPR
)
6027 if (code
== POINTER_DIFF_EXPR
)
6030 /* Support only unary or binary operations. */
6031 op_type
= TREE_CODE_LENGTH (code
);
6032 if (op_type
!= unary_op
&& op_type
!= binary_op
&& op_type
!= ternary_op
)
6034 if (dump_enabled_p ())
6035 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6036 "num. args = %d (not unary/binary/ternary op).\n",
6041 scalar_dest
= gimple_assign_lhs (stmt
);
6042 vectype_out
= STMT_VINFO_VECTYPE (stmt_info
);
  /* Most operations cannot handle bit-precision types without extra
     truncations.  */
  bool mask_op_p = VECTOR_BOOLEAN_TYPE_P (vectype_out);
  if (!mask_op_p
6048 && !type_has_mode_precision_p (TREE_TYPE (scalar_dest
))
6049 /* Exception are bitwise binary operations. */
6050 && code
!= BIT_IOR_EXPR
6051 && code
!= BIT_XOR_EXPR
6052 && code
!= BIT_AND_EXPR
)
6054 if (dump_enabled_p ())
6055 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6056 "bit-precision arithmetic not supported.\n");
6061 if (!vect_is_simple_use (vinfo
, stmt_info
, slp_node
,
6062 0, &op0
, &slp_op0
, &dt
[0], &vectype
))
6064 if (dump_enabled_p ())
6065 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6066 "use not simple.\n");
  /* If op0 is an external or constant def, infer the vector type
     from the scalar type.  */
      /* For boolean type we cannot determine vectype by
	 invariant value (don't know whether it is a vector
	 of booleans or vector of integers).  We use output
	 vectype because operations on boolean don't change
	 type.  */
6078 if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op0
)))
6080 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (scalar_dest
)))
6082 if (dump_enabled_p ())
6083 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6084 "not supported operation on bool value.\n");
6087 vectype
= vectype_out
;
6090 vectype
= get_vectype_for_scalar_type (vinfo
, TREE_TYPE (op0
),
6094 gcc_assert (vectype
);
6097 if (dump_enabled_p ())
6098 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6099 "no vectype for scalar type %T\n",
6105 nunits_out
= TYPE_VECTOR_SUBPARTS (vectype_out
);
6106 nunits_in
= TYPE_VECTOR_SUBPARTS (vectype
);
6107 if (maybe_ne (nunits_out
, nunits_in
))
6110 tree vectype2
= NULL_TREE
, vectype3
= NULL_TREE
;
6111 slp_tree slp_op1
= NULL
, slp_op2
= NULL
;
6112 if (op_type
== binary_op
|| op_type
== ternary_op
)
6114 if (!vect_is_simple_use (vinfo
, stmt_info
, slp_node
,
6115 1, &op1
, &slp_op1
, &dt
[1], &vectype2
))
6117 if (dump_enabled_p ())
6118 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6119 "use not simple.\n");
6123 if (op_type
== ternary_op
)
6125 if (!vect_is_simple_use (vinfo
, stmt_info
, slp_node
,
6126 2, &op2
, &slp_op2
, &dt
[2], &vectype3
))
6128 if (dump_enabled_p ())
6129 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6130 "use not simple.\n");
  /* Multiple types in SLP are handled by creating the appropriate number of
     vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
     case of SLP.  */
6141 vec_num
= SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
);
6145 ncopies
= vect_get_num_copies (loop_vinfo
, vectype
);
6149 gcc_assert (ncopies
>= 1);
  /* Reject attempts to combine mask types with nonmask types, e.g. if
     we have an AND between a (nonmask) boolean loaded from memory and
     a (mask) boolean result of a comparison.

     TODO: We could easily fix these cases up using pattern statements.  */
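  /* Illustrative example of the rejected mix (not from the original sources):
     for "flag = bools[i]; out[i] = flag & (a[i] < b[i]);" the loaded FLAG is
     a non-mask boolean vector while the comparison result is a mask vector,
     so the BIT_AND_EXPR would combine incompatible representations.  */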
6156 if (VECTOR_BOOLEAN_TYPE_P (vectype
) != mask_op_p
6157 || (vectype2
&& VECTOR_BOOLEAN_TYPE_P (vectype2
) != mask_op_p
)
6158 || (vectype3
&& VECTOR_BOOLEAN_TYPE_P (vectype3
) != mask_op_p
))
6160 if (dump_enabled_p ())
6161 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6162 "mixed mask and nonmask vector types\n");
6166 /* Supportable by target? */
6168 vec_mode
= TYPE_MODE (vectype
);
6169 if (code
== MULT_HIGHPART_EXPR
)
6170 target_support_p
= can_mult_highpart_p (vec_mode
, TYPE_UNSIGNED (vectype
));
6173 optab
= optab_for_tree_code (code
, vectype
, optab_default
);
6176 if (dump_enabled_p ())
6177 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6181 target_support_p
= (optab_handler (optab
, vec_mode
)
6182 != CODE_FOR_nothing
);
6185 if (!target_support_p
)
6187 if (dump_enabled_p ())
6188 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6189 "op not supported by target.\n");
6190 /* Check only during analysis. */
6191 if (maybe_ne (GET_MODE_SIZE (vec_mode
), UNITS_PER_WORD
)
6192 || (!vec_stmt
&& !vect_worthwhile_without_simd_p (vinfo
, code
)))
6194 if (dump_enabled_p ())
6195 dump_printf_loc (MSG_NOTE
, vect_location
,
6196 "proceeding using word mode.\n");
6199 /* Worthwhile without SIMD support? Check only during analysis. */
6200 if (!VECTOR_MODE_P (vec_mode
)
6202 && !vect_worthwhile_without_simd_p (vinfo
, code
))
6204 if (dump_enabled_p ())
6205 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6206 "not worthwhile without SIMD support.\n");
6210 int reduc_idx
= STMT_VINFO_REDUC_IDX (stmt_info
);
6211 vec_loop_masks
*masks
= (loop_vinfo
? &LOOP_VINFO_MASKS (loop_vinfo
) : NULL
);
6212 internal_fn cond_fn
= get_conditional_internal_fn (code
);
6214 if (!vec_stmt
) /* transformation not required. */
      /* If this operation is part of a reduction, a fully-masked loop
	 should only change the active lanes of the reduction chain,
	 keeping the inactive lanes as-is.  */
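      /* For instance (illustration only): a PLUS_EXPR feeding a reduction is
	 then emitted below as IFN_COND_ADD (loop_mask, vop0, vop1,
	 reduction_input), so the inactive lanes simply pass the reduction
	 input through unchanged.  */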
6220 && LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo
)
6223 if (cond_fn
== IFN_LAST
6224 || !direct_internal_fn_supported_p (cond_fn
, vectype
,
6225 OPTIMIZE_FOR_SPEED
))
6227 if (dump_enabled_p ())
6228 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6229 "can't use a fully-masked loop because no"
6230 " conditional operation is available.\n");
6231 LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo
) = false;
6234 vect_record_loop_mask (loop_vinfo
, masks
, ncopies
* vec_num
,
6238 /* Put types on constant and invariant SLP children. */
6240 && (!vect_maybe_update_slp_op_vectype (slp_op0
, vectype
)
6241 || !vect_maybe_update_slp_op_vectype (slp_op1
, vectype
)
6242 || !vect_maybe_update_slp_op_vectype (slp_op2
, vectype
)))
6244 if (dump_enabled_p ())
6245 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6246 "incompatible vector types for invariants\n");
6250 STMT_VINFO_TYPE (stmt_info
) = op_vec_info_type
;
6251 DUMP_VECT_SCOPE ("vectorizable_operation");
6252 vect_model_simple_cost (vinfo
, stmt_info
,
6253 ncopies
, dt
, ndts
, slp_node
, cost_vec
);
6259 if (dump_enabled_p ())
6260 dump_printf_loc (MSG_NOTE
, vect_location
,
6261 "transform binary/unary operation.\n");
6263 bool masked_loop_p
= loop_vinfo
&& LOOP_VINFO_FULLY_MASKED_P (loop_vinfo
);
  /* POINTER_DIFF_EXPR has pointer arguments which are vectorized as
     vectors with unsigned elements, but the result is signed.  So, we
     need to compute the MINUS_EXPR into vectype temporary and
     VIEW_CONVERT_EXPR it into the final vectype_out result.  */
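  /* Illustration (not from the original sources): for "ptrdiff_t d = p - q;"
     the MINUS_EXPR is computed on vectors of unsigned pointer-sized elements
     and the signed result vector is produced by the VIEW_CONVERT_EXPR
     emitted further below.  */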
6269 tree vec_cvt_dest
= NULL_TREE
;
6270 if (orig_code
== POINTER_DIFF_EXPR
)
6272 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
6273 vec_cvt_dest
= vect_create_destination_var (scalar_dest
, vectype_out
);
6277 vec_dest
= vect_create_destination_var (scalar_dest
, vectype_out
);
  /* In case the vectorization factor (VF) is bigger than the number
     of elements that we can fit in a vectype (nunits), we have to generate
     more than one vector stmt - i.e - we need to "unroll" the
     vector stmt by a factor VF/nunits.  In doing so, we record a pointer
     from one copy of the vector stmt to the next, in the field
     STMT_VINFO_RELATED_STMT.  This is necessary in order to allow following
     stages to find the correct vector defs to be used when vectorizing
     stmts that use the defs of the current stmt.  The example below
     illustrates the vectorization process when VF=16 and nunits=4 (i.e.,
     we need to create 4 vectorized stmts):

     before vectorization:
                                RELATED_STMT    VEC_STMT
        S1:     x = memref      -               -
        S2:     z = x + 1       -               -

     step 1: vectorize stmt S1 (done in vectorizable_load. See more details
             there):
                                RELATED_STMT    VEC_STMT
        VS1_0:  vx0 = memref0   VS1_1           -
        VS1_1:  vx1 = memref1   VS1_2           -
        VS1_2:  vx2 = memref2   VS1_3           -
        VS1_3:  vx3 = memref3   -               -
        S1:     x = load        -               VS1_0
        S2:     z = x + 1       -               -

     step2: vectorize stmt S2 (done here):
        To vectorize stmt S2 we first need to find the relevant vector
        def for the first operand 'x'.  This is, as usual, obtained from
        the vector stmt recorded in the STMT_VINFO_VEC_STMT of the stmt
        that defines 'x' (S1).  This way we find the stmt VS1_0, and the
        relevant vector def 'vx0'.  Having found 'vx0' we can generate
        the vector stmt VS2_0, and as usual, record it in the
        STMT_VINFO_VEC_STMT of stmt S2.
        When creating the second copy (VS2_1), we obtain the relevant vector
        def from the vector stmt recorded in the STMT_VINFO_RELATED_STMT of
        stmt VS1_0.  This way we find the stmt VS1_1 and the relevant
        vector def 'vx1'.  Using 'vx1' we create stmt VS2_1 and record a
        pointer to it in the STMT_VINFO_RELATED_STMT of the vector stmt VS2_0.
        Similarly when creating stmts VS2_2 and VS2_3.  This is the resulting
        chain of stmts and pointers:
                                RELATED_STMT    VEC_STMT
        VS1_0:  vx0 = memref0   VS1_1           -
        VS1_1:  vx1 = memref1   VS1_2           -
        VS1_2:  vx2 = memref2   VS1_3           -
        VS1_3:  vx3 = memref3   -               -
        S1:     x = load        -               VS1_0
        VS2_0:  vz0 = vx0 + v1  VS2_1           -
        VS2_1:  vz1 = vx1 + v1  VS2_2           -
        VS2_2:  vz2 = vx2 + v1  VS2_3           -
        VS2_3:  vz3 = vx3 + v1  -               -
        S2:     z = x + 1       -               VS2_0  */
6332 prev_stmt_info
= NULL
;
6333 for (j
= 0; j
< ncopies
; j
++)
6338 if (op_type
== binary_op
)
6339 vect_get_vec_defs (vinfo
, op0
, op1
, stmt_info
,
6340 &vec_oprnds0
, &vec_oprnds1
, slp_node
);
6341 else if (op_type
== ternary_op
)
6345 auto_vec
<vec
<tree
> > vec_defs(3);
6346 vect_get_slp_defs (vinfo
, slp_node
, &vec_defs
);
6347 vec_oprnds0
= vec_defs
[0];
6348 vec_oprnds1
= vec_defs
[1];
6349 vec_oprnds2
= vec_defs
[2];
6353 vect_get_vec_defs (vinfo
, op0
, op1
, stmt_info
, &vec_oprnds0
,
6354 &vec_oprnds1
, NULL
);
6355 vect_get_vec_defs (vinfo
, op2
, NULL_TREE
, stmt_info
,
6356 &vec_oprnds2
, NULL
, NULL
);
6360 vect_get_vec_defs (vinfo
, op0
, NULL_TREE
, stmt_info
, &vec_oprnds0
,
6365 vect_get_vec_defs_for_stmt_copy (vinfo
, &vec_oprnds0
, &vec_oprnds1
);
6366 if (op_type
== ternary_op
)
6368 tree vec_oprnd
= vec_oprnds2
.pop ();
6369 vec_oprnds2
.quick_push (vect_get_vec_def_for_stmt_copy (vinfo
,
6374 /* Arguments are ready. Create the new vector stmt. */
6375 stmt_vec_info new_stmt_info
= NULL
;
6376 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vop0
)
6378 vop1
= ((op_type
== binary_op
|| op_type
== ternary_op
)
6379 ? vec_oprnds1
[i
] : NULL_TREE
);
6380 vop2
= ((op_type
== ternary_op
)
6381 ? vec_oprnds2
[i
] : NULL_TREE
);
6382 if (masked_loop_p
&& reduc_idx
>= 0)
	      /* Perform the operation on active elements only and take
		 inactive elements from the reduction chain input.  */
6387 vop2
= reduc_idx
== 1 ? vop1
: vop0
;
6388 tree mask
= vect_get_loop_mask (gsi
, masks
, vec_num
* ncopies
,
6389 vectype
, i
* ncopies
+ j
);
6390 gcall
*call
= gimple_build_call_internal (cond_fn
, 4, mask
,
6392 new_temp
= make_ssa_name (vec_dest
, call
);
6393 gimple_call_set_lhs (call
, new_temp
);
6394 gimple_call_set_nothrow (call
, true);
6396 = vect_finish_stmt_generation (vinfo
, stmt_info
, call
, gsi
);
6400 gassign
*new_stmt
= gimple_build_assign (vec_dest
, code
,
6402 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
6403 gimple_assign_set_lhs (new_stmt
, new_temp
);
6405 = vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
6408 new_temp
= build1 (VIEW_CONVERT_EXPR
, vectype_out
, new_temp
);
6410 = gimple_build_assign (vec_cvt_dest
, VIEW_CONVERT_EXPR
,
6412 new_temp
= make_ssa_name (vec_cvt_dest
, new_stmt
);
6413 gimple_assign_set_lhs (new_stmt
, new_temp
);
6414 new_stmt_info
= vect_finish_stmt_generation (vinfo
, stmt_info
,
6419 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt_info
);
6426 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt_info
;
6428 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt_info
;
6429 prev_stmt_info
= new_stmt_info
;
6432 vec_oprnds0
.release ();
6433 vec_oprnds1
.release ();
6434 vec_oprnds2
.release ();
6439 /* A helper function to ensure data reference DR_INFO's base alignment. */
6442 ensure_base_align (dr_vec_info
*dr_info
)
6444 if (dr_info
->misalignment
== DR_MISALIGNMENT_UNINITIALIZED
)
6447 if (dr_info
->base_misaligned
)
6449 tree base_decl
= dr_info
->base_decl
;
      // We should only be able to increase the alignment of a base object if
      // we know what its new alignment should be at compile time.
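      // Example (illustrative, not from the original sources): a global
      // "int a[256]" with DECL_ALIGN below the target's preferred vector
      // alignment can have its alignment raised here, whereas a dereference
      // through a pointer with unknown runtime alignment cannot.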
6453 unsigned HOST_WIDE_INT align_base_to
=
6454 DR_TARGET_ALIGNMENT (dr_info
).to_constant () * BITS_PER_UNIT
;
6456 if (decl_in_symtab_p (base_decl
))
6457 symtab_node::get (base_decl
)->increase_alignment (align_base_to
);
6458 else if (DECL_ALIGN (base_decl
) < align_base_to
)
6460 SET_DECL_ALIGN (base_decl
, align_base_to
);
6461 DECL_USER_ALIGN (base_decl
) = 1;
6463 dr_info
->base_misaligned
= false;
/* Function get_group_alias_ptr_type.

   Return the alias type for the group starting at FIRST_STMT_INFO.  */
6473 get_group_alias_ptr_type (stmt_vec_info first_stmt_info
)
6475 struct data_reference
*first_dr
, *next_dr
;
6477 first_dr
= STMT_VINFO_DATA_REF (first_stmt_info
);
6478 stmt_vec_info next_stmt_info
= DR_GROUP_NEXT_ELEMENT (first_stmt_info
);
6479 while (next_stmt_info
)
6481 next_dr
= STMT_VINFO_DATA_REF (next_stmt_info
);
6482 if (get_alias_set (DR_REF (first_dr
))
6483 != get_alias_set (DR_REF (next_dr
)))
6485 if (dump_enabled_p ())
6486 dump_printf_loc (MSG_NOTE
, vect_location
,
6487 "conflicting alias set types.\n");
6488 return ptr_type_node
;
6490 next_stmt_info
= DR_GROUP_NEXT_ELEMENT (next_stmt_info
);
6492 return reference_alias_ptr_type (DR_REF (first_dr
));
/* Function scan_operand_equal_p.

   Helper function for check_scan_store.  Compare two references
   with .GOMP_SIMD_LANE bases.  */
6502 scan_operand_equal_p (tree ref1
, tree ref2
)
6504 tree ref
[2] = { ref1
, ref2
};
6505 poly_int64 bitsize
[2], bitpos
[2];
6506 tree offset
[2], base
[2];
6507 for (int i
= 0; i
< 2; ++i
)
6510 int unsignedp
, reversep
, volatilep
= 0;
6511 base
[i
] = get_inner_reference (ref
[i
], &bitsize
[i
], &bitpos
[i
],
6512 &offset
[i
], &mode
, &unsignedp
,
6513 &reversep
, &volatilep
);
6514 if (reversep
|| volatilep
|| maybe_ne (bitpos
[i
], 0))
6516 if (TREE_CODE (base
[i
]) == MEM_REF
6517 && offset
[i
] == NULL_TREE
6518 && TREE_CODE (TREE_OPERAND (base
[i
], 0)) == SSA_NAME
)
6520 gimple
*def_stmt
= SSA_NAME_DEF_STMT (TREE_OPERAND (base
[i
], 0));
6521 if (is_gimple_assign (def_stmt
)
6522 && gimple_assign_rhs_code (def_stmt
) == POINTER_PLUS_EXPR
6523 && TREE_CODE (gimple_assign_rhs1 (def_stmt
)) == ADDR_EXPR
6524 && TREE_CODE (gimple_assign_rhs2 (def_stmt
)) == SSA_NAME
)
6526 if (maybe_ne (mem_ref_offset (base
[i
]), 0))
6528 base
[i
] = TREE_OPERAND (gimple_assign_rhs1 (def_stmt
), 0);
6529 offset
[i
] = gimple_assign_rhs2 (def_stmt
);
6534 if (!operand_equal_p (base
[0], base
[1], 0))
6536 if (maybe_ne (bitsize
[0], bitsize
[1]))
6538 if (offset
[0] != offset
[1])
6540 if (!offset
[0] || !offset
[1])
6542 if (!operand_equal_p (offset
[0], offset
[1], 0))
6545 for (int i
= 0; i
< 2; ++i
)
6547 step
[i
] = integer_one_node
;
6548 if (TREE_CODE (offset
[i
]) == SSA_NAME
)
6550 gimple
*def_stmt
= SSA_NAME_DEF_STMT (offset
[i
]);
6551 if (is_gimple_assign (def_stmt
)
6552 && gimple_assign_rhs_code (def_stmt
) == MULT_EXPR
6553 && (TREE_CODE (gimple_assign_rhs2 (def_stmt
))
6556 step
[i
] = gimple_assign_rhs2 (def_stmt
);
6557 offset
[i
] = gimple_assign_rhs1 (def_stmt
);
6560 else if (TREE_CODE (offset
[i
]) == MULT_EXPR
)
6562 step
[i
] = TREE_OPERAND (offset
[i
], 1);
6563 offset
[i
] = TREE_OPERAND (offset
[i
], 0);
6565 tree rhs1
= NULL_TREE
;
6566 if (TREE_CODE (offset
[i
]) == SSA_NAME
)
6568 gimple
*def_stmt
= SSA_NAME_DEF_STMT (offset
[i
]);
6569 if (gimple_assign_cast_p (def_stmt
))
6570 rhs1
= gimple_assign_rhs1 (def_stmt
);
6572 else if (CONVERT_EXPR_P (offset
[i
]))
6573 rhs1
= TREE_OPERAND (offset
[i
], 0);
6575 && INTEGRAL_TYPE_P (TREE_TYPE (rhs1
))
6576 && INTEGRAL_TYPE_P (TREE_TYPE (offset
[i
]))
6577 && (TYPE_PRECISION (TREE_TYPE (offset
[i
]))
6578 >= TYPE_PRECISION (TREE_TYPE (rhs1
))))
6581 if (!operand_equal_p (offset
[0], offset
[1], 0)
6582 || !operand_equal_p (step
[0], step
[1], 0))
enum scan_store_kind {
  /* Normal permutation.  */
  scan_store_kind_perm,

  /* Whole vector left shift permutation with zero init.  */
  scan_store_kind_lshift_zero,

  /* Whole vector left shift permutation and VEC_COND_EXPR.  */
  scan_store_kind_lshift_cond
};

/* Function scan_store_can_perm_p.

   Verify if we can perform the needed permutations or whole vector shifts.
   Return -1 on failure, otherwise exact log2 of vectype's nunits.
   USE_WHOLE_VECTOR is a vector of enum scan_store_kind which operation
   to do at each step.  */
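/* Worked illustration (editorial note, consistent with the examples further
   below): for nunits == 8 the permutations tried at the successive steps
   select
     i == 0:  { 0, 8, 9, 10, 11, 12, 13, 14 }
     i == 1:  { 0, 1, 8, 9, 10, 11, 12, 13 }
     i == 2:  { 0, 1, 2, 3, 8, 9, 10, 11 }
     i == 3:  { 7, 7, 7, 7, 7, 7, 7, 7 }   (broadcast of the last element).  */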
6609 scan_store_can_perm_p (tree vectype
, tree init
,
6610 vec
<enum scan_store_kind
> *use_whole_vector
= NULL
)
6612 enum machine_mode vec_mode
= TYPE_MODE (vectype
);
6613 unsigned HOST_WIDE_INT nunits
;
6614 if (!TYPE_VECTOR_SUBPARTS (vectype
).is_constant (&nunits
))
6616 int units_log2
= exact_log2 (nunits
);
6617 if (units_log2
<= 0)
6621 enum scan_store_kind whole_vector_shift_kind
= scan_store_kind_perm
;
6622 for (i
= 0; i
<= units_log2
; ++i
)
6624 unsigned HOST_WIDE_INT j
, k
;
6625 enum scan_store_kind kind
= scan_store_kind_perm
;
6626 vec_perm_builder
sel (nunits
, nunits
, 1);
6627 sel
.quick_grow (nunits
);
6628 if (i
== units_log2
)
6630 for (j
= 0; j
< nunits
; ++j
)
6631 sel
[j
] = nunits
- 1;
6635 for (j
= 0; j
< (HOST_WIDE_INT_1U
<< i
); ++j
)
6637 for (k
= 0; j
< nunits
; ++j
, ++k
)
6638 sel
[j
] = nunits
+ k
;
6640 vec_perm_indices
indices (sel
, i
== units_log2
? 1 : 2, nunits
);
6641 if (!can_vec_perm_const_p (vec_mode
, indices
))
6643 if (i
== units_log2
)
6646 if (whole_vector_shift_kind
== scan_store_kind_perm
)
6648 if (optab_handler (vec_shl_optab
, vec_mode
) == CODE_FOR_nothing
)
6650 whole_vector_shift_kind
= scan_store_kind_lshift_zero
;
	  /* Whole vector shifts shift in zeros, so if init is all zero
	     constant, there is no need to do anything further.  */
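	  /* E.g. (illustration only) a "+" inscan reduction whose initializer
	     is the constant 0 can use the plain whole-vector shift, while a
	     nonzero initializer needs the VEC_COND_EXPR variant to re-insert
	     the initializer into the shifted-in lanes.  */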
6653 if ((TREE_CODE (init
) != INTEGER_CST
6654 && TREE_CODE (init
) != REAL_CST
)
6655 || !initializer_zerop (init
))
6657 tree masktype
= truth_type_for (vectype
);
6658 if (!expand_vec_cond_expr_p (vectype
, masktype
, VECTOR_CST
))
6660 whole_vector_shift_kind
= scan_store_kind_lshift_cond
;
6663 kind
= whole_vector_shift_kind
;
6665 if (use_whole_vector
)
6667 if (kind
!= scan_store_kind_perm
&& use_whole_vector
->is_empty ())
6668 use_whole_vector
->safe_grow_cleared (i
);
6669 if (kind
!= scan_store_kind_perm
|| !use_whole_vector
->is_empty ())
6670 use_whole_vector
->safe_push (kind
);
/* Function check_scan_store.

   Check magic stores for #pragma omp scan {in,ex}clusive reductions.  */
6683 check_scan_store (vec_info
*vinfo
, stmt_vec_info stmt_info
, tree vectype
,
6684 enum vect_def_type rhs_dt
, bool slp
, tree mask
,
6685 vect_memory_access_type memory_access_type
)
6687 loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
);
6688 dr_vec_info
*dr_info
= STMT_VINFO_DR_INFO (stmt_info
);
6691 gcc_assert (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) > 1);
6694 || memory_access_type
!= VMAT_CONTIGUOUS
6695 || TREE_CODE (DR_BASE_ADDRESS (dr_info
->dr
)) != ADDR_EXPR
6696 || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (dr_info
->dr
), 0))
6697 || loop_vinfo
== NULL
6698 || LOOP_VINFO_FULLY_MASKED_P (loop_vinfo
)
6699 || STMT_VINFO_GROUPED_ACCESS (stmt_info
)
6700 || !integer_zerop (get_dr_vinfo_offset (vinfo
, dr_info
))
6701 || !integer_zerop (DR_INIT (dr_info
->dr
))
6702 || !(ref_type
= reference_alias_ptr_type (DR_REF (dr_info
->dr
)))
6703 || !alias_sets_conflict_p (get_alias_set (vectype
),
6704 get_alias_set (TREE_TYPE (ref_type
))))
6706 if (dump_enabled_p ())
6707 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6708 "unsupported OpenMP scan store.\n");
  /* We need to pattern match code built by OpenMP lowering and simplified
     by following optimizations into something we can handle.
     #pragma omp simd reduction(inscan,+:r)
     for (...)
       {
	 r += something ();
	 #pragma omp scan inclusive (r)
	 use (r);
       }
     shall have body with:
       // Initialization for input phase, store the reduction initializer:
       _20 = .GOMP_SIMD_LANE (simduid.3_14(D), 0);
       _21 = .GOMP_SIMD_LANE (simduid.3_14(D), 1);
       ...
       // Actual input phase:
       ...
       r.0_5 = D.2042[_20];
       ...
       // Initialization for scan phase:
       _25 = .GOMP_SIMD_LANE (simduid.3_14(D), 2);
       ...
       // Actual scan phase:
       ...
       r.1_8 = D.2042[_20];
       ...
     The "omp simd array" variable D.2042 holds the privatized copy used
     inside of the loop and D.2043 is another one that holds copies of
     the current original list item.  The separate GOMP_SIMD_LANE ifn
     kinds are there in order to allow optimizing the initializer store
     and combiner sequence, e.g. if it is originally some C++ish user
     defined reduction, but allow the vectorizer to pattern recognize it
     and turn into the appropriate vectorized scan.

     For exclusive scan, this is slightly different:
     #pragma omp simd reduction(inscan,+:r)
     for (...)
       {
	 use (r);
	 #pragma omp scan exclusive (r)
	 r += something ();
       }
     shall have body with:
       // Initialization for input phase, store the reduction initializer:
       _20 = .GOMP_SIMD_LANE (simduid.3_14(D), 0);
       _21 = .GOMP_SIMD_LANE (simduid.3_14(D), 1);
       ...
       // Actual input phase:
       ...
       r.0_5 = D.2042[_20];
       ...
       // Initialization for scan phase:
       _25 = .GOMP_SIMD_LANE (simduid.3_14(D), 3);
       ...
       // Actual scan phase:
       ...
       r.1_8 = D.2044[_20];
       ...  */
6780 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) == 2)
      /* Match the D.2042[_21] = 0; store above.  Just require that
	 it is a constant or external definition store.  */
6784 if (rhs_dt
!= vect_constant_def
&& rhs_dt
!= vect_external_def
)
6787 if (dump_enabled_p ())
6788 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6789 "unsupported OpenMP scan initializer store.\n");
6793 if (! loop_vinfo
->scan_map
)
6794 loop_vinfo
->scan_map
= new hash_map
<tree
, tree
>;
6795 tree var
= TREE_OPERAND (DR_BASE_ADDRESS (dr_info
->dr
), 0);
6796 tree
&cached
= loop_vinfo
->scan_map
->get_or_insert (var
);
6799 cached
= gimple_assign_rhs1 (STMT_VINFO_STMT (stmt_info
));
6801 /* These stores can be vectorized normally. */
6805 if (rhs_dt
!= vect_internal_def
)
6808 if (dump_enabled_p ())
6809 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6810 "unsupported OpenMP scan combiner pattern.\n");
6814 gimple
*stmt
= STMT_VINFO_STMT (stmt_info
);
6815 tree rhs
= gimple_assign_rhs1 (stmt
);
6816 if (TREE_CODE (rhs
) != SSA_NAME
)
6819 gimple
*other_store_stmt
= NULL
;
6820 tree var
= TREE_OPERAND (DR_BASE_ADDRESS (dr_info
->dr
), 0);
6821 bool inscan_var_store
6822 = lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var
)) != NULL
;
6824 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) == 4)
6826 if (!inscan_var_store
)
6828 use_operand_p use_p
;
6829 imm_use_iterator iter
;
6830 FOR_EACH_IMM_USE_FAST (use_p
, iter
, rhs
)
6832 gimple
*use_stmt
= USE_STMT (use_p
);
6833 if (use_stmt
== stmt
|| is_gimple_debug (use_stmt
))
6835 if (gimple_bb (use_stmt
) != gimple_bb (stmt
)
6836 || !is_gimple_assign (use_stmt
)
6837 || gimple_assign_rhs_class (use_stmt
) != GIMPLE_BINARY_RHS
6839 || TREE_CODE (gimple_assign_lhs (use_stmt
)) != SSA_NAME
)
6841 other_store_stmt
= use_stmt
;
6843 if (other_store_stmt
== NULL
)
6845 rhs
= gimple_assign_lhs (other_store_stmt
);
6846 if (!single_imm_use (rhs
, &use_p
, &other_store_stmt
))
6850 else if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) == 3)
6852 use_operand_p use_p
;
6853 imm_use_iterator iter
;
6854 FOR_EACH_IMM_USE_FAST (use_p
, iter
, rhs
)
6856 gimple
*use_stmt
= USE_STMT (use_p
);
6857 if (use_stmt
== stmt
|| is_gimple_debug (use_stmt
))
6859 if (other_store_stmt
)
6861 other_store_stmt
= use_stmt
;
6867 gimple
*def_stmt
= SSA_NAME_DEF_STMT (rhs
);
6868 if (gimple_bb (def_stmt
) != gimple_bb (stmt
)
6869 || !is_gimple_assign (def_stmt
)
6870 || gimple_assign_rhs_class (def_stmt
) != GIMPLE_BINARY_RHS
)
6873 enum tree_code code
= gimple_assign_rhs_code (def_stmt
);
  /* For pointer addition, we should use the normal plus for the vector
     operation.  */
6878 case POINTER_PLUS_EXPR
:
6881 case MULT_HIGHPART_EXPR
:
6886 if (TREE_CODE_LENGTH (code
) != binary_op
|| !commutative_tree_code (code
))
6889 tree rhs1
= gimple_assign_rhs1 (def_stmt
);
6890 tree rhs2
= gimple_assign_rhs2 (def_stmt
);
6891 if (TREE_CODE (rhs1
) != SSA_NAME
|| TREE_CODE (rhs2
) != SSA_NAME
)
6894 gimple
*load1_stmt
= SSA_NAME_DEF_STMT (rhs1
);
6895 gimple
*load2_stmt
= SSA_NAME_DEF_STMT (rhs2
);
6896 if (gimple_bb (load1_stmt
) != gimple_bb (stmt
)
6897 || !gimple_assign_load_p (load1_stmt
)
6898 || gimple_bb (load2_stmt
) != gimple_bb (stmt
)
6899 || !gimple_assign_load_p (load2_stmt
))
6902 stmt_vec_info load1_stmt_info
= loop_vinfo
->lookup_stmt (load1_stmt
);
6903 stmt_vec_info load2_stmt_info
= loop_vinfo
->lookup_stmt (load2_stmt
);
6904 if (load1_stmt_info
== NULL
6905 || load2_stmt_info
== NULL
6906 || (STMT_VINFO_SIMD_LANE_ACCESS_P (load1_stmt_info
)
6907 != STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
))
6908 || (STMT_VINFO_SIMD_LANE_ACCESS_P (load2_stmt_info
)
6909 != STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
)))
6912 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) == 4 && inscan_var_store
)
6914 dr_vec_info
*load1_dr_info
= STMT_VINFO_DR_INFO (load1_stmt_info
);
6915 if (TREE_CODE (DR_BASE_ADDRESS (load1_dr_info
->dr
)) != ADDR_EXPR
6916 || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (load1_dr_info
->dr
), 0)))
6918 tree var1
= TREE_OPERAND (DR_BASE_ADDRESS (load1_dr_info
->dr
), 0);
6920 if (lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1
)))
6924 use_operand_p use_p
;
6925 imm_use_iterator iter
;
6926 FOR_EACH_IMM_USE_FAST (use_p
, iter
, lrhs
)
6928 gimple
*use_stmt
= USE_STMT (use_p
);
6929 if (use_stmt
== def_stmt
|| is_gimple_debug (use_stmt
))
6931 if (other_store_stmt
)
6933 other_store_stmt
= use_stmt
;
6937 if (other_store_stmt
== NULL
)
6939 if (gimple_bb (other_store_stmt
) != gimple_bb (stmt
)
6940 || !gimple_store_p (other_store_stmt
))
6943 stmt_vec_info other_store_stmt_info
6944 = loop_vinfo
->lookup_stmt (other_store_stmt
);
6945 if (other_store_stmt_info
== NULL
6946 || (STMT_VINFO_SIMD_LANE_ACCESS_P (other_store_stmt_info
)
6947 != STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
)))
6950 gimple
*stmt1
= stmt
;
6951 gimple
*stmt2
= other_store_stmt
;
6952 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) == 4 && !inscan_var_store
)
6953 std::swap (stmt1
, stmt2
);
6954 if (scan_operand_equal_p (gimple_assign_lhs (stmt1
),
6955 gimple_assign_rhs1 (load2_stmt
)))
6957 std::swap (rhs1
, rhs2
);
6958 std::swap (load1_stmt
, load2_stmt
);
6959 std::swap (load1_stmt_info
, load2_stmt_info
);
6961 if (!scan_operand_equal_p (gimple_assign_lhs (stmt1
),
6962 gimple_assign_rhs1 (load1_stmt
)))
6965 tree var3
= NULL_TREE
;
6966 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) == 3
6967 && !scan_operand_equal_p (gimple_assign_lhs (stmt2
),
6968 gimple_assign_rhs1 (load2_stmt
)))
6970 else if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) == 4)
6972 dr_vec_info
*load2_dr_info
= STMT_VINFO_DR_INFO (load2_stmt_info
);
6973 if (TREE_CODE (DR_BASE_ADDRESS (load2_dr_info
->dr
)) != ADDR_EXPR
6974 || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (load2_dr_info
->dr
), 0)))
6976 var3
= TREE_OPERAND (DR_BASE_ADDRESS (load2_dr_info
->dr
), 0);
6977 if (!lookup_attribute ("omp simd array", DECL_ATTRIBUTES (var3
))
6978 || lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var3
))
6979 || lookup_attribute ("omp simd inscan exclusive",
6980 DECL_ATTRIBUTES (var3
)))
6984 dr_vec_info
*other_dr_info
= STMT_VINFO_DR_INFO (other_store_stmt_info
);
6985 if (TREE_CODE (DR_BASE_ADDRESS (other_dr_info
->dr
)) != ADDR_EXPR
6986 || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (other_dr_info
->dr
), 0)))
6989 tree var1
= TREE_OPERAND (DR_BASE_ADDRESS (dr_info
->dr
), 0);
6990 tree var2
= TREE_OPERAND (DR_BASE_ADDRESS (other_dr_info
->dr
), 0);
6991 if (!lookup_attribute ("omp simd array", DECL_ATTRIBUTES (var1
))
6992 || !lookup_attribute ("omp simd array", DECL_ATTRIBUTES (var2
))
6993 || (!lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1
)))
6994 == (!lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var2
))))
6997 if (lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1
)))
6998 std::swap (var1
, var2
);
7000 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) == 4)
7002 if (!lookup_attribute ("omp simd inscan exclusive",
7003 DECL_ATTRIBUTES (var1
)))
7008 if (loop_vinfo
->scan_map
== NULL
)
7010 tree
*init
= loop_vinfo
->scan_map
->get (var1
);
  /* The IL is as expected, now check if we can actually vectorize it.
     For the inclusive scan the combiner sequence
       ...
     should be vectorized as (where _40 is the vectorized rhs
     from the D.2042[_21] = 0; store):
       _30 = MEM <vector(8) int> [(int *)&D.2043];
       _31 = MEM <vector(8) int> [(int *)&D.2042];
       _32 = VEC_PERM_EXPR <_40, _31, { 0, 8, 9, 10, 11, 12, 13, 14 }>;
       ...
       // _33 = { _31[0], _31[0]+_31[1], _31[1]+_31[2], ..., _31[6]+_31[7] };
       _34 = VEC_PERM_EXPR <_40, _33, { 0, 1, 8, 9, 10, 11, 12, 13 }>;
       ...
       // _35 = { _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
       //         _31[1]+.._31[4], ... _31[4]+.._31[7] };
       _36 = VEC_PERM_EXPR <_40, _35, { 0, 1, 2, 3, 8, 9, 10, 11 }>;
       ...
       // _37 = { _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
       //         _31[0]+.._31[4], ... _31[0]+.._31[7] };
       ...
       _39 = VEC_PERM_EXPR <_38, _38, { 7, 7, 7, 7, 7, 7, 7, 7 }>;
       MEM <vector(8) int> [(int *)&D.2043] = _39;
       MEM <vector(8) int> [(int *)&D.2042] = _38;

     For the exclusive scan the combiner sequence
       ...
     should be vectorized as (where _40 is the vectorized rhs
     from the D.2042[_21] = 0; store):
       _30 = MEM <vector(8) int> [(int *)&D.2043];
       _31 = MEM <vector(8) int> [(int *)&D.2042];
       _32 = VEC_PERM_EXPR <_40, _31, { 0, 8, 9, 10, 11, 12, 13, 14 }>;
       _33 = VEC_PERM_EXPR <_40, _32, { 0, 8, 9, 10, 11, 12, 13, 14 }>;
       ...
       // _34 = { 0, _31[0], _31[0]+_31[1], _31[1]+_31[2], _31[2]+_31[3],
       //         _31[3]+_31[4], ... _31[5]+.._31[6] };
       _35 = VEC_PERM_EXPR <_40, _34, { 0, 1, 8, 9, 10, 11, 12, 13 }>;
       ...
       // _36 = { 0, _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
       //         _31[1]+.._31[4], ... _31[3]+.._31[6] };
       _37 = VEC_PERM_EXPR <_40, _36, { 0, 1, 2, 3, 8, 9, 10, 11 }>;
       ...
       // _38 = { 0, _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
       //         _31[0]+.._31[4], ... _31[0]+.._31[6] };
       ...
       _51 = VEC_PERM_EXPR <_50, _50, { 7, 7, 7, 7, 7, 7, 7, 7 }>;
       MEM <vector(8) int> [(int *)&D.2044] = _39;
       MEM <vector(8) int> [(int *)&D.2042] = _51;  */
7068 enum machine_mode vec_mode
= TYPE_MODE (vectype
);
7069 optab optab
= optab_for_tree_code (code
, vectype
, optab_default
);
7070 if (!optab
|| optab_handler (optab
, vec_mode
) == CODE_FOR_nothing
)
7073 int units_log2
= scan_store_can_perm_p (vectype
, *init
);
7074 if (units_log2
== -1)
/* Function vectorizable_scan_store.

   Helper of vectorizable_store, arguments like on vectorizable_store.
   Handle only the transformation, checking is done in check_scan_store.  */
7087 vectorizable_scan_store (vec_info
*vinfo
,
7088 stmt_vec_info stmt_info
, gimple_stmt_iterator
*gsi
,
7089 stmt_vec_info
*vec_stmt
, int ncopies
)
7091 loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
);
7092 dr_vec_info
*dr_info
= STMT_VINFO_DR_INFO (stmt_info
);
7093 tree ref_type
= reference_alias_ptr_type (DR_REF (dr_info
->dr
));
7094 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
7096 if (dump_enabled_p ())
7097 dump_printf_loc (MSG_NOTE
, vect_location
,
7098 "transform scan store. ncopies = %d\n", ncopies
);
7100 gimple
*stmt
= STMT_VINFO_STMT (stmt_info
);
7101 tree rhs
= gimple_assign_rhs1 (stmt
);
7102 gcc_assert (TREE_CODE (rhs
) == SSA_NAME
);
7104 tree var
= TREE_OPERAND (DR_BASE_ADDRESS (dr_info
->dr
), 0);
7105 bool inscan_var_store
7106 = lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var
)) != NULL
;
7108 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) == 4 && !inscan_var_store
)
7110 use_operand_p use_p
;
7111 imm_use_iterator iter
;
7112 FOR_EACH_IMM_USE_FAST (use_p
, iter
, rhs
)
7114 gimple
*use_stmt
= USE_STMT (use_p
);
7115 if (use_stmt
== stmt
|| is_gimple_debug (use_stmt
))
7117 rhs
= gimple_assign_lhs (use_stmt
);
7122 gimple
*def_stmt
= SSA_NAME_DEF_STMT (rhs
);
7123 enum tree_code code
= gimple_assign_rhs_code (def_stmt
);
7124 if (code
== POINTER_PLUS_EXPR
)
7126 gcc_assert (TREE_CODE_LENGTH (code
) == binary_op
7127 && commutative_tree_code (code
));
7128 tree rhs1
= gimple_assign_rhs1 (def_stmt
);
7129 tree rhs2
= gimple_assign_rhs2 (def_stmt
);
7130 gcc_assert (TREE_CODE (rhs1
) == SSA_NAME
&& TREE_CODE (rhs2
) == SSA_NAME
);
7131 gimple
*load1_stmt
= SSA_NAME_DEF_STMT (rhs1
);
7132 gimple
*load2_stmt
= SSA_NAME_DEF_STMT (rhs2
);
7133 stmt_vec_info load1_stmt_info
= loop_vinfo
->lookup_stmt (load1_stmt
);
7134 stmt_vec_info load2_stmt_info
= loop_vinfo
->lookup_stmt (load2_stmt
);
7135 dr_vec_info
*load1_dr_info
= STMT_VINFO_DR_INFO (load1_stmt_info
);
7136 dr_vec_info
*load2_dr_info
= STMT_VINFO_DR_INFO (load2_stmt_info
);
7137 tree var1
= TREE_OPERAND (DR_BASE_ADDRESS (load1_dr_info
->dr
), 0);
7138 tree var2
= TREE_OPERAND (DR_BASE_ADDRESS (load2_dr_info
->dr
), 0);
7140 if (lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1
)))
7142 std::swap (rhs1
, rhs2
);
7143 std::swap (var1
, var2
);
7144 std::swap (load1_dr_info
, load2_dr_info
);
7147 tree
*init
= loop_vinfo
->scan_map
->get (var1
);
7150 unsigned HOST_WIDE_INT nunits
;
7151 if (!TYPE_VECTOR_SUBPARTS (vectype
).is_constant (&nunits
))
7153 auto_vec
<enum scan_store_kind
, 16> use_whole_vector
;
7154 int units_log2
= scan_store_can_perm_p (vectype
, *init
, &use_whole_vector
);
7155 gcc_assert (units_log2
> 0);
7156 auto_vec
<tree
, 16> perms
;
7157 perms
.quick_grow (units_log2
+ 1);
7158 tree zero_vec
= NULL_TREE
, masktype
= NULL_TREE
;
7159 for (int i
= 0; i
<= units_log2
; ++i
)
7161 unsigned HOST_WIDE_INT j
, k
;
7162 vec_perm_builder
sel (nunits
, nunits
, 1);
7163 sel
.quick_grow (nunits
);
7164 if (i
== units_log2
)
7165 for (j
= 0; j
< nunits
; ++j
)
7166 sel
[j
] = nunits
- 1;
7169 for (j
= 0; j
< (HOST_WIDE_INT_1U
<< i
); ++j
)
7171 for (k
= 0; j
< nunits
; ++j
, ++k
)
7172 sel
[j
] = nunits
+ k
;
7174 vec_perm_indices
indices (sel
, i
== units_log2
? 1 : 2, nunits
);
7175 if (!use_whole_vector
.is_empty ()
7176 && use_whole_vector
[i
] != scan_store_kind_perm
)
7178 if (zero_vec
== NULL_TREE
)
7179 zero_vec
= build_zero_cst (vectype
);
7180 if (masktype
== NULL_TREE
7181 && use_whole_vector
[i
] == scan_store_kind_lshift_cond
)
7182 masktype
= truth_type_for (vectype
);
7183 perms
[i
] = vect_gen_perm_mask_any (vectype
, indices
);
7186 perms
[i
] = vect_gen_perm_mask_checked (vectype
, indices
);
7189 stmt_vec_info prev_stmt_info
= NULL
;
7190 tree vec_oprnd1
= NULL_TREE
;
7191 tree vec_oprnd2
= NULL_TREE
;
7192 tree vec_oprnd3
= NULL_TREE
;
7193 tree dataref_ptr
= DR_BASE_ADDRESS (dr_info
->dr
);
7194 tree dataref_offset
= build_int_cst (ref_type
, 0);
7195 tree bump
= vect_get_data_ptr_increment (vinfo
, dr_info
,
7196 vectype
, VMAT_CONTIGUOUS
);
7197 tree ldataref_ptr
= NULL_TREE
;
7198 tree orig
= NULL_TREE
;
7199 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) == 4 && !inscan_var_store
)
7200 ldataref_ptr
= DR_BASE_ADDRESS (load1_dr_info
->dr
);
7201 for (int j
= 0; j
< ncopies
; j
++)
7203 stmt_vec_info new_stmt_info
;
7206 vec_oprnd1
= vect_get_vec_def_for_operand (vinfo
, *init
, stmt_info
);
7207 if (ldataref_ptr
== NULL
)
7208 vec_oprnd2
= vect_get_vec_def_for_operand (vinfo
, rhs1
, stmt_info
);
7209 vec_oprnd3
= vect_get_vec_def_for_operand (vinfo
, rhs2
, stmt_info
);
7214 vec_oprnd1
= vect_get_vec_def_for_stmt_copy (vinfo
, vec_oprnd1
);
7215 if (ldataref_ptr
== NULL
)
7216 vec_oprnd2
= vect_get_vec_def_for_stmt_copy (vinfo
, vec_oprnd2
);
7217 vec_oprnd3
= vect_get_vec_def_for_stmt_copy (vinfo
, vec_oprnd3
);
7218 if (!inscan_var_store
)
7219 dataref_offset
= int_const_binop (PLUS_EXPR
, dataref_offset
, bump
);
7224 vec_oprnd2
= make_ssa_name (vectype
);
7225 tree data_ref
= fold_build2 (MEM_REF
, vectype
,
7226 unshare_expr (ldataref_ptr
),
7228 vect_copy_ref_info (data_ref
, DR_REF (load1_dr_info
->dr
));
7229 gimple
*g
= gimple_build_assign (vec_oprnd2
, data_ref
);
7230 new_stmt_info
= vect_finish_stmt_generation (vinfo
, stmt_info
, g
, gsi
);
7231 if (prev_stmt_info
== NULL
)
7232 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt_info
;
7234 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt_info
;
7235 prev_stmt_info
= new_stmt_info
;
7238 tree v
= vec_oprnd2
;
7239 for (int i
= 0; i
< units_log2
; ++i
)
7241 tree new_temp
= make_ssa_name (vectype
);
7242 gimple
*g
= gimple_build_assign (new_temp
, VEC_PERM_EXPR
,
7244 && (use_whole_vector
[i
]
7245 != scan_store_kind_perm
))
7246 ? zero_vec
: vec_oprnd1
, v
,
7248 new_stmt_info
= vect_finish_stmt_generation (vinfo
, stmt_info
, g
, gsi
);
7249 if (prev_stmt_info
== NULL
)
7250 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt_info
;
7252 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt_info
;
7253 prev_stmt_info
= new_stmt_info
;
7255 if (zero_vec
&& use_whole_vector
[i
] == scan_store_kind_lshift_cond
)
	      /* Whole vector shift shifted in zero bits, but if *init
		 is not initializer_zerop, we need to replace those elements
		 with elements from vec_oprnd1.  */
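	      /* E.g. (illustration only) for i == 1 and eight lanes the mask
		 built below is { 0, 0, 1, 1, 1, 1, 1, 1 }, so the first two
		 lanes are taken from VEC_OPRND1 (the initializer) instead of
		 the shifted vector.  */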
7260 tree_vector_builder
vb (masktype
, nunits
, 1);
7261 for (unsigned HOST_WIDE_INT k
= 0; k
< nunits
; ++k
)
7262 vb
.quick_push (k
< (HOST_WIDE_INT_1U
<< i
)
7263 ? boolean_false_node
: boolean_true_node
);
7265 tree new_temp2
= make_ssa_name (vectype
);
7266 g
= gimple_build_assign (new_temp2
, VEC_COND_EXPR
, vb
.build (),
7267 new_temp
, vec_oprnd1
);
7268 new_stmt_info
= vect_finish_stmt_generation (vinfo
, stmt_info
,
7270 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt_info
;
7271 prev_stmt_info
= new_stmt_info
;
7272 new_temp
= new_temp2
;
	  /* For exclusive scan, perform the perms[i] permutation once
	     more.  */
7278 && STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) == 4
7286 tree new_temp2
= make_ssa_name (vectype
);
7287 g
= gimple_build_assign (new_temp2
, code
, v
, new_temp
);
7288 new_stmt_info
= vect_finish_stmt_generation (vinfo
, stmt_info
, g
, gsi
);
7289 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt_info
;
7290 prev_stmt_info
= new_stmt_info
;
7295 tree new_temp
= make_ssa_name (vectype
);
7296 gimple
*g
= gimple_build_assign (new_temp
, code
, orig
, v
);
7297 new_stmt_info
= vect_finish_stmt_generation (vinfo
, stmt_info
, g
, gsi
);
7298 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt_info
;
7299 prev_stmt_info
= new_stmt_info
;
7301 tree last_perm_arg
= new_temp
;
      /* For exclusive scan, new_temp computed above is the exclusive scan
	 prefix sum.  Turn it into inclusive prefix sum for the broadcast
	 of the last element into orig.  */
7305 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) == 4)
7307 last_perm_arg
= make_ssa_name (vectype
);
7308 g
= gimple_build_assign (last_perm_arg
, code
, new_temp
, vec_oprnd2
);
7309 new_stmt_info
= vect_finish_stmt_generation (vinfo
, stmt_info
, g
, gsi
);
7310 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt_info
;
7311 prev_stmt_info
= new_stmt_info
;
7314 orig
= make_ssa_name (vectype
);
7315 g
= gimple_build_assign (orig
, VEC_PERM_EXPR
, last_perm_arg
,
7316 last_perm_arg
, perms
[units_log2
]);
7317 new_stmt_info
= vect_finish_stmt_generation (vinfo
, stmt_info
, g
, gsi
);
7318 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt_info
;
7319 prev_stmt_info
= new_stmt_info
;
7321 if (!inscan_var_store
)
7323 tree data_ref
= fold_build2 (MEM_REF
, vectype
,
7324 unshare_expr (dataref_ptr
),
7326 vect_copy_ref_info (data_ref
, DR_REF (dr_info
->dr
));
7327 g
= gimple_build_assign (data_ref
, new_temp
);
7328 new_stmt_info
= vect_finish_stmt_generation (vinfo
, stmt_info
, g
, gsi
);
7329 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt_info
;
7330 prev_stmt_info
= new_stmt_info
;
7334 if (inscan_var_store
)
7335 for (int j
= 0; j
< ncopies
; j
++)
7338 dataref_offset
= int_const_binop (PLUS_EXPR
, dataref_offset
, bump
);
7340 tree data_ref
= fold_build2 (MEM_REF
, vectype
,
7341 unshare_expr (dataref_ptr
),
7343 vect_copy_ref_info (data_ref
, DR_REF (dr_info
->dr
));
7344 gimple
*g
= gimple_build_assign (data_ref
, orig
);
7345 stmt_vec_info new_stmt_info
7346 = vect_finish_stmt_generation (vinfo
, stmt_info
, g
, gsi
);
7347 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt_info
;
7348 prev_stmt_info
= new_stmt_info
;
/* Function vectorizable_store.

   Check if STMT_INFO defines a non scalar data-ref (array/pointer/structure)
   that can be vectorized.
   If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at GSI.
   Return true if STMT_INFO is vectorizable in this way.  */
7363 vectorizable_store (vec_info
*vinfo
,
7364 stmt_vec_info stmt_info
, gimple_stmt_iterator
*gsi
,
7365 stmt_vec_info
*vec_stmt
, slp_tree slp_node
,
7366 stmt_vector_for_cost
*cost_vec
)
7370 tree vec_oprnd
= NULL_TREE
;
7372 loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
);
7373 class loop
*loop
= NULL
;
7374 machine_mode vec_mode
;
7376 enum dr_alignment_support alignment_support_scheme
;
7377 enum vect_def_type rhs_dt
= vect_unknown_def_type
;
7378 enum vect_def_type mask_dt
= vect_unknown_def_type
;
7379 stmt_vec_info prev_stmt_info
= NULL
;
7380 tree dataref_ptr
= NULL_TREE
;
7381 tree dataref_offset
= NULL_TREE
;
7382 gimple
*ptr_incr
= NULL
;
7385 stmt_vec_info first_stmt_info
;
7387 unsigned int group_size
, i
;
7388 vec
<tree
> oprnds
= vNULL
;
7389 vec
<tree
> result_chain
= vNULL
;
7390 tree offset
= NULL_TREE
;
7391 vec
<tree
> vec_oprnds
= vNULL
;
7392 bool slp
= (slp_node
!= NULL
);
7393 unsigned int vec_num
;
7394 bb_vec_info bb_vinfo
= dyn_cast
<bb_vec_info
> (vinfo
);
7396 gather_scatter_info gs_info
;
7398 vec_load_store_type vls_type
;
7401 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
7404 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
7408 /* Is vectorizable store? */
7410 tree mask
= NULL_TREE
, mask_vectype
= NULL_TREE
;
7411 if (gassign
*assign
= dyn_cast
<gassign
*> (stmt_info
->stmt
))
7413 tree scalar_dest
= gimple_assign_lhs (assign
);
7414 if (TREE_CODE (scalar_dest
) == VIEW_CONVERT_EXPR
7415 && is_pattern_stmt_p (stmt_info
))
7416 scalar_dest
= TREE_OPERAND (scalar_dest
, 0);
7417 if (TREE_CODE (scalar_dest
) != ARRAY_REF
7418 && TREE_CODE (scalar_dest
) != BIT_FIELD_REF
7419 && TREE_CODE (scalar_dest
) != INDIRECT_REF
7420 && TREE_CODE (scalar_dest
) != COMPONENT_REF
7421 && TREE_CODE (scalar_dest
) != IMAGPART_EXPR
7422 && TREE_CODE (scalar_dest
) != REALPART_EXPR
7423 && TREE_CODE (scalar_dest
) != MEM_REF
)
7428 gcall
*call
= dyn_cast
<gcall
*> (stmt_info
->stmt
);
7429 if (!call
|| !gimple_call_internal_p (call
))
7432 internal_fn ifn
= gimple_call_internal_fn (call
);
7433 if (!internal_store_fn_p (ifn
))
7436 if (slp_node
!= NULL
)
7438 if (dump_enabled_p ())
7439 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
7440 "SLP of masked stores not supported.\n");
7444 int mask_index
= internal_fn_mask_index (ifn
);
7445 if (mask_index
>= 0)
7447 mask
= gimple_call_arg (call
, mask_index
);
7448 if (!vect_check_scalar_mask (vinfo
, stmt_info
, mask
, &mask_dt
,
7454 op
= vect_get_store_rhs (stmt_info
);
  /* Cannot have hybrid store SLP -- that would mean storing to the
     same location twice.  */
7458 gcc_assert (slp
== PURE_SLP_STMT (stmt_info
));
7460 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
), rhs_vectype
= NULL_TREE
;
7461 poly_uint64 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
7465 loop
= LOOP_VINFO_LOOP (loop_vinfo
);
7466 vf
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
);
  /* Multiple types in SLP are handled by creating the appropriate number of
     vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
     case of SLP.  */
7477 ncopies
= vect_get_num_copies (loop_vinfo
, vectype
);
7479 gcc_assert (ncopies
>= 1);
7481 /* FORNOW. This restriction should be relaxed. */
7482 if (loop
&& nested_in_vect_loop_p (loop
, stmt_info
) && ncopies
> 1)
7484 if (dump_enabled_p ())
7485 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
7486 "multiple types in nested loop.\n");
7490 if (!vect_check_store_rhs (vinfo
, stmt_info
,
7491 op
, &rhs_dt
, &rhs_vectype
, &vls_type
))
7494 elem_type
= TREE_TYPE (vectype
);
7495 vec_mode
= TYPE_MODE (vectype
);
7497 if (!STMT_VINFO_DATA_REF (stmt_info
))
7500 vect_memory_access_type memory_access_type
;
7501 if (!get_load_store_type (vinfo
, stmt_info
, vectype
, slp
, mask
, vls_type
,
7502 ncopies
, &memory_access_type
, &gs_info
))
7507 if (memory_access_type
== VMAT_CONTIGUOUS
)
7509 if (!VECTOR_MODE_P (vec_mode
)
7510 || !can_vec_mask_load_store_p (vec_mode
,
7511 TYPE_MODE (mask_vectype
), false))
7514 else if (memory_access_type
!= VMAT_LOAD_STORE_LANES
7515 && (memory_access_type
!= VMAT_GATHER_SCATTER
7516 || (gs_info
.decl
&& !VECTOR_BOOLEAN_TYPE_P (mask_vectype
))))
7518 if (dump_enabled_p ())
7519 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
7520 "unsupported access type for masked store.\n");
  /* FORNOW. In some cases can vectorize even if data-type not supported
     (e.g. - array initialization with 0).  */
7528 if (optab_handler (mov_optab
, vec_mode
) == CODE_FOR_nothing
)
7532 dr_vec_info
*dr_info
= STMT_VINFO_DR_INFO (stmt_info
), *first_dr_info
= NULL
;
7533 grouped_store
= (STMT_VINFO_GROUPED_ACCESS (stmt_info
)
7534 && memory_access_type
!= VMAT_GATHER_SCATTER
7535 && (slp
|| memory_access_type
!= VMAT_CONTIGUOUS
));
7538 first_stmt_info
= DR_GROUP_FIRST_ELEMENT (stmt_info
);
7539 first_dr_info
= STMT_VINFO_DR_INFO (first_stmt_info
);
7540 group_size
= DR_GROUP_SIZE (first_stmt_info
);
7544 first_stmt_info
= stmt_info
;
7545 first_dr_info
= dr_info
;
7546 group_size
= vec_num
= 1;
7549 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) > 1 && !vec_stmt
)
7551 if (!check_scan_store (vinfo
, stmt_info
, vectype
, rhs_dt
, slp
, mask
,
7552 memory_access_type
))
7556 if (!vec_stmt
) /* transformation not required. */
7558 STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info
) = memory_access_type
;
7561 && LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo
))
7562 check_load_store_masking (loop_vinfo
, vectype
, vls_type
, group_size
,
7563 memory_access_type
, &gs_info
, mask
);
7566 && !vect_maybe_update_slp_op_vectype (SLP_TREE_CHILDREN (slp_node
)[0],
7569 if (dump_enabled_p ())
7570 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
7571 "incompatible vector types for invariants\n");
7575 STMT_VINFO_TYPE (stmt_info
) = store_vec_info_type
;
7576 vect_model_store_cost (vinfo
, stmt_info
, ncopies
,
7577 memory_access_type
, vls_type
, slp_node
, cost_vec
);
7580 gcc_assert (memory_access_type
== STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info
));
7584 ensure_base_align (dr_info
);
7586 if (memory_access_type
== VMAT_GATHER_SCATTER
&& gs_info
.decl
)
7588 tree vec_oprnd0
= NULL_TREE
, vec_oprnd1
= NULL_TREE
, src
;
7589 tree arglist
= TYPE_ARG_TYPES (TREE_TYPE (gs_info
.decl
));
7590 tree rettype
, srctype
, ptrtype
, idxtype
, masktype
, scaletype
;
7591 tree ptr
, var
, scale
, vec_mask
;
7592 tree mask_arg
= NULL_TREE
, mask_op
= NULL_TREE
, perm_mask
= NULL_TREE
;
7593 tree mask_halfvectype
= mask_vectype
;
7594 edge pe
= loop_preheader_edge (loop
);
7597 enum { NARROW
, NONE
, WIDEN
} modifier
;
7598 poly_uint64 scatter_off_nunits
7599 = TYPE_VECTOR_SUBPARTS (gs_info
.offset_vectype
);
7601 if (known_eq (nunits
, scatter_off_nunits
))
7603 else if (known_eq (nunits
* 2, scatter_off_nunits
))
	  /* Currently gathers and scatters are only supported for
	     fixed-length vectors.  */
7609 unsigned int count
= scatter_off_nunits
.to_constant ();
7610 vec_perm_builder
sel (count
, count
, 1);
7611 for (i
= 0; i
< (unsigned int) count
; ++i
)
7612 sel
.quick_push (i
| (count
/ 2));
7614 vec_perm_indices
indices (sel
, 1, count
);
7615 perm_mask
= vect_gen_perm_mask_checked (gs_info
.offset_vectype
,
7617 gcc_assert (perm_mask
!= NULL_TREE
);
7619 else if (known_eq (nunits
, scatter_off_nunits
* 2))
	  /* Currently gathers and scatters are only supported for
	     fixed-length vectors.  */
7625 unsigned int count
= nunits
.to_constant ();
7626 vec_perm_builder
sel (count
, count
, 1);
7627 for (i
= 0; i
< (unsigned int) count
; ++i
)
7628 sel
.quick_push (i
| (count
/ 2));
7630 vec_perm_indices
indices (sel
, 2, count
);
7631 perm_mask
= vect_gen_perm_mask_checked (vectype
, indices
);
7632 gcc_assert (perm_mask
!= NULL_TREE
);
7636 mask_halfvectype
= truth_type_for (gs_info
.offset_vectype
);
7641 rettype
= TREE_TYPE (TREE_TYPE (gs_info
.decl
));
7642 ptrtype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
7643 masktype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
7644 idxtype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
7645 srctype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
7646 scaletype
= TREE_VALUE (arglist
);
7648 gcc_checking_assert (TREE_CODE (masktype
) == INTEGER_TYPE
7649 && TREE_CODE (rettype
) == VOID_TYPE
);
7651 ptr
= fold_convert (ptrtype
, gs_info
.base
);
7652 if (!is_gimple_min_invariant (ptr
))
7654 ptr
= force_gimple_operand (ptr
, &seq
, true, NULL_TREE
);
7655 new_bb
= gsi_insert_seq_on_edge_immediate (pe
, seq
);
7656 gcc_assert (!new_bb
);
7659 if (mask
== NULL_TREE
)
7661 mask_arg
= build_int_cst (masktype
, -1);
7662 mask_arg
= vect_init_vector (vinfo
, stmt_info
,
7663 mask_arg
, masktype
, NULL
);
7666 scale
= build_int_cst (scaletype
, gs_info
.scale
);
7668 prev_stmt_info
= NULL
;
7669 for (j
= 0; j
< ncopies
; ++j
)
7673 src
= vec_oprnd1
= vect_get_vec_def_for_operand (vinfo
,
7675 op
= vec_oprnd0
= vect_get_vec_def_for_operand (vinfo
,
7679 mask_op
= vec_mask
= vect_get_vec_def_for_operand (vinfo
, mask
,
7682 else if (modifier
!= NONE
&& (j
& 1))
7684 if (modifier
== WIDEN
)
7687 = vec_oprnd1
= vect_get_vec_def_for_stmt_copy (vinfo
,
7689 op
= permute_vec_elements (vinfo
, vec_oprnd0
, vec_oprnd0
,
7690 perm_mask
, stmt_info
, gsi
);
7693 = vec_mask
= vect_get_vec_def_for_stmt_copy (vinfo
,
7696 else if (modifier
== NARROW
)
7698 src
= permute_vec_elements (vinfo
, vec_oprnd1
, vec_oprnd1
,
7699 perm_mask
, stmt_info
, gsi
);
7700 op
= vec_oprnd0
= vect_get_vec_def_for_stmt_copy (vinfo
,
7708 src
= vec_oprnd1
= vect_get_vec_def_for_stmt_copy (vinfo
,
7710 op
= vec_oprnd0
= vect_get_vec_def_for_stmt_copy (vinfo
,
7713 mask_op
= vec_mask
= vect_get_vec_def_for_stmt_copy (vinfo
,
7717 if (!useless_type_conversion_p (srctype
, TREE_TYPE (src
)))
7719 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (src
)),
7720 TYPE_VECTOR_SUBPARTS (srctype
)));
7721 var
= vect_get_new_ssa_name (srctype
, vect_simple_var
);
7722 src
= build1 (VIEW_CONVERT_EXPR
, srctype
, src
);
7724 = gimple_build_assign (var
, VIEW_CONVERT_EXPR
, src
);
7725 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
7729 if (!useless_type_conversion_p (idxtype
, TREE_TYPE (op
)))
7731 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op
)),
7732 TYPE_VECTOR_SUBPARTS (idxtype
)));
7733 var
= vect_get_new_ssa_name (idxtype
, vect_simple_var
);
7734 op
= build1 (VIEW_CONVERT_EXPR
, idxtype
, op
);
7736 = gimple_build_assign (var
, VIEW_CONVERT_EXPR
, op
);
7737 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
7745 if (modifier
== NARROW
)
7747 var
= vect_get_new_ssa_name (mask_halfvectype
,
7750 = gimple_build_assign (var
, (j
& 1) ? VEC_UNPACK_HI_EXPR
7751 : VEC_UNPACK_LO_EXPR
,
7753 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
7756 tree optype
= TREE_TYPE (mask_arg
);
7757 if (TYPE_MODE (masktype
) == TYPE_MODE (optype
))
7760 utype
= lang_hooks
.types
.type_for_mode (TYPE_MODE (optype
), 1);
7761 var
= vect_get_new_ssa_name (utype
, vect_scalar_var
);
7762 mask_arg
= build1 (VIEW_CONVERT_EXPR
, utype
, mask_arg
);
7764 = gimple_build_assign (var
, VIEW_CONVERT_EXPR
, mask_arg
);
7765 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
7767 if (!useless_type_conversion_p (masktype
, utype
))
7769 gcc_assert (TYPE_PRECISION (utype
)
7770 <= TYPE_PRECISION (masktype
));
7771 var
= vect_get_new_ssa_name (masktype
, vect_scalar_var
);
7772 new_stmt
= gimple_build_assign (var
, NOP_EXPR
, mask_arg
);
7773 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
7779 = gimple_build_call (gs_info
.decl
, 5, ptr
, mask_arg
, op
, src
, scale
);
7780 stmt_vec_info new_stmt_info
7781 = vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
7783 if (prev_stmt_info
== NULL
)
7784 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt_info
;
7786 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt_info
;
7787 prev_stmt_info
= new_stmt_info
;
7791 else if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) >= 3)
7792 return vectorizable_scan_store (vinfo
, stmt_info
, gsi
, vec_stmt
, ncopies
);
7794 if (STMT_VINFO_GROUPED_ACCESS (stmt_info
))
7795 DR_GROUP_STORE_COUNT (DR_GROUP_FIRST_ELEMENT (stmt_info
))++;
7800 gcc_assert (!loop
|| !nested_in_vect_loop_p (loop
, stmt_info
));
7802 /* We vectorize all the stmts of the interleaving group when we
7803 reach the last stmt in the group. */
7804 if (DR_GROUP_STORE_COUNT (first_stmt_info
)
7805 < DR_GROUP_SIZE (first_stmt_info
)
7814 grouped_store
= false;
7815 /* VEC_NUM is the number of vect stmts to be created for this
7817 vec_num
= SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
);
7818 first_stmt_info
= SLP_TREE_SCALAR_STMTS (slp_node
)[0];
7819 gcc_assert (DR_GROUP_FIRST_ELEMENT (first_stmt_info
)
7820 == first_stmt_info
);
7821 first_dr_info
= STMT_VINFO_DR_INFO (first_stmt_info
);
7822 op
= vect_get_store_rhs (first_stmt_info
);
7825 /* VEC_NUM is the number of vect stmts to be created for this
7827 vec_num
= group_size
;
7829 ref_type
= get_group_alias_ptr_type (first_stmt_info
);
7832 ref_type
= reference_alias_ptr_type (DR_REF (first_dr_info
->dr
));
7834 if (dump_enabled_p ())
7835 dump_printf_loc (MSG_NOTE
, vect_location
,
7836 "transform store. ncopies = %d\n", ncopies
);
7838 if (memory_access_type
== VMAT_ELEMENTWISE
7839 || memory_access_type
== VMAT_STRIDED_SLP
)
7841 gimple_stmt_iterator incr_gsi
;
7847 tree stride_base
, stride_step
, alias_off
;
7851 /* Checked by get_load_store_type. */
7852 unsigned int const_nunits
= nunits
.to_constant ();
7854 gcc_assert (!LOOP_VINFO_FULLY_MASKED_P (loop_vinfo
));
7855 gcc_assert (!nested_in_vect_loop_p (loop
, stmt_info
));
7857 dr_offset
= get_dr_vinfo_offset (vinfo
, first_dr_info
);
7859 = fold_build_pointer_plus
7860 (DR_BASE_ADDRESS (first_dr_info
->dr
),
7861 size_binop (PLUS_EXPR
,
7862 convert_to_ptrofftype (dr_offset
),
7863 convert_to_ptrofftype (DR_INIT (first_dr_info
->dr
))));
7864 stride_step
= fold_convert (sizetype
, DR_STEP (first_dr_info
->dr
));
      /* For a store with loop-invariant (but other than power-of-2)
	 stride (i.e. not a grouped access) like so:

	   for (i = 0; i < n; i += stride)
	     array[i] = ...;

	 we generate a new induction variable and new stores from
	 the components of the (vectorized) rhs:

	   for (j = 0; ; j += VF*stride)
	     vectemp = ...;
	     tmp1 = vectemp[0];
	     array[j] = tmp1;
	     tmp2 = vectemp[1];
	     array[j + stride] = tmp2;
	     ...
	 */
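      /* As a concrete (purely illustrative) instance: with VF == 4 and
	 stride == 3 each generated iteration stores the four lanes of
	 VECTEMP to array[j], array[j + 3], array[j + 6] and array[j + 9],
	 after which the induction variable j advances by VF * stride == 12.  */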
7884 unsigned nstores
= const_nunits
;
7886 tree ltype
= elem_type
;
7887 tree lvectype
= vectype
;
7890 if (group_size
< const_nunits
7891 && const_nunits
% group_size
== 0)
7893 nstores
= const_nunits
/ group_size
;
7895 ltype
= build_vector_type (elem_type
, group_size
);
7898 /* First check if vec_extract optab doesn't support extraction
7899 of vector elts directly. */
7900 scalar_mode elmode
= SCALAR_TYPE_MODE (elem_type
);
7902 if (!VECTOR_MODE_P (TYPE_MODE (vectype
))
7903 || !related_vector_mode (TYPE_MODE (vectype
), elmode
,
7904 group_size
).exists (&vmode
)
7905 || (convert_optab_handler (vec_extract_optab
,
7906 TYPE_MODE (vectype
), vmode
)
7907 == CODE_FOR_nothing
))
7909 /* Try to avoid emitting an extract of vector elements
7910 by performing the extracts using an integer type of the
7911 same size, extracting from a vector of those and then
7912 re-interpreting it as the original vector type if
7915 = group_size
* GET_MODE_BITSIZE (elmode
);
7916 unsigned int lnunits
= const_nunits
/ group_size
;
7917 /* If we can't construct such a vector fall back to
7918 element extracts from the original vector type and
7919 element size stores. */
7920 if (int_mode_for_size (lsize
, 0).exists (&elmode
)
7921 && VECTOR_MODE_P (TYPE_MODE (vectype
))
7922 && related_vector_mode (TYPE_MODE (vectype
), elmode
,
7923 lnunits
).exists (&vmode
)
7924 && (convert_optab_handler (vec_extract_optab
,
7926 != CODE_FOR_nothing
))
7930 ltype
= build_nonstandard_integer_type (lsize
, 1);
7931 lvectype
= build_vector_type (ltype
, nstores
);
7933 /* Else fall back to vector extraction anyway.
7934 Fewer stores are more important than avoiding spilling
7935 of the vector we extract from. Compared to the
7936 construction case in vectorizable_load no store-forwarding
7937 issue exists here for reasonable archs. */
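	      /* A hypothetical example of the integer-type path above:
		 for group_size == 2 QImode elements stored from a V16QI
		 vector, LSIZE is 16 bits, so we view the vector as V8HI
		 and extract HImode values, giving one 16-bit store per
		 group instead of two separate byte extracts and stores.  */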
7940 else if (group_size
>= const_nunits
7941 && group_size
% const_nunits
== 0)
7944 lnel
= const_nunits
;
7948 ltype
= build_aligned_type (ltype
, TYPE_ALIGN (elem_type
));
7949 ncopies
= SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
);
7952 ivstep
= stride_step
;
7953 ivstep
= fold_build2 (MULT_EXPR
, TREE_TYPE (ivstep
), ivstep
,
7954 build_int_cst (TREE_TYPE (ivstep
), vf
));
7956 standard_iv_increment_position (loop
, &incr_gsi
, &insert_after
);
7958 stride_base
= cse_and_gimplify_to_preheader (loop_vinfo
, stride_base
);
7959 ivstep
= cse_and_gimplify_to_preheader (loop_vinfo
, ivstep
);
7960 create_iv (stride_base
, ivstep
, NULL
,
7961 loop
, &incr_gsi
, insert_after
,
7963 incr
= gsi_stmt (incr_gsi
);
7964 loop_vinfo
->add_stmt (incr
);
7966 stride_step
= cse_and_gimplify_to_preheader (loop_vinfo
, stride_step
);
7968 prev_stmt_info
= NULL
;
7969 alias_off
= build_int_cst (ref_type
, 0);
7970 stmt_vec_info next_stmt_info
= first_stmt_info
;
7971 for (g
= 0; g
< group_size
; g
++)
7973 running_off
= offvar
;
7976 tree size
= TYPE_SIZE_UNIT (ltype
);
7977 tree pos
= fold_build2 (MULT_EXPR
, sizetype
, size_int (g
),
7979 tree newoff
= copy_ssa_name (running_off
, NULL
);
7980 incr
= gimple_build_assign (newoff
, POINTER_PLUS_EXPR
,
7982 vect_finish_stmt_generation (vinfo
, stmt_info
, incr
, gsi
);
7983 running_off
= newoff
;
7985 unsigned int group_el
= 0;
7986 unsigned HOST_WIDE_INT
7987 elsz
= tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype
)));
7988 for (j
= 0; j
< ncopies
; j
++)
7990 /* We've set op and dt above, from vect_get_store_rhs,
7991 and first_stmt_info == stmt_info. */
7996 vect_get_vec_defs (vinfo
, op
, NULL_TREE
, stmt_info
,
7997 &vec_oprnds
, NULL
, slp_node
);
7998 vec_oprnd
= vec_oprnds
[0];
8002 op
= vect_get_store_rhs (next_stmt_info
);
8003 vec_oprnd
= vect_get_vec_def_for_operand
8004 (vinfo
, op
, next_stmt_info
);
8010 vec_oprnd
= vec_oprnds
[j
];
8012 vec_oprnd
= vect_get_vec_def_for_stmt_copy (vinfo
,
8015 /* Pun the vector to extract from if necessary. */
8016 if (lvectype
!= vectype
)
8018 tree tem
= make_ssa_name (lvectype
);
8020 = gimple_build_assign (tem
, build1 (VIEW_CONVERT_EXPR
,
8021 lvectype
, vec_oprnd
));
8022 vect_finish_stmt_generation (vinfo
, stmt_info
, pun
, gsi
);
8025 for (i
= 0; i
< nstores
; i
++)
8027 tree newref
, newoff
;
8028 gimple
*incr
, *assign
;
8029 tree size
= TYPE_SIZE (ltype
);
8030 /* Extract the i'th component. */
8031 tree pos
= fold_build2 (MULT_EXPR
, bitsizetype
,
8032 bitsize_int (i
), size
);
8033 tree elem
= fold_build3 (BIT_FIELD_REF
, ltype
, vec_oprnd
,
8036 elem
= force_gimple_operand_gsi (gsi
, elem
, true,
8040 tree this_off
= build_int_cst (TREE_TYPE (alias_off
),
8042 newref
= build2 (MEM_REF
, ltype
,
8043 running_off
, this_off
);
8044 vect_copy_ref_info (newref
, DR_REF (first_dr_info
->dr
));
8046 /* And store it to *running_off. */
8047 assign
= gimple_build_assign (newref
, elem
);
8048 stmt_vec_info assign_info
8049 = vect_finish_stmt_generation (vinfo
, stmt_info
,
8054 || group_el
== group_size
)
8056 newoff
= copy_ssa_name (running_off
, NULL
);
8057 incr
= gimple_build_assign (newoff
, POINTER_PLUS_EXPR
,
8058 running_off
, stride_step
);
8059 vect_finish_stmt_generation (vinfo
, stmt_info
, incr
, gsi
);
8061 running_off
= newoff
;
8064 if (g
== group_size
- 1
8067 if (j
== 0 && i
== 0)
8068 STMT_VINFO_VEC_STMT (stmt_info
)
8069 = *vec_stmt
= assign_info
;
8071 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = assign_info
;
8072 prev_stmt_info
= assign_info
;
8076 next_stmt_info
= DR_GROUP_NEXT_ELEMENT (next_stmt_info
);
8081 vec_oprnds
.release ();
8085 auto_vec
<tree
> dr_chain (group_size
);
8086 oprnds
.create (group_size
);
8088 /* Gather-scatter accesses perform only component accesses, alignment
8089 is irrelevant for them. */
8090 if (memory_access_type
== VMAT_GATHER_SCATTER
)
8091 alignment_support_scheme
= dr_unaligned_supported
;
8093 alignment_support_scheme
8094 = vect_supportable_dr_alignment (vinfo
, first_dr_info
, false);
8096 gcc_assert (alignment_support_scheme
);
8097 vec_loop_masks
*loop_masks
8098 = (loop_vinfo
&& LOOP_VINFO_FULLY_MASKED_P (loop_vinfo
)
8099 ? &LOOP_VINFO_MASKS (loop_vinfo
)
8101 /* Targets with store-lane instructions must not require explicit
8102 realignment. vect_supportable_dr_alignment always returns either
8103 dr_aligned or dr_unaligned_supported for masked operations. */
8104 gcc_assert ((memory_access_type
!= VMAT_LOAD_STORE_LANES
8107 || alignment_support_scheme
== dr_aligned
8108 || alignment_support_scheme
== dr_unaligned_supported
);
8110 if (memory_access_type
== VMAT_CONTIGUOUS_DOWN
8111 || memory_access_type
== VMAT_CONTIGUOUS_REVERSE
)
8112 offset
= size_int (-TYPE_VECTOR_SUBPARTS (vectype
) + 1);
8115 tree vec_offset
= NULL_TREE
;
8116 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info
))
8118 aggr_type
= NULL_TREE
;
8121 else if (memory_access_type
== VMAT_GATHER_SCATTER
)
8123 aggr_type
= elem_type
;
8124 vect_get_strided_load_store_ops (stmt_info
, loop_vinfo
, &gs_info
,
8125 &bump
, &vec_offset
);
8129 if (memory_access_type
== VMAT_LOAD_STORE_LANES
)
8130 aggr_type
= build_array_type_nelts (elem_type
, vec_num
* nunits
);
8132 aggr_type
= vectype
;
8133 bump
= vect_get_data_ptr_increment (vinfo
, dr_info
, aggr_type
,
8134 memory_access_type
);
8138 LOOP_VINFO_HAS_MASK_STORE (loop_vinfo
) = true;
  /* In case the vectorization factor (VF) is bigger than the number
     of elements that we can fit in a vectype (nunits), we have to generate
     more than one vector stmt - i.e - we need to "unroll" the
     vector stmt by a factor VF/nunits.  For more details see documentation in
     vect_get_vec_def_for_copy_stmt.  */

  /* In case of interleaving (non-unit grouped access):

	S1:  &base + 2 = x2
	S2:  &base = x0
	S3:  &base + 1 = x1
	S4:  &base + 3 = x3

     We create vectorized stores starting from base address (the access of the
     first stmt in the chain (S2 in the above example), when the last store stmt
     of the chain (S4) is reached:

	VS1: &base = vx2
	VS2: &base + vec_size*1 = vx0
	VS3: &base + vec_size*2 = vx1
	VS4: &base + vec_size*3 = vx3

     Then permutation statements are generated:

	VS5: vx5 = VEC_PERM_EXPR < vx0, vx3, {0, 8, 1, 9, 2, 10, 3, 11} >
	VS6: vx6 = VEC_PERM_EXPR < vx0, vx3, {4, 12, 5, 13, 6, 14, 7, 15} >
	...

     And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
     (the order of the data-refs in the output of vect_permute_store_chain
     corresponds to the order of scalar stmts in the interleaving chain - see
     the documentation of vect_permute_store_chain()).

     In case of both multiple types and interleaving, above vector stores and
     permutation stmts are created for every copy.  The result vector stmts are
     put in STMT_VINFO_VEC_STMT for the first copy and in the corresponding
     STMT_VINFO_RELATED_STMT for the next copies.
  */
8179 prev_stmt_info
= NULL
;
8180 tree vec_mask
= NULL_TREE
;
8181 for (j
= 0; j
< ncopies
; j
++)
8183 stmt_vec_info new_stmt_info
;
8188 /* Get vectorized arguments for SLP_NODE. */
8189 vect_get_vec_defs (vinfo
, op
, NULL_TREE
, stmt_info
, &vec_oprnds
,
8192 vec_oprnd
= vec_oprnds
[0];
8196 /* For interleaved stores we collect vectorized defs for all the
8197 stores in the group in DR_CHAIN and OPRNDS. DR_CHAIN is then
8198 used as an input to vect_permute_store_chain(), and OPRNDS as
8199 an input to vect_get_vec_def_for_stmt_copy() for the next copy.
8201 If the store is not grouped, DR_GROUP_SIZE is 1, and DR_CHAIN and
8202 OPRNDS are of size 1. */
8203 stmt_vec_info next_stmt_info
= first_stmt_info
;
8204 for (i
= 0; i
< group_size
; i
++)
8206 /* Since gaps are not supported for interleaved stores,
8207 DR_GROUP_SIZE is the exact number of stmts in the chain.
8208 Therefore, NEXT_STMT_INFO can't be NULL_TREE. In case
8209 that there is no interleaving, DR_GROUP_SIZE is 1,
8210 and only one iteration of the loop will be executed. */
8211 op
= vect_get_store_rhs (next_stmt_info
);
8212 vec_oprnd
= vect_get_vec_def_for_operand
8213 (vinfo
, op
, next_stmt_info
);
8214 dr_chain
.quick_push (vec_oprnd
);
8215 oprnds
.quick_push (vec_oprnd
);
8216 next_stmt_info
= DR_GROUP_NEXT_ELEMENT (next_stmt_info
);
8219 vec_mask
= vect_get_vec_def_for_operand (vinfo
, mask
, stmt_info
,
 8223       /* We should have caught mismatched types earlier.  */
8224 gcc_assert (useless_type_conversion_p (vectype
,
8225 TREE_TYPE (vec_oprnd
)));
8226 bool simd_lane_access_p
8227 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) != 0;
8228 if (simd_lane_access_p
8230 && TREE_CODE (DR_BASE_ADDRESS (first_dr_info
->dr
)) == ADDR_EXPR
8231 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr_info
->dr
), 0))
8232 && integer_zerop (get_dr_vinfo_offset (vinfo
, first_dr_info
))
8233 && integer_zerop (DR_INIT (first_dr_info
->dr
))
8234 && alias_sets_conflict_p (get_alias_set (aggr_type
),
8235 get_alias_set (TREE_TYPE (ref_type
))))
8237 dataref_ptr
= unshare_expr (DR_BASE_ADDRESS (first_dr_info
->dr
));
8238 dataref_offset
= build_int_cst (ref_type
, 0);
8240 else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info
))
8241 vect_get_gather_scatter_ops (vinfo
, loop
, stmt_info
, &gs_info
,
8242 &dataref_ptr
, &vec_offset
);
8245 = vect_create_data_ref_ptr (vinfo
, first_stmt_info
, aggr_type
,
8246 simd_lane_access_p
? loop
: NULL
,
8247 offset
, &dummy
, gsi
, &ptr_incr
,
8248 simd_lane_access_p
, NULL_TREE
, bump
);
8252 /* For interleaved stores we created vectorized defs for all the
8253 defs stored in OPRNDS in the previous iteration (previous copy).
8254 DR_CHAIN is then used as an input to vect_permute_store_chain(),
8255 and OPRNDS as an input to vect_get_vec_def_for_stmt_copy() for the
8257 If the store is not grouped, DR_GROUP_SIZE is 1, and DR_CHAIN and
8258 OPRNDS are of size 1. */
8259 for (i
= 0; i
< group_size
; i
++)
8262 vec_oprnd
= vect_get_vec_def_for_stmt_copy (vinfo
, op
);
8263 dr_chain
[i
] = vec_oprnd
;
8264 oprnds
[i
] = vec_oprnd
;
8267 vec_mask
= vect_get_vec_def_for_stmt_copy (vinfo
, vec_mask
);
8270 = int_const_binop (PLUS_EXPR
, dataref_offset
, bump
);
8271 else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info
))
8272 vec_offset
= vect_get_vec_def_for_stmt_copy (vinfo
, vec_offset
);
8274 dataref_ptr
= bump_vector_ptr (vinfo
, dataref_ptr
, ptr_incr
, gsi
,
8278 if (memory_access_type
== VMAT_LOAD_STORE_LANES
)
8282 /* Get an array into which we can store the individual vectors. */
8283 vec_array
= create_vector_array (vectype
, vec_num
);
8285 /* Invalidate the current contents of VEC_ARRAY. This should
8286 become an RTL clobber too, which prevents the vector registers
8287 from being upward-exposed. */
8288 vect_clobber_variable (vinfo
, stmt_info
, gsi
, vec_array
);
8290 /* Store the individual vectors into the array. */
8291 for (i
= 0; i
< vec_num
; i
++)
8293 vec_oprnd
= dr_chain
[i
];
8294 write_vector_array (vinfo
, stmt_info
,
8295 gsi
, vec_oprnd
, vec_array
, i
);
8298 tree final_mask
= NULL
;
8300 final_mask
= vect_get_loop_mask (gsi
, loop_masks
, ncopies
,
8303 final_mask
= prepare_load_store_mask (mask_vectype
, final_mask
,
8310 MASK_STORE_LANES (DATAREF_PTR, ALIAS_PTR, VEC_MASK,
8312 unsigned int align
= TYPE_ALIGN_UNIT (TREE_TYPE (vectype
));
8313 tree alias_ptr
= build_int_cst (ref_type
, align
);
8314 call
= gimple_build_call_internal (IFN_MASK_STORE_LANES
, 4,
8315 dataref_ptr
, alias_ptr
,
8316 final_mask
, vec_array
);
8321 MEM_REF[...all elements...] = STORE_LANES (VEC_ARRAY). */
8322 data_ref
= create_array_ref (aggr_type
, dataref_ptr
, ref_type
);
8323 call
= gimple_build_call_internal (IFN_STORE_LANES
, 1,
8325 gimple_call_set_lhs (call
, data_ref
);
8327 gimple_call_set_nothrow (call
, true);
8328 new_stmt_info
= vect_finish_stmt_generation (vinfo
, stmt_info
,
8331 /* Record that VEC_ARRAY is now dead. */
8332 vect_clobber_variable (vinfo
, stmt_info
, gsi
, vec_array
);
8336 new_stmt_info
= NULL
;
8340 result_chain
.create (group_size
);
8342 vect_permute_store_chain (vinfo
, dr_chain
, group_size
, stmt_info
,
8343 gsi
, &result_chain
);
8346 stmt_vec_info next_stmt_info
= first_stmt_info
;
8347 for (i
= 0; i
< vec_num
; i
++)
8350 unsigned HOST_WIDE_INT align
;
8352 tree final_mask
= NULL_TREE
;
8354 final_mask
= vect_get_loop_mask (gsi
, loop_masks
,
8356 vectype
, vec_num
* j
+ i
);
8358 final_mask
= prepare_load_store_mask (mask_vectype
, final_mask
,
8361 if (memory_access_type
== VMAT_GATHER_SCATTER
)
8363 tree scale
= size_int (gs_info
.scale
);
8366 call
= gimple_build_call_internal
8367 (IFN_MASK_SCATTER_STORE
, 5, dataref_ptr
, vec_offset
,
8368 scale
, vec_oprnd
, final_mask
);
8370 call
= gimple_build_call_internal
8371 (IFN_SCATTER_STORE
, 4, dataref_ptr
, vec_offset
,
8373 gimple_call_set_nothrow (call
, true);
8375 = vect_finish_stmt_generation (vinfo
, stmt_info
, call
, gsi
);
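	      /* The call built above has (with hypothetical SSA names and
		 a scale of 4) roughly the shape
		   .MASK_SCATTER_STORE (ptr_1, voffs_2, 4, vdata_3, mask_4);
		 i.e. every active lane I stores vdata_3[I] to the address
		 ptr_1 + voffs_2[I] * 4.  */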
8380 /* Bump the vector pointer. */
8381 dataref_ptr
= bump_vector_ptr (vinfo
, dataref_ptr
, ptr_incr
,
8382 gsi
, stmt_info
, bump
);
8385 vec_oprnd
= vec_oprnds
[i
];
8386 else if (grouped_store
)
8387 /* For grouped stores vectorized defs are interleaved in
8388 vect_permute_store_chain(). */
8389 vec_oprnd
= result_chain
[i
];
8391 align
= known_alignment (DR_TARGET_ALIGNMENT (first_dr_info
));
8392 if (aligned_access_p (first_dr_info
))
8394 else if (DR_MISALIGNMENT (first_dr_info
) == -1)
8396 align
= dr_alignment (vect_dr_behavior (vinfo
, first_dr_info
));
8400 misalign
= DR_MISALIGNMENT (first_dr_info
);
8401 if (dataref_offset
== NULL_TREE
8402 && TREE_CODE (dataref_ptr
) == SSA_NAME
)
8403 set_ptr_info_alignment (get_ptr_info (dataref_ptr
), align
,
8406 if (memory_access_type
== VMAT_CONTIGUOUS_REVERSE
)
8408 tree perm_mask
= perm_mask_for_reverse (vectype
);
8409 tree perm_dest
= vect_create_destination_var
8410 (vect_get_store_rhs (stmt_info
), vectype
);
8411 tree new_temp
= make_ssa_name (perm_dest
);
8413 /* Generate the permute statement. */
8415 = gimple_build_assign (new_temp
, VEC_PERM_EXPR
, vec_oprnd
,
8416 vec_oprnd
, perm_mask
);
8417 vect_finish_stmt_generation (vinfo
, stmt_info
, perm_stmt
, gsi
);
8419 perm_stmt
= SSA_NAME_DEF_STMT (new_temp
);
8420 vec_oprnd
= new_temp
;
8423 /* Arguments are ready. Create the new vector stmt. */
8426 align
= least_bit_hwi (misalign
| align
);
8427 tree ptr
= build_int_cst (ref_type
, align
);
8429 = gimple_build_call_internal (IFN_MASK_STORE
, 4,
8431 final_mask
, vec_oprnd
);
8432 gimple_call_set_nothrow (call
, true);
8434 = vect_finish_stmt_generation (vinfo
, stmt_info
, call
, gsi
);
8438 data_ref
= fold_build2 (MEM_REF
, vectype
,
8442 : build_int_cst (ref_type
, 0));
8443 if (aligned_access_p (first_dr_info
))
8445 else if (DR_MISALIGNMENT (first_dr_info
) == -1)
8446 TREE_TYPE (data_ref
)
8447 = build_aligned_type (TREE_TYPE (data_ref
),
8448 align
* BITS_PER_UNIT
);
8450 TREE_TYPE (data_ref
)
8451 = build_aligned_type (TREE_TYPE (data_ref
),
8452 TYPE_ALIGN (elem_type
));
8453 vect_copy_ref_info (data_ref
, DR_REF (first_dr_info
->dr
));
8455 = gimple_build_assign (data_ref
, vec_oprnd
);
8457 = vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
8463 next_stmt_info
= DR_GROUP_NEXT_ELEMENT (next_stmt_info
);
8464 if (!next_stmt_info
)
8471 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt_info
;
8473 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt_info
;
8474 prev_stmt_info
= new_stmt_info
;
8479 result_chain
.release ();
8480 vec_oprnds
.release ();
/* Given a vector type VECTYPE, turns permutation SEL into the equivalent
   VECTOR_CST mask.  No checks are made that the target platform supports the
   mask, so callers may wish to test can_vec_perm_const_p separately, or use
   vect_gen_perm_mask_checked.  */

tree
vect_gen_perm_mask_any (tree vectype, const vec_perm_indices &sel)
{
  tree mask_type;

  poly_uint64 nunits = sel.length ();
  gcc_assert (known_eq (nunits, TYPE_VECTOR_SUBPARTS (vectype)));

  mask_type = build_vector_type (ssizetype, nunits);
  return vec_perm_indices_to_tree (mask_type, sel);
}

/* Checked version of vect_gen_perm_mask_any.  Asserts can_vec_perm_const_p,
   i.e. that the target supports the pattern _for arbitrary input vectors_.  */

tree
vect_gen_perm_mask_checked (tree vectype, const vec_perm_indices &sel)
{
  gcc_assert (can_vec_perm_const_p (TYPE_MODE (vectype), sel));
  return vect_gen_perm_mask_any (vectype, sel);
}
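/* For example (hypothetical use): to reverse a V4SI vector one can build
   the indices { 3, 2, 1, 0 } in a vec_perm_builder, wrap them in a
   vec_perm_indices and pass that to vect_gen_perm_mask_checked; the
   resulting VECTOR_CST is then usable as the selector operand of a
   VEC_PERM_EXPR.  */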
/* Given a vector variable X and Y, that was generated for the scalar
   STMT_INFO, generate instructions to permute the vector elements of X and Y
   using permutation mask MASK_VEC, insert them at *GSI and return the
   permuted vector variable.  */

static tree
permute_vec_elements (vec_info *vinfo,
		      tree x, tree y, tree mask_vec, stmt_vec_info stmt_info,
		      gimple_stmt_iterator *gsi)
{
  tree vectype = TREE_TYPE (x);
  tree perm_dest, data_ref;
  gimple *perm_stmt;

  tree scalar_dest = gimple_get_lhs (stmt_info->stmt);
  if (scalar_dest && TREE_CODE (scalar_dest) == SSA_NAME)
    perm_dest = vect_create_destination_var (scalar_dest, vectype);
  else
    perm_dest = vect_get_new_vect_var (vectype, vect_simple_var, NULL);
  data_ref = make_ssa_name (perm_dest);

  /* Generate the permute statement.  */
  perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR, x, y, mask_vec);
  vect_finish_stmt_generation (vinfo, stmt_info, perm_stmt, gsi);

  return data_ref;
}
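/* As used, for example, by the gather/scatter WIDEN and NARROW paths above:

     op = permute_vec_elements (vinfo, vec_oprnd0, vec_oprnd0, perm_mask,
				stmt_info, gsi);

   selects the half of the offset (or data) vector that the current copy of
   the statement needs.  */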
/* Hoist the definitions of all SSA uses on STMT_INFO out of the loop LOOP,
   inserting them on the loop's preheader edge.  Returns true if we
   were successful in doing so (and thus STMT_INFO can be moved then),
   otherwise returns false.  */

static bool
hoist_defs_of_uses (stmt_vec_info stmt_info, class loop *loop)
{
  ssa_op_iter i;
  tree op;
  bool any = false;

  FOR_EACH_SSA_TREE_OPERAND (op, stmt_info->stmt, i, SSA_OP_USE)
    {
      gimple *def_stmt = SSA_NAME_DEF_STMT (op);
      if (!gimple_nop_p (def_stmt)
	  && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
	{
	  /* Make sure we don't need to recurse.  While we could do
	     so in simple cases when there are more complex use webs
	     we don't have an easy way to preserve stmt order to fulfil
	     dependencies within them.  */
	  tree op2;
	  ssa_op_iter i2;
	  if (gimple_code (def_stmt) == GIMPLE_PHI)
	    return false;
	  FOR_EACH_SSA_TREE_OPERAND (op2, def_stmt, i2, SSA_OP_USE)
	    {
	      gimple *def_stmt2 = SSA_NAME_DEF_STMT (op2);
	      if (!gimple_nop_p (def_stmt2)
		  && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt2)))
		return false;
	    }
	  any = true;
	}
    }

  if (!any)
    return true;

  FOR_EACH_SSA_TREE_OPERAND (op, stmt_info->stmt, i, SSA_OP_USE)
    {
      gimple *def_stmt = SSA_NAME_DEF_STMT (op);
      if (!gimple_nop_p (def_stmt)
	  && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
	{
	  gimple_stmt_iterator gsi = gsi_for_stmt (def_stmt);
	  gsi_remove (&gsi, false);
	  gsi_insert_on_edge_immediate (loop_preheader_edge (loop), def_stmt);
	}
    }

  return true;
}
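/* For instance, if an invariant load  x = *p  sits in the loop and the
   (itself loop-invariant) computation of p is also emitted inside the loop,
   the definition of p is moved to the preheader here first, which then
   allows the VMAT_INVARIANT handling in vectorizable_load below to hoist
   the load itself onto the preheader edge.  */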
/* vectorizable_load.

   Check if STMT_INFO reads a non-scalar data-ref (array/pointer/structure)
   that can be vectorized.
   If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at GSI.
   Return true if STMT_INFO is vectorizable in this way.  */

static bool
vectorizable_load (vec_info *vinfo,
		   stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
		   stmt_vec_info *vec_stmt, slp_tree slp_node,
		   stmt_vector_for_cost *cost_vec)
{
8610 tree vec_dest
= NULL
;
8611 tree data_ref
= NULL
;
8612 stmt_vec_info prev_stmt_info
;
8613 loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
);
8614 class loop
*loop
= NULL
;
8615 class loop
*containing_loop
= gimple_bb (stmt_info
->stmt
)->loop_father
;
8616 bool nested_in_vect_loop
= false;
8621 enum dr_alignment_support alignment_support_scheme
;
8622 tree dataref_ptr
= NULL_TREE
;
8623 tree dataref_offset
= NULL_TREE
;
8624 gimple
*ptr_incr
= NULL
;
8627 unsigned int group_size
;
8628 poly_uint64 group_gap_adj
;
8629 tree msq
= NULL_TREE
, lsq
;
8630 tree offset
= NULL_TREE
;
8631 tree byte_offset
= NULL_TREE
;
8632 tree realignment_token
= NULL_TREE
;
8634 vec
<tree
> dr_chain
= vNULL
;
8635 bool grouped_load
= false;
8636 stmt_vec_info first_stmt_info
;
8637 stmt_vec_info first_stmt_info_for_drptr
= NULL
;
8638 bool compute_in_loop
= false;
8639 class loop
*at_loop
;
8641 bool slp
= (slp_node
!= NULL
);
8642 bool slp_perm
= false;
8643 bb_vec_info bb_vinfo
= dyn_cast
<bb_vec_info
> (vinfo
);
8646 gather_scatter_info gs_info
;
8648 enum vect_def_type mask_dt
= vect_unknown_def_type
;
8650 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
8653 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
8657 tree mask
= NULL_TREE
, mask_vectype
= NULL_TREE
;
8658 if (gassign
*assign
= dyn_cast
<gassign
*> (stmt_info
->stmt
))
8660 scalar_dest
= gimple_assign_lhs (assign
);
8661 if (TREE_CODE (scalar_dest
) != SSA_NAME
)
8664 tree_code code
= gimple_assign_rhs_code (assign
);
8665 if (code
!= ARRAY_REF
8666 && code
!= BIT_FIELD_REF
8667 && code
!= INDIRECT_REF
8668 && code
!= COMPONENT_REF
8669 && code
!= IMAGPART_EXPR
8670 && code
!= REALPART_EXPR
8672 && TREE_CODE_CLASS (code
) != tcc_declaration
)
8677 gcall
*call
= dyn_cast
<gcall
*> (stmt_info
->stmt
);
8678 if (!call
|| !gimple_call_internal_p (call
))
8681 internal_fn ifn
= gimple_call_internal_fn (call
);
8682 if (!internal_load_fn_p (ifn
))
8685 scalar_dest
= gimple_call_lhs (call
);
8689 int mask_index
= internal_fn_mask_index (ifn
);
8690 if (mask_index
>= 0)
8692 mask
= gimple_call_arg (call
, mask_index
);
8693 if (!vect_check_scalar_mask (vinfo
, stmt_info
, mask
, &mask_dt
,
8699 if (!STMT_VINFO_DATA_REF (stmt_info
))
8702 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
8703 poly_uint64 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
8707 loop
= LOOP_VINFO_LOOP (loop_vinfo
);
8708 nested_in_vect_loop
= nested_in_vect_loop_p (loop
, stmt_info
);
8709 vf
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
);
8714 /* Multiple types in SLP are handled by creating the appropriate number of
8715 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
8720 ncopies
= vect_get_num_copies (loop_vinfo
, vectype
);
8722 gcc_assert (ncopies
>= 1);
8724 /* FORNOW. This restriction should be relaxed. */
8725 if (nested_in_vect_loop
&& ncopies
> 1)
8727 if (dump_enabled_p ())
8728 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
8729 "multiple types in nested loop.\n");
8733 /* Invalidate assumptions made by dependence analysis when vectorization
8734 on the unrolled body effectively re-orders stmts. */
8736 && STMT_VINFO_MIN_NEG_DIST (stmt_info
) != 0
8737 && maybe_gt (LOOP_VINFO_VECT_FACTOR (loop_vinfo
),
8738 STMT_VINFO_MIN_NEG_DIST (stmt_info
)))
8740 if (dump_enabled_p ())
8741 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
8742 "cannot perform implicit CSE when unrolling "
8743 "with negative dependence distance\n");
8747 elem_type
= TREE_TYPE (vectype
);
8748 mode
= TYPE_MODE (vectype
);
8750 /* FORNOW. In some cases can vectorize even if data-type not supported
8751 (e.g. - data copies). */
8752 if (optab_handler (mov_optab
, mode
) == CODE_FOR_nothing
)
8754 if (dump_enabled_p ())
8755 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
8756 "Aligned load, but unsupported type.\n");
8760 /* Check if the load is a part of an interleaving chain. */
8761 if (STMT_VINFO_GROUPED_ACCESS (stmt_info
))
8763 grouped_load
= true;
8765 gcc_assert (!nested_in_vect_loop
);
8766 gcc_assert (!STMT_VINFO_GATHER_SCATTER_P (stmt_info
));
8768 first_stmt_info
= DR_GROUP_FIRST_ELEMENT (stmt_info
);
8769 group_size
= DR_GROUP_SIZE (first_stmt_info
);
8771 /* Refuse non-SLP vectorization of SLP-only groups. */
8772 if (!slp
&& STMT_VINFO_SLP_VECT_ONLY (first_stmt_info
))
8774 if (dump_enabled_p ())
8775 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
8776 "cannot vectorize load in non-SLP mode.\n");
8780 if (slp
&& SLP_TREE_LOAD_PERMUTATION (slp_node
).exists ())
8786 /* In BB vectorization we may not actually use a loaded vector
8787 accessing elements in excess of DR_GROUP_SIZE. */
8788 stmt_vec_info group_info
= SLP_TREE_SCALAR_STMTS (slp_node
)[0];
8789 group_info
= DR_GROUP_FIRST_ELEMENT (group_info
);
8790 unsigned HOST_WIDE_INT nunits
;
8791 unsigned j
, k
, maxk
= 0;
8792 FOR_EACH_VEC_ELT (SLP_TREE_LOAD_PERMUTATION (slp_node
), j
, k
)
8795 tree vectype
= STMT_VINFO_VECTYPE (group_info
);
8796 if (!TYPE_VECTOR_SUBPARTS (vectype
).is_constant (&nunits
)
8797 || maxk
>= (DR_GROUP_SIZE (group_info
) & ~(nunits
- 1)))
8799 if (dump_enabled_p ())
8800 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
8801 "BB vectorization with gaps at the end of "
8802 "a load is not supported\n");
8809 if (!vect_transform_slp_perm_load (vinfo
, slp_node
, tem
, NULL
, vf
,
8812 if (dump_enabled_p ())
8813 dump_printf_loc (MSG_MISSED_OPTIMIZATION
,
8815 "unsupported load permutation\n");
8820 /* Invalidate assumptions made by dependence analysis when vectorization
8821 on the unrolled body effectively re-orders stmts. */
8822 if (!PURE_SLP_STMT (stmt_info
)
8823 && STMT_VINFO_MIN_NEG_DIST (stmt_info
) != 0
8824 && maybe_gt (LOOP_VINFO_VECT_FACTOR (loop_vinfo
),
8825 STMT_VINFO_MIN_NEG_DIST (stmt_info
)))
8827 if (dump_enabled_p ())
8828 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
8829 "cannot perform implicit CSE when performing "
8830 "group loads with negative dependence distance\n");
8837 vect_memory_access_type memory_access_type
;
8838 if (!get_load_store_type (vinfo
, stmt_info
, vectype
, slp
, mask
, VLS_LOAD
,
8839 ncopies
, &memory_access_type
, &gs_info
))
8844 if (memory_access_type
== VMAT_CONTIGUOUS
)
8846 machine_mode vec_mode
= TYPE_MODE (vectype
);
8847 if (!VECTOR_MODE_P (vec_mode
)
8848 || !can_vec_mask_load_store_p (vec_mode
,
8849 TYPE_MODE (mask_vectype
), true))
8852 else if (memory_access_type
!= VMAT_LOAD_STORE_LANES
8853 && memory_access_type
!= VMAT_GATHER_SCATTER
)
8855 if (dump_enabled_p ())
8856 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
8857 "unsupported access type for masked load.\n");
8862 if (!vec_stmt
) /* transformation not required. */
8865 STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info
) = memory_access_type
;
8868 && LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo
))
8869 check_load_store_masking (loop_vinfo
, vectype
, VLS_LOAD
, group_size
,
8870 memory_access_type
, &gs_info
, mask
);
8872 STMT_VINFO_TYPE (stmt_info
) = load_vec_info_type
;
8873 vect_model_load_cost (vinfo
, stmt_info
, ncopies
, vf
, memory_access_type
,
8874 slp_node
, cost_vec
);
8879 gcc_assert (memory_access_type
8880 == STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info
));
8882 if (dump_enabled_p ())
8883 dump_printf_loc (MSG_NOTE
, vect_location
,
8884 "transform load. ncopies = %d\n", ncopies
);
8888 dr_vec_info
*dr_info
= STMT_VINFO_DR_INFO (stmt_info
), *first_dr_info
= NULL
;
8889 ensure_base_align (dr_info
);
8891 if (memory_access_type
== VMAT_GATHER_SCATTER
&& gs_info
.decl
)
8893 vect_build_gather_load_calls (vinfo
,
8894 stmt_info
, gsi
, vec_stmt
, &gs_info
, mask
);
8898 if (memory_access_type
== VMAT_INVARIANT
)
8900 gcc_assert (!grouped_load
&& !mask
&& !bb_vinfo
);
8901 /* If we have versioned for aliasing or the loop doesn't
8902 have any data dependencies that would preclude this,
8903 then we are sure this is a loop invariant load and
8904 thus we can insert it on the preheader edge. */
8905 bool hoist_p
= (LOOP_VINFO_NO_DATA_DEPENDENCIES (loop_vinfo
)
8906 && !nested_in_vect_loop
8907 && hoist_defs_of_uses (stmt_info
, loop
));
8910 gassign
*stmt
= as_a
<gassign
*> (stmt_info
->stmt
);
8911 if (dump_enabled_p ())
8912 dump_printf_loc (MSG_NOTE
, vect_location
,
8913 "hoisting out of the vectorized loop: %G", stmt
);
8914 scalar_dest
= copy_ssa_name (scalar_dest
);
8915 tree rhs
= unshare_expr (gimple_assign_rhs1 (stmt
));
8916 gsi_insert_on_edge_immediate
8917 (loop_preheader_edge (loop
),
8918 gimple_build_assign (scalar_dest
, rhs
));
8920 /* These copies are all equivalent, but currently the representation
8921 requires a separate STMT_VINFO_VEC_STMT for each one. */
8922 prev_stmt_info
= NULL
;
8923 gimple_stmt_iterator gsi2
= *gsi
;
8925 for (j
= 0; j
< ncopies
; j
++)
8927 stmt_vec_info new_stmt_info
;
8930 new_temp
= vect_init_vector (vinfo
, stmt_info
, scalar_dest
,
8932 gimple
*new_stmt
= SSA_NAME_DEF_STMT (new_temp
);
8933 new_stmt_info
= vinfo
->add_stmt (new_stmt
);
8937 new_temp
= vect_init_vector (vinfo
, stmt_info
, scalar_dest
,
8939 new_stmt_info
= vinfo
->lookup_def (new_temp
);
8942 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt_info
);
8944 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt_info
;
8946 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt_info
;
8947 prev_stmt_info
= new_stmt_info
;
8952 if (memory_access_type
== VMAT_ELEMENTWISE
8953 || memory_access_type
== VMAT_STRIDED_SLP
)
8955 gimple_stmt_iterator incr_gsi
;
8961 vec
<constructor_elt
, va_gc
> *v
= NULL
;
8962 tree stride_base
, stride_step
, alias_off
;
8963 /* Checked by get_load_store_type. */
8964 unsigned int const_nunits
= nunits
.to_constant ();
8965 unsigned HOST_WIDE_INT cst_offset
= 0;
8968 gcc_assert (!LOOP_VINFO_FULLY_MASKED_P (loop_vinfo
));
8969 gcc_assert (!nested_in_vect_loop
);
8973 first_stmt_info
= DR_GROUP_FIRST_ELEMENT (stmt_info
);
8974 first_dr_info
= STMT_VINFO_DR_INFO (first_stmt_info
);
8978 first_stmt_info
= stmt_info
;
8979 first_dr_info
= dr_info
;
8981 if (slp
&& grouped_load
)
8983 group_size
= DR_GROUP_SIZE (first_stmt_info
);
8984 ref_type
= get_group_alias_ptr_type (first_stmt_info
);
8990 = (tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype
)))
8991 * vect_get_place_in_interleaving_chain (stmt_info
,
8994 ref_type
= reference_alias_ptr_type (DR_REF (dr_info
->dr
));
8997 dr_offset
= get_dr_vinfo_offset (vinfo
, first_dr_info
);
8999 = fold_build_pointer_plus
9000 (DR_BASE_ADDRESS (first_dr_info
->dr
),
9001 size_binop (PLUS_EXPR
,
9002 convert_to_ptrofftype (dr_offset
),
9003 convert_to_ptrofftype (DR_INIT (first_dr_info
->dr
))));
9004 stride_step
= fold_convert (sizetype
, DR_STEP (first_dr_info
->dr
));
      /* For a load with loop-invariant (but other than power-of-2)
	 stride (i.e. not a grouped access) like so:

	   for (i = 0; i < n; i += stride)
	     ... = array[i];

	 we generate a new induction variable and new accesses to
	 form a new vector (or vectors, depending on ncopies):

	   for (j = 0; ; j += VF*stride)
	     tmp1 = array[j];
	     tmp2 = array[j + stride];
	     ...
	     vectemp = {tmp1, tmp2, ...}
	 */
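      /* Illustrative (hypothetical) numbers: with VF == 4 and stride == 3
	 one vector is assembled from array[j], array[j + 3], array[j + 6]
	 and array[j + 9], after which j advances by VF * stride == 12.  */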
9022 ivstep
= fold_build2 (MULT_EXPR
, TREE_TYPE (stride_step
), stride_step
,
9023 build_int_cst (TREE_TYPE (stride_step
), vf
));
9025 standard_iv_increment_position (loop
, &incr_gsi
, &insert_after
);
9027 stride_base
= cse_and_gimplify_to_preheader (loop_vinfo
, stride_base
);
9028 ivstep
= cse_and_gimplify_to_preheader (loop_vinfo
, ivstep
);
9029 create_iv (stride_base
, ivstep
, NULL
,
9030 loop
, &incr_gsi
, insert_after
,
9032 incr
= gsi_stmt (incr_gsi
);
9033 loop_vinfo
->add_stmt (incr
);
9035 stride_step
= cse_and_gimplify_to_preheader (loop_vinfo
, stride_step
);
9037 prev_stmt_info
= NULL
;
9038 running_off
= offvar
;
9039 alias_off
= build_int_cst (ref_type
, 0);
9040 int nloads
= const_nunits
;
9042 tree ltype
= TREE_TYPE (vectype
);
9043 tree lvectype
= vectype
;
9044 auto_vec
<tree
> dr_chain
;
9045 if (memory_access_type
== VMAT_STRIDED_SLP
)
9047 if (group_size
< const_nunits
)
9049 /* First check if vec_init optab supports construction from vector
9050 elts directly. Otherwise avoid emitting a constructor of
9051 vector elements by performing the loads using an integer type
9052 of the same size, constructing a vector of those and then
9053 re-interpreting it as the original vector type. This avoids a
9054 huge runtime penalty due to the general inability to perform
9055 store forwarding from smaller stores to a larger load. */
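	      /* A hypothetical example: group_size == 2 SImode elements
		 with a V8SI vectype.  We then perform four 64-bit loads,
		 either as V2SI pieces or, failing that, as DImode
		 integers, and build the V8SI from those four values
		 instead of issuing eight separate SImode element loads.  */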
9058 = vector_vector_composition_type (vectype
,
9059 const_nunits
/ group_size
,
9061 if (vtype
!= NULL_TREE
)
9063 nloads
= const_nunits
/ group_size
;
9072 lnel
= const_nunits
;
9075 ltype
= build_aligned_type (ltype
, TYPE_ALIGN (TREE_TYPE (vectype
)));
9077 /* Load vector(1) scalar_type if it's 1 element-wise vectype. */
9078 else if (nloads
== 1)
9083 /* For SLP permutation support we need to load the whole group,
9084 not only the number of vector stmts the permutation result
9088 /* We don't yet generate SLP_TREE_LOAD_PERMUTATIONs for
9090 unsigned int const_vf
= vf
.to_constant ();
9091 ncopies
= CEIL (group_size
* const_vf
, const_nunits
);
9092 dr_chain
.create (ncopies
);
9095 ncopies
= SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
);
9097 unsigned int group_el
= 0;
9098 unsigned HOST_WIDE_INT
9099 elsz
= tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype
)));
9100 for (j
= 0; j
< ncopies
; j
++)
9103 vec_alloc (v
, nloads
);
9104 stmt_vec_info new_stmt_info
= NULL
;
9105 for (i
= 0; i
< nloads
; i
++)
9107 tree this_off
= build_int_cst (TREE_TYPE (alias_off
),
9108 group_el
* elsz
+ cst_offset
);
9109 tree data_ref
= build2 (MEM_REF
, ltype
, running_off
, this_off
);
9110 vect_copy_ref_info (data_ref
, DR_REF (first_dr_info
->dr
));
9112 = gimple_build_assign (make_ssa_name (ltype
), data_ref
);
9114 = vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
9116 CONSTRUCTOR_APPEND_ELT (v
, NULL_TREE
,
9117 gimple_assign_lhs (new_stmt
));
9121 || group_el
== group_size
)
9123 tree newoff
= copy_ssa_name (running_off
);
9124 gimple
*incr
= gimple_build_assign (newoff
, POINTER_PLUS_EXPR
,
9125 running_off
, stride_step
);
9126 vect_finish_stmt_generation (vinfo
, stmt_info
, incr
, gsi
);
9128 running_off
= newoff
;
9134 tree vec_inv
= build_constructor (lvectype
, v
);
9135 new_temp
= vect_init_vector (vinfo
, stmt_info
,
9136 vec_inv
, lvectype
, gsi
);
9137 new_stmt_info
= vinfo
->lookup_def (new_temp
);
9138 if (lvectype
!= vectype
)
9141 = gimple_build_assign (make_ssa_name (vectype
),
9143 build1 (VIEW_CONVERT_EXPR
,
9144 vectype
, new_temp
));
9146 = vect_finish_stmt_generation (vinfo
, stmt_info
,
9154 dr_chain
.quick_push (gimple_assign_lhs (new_stmt_info
->stmt
));
9156 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt_info
);
9161 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt_info
;
9163 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt_info
;
9164 prev_stmt_info
= new_stmt_info
;
9170 vect_transform_slp_perm_load (vinfo
, slp_node
, dr_chain
, gsi
, vf
,
9176 if (memory_access_type
== VMAT_GATHER_SCATTER
9177 || (!slp
&& memory_access_type
== VMAT_CONTIGUOUS
))
9178 grouped_load
= false;
9182 first_stmt_info
= DR_GROUP_FIRST_ELEMENT (stmt_info
);
9183 group_size
= DR_GROUP_SIZE (first_stmt_info
);
9184 /* For SLP vectorization we directly vectorize a subchain
9185 without permutation. */
9186 if (slp
&& ! SLP_TREE_LOAD_PERMUTATION (slp_node
).exists ())
9187 first_stmt_info
= SLP_TREE_SCALAR_STMTS (slp_node
)[0];
9188 /* For BB vectorization always use the first stmt to base
9189 the data ref pointer on. */
9191 first_stmt_info_for_drptr
= SLP_TREE_SCALAR_STMTS (slp_node
)[0];
9193 /* Check if the chain of loads is already vectorized. */
9194 if (STMT_VINFO_VEC_STMT (first_stmt_info
)
9195 /* For SLP we would need to copy over SLP_TREE_VEC_STMTS.
9196 ??? But we can only do so if there is exactly one
9197 as we have no way to get at the rest. Leave the CSE
9199 ??? With the group load eventually participating
9200 in multiple different permutations (having multiple
9201 slp nodes which refer to the same group) the CSE
9202 is even wrong code. See PR56270. */
9205 *vec_stmt
= STMT_VINFO_VEC_STMT (stmt_info
);
9208 first_dr_info
= STMT_VINFO_DR_INFO (first_stmt_info
);
9211 /* VEC_NUM is the number of vect stmts to be created for this group. */
9214 grouped_load
= false;
9215 /* If an SLP permutation is from N elements to N elements,
9216 and if one vector holds a whole number of N, we can load
9217 the inputs to the permutation in the same way as an
9218 unpermuted sequence. In other cases we need to load the
9219 whole group, not only the number of vector stmts the
9220 permutation result fits in. */
9221 unsigned scalar_lanes
= SLP_TREE_SCALAR_STMTS (slp_node
).length ();
9223 && (group_size
!= scalar_lanes
9224 || !multiple_p (nunits
, group_size
)))
9226 /* We don't yet generate such SLP_TREE_LOAD_PERMUTATIONs for
9227 variable VF; see vect_transform_slp_perm_load. */
9228 unsigned int const_vf
= vf
.to_constant ();
9229 unsigned int const_nunits
= nunits
.to_constant ();
9230 vec_num
= CEIL (group_size
* const_vf
, const_nunits
);
9231 group_gap_adj
= vf
* group_size
- nunits
* vec_num
;
9235 vec_num
= SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
);
9237 = group_size
- scalar_lanes
;
9241 vec_num
= group_size
;
9243 ref_type
= get_group_alias_ptr_type (first_stmt_info
);
9247 first_stmt_info
= stmt_info
;
9248 first_dr_info
= dr_info
;
9249 group_size
= vec_num
= 1;
9251 ref_type
= reference_alias_ptr_type (DR_REF (first_dr_info
->dr
));
9254 /* Gather-scatter accesses perform only component accesses, alignment
9255 is irrelevant for them. */
9256 if (memory_access_type
== VMAT_GATHER_SCATTER
)
9257 alignment_support_scheme
= dr_unaligned_supported
;
9259 alignment_support_scheme
9260 = vect_supportable_dr_alignment (vinfo
, first_dr_info
, false);
9262 gcc_assert (alignment_support_scheme
);
9263 vec_loop_masks
*loop_masks
9264 = (loop_vinfo
&& LOOP_VINFO_FULLY_MASKED_P (loop_vinfo
)
9265 ? &LOOP_VINFO_MASKS (loop_vinfo
)
9267 /* Targets with store-lane instructions must not require explicit
9268 realignment. vect_supportable_dr_alignment always returns either
9269 dr_aligned or dr_unaligned_supported for masked operations. */
9270 gcc_assert ((memory_access_type
!= VMAT_LOAD_STORE_LANES
9273 || alignment_support_scheme
== dr_aligned
9274 || alignment_support_scheme
== dr_unaligned_supported
);
  /* In case the vectorization factor (VF) is bigger than the number
     of elements that we can fit in a vectype (nunits), we have to generate
     more than one vector stmt - i.e - we need to "unroll" the
     vector stmt by a factor VF/nunits.  In doing so, we record a pointer
     from one copy of the vector stmt to the next, in the field
     STMT_VINFO_RELATED_STMT.  This is necessary in order to allow following
     stages to find the correct vector defs to be used when vectorizing
     stmts that use the defs of the current stmt.  The example below
     illustrates the vectorization process when VF=16 and nunits=4 (i.e., we
     need to create 4 vectorized stmts):

     before vectorization:
                                RELATED_STMT    VEC_STMT
        S1:     x = memref      -               -
        S2:     z = x + 1       -               -

     step 1: vectorize stmt S1:
        We first create the vector stmt VS1_0, and, as usual, record a
        pointer to it in the STMT_VINFO_VEC_STMT of the scalar stmt S1.
        Next, we create the vector stmt VS1_1, and record a pointer to
        it in the STMT_VINFO_RELATED_STMT of the vector stmt VS1_0.
        Similarly, for VS1_2 and VS1_3.  This is the resulting chain of
        stmts and pointers:
                                RELATED_STMT    VEC_STMT
        VS1_0:  vx0 = memref0   VS1_1           -
        VS1_1:  vx1 = memref1   VS1_2           -
        VS1_2:  vx2 = memref2   VS1_3           -
        VS1_3:  vx3 = memref3   -               -
        S1:     x = load        -               VS1_0
        S2:     z = x + 1       -               -

     See the documentation of vect_get_vec_def_for_stmt_copy for how the
     information recorded in the RELATED_STMT field is used to vectorize
     stmt S2.  */
  /* In case of interleaving (non-unit grouped access):

     S1:  x2 = &base + 2
     S2:  x0 = &base
     S3:  x1 = &base + 1
     S4:  x3 = &base + 3

     Vectorized loads are created in the order of memory accesses
     starting from the access of the first stmt of the chain:

     VS1: vx0 = &base
     VS2: vx1 = &base + vec_size*1
     VS3: vx3 = &base + vec_size*2
     VS4: vx4 = &base + vec_size*3

     Then permutation statements are generated:

     VS5: vx5 = VEC_PERM_EXPR < vx0, vx1, { 0, 2, ..., i*2 } >
     VS6: vx6 = VEC_PERM_EXPR < vx0, vx1, { 1, 3, ..., i*2+1 } >
       ...

     And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
     (the order of the data-refs in the output of vect_permute_load_chain
     corresponds to the order of scalar stmts in the interleaving chain - see
     the documentation of vect_permute_load_chain()).
     The generation of permutation stmts and recording them in
     STMT_VINFO_VEC_STMT is done in vect_transform_grouped_load().

     In case of both multiple types and interleaving, the vector loads and
     permutation stmts above are created for every copy.  The result vector
     stmts are put in STMT_VINFO_VEC_STMT for the first copy and in the
     corresponding STMT_VINFO_RELATED_STMT for the next copies.  */
  /* If the data reference is aligned (dr_aligned) or potentially unaligned
     on a target that supports unaligned accesses (dr_unaligned_supported)
     we generate the following code:

         p = initial_addr;
         indx = 0;
         loop {
	   p = p + indx * vectype_size;
           vec_dest = *(p);
           indx = indx + 1;
         }

     Otherwise, the data reference is potentially unaligned on a target that
     does not support unaligned accesses (dr_explicit_realign_optimized) -
     then generate the following code, in which the data in each iteration is
     obtained by two vector loads, one from the previous iteration, and one
     from the current iteration:

         msq_init = *(floor(p1))
         p2 = initial_addr + VS - 1;
         realignment_token = call target_builtin;
         indx = 0;
         loop {
           p2 = p2 + indx * vectype_size
           lsq = *(floor(p2))
           vec_dest = realign_load (msq, lsq, realignment_token)
9373 /* If the misalignment remains the same throughout the execution of the
9374 loop, we can create the init_addr and permutation mask at the loop
9375 preheader. Otherwise, it needs to be created inside the loop.
9376 This can only occur when vectorizing memory accesses in the inner-loop
9377 nested within an outer-loop that is being vectorized. */
9379 if (nested_in_vect_loop
9380 && !multiple_p (DR_STEP_ALIGNMENT (dr_info
->dr
),
9381 GET_MODE_SIZE (TYPE_MODE (vectype
))))
9383 gcc_assert (alignment_support_scheme
!= dr_explicit_realign_optimized
);
9384 compute_in_loop
= true;
9387 bool diff_first_stmt_info
9388 = first_stmt_info_for_drptr
&& first_stmt_info
!= first_stmt_info_for_drptr
;
9390 if ((alignment_support_scheme
== dr_explicit_realign_optimized
9391 || alignment_support_scheme
== dr_explicit_realign
)
9392 && !compute_in_loop
)
9394 /* If we have different first_stmt_info, we can't set up realignment
9395 here, since we can't guarantee first_stmt_info DR has been
9396 initialized yet, use first_stmt_info_for_drptr DR by bumping the
9397 distance from first_stmt_info DR instead as below. */
9398 if (!diff_first_stmt_info
)
9399 msq
= vect_setup_realignment (vinfo
,
9400 first_stmt_info
, gsi
, &realignment_token
,
9401 alignment_support_scheme
, NULL_TREE
,
9403 if (alignment_support_scheme
== dr_explicit_realign_optimized
)
9405 phi
= as_a
<gphi
*> (SSA_NAME_DEF_STMT (msq
));
9406 byte_offset
= size_binop (MINUS_EXPR
, TYPE_SIZE_UNIT (vectype
),
9408 gcc_assert (!first_stmt_info_for_drptr
);
9414 if (memory_access_type
== VMAT_CONTIGUOUS_REVERSE
)
9415 offset
= size_int (-TYPE_VECTOR_SUBPARTS (vectype
) + 1);
9418 tree vec_offset
= NULL_TREE
;
9419 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info
))
9421 aggr_type
= NULL_TREE
;
9424 else if (memory_access_type
== VMAT_GATHER_SCATTER
)
9426 aggr_type
= elem_type
;
9427 vect_get_strided_load_store_ops (stmt_info
, loop_vinfo
, &gs_info
,
9428 &bump
, &vec_offset
);
9432 if (memory_access_type
== VMAT_LOAD_STORE_LANES
)
9433 aggr_type
= build_array_type_nelts (elem_type
, vec_num
* nunits
);
9435 aggr_type
= vectype
;
9436 bump
= vect_get_data_ptr_increment (vinfo
, dr_info
, aggr_type
,
9437 memory_access_type
);
9440 tree vec_mask
= NULL_TREE
;
9441 prev_stmt_info
= NULL
;
9442 poly_uint64 group_elt
= 0;
9443 for (j
= 0; j
< ncopies
; j
++)
9445 stmt_vec_info new_stmt_info
= NULL
;
9446 /* 1. Create the vector or array pointer update chain. */
9449 bool simd_lane_access_p
9450 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) != 0;
9451 if (simd_lane_access_p
9452 && TREE_CODE (DR_BASE_ADDRESS (first_dr_info
->dr
)) == ADDR_EXPR
9453 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr_info
->dr
), 0))
9454 && integer_zerop (get_dr_vinfo_offset (vinfo
, first_dr_info
))
9455 && integer_zerop (DR_INIT (first_dr_info
->dr
))
9456 && alias_sets_conflict_p (get_alias_set (aggr_type
),
9457 get_alias_set (TREE_TYPE (ref_type
)))
9458 && (alignment_support_scheme
== dr_aligned
9459 || alignment_support_scheme
== dr_unaligned_supported
))
9461 dataref_ptr
= unshare_expr (DR_BASE_ADDRESS (first_dr_info
->dr
));
9462 dataref_offset
= build_int_cst (ref_type
, 0);
9464 else if (diff_first_stmt_info
)
9467 = vect_create_data_ref_ptr (vinfo
, first_stmt_info_for_drptr
,
9468 aggr_type
, at_loop
, offset
, &dummy
,
9469 gsi
, &ptr_incr
, simd_lane_access_p
,
9471 /* Adjust the pointer by the difference to first_stmt. */
9472 data_reference_p ptrdr
9473 = STMT_VINFO_DATA_REF (first_stmt_info_for_drptr
);
9475 = fold_convert (sizetype
,
9476 size_binop (MINUS_EXPR
,
9477 DR_INIT (first_dr_info
->dr
),
9479 dataref_ptr
= bump_vector_ptr (vinfo
, dataref_ptr
, ptr_incr
, gsi
,
9481 if (alignment_support_scheme
== dr_explicit_realign
)
9483 msq
= vect_setup_realignment (vinfo
,
9484 first_stmt_info_for_drptr
, gsi
,
9486 alignment_support_scheme
,
9487 dataref_ptr
, &at_loop
);
9488 gcc_assert (!compute_in_loop
);
9491 else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info
))
9492 vect_get_gather_scatter_ops (vinfo
, loop
, stmt_info
, &gs_info
,
9493 &dataref_ptr
, &vec_offset
);
9496 = vect_create_data_ref_ptr (vinfo
, first_stmt_info
, aggr_type
,
9498 offset
, &dummy
, gsi
, &ptr_incr
,
9505 auto_vec
<vec
<tree
> > vec_defs (1);
9506 vect_get_slp_defs (vinfo
, slp_node
, &vec_defs
);
9507 vec_mask
= vec_defs
[0][0];
9510 vec_mask
= vect_get_vec_def_for_operand (vinfo
, mask
, stmt_info
,
9517 dataref_offset
= int_const_binop (PLUS_EXPR
, dataref_offset
,
9519 else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info
))
9520 vec_offset
= vect_get_vec_def_for_stmt_copy (vinfo
, vec_offset
);
9522 dataref_ptr
= bump_vector_ptr (vinfo
, dataref_ptr
, ptr_incr
, gsi
,
9525 vec_mask
= vect_get_vec_def_for_stmt_copy (vinfo
, vec_mask
);
9528 if (grouped_load
|| slp_perm
)
9529 dr_chain
.create (vec_num
);
9531 if (memory_access_type
== VMAT_LOAD_STORE_LANES
)
9535 vec_array
= create_vector_array (vectype
, vec_num
);
9537 tree final_mask
= NULL_TREE
;
9539 final_mask
= vect_get_loop_mask (gsi
, loop_masks
, ncopies
,
9542 final_mask
= prepare_load_store_mask (mask_vectype
, final_mask
,
9549 VEC_ARRAY = MASK_LOAD_LANES (DATAREF_PTR, ALIAS_PTR,
9551 unsigned int align
= TYPE_ALIGN_UNIT (TREE_TYPE (vectype
));
9552 tree alias_ptr
= build_int_cst (ref_type
, align
);
9553 call
= gimple_build_call_internal (IFN_MASK_LOAD_LANES
, 3,
9554 dataref_ptr
, alias_ptr
,
9560 VEC_ARRAY = LOAD_LANES (MEM_REF[...all elements...]). */
9561 data_ref
= create_array_ref (aggr_type
, dataref_ptr
, ref_type
);
9562 call
= gimple_build_call_internal (IFN_LOAD_LANES
, 1, data_ref
);
9564 gimple_call_set_lhs (call
, vec_array
);
9565 gimple_call_set_nothrow (call
, true);
9566 new_stmt_info
= vect_finish_stmt_generation (vinfo
, stmt_info
,
9569 /* Extract each vector into an SSA_NAME. */
9570 for (i
= 0; i
< vec_num
; i
++)
9572 new_temp
= read_vector_array (vinfo
, stmt_info
, gsi
, scalar_dest
,
9574 dr_chain
.quick_push (new_temp
);
9577 /* Record the mapping between SSA_NAMEs and statements. */
9578 vect_record_grouped_load_vectors (vinfo
, stmt_info
, dr_chain
);
9580 /* Record that VEC_ARRAY is now dead. */
9581 vect_clobber_variable (vinfo
, stmt_info
, gsi
, vec_array
);
9585 for (i
= 0; i
< vec_num
; i
++)
9587 tree final_mask
= NULL_TREE
;
9589 && memory_access_type
!= VMAT_INVARIANT
)
9590 final_mask
= vect_get_loop_mask (gsi
, loop_masks
,
9592 vectype
, vec_num
* j
+ i
);
9594 final_mask
= prepare_load_store_mask (mask_vectype
, final_mask
,
9598 dataref_ptr
= bump_vector_ptr (vinfo
, dataref_ptr
, ptr_incr
,
9599 gsi
, stmt_info
, bump
);
9601 /* 2. Create the vector-load in the loop. */
9602 gimple
*new_stmt
= NULL
;
9603 switch (alignment_support_scheme
)
9606 case dr_unaligned_supported
:
9608 unsigned int misalign
;
9609 unsigned HOST_WIDE_INT align
;
9611 if (memory_access_type
== VMAT_GATHER_SCATTER
)
9613 tree zero
= build_zero_cst (vectype
);
9614 tree scale
= size_int (gs_info
.scale
);
9617 call
= gimple_build_call_internal
9618 (IFN_MASK_GATHER_LOAD
, 5, dataref_ptr
,
9619 vec_offset
, scale
, zero
, final_mask
);
9621 call
= gimple_build_call_internal
9622 (IFN_GATHER_LOAD
, 4, dataref_ptr
,
9623 vec_offset
, scale
, zero
);
9624 gimple_call_set_nothrow (call
, true);
9626 data_ref
= NULL_TREE
;
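		  /* Roughly, with hypothetical SSA names and a scale of 8,
		     the call emitted above is
		       vect__4 = .MASK_GATHER_LOAD (ptr_1, voffs_2, 8,
						    { 0, ... }, mask_3);
		     where inactive lanes take the zero vector passed as
		     the fourth argument.  */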
9631 known_alignment (DR_TARGET_ALIGNMENT (first_dr_info
));
9632 if (alignment_support_scheme
== dr_aligned
)
9634 gcc_assert (aligned_access_p (first_dr_info
));
9637 else if (DR_MISALIGNMENT (first_dr_info
) == -1)
9639 align
= dr_alignment
9640 (vect_dr_behavior (vinfo
, first_dr_info
));
9644 misalign
= DR_MISALIGNMENT (first_dr_info
);
9645 if (dataref_offset
== NULL_TREE
9646 && TREE_CODE (dataref_ptr
) == SSA_NAME
)
9647 set_ptr_info_alignment (get_ptr_info (dataref_ptr
),
9652 align
= least_bit_hwi (misalign
| align
);
9653 tree ptr
= build_int_cst (ref_type
, align
);
9655 = gimple_build_call_internal (IFN_MASK_LOAD
, 3,
9658 gimple_call_set_nothrow (call
, true);
9660 data_ref
= NULL_TREE
;
9664 tree ltype
= vectype
;
9665 tree new_vtype
= NULL_TREE
;
9666 unsigned HOST_WIDE_INT gap
9667 = DR_GROUP_GAP (first_stmt_info
);
9668 unsigned int vect_align
9669 = vect_known_alignment_in_bytes (first_dr_info
);
9670 unsigned int scalar_dr_size
9671 = vect_get_scalar_dr_size (first_dr_info
);
9672 /* If there's no peeling for gaps but we have a gap
9673 with slp loads then load the lower half of the
9674 vector only. See get_group_load_store_type for
9675 when we apply this optimization. */
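		/* E.g. (hypothetical): group_size == 4, gap == 2 and a
		   V4SI vectype.  Only the two live elements are loaded,
		   as a single V2SI (or DImode) value, and the result is
		   widened back to V4SI by a CONSTRUCTOR that pads the
		   missing half with zeros.  */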
9678 && !LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo
)
9680 && known_eq (nunits
, (group_size
- gap
) * 2)
9681 && known_eq (nunits
, group_size
)
9682 && gap
>= (vect_align
/ scalar_dr_size
))
9686 = vector_vector_composition_type (vectype
, 2,
9688 if (new_vtype
!= NULL_TREE
)
9692 = (dataref_offset
? dataref_offset
9693 : build_int_cst (ref_type
, 0));
9694 if (ltype
!= vectype
9695 && memory_access_type
== VMAT_CONTIGUOUS_REVERSE
)
9697 unsigned HOST_WIDE_INT gap_offset
9698 = gap
* tree_to_uhwi (TYPE_SIZE_UNIT (elem_type
));
9699 tree gapcst
= build_int_cst (ref_type
, gap_offset
);
9700 offset
= size_binop (PLUS_EXPR
, offset
, gapcst
);
9703 = fold_build2 (MEM_REF
, ltype
, dataref_ptr
, offset
);
9704 if (alignment_support_scheme
== dr_aligned
)
9706 else if (DR_MISALIGNMENT (first_dr_info
) == -1)
9707 TREE_TYPE (data_ref
)
9708 = build_aligned_type (TREE_TYPE (data_ref
),
9709 align
* BITS_PER_UNIT
);
9711 TREE_TYPE (data_ref
)
9712 = build_aligned_type (TREE_TYPE (data_ref
),
9713 TYPE_ALIGN (elem_type
));
9714 if (ltype
!= vectype
)
9716 vect_copy_ref_info (data_ref
,
9717 DR_REF (first_dr_info
->dr
));
9718 tree tem
= make_ssa_name (ltype
);
9719 new_stmt
= gimple_build_assign (tem
, data_ref
);
9720 vect_finish_stmt_generation (vinfo
, stmt_info
,
9723 vec
<constructor_elt
, va_gc
> *v
;
9725 if (memory_access_type
== VMAT_CONTIGUOUS_REVERSE
)
9727 CONSTRUCTOR_APPEND_ELT (v
, NULL_TREE
,
9728 build_zero_cst (ltype
));
9729 CONSTRUCTOR_APPEND_ELT (v
, NULL_TREE
, tem
);
9733 CONSTRUCTOR_APPEND_ELT (v
, NULL_TREE
, tem
);
9734 CONSTRUCTOR_APPEND_ELT (v
, NULL_TREE
,
9735 build_zero_cst (ltype
));
9737 gcc_assert (new_vtype
!= NULL_TREE
);
9738 if (new_vtype
== vectype
)
9739 new_stmt
= gimple_build_assign (
9740 vec_dest
, build_constructor (vectype
, v
));
9743 tree new_vname
= make_ssa_name (new_vtype
);
9744 new_stmt
= gimple_build_assign (
9745 new_vname
, build_constructor (new_vtype
, v
));
9746 vect_finish_stmt_generation (vinfo
, stmt_info
,
9748 new_stmt
= gimple_build_assign (
9749 vec_dest
, build1 (VIEW_CONVERT_EXPR
, vectype
,
9756 case dr_explicit_realign
:
9760 tree vs
= size_int (TYPE_VECTOR_SUBPARTS (vectype
));
9762 if (compute_in_loop
)
9763 msq
= vect_setup_realignment (vinfo
, first_stmt_info
, gsi
,
9765 dr_explicit_realign
,
9768 if (TREE_CODE (dataref_ptr
) == SSA_NAME
)
9769 ptr
= copy_ssa_name (dataref_ptr
);
9771 ptr
= make_ssa_name (TREE_TYPE (dataref_ptr
));
9772 // For explicit realign the target alignment should be
9773 // known at compile time.
9774 unsigned HOST_WIDE_INT align
=
9775 DR_TARGET_ALIGNMENT (first_dr_info
).to_constant ();
9776 new_stmt
= gimple_build_assign
9777 (ptr
, BIT_AND_EXPR
, dataref_ptr
,
9779 (TREE_TYPE (dataref_ptr
),
9780 -(HOST_WIDE_INT
) align
));
9781 vect_finish_stmt_generation (vinfo
, stmt_info
,
9784 = build2 (MEM_REF
, vectype
, ptr
,
9785 build_int_cst (ref_type
, 0));
9786 vect_copy_ref_info (data_ref
, DR_REF (first_dr_info
->dr
));
9787 vec_dest
= vect_create_destination_var (scalar_dest
,
9789 new_stmt
= gimple_build_assign (vec_dest
, data_ref
);
9790 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
9791 gimple_assign_set_lhs (new_stmt
, new_temp
);
9792 gimple_move_vops (new_stmt
, stmt_info
->stmt
);
9793 vect_finish_stmt_generation (vinfo
, stmt_info
,
9797 bump
= size_binop (MULT_EXPR
, vs
,
9798 TYPE_SIZE_UNIT (elem_type
));
9799 bump
= size_binop (MINUS_EXPR
, bump
, size_one_node
);
9800 ptr
= bump_vector_ptr (vinfo
, dataref_ptr
, NULL
, gsi
,
9802 new_stmt
= gimple_build_assign
9803 (NULL_TREE
, BIT_AND_EXPR
, ptr
,
9805 (TREE_TYPE (ptr
), -(HOST_WIDE_INT
) align
));
9806 ptr
= copy_ssa_name (ptr
, new_stmt
);
9807 gimple_assign_set_lhs (new_stmt
, ptr
);
9808 vect_finish_stmt_generation (vinfo
, stmt_info
,
9811 = build2 (MEM_REF
, vectype
, ptr
,
9812 build_int_cst (ref_type
, 0));
9815 case dr_explicit_realign_optimized
:
9817 if (TREE_CODE (dataref_ptr
) == SSA_NAME
)
9818 new_temp
= copy_ssa_name (dataref_ptr
);
9820 new_temp
= make_ssa_name (TREE_TYPE (dataref_ptr
));
9821 // We should only be doing this if we know the target
9822 // alignment at compile time.
9823 unsigned HOST_WIDE_INT align
=
9824 DR_TARGET_ALIGNMENT (first_dr_info
).to_constant ();
9825 new_stmt
= gimple_build_assign
9826 (new_temp
, BIT_AND_EXPR
, dataref_ptr
,
9827 build_int_cst (TREE_TYPE (dataref_ptr
),
9828 -(HOST_WIDE_INT
) align
));
9829 vect_finish_stmt_generation (vinfo
, stmt_info
,
9832 = build2 (MEM_REF
, vectype
, new_temp
,
9833 build_int_cst (ref_type
, 0));
9839 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
9840 /* DATA_REF is null if we've already built the statement. */
9843 vect_copy_ref_info (data_ref
, DR_REF (first_dr_info
->dr
));
9844 new_stmt
= gimple_build_assign (vec_dest
, data_ref
);
9846 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
9847 gimple_set_lhs (new_stmt
, new_temp
);
9849 = vect_finish_stmt_generation (vinfo
, stmt_info
,
9852 /* 3. Handle explicit realignment if necessary/supported.
9854 vec_dest = realign_load (msq, lsq, realignment_token) */
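	      /* Rough sketch of the realignment scheme handled here: msq is
		 the aligned vector loaded at the access address rounded down
		 to the target alignment, lsq is the aligned vector loaded
		 just past the unaligned access (address + vector size - 1,
		 rounded down), and REALIGN_LOAD_EXPR combines the two using
		 the realignment token.  */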
9855 if (alignment_support_scheme
== dr_explicit_realign_optimized
9856 || alignment_support_scheme
== dr_explicit_realign
)
9858 lsq
= gimple_assign_lhs (new_stmt
);
9859 if (!realignment_token
)
9860 realignment_token
= dataref_ptr
;
9861 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
9862 new_stmt
= gimple_build_assign (vec_dest
, REALIGN_LOAD_EXPR
,
9863 msq
, lsq
, realignment_token
);
9864 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
9865 gimple_assign_set_lhs (new_stmt
, new_temp
);
9867 = vect_finish_stmt_generation (vinfo
, stmt_info
,
9870 if (alignment_support_scheme
== dr_explicit_realign_optimized
)
9873 if (i
== vec_num
- 1 && j
== ncopies
- 1)
9874 add_phi_arg (phi
, lsq
,
9875 loop_latch_edge (containing_loop
),
9881 if (memory_access_type
== VMAT_CONTIGUOUS_REVERSE
)
9883 tree perm_mask
= perm_mask_for_reverse (vectype
);
9884 new_temp
= permute_vec_elements (vinfo
, new_temp
, new_temp
,
9885 perm_mask
, stmt_info
, gsi
);
9886 new_stmt_info
= vinfo
->lookup_def (new_temp
);
9889 /* Collect vector loads and later create their permutation in
9890 vect_transform_grouped_load (). */
9891 if (grouped_load
|| slp_perm
)
9892 dr_chain
.quick_push (new_temp
);
9894 /* Store vector loads in the corresponding SLP_NODE. */
9895 if (slp
&& !slp_perm
)
9896 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt_info
);
	  /* With an SLP permutation we load the gaps as well; without one
	     we need to skip the gaps after we manage to fully load
	     all elements.  group_gap_adj is DR_GROUP_SIZE here.  */
9901 group_elt
+= nunits
;
9902 if (maybe_ne (group_gap_adj
, 0U)
9904 && known_eq (group_elt
, group_size
- group_gap_adj
))
9906 poly_wide_int bump_val
9907 = (wi::to_wide (TYPE_SIZE_UNIT (elem_type
))
9909 tree bump
= wide_int_to_tree (sizetype
, bump_val
);
9910 dataref_ptr
= bump_vector_ptr (vinfo
, dataref_ptr
, ptr_incr
,
9911 gsi
, stmt_info
, bump
);
9915 /* Bump the vector pointer to account for a gap or for excess
9916 elements loaded for a permuted SLP load. */
9917 if (maybe_ne (group_gap_adj
, 0U) && slp_perm
)
9919 poly_wide_int bump_val
9920 = (wi::to_wide (TYPE_SIZE_UNIT (elem_type
))
9922 tree bump
= wide_int_to_tree (sizetype
, bump_val
);
9923 dataref_ptr
= bump_vector_ptr (vinfo
, dataref_ptr
, ptr_incr
, gsi
,
9928 if (slp
&& !slp_perm
)
9934 bool ok
= vect_transform_slp_perm_load (vinfo
, slp_node
, dr_chain
,
9935 gsi
, vf
, false, &n_perms
);
9942 if (memory_access_type
!= VMAT_LOAD_STORE_LANES
)
9943 vect_transform_grouped_load (vinfo
, stmt_info
, dr_chain
,
9945 *vec_stmt
= STMT_VINFO_VEC_STMT (stmt_info
);
9950 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt_info
;
9952 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt_info
;
9953 prev_stmt_info
= new_stmt_info
;
9956 dr_chain
.release ();
/* Function vect_is_simple_cond.

   Input:
   LOOP - the loop that is being vectorized.
   COND - Condition that is checked for simple use.

   Output:
   *COMP_VECTYPE - the vector type for the comparison.
   *DTS - The def types for the arguments of the comparison

   Returns whether a COND can be vectorized.  Checks whether
   condition operands are supportable using vect_is_simple_use.  */
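
/* For example (illustration only): for a condition  a_5 < b_3  where a_5 is
   defined inside the loop and b_3 is a loop invariant, both operands pass
   vect_is_simple_use and *COMP_VECTYPE becomes the vector type of a_5's
   definition.  */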
9976 vect_is_simple_cond (tree cond
, vec_info
*vinfo
, slp_tree slp_node
,
9977 tree
*comp_vectype
, enum vect_def_type
*dts
,
9981 tree vectype1
= NULL_TREE
, vectype2
= NULL_TREE
;
9984 if (TREE_CODE (cond
) == SSA_NAME
9985 && VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (cond
)))
9987 if (!vect_is_simple_use (cond
, vinfo
, &dts
[0], comp_vectype
)
9989 || !VECTOR_BOOLEAN_TYPE_P (*comp_vectype
))
9994 if (!COMPARISON_CLASS_P (cond
))
9997 lhs
= TREE_OPERAND (cond
, 0);
9998 rhs
= TREE_OPERAND (cond
, 1);
10000 if (TREE_CODE (lhs
) == SSA_NAME
)
10002 if (!vect_is_simple_use (lhs
, vinfo
, &dts
[0], &vectype1
))
10005 else if (TREE_CODE (lhs
) == INTEGER_CST
|| TREE_CODE (lhs
) == REAL_CST
10006 || TREE_CODE (lhs
) == FIXED_CST
)
10007 dts
[0] = vect_constant_def
;
10011 if (TREE_CODE (rhs
) == SSA_NAME
)
10013 if (!vect_is_simple_use (rhs
, vinfo
, &dts
[1], &vectype2
))
10016 else if (TREE_CODE (rhs
) == INTEGER_CST
|| TREE_CODE (rhs
) == REAL_CST
10017 || TREE_CODE (rhs
) == FIXED_CST
)
10018 dts
[1] = vect_constant_def
;
10022 if (vectype1
&& vectype2
10023 && maybe_ne (TYPE_VECTOR_SUBPARTS (vectype1
),
10024 TYPE_VECTOR_SUBPARTS (vectype2
)))
10027 *comp_vectype
= vectype1
? vectype1
: vectype2
;
10028 /* Invariant comparison. */
10029 if (! *comp_vectype
)
10031 tree scalar_type
= TREE_TYPE (lhs
);
10032 if (VECT_SCALAR_BOOLEAN_TYPE_P (scalar_type
))
10033 *comp_vectype
= truth_type_for (vectype
);
10036 /* If we can widen the comparison to match vectype do so. */
10037 if (INTEGRAL_TYPE_P (scalar_type
)
10039 && tree_int_cst_lt (TYPE_SIZE (scalar_type
),
10040 TYPE_SIZE (TREE_TYPE (vectype
))))
10041 scalar_type
= build_nonstandard_integer_type
10042 (vector_element_bits (vectype
), TYPE_UNSIGNED (scalar_type
));
10043 *comp_vectype
= get_vectype_for_scalar_type (vinfo
, scalar_type
,
/* vectorizable_condition.

   Check if STMT_INFO is a conditional modify expression that can be
   vectorized.  If VEC_STMT is also passed, vectorize STMT_INFO: create a
   vectorized stmt using VEC_COND_EXPR to replace it, put it in VEC_STMT,
   and insert it at GSI.

   When STMT_INFO is vectorized as a nested cycle, for_reduction is true.

   Return true if STMT_INFO is vectorizable in this way.  */
10063 vectorizable_condition (vec_info
*vinfo
,
10064 stmt_vec_info stmt_info
, gimple_stmt_iterator
*gsi
,
10065 stmt_vec_info
*vec_stmt
,
10066 slp_tree slp_node
, stmt_vector_for_cost
*cost_vec
)
10068 tree scalar_dest
= NULL_TREE
;
10069 tree vec_dest
= NULL_TREE
;
10070 tree cond_expr
, cond_expr0
= NULL_TREE
, cond_expr1
= NULL_TREE
;
10071 tree then_clause
, else_clause
;
10072 tree comp_vectype
= NULL_TREE
;
10073 tree vec_cond_lhs
= NULL_TREE
, vec_cond_rhs
= NULL_TREE
;
10074 tree vec_then_clause
= NULL_TREE
, vec_else_clause
= NULL_TREE
;
10077 loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
);
10078 enum vect_def_type dts
[4]
10079 = {vect_unknown_def_type
, vect_unknown_def_type
,
10080 vect_unknown_def_type
, vect_unknown_def_type
};
10084 enum tree_code code
, cond_code
, bitop1
= NOP_EXPR
, bitop2
= NOP_EXPR
;
10085 stmt_vec_info prev_stmt_info
= NULL
;
10087 bb_vec_info bb_vinfo
= dyn_cast
<bb_vec_info
> (vinfo
);
10088 vec
<tree
> vec_oprnds0
= vNULL
;
10089 vec
<tree
> vec_oprnds1
= vNULL
;
10090 vec
<tree
> vec_oprnds2
= vNULL
;
10091 vec
<tree
> vec_oprnds3
= vNULL
;
10093 bool masked
= false;
10095 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
10098 /* Is vectorizable conditional operation? */
10099 gassign
*stmt
= dyn_cast
<gassign
*> (stmt_info
->stmt
);
10103 code
= gimple_assign_rhs_code (stmt
);
10104 if (code
!= COND_EXPR
)
10107 stmt_vec_info reduc_info
= NULL
;
10108 int reduc_index
= -1;
10109 vect_reduction_type reduction_type
= TREE_CODE_REDUCTION
;
10111 = STMT_VINFO_REDUC_DEF (vect_orig_stmt (stmt_info
)) != NULL
;
10114 if (STMT_SLP_TYPE (stmt_info
))
10116 reduc_info
= info_for_reduction (vinfo
, stmt_info
);
10117 reduction_type
= STMT_VINFO_REDUC_TYPE (reduc_info
);
10118 reduc_index
= STMT_VINFO_REDUC_IDX (stmt_info
);
10119 gcc_assert (reduction_type
!= EXTRACT_LAST_REDUCTION
10120 || reduc_index
!= -1);
10124 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
)
10127 /* FORNOW: only supported as part of a reduction. */
10128 if (STMT_VINFO_LIVE_P (stmt_info
))
10130 if (dump_enabled_p ())
10131 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
10132 "value used after loop.\n");
10137 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
10138 tree vectype1
= NULL_TREE
, vectype2
= NULL_TREE
;
10143 vec_num
= SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
);
10147 ncopies
= vect_get_num_copies (loop_vinfo
, vectype
);
10151 gcc_assert (ncopies
>= 1);
10152 if (for_reduction
&& ncopies
> 1)
10153 return false; /* FORNOW */
10155 cond_expr
= gimple_assign_rhs1 (stmt
);
10157 if (!vect_is_simple_cond (cond_expr
, vinfo
, slp_node
,
10158 &comp_vectype
, &dts
[0], vectype
)
10162 unsigned slp_adjust
= 0;
10163 if (slp_node
&& SLP_TREE_CHILDREN (slp_node
).length () == 4)
10164 /* ??? Hack. Hope for COND_EXPR GIMPLE sanitizing or refactor
10167 slp_tree then_slp_node
, else_slp_node
;
10168 if (!vect_is_simple_use (vinfo
, stmt_info
, slp_node
, 1 + slp_adjust
,
10169 &then_clause
, &then_slp_node
, &dts
[2], &vectype1
))
10171 if (!vect_is_simple_use (vinfo
, stmt_info
, slp_node
, 2 + slp_adjust
,
10172 &else_clause
, &else_slp_node
, &dts
[3], &vectype2
))
10175 if (vectype1
&& !useless_type_conversion_p (vectype
, vectype1
))
10178 if (vectype2
&& !useless_type_conversion_p (vectype
, vectype2
))
10181 masked
= !COMPARISON_CLASS_P (cond_expr
);
10182 vec_cmp_type
= truth_type_for (comp_vectype
);
10184 if (vec_cmp_type
== NULL_TREE
)
10187 cond_code
= TREE_CODE (cond_expr
);
10190 cond_expr0
= TREE_OPERAND (cond_expr
, 0);
10191 cond_expr1
= TREE_OPERAND (cond_expr
, 1);
10194 /* For conditional reductions, the "then" value needs to be the candidate
10195 value calculated by this iteration while the "else" value needs to be
10196 the result carried over from previous iterations. If the COND_EXPR
10197 is the other way around, we need to swap it. */
10198 bool must_invert_cmp_result
= false;
10199 if (reduction_type
== EXTRACT_LAST_REDUCTION
&& reduc_index
== 1)
10202 must_invert_cmp_result
= true;
10205 bool honor_nans
= HONOR_NANS (TREE_TYPE (cond_expr0
));
10206 tree_code new_code
= invert_tree_comparison (cond_code
, honor_nans
);
10207 if (new_code
== ERROR_MARK
)
10208 must_invert_cmp_result
= true;
10211 cond_code
= new_code
;
10212 /* Make sure we don't accidentally use the old condition. */
10213 cond_expr
= NULL_TREE
;
10216 std::swap (then_clause
, else_clause
);
10219 if (!masked
&& VECTOR_BOOLEAN_TYPE_P (comp_vectype
))
10221 /* Boolean values may have another representation in vectors
10222 and therefore we prefer bit operations over comparison for
10223 them (which also works for scalar masks). We store opcodes
10224 to use in bitop1 and bitop2. Statement is vectorized as
10225 BITOP2 (rhs1 BITOP1 rhs2) or rhs1 BITOP2 (BITOP1 rhs2)
10226 depending on bitop1 and bitop2 arity. */
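      /* For instance, on vector booleans (illustration only):
	 a > b is emitted as  a & ~b, i.e. bitop1 = BIT_NOT_EXPR applied
	 to b and bitop2 = BIT_AND_EXPR combining the result with a, while
	 a == b becomes  ~(a ^ b)  with bitop1 = BIT_XOR_EXPR and
	 bitop2 = BIT_NOT_EXPR.  */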
10230 bitop1
= BIT_NOT_EXPR
;
10231 bitop2
= BIT_AND_EXPR
;
10234 bitop1
= BIT_NOT_EXPR
;
10235 bitop2
= BIT_IOR_EXPR
;
10238 bitop1
= BIT_NOT_EXPR
;
10239 bitop2
= BIT_AND_EXPR
;
10240 std::swap (cond_expr0
, cond_expr1
);
10243 bitop1
= BIT_NOT_EXPR
;
10244 bitop2
= BIT_IOR_EXPR
;
10245 std::swap (cond_expr0
, cond_expr1
);
10248 bitop1
= BIT_XOR_EXPR
;
10251 bitop1
= BIT_XOR_EXPR
;
10252 bitop2
= BIT_NOT_EXPR
;
10257 cond_code
= SSA_NAME
;
10260 if (TREE_CODE_CLASS (cond_code
) == tcc_comparison
10261 && reduction_type
== EXTRACT_LAST_REDUCTION
10262 && !expand_vec_cmp_expr_p (comp_vectype
, vec_cmp_type
, cond_code
))
10264 if (dump_enabled_p ())
10265 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
10266 "reduction comparison operation not supported.\n");
10272 if (bitop1
!= NOP_EXPR
)
10274 machine_mode mode
= TYPE_MODE (comp_vectype
);
10277 optab
= optab_for_tree_code (bitop1
, comp_vectype
, optab_default
);
10278 if (!optab
|| optab_handler (optab
, mode
) == CODE_FOR_nothing
)
10281 if (bitop2
!= NOP_EXPR
)
10283 optab
= optab_for_tree_code (bitop2
, comp_vectype
,
10285 if (!optab
|| optab_handler (optab
, mode
) == CODE_FOR_nothing
)
10290 vect_cost_for_stmt kind
= vector_stmt
;
10291 if (reduction_type
== EXTRACT_LAST_REDUCTION
)
10292 /* Count one reduction-like operation per vector. */
10293 kind
= vec_to_scalar
;
10294 else if (!expand_vec_cond_expr_p (vectype
, comp_vectype
, cond_code
))
10298 && (!vect_maybe_update_slp_op_vectype
10299 (SLP_TREE_CHILDREN (slp_node
)[0], comp_vectype
)
10300 || (slp_adjust
== 1
10301 && !vect_maybe_update_slp_op_vectype
10302 (SLP_TREE_CHILDREN (slp_node
)[1], comp_vectype
))
10303 || !vect_maybe_update_slp_op_vectype (then_slp_node
, vectype
)
10304 || !vect_maybe_update_slp_op_vectype (else_slp_node
, vectype
)))
10306 if (dump_enabled_p ())
10307 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
10308 "incompatible vector types for invariants\n");
10313 && LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo
)
10314 && reduction_type
== EXTRACT_LAST_REDUCTION
)
10315 vect_record_loop_mask (loop_vinfo
, &LOOP_VINFO_MASKS (loop_vinfo
),
10316 ncopies
* vec_num
, vectype
, NULL
);
10318 STMT_VINFO_TYPE (stmt_info
) = condition_vec_info_type
;
10319 vect_model_simple_cost (vinfo
, stmt_info
, ncopies
, dts
, ndts
, slp_node
,
10328 vec_oprnds0
.create (1);
10329 vec_oprnds1
.create (1);
10330 vec_oprnds2
.create (1);
10331 vec_oprnds3
.create (1);
10335 scalar_dest
= gimple_assign_lhs (stmt
);
10336 if (reduction_type
!= EXTRACT_LAST_REDUCTION
)
10337 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
10339 /* Handle cond expr. */
10340 for (j
= 0; j
< ncopies
; j
++)
10342 bool swap_cond_operands
= false;
10344 /* See whether another part of the vectorized code applies a loop
10345 mask to the condition, or to its inverse. */
10347 vec_loop_masks
*masks
= NULL
;
10348 if (loop_vinfo
&& LOOP_VINFO_FULLY_MASKED_P (loop_vinfo
))
10350 if (reduction_type
== EXTRACT_LAST_REDUCTION
)
10351 masks
= &LOOP_VINFO_MASKS (loop_vinfo
);
10354 scalar_cond_masked_key
cond (cond_expr
, ncopies
);
10355 if (loop_vinfo
->scalar_cond_masked_set
.contains (cond
))
10356 masks
= &LOOP_VINFO_MASKS (loop_vinfo
);
10359 bool honor_nans
= HONOR_NANS (TREE_TYPE (cond
.op0
));
10360 cond
.code
= invert_tree_comparison (cond
.code
, honor_nans
);
10361 if (loop_vinfo
->scalar_cond_masked_set
.contains (cond
))
10363 masks
= &LOOP_VINFO_MASKS (loop_vinfo
);
10364 cond_code
= cond
.code
;
10365 swap_cond_operands
= true;
10371 stmt_vec_info new_stmt_info
= NULL
;
10376 auto_vec
<vec
<tree
>, 4> vec_defs
;
10377 vect_get_slp_defs (vinfo
, slp_node
, &vec_defs
);
10378 vec_oprnds3
= vec_defs
.pop ();
10379 vec_oprnds2
= vec_defs
.pop ();
10381 vec_oprnds1
= vec_defs
.pop ();
10382 vec_oprnds0
= vec_defs
.pop ();
10389 = vect_get_vec_def_for_operand (vinfo
, cond_expr
, stmt_info
,
10395 = vect_get_vec_def_for_operand (vinfo
, cond_expr0
,
10396 stmt_info
, comp_vectype
);
10398 = vect_get_vec_def_for_operand (vinfo
, cond_expr1
,
10399 stmt_info
, comp_vectype
);
10401 vec_then_clause
= vect_get_vec_def_for_operand (vinfo
,
10404 if (reduction_type
!= EXTRACT_LAST_REDUCTION
)
10405 vec_else_clause
= vect_get_vec_def_for_operand (vinfo
,
10413 = vect_get_vec_def_for_stmt_copy (vinfo
, vec_oprnds0
.pop ());
10416 = vect_get_vec_def_for_stmt_copy (vinfo
, vec_oprnds1
.pop ());
10418 vec_then_clause
= vect_get_vec_def_for_stmt_copy (vinfo
,
10419 vec_oprnds2
.pop ());
10420 vec_else_clause
= vect_get_vec_def_for_stmt_copy (vinfo
,
10421 vec_oprnds3
.pop ());
10426 vec_oprnds0
.quick_push (vec_cond_lhs
);
10428 vec_oprnds1
.quick_push (vec_cond_rhs
);
10429 vec_oprnds2
.quick_push (vec_then_clause
);
10430 vec_oprnds3
.quick_push (vec_else_clause
);
10433 /* Arguments are ready. Create the new vector stmt. */
10434 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vec_cond_lhs
)
10436 vec_then_clause
= vec_oprnds2
[i
];
10437 vec_else_clause
= vec_oprnds3
[i
];
10439 if (swap_cond_operands
)
10440 std::swap (vec_then_clause
, vec_else_clause
);
10443 vec_compare
= vec_cond_lhs
;
10446 vec_cond_rhs
= vec_oprnds1
[i
];
10447 if (bitop1
== NOP_EXPR
)
10448 vec_compare
= build2 (cond_code
, vec_cmp_type
,
10449 vec_cond_lhs
, vec_cond_rhs
);
10452 new_temp
= make_ssa_name (vec_cmp_type
);
10454 if (bitop1
== BIT_NOT_EXPR
)
10455 new_stmt
= gimple_build_assign (new_temp
, bitop1
,
10459 = gimple_build_assign (new_temp
, bitop1
, vec_cond_lhs
,
10461 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
10462 if (bitop2
== NOP_EXPR
)
10463 vec_compare
= new_temp
;
10464 else if (bitop2
== BIT_NOT_EXPR
)
10466 /* Instead of doing ~x ? y : z do x ? z : y. */
10467 vec_compare
= new_temp
;
10468 std::swap (vec_then_clause
, vec_else_clause
);
10472 vec_compare
= make_ssa_name (vec_cmp_type
);
10474 = gimple_build_assign (vec_compare
, bitop2
,
10475 vec_cond_lhs
, new_temp
);
10476 vect_finish_stmt_generation (vinfo
, stmt_info
,
10482 /* If we decided to apply a loop mask to the result of the vector
10483 comparison, AND the comparison with the mask now. Later passes
	     should then be able to reuse the AND results between multiple
10488 for (int i = 0; i < 100; ++i)
10489 x[i] = y[i] ? z[i] : 10;
10491 results in following optimized GIMPLE:
10493 mask__35.8_43 = vect__4.7_41 != { 0, ... };
10494 vec_mask_and_46 = loop_mask_40 & mask__35.8_43;
10495 _19 = &MEM[base: z_12(D), index: ivtmp_56, step: 4, offset: 0B];
10496 vect_iftmp.11_47 = .MASK_LOAD (_19, 4B, vec_mask_and_46);
10497 vect_iftmp.12_52 = VEC_COND_EXPR <vec_mask_and_46,
10498 vect_iftmp.11_47, { 10, ... }>;
	     instead of using masked and unmasked forms of
10501 vec != { 0, ... } (masked in the MASK_LOAD,
10502 unmasked in the VEC_COND_EXPR). */
10504 /* Force vec_compare to be an SSA_NAME rather than a comparison,
10505 in cases where that's necessary. */
10507 if (masks
|| reduction_type
== EXTRACT_LAST_REDUCTION
)
10509 if (!is_gimple_val (vec_compare
))
10511 tree vec_compare_name
= make_ssa_name (vec_cmp_type
);
10512 gassign
*new_stmt
= gimple_build_assign (vec_compare_name
,
10514 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
10515 vec_compare
= vec_compare_name
;
10518 if (must_invert_cmp_result
)
10520 tree vec_compare_name
= make_ssa_name (vec_cmp_type
);
10521 gassign
*new_stmt
= gimple_build_assign (vec_compare_name
,
10524 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
10525 vec_compare
= vec_compare_name
;
10530 unsigned vec_num
= vec_oprnds0
.length ();
10532 = vect_get_loop_mask (gsi
, masks
, vec_num
* ncopies
,
10533 vectype
, vec_num
* j
+ i
);
10534 tree tmp2
= make_ssa_name (vec_cmp_type
);
10536 = gimple_build_assign (tmp2
, BIT_AND_EXPR
, vec_compare
,
10538 vect_finish_stmt_generation (vinfo
, stmt_info
, g
, gsi
);
10539 vec_compare
= tmp2
;
10543 if (reduction_type
== EXTRACT_LAST_REDUCTION
)
10545 gimple
*old_stmt
= vect_orig_stmt (stmt_info
)->stmt
;
10546 tree lhs
= gimple_get_lhs (old_stmt
);
10547 gcall
*new_stmt
= gimple_build_call_internal
10548 (IFN_FOLD_EXTRACT_LAST
, 3, else_clause
, vec_compare
,
10550 gimple_call_set_lhs (new_stmt
, lhs
);
10551 SSA_NAME_DEF_STMT (lhs
) = new_stmt
;
10552 if (old_stmt
== gsi_stmt (*gsi
))
10553 new_stmt_info
= vect_finish_replace_stmt (vinfo
,
10554 stmt_info
, new_stmt
);
10557 /* In this case we're moving the definition to later in the
10558 block. That doesn't matter because the only uses of the
10559 lhs are in phi statements. */
10560 gimple_stmt_iterator old_gsi
= gsi_for_stmt (old_stmt
);
10561 gsi_remove (&old_gsi
, true);
10563 = vect_finish_stmt_generation (vinfo
, stmt_info
,
10569 new_temp
= make_ssa_name (vec_dest
);
10571 = gimple_build_assign (new_temp
, VEC_COND_EXPR
, vec_compare
,
10572 vec_then_clause
, vec_else_clause
);
10574 = vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
10577 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt_info
);
10584 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt_info
;
10586 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt_info
;
10588 prev_stmt_info
= new_stmt_info
;
10591 vec_oprnds0
.release ();
10592 vec_oprnds1
.release ();
10593 vec_oprnds2
.release ();
10594 vec_oprnds3
.release ();
/* vectorizable_comparison.

   Check if STMT_INFO is a comparison expression that can be vectorized.
   If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
   comparison, put it in VEC_STMT, and insert it at GSI.

   Return true if STMT_INFO is vectorizable in this way.  */
10608 vectorizable_comparison (vec_info
*vinfo
,
10609 stmt_vec_info stmt_info
, gimple_stmt_iterator
*gsi
,
10610 stmt_vec_info
*vec_stmt
,
10611 slp_tree slp_node
, stmt_vector_for_cost
*cost_vec
)
10613 tree lhs
, rhs1
, rhs2
;
10614 tree vectype1
= NULL_TREE
, vectype2
= NULL_TREE
;
10615 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
10616 tree vec_rhs1
= NULL_TREE
, vec_rhs2
= NULL_TREE
;
10618 loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
);
10619 enum vect_def_type dts
[2] = {vect_unknown_def_type
, vect_unknown_def_type
};
10621 poly_uint64 nunits
;
10623 enum tree_code code
, bitop1
= NOP_EXPR
, bitop2
= NOP_EXPR
;
10624 stmt_vec_info prev_stmt_info
= NULL
;
10626 bb_vec_info bb_vinfo
= dyn_cast
<bb_vec_info
> (vinfo
);
10627 vec
<tree
> vec_oprnds0
= vNULL
;
10628 vec
<tree
> vec_oprnds1
= vNULL
;
10632 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
10635 if (!vectype
|| !VECTOR_BOOLEAN_TYPE_P (vectype
))
10638 mask_type
= vectype
;
10639 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
10644 ncopies
= vect_get_num_copies (loop_vinfo
, vectype
);
10646 gcc_assert (ncopies
>= 1);
10647 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
)
10650 if (STMT_VINFO_LIVE_P (stmt_info
))
10652 if (dump_enabled_p ())
10653 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
10654 "value used after loop.\n");
10658 gassign
*stmt
= dyn_cast
<gassign
*> (stmt_info
->stmt
);
10662 code
= gimple_assign_rhs_code (stmt
);
10664 if (TREE_CODE_CLASS (code
) != tcc_comparison
)
10667 slp_tree slp_rhs1
, slp_rhs2
;
10668 if (!vect_is_simple_use (vinfo
, stmt_info
, slp_node
,
10669 0, &rhs1
, &slp_rhs1
, &dts
[0], &vectype1
))
10672 if (!vect_is_simple_use (vinfo
, stmt_info
, slp_node
,
10673 1, &rhs2
, &slp_rhs2
, &dts
[1], &vectype2
))
10676 if (vectype1
&& vectype2
10677 && maybe_ne (TYPE_VECTOR_SUBPARTS (vectype1
),
10678 TYPE_VECTOR_SUBPARTS (vectype2
)))
10681 vectype
= vectype1
? vectype1
: vectype2
;
10683 /* Invariant comparison. */
10686 if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (rhs1
)))
10687 vectype
= mask_type
;
10689 vectype
= get_vectype_for_scalar_type (vinfo
, TREE_TYPE (rhs1
),
10691 if (!vectype
|| maybe_ne (TYPE_VECTOR_SUBPARTS (vectype
), nunits
))
10694 else if (maybe_ne (nunits
, TYPE_VECTOR_SUBPARTS (vectype
)))
10697 /* Can't compare mask and non-mask types. */
10698 if (vectype1
&& vectype2
10699 && (VECTOR_BOOLEAN_TYPE_P (vectype1
) ^ VECTOR_BOOLEAN_TYPE_P (vectype2
)))
10702 /* Boolean values may have another representation in vectors
10703 and therefore we prefer bit operations over comparison for
10704 them (which also works for scalar masks). We store opcodes
10705 to use in bitop1 and bitop2. Statement is vectorized as
10706 BITOP2 (rhs1 BITOP1 rhs2) or
10707 rhs1 BITOP2 (BITOP1 rhs2)
10708 depending on bitop1 and bitop2 arity. */
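  /* For instance, on vector booleans (illustration only):
     a >= b is emitted as  a | ~b, i.e. bitop1 = BIT_NOT_EXPR applied to b
     and bitop2 = BIT_IOR_EXPR combining the result with a, while a != b is
     simply  a ^ b  with bitop1 = BIT_XOR_EXPR and no bitop2.  */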
10709 bool swap_p
= false;
10710 if (VECTOR_BOOLEAN_TYPE_P (vectype
))
10712 if (code
== GT_EXPR
)
10714 bitop1
= BIT_NOT_EXPR
;
10715 bitop2
= BIT_AND_EXPR
;
10717 else if (code
== GE_EXPR
)
10719 bitop1
= BIT_NOT_EXPR
;
10720 bitop2
= BIT_IOR_EXPR
;
10722 else if (code
== LT_EXPR
)
10724 bitop1
= BIT_NOT_EXPR
;
10725 bitop2
= BIT_AND_EXPR
;
10728 else if (code
== LE_EXPR
)
10730 bitop1
= BIT_NOT_EXPR
;
10731 bitop2
= BIT_IOR_EXPR
;
10736 bitop1
= BIT_XOR_EXPR
;
10737 if (code
== EQ_EXPR
)
10738 bitop2
= BIT_NOT_EXPR
;
10744 if (bitop1
== NOP_EXPR
)
10746 if (!expand_vec_cmp_expr_p (vectype
, mask_type
, code
))
10751 machine_mode mode
= TYPE_MODE (vectype
);
10754 optab
= optab_for_tree_code (bitop1
, vectype
, optab_default
);
10755 if (!optab
|| optab_handler (optab
, mode
) == CODE_FOR_nothing
)
10758 if (bitop2
!= NOP_EXPR
)
10760 optab
= optab_for_tree_code (bitop2
, vectype
, optab_default
);
10761 if (!optab
|| optab_handler (optab
, mode
) == CODE_FOR_nothing
)
10766 /* Put types on constant and invariant SLP children. */
10768 && (!vect_maybe_update_slp_op_vectype (slp_rhs1
, vectype
)
10769 || !vect_maybe_update_slp_op_vectype (slp_rhs2
, vectype
)))
10771 if (dump_enabled_p ())
10772 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
10773 "incompatible vector types for invariants\n");
10777 STMT_VINFO_TYPE (stmt_info
) = comparison_vec_info_type
;
10778 vect_model_simple_cost (vinfo
, stmt_info
,
10779 ncopies
* (1 + (bitop2
!= NOP_EXPR
)),
10780 dts
, ndts
, slp_node
, cost_vec
);
10787 vec_oprnds0
.create (1);
10788 vec_oprnds1
.create (1);
10792 lhs
= gimple_assign_lhs (stmt
);
10793 mask
= vect_create_destination_var (lhs
, mask_type
);
10795 /* Handle cmp expr. */
10796 for (j
= 0; j
< ncopies
; j
++)
10798 stmt_vec_info new_stmt_info
= NULL
;
10803 auto_vec
<vec
<tree
>, 2> vec_defs
;
10804 vect_get_slp_defs (vinfo
, slp_node
, &vec_defs
);
10805 vec_oprnds1
= vec_defs
.pop ();
10806 vec_oprnds0
= vec_defs
.pop ();
10808 std::swap (vec_oprnds0
, vec_oprnds1
);
10812 vec_rhs1
= vect_get_vec_def_for_operand (vinfo
, rhs1
, stmt_info
,
10814 vec_rhs2
= vect_get_vec_def_for_operand (vinfo
, rhs2
, stmt_info
,
10820 vec_rhs1
= vect_get_vec_def_for_stmt_copy (vinfo
,
10821 vec_oprnds0
.pop ());
10822 vec_rhs2
= vect_get_vec_def_for_stmt_copy (vinfo
,
10823 vec_oprnds1
.pop ());
10828 if (swap_p
&& j
== 0)
10829 std::swap (vec_rhs1
, vec_rhs2
);
10830 vec_oprnds0
.quick_push (vec_rhs1
);
10831 vec_oprnds1
.quick_push (vec_rhs2
);
10834 /* Arguments are ready. Create the new vector stmt. */
10835 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vec_rhs1
)
10837 vec_rhs2
= vec_oprnds1
[i
];
10839 new_temp
= make_ssa_name (mask
);
10840 if (bitop1
== NOP_EXPR
)
10842 gassign
*new_stmt
= gimple_build_assign (new_temp
, code
,
10843 vec_rhs1
, vec_rhs2
);
10845 = vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
10850 if (bitop1
== BIT_NOT_EXPR
)
10851 new_stmt
= gimple_build_assign (new_temp
, bitop1
, vec_rhs2
);
10853 new_stmt
= gimple_build_assign (new_temp
, bitop1
, vec_rhs1
,
10856 = vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
10857 if (bitop2
!= NOP_EXPR
)
10859 tree res
= make_ssa_name (mask
);
10860 if (bitop2
== BIT_NOT_EXPR
)
10861 new_stmt
= gimple_build_assign (res
, bitop2
, new_temp
);
10863 new_stmt
= gimple_build_assign (res
, bitop2
, vec_rhs1
,
10866 = vect_finish_stmt_generation (vinfo
, stmt_info
,
10871 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt_info
);
10878 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt_info
;
10880 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt_info
;
10882 prev_stmt_info
= new_stmt_info
;
10885 vec_oprnds0
.release ();
10886 vec_oprnds1
.release ();
10891 /* If SLP_NODE is nonnull, return true if vectorizable_live_operation
10892 can handle all live statements in the node. Otherwise return true
10893 if STMT_INFO is not live or if vectorizable_live_operation can handle it.
10894 GSI and VEC_STMT_P are as for vectorizable_live_operation. */
10897 can_vectorize_live_stmts (loop_vec_info loop_vinfo
,
10898 stmt_vec_info stmt_info
, gimple_stmt_iterator
*gsi
,
10899 slp_tree slp_node
, slp_instance slp_node_instance
,
10901 stmt_vector_for_cost
*cost_vec
)
10905 stmt_vec_info slp_stmt_info
;
10907 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (slp_node
), i
, slp_stmt_info
)
10909 if (STMT_VINFO_LIVE_P (slp_stmt_info
)
10910 && !vectorizable_live_operation (loop_vinfo
,
10911 slp_stmt_info
, gsi
, slp_node
,
10912 slp_node_instance
, i
,
10913 vec_stmt_p
, cost_vec
))
10917 else if (STMT_VINFO_LIVE_P (stmt_info
)
10918 && !vectorizable_live_operation (loop_vinfo
, stmt_info
, gsi
,
10919 slp_node
, slp_node_instance
, -1,
10920 vec_stmt_p
, cost_vec
))
10926 /* Make sure the statement is vectorizable. */
10929 vect_analyze_stmt (vec_info
*vinfo
,
10930 stmt_vec_info stmt_info
, bool *need_to_vectorize
,
10931 slp_tree node
, slp_instance node_instance
,
10932 stmt_vector_for_cost
*cost_vec
)
10934 bb_vec_info bb_vinfo
= dyn_cast
<bb_vec_info
> (vinfo
);
10935 enum vect_relevant relevance
= STMT_VINFO_RELEVANT (stmt_info
);
10937 gimple_seq pattern_def_seq
;
10939 if (dump_enabled_p ())
10940 dump_printf_loc (MSG_NOTE
, vect_location
, "==> examining statement: %G",
10943 if (gimple_has_volatile_ops (stmt_info
->stmt
))
10944 return opt_result::failure_at (stmt_info
->stmt
,
10946 " stmt has volatile operands: %G\n",
10949 if (STMT_VINFO_IN_PATTERN_P (stmt_info
)
10951 && (pattern_def_seq
= STMT_VINFO_PATTERN_DEF_SEQ (stmt_info
)))
10953 gimple_stmt_iterator si
;
10955 for (si
= gsi_start (pattern_def_seq
); !gsi_end_p (si
); gsi_next (&si
))
10957 stmt_vec_info pattern_def_stmt_info
10958 = vinfo
->lookup_stmt (gsi_stmt (si
));
10959 if (STMT_VINFO_RELEVANT_P (pattern_def_stmt_info
)
10960 || STMT_VINFO_LIVE_P (pattern_def_stmt_info
))
10962 /* Analyze def stmt of STMT if it's a pattern stmt. */
10963 if (dump_enabled_p ())
10964 dump_printf_loc (MSG_NOTE
, vect_location
,
10965 "==> examining pattern def statement: %G",
10966 pattern_def_stmt_info
->stmt
);
10969 = vect_analyze_stmt (vinfo
, pattern_def_stmt_info
,
10970 need_to_vectorize
, node
, node_instance
,
  /* Skip stmts that do not need to be vectorized.  In loops this is expected
     to include:
     - the COND_EXPR which is the loop exit condition
     - any LABEL_EXPRs in the loop
     - computations that are used only for array indexing or loop control.
     In basic blocks we only analyze statements that are a part of some SLP
     instance, therefore, all the statements are relevant.

     A pattern statement needs to be analyzed instead of the original statement
     if the original statement is not relevant.  Otherwise, we analyze both
     statements.  In basic blocks we are called from some SLP instance
     traversal; there we don't analyze pattern stmts separately, since the
     pattern stmts will already be part of the SLP instance.  */
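
  /* For example (illustration only): an induction-variable increment such as
     i_14 = i_7 + 1  that only feeds the loop's exit COND_EXPR is not relevant
     and is skipped here.  */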
10992 stmt_vec_info pattern_stmt_info
= STMT_VINFO_RELATED_STMT (stmt_info
);
10993 if (!STMT_VINFO_RELEVANT_P (stmt_info
)
10994 && !STMT_VINFO_LIVE_P (stmt_info
))
10996 if (STMT_VINFO_IN_PATTERN_P (stmt_info
)
10997 && pattern_stmt_info
10998 && (STMT_VINFO_RELEVANT_P (pattern_stmt_info
)
10999 || STMT_VINFO_LIVE_P (pattern_stmt_info
)))
11001 /* Analyze PATTERN_STMT instead of the original stmt. */
11002 stmt_info
= pattern_stmt_info
;
11003 if (dump_enabled_p ())
11004 dump_printf_loc (MSG_NOTE
, vect_location
,
11005 "==> examining pattern statement: %G",
11010 if (dump_enabled_p ())
11011 dump_printf_loc (MSG_NOTE
, vect_location
, "irrelevant.\n");
11013 return opt_result::success ();
11016 else if (STMT_VINFO_IN_PATTERN_P (stmt_info
)
11018 && pattern_stmt_info
11019 && (STMT_VINFO_RELEVANT_P (pattern_stmt_info
)
11020 || STMT_VINFO_LIVE_P (pattern_stmt_info
)))
11022 /* Analyze PATTERN_STMT too. */
11023 if (dump_enabled_p ())
11024 dump_printf_loc (MSG_NOTE
, vect_location
,
11025 "==> examining pattern statement: %G",
11026 pattern_stmt_info
->stmt
);
11029 = vect_analyze_stmt (vinfo
, pattern_stmt_info
, need_to_vectorize
, node
,
11030 node_instance
, cost_vec
);
11035 switch (STMT_VINFO_DEF_TYPE (stmt_info
))
11037 case vect_internal_def
:
11040 case vect_reduction_def
:
11041 case vect_nested_cycle
:
11042 gcc_assert (!bb_vinfo
11043 && (relevance
== vect_used_in_outer
11044 || relevance
== vect_used_in_outer_by_reduction
11045 || relevance
== vect_used_by_reduction
11046 || relevance
== vect_unused_in_scope
11047 || relevance
== vect_used_only_live
));
11050 case vect_induction_def
:
11051 gcc_assert (!bb_vinfo
);
11054 case vect_constant_def
:
11055 case vect_external_def
:
11056 case vect_unknown_def_type
:
11058 gcc_unreachable ();
11061 if (STMT_VINFO_RELEVANT_P (stmt_info
))
11063 tree type
= gimple_expr_type (stmt_info
->stmt
);
11064 gcc_assert (!VECTOR_MODE_P (TYPE_MODE (type
)));
11065 gcall
*call
= dyn_cast
<gcall
*> (stmt_info
->stmt
);
11066 gcc_assert (STMT_VINFO_VECTYPE (stmt_info
)
11067 || (call
&& gimple_call_lhs (call
) == NULL_TREE
));
11068 *need_to_vectorize
= true;
11071 if (PURE_SLP_STMT (stmt_info
) && !node
)
11073 if (dump_enabled_p ())
11074 dump_printf_loc (MSG_NOTE
, vect_location
,
11075 "handled only by SLP analysis\n");
11076 return opt_result::success ();
11081 && (STMT_VINFO_RELEVANT_P (stmt_info
)
11082 || STMT_VINFO_DEF_TYPE (stmt_info
) == vect_reduction_def
))
11083 /* Prefer vectorizable_call over vectorizable_simd_clone_call so
11084 -mveclibabi= takes preference over library functions with
11085 the simd attribute. */
11086 ok
= (vectorizable_call (vinfo
, stmt_info
, NULL
, NULL
, node
, cost_vec
)
11087 || vectorizable_simd_clone_call (vinfo
, stmt_info
, NULL
, NULL
, node
,
11089 || vectorizable_conversion (vinfo
, stmt_info
,
11090 NULL
, NULL
, node
, cost_vec
)
11091 || vectorizable_operation (vinfo
, stmt_info
,
11092 NULL
, NULL
, node
, cost_vec
)
11093 || vectorizable_assignment (vinfo
, stmt_info
,
11094 NULL
, NULL
, node
, cost_vec
)
11095 || vectorizable_load (vinfo
, stmt_info
, NULL
, NULL
, node
, cost_vec
)
11096 || vectorizable_store (vinfo
, stmt_info
, NULL
, NULL
, node
, cost_vec
)
11097 || vectorizable_reduction (as_a
<loop_vec_info
> (vinfo
), stmt_info
,
11098 node
, node_instance
, cost_vec
)
11099 || vectorizable_induction (as_a
<loop_vec_info
> (vinfo
), stmt_info
,
11100 NULL
, NULL
, node
, cost_vec
)
11101 || vectorizable_shift (vinfo
, stmt_info
, NULL
, NULL
, node
, cost_vec
)
11102 || vectorizable_condition (vinfo
, stmt_info
,
11103 NULL
, NULL
, node
, cost_vec
)
11104 || vectorizable_comparison (vinfo
, stmt_info
, NULL
, NULL
, node
,
11106 || vectorizable_lc_phi (as_a
<loop_vec_info
> (vinfo
),
11107 stmt_info
, NULL
, node
));
11111 ok
= (vectorizable_call (vinfo
, stmt_info
, NULL
, NULL
, node
, cost_vec
)
11112 || vectorizable_simd_clone_call (vinfo
, stmt_info
,
11113 NULL
, NULL
, node
, cost_vec
)
11114 || vectorizable_conversion (vinfo
, stmt_info
, NULL
, NULL
, node
,
11116 || vectorizable_shift (vinfo
, stmt_info
,
11117 NULL
, NULL
, node
, cost_vec
)
11118 || vectorizable_operation (vinfo
, stmt_info
,
11119 NULL
, NULL
, node
, cost_vec
)
11120 || vectorizable_assignment (vinfo
, stmt_info
, NULL
, NULL
, node
,
11122 || vectorizable_load (vinfo
, stmt_info
,
11123 NULL
, NULL
, node
, cost_vec
)
11124 || vectorizable_store (vinfo
, stmt_info
,
11125 NULL
, NULL
, node
, cost_vec
)
11126 || vectorizable_condition (vinfo
, stmt_info
,
11127 NULL
, NULL
, node
, cost_vec
)
11128 || vectorizable_comparison (vinfo
, stmt_info
, NULL
, NULL
, node
,
11133 return opt_result::failure_at (stmt_info
->stmt
,
11135 " relevant stmt not supported: %G",
11138 /* Stmts that are (also) "live" (i.e. - that are used out of the loop)
11139 need extra handling, except for vectorizable reductions. */
11141 && STMT_VINFO_TYPE (stmt_info
) != reduc_vec_info_type
11142 && STMT_VINFO_TYPE (stmt_info
) != lc_phi_info_type
11143 && !can_vectorize_live_stmts (as_a
<loop_vec_info
> (vinfo
),
11144 stmt_info
, NULL
, node
, node_instance
,
11146 return opt_result::failure_at (stmt_info
->stmt
,
11148 " live stmt not supported: %G",
11151 return opt_result::success ();
11155 /* Function vect_transform_stmt.
11157 Create a vectorized stmt to replace STMT_INFO, and insert it at GSI. */
11160 vect_transform_stmt (vec_info
*vinfo
,
11161 stmt_vec_info stmt_info
, gimple_stmt_iterator
*gsi
,
11162 slp_tree slp_node
, slp_instance slp_node_instance
)
11164 bool is_store
= false;
11165 stmt_vec_info vec_stmt
= NULL
;
11168 gcc_assert (slp_node
|| !PURE_SLP_STMT (stmt_info
));
11169 stmt_vec_info old_vec_stmt_info
= STMT_VINFO_VEC_STMT (stmt_info
);
11171 loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
);
11172 bool nested_p
= (loop_vinfo
11173 && nested_in_vect_loop_p
11174 (LOOP_VINFO_LOOP (loop_vinfo
), stmt_info
));
11176 gimple
*stmt
= stmt_info
->stmt
;
11177 switch (STMT_VINFO_TYPE (stmt_info
))
11179 case type_demotion_vec_info_type
:
11180 case type_promotion_vec_info_type
:
11181 case type_conversion_vec_info_type
:
11182 done
= vectorizable_conversion (vinfo
, stmt_info
,
11183 gsi
, &vec_stmt
, slp_node
, NULL
);
11187 case induc_vec_info_type
:
11188 done
= vectorizable_induction (as_a
<loop_vec_info
> (vinfo
),
11189 stmt_info
, gsi
, &vec_stmt
, slp_node
,
11194 case shift_vec_info_type
:
11195 done
= vectorizable_shift (vinfo
, stmt_info
,
11196 gsi
, &vec_stmt
, slp_node
, NULL
);
11200 case op_vec_info_type
:
11201 done
= vectorizable_operation (vinfo
, stmt_info
, gsi
, &vec_stmt
, slp_node
,
11206 case assignment_vec_info_type
:
11207 done
= vectorizable_assignment (vinfo
, stmt_info
,
11208 gsi
, &vec_stmt
, slp_node
, NULL
);
11212 case load_vec_info_type
:
11213 done
= vectorizable_load (vinfo
, stmt_info
, gsi
, &vec_stmt
, slp_node
,
11218 case store_vec_info_type
:
11219 done
= vectorizable_store (vinfo
, stmt_info
,
11220 gsi
, &vec_stmt
, slp_node
, NULL
);
11222 if (STMT_VINFO_GROUPED_ACCESS (stmt_info
) && !slp_node
)
11224 /* In case of interleaving, the whole chain is vectorized when the
11225 last store in the chain is reached. Store stmts before the last
	   one are skipped, and their vec_stmt_info shouldn't be freed
11228 stmt_vec_info group_info
= DR_GROUP_FIRST_ELEMENT (stmt_info
);
11229 if (DR_GROUP_STORE_COUNT (group_info
) == DR_GROUP_SIZE (group_info
))
11236 case condition_vec_info_type
:
11237 done
= vectorizable_condition (vinfo
, stmt_info
,
11238 gsi
, &vec_stmt
, slp_node
, NULL
);
11242 case comparison_vec_info_type
:
11243 done
= vectorizable_comparison (vinfo
, stmt_info
, gsi
, &vec_stmt
,
11248 case call_vec_info_type
:
11249 done
= vectorizable_call (vinfo
, stmt_info
,
11250 gsi
, &vec_stmt
, slp_node
, NULL
);
11251 stmt
= gsi_stmt (*gsi
);
11254 case call_simd_clone_vec_info_type
:
11255 done
= vectorizable_simd_clone_call (vinfo
, stmt_info
, gsi
, &vec_stmt
,
11257 stmt
= gsi_stmt (*gsi
);
11260 case reduc_vec_info_type
:
11261 done
= vect_transform_reduction (as_a
<loop_vec_info
> (vinfo
), stmt_info
,
11262 gsi
, &vec_stmt
, slp_node
);
11266 case cycle_phi_info_type
:
11267 done
= vect_transform_cycle_phi (as_a
<loop_vec_info
> (vinfo
), stmt_info
,
11268 &vec_stmt
, slp_node
, slp_node_instance
);
11272 case lc_phi_info_type
:
11273 done
= vectorizable_lc_phi (as_a
<loop_vec_info
> (vinfo
),
11274 stmt_info
, &vec_stmt
, slp_node
);
11279 if (!STMT_VINFO_LIVE_P (stmt_info
))
11281 if (dump_enabled_p ())
11282 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
11283 "stmt not supported.\n");
11284 gcc_unreachable ();
11289 /* Verify SLP vectorization doesn't mess with STMT_VINFO_VEC_STMT.
11290 This would break hybrid SLP vectorization. */
11292 gcc_assert (!vec_stmt
11293 && STMT_VINFO_VEC_STMT (stmt_info
) == old_vec_stmt_info
);
11295 /* Handle inner-loop stmts whose DEF is used in the loop-nest that
11296 is being vectorized, but outside the immediately enclosing loop. */
11299 && STMT_VINFO_TYPE (stmt_info
) != reduc_vec_info_type
11300 && (STMT_VINFO_RELEVANT (stmt_info
) == vect_used_in_outer
11301 || STMT_VINFO_RELEVANT (stmt_info
) ==
11302 vect_used_in_outer_by_reduction
))
11304 class loop
*innerloop
= LOOP_VINFO_LOOP (loop_vinfo
)->inner
;
11305 imm_use_iterator imm_iter
;
11306 use_operand_p use_p
;
11309 if (dump_enabled_p ())
11310 dump_printf_loc (MSG_NOTE
, vect_location
,
11311 "Record the vdef for outer-loop vectorization.\n");
      /* Find the relevant loop-exit phi-node, and record the vec_stmt there
11314 (to be used when vectorizing outer-loop stmts that use the DEF of
11316 if (gimple_code (stmt
) == GIMPLE_PHI
)
11317 scalar_dest
= PHI_RESULT (stmt
);
11319 scalar_dest
= gimple_get_lhs (stmt
);
11321 FOR_EACH_IMM_USE_FAST (use_p
, imm_iter
, scalar_dest
)
11322 if (!flow_bb_inside_loop_p (innerloop
, gimple_bb (USE_STMT (use_p
))))
11324 stmt_vec_info exit_phi_info
11325 = vinfo
->lookup_stmt (USE_STMT (use_p
));
11326 STMT_VINFO_VEC_STMT (exit_phi_info
) = vec_stmt
;
11331 STMT_VINFO_VEC_STMT (stmt_info
) = vec_stmt
;
11333 if (STMT_VINFO_TYPE (stmt_info
) == store_vec_info_type
)
11336 /* If this stmt defines a value used on a backedge, update the
11337 vectorized PHIs. */
11338 stmt_vec_info orig_stmt_info
= vect_orig_stmt (stmt_info
);
11339 stmt_vec_info reduc_info
;
11340 if (STMT_VINFO_REDUC_DEF (orig_stmt_info
)
11341 && vect_stmt_to_vectorize (orig_stmt_info
) == stmt_info
11342 && (reduc_info
= info_for_reduction (vinfo
, orig_stmt_info
))
11343 && STMT_VINFO_REDUC_TYPE (reduc_info
) != FOLD_LEFT_REDUCTION
11344 && STMT_VINFO_REDUC_TYPE (reduc_info
) != EXTRACT_LAST_REDUCTION
)
11349 && (phi
= dyn_cast
<gphi
*>
11350 (STMT_VINFO_REDUC_DEF (orig_stmt_info
)->stmt
))
11351 && dominated_by_p (CDI_DOMINATORS
,
11352 gimple_bb (orig_stmt_info
->stmt
), gimple_bb (phi
))
11353 && (e
= loop_latch_edge (gimple_bb (phi
)->loop_father
))
11354 && (PHI_ARG_DEF_FROM_EDGE (phi
, e
)
11355 == gimple_get_lhs (orig_stmt_info
->stmt
)))
11357 stmt_vec_info phi_info
11358 = STMT_VINFO_VEC_STMT (STMT_VINFO_REDUC_DEF (orig_stmt_info
));
11359 stmt_vec_info vec_stmt
= STMT_VINFO_VEC_STMT (stmt_info
);
11362 add_phi_arg (as_a
<gphi
*> (phi_info
->stmt
),
11363 gimple_get_lhs (vec_stmt
->stmt
), e
,
11364 gimple_phi_arg_location (phi
, e
->dest_idx
));
11365 phi_info
= STMT_VINFO_RELATED_STMT (phi_info
);
11366 vec_stmt
= STMT_VINFO_RELATED_STMT (vec_stmt
);
11369 gcc_assert (!vec_stmt
);
11372 && slp_node
!= slp_node_instance
->reduc_phis
)
11374 slp_tree phi_node
= slp_node_instance
->reduc_phis
;
11375 gphi
*phi
= as_a
<gphi
*> (SLP_TREE_SCALAR_STMTS (phi_node
)[0]->stmt
);
11376 e
= loop_latch_edge (gimple_bb (phi
)->loop_father
);
11377 gcc_assert (SLP_TREE_VEC_STMTS (phi_node
).length ()
11378 == SLP_TREE_VEC_STMTS (slp_node
).length ());
11379 for (unsigned i
= 0; i
< SLP_TREE_VEC_STMTS (phi_node
).length (); ++i
)
11380 add_phi_arg (as_a
<gphi
*> (SLP_TREE_VEC_STMTS (phi_node
)[i
]->stmt
),
11381 gimple_get_lhs (SLP_TREE_VEC_STMTS (slp_node
)[i
]->stmt
),
11382 e
, gimple_phi_arg_location (phi
, e
->dest_idx
));
11386 /* Handle stmts whose DEF is used outside the loop-nest that is
11387 being vectorized. */
11388 if (is_a
<loop_vec_info
> (vinfo
))
11389 done
= can_vectorize_live_stmts (as_a
<loop_vec_info
> (vinfo
),
11390 stmt_info
, gsi
, slp_node
,
11391 slp_node_instance
, true, NULL
);
11398 /* Remove a group of stores (for SLP or interleaving), free their
11402 vect_remove_stores (vec_info
*vinfo
, stmt_vec_info first_stmt_info
)
11404 stmt_vec_info next_stmt_info
= first_stmt_info
;
11406 while (next_stmt_info
)
11408 stmt_vec_info tmp
= DR_GROUP_NEXT_ELEMENT (next_stmt_info
);
11409 next_stmt_info
= vect_orig_stmt (next_stmt_info
);
11410 /* Free the attached stmt_vec_info and remove the stmt. */
11411 vinfo
->remove_stmt (next_stmt_info
);
11412 next_stmt_info
= tmp
;
/* If NUNITS is nonzero, return a vector type that contains NUNITS
   elements of type SCALAR_TYPE, or null if the target doesn't support
   such a type.

   If NUNITS is zero, return a vector type that contains elements of
   type SCALAR_TYPE, choosing whichever vector size the target prefers.

   If PREVAILING_MODE is VOIDmode, we have not yet chosen a vector mode
   for this vectorization region and want to "autodetect" the best choice.
   Otherwise, PREVAILING_MODE is a previously-chosen vector TYPE_MODE
   and we want the new type to be interoperable with it.  PREVAILING_MODE
   in this case can be a scalar integer mode or a vector mode; when it
   is a vector mode, the function acts like a tree-level version of
   related_vector_mode.  */
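
/* For example (illustration only): with a prevailing vector mode of, say,
   V16QImode and SCALAR_TYPE "int", NUNITS == 4 requests a 4-element "int"
   vector type if the target supports one, while NUNITS == 0 picks however
   many "int" elements fit the prevailing vector size.  */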
11432 get_related_vectype_for_scalar_type (machine_mode prevailing_mode
,
11433 tree scalar_type
, poly_uint64 nunits
)
11435 tree orig_scalar_type
= scalar_type
;
11436 scalar_mode inner_mode
;
11437 machine_mode simd_mode
;
11440 if (!is_int_mode (TYPE_MODE (scalar_type
), &inner_mode
)
11441 && !is_float_mode (TYPE_MODE (scalar_type
), &inner_mode
))
11444 unsigned int nbytes
= GET_MODE_SIZE (inner_mode
);
  /* For vector types of elements whose mode precision doesn't
     match their type's precision we use an element type of mode
     precision.  The vectorization routines will have to make sure
     they support the proper result truncation/extension.
     We also make sure to build vector types with INTEGER_TYPE
     component type only.  */
)
11453 && (GET_MODE_BITSIZE (inner_mode
) != TYPE_PRECISION (scalar_type
)
11454 || TREE_CODE (scalar_type
) != INTEGER_TYPE
))
11455 scalar_type
= build_nonstandard_integer_type (GET_MODE_BITSIZE (inner_mode
),
11456 TYPE_UNSIGNED (scalar_type
));
11458 /* We shouldn't end up building VECTOR_TYPEs of non-scalar components.
11459 When the component mode passes the above test simply use a type
11460 corresponding to that mode. The theory is that any use that
11461 would cause problems with this will disable vectorization anyway. */
11462 else if (!SCALAR_FLOAT_TYPE_P (scalar_type
)
11463 && !INTEGRAL_TYPE_P (scalar_type
))
11464 scalar_type
= lang_hooks
.types
.type_for_mode (inner_mode
, 1);
  /* We can't build a vector type of elements with alignment bigger than
     their size.  */
11468 else if (nbytes
< TYPE_ALIGN_UNIT (scalar_type
))
11469 scalar_type
= lang_hooks
.types
.type_for_mode (inner_mode
,
11470 TYPE_UNSIGNED (scalar_type
));
  /* If we fell back to using the mode, fail if there was
     no scalar type for it.  */
11474 if (scalar_type
== NULL_TREE
)
11477 /* If no prevailing mode was supplied, use the mode the target prefers.
     Otherwise look up a vector mode based on the prevailing mode.  */
11479 if (prevailing_mode
== VOIDmode
)
11481 gcc_assert (known_eq (nunits
, 0U));
11482 simd_mode
= targetm
.vectorize
.preferred_simd_mode (inner_mode
);
11483 if (SCALAR_INT_MODE_P (simd_mode
))
11485 /* Traditional behavior is not to take the integer mode
11486 literally, but simply to use it as a way of determining
11487 the vector size. It is up to mode_for_vector to decide
11488 what the TYPE_MODE should be.
11490 Note that nunits == 1 is allowed in order to support single
11491 element vector types. */
11492 if (!multiple_p (GET_MODE_SIZE (simd_mode
), nbytes
, &nunits
)
11493 || !mode_for_vector (inner_mode
, nunits
).exists (&simd_mode
))
11497 else if (SCALAR_INT_MODE_P (prevailing_mode
)
11498 || !related_vector_mode (prevailing_mode
,
11499 inner_mode
, nunits
).exists (&simd_mode
))
11501 /* Fall back to using mode_for_vector, mostly in the hope of being
11502 able to use an integer mode. */
11503 if (known_eq (nunits
, 0U)
11504 && !multiple_p (GET_MODE_SIZE (prevailing_mode
), nbytes
, &nunits
))
11507 if (!mode_for_vector (inner_mode
, nunits
).exists (&simd_mode
))
11511 vectype
= build_vector_type_for_mode (scalar_type
, simd_mode
);
11513 /* In cases where the mode was chosen by mode_for_vector, check that
11514 the target actually supports the chosen mode, or that it at least
11515 allows the vector mode to be replaced by a like-sized integer. */
11516 if (!VECTOR_MODE_P (TYPE_MODE (vectype
))
11517 && !INTEGRAL_MODE_P (TYPE_MODE (vectype
)))
11520 /* Re-attach the address-space qualifier if we canonicalized the scalar
11522 if (TYPE_ADDR_SPACE (orig_scalar_type
) != TYPE_ADDR_SPACE (vectype
))
11523 return build_qualified_type
11524 (vectype
, KEEP_QUAL_ADDR_SPACE (TYPE_QUALS (orig_scalar_type
)));
11529 /* Function get_vectype_for_scalar_type.
11531 Returns the vector type corresponding to SCALAR_TYPE as supported
11532 by the target. If GROUP_SIZE is nonzero and we're performing BB
11533 vectorization, make sure that the number of elements in the vector
11534 is no bigger than GROUP_SIZE. */
11537 get_vectype_for_scalar_type (vec_info
*vinfo
, tree scalar_type
,
11538 unsigned int group_size
)
11540 /* For BB vectorization, we should always have a group size once we've
11541 constructed the SLP tree; the only valid uses of zero GROUP_SIZEs
11542 are tentative requests during things like early data reference
11543 analysis and pattern recognition. */
11544 if (is_a
<bb_vec_info
> (vinfo
))
11545 gcc_assert (vinfo
->slp_instances
.is_empty () || group_size
!= 0);
11549 tree vectype
= get_related_vectype_for_scalar_type (vinfo
->vector_mode
,
11551 if (vectype
&& vinfo
->vector_mode
== VOIDmode
)
11552 vinfo
->vector_mode
= TYPE_MODE (vectype
);
11554 /* Register the natural choice of vector type, before the group size
11555 has been applied. */
11557 vinfo
->used_vector_modes
.add (TYPE_MODE (vectype
));
11559 /* If the natural choice of vector type doesn't satisfy GROUP_SIZE,
11560 try again with an explicit number of elements. */
11563 && maybe_ge (TYPE_VECTOR_SUBPARTS (vectype
), group_size
))
11565 /* Start with the biggest number of units that fits within
11566 GROUP_SIZE and halve it until we find a valid vector type.
11567 Usually either the first attempt will succeed or all will
11568 fail (in the latter case because GROUP_SIZE is too small
11569 for the target), but it's possible that a target could have
11570 a hole between supported vector types.
11572 If GROUP_SIZE is not a power of 2, this has the effect of
11573 trying the largest power of 2 that fits within the group,
11574 even though the group is not a multiple of that vector size.
11575 The BB vectorizer will then try to carve up the group into
11577 unsigned int nunits
= 1 << floor_log2 (group_size
);
11580 vectype
= get_related_vectype_for_scalar_type (vinfo
->vector_mode
,
11581 scalar_type
, nunits
);
11584 while (nunits
> 1 && !vectype
);
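
      /* Worked example (illustration only): GROUP_SIZE == 6 starts with
	 nunits == 4 (1 << floor_log2 (6)); if no 4-element vector type is
	 available, the loop above retries with 2 before giving up.  */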
11590 /* Return the vector type corresponding to SCALAR_TYPE as supported
11591 by the target. NODE, if nonnull, is the SLP tree node that will
11592 use the returned vector type. */
11595 get_vectype_for_scalar_type (vec_info
*vinfo
, tree scalar_type
, slp_tree node
)
11597 unsigned int group_size
= 0;
11600 group_size
= SLP_TREE_SCALAR_OPS (node
).length ();
11601 if (group_size
== 0)
11602 group_size
= SLP_TREE_SCALAR_STMTS (node
).length ();
11604 return get_vectype_for_scalar_type (vinfo
, scalar_type
, group_size
);
11607 /* Function get_mask_type_for_scalar_type.
11609 Returns the mask type corresponding to a result of comparison
11610 of vectors of specified SCALAR_TYPE as supported by target.
11611 If GROUP_SIZE is nonzero and we're performing BB vectorization,
11612 make sure that the number of elements in the vector is no bigger
11613 than GROUP_SIZE. */
11616 get_mask_type_for_scalar_type (vec_info
*vinfo
, tree scalar_type
,
11617 unsigned int group_size
)
11619 tree vectype
= get_vectype_for_scalar_type (vinfo
, scalar_type
, group_size
);
11624 return truth_type_for (vectype
);
11627 /* Function get_same_sized_vectype
11629 Returns a vector type corresponding to SCALAR_TYPE of size
11630 VECTOR_TYPE if supported by the target. */
11633 get_same_sized_vectype (tree scalar_type
, tree vector_type
)
11635 if (VECT_SCALAR_BOOLEAN_TYPE_P (scalar_type
))
11636 return truth_type_for (vector_type
);
11638 poly_uint64 nunits
;
11639 if (!multiple_p (GET_MODE_SIZE (TYPE_MODE (vector_type
)),
11640 GET_MODE_SIZE (TYPE_MODE (scalar_type
)), &nunits
))
11643 return get_related_vectype_for_scalar_type (TYPE_MODE (vector_type
),
11644 scalar_type
, nunits
);
11647 /* Return true if replacing LOOP_VINFO->vector_mode with VECTOR_MODE
11648 would not change the chosen vector modes. */
11651 vect_chooses_same_modes_p (vec_info
*vinfo
, machine_mode vector_mode
)
11653 for (vec_info::mode_set::iterator i
= vinfo
->used_vector_modes
.begin ();
11654 i
!= vinfo
->used_vector_modes
.end (); ++i
)
11655 if (!VECTOR_MODE_P (*i
)
11656 || related_vector_mode (vector_mode
, GET_MODE_INNER (*i
), 0) != *i
)
/* Function vect_is_simple_use.

   Input:
   VINFO - the vect info of the loop or basic block that is being vectorized.
   OPERAND - operand in the loop or bb.

   Output:
   DEF_STMT_INFO_OUT (optional) - information about the defining stmt in
     case OPERAND is an SSA_NAME that is defined in the vectorizable region
   DEF_STMT_OUT (optional) - the defining stmt in case OPERAND is an SSA_NAME;
     the definition could be anywhere in the function
   DT - the type of definition

   Returns whether a stmt with OPERAND can be vectorized.
   For loops, supportable operands are constants, loop invariants, and operands
   that are defined by the current iteration of the loop.  Unsupportable
   operands are those that are defined by a previous iteration of the loop (as
   is the case in reduction/induction computations).
   For basic blocks, supportable operands are constants and bb invariants.
   For now, operands defined outside the basic block are not supported.  */
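
/* For example (illustration only): in  a_5 = b_3 + 7  the operand b_3, if
   defined by a statement inside the vectorized region, yields
   vect_internal_def; the constant 7 yields vect_constant_def; and an SSA
   name defined before the loop yields vect_external_def.  */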
bool
vect_is_simple_use (tree operand, vec_info *vinfo, enum vect_def_type *dt,
		    stmt_vec_info *def_stmt_info_out, gimple **def_stmt_out)
{
  if (def_stmt_info_out)
    *def_stmt_info_out = NULL;
  if (def_stmt_out)
    *def_stmt_out = NULL;
  *dt = vect_unknown_def_type;

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location,
		       "vect_is_simple_use: operand ");
      if (TREE_CODE (operand) == SSA_NAME
	  && !SSA_NAME_IS_DEFAULT_DEF (operand))
	dump_gimple_expr (MSG_NOTE, TDF_SLIM, SSA_NAME_DEF_STMT (operand), 0);
      else
	dump_generic_expr (MSG_NOTE, TDF_SLIM, operand);
    }

  if (CONSTANT_CLASS_P (operand))
    *dt = vect_constant_def;
  else if (is_gimple_min_invariant (operand))
    *dt = vect_external_def;
  else if (TREE_CODE (operand) != SSA_NAME)
    *dt = vect_unknown_def_type;
  else if (SSA_NAME_IS_DEFAULT_DEF (operand))
    *dt = vect_external_def;
  else
    {
      gimple *def_stmt = SSA_NAME_DEF_STMT (operand);
      stmt_vec_info stmt_vinfo = vinfo->lookup_def (operand);
      if (!stmt_vinfo)
	*dt = vect_external_def;
      else
	{
	  stmt_vinfo = vect_stmt_to_vectorize (stmt_vinfo);
	  def_stmt = stmt_vinfo->stmt;
	  switch (gimple_code (def_stmt))
	    {
	    case GIMPLE_PHI:
	    case GIMPLE_ASSIGN:
	    case GIMPLE_CALL:
	      *dt = STMT_VINFO_DEF_TYPE (stmt_vinfo);
	      break;
	    default:
	      *dt = vect_unknown_def_type;
	      break;
	    }
	  if (def_stmt_info_out)
	    *def_stmt_info_out = stmt_vinfo;
	}
      if (def_stmt_out)
	*def_stmt_out = def_stmt;
    }

  if (dump_enabled_p ())
    {
      dump_printf (MSG_NOTE, ", type of def: ");
      switch (*dt)
	{
	case vect_uninitialized_def:
	  dump_printf (MSG_NOTE, "uninitialized\n");
	  break;
	case vect_constant_def:
	  dump_printf (MSG_NOTE, "constant\n");
	  break;
	case vect_external_def:
	  dump_printf (MSG_NOTE, "external\n");
	  break;
	case vect_internal_def:
	  dump_printf (MSG_NOTE, "internal\n");
	  break;
	case vect_induction_def:
	  dump_printf (MSG_NOTE, "induction\n");
	  break;
	case vect_reduction_def:
	  dump_printf (MSG_NOTE, "reduction\n");
	  break;
	case vect_double_reduction_def:
	  dump_printf (MSG_NOTE, "double reduction\n");
	  break;
	case vect_nested_cycle:
	  dump_printf (MSG_NOTE, "nested cycle\n");
	  break;
	case vect_unknown_def_type:
	  dump_printf (MSG_NOTE, "unknown\n");
	  break;
	}
    }

  if (*dt == vect_unknown_def_type)
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "Unsupported pattern.\n");
      return false;
    }

  return true;
}
/* Function vect_is_simple_use.

   Same as vect_is_simple_use but also determines the vector operand
   type of OPERAND and stores it to *VECTYPE.  If the definition of
   OPERAND is vect_uninitialized_def, vect_constant_def or
   vect_external_def *VECTYPE will be set to NULL_TREE and the caller
   is responsible to compute the best suited vector type for the
   operand.  */

bool
vect_is_simple_use (tree operand, vec_info *vinfo, enum vect_def_type *dt,
		    tree *vectype, stmt_vec_info *def_stmt_info_out,
		    gimple **def_stmt_out)
{
  stmt_vec_info def_stmt_info;
  gimple *def_stmt;
  if (!vect_is_simple_use (operand, vinfo, dt, &def_stmt_info, &def_stmt))
    return false;

  if (def_stmt_out)
    *def_stmt_out = def_stmt;
  if (def_stmt_info_out)
    *def_stmt_info_out = def_stmt_info;

  /* Now get a vector type if the def is internal, otherwise supply
     NULL_TREE and leave it up to the caller to figure out a proper
     type for the use stmt.  */
  if (*dt == vect_internal_def
      || *dt == vect_induction_def
      || *dt == vect_reduction_def
      || *dt == vect_double_reduction_def
      || *dt == vect_nested_cycle)
    {
      *vectype = STMT_VINFO_VECTYPE (def_stmt_info);
      gcc_assert (*vectype != NULL_TREE);
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "vect_is_simple_use: vectype %T\n", *vectype);
    }
  else if (*dt == vect_uninitialized_def
	   || *dt == vect_constant_def
	   || *dt == vect_external_def)
    *vectype = NULL_TREE;
  else
    gcc_unreachable ();

  return true;
}
/* Function vect_is_simple_use.

   Same as vect_is_simple_use but determines the operand by operand
   position OPERAND from either STMT or SLP_NODE, filling in *OP
   and *SLP_DEF (when SLP_NODE is not NULL).  */

bool
vect_is_simple_use (vec_info *vinfo, stmt_vec_info stmt, slp_tree slp_node,
		    unsigned operand, tree *op, slp_tree *slp_def,
		    enum vect_def_type *dt,
		    tree *vectype, stmt_vec_info *def_stmt_info_out)
{
  if (slp_node)
    {
      slp_tree child = SLP_TREE_CHILDREN (slp_node)[operand];
      *slp_def = child;
      if (SLP_TREE_DEF_TYPE (child) == vect_internal_def)
	*op = gimple_get_lhs (SLP_TREE_SCALAR_STMTS (child)[0]->stmt);
      else
	*op = SLP_TREE_SCALAR_OPS (child)[0];
    }
  else
    {
      if (gassign *ass = dyn_cast <gassign *> (stmt->stmt))
	{
	  *op = gimple_op (ass, operand + 1);
	  /* ??? Ick.  But it will vanish with SLP only.  */
	  if (TREE_CODE (*op) == VIEW_CONVERT_EXPR)
	    *op = TREE_OPERAND (*op, 0);
	}
      else if (gcall *call = dyn_cast <gcall *> (stmt->stmt))
	*op = gimple_call_arg (call, operand);
      else
	gcc_unreachable ();
    }

  /* ??? We might want to update *vectype from *slp_def here though
     when sharing nodes this would prevent unsharing in the caller.  */
  return vect_is_simple_use (*op, vinfo, dt, vectype, def_stmt_info_out);
}
/* If OP is not NULL and is external or constant update its vector
   type with VECTYPE.  Returns true if successful or false if not,
   for example when conflicting vector types are present.  */

bool
vect_maybe_update_slp_op_vectype (slp_tree op, tree vectype)
{
  if (!op || SLP_TREE_DEF_TYPE (op) == vect_internal_def)
    return true;
  if (SLP_TREE_VECTYPE (op))
    return types_compatible_p (SLP_TREE_VECTYPE (op), vectype);
  SLP_TREE_VECTYPE (op) = vectype;
  return true;
}
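/* Illustrative usage sketch (not part of GCC): the SLP-aware overload above
   and vect_maybe_update_slp_op_vectype are normally used together when
   analyzing one operand of a statement or SLP node.  The helper name is
   hypothetical and the block is not built.  */
#if 0
static bool
example_analyze_operand (vec_info *vinfo, stmt_vec_info stmt_info,
			 slp_tree slp_node, tree vectype)
{
  tree op, op_vectype;
  slp_tree slp_op = NULL;
  enum vect_def_type dt;
  /* Fetch operand 0 from either the stmt or the SLP node and classify it.  */
  if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 0, &op, &slp_op,
			   &dt, &op_vectype, NULL))
    return false;
  /* For external/constant SLP operands record the vector type we intend
     to use; this fails if a conflicting type was recorded earlier.  */
  return vect_maybe_update_slp_op_vectype (slp_op, vectype);
}
#endif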
/* Function supportable_widening_operation

   Check whether an operation represented by the code CODE is a
   widening operation that is supported by the target platform in
   vector form (i.e., when operating on arguments of type VECTYPE_IN
   producing a result of type VECTYPE_OUT).

   Widening operations we currently support are NOP (CONVERT), FLOAT,
   FIX_TRUNC and WIDEN_MULT.  This function checks if these operations
   are supported by the target platform either directly (via vector
   tree-codes), or via target builtins.

   Output:
   - CODE1 and CODE2 are codes of vector operations to be used when
     vectorizing the operation, if available.
   - MULTI_STEP_CVT determines the number of required intermediate steps in
     case of multi-step conversion (like char->short->int - in that case
     MULTI_STEP_CVT will be 1).
   - INTERM_TYPES contains the intermediate type required to perform the
     widening operation (short in the above example).  */
bool
supportable_widening_operation (vec_info *vinfo,
				enum tree_code code, stmt_vec_info stmt_info,
				tree vectype_out, tree vectype_in,
				enum tree_code *code1, enum tree_code *code2,
				int *multi_step_cvt,
				vec<tree> *interm_types)
{
  loop_vec_info loop_info = dyn_cast <loop_vec_info> (vinfo);
  class loop *vect_loop = NULL;
  machine_mode vec_mode;
  enum insn_code icode1, icode2;
  optab optab1, optab2;
  tree vectype = vectype_in;
  tree wide_vectype = vectype_out;
  enum tree_code c1, c2;
  int i;
  tree prev_type, intermediate_type;
  machine_mode intermediate_mode, prev_mode;
  optab optab3, optab4;

  *multi_step_cvt = 0;
  if (loop_info)
    vect_loop = LOOP_VINFO_LOOP (loop_info);

  switch (code)
    {
    case WIDEN_MULT_EXPR:
      /* The result of a vectorized widening operation usually requires
	 two vectors (because the widened results do not fit into one vector).
	 The generated vector results would normally be expected to be
	 generated in the same order as in the original scalar computation,
	 i.e. if 8 results are generated in each vector iteration, they are
	 to be organized as follows:
		vect1: [res1,res2,res3,res4],
		vect2: [res5,res6,res7,res8].

	 However, in the special case that the result of the widening
	 operation is used in a reduction computation only, the order doesn't
	 matter (because when vectorizing a reduction we change the order of
	 the computation).  Some targets can take advantage of this and
	 generate more efficient code.  For example, targets like Altivec,
	 that support widen_mult using a sequence of {mult_even,mult_odd}
	 generate the following vectors:
		vect1: [res1,res3,res5,res7],
		vect2: [res2,res4,res6,res8].

	 When vectorizing outer-loops, we execute the inner-loop sequentially
	 (each vectorized inner-loop iteration contributes to VF outer-loop
	 iterations in parallel).  We therefore don't allow to change the
	 order of the computation in the inner-loop during outer-loop
	 vectorization.  */
      /* TODO: Another case in which order doesn't *really* matter is when we
	 widen and then contract again, e.g. (short)((int)x * y >> 8).
	 Normally, pack_trunc performs an even/odd permute, whereas the
	 repack from an even/odd expansion would be an interleave, which
	 would be significantly simpler for e.g. AVX2.  */
      /* In any case, in order to avoid duplicating the code below, recurse
	 on VEC_WIDEN_MULT_EVEN_EXPR.  If it succeeds, all the return values
	 are properly set up for the caller.  If we fail, we'll continue with
	 a VEC_WIDEN_MULT_LO/HI_EXPR check.  */
      if (vect_loop
	  && STMT_VINFO_RELEVANT (stmt_info) == vect_used_by_reduction
	  && !nested_in_vect_loop_p (vect_loop, stmt_info)
	  && supportable_widening_operation (vinfo, VEC_WIDEN_MULT_EVEN_EXPR,
					     stmt_info, vectype_out,
					     vectype_in, code1, code2,
					     multi_step_cvt, interm_types))
	{
	  /* Elements in a vector with vect_used_by_reduction property cannot
	     be reordered if the use chain with this property does not have
	     the same operation.  One such example is s += a * b, where
	     elements in a and b cannot be reordered.  Here we check if the
	     vector defined by STMT is only directly used in the reduction
	     statement.  */
	  tree lhs = gimple_assign_lhs (stmt_info->stmt);
	  stmt_vec_info use_stmt_info = loop_info->lookup_single_use (lhs);
	  if (use_stmt_info
	      && STMT_VINFO_DEF_TYPE (use_stmt_info) == vect_reduction_def)
	    return true;
	}
      c1 = VEC_WIDEN_MULT_LO_EXPR;
      c2 = VEC_WIDEN_MULT_HI_EXPR;
      break;

    case DOT_PROD_EXPR:
      c1 = DOT_PROD_EXPR;
      c2 = DOT_PROD_EXPR;
      break;

    case SAD_EXPR:
      c1 = SAD_EXPR;
      c2 = SAD_EXPR;
      break;

    case VEC_WIDEN_MULT_EVEN_EXPR:
      /* Support the recursion induced just above.  */
      c1 = VEC_WIDEN_MULT_EVEN_EXPR;
      c2 = VEC_WIDEN_MULT_ODD_EXPR;
      break;

    case WIDEN_LSHIFT_EXPR:
      c1 = VEC_WIDEN_LSHIFT_LO_EXPR;
      c2 = VEC_WIDEN_LSHIFT_HI_EXPR;
      break;

    CASE_CONVERT:
      c1 = VEC_UNPACK_LO_EXPR;
      c2 = VEC_UNPACK_HI_EXPR;
      break;

    case FLOAT_EXPR:
      c1 = VEC_UNPACK_FLOAT_LO_EXPR;
      c2 = VEC_UNPACK_FLOAT_HI_EXPR;
      break;

    case FIX_TRUNC_EXPR:
      c1 = VEC_UNPACK_FIX_TRUNC_LO_EXPR;
      c2 = VEC_UNPACK_FIX_TRUNC_HI_EXPR;
      break;

    default:
      gcc_unreachable ();
    }
  if (BYTES_BIG_ENDIAN && c1 != VEC_WIDEN_MULT_EVEN_EXPR)
    std::swap (c1, c2);

  if (code == FIX_TRUNC_EXPR)
    {
      /* The signedness is determined from output operand.  */
      optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
      optab2 = optab_for_tree_code (c2, vectype_out, optab_default);
    }
  else if (CONVERT_EXPR_CODE_P (code)
	   && VECTOR_BOOLEAN_TYPE_P (wide_vectype)
	   && VECTOR_BOOLEAN_TYPE_P (vectype)
	   && TYPE_MODE (wide_vectype) == TYPE_MODE (vectype)
	   && SCALAR_INT_MODE_P (TYPE_MODE (vectype)))
    {
      /* If the input and result modes are the same, a different optab
	 is needed where we pass in the number of units in vectype.  */
      optab1 = vec_unpacks_sbool_lo_optab;
      optab2 = vec_unpacks_sbool_hi_optab;
    }
  else
    {
      optab1 = optab_for_tree_code (c1, vectype, optab_default);
      optab2 = optab_for_tree_code (c2, vectype, optab_default);
    }

  if (!optab1 || !optab2)
    return false;

  vec_mode = TYPE_MODE (vectype);
  if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing
      || (icode2 = optab_handler (optab2, vec_mode)) == CODE_FOR_nothing)
    return false;

  *code1 = c1;
  *code2 = c2;

  if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
      && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
    {
      if (!VECTOR_BOOLEAN_TYPE_P (vectype))
	return true;
      /* For scalar masks we may have different boolean
	 vector types having the same QImode.  Thus we
	 add additional check for elements number.  */
      if (known_eq (TYPE_VECTOR_SUBPARTS (vectype),
		    TYPE_VECTOR_SUBPARTS (wide_vectype) * 2))
	return true;
    }

  /* Check if it's a multi-step conversion that can be done using intermediate
     types.  */

  prev_type = vectype;
  prev_mode = vec_mode;

  if (!CONVERT_EXPR_CODE_P (code))
    return false;

  /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
     intermediate steps in promotion sequence.  We try
     MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do
     not.  */
  interm_types->create (MAX_INTERM_CVT_STEPS);
  for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
    {
      intermediate_mode = insn_data[icode1].operand[0].mode;
      if (VECTOR_BOOLEAN_TYPE_P (prev_type))
	intermediate_type
	  = vect_halve_mask_nunits (prev_type, intermediate_mode);
      else
	intermediate_type
	  = lang_hooks.types.type_for_mode (intermediate_mode,
					    TYPE_UNSIGNED (prev_type));

      if (VECTOR_BOOLEAN_TYPE_P (intermediate_type)
	  && VECTOR_BOOLEAN_TYPE_P (prev_type)
	  && intermediate_mode == prev_mode
	  && SCALAR_INT_MODE_P (prev_mode))
	{
	  /* If the input and result modes are the same, a different optab
	     is needed where we pass in the number of units in vectype.  */
	  optab3 = vec_unpacks_sbool_lo_optab;
	  optab4 = vec_unpacks_sbool_hi_optab;
	}
      else
	{
	  optab3 = optab_for_tree_code (c1, intermediate_type, optab_default);
	  optab4 = optab_for_tree_code (c2, intermediate_type, optab_default);
	}

      if (!optab3 || !optab4
	  || (icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing
	  || insn_data[icode1].operand[0].mode != intermediate_mode
	  || (icode2 = optab_handler (optab2, prev_mode)) == CODE_FOR_nothing
	  || insn_data[icode2].operand[0].mode != intermediate_mode
	  || ((icode1 = optab_handler (optab3, intermediate_mode))
	      == CODE_FOR_nothing)
	  || ((icode2 = optab_handler (optab4, intermediate_mode))
	      == CODE_FOR_nothing))
	break;

      interm_types->quick_push (intermediate_type);
      (*multi_step_cvt)++;

      if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
	  && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
	{
	  if (!VECTOR_BOOLEAN_TYPE_P (vectype))
	    return true;
	  if (known_eq (TYPE_VECTOR_SUBPARTS (intermediate_type),
			TYPE_VECTOR_SUBPARTS (wide_vectype) * 2))
	    return true;
	}

      prev_type = intermediate_type;
      prev_mode = intermediate_mode;
    }

  interm_types->release ();
  return false;
}
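/* Illustrative usage sketch (not part of GCC): querying whether a widening
   conversion (e.g. char -> int elements) is supported, possibly as a
   multi-step sequence through intermediate types.  The helper name is
   hypothetical and the block is not built.  */
#if 0
static bool
example_widening_conversion_ok_p (vec_info *vinfo, stmt_vec_info stmt_info,
				  tree narrow_vectype, tree wide_vectype)
{
  enum tree_code code1, code2;
  int multi_step_cvt = 0;
  vec<tree> interm_types = vNULL;
  bool ok = supportable_widening_operation (vinfo, NOP_EXPR, stmt_info,
					    wide_vectype, narrow_vectype,
					    &code1, &code2, &multi_step_cvt,
					    &interm_types);
  /* On success CODE1/CODE2 are the lo/hi (or even/odd) unpack codes and
     MULTI_STEP_CVT counts the intermediate steps recorded in INTERM_TYPES
     (1 for char->short->int, for instance).  */
  interm_types.release ();
  return ok;
}
#endif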
/* Function supportable_narrowing_operation

   Check whether an operation represented by the code CODE is a
   narrowing operation that is supported by the target platform in
   vector form (i.e., when operating on arguments of type VECTYPE_IN
   and producing a result of type VECTYPE_OUT).

   Narrowing operations we currently support are NOP (CONVERT), FIX_TRUNC
   and FLOAT.  This function checks if these operations are supported by
   the target platform directly via vector tree-codes.

   Output:
   - CODE1 is the code of a vector operation to be used when
     vectorizing the operation, if available.
   - MULTI_STEP_CVT determines the number of required intermediate steps in
     case of multi-step conversion (like int->short->char - in that case
     MULTI_STEP_CVT will be 1).
   - INTERM_TYPES contains the intermediate type required to perform the
     narrowing operation (short in the above example).  */
bool
supportable_narrowing_operation (enum tree_code code,
				 tree vectype_out, tree vectype_in,
				 enum tree_code *code1, int *multi_step_cvt,
				 vec<tree> *interm_types)
{
  machine_mode vec_mode;
  enum insn_code icode1;
  optab optab1, interm_optab;
  tree vectype = vectype_in;
  tree narrow_vectype = vectype_out;
  enum tree_code c1;
  tree intermediate_type, prev_type;
  machine_mode intermediate_mode, prev_mode;
  int i;
  bool uns;

  *multi_step_cvt = 0;
  switch (code)
    {
    CASE_CONVERT:
      c1 = VEC_PACK_TRUNC_EXPR;
      if (VECTOR_BOOLEAN_TYPE_P (narrow_vectype)
	  && VECTOR_BOOLEAN_TYPE_P (vectype)
	  && TYPE_MODE (narrow_vectype) == TYPE_MODE (vectype)
	  && SCALAR_INT_MODE_P (TYPE_MODE (vectype)))
	optab1 = vec_pack_sbool_trunc_optab;
      else
	optab1 = optab_for_tree_code (c1, vectype, optab_default);
      break;

    case FIX_TRUNC_EXPR:
      c1 = VEC_PACK_FIX_TRUNC_EXPR;
      /* The signedness is determined from output operand.  */
      optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
      break;

    case FLOAT_EXPR:
      c1 = VEC_PACK_FLOAT_EXPR;
      optab1 = optab_for_tree_code (c1, vectype, optab_default);
      break;

    default:
      gcc_unreachable ();
    }

  if (!optab1)
    return false;

  vec_mode = TYPE_MODE (vectype);
  if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing)
    return false;

  *code1 = c1;

  if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
    {
      if (!VECTOR_BOOLEAN_TYPE_P (vectype))
	return true;
      /* For scalar masks we may have different boolean
	 vector types having the same QImode.  Thus we
	 add additional check for elements number.  */
      if (known_eq (TYPE_VECTOR_SUBPARTS (vectype) * 2,
		    TYPE_VECTOR_SUBPARTS (narrow_vectype)))
	return true;
    }

  if (code == FLOAT_EXPR)
    return false;

  /* Check if it's a multi-step conversion that can be done using intermediate
     types.  */
  prev_mode = vec_mode;
  prev_type = vectype;
  if (code == FIX_TRUNC_EXPR)
    uns = TYPE_UNSIGNED (vectype_out);
  else
    uns = TYPE_UNSIGNED (vectype);

  /* For multi-step FIX_TRUNC_EXPR prefer signed floating to integer
     conversion over unsigned, as unsigned FIX_TRUNC_EXPR is often more
     costly than signed.  */
  if (code == FIX_TRUNC_EXPR && uns)
    {
      enum insn_code icode2;

      intermediate_type
	= lang_hooks.types.type_for_mode (TYPE_MODE (vectype_out), 0);
      interm_optab
	= optab_for_tree_code (c1, intermediate_type, optab_default);
      if (interm_optab != unknown_optab
	  && (icode2 = optab_handler (optab1, vec_mode)) != CODE_FOR_nothing
	  && insn_data[icode1].operand[0].mode
	     == insn_data[icode2].operand[0].mode)
	{
	  uns = false;
	  optab1 = interm_optab;
	  icode1 = icode2;
	}
    }

  /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
     intermediate steps in promotion sequence.  We try
     MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do not.  */
  interm_types->create (MAX_INTERM_CVT_STEPS);
  for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
    {
      intermediate_mode = insn_data[icode1].operand[0].mode;
      if (VECTOR_BOOLEAN_TYPE_P (prev_type))
	intermediate_type
	  = vect_double_mask_nunits (prev_type, intermediate_mode);
      else
	intermediate_type
	  = lang_hooks.types.type_for_mode (intermediate_mode, uns);
      if (VECTOR_BOOLEAN_TYPE_P (intermediate_type)
	  && VECTOR_BOOLEAN_TYPE_P (prev_type)
	  && intermediate_mode == prev_mode
	  && SCALAR_INT_MODE_P (prev_mode))
	interm_optab = vec_pack_sbool_trunc_optab;
      else
	interm_optab
	  = optab_for_tree_code (VEC_PACK_TRUNC_EXPR, intermediate_type,
				 optab_default);
      if (!interm_optab
	  || ((icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing)
	  || insn_data[icode1].operand[0].mode != intermediate_mode
	  || ((icode1 = optab_handler (interm_optab, intermediate_mode))
	      == CODE_FOR_nothing))
	break;

      interm_types->quick_push (intermediate_type);
      (*multi_step_cvt)++;

      if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
	{
	  if (!VECTOR_BOOLEAN_TYPE_P (vectype))
	    return true;
	  if (known_eq (TYPE_VECTOR_SUBPARTS (intermediate_type) * 2,
			TYPE_VECTOR_SUBPARTS (narrow_vectype)))
	    return true;
	}

      prev_mode = intermediate_mode;
      prev_type = intermediate_type;
      optab1 = interm_optab;
    }

  interm_types->release ();
  return false;
}
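/* Illustrative usage sketch (not part of GCC): the narrowing query mirrors
   the widening one but returns a single pack code.  The helper name is
   hypothetical and the block is not built.  */
#if 0
static bool
example_narrowing_conversion_ok_p (tree wide_vectype, tree narrow_vectype)
{
  enum tree_code code1;
  int multi_step_cvt = 0;
  vec<tree> interm_types = vNULL;
  bool ok = supportable_narrowing_operation (NOP_EXPR, narrow_vectype,
					     wide_vectype, &code1,
					     &multi_step_cvt, &interm_types);
  /* For int -> char elements a target usually needs one intermediate
     short vector type, so MULTI_STEP_CVT would be 1 here.  */
  interm_types.release ();
  return ok;
}
#endif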
/* Generate and return a statement that sets vector mask MASK such that
   MASK[I] is true iff J + START_INDEX < END_INDEX for all J <= I.  */

gcall *
vect_gen_while (tree mask, tree start_index, tree end_index)
{
  tree cmp_type = TREE_TYPE (start_index);
  tree mask_type = TREE_TYPE (mask);
  gcc_checking_assert (direct_internal_fn_supported_p (IFN_WHILE_ULT,
						       cmp_type, mask_type,
						       OPTIMIZE_FOR_SPEED));
  gcall *call = gimple_build_call_internal (IFN_WHILE_ULT, 3,
					    start_index, end_index,
					    build_zero_cst (mask_type));
  gimple_call_set_lhs (call, mask);
  return call;
}
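/* Illustrative usage sketch (not part of GCC): building a loop mask that is
   true for lanes whose index is below the remaining iteration count, as is
   done when emitting fully-masked loops.  The helper name is hypothetical
   and the block is not built.  */
#if 0
static tree
example_build_loop_mask (gimple_seq *seq, tree mask_type,
			 tree start_index, tree niters)
{
  tree mask = make_ssa_name (mask_type);
  gcall *call = vect_gen_while (mask, start_index, niters);
  gimple_seq_add_stmt (seq, call);
  return mask;
}
#endif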
/* Generate a vector mask of type MASK_TYPE for which index I is false iff
   J + START_INDEX < END_INDEX for all J <= I.  Add the statements to SEQ.  */

tree
vect_gen_while_not (gimple_seq *seq, tree mask_type, tree start_index,
		    tree end_index)
{
  tree tmp = make_ssa_name (mask_type);
  gcall *call = vect_gen_while (tmp, start_index, end_index);
  gimple_seq_add_stmt (seq, call);
  return gimple_build (seq, BIT_NOT_EXPR, mask_type, tmp);
}
/* Try to compute the vector types required to vectorize STMT_INFO,
   returning true on success and false if vectorization isn't possible.
   If GROUP_SIZE is nonzero and we're performing BB vectorization,
   make sure that the number of elements in the vectors is no bigger
   than GROUP_SIZE.

   On success:

   - Set *STMT_VECTYPE_OUT to:
     - NULL_TREE if the statement doesn't need to be vectorized;
     - the equivalent of STMT_VINFO_VECTYPE otherwise.

   - Set *NUNITS_VECTYPE_OUT to the vector type that contains the maximum
     number of units needed to vectorize STMT_INFO, or NULL_TREE if the
     statement does not help to determine the overall number of units.  */
opt_result
vect_get_vector_types_for_stmt (vec_info *vinfo, stmt_vec_info stmt_info,
				tree *stmt_vectype_out,
				tree *nunits_vectype_out,
				unsigned int group_size)
{
  gimple *stmt = stmt_info->stmt;

  /* For BB vectorization, we should always have a group size once we've
     constructed the SLP tree; the only valid uses of zero GROUP_SIZEs
     are tentative requests during things like early data reference
     analysis and pattern recognition.  */
  if (is_a <bb_vec_info> (vinfo))
    gcc_assert (vinfo->slp_instances.is_empty () || group_size != 0);
  else
    group_size = 0;

  *stmt_vectype_out = NULL_TREE;
  *nunits_vectype_out = NULL_TREE;

  if (gimple_get_lhs (stmt) == NULL_TREE
      /* MASK_STORE has no lhs, but is ok.  */
      && !gimple_call_internal_p (stmt, IFN_MASK_STORE))
    {
      if (is_a <gcall *> (stmt))
	{
	  /* Ignore calls with no lhs.  These must be calls to
	     #pragma omp simd functions, and what vectorization factor
	     it really needs can't be determined until
	     vectorizable_simd_clone_call.  */
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_NOTE, vect_location,
			     "defer to SIMD clone analysis.\n");
	  return opt_result::success ();
	}

      return opt_result::failure_at (stmt,
				     "not vectorized: irregular stmt.%G", stmt);
    }

  if (VECTOR_MODE_P (TYPE_MODE (gimple_expr_type (stmt))))
    return opt_result::failure_at (stmt,
				   "not vectorized: vector stmt in loop:%G",
				   stmt);

  tree vectype;
  tree scalar_type = NULL_TREE;
  if (group_size == 0 && STMT_VINFO_VECTYPE (stmt_info))
    {
      vectype = STMT_VINFO_VECTYPE (stmt_info);
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "precomputed vectype: %T\n", vectype);
    }
  else if (vect_use_mask_type_p (stmt_info))
    {
      unsigned int precision = stmt_info->mask_precision;
      scalar_type = build_nonstandard_integer_type (precision, 1);
      vectype = get_mask_type_for_scalar_type (vinfo, scalar_type, group_size);
      if (!vectype)
	return opt_result::failure_at (stmt, "not vectorized: unsupported"
				       " data-type %T\n", scalar_type);
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location, "vectype: %T\n", vectype);
    }
  else
    {
      if (data_reference *dr = STMT_VINFO_DATA_REF (stmt_info))
	scalar_type = TREE_TYPE (DR_REF (dr));
      else if (gimple_call_internal_p (stmt, IFN_MASK_STORE))
	scalar_type = TREE_TYPE (gimple_call_arg (stmt, 3));
      else
	scalar_type = TREE_TYPE (gimple_get_lhs (stmt));

      if (dump_enabled_p ())
	{
	  if (group_size)
	    dump_printf_loc (MSG_NOTE, vect_location,
			     "get vectype for scalar type (group size %d):"
			     " %T\n", group_size, scalar_type);
	  else
	    dump_printf_loc (MSG_NOTE, vect_location,
			     "get vectype for scalar type: %T\n", scalar_type);
	}
      vectype = get_vectype_for_scalar_type (vinfo, scalar_type, group_size);
      if (!vectype)
	return opt_result::failure_at (stmt,
				       "not vectorized:"
				       " unsupported data-type %T\n",
				       scalar_type);

      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location, "vectype: %T\n", vectype);
    }
  *stmt_vectype_out = vectype;

  /* Don't try to compute scalar types if the stmt produces a boolean
     vector; use the existing vector type instead.  */
  tree nunits_vectype = vectype;
  if (!VECTOR_BOOLEAN_TYPE_P (vectype))
    {
      /* The number of units is set according to the smallest scalar
	 type (or the largest vector size, but we only support one
	 vector size per vectorization).  */
      HOST_WIDE_INT dummy;
      scalar_type = vect_get_smallest_scalar_type (stmt_info, &dummy, &dummy);
      if (scalar_type != TREE_TYPE (vectype))
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_NOTE, vect_location,
			     "get vectype for smallest scalar type: %T\n",
			     scalar_type);
	  nunits_vectype = get_vectype_for_scalar_type (vinfo, scalar_type,
							group_size);
	  if (!nunits_vectype)
	    return opt_result::failure_at
	      (stmt, "not vectorized: unsupported data-type %T\n",
	       scalar_type);
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_NOTE, vect_location, "nunits vectype: %T\n",
			     nunits_vectype);
	}
    }

  gcc_assert (multiple_p (TYPE_VECTOR_SUBPARTS (nunits_vectype),
			  TYPE_VECTOR_SUBPARTS (*stmt_vectype_out)));

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location, "nunits = ");
      dump_dec (MSG_NOTE, TYPE_VECTOR_SUBPARTS (nunits_vectype));
      dump_printf (MSG_NOTE, "\n");
    }

  *nunits_vectype_out = nunits_vectype;
  return opt_result::success ();
}
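/* Illustrative usage sketch (not part of GCC): callers of the analysis above
   retrieve both vector types and propagate the opt_result diagnostics on
   failure.  The helper name is hypothetical and the block is not built.  */
#if 0
static opt_result
example_compute_vectypes (vec_info *vinfo, stmt_vec_info stmt_info)
{
  tree stmt_vectype, nunits_vectype;
  opt_result res
    = vect_get_vector_types_for_stmt (vinfo, stmt_info, &stmt_vectype,
				      &nunits_vectype, 0);
  if (!res)
    return res;
  /* STMT_VECTYPE is the type the statement itself will use (or NULL_TREE
     if it needs no vectorization); NUNITS_VECTYPE determines the number
     of lanes contributing to the vectorization factor.  */
  return opt_result::success ();
}
#endif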