1 /* SLP - Basic Block Vectorization
2 Copyright (C) 2007-2018 Free Software Foundation, Inc.
3 Contributed by Dorit Naishlos <dorit@il.ibm.com>
4 and Ira Rosen <irar@il.ibm.com>
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
24 #include "coretypes.h"
30 #include "tree-pass.h"
32 #include "optabs-tree.h"
33 #include "insn-config.h"
34 #include "recog.h" /* FIXME: for insn_data */
36 #include "fold-const.h"
37 #include "stor-layout.h"
38 #include "gimple-iterator.h"
40 #include "tree-vectorizer.h"
41 #include "langhooks.h"
42 #include "gimple-walk.h"
44 #include "tree-vector-builder.h"
45 #include "vec-perm-indices.h"
46 #include "gimple-fold.h"
47 #include "internal-fn.h"
50 /* Recursively free the memory allocated for the SLP tree rooted at NODE.
51 FINAL_P is true if we have vectorized the instance or if we have
52 made a final decision not to vectorize the statements in any way. */
55 vect_free_slp_tree (slp_tree node
, bool final_p
)
60 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), i
, child
)
61 vect_free_slp_tree (child
, final_p
);
63 /* Don't update STMT_VINFO_NUM_SLP_USES if it isn't relevant.
64 Some statements might no longer exist, after having been
65 removed by vect_transform_stmt. Updating the remaining
66 statements would be redundant. */
70 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node
), i
, stmt
)
72 gcc_assert (STMT_VINFO_NUM_SLP_USES (vinfo_for_stmt (stmt
)) > 0);
73 STMT_VINFO_NUM_SLP_USES (vinfo_for_stmt (stmt
))--;
77 SLP_TREE_CHILDREN (node
).release ();
78 SLP_TREE_SCALAR_STMTS (node
).release ();
79 SLP_TREE_VEC_STMTS (node
).release ();
80 SLP_TREE_LOAD_PERMUTATION (node
).release ();
86 /* Free the memory allocated for the SLP instance. FINAL_P is true if we
87 have vectorized the instance or if we have made a final decision not
88 to vectorize the statements in any way. */
91 vect_free_slp_instance (slp_instance instance
, bool final_p
)
93 vect_free_slp_tree (SLP_INSTANCE_TREE (instance
), final_p
);
94 SLP_INSTANCE_LOADS (instance
).release ();
99 /* Create an SLP node for SCALAR_STMTS. */
102 vect_create_new_slp_node (vec
<gimple
*> scalar_stmts
)
105 gimple
*stmt
= scalar_stmts
[0];
108 if (is_gimple_call (stmt
))
109 nops
= gimple_call_num_args (stmt
);
110 else if (is_gimple_assign (stmt
))
112 nops
= gimple_num_ops (stmt
) - 1;
113 if (gimple_assign_rhs_code (stmt
) == COND_EXPR
)
116 else if (gimple_code (stmt
) == GIMPLE_PHI
)
121 node
= XNEW (struct _slp_tree
);
122 SLP_TREE_SCALAR_STMTS (node
) = scalar_stmts
;
123 SLP_TREE_VEC_STMTS (node
).create (0);
124 SLP_TREE_NUMBER_OF_VEC_STMTS (node
) = 0;
125 SLP_TREE_CHILDREN (node
).create (nops
);
126 SLP_TREE_LOAD_PERMUTATION (node
) = vNULL
;
127 SLP_TREE_TWO_OPERATORS (node
) = false;
128 SLP_TREE_DEF_TYPE (node
) = vect_internal_def
;
131 FOR_EACH_VEC_ELT (scalar_stmts
, i
, stmt
)
132 STMT_VINFO_NUM_SLP_USES (vinfo_for_stmt (stmt
))++;
138 /* This structure is used in creation of an SLP tree. Each instance
139 corresponds to the same operand in a group of scalar stmts in an SLP
141 typedef struct _slp_oprnd_info
143 /* Def-stmts for the operands. */
144 vec
<gimple
*> def_stmts
;
145 /* Information about the first statement, its vector def-type, type, the
146 operand itself in case it's constant, and an indication if it's a pattern
149 enum vect_def_type first_dt
;
155 /* Allocate operands info for NOPS operands, and GROUP_SIZE def-stmts for each
157 static vec
<slp_oprnd_info
>
158 vect_create_oprnd_info (int nops
, int group_size
)
161 slp_oprnd_info oprnd_info
;
162 vec
<slp_oprnd_info
> oprnds_info
;
164 oprnds_info
.create (nops
);
165 for (i
= 0; i
< nops
; i
++)
167 oprnd_info
= XNEW (struct _slp_oprnd_info
);
168 oprnd_info
->def_stmts
.create (group_size
);
169 oprnd_info
->first_dt
= vect_uninitialized_def
;
170 oprnd_info
->first_op_type
= NULL_TREE
;
171 oprnd_info
->first_pattern
= false;
172 oprnd_info
->second_pattern
= false;
173 oprnds_info
.quick_push (oprnd_info
);
180 /* Free operands info. */
183 vect_free_oprnd_info (vec
<slp_oprnd_info
> &oprnds_info
)
186 slp_oprnd_info oprnd_info
;
188 FOR_EACH_VEC_ELT (oprnds_info
, i
, oprnd_info
)
190 oprnd_info
->def_stmts
.release ();
191 XDELETE (oprnd_info
);
194 oprnds_info
.release ();
198 /* Find the place of the data-ref in STMT in the interleaving chain that starts
199 from FIRST_STMT. Return -1 if the data-ref is not a part of the chain. */
202 vect_get_place_in_interleaving_chain (gimple
*stmt
, gimple
*first_stmt
)
204 gimple
*next_stmt
= first_stmt
;
207 if (first_stmt
!= DR_GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt
)))
212 if (next_stmt
== stmt
)
214 next_stmt
= DR_GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt
));
216 result
+= DR_GROUP_GAP (vinfo_for_stmt (next_stmt
));
223 /* Check whether it is possible to load COUNT elements of type ELT_MODE
224 using the method implemented by duplicate_and_interleave. Return true
225 if so, returning the number of intermediate vectors in *NVECTORS_OUT
226 (if nonnull) and the type of each intermediate vector in *VECTOR_TYPE_OUT
230 can_duplicate_and_interleave_p (unsigned int count
, machine_mode elt_mode
,
231 unsigned int *nvectors_out
,
232 tree
*vector_type_out
,
235 poly_int64 elt_bytes
= count
* GET_MODE_SIZE (elt_mode
);
237 unsigned int nvectors
= 1;
240 scalar_int_mode int_mode
;
241 poly_int64 elt_bits
= elt_bytes
* BITS_PER_UNIT
;
242 if (multiple_p (current_vector_size
, elt_bytes
, &nelts
)
243 && int_mode_for_size (elt_bits
, 0).exists (&int_mode
))
245 tree int_type
= build_nonstandard_integer_type
246 (GET_MODE_BITSIZE (int_mode
), 1);
247 tree vector_type
= build_vector_type (int_type
, nelts
);
248 if (VECTOR_MODE_P (TYPE_MODE (vector_type
)))
250 vec_perm_builder
sel1 (nelts
, 2, 3);
251 vec_perm_builder
sel2 (nelts
, 2, 3);
252 poly_int64 half_nelts
= exact_div (nelts
, 2);
253 for (unsigned int i
= 0; i
< 3; ++i
)
256 sel1
.quick_push (i
+ nelts
);
257 sel2
.quick_push (half_nelts
+ i
);
258 sel2
.quick_push (half_nelts
+ i
+ nelts
);
260 vec_perm_indices
indices1 (sel1
, 2, nelts
);
261 vec_perm_indices
indices2 (sel2
, 2, nelts
);
262 if (can_vec_perm_const_p (TYPE_MODE (vector_type
), indices1
)
263 && can_vec_perm_const_p (TYPE_MODE (vector_type
), indices2
))
266 *nvectors_out
= nvectors
;
268 *vector_type_out
= vector_type
;
271 permutes
[0] = vect_gen_perm_mask_checked (vector_type
,
273 permutes
[1] = vect_gen_perm_mask_checked (vector_type
,
280 if (!multiple_p (elt_bytes
, 2, &elt_bytes
))
286 /* Get the defs for the rhs of STMT (collect them in OPRNDS_INFO), check that
287 they are of a valid type and that they match the defs of the first stmt of
288 the SLP group (stored in OPRNDS_INFO). This function tries to match stmts
289 by swapping operands of STMTS[STMT_NUM] when possible. Non-zero *SWAP
290 indicates swap is required for cond_expr stmts. Specifically, *SWAP
291 is 1 if STMT is cond and operands of comparison need to be swapped;
292 *SWAP is 2 if STMT is cond and code of comparison needs to be inverted.
293 If there is any operand swap in this function, *SWAP is set to non-zero
295 If there was a fatal error return -1; if the error could be corrected by
296 swapping operands of father node of this one, return 1; if everything is
299 vect_get_and_check_slp_defs (vec_info
*vinfo
, unsigned char *swap
,
300 vec
<gimple
*> stmts
, unsigned stmt_num
,
301 vec
<slp_oprnd_info
> *oprnds_info
)
303 gimple
*stmt
= stmts
[stmt_num
];
305 unsigned int i
, number_of_oprnds
;
307 enum vect_def_type dt
= vect_uninitialized_def
;
308 bool pattern
= false;
309 slp_oprnd_info oprnd_info
;
310 int first_op_idx
= 1;
311 bool commutative
= false;
312 bool first_op_cond
= false;
313 bool first
= stmt_num
== 0;
314 bool second
= stmt_num
== 1;
316 if (is_gimple_call (stmt
))
318 number_of_oprnds
= gimple_call_num_args (stmt
);
321 else if (is_gimple_assign (stmt
))
323 enum tree_code code
= gimple_assign_rhs_code (stmt
);
324 number_of_oprnds
= gimple_num_ops (stmt
) - 1;
325 /* Swap can only be done for cond_expr if asked to, otherwise we
326 could result in different comparison code to the first stmt. */
327 if (gimple_assign_rhs_code (stmt
) == COND_EXPR
328 && COMPARISON_CLASS_P (gimple_assign_rhs1 (stmt
)))
330 first_op_cond
= true;
334 commutative
= commutative_tree_code (code
);
339 bool swapped
= (*swap
!= 0);
340 gcc_assert (!swapped
|| first_op_cond
);
341 for (i
= 0; i
< number_of_oprnds
; i
++)
346 /* Map indicating how operands of cond_expr should be swapped. */
347 int maps
[3][4] = {{0, 1, 2, 3}, {1, 0, 2, 3}, {0, 1, 3, 2}};
348 int *map
= maps
[*swap
];
351 oprnd
= TREE_OPERAND (gimple_op (stmt
, first_op_idx
), map
[i
]);
353 oprnd
= gimple_op (stmt
, map
[i
]);
356 oprnd
= gimple_op (stmt
, first_op_idx
+ (swapped
? !i
: i
));
358 oprnd_info
= (*oprnds_info
)[i
];
360 if (!vect_is_simple_use (oprnd
, vinfo
, &dt
, &def_stmt
))
362 if (dump_enabled_p ())
364 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
365 "Build SLP failed: can't analyze def for ");
366 dump_generic_expr (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, oprnd
);
367 dump_printf (MSG_MISSED_OPTIMIZATION
, "\n");
373 /* Check if DEF_STMT is a part of a pattern in LOOP and get the def stmt
374 from the pattern. Check that all the stmts of the node are in the
376 if (def_stmt
&& gimple_bb (def_stmt
)
377 && vect_stmt_in_region_p (vinfo
, def_stmt
)
378 && vinfo_for_stmt (def_stmt
)
379 && is_pattern_stmt_p (vinfo_for_stmt (def_stmt
)))
382 if (!first
&& !oprnd_info
->first_pattern
383 /* Allow different pattern state for the defs of the
384 first stmt in reduction chains. */
385 && (oprnd_info
->first_dt
!= vect_reduction_def
386 || (!second
&& !oprnd_info
->second_pattern
)))
396 if (dump_enabled_p ())
398 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
399 "Build SLP failed: some of the stmts"
400 " are in a pattern, and others are not ");
401 dump_generic_expr (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, oprnd
);
402 dump_printf (MSG_MISSED_OPTIMIZATION
, "\n");
408 dt
= STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def_stmt
));
410 if (dt
== vect_unknown_def_type
)
412 if (dump_enabled_p ())
413 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
414 "Unsupported pattern.\n");
418 switch (gimple_code (def_stmt
))
425 if (dump_enabled_p ())
426 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
427 "unsupported defining stmt:\n");
433 oprnd_info
->second_pattern
= pattern
;
437 oprnd_info
->first_dt
= dt
;
438 oprnd_info
->first_pattern
= pattern
;
439 oprnd_info
->first_op_type
= TREE_TYPE (oprnd
);
443 /* Not first stmt of the group, check that the def-stmt/s match
444 the def-stmt/s of the first stmt. Allow different definition
445 types for reduction chains: the first stmt must be a
446 vect_reduction_def (a phi node), and the rest
447 vect_internal_def. */
448 tree type
= TREE_TYPE (oprnd
);
449 if ((oprnd_info
->first_dt
!= dt
450 && !(oprnd_info
->first_dt
== vect_reduction_def
451 && dt
== vect_internal_def
)
452 && !((oprnd_info
->first_dt
== vect_external_def
453 || oprnd_info
->first_dt
== vect_constant_def
)
454 && (dt
== vect_external_def
455 || dt
== vect_constant_def
)))
456 || !types_compatible_p (oprnd_info
->first_op_type
, type
))
458 /* Try swapping operands if we got a mismatch. */
467 if (dump_enabled_p ())
468 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
469 "Build SLP failed: different types\n");
473 if ((dt
== vect_constant_def
474 || dt
== vect_external_def
)
475 && !current_vector_size
.is_constant ()
476 && (TREE_CODE (type
) == BOOLEAN_TYPE
477 || !can_duplicate_and_interleave_p (stmts
.length (),
480 if (dump_enabled_p ())
482 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
483 "Build SLP failed: invalid type of def "
484 "for variable-length SLP ");
485 dump_generic_expr (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, oprnd
);
486 dump_printf (MSG_MISSED_OPTIMIZATION
, "\n");
492 /* Check the types of the definitions. */
495 case vect_constant_def
:
496 case vect_external_def
:
499 case vect_reduction_def
:
500 case vect_induction_def
:
501 case vect_internal_def
:
502 oprnd_info
->def_stmts
.quick_push (def_stmt
);
506 /* FORNOW: Not supported. */
507 if (dump_enabled_p ())
509 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
510 "Build SLP failed: illegal type of def ");
511 dump_generic_expr (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, oprnd
);
512 dump_printf (MSG_MISSED_OPTIMIZATION
, "\n");
522 /* If there are already uses of this stmt in a SLP instance then
523 we've committed to the operand order and can't swap it. */
524 if (STMT_VINFO_NUM_SLP_USES (vinfo_for_stmt (stmt
)) != 0)
526 if (dump_enabled_p ())
528 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
529 "Build SLP failed: cannot swap operands of "
531 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, stmt
, 0);
538 tree cond
= gimple_assign_rhs1 (stmt
);
539 enum tree_code code
= TREE_CODE (cond
);
544 swap_ssa_operands (stmt
, &TREE_OPERAND (cond
, 0),
545 &TREE_OPERAND (cond
, 1));
546 TREE_SET_CODE (cond
, swap_tree_comparison (code
));
551 swap_ssa_operands (stmt
, gimple_assign_rhs2_ptr (stmt
),
552 gimple_assign_rhs3_ptr (stmt
));
553 bool honor_nans
= HONOR_NANS (TREE_OPERAND (cond
, 0));
554 code
= invert_tree_comparison (TREE_CODE (cond
), honor_nans
);
555 gcc_assert (code
!= ERROR_MARK
);
556 TREE_SET_CODE (cond
, code
);
560 swap_ssa_operands (stmt
, gimple_assign_rhs1_ptr (stmt
),
561 gimple_assign_rhs2_ptr (stmt
));
562 if (dump_enabled_p ())
564 dump_printf_loc (MSG_NOTE
, vect_location
,
565 "swapped operands to match def types in ");
566 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, stmt
, 0);
574 /* Return true if call statements CALL1 and CALL2 are similar enough
575 to be combined into the same SLP group. */
578 compatible_calls_p (gcall
*call1
, gcall
*call2
)
580 unsigned int nargs
= gimple_call_num_args (call1
);
581 if (nargs
!= gimple_call_num_args (call2
))
584 if (gimple_call_combined_fn (call1
) != gimple_call_combined_fn (call2
))
587 if (gimple_call_internal_p (call1
))
589 if (!types_compatible_p (TREE_TYPE (gimple_call_lhs (call1
)),
590 TREE_TYPE (gimple_call_lhs (call2
))))
592 for (unsigned int i
= 0; i
< nargs
; ++i
)
593 if (!types_compatible_p (TREE_TYPE (gimple_call_arg (call1
, i
)),
594 TREE_TYPE (gimple_call_arg (call2
, i
))))
599 if (!operand_equal_p (gimple_call_fn (call1
),
600 gimple_call_fn (call2
), 0))
603 if (gimple_call_fntype (call1
) != gimple_call_fntype (call2
))
609 /* A subroutine of vect_build_slp_tree for checking VECTYPE, which is the
610 caller's attempt to find the vector type in STMT with the narrowest
611 element type. Return true if VECTYPE is nonnull and if it is valid
612 for VINFO. When returning true, update MAX_NUNITS to reflect the
613 number of units in VECTYPE. VINFO, GORUP_SIZE and MAX_NUNITS are
614 as for vect_build_slp_tree. */
617 vect_record_max_nunits (vec_info
*vinfo
, gimple
*stmt
, unsigned int group_size
,
618 tree vectype
, poly_uint64
*max_nunits
)
622 if (dump_enabled_p ())
624 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
625 "Build SLP failed: unsupported data-type in ");
626 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, stmt
, 0);
627 dump_printf (MSG_MISSED_OPTIMIZATION
, "\n");
629 /* Fatal mismatch. */
633 /* If populating the vector type requires unrolling then fail
634 before adjusting *max_nunits for basic-block vectorization. */
635 poly_uint64 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
636 unsigned HOST_WIDE_INT const_nunits
;
637 if (is_a
<bb_vec_info
> (vinfo
)
638 && (!nunits
.is_constant (&const_nunits
)
639 || const_nunits
> group_size
))
641 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
642 "Build SLP failed: unrolling required "
643 "in basic block SLP\n");
644 /* Fatal mismatch. */
648 /* In case of multiple types we need to detect the smallest type. */
649 vect_update_max_nunits (max_nunits
, vectype
);
653 /* STMTS is a group of GROUP_SIZE SLP statements in which some
654 statements do the same operation as the first statement and in which
655 the others do ALT_STMT_CODE. Return true if we can take one vector
656 of the first operation and one vector of the second and permute them
657 to get the required result. VECTYPE is the type of the vector that
658 would be permuted. */
661 vect_two_operations_perm_ok_p (vec
<gimple
*> stmts
, unsigned int group_size
,
662 tree vectype
, tree_code alt_stmt_code
)
664 unsigned HOST_WIDE_INT count
;
665 if (!TYPE_VECTOR_SUBPARTS (vectype
).is_constant (&count
))
668 vec_perm_builder
sel (count
, count
, 1);
669 for (unsigned int i
= 0; i
< count
; ++i
)
671 unsigned int elt
= i
;
672 if (gimple_assign_rhs_code (stmts
[i
% group_size
]) == alt_stmt_code
)
674 sel
.quick_push (elt
);
676 vec_perm_indices
indices (sel
, 2, count
);
677 return can_vec_perm_const_p (TYPE_MODE (vectype
), indices
);
680 /* Verify if the scalar stmts STMTS are isomorphic, require data
681 permutation or are of unsupported types of operation. Return
682 true if they are, otherwise return false and indicate in *MATCHES
683 which stmts are not isomorphic to the first one. If MATCHES[0]
684 is false then this indicates the comparison could not be
685 carried out or the stmts will never be vectorized by SLP.
687 Note COND_EXPR is possibly ismorphic to another one after swapping its
688 operands. Set SWAP[i] to 1 if stmt I is COND_EXPR and isomorphic to
689 the first stmt by swapping the two operands of comparison; set SWAP[i]
690 to 2 if stmt I is isormorphic to the first stmt by inverting the code
691 of comparison. Take A1 >= B1 ? X1 : Y1 as an exmple, it can be swapped
692 to (B1 <= A1 ? X1 : Y1); or be inverted to (A1 < B1) ? Y1 : X1. */
695 vect_build_slp_tree_1 (vec_info
*vinfo
, unsigned char *swap
,
696 vec
<gimple
*> stmts
, unsigned int group_size
,
697 poly_uint64
*max_nunits
, bool *matches
,
701 gimple
*first_stmt
= stmts
[0], *stmt
= stmts
[0];
702 enum tree_code first_stmt_code
= ERROR_MARK
;
703 enum tree_code alt_stmt_code
= ERROR_MARK
;
704 enum tree_code rhs_code
= ERROR_MARK
;
705 enum tree_code first_cond_code
= ERROR_MARK
;
707 bool need_same_oprnds
= false;
708 tree vectype
= NULL_TREE
, first_op1
= NULL_TREE
;
711 machine_mode optab_op2_mode
;
712 machine_mode vec_mode
;
713 gimple
*first_load
= NULL
, *prev_first_load
= NULL
;
715 /* For every stmt in NODE find its def stmt/s. */
716 FOR_EACH_VEC_ELT (stmts
, i
, stmt
)
718 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
722 if (dump_enabled_p ())
724 dump_printf_loc (MSG_NOTE
, vect_location
, "Build SLP for ");
725 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, stmt
, 0);
728 /* Fail to vectorize statements marked as unvectorizable. */
729 if (!STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (stmt
)))
731 if (dump_enabled_p ())
733 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
734 "Build SLP failed: unvectorizable statement ");
735 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, stmt
, 0);
737 /* Fatal mismatch. */
742 lhs
= gimple_get_lhs (stmt
);
743 if (lhs
== NULL_TREE
)
745 if (dump_enabled_p ())
747 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
748 "Build SLP failed: not GIMPLE_ASSIGN nor "
750 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, stmt
, 0);
752 /* Fatal mismatch. */
758 if (!vect_get_vector_types_for_stmt (stmt_info
, &vectype
,
761 && !vect_record_max_nunits (vinfo
, stmt
, group_size
,
762 nunits_vectype
, max_nunits
)))
764 /* Fatal mismatch. */
769 gcc_assert (vectype
);
771 if (gcall
*call_stmt
= dyn_cast
<gcall
*> (stmt
))
773 rhs_code
= CALL_EXPR
;
774 if ((gimple_call_internal_p (call_stmt
)
775 && (!vectorizable_internal_fn_p
776 (gimple_call_internal_fn (call_stmt
))))
777 || gimple_call_tail_p (call_stmt
)
778 || gimple_call_noreturn_p (call_stmt
)
779 || !gimple_call_nothrow_p (call_stmt
)
780 || gimple_call_chain (call_stmt
))
782 if (dump_enabled_p ())
784 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
785 "Build SLP failed: unsupported call type ");
786 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
,
789 /* Fatal mismatch. */
795 rhs_code
= gimple_assign_rhs_code (stmt
);
797 /* Check the operation. */
800 first_stmt_code
= rhs_code
;
802 /* Shift arguments should be equal in all the packed stmts for a
803 vector shift with scalar shift operand. */
804 if (rhs_code
== LSHIFT_EXPR
|| rhs_code
== RSHIFT_EXPR
805 || rhs_code
== LROTATE_EXPR
806 || rhs_code
== RROTATE_EXPR
)
808 if (vectype
== boolean_type_node
)
810 if (dump_enabled_p ())
811 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
812 "Build SLP failed: shift of a"
814 /* Fatal mismatch. */
819 vec_mode
= TYPE_MODE (vectype
);
821 /* First see if we have a vector/vector shift. */
822 optab
= optab_for_tree_code (rhs_code
, vectype
,
826 || optab_handler (optab
, vec_mode
) == CODE_FOR_nothing
)
828 /* No vector/vector shift, try for a vector/scalar shift. */
829 optab
= optab_for_tree_code (rhs_code
, vectype
,
834 if (dump_enabled_p ())
835 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
836 "Build SLP failed: no optab.\n");
837 /* Fatal mismatch. */
841 icode
= (int) optab_handler (optab
, vec_mode
);
842 if (icode
== CODE_FOR_nothing
)
844 if (dump_enabled_p ())
845 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
847 "op not supported by target.\n");
848 /* Fatal mismatch. */
852 optab_op2_mode
= insn_data
[icode
].operand
[2].mode
;
853 if (!VECTOR_MODE_P (optab_op2_mode
))
855 need_same_oprnds
= true;
856 first_op1
= gimple_assign_rhs2 (stmt
);
860 else if (rhs_code
== WIDEN_LSHIFT_EXPR
)
862 need_same_oprnds
= true;
863 first_op1
= gimple_assign_rhs2 (stmt
);
868 if (first_stmt_code
!= rhs_code
869 && alt_stmt_code
== ERROR_MARK
)
870 alt_stmt_code
= rhs_code
;
871 if (first_stmt_code
!= rhs_code
872 && (first_stmt_code
!= IMAGPART_EXPR
873 || rhs_code
!= REALPART_EXPR
)
874 && (first_stmt_code
!= REALPART_EXPR
875 || rhs_code
!= IMAGPART_EXPR
)
876 /* Handle mismatches in plus/minus by computing both
877 and merging the results. */
878 && !((first_stmt_code
== PLUS_EXPR
879 || first_stmt_code
== MINUS_EXPR
)
880 && (alt_stmt_code
== PLUS_EXPR
881 || alt_stmt_code
== MINUS_EXPR
)
882 && rhs_code
== alt_stmt_code
)
883 && !(STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt
))
884 && (first_stmt_code
== ARRAY_REF
885 || first_stmt_code
== BIT_FIELD_REF
886 || first_stmt_code
== INDIRECT_REF
887 || first_stmt_code
== COMPONENT_REF
888 || first_stmt_code
== MEM_REF
)))
890 if (dump_enabled_p ())
892 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
893 "Build SLP failed: different operation "
895 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, stmt
, 0);
896 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
898 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
,
906 && !operand_equal_p (first_op1
, gimple_assign_rhs2 (stmt
), 0))
908 if (dump_enabled_p ())
910 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
911 "Build SLP failed: different shift "
913 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, stmt
, 0);
919 if (rhs_code
== CALL_EXPR
)
921 gimple
*first_stmt
= stmts
[0];
922 if (!compatible_calls_p (as_a
<gcall
*> (first_stmt
),
923 as_a
<gcall
*> (stmt
)))
925 if (dump_enabled_p ())
927 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
928 "Build SLP failed: different calls in ");
929 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
,
938 /* Grouped store or load. */
939 if (STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt
)))
941 if (REFERENCE_CLASS_P (lhs
))
949 first_load
= DR_GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt
));
952 /* Check that there are no loads from different interleaving
953 chains in the same node. */
954 if (prev_first_load
!= first_load
)
956 if (dump_enabled_p ())
958 dump_printf_loc (MSG_MISSED_OPTIMIZATION
,
960 "Build SLP failed: different "
961 "interleaving chains in one node ");
962 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
,
970 prev_first_load
= first_load
;
972 } /* Grouped access. */
975 if (TREE_CODE_CLASS (rhs_code
) == tcc_reference
)
977 /* Not grouped load. */
978 if (dump_enabled_p ())
980 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
981 "Build SLP failed: not grouped load ");
982 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, stmt
, 0);
985 /* FORNOW: Not grouped loads are not supported. */
986 /* Fatal mismatch. */
991 /* Not memory operation. */
992 if (TREE_CODE_CLASS (rhs_code
) != tcc_binary
993 && TREE_CODE_CLASS (rhs_code
) != tcc_unary
994 && TREE_CODE_CLASS (rhs_code
) != tcc_expression
995 && TREE_CODE_CLASS (rhs_code
) != tcc_comparison
996 && rhs_code
!= CALL_EXPR
)
998 if (dump_enabled_p ())
1000 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1001 "Build SLP failed: operation");
1002 dump_printf (MSG_MISSED_OPTIMIZATION
, " unsupported ");
1003 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, stmt
, 0);
1005 /* Fatal mismatch. */
1010 if (rhs_code
== COND_EXPR
)
1012 tree cond_expr
= gimple_assign_rhs1 (stmt
);
1013 enum tree_code cond_code
= TREE_CODE (cond_expr
);
1014 enum tree_code swap_code
= ERROR_MARK
;
1015 enum tree_code invert_code
= ERROR_MARK
;
1018 first_cond_code
= TREE_CODE (cond_expr
);
1019 else if (TREE_CODE_CLASS (cond_code
) == tcc_comparison
)
1021 bool honor_nans
= HONOR_NANS (TREE_OPERAND (cond_expr
, 0));
1022 swap_code
= swap_tree_comparison (cond_code
);
1023 invert_code
= invert_tree_comparison (cond_code
, honor_nans
);
1026 if (first_cond_code
== cond_code
)
1028 /* Isomorphic can be achieved by swapping. */
1029 else if (first_cond_code
== swap_code
)
1031 /* Isomorphic can be achieved by inverting. */
1032 else if (first_cond_code
== invert_code
)
1036 if (dump_enabled_p ())
1038 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1039 "Build SLP failed: different"
1041 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
,
1053 for (i
= 0; i
< group_size
; ++i
)
1057 /* If we allowed a two-operation SLP node verify the target can cope
1058 with the permute we are going to use. */
1059 if (alt_stmt_code
!= ERROR_MARK
1060 && TREE_CODE_CLASS (alt_stmt_code
) != tcc_reference
)
1062 if (vectype
== boolean_type_node
1063 || !vect_two_operations_perm_ok_p (stmts
, group_size
,
1064 vectype
, alt_stmt_code
))
1066 for (i
= 0; i
< group_size
; ++i
)
1067 if (gimple_assign_rhs_code (stmts
[i
]) == alt_stmt_code
)
1070 if (dump_enabled_p ())
1072 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1073 "Build SLP failed: different operation "
1075 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
,
1077 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1079 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
,
1085 *two_operators
= true;
1091 /* Traits for the hash_set to record failed SLP builds for a stmt set.
1092 Note we never remove apart from at destruction time so we do not
1093 need a special value for deleted that differs from empty. */
1096 typedef vec
<gimple
*> value_type
;
1097 typedef vec
<gimple
*> compare_type
;
1098 static inline hashval_t
hash (value_type
);
1099 static inline bool equal (value_type existing
, value_type candidate
);
1100 static inline bool is_empty (value_type x
) { return !x
.exists (); }
1101 static inline bool is_deleted (value_type x
) { return !x
.exists (); }
1102 static inline void mark_empty (value_type
&x
) { x
.release (); }
1103 static inline void mark_deleted (value_type
&x
) { x
.release (); }
1104 static inline void remove (value_type
&x
) { x
.release (); }
1107 bst_traits::hash (value_type x
)
1110 for (unsigned i
= 0; i
< x
.length (); ++i
)
1111 h
.add_int (gimple_uid (x
[i
]));
1115 bst_traits::equal (value_type existing
, value_type candidate
)
1117 if (existing
.length () != candidate
.length ())
1119 for (unsigned i
= 0; i
< existing
.length (); ++i
)
1120 if (existing
[i
] != candidate
[i
])
1125 typedef hash_set
<vec
<gimple
*>, bst_traits
> scalar_stmts_set_t
;
1126 static scalar_stmts_set_t
*bst_fail
;
1128 typedef hash_map
<vec
<gimple
*>, slp_tree
,
1129 simple_hashmap_traits
<bst_traits
, slp_tree
> >
1130 scalar_stmts_to_slp_tree_map_t
;
1133 vect_build_slp_tree_2 (vec_info
*vinfo
,
1134 vec
<gimple
*> stmts
, unsigned int group_size
,
1135 poly_uint64
*max_nunits
,
1136 vec
<slp_tree
> *loads
,
1137 bool *matches
, unsigned *npermutes
, unsigned *tree_size
,
1138 unsigned max_tree_size
);
1141 vect_build_slp_tree (vec_info
*vinfo
,
1142 vec
<gimple
*> stmts
, unsigned int group_size
,
1143 poly_uint64
*max_nunits
, vec
<slp_tree
> *loads
,
1144 bool *matches
, unsigned *npermutes
, unsigned *tree_size
,
1145 unsigned max_tree_size
)
1147 if (bst_fail
->contains (stmts
))
1149 slp_tree res
= vect_build_slp_tree_2 (vinfo
, stmts
, group_size
, max_nunits
,
1150 loads
, matches
, npermutes
, tree_size
,
1152 /* When SLP build fails for stmts record this, otherwise SLP build
1153 can be exponential in time when we allow to construct parts from
1154 scalars, see PR81723. */
1158 x
.create (stmts
.length ());
1165 /* Recursively build an SLP tree starting from NODE.
1166 Fail (and return a value not equal to zero) if def-stmts are not
1167 isomorphic, require data permutation or are of unsupported types of
1168 operation. Otherwise, return 0.
1169 The value returned is the depth in the SLP tree where a mismatch
1173 vect_build_slp_tree_2 (vec_info
*vinfo
,
1174 vec
<gimple
*> stmts
, unsigned int group_size
,
1175 poly_uint64
*max_nunits
,
1176 vec
<slp_tree
> *loads
,
1177 bool *matches
, unsigned *npermutes
, unsigned *tree_size
,
1178 unsigned max_tree_size
)
1180 unsigned nops
, i
, this_tree_size
= 0;
1181 poly_uint64 this_max_nunits
= *max_nunits
;
1188 if (is_gimple_call (stmt
))
1189 nops
= gimple_call_num_args (stmt
);
1190 else if (is_gimple_assign (stmt
))
1192 nops
= gimple_num_ops (stmt
) - 1;
1193 if (gimple_assign_rhs_code (stmt
) == COND_EXPR
)
1196 else if (gimple_code (stmt
) == GIMPLE_PHI
)
1201 /* If the SLP node is a PHI (induction or reduction), terminate
1203 if (gimple_code (stmt
) == GIMPLE_PHI
)
1205 tree scalar_type
= TREE_TYPE (PHI_RESULT (stmt
));
1206 tree vectype
= get_vectype_for_scalar_type (scalar_type
);
1207 if (!vect_record_max_nunits (vinfo
, stmt
, group_size
, vectype
,
1211 vect_def_type def_type
= STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt
));
1212 /* Induction from different IVs is not supported. */
1213 if (def_type
== vect_induction_def
)
1215 FOR_EACH_VEC_ELT (stmts
, i
, stmt
)
1216 if (stmt
!= stmts
[0])
1221 /* Else def types have to match. */
1222 FOR_EACH_VEC_ELT (stmts
, i
, stmt
)
1224 /* But for reduction chains only check on the first stmt. */
1225 if (REDUC_GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt
))
1226 && REDUC_GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt
)) != stmt
)
1228 if (STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt
)) != def_type
)
1232 node
= vect_create_new_slp_node (stmts
);
1237 bool two_operators
= false;
1238 unsigned char *swap
= XALLOCAVEC (unsigned char, group_size
);
1239 if (!vect_build_slp_tree_1 (vinfo
, swap
, stmts
, group_size
,
1240 &this_max_nunits
, matches
, &two_operators
))
1243 /* If the SLP node is a load, terminate the recursion. */
1244 if (STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt
))
1245 && DR_IS_READ (STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt
))))
1247 *max_nunits
= this_max_nunits
;
1248 node
= vect_create_new_slp_node (stmts
);
1249 loads
->safe_push (node
);
1253 /* Get at the operands, verifying they are compatible. */
1254 vec
<slp_oprnd_info
> oprnds_info
= vect_create_oprnd_info (nops
, group_size
);
1255 slp_oprnd_info oprnd_info
;
1256 FOR_EACH_VEC_ELT (stmts
, i
, stmt
)
1258 int res
= vect_get_and_check_slp_defs (vinfo
, &swap
[i
],
1259 stmts
, i
, &oprnds_info
);
1261 matches
[(res
== -1) ? 0 : i
] = false;
1265 for (i
= 0; i
< group_size
; ++i
)
1268 vect_free_oprnd_info (oprnds_info
);
1272 auto_vec
<slp_tree
, 4> children
;
1273 auto_vec
<slp_tree
> this_loads
;
1278 max_tree_size
-= *tree_size
;
1280 /* Create SLP_TREE nodes for the definition node/s. */
1281 FOR_EACH_VEC_ELT (oprnds_info
, i
, oprnd_info
)
1284 unsigned old_nloads
= this_loads
.length ();
1285 unsigned old_tree_size
= this_tree_size
;
1288 if (oprnd_info
->first_dt
!= vect_internal_def
1289 && oprnd_info
->first_dt
!= vect_reduction_def
1290 && oprnd_info
->first_dt
!= vect_induction_def
)
1293 if (++this_tree_size
> max_tree_size
)
1295 FOR_EACH_VEC_ELT (children
, j
, child
)
1296 vect_free_slp_tree (child
, false);
1297 vect_free_oprnd_info (oprnds_info
);
1301 if ((child
= vect_build_slp_tree (vinfo
, oprnd_info
->def_stmts
,
1302 group_size
, &this_max_nunits
,
1303 &this_loads
, matches
, npermutes
,
1305 max_tree_size
)) != NULL
)
1307 /* If we have all children of child built up from scalars then just
1308 throw that away and build it up this node from scalars. */
1309 if (!SLP_TREE_CHILDREN (child
).is_empty ()
1310 /* ??? Rejecting patterns this way doesn't work. We'd have to
1311 do extra work to cancel the pattern so the uses see the
1313 && !is_pattern_stmt_p
1314 (vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (child
)[0])))
1316 slp_tree grandchild
;
1318 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child
), j
, grandchild
)
1319 if (SLP_TREE_DEF_TYPE (grandchild
) == vect_internal_def
)
1324 this_loads
.truncate (old_nloads
);
1325 this_tree_size
= old_tree_size
;
1326 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child
), j
, grandchild
)
1327 vect_free_slp_tree (grandchild
, false);
1328 SLP_TREE_CHILDREN (child
).truncate (0);
1330 dump_printf_loc (MSG_NOTE
, vect_location
,
1331 "Building parent vector operands from "
1332 "scalars instead\n");
1333 oprnd_info
->def_stmts
= vNULL
;
1334 SLP_TREE_DEF_TYPE (child
) = vect_external_def
;
1335 children
.safe_push (child
);
1340 oprnd_info
->def_stmts
= vNULL
;
1341 children
.safe_push (child
);
1345 /* If the SLP build failed fatally and we analyze a basic-block
1346 simply treat nodes we fail to build as externally defined
1347 (and thus build vectors from the scalar defs).
1348 The cost model will reject outright expensive cases.
1349 ??? This doesn't treat cases where permutation ultimatively
1350 fails (or we don't try permutation below). Ideally we'd
1351 even compute a permutation that will end up with the maximum
1353 if (is_a
<bb_vec_info
> (vinfo
)
1355 /* ??? Rejecting patterns this way doesn't work. We'd have to
1356 do extra work to cancel the pattern so the uses see the
1358 && !is_pattern_stmt_p (vinfo_for_stmt (stmt
)))
1360 dump_printf_loc (MSG_NOTE
, vect_location
,
1361 "Building vector operands from scalars\n");
1362 child
= vect_create_new_slp_node (oprnd_info
->def_stmts
);
1363 SLP_TREE_DEF_TYPE (child
) = vect_external_def
;
1364 children
.safe_push (child
);
1365 oprnd_info
->def_stmts
= vNULL
;
1369 /* If the SLP build for operand zero failed and operand zero
1370 and one can be commutated try that for the scalar stmts
1371 that failed the match. */
1373 /* A first scalar stmt mismatch signals a fatal mismatch. */
1375 /* ??? For COND_EXPRs we can swap the comparison operands
1376 as well as the arms under some constraints. */
1378 && oprnds_info
[1]->first_dt
== vect_internal_def
1379 && is_gimple_assign (stmt
)
1380 /* Do so only if the number of not successful permutes was nor more
1381 than a cut-ff as re-trying the recursive match on
1382 possibly each level of the tree would expose exponential
1386 /* See whether we can swap the matching or the non-matching
1388 bool swap_not_matching
= true;
1391 for (j
= 0; j
< group_size
; ++j
)
1393 if (matches
[j
] != !swap_not_matching
)
1395 gimple
*stmt
= stmts
[j
];
1396 /* Verify if we can swap operands of this stmt. */
1397 if (!is_gimple_assign (stmt
)
1398 || !commutative_tree_code (gimple_assign_rhs_code (stmt
)))
1400 if (!swap_not_matching
)
1402 swap_not_matching
= false;
1405 /* Verify if we can safely swap or if we committed to a
1406 specific operand order already.
1407 ??? Instead of modifying GIMPLE stmts here we could
1408 record whether we want to swap operands in the SLP
1409 node and temporarily do that when processing it
1410 (or wrap operand accessors in a helper). */
1411 else if (swap
[j
] != 0
1412 || STMT_VINFO_NUM_SLP_USES (vinfo_for_stmt (stmt
)))
1414 if (!swap_not_matching
)
1416 if (dump_enabled_p ())
1418 dump_printf_loc (MSG_MISSED_OPTIMIZATION
,
1420 "Build SLP failed: cannot swap "
1421 "operands of shared stmt ");
1422 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
,
1423 TDF_SLIM
, stmts
[j
], 0);
1427 swap_not_matching
= false;
1432 while (j
!= group_size
);
1434 /* Swap mismatched definition stmts. */
1435 dump_printf_loc (MSG_NOTE
, vect_location
,
1436 "Re-trying with swapped operands of stmts ");
1437 for (j
= 0; j
< group_size
; ++j
)
1438 if (matches
[j
] == !swap_not_matching
)
1440 std::swap (oprnds_info
[0]->def_stmts
[j
],
1441 oprnds_info
[1]->def_stmts
[j
]);
1442 dump_printf (MSG_NOTE
, "%d ", j
);
1444 dump_printf (MSG_NOTE
, "\n");
1445 /* And try again with scratch 'matches' ... */
1446 bool *tem
= XALLOCAVEC (bool, group_size
);
1447 if ((child
= vect_build_slp_tree (vinfo
, oprnd_info
->def_stmts
,
1448 group_size
, &this_max_nunits
,
1449 &this_loads
, tem
, npermutes
,
1451 max_tree_size
)) != NULL
)
1453 /* ... so if successful we can apply the operand swapping
1454 to the GIMPLE IL. This is necessary because for example
1455 vect_get_slp_defs uses operand indexes and thus expects
1456 canonical operand order. This is also necessary even
1457 if we end up building the operand from scalars as
1458 we'll continue to process swapped operand two. */
1459 for (j
= 0; j
< group_size
; ++j
)
1461 gimple
*stmt
= stmts
[j
];
1462 gimple_set_plf (stmt
, GF_PLF_1
, false);
1464 for (j
= 0; j
< group_size
; ++j
)
1466 gimple
*stmt
= stmts
[j
];
1467 if (matches
[j
] == !swap_not_matching
)
1469 /* Avoid swapping operands twice. */
1470 if (gimple_plf (stmt
, GF_PLF_1
))
1472 swap_ssa_operands (stmt
, gimple_assign_rhs1_ptr (stmt
),
1473 gimple_assign_rhs2_ptr (stmt
));
1474 gimple_set_plf (stmt
, GF_PLF_1
, true);
1477 /* Verify we swap all duplicates or none. */
1479 for (j
= 0; j
< group_size
; ++j
)
1481 gimple
*stmt
= stmts
[j
];
1482 gcc_assert (gimple_plf (stmt
, GF_PLF_1
)
1483 == (matches
[j
] == !swap_not_matching
));
1486 /* If we have all children of child built up from scalars then
1487 just throw that away and build it up this node from scalars. */
1488 if (!SLP_TREE_CHILDREN (child
).is_empty ()
1489 /* ??? Rejecting patterns this way doesn't work. We'd have
1490 to do extra work to cancel the pattern so the uses see the
1492 && !is_pattern_stmt_p
1493 (vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (child
)[0])))
1496 slp_tree grandchild
;
1498 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child
), j
, grandchild
)
1499 if (SLP_TREE_DEF_TYPE (grandchild
) == vect_internal_def
)
1504 this_loads
.truncate (old_nloads
);
1505 this_tree_size
= old_tree_size
;
1506 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child
), j
, grandchild
)
1507 vect_free_slp_tree (grandchild
, false);
1508 SLP_TREE_CHILDREN (child
).truncate (0);
1510 dump_printf_loc (MSG_NOTE
, vect_location
,
1511 "Building parent vector operands from "
1512 "scalars instead\n");
1513 oprnd_info
->def_stmts
= vNULL
;
1514 SLP_TREE_DEF_TYPE (child
) = vect_external_def
;
1515 children
.safe_push (child
);
1520 oprnd_info
->def_stmts
= vNULL
;
1521 children
.safe_push (child
);
1529 gcc_assert (child
== NULL
);
1530 FOR_EACH_VEC_ELT (children
, j
, child
)
1531 vect_free_slp_tree (child
, false);
1532 vect_free_oprnd_info (oprnds_info
);
1536 vect_free_oprnd_info (oprnds_info
);
1539 *tree_size
+= this_tree_size
;
1540 *max_nunits
= this_max_nunits
;
1541 loads
->safe_splice (this_loads
);
1543 node
= vect_create_new_slp_node (stmts
);
1544 SLP_TREE_TWO_OPERATORS (node
) = two_operators
;
1545 SLP_TREE_CHILDREN (node
).splice (children
);
1549 /* Dump a slp tree NODE using flags specified in DUMP_KIND. */
/* NOTE(review): lossy extraction — the function's return type, remaining
   parameters, local declarations and braces (orig. lines 1550-1558) are
   missing here; restore from upstream GCC tree-vect-slp.c before editing.
   Visible behavior: prints the node header (tagging non-internal defs as
   "(external)"), then each scalar stmt, then recurses into the children.  */
1552 vect_print_slp_tree (dump_flags_t dump_kind
, dump_location_t loc
,
1559 dump_printf_loc (dump_kind
, loc
, "node%s\n",
1560 SLP_TREE_DEF_TYPE (node
) != vect_internal_def
1561 ? " (external)" : "");
/* Dump every scalar stmt of this node with its position index.  */
1562 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node
), i
, stmt
)
1564 dump_printf_loc (dump_kind
, loc
, "\tstmt %d ", i
);
1565 dump_gimple_stmt (dump_kind
, TDF_SLIM
, stmt
, 0);
/* Recursively dump the whole subtree.  */
1567 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), i
, child
)
1568 vect_print_slp_tree (dump_kind
, loc
, child
);
1572 /* Mark the tree rooted at NODE with MARK (PURE_SLP or HYBRID).
1573 If MARK is HYBRID, it refers to a specific stmt in NODE (the stmt at index
1574 J). Otherwise, MARK is PURE_SLP and J is -1, which indicates that all the
1575 stmts in NODE are to be marked. */
/* NOTE(review): lossy extraction — return type, locals and braces
   (orig. lines 1576-1586) are missing; verify against upstream before
   modifying.  */
1578 vect_mark_slp_stmts (slp_tree node
, enum slp_vect_type mark
, int j
)
/* Only internal-def nodes carry stmts that belong to the SLP instance;
   external/constant nodes are skipped (early return presumed dropped).  */
1584 if (SLP_TREE_DEF_TYPE (node
) != vect_internal_def
)
/* j < 0 selects every stmt, otherwise only the stmt at index J.  */
1587 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node
), i
, stmt
)
1588 if (j
< 0 || i
== j
)
1589 STMT_SLP_TYPE (vinfo_for_stmt (stmt
)) = mark
;
/* Propagate the marking down the whole SLP tree.  */
1591 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), i
, child
)
1592 vect_mark_slp_stmts (child
, mark
, j
);
1596 /* Mark the statements of the tree rooted at NODE as relevant (vect_used). */
/* NOTE(review): lossy extraction — signature head, locals and braces
   (orig. lines 1597-1608) are incomplete; restore from upstream before
   editing.  */
1599 vect_mark_slp_stmts_relevant (slp_tree node
)
1603 stmt_vec_info stmt_info
;
/* Skip external/constant nodes — they have no stmts to mark.  */
1606 if (SLP_TREE_DEF_TYPE (node
) != vect_internal_def
)
1609 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node
), i
, stmt
)
1611 stmt_info
= vinfo_for_stmt (stmt
);
/* A stmt may only be unmarked or already marked used-in-scope.  */
1612 gcc_assert (!STMT_VINFO_RELEVANT (stmt_info
)
1613 || STMT_VINFO_RELEVANT (stmt_info
) == vect_used_in_scope
);
1614 STMT_VINFO_RELEVANT (stmt_info
) = vect_used_in_scope
;
/* Recurse into children so the whole subtree is marked relevant.  */
1617 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), i
, child
)
1618 vect_mark_slp_stmts_relevant (child
);
1622 /* Rearrange the statements of NODE according to PERMUTATION. */
/* NOTE(review): lossy extraction — return type, some locals and braces
   are missing (orig. lines 1623-1632); confirm against upstream.
   Visible behavior: post-order recursion, then stmt i moves to slot
   permutation[i] via a temporary vector that replaces the node's stmts.  */
1625 vect_slp_rearrange_stmts (slp_tree node
, unsigned int group_size
,
1626 vec
<unsigned> permutation
)
1629 vec
<gimple
*> tmp_stmts
;
/* Rearrange the children first (depth-first).  */
1633 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), i
, child
)
1634 vect_slp_rearrange_stmts (child
, group_size
, permutation
)
;
1636 gcc_assert (group_size
== SLP_TREE_SCALAR_STMTS (node
).length ());
1637 tmp_stmts
.create (group_size
);
1638 tmp_stmts
.quick_grow_cleared (group_size
);
/* Scatter: stmt at position I lands at permutation[I].  */
1640 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node
), i
, stmt
)
1641 tmp_stmts
[permutation
[i
]] = stmt
;
/* Swap in the permuted vector; old stmt vector is released.  */
1643 SLP_TREE_SCALAR_STMTS (node
).release ();
1644 SLP_TREE_SCALAR_STMTS (node
) = tmp_stmts
;
1648 /* Attempt to reorder stmts in a reduction chain so that we don't
1649 require any load permutation. Return true if that was possible,
1650 otherwise return false. */
/* NOTE(review): lossy extraction — return type, several locals, braces and
   the early-return paths are missing (orig. lines 1651-1697 only partially
   present); restore from upstream GCC tree-vect-slp.c before editing.  */
1653 vect_attempt_slp_rearrange_stmts (slp_instance slp_instn
)
1655 unsigned int group_size
= SLP_INSTANCE_GROUP_SIZE (slp_instn
);
1658 slp_tree node
, load
;
1660 /* Compare all the permutation sequences to the first one. We know
1661 that at least one load is permuted. */
1662 node
= SLP_INSTANCE_LOADS (slp_instn
)[0];
1663 if (!node
->load_permutation
.exists ())
/* All load nodes must carry the identical permutation for the
   rearrangement to be possible.  */
1665 for (i
= 1; SLP_INSTANCE_LOADS (slp_instn
).iterate (i
, &load
); ++i
)
1667 if (!load
->load_permutation
.exists ())
1669 FOR_EACH_VEC_ELT (load
->load_permutation
, j
, lidx
)
1670 if (lidx
!= node
->load_permutation
[j
])
1674 /* Check that the loads in the first sequence are different and there
1675 are no gaps between them. */
/* load_index acts as a seen-set: each lane index must appear exactly
   once and lie below group_size.  */
1676 auto_sbitmap
load_index (group_size
);
1677 bitmap_clear (load_index
);
1678 FOR_EACH_VEC_ELT (node
->load_permutation
, i
, lidx
)
1680 if (lidx
>= group_size
)
1682 if (bitmap_bit_p (load_index
, lidx
))
1685 bitmap_set_bit (load_index
, lidx
);
/* Verify the permutation covers every lane (no gaps).  */
1687 for (i
= 0; i
< group_size
; i
++)
1688 if (!bitmap_bit_p (load_index
, i
))
1691 /* This permutation is valid for reduction. Since the order of the
1692 statements in the nodes is not important unless they are memory
1693 accesses, we can rearrange the statements in all the nodes
1694 according to the order of the loads. */
1695 vect_slp_rearrange_stmts (SLP_INSTANCE_TREE (slp_instn
), group_size
,
1696 node
->load_permutation
);
1698 /* We are done, no actual permutations need to be generated. */
1699 poly_uint64 unrolling_factor
= SLP_INSTANCE_UNROLLING_FACTOR (slp_instn
);
1700 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn
), i
, node
)
1702 gimple
*first_stmt
= SLP_TREE_SCALAR_STMTS (node
)[0];
1703 first_stmt
= DR_GROUP_FIRST_ELEMENT (vinfo_for_stmt (first_stmt
));
1704 /* But we have to keep those permutations that are required because
1705 of handling of gaps. */
1706 if (known_eq (unrolling_factor
, 1U)
1707 || (group_size
== DR_GROUP_SIZE (vinfo_for_stmt (first_stmt
))
1708 && DR_GROUP_GAP (vinfo_for_stmt (first_stmt
)) == 0))
1709 SLP_TREE_LOAD_PERMUTATION (node
).release ();
/* Otherwise reset the kept permutation to the identity — the stmts
   themselves were already reordered above.  */
1711 for (j
= 0; j
< SLP_TREE_LOAD_PERMUTATION (node
).length (); ++j
)
1712 SLP_TREE_LOAD_PERMUTATION (node
)[j
] = j
;
1718 /* Check if the required load permutations in the SLP instance
1719 SLP_INSTN are supported. */
/* NOTE(review): lossy extraction — return type, braces, several early
   returns and locals (e.g. `tem', `n_perms', `test_vf' declarations) are
   missing; this body cannot be safely modified without restoring the
   upstream text.  */
1722 vect_supported_load_permutation_p (slp_instance slp_instn
)
1724 unsigned int group_size
= SLP_INSTANCE_GROUP_SIZE (slp_instn
);
1725 unsigned int i
, j
, k
, next
;
1727 gimple
*stmt
, *load
, *next_load
;
/* Dump the per-node load permutations (or the identity) for debugging.  */
1729 if (dump_enabled_p ())
1731 dump_printf_loc (MSG_NOTE
, vect_location
, "Load permutation ");
1732 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn
), i
, node
)
1733 if (node
->load_permutation
.exists ())
1734 FOR_EACH_VEC_ELT (node
->load_permutation
, j
, next
)
1735 dump_printf (MSG_NOTE
, "%d ", next
);
1737 for (k
= 0; k
< group_size
; ++k
)
1738 dump_printf (MSG_NOTE
, "%d ", k
);
1739 dump_printf (MSG_NOTE
, "\n");
1742 /* In case of reduction every load permutation is allowed, since the order
1743 of the reduction statements is not important (as opposed to the case of
1744 grouped stores). The only condition we need to check is that all the
1745 load nodes are of the same size and have the same permutation (and then
1746 rearrange all the nodes of the SLP instance according to this
1749 /* Check that all the load nodes are of the same size. */
1750 /* ??? Can't we assert this? */
1751 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn
), i
, node
)
1752 if (SLP_TREE_SCALAR_STMTS (node
).length () != (unsigned) group_size
)
1755 node
= SLP_INSTANCE_TREE (slp_instn
);
1756 stmt
= SLP_TREE_SCALAR_STMTS (node
)[0];
1758 /* Reduction (there are no data-refs in the root).
1759 In reduction chain the order of the loads is not important. */
1760 if (!STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt
))
1761 && !REDUC_GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt
)))
1762 vect_attempt_slp_rearrange_stmts (slp_instn
);
1764 /* In basic block vectorization we allow any subchain of an interleaving
1766 FORNOW: not supported in loop SLP because of realignment compications. */
1767 if (STMT_VINFO_BB_VINFO (vinfo_for_stmt (stmt
)))
1769 /* Check whether the loads in an instance form a subchain and thus
1770 no permutation is necessary. */
1771 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn
), i
, node
)
1773 if (!SLP_TREE_LOAD_PERMUTATION (node
).exists ())
1775 bool subchain_p
= true;
/* A subchain means consecutive elements of the interleaving chain
   with unit gaps, in order.  */
1777 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node
), j
, load
)
1780 && (next_load
!= load
1781 || DR_GROUP_GAP (vinfo_for_stmt (load
)) != 1))
1786 next_load
= DR_GROUP_NEXT_ELEMENT (vinfo_for_stmt (load
));
/* Subchain confirmed: the permutation is unnecessary, drop it.  */
1789 SLP_TREE_LOAD_PERMUTATION (node
).release ();
1792 stmt_vec_info group_info
1793 = vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (node
)[0]);
1794 group_info
= vinfo_for_stmt (DR_GROUP_FIRST_ELEMENT (group_info
));
1795 unsigned HOST_WIDE_INT nunits
;
1796 unsigned k
, maxk
= 0;
/* Track the maximum accessed lane to detect trailing gaps.  */
1797 FOR_EACH_VEC_ELT (SLP_TREE_LOAD_PERMUTATION (node
), j
, k
)
1800 /* In BB vectorization we may not actually use a loaded vector
1801 accessing elements in excess of DR_GROUP_SIZE. */
1802 tree vectype
= STMT_VINFO_VECTYPE (group_info
);
1803 if (!TYPE_VECTOR_SUBPARTS (vectype
).is_constant (&nunits
)
1804 || maxk
>= (DR_GROUP_SIZE (group_info
) & ~(nunits
- 1)))
1806 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1807 "BB vectorization with gaps at the end of "
1808 "a load is not supported\n");
1812 /* Verify the permutation can be generated. */
1815 if (!vect_transform_slp_perm_load (node
, tem
, NULL
,
1816 1, slp_instn
, true, &n_perms
))
1818 dump_printf_loc (MSG_MISSED_OPTIMIZATION
,
1820 "unsupported load permutation\n");
1828 /* For loop vectorization verify we can generate the permutation. Be
1829 conservative about the vectorization factor, there are permutations
1830 that will use three vector inputs only starting from a specific factor
1831 and the vectorization factor is not yet final.
1832 ??? The SLP instance unrolling factor might not be the maximum one. */
1835 = force_common_multiple (SLP_INSTANCE_UNROLLING_FACTOR (slp_instn
),
1836 LOOP_VINFO_VECT_FACTOR
1837 (STMT_VINFO_LOOP_VINFO (vinfo_for_stmt (stmt
))));
/* Dry-run the permutation generation (analysis only) for each
   permuted load node under the conservative test VF.  */
1838 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn
), i
, node
)
1839 if (node
->load_permutation
.exists ()
1840 && !vect_transform_slp_perm_load (node
, vNULL
, NULL
, test_vf
,
1841 slp_instn
, true, &n_perms
))
1848 /* Find the last store in SLP INSTANCE. */
/* NOTE(review): lossy extraction — return type, final `return last;' and
   braces are missing; confirm against upstream.  Walks the node's scalar
   stmts and keeps the latest one (using the original stmt of a pattern
   stmt when applicable).  */
1851 vect_find_last_scalar_stmt_in_slp (slp_tree node
)
1853 gimple
*last
= NULL
, *stmt
;
1855 for (int i
= 0; SLP_TREE_SCALAR_STMTS (node
).iterate (i
, &stmt
); i
++)
1857 stmt_vec_info stmt_vinfo
= vinfo_for_stmt (stmt
);
/* For pattern stmts compare positions of the original stmt, since the
   pattern stmt itself is not in the IL sequence.  */
1858 if (is_pattern_stmt_p (stmt_vinfo
))
1859 last
= get_later_stmt (STMT_VINFO_RELATED_STMT (stmt_vinfo
), last
);
1861 last
= get_later_stmt (stmt
, last
);
1867 /* Splits a group of stores, currently beginning at FIRST_STMT, into two groups:
1868 one (still beginning at FIRST_STMT) of size GROUP1_SIZE (also containing
1869 the first GROUP1_SIZE stmts, since stores are consecutive), the second
1870 containing the remainder.
1871 Return the first stmt in the second group. */
/* NOTE(review): lossy extraction — return type, `return group2;' and braces
   are missing (orig. lines 1872-1875, 1911-1913); restore from upstream
   before editing.  */
1874 vect_split_slp_store_group (gimple
*first_stmt
, unsigned group1_size
)
1876 stmt_vec_info first_vinfo
= vinfo_for_stmt (first_stmt
);
/* Caller must pass the group leader and a proper (non-degenerate) split.  */
1877 gcc_assert (DR_GROUP_FIRST_ELEMENT (first_vinfo
) == first_stmt
);
1878 gcc_assert (group1_size
> 0);
1879 int group2_size
= DR_GROUP_SIZE (first_vinfo
) - group1_size
;
1880 gcc_assert (group2_size
> 0);
1881 DR_GROUP_SIZE (first_vinfo
) = group1_size
;
/* Walk to the last element of the first group.  */
1883 gimple
*stmt
= first_stmt
;
1884 for (unsigned i
= group1_size
; i
> 1; i
--)
1886 stmt
= DR_GROUP_NEXT_ELEMENT (vinfo_for_stmt (stmt
));
1887 gcc_assert (DR_GROUP_GAP (vinfo_for_stmt (stmt
)) == 1);
1889 /* STMT is now the last element of the first group. */
1890 gimple
*group2
= DR_GROUP_NEXT_ELEMENT (vinfo_for_stmt (stmt
));
/* Cut the chain: the first group now ends at STMT.  */
1891 DR_GROUP_NEXT_ELEMENT (vinfo_for_stmt (stmt
)) = 0;
/* Re-head the remainder: every element of the second group points at
   GROUP2 as its leader.  */
1893 DR_GROUP_SIZE (vinfo_for_stmt (group2
)) = group2_size
;
1894 for (stmt
= group2
; stmt
; stmt
= DR_GROUP_NEXT_ELEMENT (vinfo_for_stmt (stmt
)))
1896 DR_GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt
)) = group2
;
1897 gcc_assert (DR_GROUP_GAP (vinfo_for_stmt (stmt
)) == 1);
1900 /* For the second group, the DR_GROUP_GAP is that before the original group,
1901 plus skipping over the first vector. */
1902 DR_GROUP_GAP (vinfo_for_stmt (group2
))
1903 = DR_GROUP_GAP (first_vinfo
) + group1_size
;
1905 /* DR_GROUP_GAP of the first group now has to skip over the second group too. */
1906 DR_GROUP_GAP (first_vinfo
) += group2_size
;
1908 if (dump_enabled_p ())
1909 dump_printf_loc (MSG_NOTE
, vect_location
, "Split group into %d and %d\n",
1910 group1_size
, group2_size
);
1915 /* Calculate the unrolling factor for an SLP instance with GROUP_SIZE
1916 statements and a vector of NUNITS elements. */
/* NOTE(review): lossy extraction — the return-type line (presumably
   poly_uint64, matching exact_div's result) and braces are missing.
   Computes lcm (nunits, group_size) / group_size, i.e. how many times the
   group must be unrolled to fill whole vectors.  */
1919 calculate_unrolling_factor (poly_uint64 nunits
, unsigned int group_size
)
1921 return exact_div (common_multiple (nunits
, group_size
), group_size
);
1924 /* Analyze an SLP instance starting from a group of grouped stores. Call
1925 vect_build_slp_tree to build a tree of packed stmts if possible.
1926 Return FALSE if it's impossible to SLP any stmt in the loop. */
/* NOTE(review): lossy extraction — return type (bool), braces, several
   returns and some locals (`node', `next', `i', `load_node') are missing
   throughout; do not edit this body without restoring the upstream text.
   Overall flow: determine vectype/group_size for one of three roots
   (grouped store, reduction chain, reduction group), collect the scalar
   stmts, build the SLP tree, validate the unrolling factor and load
   permutations, and either record the instance or (for BB SLP) retry on a
   split store group.  */
1929 vect_analyze_slp_instance (vec_info
*vinfo
,
1930 gimple
*stmt
, unsigned max_tree_size
)
1932 slp_instance new_instance
;
1934 unsigned int group_size
;
1935 tree vectype
, scalar_type
= NULL_TREE
;
1938 vec
<slp_tree
> loads
;
1939 struct data_reference
*dr
= STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt
));
1940 vec
<gimple
*> scalar_stmts
;
/* Case 1: STMT heads a grouped (interleaved) access.  */
1942 if (STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt
)))
1944 scalar_type
= TREE_TYPE (DR_REF (dr
));
1945 vectype
= get_vectype_for_scalar_type (scalar_type
);
1946 group_size
= DR_GROUP_SIZE (vinfo_for_stmt (stmt
));
/* Case 2: STMT heads a reduction chain (loop SLP only).  */
1948 else if (!dr
&& REDUC_GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt
)))
1950 gcc_assert (is_a
<loop_vec_info
> (vinfo
));
1951 vectype
= STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt
));
1952 group_size
= REDUC_GROUP_SIZE (vinfo_for_stmt (stmt
));
/* Case 3: a group of independent reductions (loop SLP only).  */
1956 gcc_assert (is_a
<loop_vec_info
> (vinfo
));
1957 vectype
= STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt
));
1958 group_size
= as_a
<loop_vec_info
> (vinfo
)->reductions
.length ();
/* No usable vector type -> report and fail (return presumed dropped).  */
1963 if (dump_enabled_p ())
1965 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1966 "Build SLP failed: unsupported data-type ");
1967 dump_generic_expr (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, scalar_type
);
1968 dump_printf (MSG_MISSED_OPTIMIZATION
, "\n");
1973 poly_uint64 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
1975 /* Create a node (a root of the SLP tree) for the packed grouped stores. */
1976 scalar_stmts
.create (group_size
);
1978 if (STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt
)))
1980 /* Collect the stores and store them in SLP_TREE_SCALAR_STMTS. */
/* Prefer the pattern stmt over the original when one exists.  */
1983 if (STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (next
))
1984 && STMT_VINFO_RELATED_STMT (vinfo_for_stmt (next
)))
1985 scalar_stmts
.safe_push (
1986 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (next
)));
1988 scalar_stmts
.safe_push (next
);
1989 next
= DR_GROUP_NEXT_ELEMENT (vinfo_for_stmt (next
));
1992 else if (!dr
&& REDUC_GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt
)))
1994 /* Collect the reduction stmts and store them in
1995 SLP_TREE_SCALAR_STMTS. */
1998 if (STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (next
))
1999 && STMT_VINFO_RELATED_STMT (vinfo_for_stmt (next
)))
2000 scalar_stmts
.safe_push (
2001 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (next
)));
2003 scalar_stmts
.safe_push (next
);
2004 next
= REDUC_GROUP_NEXT_ELEMENT (vinfo_for_stmt (next
));
2006 /* Mark the first element of the reduction chain as reduction to properly
2007 transform the node. In the reduction analysis phase only the last
2008 element of the chain is marked as reduction. */
2009 STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt
)) = vect_reduction_def
;
2013 /* Collect reduction statements. */
2014 vec
<gimple
*> reductions
= as_a
<loop_vec_info
> (vinfo
)->reductions
;
2015 for (i
= 0; reductions
.iterate (i
, &next
); i
++)
2016 scalar_stmts
.safe_push (next
);
2019 loads
.create (group_size
);
2021 /* Build the tree for the SLP instance. */
2022 bool *matches
= XALLOCAVEC (bool, group_size
);
2023 unsigned npermutes
= 0;
/* bst_fail caches scalar-stmt sets that already failed tree building.  */
2024 bst_fail
= new scalar_stmts_set_t ();
2025 poly_uint64 max_nunits
= nunits
;
2026 node
= vect_build_slp_tree (vinfo
, scalar_stmts
, group_size
,
2027 &max_nunits
, &loads
, matches
, &npermutes
,
2028 NULL
, max_tree_size
);
2032 /* Calculate the unrolling factor based on the smallest type. */
2033 poly_uint64 unrolling_factor
2034 = calculate_unrolling_factor (max_nunits
, group_size
);
/* BB SLP cannot unroll: the group size must cover whole vectors.  */
2036 if (maybe_ne (unrolling_factor
, 1U)
2037 && is_a
<bb_vec_info
> (vinfo
))
2039 unsigned HOST_WIDE_INT const_max_nunits
;
2040 if (!max_nunits
.is_constant (&const_max_nunits
)
2041 || const_max_nunits
> group_size
)
2043 if (dump_enabled_p ())
2044 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2045 "Build SLP failed: store group "
2046 "size not a multiple of the vector size "
2047 "in basic block SLP\n");
2048 vect_free_slp_tree (node
, false);
2052 /* Fatal mismatch. */
/* Mark the first lane past the last whole vector as mismatched so the
   split logic below retries on a vector-sized prefix.  */
2053 matches
[group_size
/ const_max_nunits
* const_max_nunits
] = false;
2054 vect_free_slp_tree (node
, false);
2059 /* Create a new SLP instance. */
2060 new_instance
= XNEW (struct _slp_instance
);
2061 SLP_INSTANCE_TREE (new_instance
) = node
;
2062 SLP_INSTANCE_GROUP_SIZE (new_instance
) = group_size
;
2063 SLP_INSTANCE_UNROLLING_FACTOR (new_instance
) = unrolling_factor
;
2064 SLP_INSTANCE_LOADS (new_instance
) = loads
;
2066 /* Compute the load permutation. */
2068 bool loads_permuted
= false;
2069 FOR_EACH_VEC_ELT (loads
, i
, load_node
)
2071 vec
<unsigned> load_permutation
;
2073 gimple
*load
, *first_stmt
;
2074 bool this_load_permuted
= false;
2075 load_permutation
.create (group_size
);
2076 first_stmt
= DR_GROUP_FIRST_ELEMENT
2077 (vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (load_node
)[0]));
2078 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (load_node
), j
, load
)
2080 int load_place
= vect_get_place_in_interleaving_chain
2082 gcc_assert (load_place
!= -1);
2083 if (load_place
!= j
)
2084 this_load_permuted
= true;
2085 load_permutation
.safe_push (load_place
);
2087 if (!this_load_permuted
2088 /* The load requires permutation when unrolling exposes
2089 a gap either because the group is larger than the SLP
2090 group-size or because there is a gap between the groups. */
2091 && (known_eq (unrolling_factor
, 1U)
2092 || (group_size
== DR_GROUP_SIZE (vinfo_for_stmt (first_stmt
))
2093 && DR_GROUP_GAP (vinfo_for_stmt (first_stmt
)) == 0)))
2095 load_permutation
.release ();
2098 SLP_TREE_LOAD_PERMUTATION (load_node
) = load_permutation
;
2099 loads_permuted
= true;
/* Reject instances whose permutations cannot be code-generated.  */
2104 if (!vect_supported_load_permutation_p (new_instance
))
2106 if (dump_enabled_p ())
2108 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2109 "Build SLP failed: unsupported load "
2111 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
,
2114 vect_free_slp_instance (new_instance
, false);
2119 /* If the loads and stores can be handled with load/store-lan
2120 instructions do not generate this SLP instance. */
2121 if (is_a
<loop_vec_info
> (vinfo
)
2123 && dr
&& vect_store_lanes_supported (vectype
, group_size
, false))
2126 FOR_EACH_VEC_ELT (loads
, i
, load_node
)
2128 gimple
*first_stmt
= DR_GROUP_FIRST_ELEMENT
2129 (vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (load_node
)[0]));
2130 stmt_vec_info stmt_vinfo
= vinfo_for_stmt (first_stmt
);
2131 /* Use SLP for strided accesses (or if we
2132 can't load-lanes). */
2133 if (STMT_VINFO_STRIDED_P (stmt_vinfo
)
2134 || ! vect_load_lanes_supported
2135 (STMT_VINFO_VECTYPE (stmt_vinfo
),
2136 DR_GROUP_SIZE (stmt_vinfo
), false))
/* All loads were load-lanes capable -> cancel the SLP instance.  */
2139 if (i
== loads
.length ())
2141 if (dump_enabled_p ())
2142 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2143 "Built SLP cancelled: can use "
2144 "load/store-lanes\n");
2145 vect_free_slp_instance (new_instance
, false);
/* Success: record the instance.  */
2150 vinfo
->slp_instances
.safe_push (new_instance
);
2152 if (dump_enabled_p ())
2154 dump_printf_loc (MSG_NOTE
, vect_location
,
2155 "Final SLP tree for instance:\n");
2156 vect_print_slp_tree (MSG_NOTE
, vect_location
, node
);
2164 /* Failed to SLP. */
2165 /* Free the allocated memory. */
2166 scalar_stmts
.release ();
2170 /* For basic block SLP, try to break the group up into multiples of the
2172 unsigned HOST_WIDE_INT const_nunits
;
2173 if (is_a
<bb_vec_info
> (vinfo
)
2174 && STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt
))
2175 && DR_GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt
))
2176 && nunits
.is_constant (&const_nunits
))
2178 /* We consider breaking the group only on VF boundaries from the existing
2180 for (i
= 0; i
< group_size
; i
++)
2181 if (!matches
[i
]) break;
2183 if (i
>= const_nunits
&& i
< group_size
)
2185 /* Split into two groups at the first vector boundary before i. */
/* const_nunits must be a power of two for the mask trick below.  */
2186 gcc_assert ((const_nunits
& (const_nunits
- 1)) == 0);
2187 unsigned group1_size
= i
& ~(const_nunits
- 1);
2189 gimple
*rest
= vect_split_slp_store_group (stmt
, group1_size
);
2190 bool res
= vect_analyze_slp_instance (vinfo
, stmt
, max_tree_size
);
2191 /* If the first non-match was in the middle of a vector,
2192 skip the rest of that vector. */
2193 if (group1_size
< i
)
2195 i
= group1_size
+ const_nunits
;
2197 rest
= vect_split_slp_store_group (rest
, const_nunits
);
2200 res
|= vect_analyze_slp_instance (vinfo
, rest
, max_tree_size
);
2203 /* Even though the first vector did not all match, we might be able to SLP
2204 (some) of the remainder. FORNOW ignore this possibility. */
2211 /* Check if there are stmts in the loop can be vectorized using SLP. Build SLP
2212 trees of packed scalar stmts if SLP is possible. */
/* NOTE(review): lossy extraction — return type/value, braces, loop headers
   for the dissolve walk and the trailing argument of the last call
   (presumably max_tree_size) are missing; restore from upstream before
   editing.  Drives instance analysis over grouped stores, reduction chains
   and reduction groups.  */
2215 vect_analyze_slp (vec_info
*vinfo
, unsigned max_tree_size
)
2218 gimple
*first_element
;
2220 DUMP_VECT_SCOPE ("vect_analyze_slp");
2222 /* Find SLP sequences starting from groups of grouped stores. */
2223 FOR_EACH_VEC_ELT (vinfo
->grouped_stores
, i
, first_element
)
2224 vect_analyze_slp_instance (vinfo
, first_element
, max_tree_size
);
2226 if (loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
))
2228 if (loop_vinfo
->reduction_chains
.length () > 0)
2230 /* Find SLP sequences starting from reduction chains. */
2231 FOR_EACH_VEC_ELT (loop_vinfo
->reduction_chains
, i
, first_element
)
2232 if (! vect_analyze_slp_instance (vinfo
, first_element
,
2235 /* Dissolve reduction chain group. */
/* On failure, unlink every element so the chain's stmts can still be
   vectorized individually.  */
2236 gimple
*next
, *stmt
= first_element
;
/* NOTE(review): local shadows the function parameter `vinfo' here —
   intentional in the original but worth keeping in mind when editing.  */
2239 stmt_vec_info vinfo
= vinfo_for_stmt (stmt
);
2240 next
= REDUC_GROUP_NEXT_ELEMENT (vinfo
);
2241 REDUC_GROUP_FIRST_ELEMENT (vinfo
) = NULL
;
2242 REDUC_GROUP_NEXT_ELEMENT (vinfo
) = NULL
;
/* The former chain head becomes a plain internal def.  */
2245 STMT_VINFO_DEF_TYPE (vinfo_for_stmt (first_element
))
2246 = vect_internal_def
;
2250 /* Find SLP sequences starting from groups of reductions. */
2251 if (loop_vinfo
->reductions
.length () > 1)
2252 vect_analyze_slp_instance (vinfo
, loop_vinfo
->reductions
[0],
2260 /* For each possible SLP instance decide whether to SLP it and calculate overall
2261 unrolling factor needed to SLP the loop. Return TRUE if decided to SLP at
2262 least one instance. */
/* NOTE(review): lossy extraction — return type (bool per the final
   expression), braces, the `decided_to_slp++' update and the assignment
   target of the force_common_multiple call (unrolling_factor) are missing
   lines; restore from upstream before editing.  */
2265 vect_make_slp_decision (loop_vec_info loop_vinfo
)
2268 poly_uint64 unrolling_factor
= 1;
2269 vec
<slp_instance
> slp_instances
= LOOP_VINFO_SLP_INSTANCES (loop_vinfo
);
2270 slp_instance instance
;
2271 int decided_to_slp
= 0;
2273 DUMP_VECT_SCOPE ("vect_make_slp_decision");
2275 FOR_EACH_VEC_ELT (slp_instances
, i
, instance
)
2277 /* FORNOW: SLP if you can. */
2278 /* All unroll factors have the form current_vector_size * X for some
2279 rational X, so they must have a common multiple. */
2281 = force_common_multiple (unrolling_factor
,
2282 SLP_INSTANCE_UNROLLING_FACTOR (instance
));
2284 /* Mark all the stmts that belong to INSTANCE as PURE_SLP stmts. Later we
2285 call vect_detect_hybrid_slp () to find stmts that need hybrid SLP and
2286 loop-based vectorization. Such stmts will be marked as HYBRID. */
2287 vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance
), pure_slp
, -1);
2291 LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo
) = unrolling_factor
;
2293 if (decided_to_slp
&& dump_enabled_p ())
2295 dump_printf_loc (MSG_NOTE
, vect_location
,
2296 "Decided to SLP %d instances. Unrolling factor ",
2298 dump_dec (MSG_NOTE
, unrolling_factor
);
2299 dump_printf (MSG_NOTE
, "\n");
2302 return (decided_to_slp
> 0);
2306 /* Find stmts that must be both vectorized and SLPed (since they feed stmts that
2307 can't be SLPed) in the tree rooted at NODE. Mark such stmts as HYBRID. */
/* NOTE(review): lossy extraction — return type, braces, several locals
   (`def', `use_stmt', `j', `child'), goto/early-exit paths and parts of
   the use-scan condition chain are missing; restore from upstream GCC
   tree-vect-slp.c before editing.  */
2310 vect_detect_hybrid_slp_stmts (slp_tree node
, unsigned i
, slp_vect_type stype
)
2312 gimple
*stmt
= SLP_TREE_SCALAR_STMTS (node
)[i
];
2313 imm_use_iterator imm_iter
;
2315 stmt_vec_info use_vinfo
, stmt_vinfo
= vinfo_for_stmt (stmt
);
2317 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_vinfo
);
2320 /* Propagate hybrid down the SLP tree. */
2321 if (stype
== hybrid
)
2323 else if (HYBRID_SLP_STMT (stmt_vinfo
))
2327 /* Check if a pure SLP stmt has uses in non-SLP stmts. */
2328 gcc_checking_assert (PURE_SLP_STMT (stmt_vinfo
));
2329 /* If we get a pattern stmt here we have to use the LHS of the
2330 original stmt for immediate uses. */
2331 if (! STMT_VINFO_IN_PATTERN_P (stmt_vinfo
)
2332 && STMT_VINFO_RELATED_STMT (stmt_vinfo
))
2333 stmt
= STMT_VINFO_RELATED_STMT (stmt_vinfo
);
/* Pick the single SSA def whose immediate uses we scan.  */
2335 if (gimple_code (stmt
) == GIMPLE_PHI
)
2336 def
= gimple_phi_result (stmt
);
2338 def
= SINGLE_SSA_TREE_OPERAND (stmt
, SSA_OP_DEF
);
2340 FOR_EACH_IMM_USE_STMT (use_stmt
, imm_iter
, def
)
2342 use_vinfo
= loop_vinfo
->lookup_stmt (use_stmt
);
/* Follow pattern replacement of the use as well.  */
2345 if (STMT_VINFO_IN_PATTERN_P (use_vinfo
)
2346 && STMT_VINFO_RELATED_STMT (use_vinfo
))
2347 use_vinfo
= vinfo_for_stmt (STMT_VINFO_RELATED_STMT (use_vinfo
));
/* A relevant non-SLP use (other than a reduction PHI) makes this
   stmt hybrid.  */
2348 if (!STMT_SLP_TYPE (use_vinfo
)
2349 && (STMT_VINFO_RELEVANT (use_vinfo
)
2350 || VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (use_vinfo
)))
2351 && !(gimple_code (use_stmt
) == GIMPLE_PHI
2352 && STMT_VINFO_DEF_TYPE (use_vinfo
) == vect_reduction_def
))
2354 if (dump_enabled_p ())
2356 dump_printf_loc (MSG_NOTE
, vect_location
, "use of SLP "
2357 "def in non-SLP stmt: ");
2358 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, use_stmt
, 0);
2366 && !HYBRID_SLP_STMT (stmt_vinfo
))
2368 if (dump_enabled_p ())
2370 dump_printf_loc (MSG_NOTE
, vect_location
, "marking hybrid: ");
2371 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, stmt
, 0);
2373 STMT_SLP_TYPE (stmt_vinfo
) = hybrid
;
/* Recurse into internal/external children except pure external defs.  */
2376 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), j
, child
)
2377 if (SLP_TREE_DEF_TYPE (child
) != vect_external_def
)
2378 vect_detect_hybrid_slp_stmts (child
, i
, stype
);
2381 /* Helpers for vect_detect_hybrid_slp walking pattern stmt uses. */
2384 vect_detect_hybrid_slp_1 (tree
*tp
, int *, void *data
)
2386 walk_stmt_info
*wi
= (walk_stmt_info
*)data
;
2387 loop_vec_info loop_vinfo
= (loop_vec_info
) wi
->info
;
2392 stmt_vec_info def_stmt_info
= loop_vinfo
->lookup_def (*tp
);
2393 if (def_stmt_info
&& PURE_SLP_STMT (def_stmt_info
))
2395 if (dump_enabled_p ())
2397 dump_printf_loc (MSG_NOTE
, vect_location
, "marking hybrid: ");
2398 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, def_stmt_info
->stmt
, 0);
2400 STMT_SLP_TYPE (def_stmt_info
) = hybrid
;
2407 vect_detect_hybrid_slp_2 (gimple_stmt_iterator
*gsi
, bool *handled
,
2410 loop_vec_info loop_vinfo
= (loop_vec_info
) wi
->info
;
2411 stmt_vec_info use_vinfo
= loop_vinfo
->lookup_stmt (gsi_stmt (*gsi
));
2412 /* If the stmt is in a SLP instance then this isn't a reason
2413 to mark use definitions in other SLP instances as hybrid. */
2414 if (! STMT_SLP_TYPE (use_vinfo
)
2415 && (STMT_VINFO_RELEVANT (use_vinfo
)
2416 || VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (use_vinfo
)))
2417 && ! (gimple_code (gsi_stmt (*gsi
)) == GIMPLE_PHI
2418 && STMT_VINFO_DEF_TYPE (use_vinfo
) == vect_reduction_def
))
2425 /* Find stmts that must be both vectorized and SLPed. */
2428 vect_detect_hybrid_slp (loop_vec_info loop_vinfo
)
2431 vec
<slp_instance
> slp_instances
= LOOP_VINFO_SLP_INSTANCES (loop_vinfo
);
2432 slp_instance instance
;
2434 DUMP_VECT_SCOPE ("vect_detect_hybrid_slp");
2436 /* First walk all pattern stmt in the loop and mark defs of uses as
2437 hybrid because immediate uses in them are not recorded. */
2438 for (i
= 0; i
< LOOP_VINFO_LOOP (loop_vinfo
)->num_nodes
; ++i
)
2440 basic_block bb
= LOOP_VINFO_BBS (loop_vinfo
)[i
];
2441 for (gimple_stmt_iterator gsi
= gsi_start_bb (bb
); !gsi_end_p (gsi
);
2444 gimple
*stmt
= gsi_stmt (gsi
);
2445 stmt_vec_info stmt_info
= loop_vinfo
->lookup_stmt (stmt
);
2446 if (STMT_VINFO_IN_PATTERN_P (stmt_info
))
2449 memset (&wi
, 0, sizeof (wi
));
2450 wi
.info
= loop_vinfo
;
2451 gimple_stmt_iterator gsi2
2452 = gsi_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info
));
2453 walk_gimple_stmt (&gsi2
, vect_detect_hybrid_slp_2
,
2454 vect_detect_hybrid_slp_1
, &wi
);
2455 walk_gimple_seq (STMT_VINFO_PATTERN_DEF_SEQ (stmt_info
),
2456 vect_detect_hybrid_slp_2
,
2457 vect_detect_hybrid_slp_1
, &wi
);
2462 /* Then walk the SLP instance trees marking stmts with uses in
2463 non-SLP stmts as hybrid, also propagating hybrid down the
2464 SLP tree, collecting the above info on-the-fly. */
2465 FOR_EACH_VEC_ELT (slp_instances
, i
, instance
)
2467 for (unsigned i
= 0; i
< SLP_INSTANCE_GROUP_SIZE (instance
); ++i
)
2468 vect_detect_hybrid_slp_stmts (SLP_INSTANCE_TREE (instance
),
2474 /* Initialize a bb_vec_info struct for the statements between
2475 REGION_BEGIN_IN (inclusive) and REGION_END_IN (exclusive). */
2477 _bb_vec_info::_bb_vec_info (gimple_stmt_iterator region_begin_in
,
2478 gimple_stmt_iterator region_end_in
,
2479 vec_info_shared
*shared
)
2480 : vec_info (vec_info::bb
, init_cost (NULL
), shared
),
2481 bb (gsi_bb (region_begin_in
)),
2482 region_begin (region_begin_in
),
2483 region_end (region_end_in
)
2485 gimple_stmt_iterator gsi
;
2487 for (gsi
= region_begin
; gsi_stmt (gsi
) != gsi_stmt (region_end
);
2490 gimple
*stmt
= gsi_stmt (gsi
);
2491 gimple_set_uid (stmt
, 0);
2499 /* Free BB_VINFO struct, as well as all the stmt_vec_info structs of all the
2500 stmts in the basic block. */
2502 _bb_vec_info::~_bb_vec_info ()
2504 for (gimple_stmt_iterator si
= region_begin
;
2505 gsi_stmt (si
) != gsi_stmt (region_end
); gsi_next (&si
))
2507 gimple
*stmt
= gsi_stmt (si
);
2508 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
2511 /* Free stmt_vec_info. */
2512 free_stmt_vec_info (stmt
);
2514 /* Reset region marker. */
2515 gimple_set_uid (stmt
, -1);
2521 /* Subroutine of vect_slp_analyze_node_operations. Handle the root of NODE,
2522 given then that child nodes have already been processed, and that
2523 their def types currently match their SLP node's def type. */
2526 vect_slp_analyze_node_operations_1 (vec_info
*vinfo
, slp_tree node
,
2527 slp_instance node_instance
,
2528 stmt_vector_for_cost
*cost_vec
)
2530 gimple
*stmt
= SLP_TREE_SCALAR_STMTS (node
)[0];
2531 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
2532 gcc_assert (stmt_info
);
2533 gcc_assert (STMT_SLP_TYPE (stmt_info
) != loop_vect
);
2535 /* For BB vectorization vector types are assigned here.
2536 Memory accesses already got their vector type assigned
2537 in vect_analyze_data_refs. */
2538 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
2540 && ! STMT_VINFO_DATA_REF (stmt_info
))
2542 tree vectype
, nunits_vectype
;
2543 if (!vect_get_vector_types_for_stmt (stmt_info
, &vectype
,
2545 /* We checked this when building the node. */
2547 if (vectype
== boolean_type_node
)
2549 vectype
= vect_get_mask_type_for_stmt (stmt_info
);
2551 /* vect_get_mask_type_for_stmt has already explained the
2558 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node
), i
, sstmt
)
2559 STMT_VINFO_VECTYPE (vinfo_for_stmt (sstmt
)) = vectype
;
2562 /* Calculate the number of vector statements to be created for the
2563 scalar stmts in this node. For SLP reductions it is equal to the
2564 number of vector statements in the children (which has already been
2565 calculated by the recursive call). Otherwise it is the number of
2566 scalar elements in one scalar iteration (DR_GROUP_SIZE) multiplied by
2567 VF divided by the number of elements in a vector. */
2568 if (!STMT_VINFO_GROUPED_ACCESS (stmt_info
)
2569 && REDUC_GROUP_FIRST_ELEMENT (stmt_info
))
2570 SLP_TREE_NUMBER_OF_VEC_STMTS (node
)
2571 = SLP_TREE_NUMBER_OF_VEC_STMTS (SLP_TREE_CHILDREN (node
)[0]);
2575 if (loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
))
2576 vf
= loop_vinfo
->vectorization_factor
;
2579 unsigned int group_size
= SLP_INSTANCE_GROUP_SIZE (node_instance
);
2580 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
2581 SLP_TREE_NUMBER_OF_VEC_STMTS (node
)
2582 = vect_get_num_vectors (vf
* group_size
, vectype
);
2586 return vect_analyze_stmt (stmt
, &dummy
, node
, node_instance
, cost_vec
);
2589 /* Analyze statements contained in SLP tree NODE after recursively analyzing
2590 the subtree. NODE_INSTANCE contains NODE and VINFO contains INSTANCE.
2592 Return true if the operations are supported. */
2595 vect_slp_analyze_node_operations (vec_info
*vinfo
, slp_tree node
,
2596 slp_instance node_instance
,
2597 scalar_stmts_to_slp_tree_map_t
*visited
,
2598 scalar_stmts_to_slp_tree_map_t
*lvisited
,
2599 stmt_vector_for_cost
*cost_vec
)
2604 if (SLP_TREE_DEF_TYPE (node
) != vect_internal_def
)
2607 /* If we already analyzed the exact same set of scalar stmts we're done.
2608 We share the generated vector stmts for those. */
2610 if ((leader
= visited
->get (SLP_TREE_SCALAR_STMTS (node
)))
2611 || (leader
= lvisited
->get (SLP_TREE_SCALAR_STMTS (node
))))
2613 SLP_TREE_NUMBER_OF_VEC_STMTS (node
)
2614 = SLP_TREE_NUMBER_OF_VEC_STMTS (*leader
);
2618 /* The SLP graph is acyclic so not caching whether we failed or succeeded
2619 doesn't result in any issue since we throw away the lvisited set
2621 lvisited
->put (SLP_TREE_SCALAR_STMTS (node
).copy (), node
);
2623 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), i
, child
)
2624 if (!vect_slp_analyze_node_operations (vinfo
, child
, node_instance
,
2625 visited
, lvisited
, cost_vec
))
2628 /* Push SLP node def-type to stmt operands. */
2629 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), j
, child
)
2630 if (SLP_TREE_DEF_TYPE (child
) != vect_internal_def
)
2631 STMT_VINFO_DEF_TYPE (vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (child
)[0]))
2632 = SLP_TREE_DEF_TYPE (child
);
2633 bool res
= vect_slp_analyze_node_operations_1 (vinfo
, node
, node_instance
,
2635 /* Restore def-types. */
2636 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), j
, child
)
2637 if (SLP_TREE_DEF_TYPE (child
) != vect_internal_def
)
2638 STMT_VINFO_DEF_TYPE (vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (child
)[0]))
2639 = vect_internal_def
;
2647 /* Analyze statements in SLP instances of VINFO. Return true if the
2648 operations are supported. */
2651 vect_slp_analyze_operations (vec_info
*vinfo
)
2653 slp_instance instance
;
2656 DUMP_VECT_SCOPE ("vect_slp_analyze_operations");
2658 scalar_stmts_to_slp_tree_map_t
*visited
2659 = new scalar_stmts_to_slp_tree_map_t ();
2660 for (i
= 0; vinfo
->slp_instances
.iterate (i
, &instance
); )
2662 scalar_stmts_to_slp_tree_map_t lvisited
;
2663 stmt_vector_for_cost cost_vec
;
2664 cost_vec
.create (2);
2665 if (!vect_slp_analyze_node_operations (vinfo
,
2666 SLP_INSTANCE_TREE (instance
),
2667 instance
, visited
, &lvisited
,
2670 dump_printf_loc (MSG_NOTE
, vect_location
,
2671 "removing SLP instance operations starting from: ");
2672 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
,
2673 SLP_TREE_SCALAR_STMTS
2674 (SLP_INSTANCE_TREE (instance
))[0], 0);
2675 vect_free_slp_instance (instance
, false);
2676 vinfo
->slp_instances
.ordered_remove (i
);
2677 cost_vec
.release ();
2681 for (scalar_stmts_to_slp_tree_map_t::iterator x
= lvisited
.begin();
2682 x
!= lvisited
.end(); ++x
)
2683 visited
->put ((*x
).first
.copy (), (*x
).second
);
2686 add_stmt_costs (vinfo
->target_cost_data
, &cost_vec
);
2687 cost_vec
.release ();
2692 return !vinfo
->slp_instances
.is_empty ();
2696 /* Compute the scalar cost of the SLP node NODE and its children
2697 and return it. Do not account defs that are marked in LIFE and
2698 update LIFE according to uses of NODE. */
2701 vect_bb_slp_scalar_cost (basic_block bb
,
2702 slp_tree node
, vec
<bool, va_heap
> *life
,
2703 stmt_vector_for_cost
*cost_vec
)
2709 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node
), i
, stmt
)
2711 ssa_op_iter op_iter
;
2712 def_operand_p def_p
;
2713 stmt_vec_info stmt_info
;
2718 /* If there is a non-vectorized use of the defs then the scalar
2719 stmt is kept live in which case we do not account it or any
2720 required defs in the SLP children in the scalar cost. This
2721 way we make the vectorization more costly when compared to
2723 FOR_EACH_SSA_DEF_OPERAND (def_p
, stmt
, op_iter
, SSA_OP_DEF
)
2725 imm_use_iterator use_iter
;
2727 FOR_EACH_IMM_USE_STMT (use_stmt
, use_iter
, DEF_FROM_PTR (def_p
))
2728 if (!is_gimple_debug (use_stmt
)
2729 && (! vect_stmt_in_region_p (vinfo_for_stmt (stmt
)->vinfo
,
2731 || ! PURE_SLP_STMT (vinfo_for_stmt (use_stmt
))))
2734 BREAK_FROM_IMM_USE_STMT (use_iter
);
2740 /* Count scalar stmts only once. */
2741 if (gimple_visited_p (stmt
))
2743 gimple_set_visited (stmt
, true);
2745 stmt_info
= vinfo_for_stmt (stmt
);
2746 vect_cost_for_stmt kind
;
2747 if (STMT_VINFO_DATA_REF (stmt_info
))
2749 if (DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info
)))
2752 kind
= scalar_store
;
2756 record_stmt_cost (cost_vec
, 1, kind
, stmt_info
, 0, vect_body
);
2759 auto_vec
<bool, 20> subtree_life
;
2760 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), i
, child
)
2762 if (SLP_TREE_DEF_TYPE (child
) == vect_internal_def
)
2764 /* Do not directly pass LIFE to the recursive call, copy it to
2765 confine changes in the callee to the current child/subtree. */
2766 subtree_life
.safe_splice (*life
);
2767 vect_bb_slp_scalar_cost (bb
, child
, &subtree_life
, cost_vec
);
2768 subtree_life
.truncate (0);
2773 /* Check if vectorization of the basic block is profitable. */
2776 vect_bb_vectorization_profitable_p (bb_vec_info bb_vinfo
)
2778 vec
<slp_instance
> slp_instances
= BB_VINFO_SLP_INSTANCES (bb_vinfo
);
2779 slp_instance instance
;
2781 unsigned int vec_inside_cost
= 0, vec_outside_cost
= 0, scalar_cost
= 0;
2782 unsigned int vec_prologue_cost
= 0, vec_epilogue_cost
= 0;
2784 /* Calculate scalar cost. */
2785 stmt_vector_for_cost scalar_costs
;
2786 scalar_costs
.create (0);
2787 FOR_EACH_VEC_ELT (slp_instances
, i
, instance
)
2789 auto_vec
<bool, 20> life
;
2790 life
.safe_grow_cleared (SLP_INSTANCE_GROUP_SIZE (instance
));
2791 vect_bb_slp_scalar_cost (BB_VINFO_BB (bb_vinfo
),
2792 SLP_INSTANCE_TREE (instance
),
2793 &life
, &scalar_costs
);
2795 void *target_cost_data
= init_cost (NULL
);
2796 add_stmt_costs (target_cost_data
, &scalar_costs
);
2797 scalar_costs
.release ();
2799 finish_cost (target_cost_data
, &dummy
, &scalar_cost
, &dummy
);
2800 destroy_cost_data (target_cost_data
);
2802 /* Unset visited flag. */
2803 for (gimple_stmt_iterator gsi
= bb_vinfo
->region_begin
;
2804 gsi_stmt (gsi
) != gsi_stmt (bb_vinfo
->region_end
); gsi_next (&gsi
))
2805 gimple_set_visited (gsi_stmt (gsi
), false);
2807 /* Complete the target-specific cost calculation. */
2808 finish_cost (BB_VINFO_TARGET_COST_DATA (bb_vinfo
), &vec_prologue_cost
,
2809 &vec_inside_cost
, &vec_epilogue_cost
);
2811 vec_outside_cost
= vec_prologue_cost
+ vec_epilogue_cost
;
2813 if (dump_enabled_p ())
2815 dump_printf_loc (MSG_NOTE
, vect_location
, "Cost model analysis: \n");
2816 dump_printf (MSG_NOTE
, " Vector inside of basic block cost: %d\n",
2818 dump_printf (MSG_NOTE
, " Vector prologue cost: %d\n", vec_prologue_cost
);
2819 dump_printf (MSG_NOTE
, " Vector epilogue cost: %d\n", vec_epilogue_cost
);
2820 dump_printf (MSG_NOTE
, " Scalar cost of basic block: %d\n", scalar_cost
);
2823 /* Vectorization is profitable if its cost is more than the cost of scalar
2824 version. Note that we err on the vector side for equal cost because
2825 the cost estimate is otherwise quite pessimistic (constant uses are
2826 free on the scalar side but cost a load on the vector side for
2828 if (vec_outside_cost
+ vec_inside_cost
> scalar_cost
)
2834 /* Check if the basic block can be vectorized. Returns a bb_vec_info
2835 if so and sets fatal to true if failure is independent of
2836 current_vector_size. */
2839 vect_slp_analyze_bb_1 (gimple_stmt_iterator region_begin
,
2840 gimple_stmt_iterator region_end
,
2841 vec
<data_reference_p
> datarefs
, int n_stmts
,
2842 bool &fatal
, vec_info_shared
*shared
)
2844 bb_vec_info bb_vinfo
;
2845 slp_instance instance
;
2847 poly_uint64 min_vf
= 2;
2849 /* The first group of checks is independent of the vector size. */
2852 if (n_stmts
> PARAM_VALUE (PARAM_SLP_MAX_INSNS_IN_BB
))
2854 if (dump_enabled_p ())
2855 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2856 "not vectorized: too many instructions in "
2858 free_data_refs (datarefs
);
2862 bb_vinfo
= new _bb_vec_info (region_begin
, region_end
, shared
);
2866 BB_VINFO_DATAREFS (bb_vinfo
) = datarefs
;
2867 bb_vinfo
->shared
->save_datarefs ();
2869 /* Analyze the data references. */
2871 if (!vect_analyze_data_refs (bb_vinfo
, &min_vf
))
2873 if (dump_enabled_p ())
2874 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2875 "not vectorized: unhandled data-ref in basic "
2882 if (BB_VINFO_DATAREFS (bb_vinfo
).length () < 2)
2884 if (dump_enabled_p ())
2885 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2886 "not vectorized: not enough data-refs in "
2893 if (!vect_analyze_data_ref_accesses (bb_vinfo
))
2895 if (dump_enabled_p ())
2896 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2897 "not vectorized: unhandled data access in "
2904 /* If there are no grouped stores in the region there is no need
2905 to continue with pattern recog as vect_analyze_slp will fail
2907 if (bb_vinfo
->grouped_stores
.is_empty ())
2909 if (dump_enabled_p ())
2910 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2911 "not vectorized: no grouped stores in "
2918 /* While the rest of the analysis below depends on it in some way. */
2921 vect_pattern_recog (bb_vinfo
);
2923 /* Check the SLP opportunities in the basic block, analyze and build SLP
2925 if (!vect_analyze_slp (bb_vinfo
, n_stmts
))
2927 if (dump_enabled_p ())
2929 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2930 "Failed to SLP the basic block.\n");
2931 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2932 "not vectorized: failed to find SLP opportunities "
2933 "in basic block.\n");
2940 vect_record_base_alignments (bb_vinfo
);
2942 /* Analyze and verify the alignment of data references and the
2943 dependence in the SLP instances. */
2944 for (i
= 0; BB_VINFO_SLP_INSTANCES (bb_vinfo
).iterate (i
, &instance
); )
2946 if (! vect_slp_analyze_and_verify_instance_alignment (instance
)
2947 || ! vect_slp_analyze_instance_dependence (instance
))
2949 dump_printf_loc (MSG_NOTE
, vect_location
,
2950 "removing SLP instance operations starting from: ");
2951 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
,
2952 SLP_TREE_SCALAR_STMTS
2953 (SLP_INSTANCE_TREE (instance
))[0], 0);
2954 vect_free_slp_instance (instance
, false);
2955 BB_VINFO_SLP_INSTANCES (bb_vinfo
).ordered_remove (i
);
2959 /* Mark all the statements that we want to vectorize as pure SLP and
2961 vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance
), pure_slp
, -1);
2962 vect_mark_slp_stmts_relevant (SLP_INSTANCE_TREE (instance
));
2966 if (! BB_VINFO_SLP_INSTANCES (bb_vinfo
).length ())
2972 if (!vect_slp_analyze_operations (bb_vinfo
))
2974 if (dump_enabled_p ())
2975 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2976 "not vectorized: bad operation in basic block.\n");
2982 /* Cost model: check if the vectorization is worthwhile. */
2983 if (!unlimited_cost_model (NULL
)
2984 && !vect_bb_vectorization_profitable_p (bb_vinfo
))
2986 if (dump_enabled_p ())
2987 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2988 "not vectorized: vectorization is not "
2995 if (dump_enabled_p ())
2996 dump_printf_loc (MSG_NOTE
, vect_location
,
2997 "Basic block will be vectorized using SLP\n");
3003 /* Main entry for the BB vectorizer. Analyze and transform BB, returns
3004 true if anything in the basic-block was vectorized. */
3007 vect_slp_bb (basic_block bb
)
3009 bb_vec_info bb_vinfo
;
3010 gimple_stmt_iterator gsi
;
3011 bool any_vectorized
= false;
3012 auto_vector_sizes vector_sizes
;
3014 DUMP_VECT_SCOPE ("vect_slp_analyze_bb");
3016 /* Autodetect first vector size we try. */
3017 current_vector_size
= 0;
3018 targetm
.vectorize
.autovectorize_vector_sizes (&vector_sizes
);
3019 unsigned int next_size
= 0;
3021 gsi
= gsi_start_bb (bb
);
3023 poly_uint64 autodetected_vector_size
= 0;
3026 if (gsi_end_p (gsi
))
3029 gimple_stmt_iterator region_begin
= gsi
;
3030 vec
<data_reference_p
> datarefs
= vNULL
;
3033 for (; !gsi_end_p (gsi
); gsi_next (&gsi
))
3035 gimple
*stmt
= gsi_stmt (gsi
);
3036 if (is_gimple_debug (stmt
))
3040 if (gimple_location (stmt
) != UNKNOWN_LOCATION
)
3041 vect_location
= stmt
;
3043 if (!vect_find_stmt_data_reference (NULL
, stmt
, &datarefs
))
3047 /* Skip leading unhandled stmts. */
3048 if (gsi_stmt (region_begin
) == gsi_stmt (gsi
))
3054 gimple_stmt_iterator region_end
= gsi
;
3056 bool vectorized
= false;
3058 vec_info_shared shared
;
3059 bb_vinfo
= vect_slp_analyze_bb_1 (region_begin
, region_end
,
3060 datarefs
, insns
, fatal
, &shared
);
3062 && dbg_cnt (vect_slp
))
3064 if (dump_enabled_p ())
3065 dump_printf_loc (MSG_NOTE
, vect_location
, "SLPing BB part\n");
3067 bb_vinfo
->shared
->check_datarefs ();
3068 vect_schedule_slp (bb_vinfo
);
3070 unsigned HOST_WIDE_INT bytes
;
3071 if (current_vector_size
.is_constant (&bytes
))
3072 dump_printf_loc (MSG_OPTIMIZED_LOCATIONS
, vect_location
,
3073 "basic block part vectorized using "
3074 HOST_WIDE_INT_PRINT_UNSIGNED
" byte "
3075 "vectors\n", bytes
);
3077 dump_printf_loc (MSG_OPTIMIZED_LOCATIONS
, vect_location
,
3078 "basic block part vectorized using variable "
3079 "length vectors\n");
3085 any_vectorized
|= vectorized
;
3088 autodetected_vector_size
= current_vector_size
;
3090 if (next_size
< vector_sizes
.length ()
3091 && known_eq (vector_sizes
[next_size
], autodetected_vector_size
))
3095 || next_size
== vector_sizes
.length ()
3096 || known_eq (current_vector_size
, 0U)
3097 /* If vect_slp_analyze_bb_1 signaled that analysis for all
3098 vector sizes will fail do not bother iterating. */
3101 if (gsi_end_p (region_end
))
3104 /* Skip the unhandled stmt. */
3107 /* And reset vector sizes. */
3108 current_vector_size
= 0;
3113 /* Try the next biggest vector size. */
3114 current_vector_size
= vector_sizes
[next_size
++];
3115 if (dump_enabled_p ())
3117 dump_printf_loc (MSG_NOTE
, vect_location
,
3118 "***** Re-trying analysis with "
3120 dump_dec (MSG_NOTE
, current_vector_size
);
3121 dump_printf (MSG_NOTE
, "\n");
3129 return any_vectorized
;
3133 /* Return 1 if vector type of boolean constant which is OPNUM
3134 operand in statement STMT is a boolean vector. */
3137 vect_mask_constant_operand_p (gimple
*stmt
, int opnum
)
3139 stmt_vec_info stmt_vinfo
= vinfo_for_stmt (stmt
);
3140 enum tree_code code
= gimple_expr_code (stmt
);
3142 enum vect_def_type dt
;
3144 /* For comparison and COND_EXPR type is chosen depending
3145 on the other comparison operand. */
3146 if (TREE_CODE_CLASS (code
) == tcc_comparison
)
3149 op
= gimple_assign_rhs1 (stmt
);
3151 op
= gimple_assign_rhs2 (stmt
);
3153 if (!vect_is_simple_use (op
, stmt_vinfo
->vinfo
, &dt
, &vectype
))
3156 return !vectype
|| VECTOR_BOOLEAN_TYPE_P (vectype
);
3159 if (code
== COND_EXPR
)
3161 tree cond
= gimple_assign_rhs1 (stmt
);
3163 if (TREE_CODE (cond
) == SSA_NAME
)
3166 op
= TREE_OPERAND (cond
, 1);
3168 op
= TREE_OPERAND (cond
, 0);
3170 if (!vect_is_simple_use (op
, stmt_vinfo
->vinfo
, &dt
, &vectype
))
3173 return !vectype
|| VECTOR_BOOLEAN_TYPE_P (vectype
);
3176 return VECTOR_BOOLEAN_TYPE_P (STMT_VINFO_VECTYPE (stmt_vinfo
));
3179 /* Build a variable-length vector in which the elements in ELTS are repeated
3180 to a fill NRESULTS vectors of type VECTOR_TYPE. Store the vectors in
3181 RESULTS and add any new instructions to SEQ.
3183 The approach we use is:
3185 (1) Find a vector mode VM with integer elements of mode IM.
3187 (2) Replace ELTS[0:NELTS] with ELTS'[0:NELTS'], where each element of
3188 ELTS' has mode IM. This involves creating NELTS' VIEW_CONVERT_EXPRs
3189 from small vectors to IM.
3191 (3) Duplicate each ELTS'[I] into a vector of mode VM.
3193 (4) Use a tree of interleaving VEC_PERM_EXPRs to create VMs with the
3194 correct byte contents.
3196 (5) Use VIEW_CONVERT_EXPR to cast the final VMs to the required type.
3198 We try to find the largest IM for which this sequence works, in order
3199 to cut down on the number of interleaves. */
3202 duplicate_and_interleave (gimple_seq
*seq
, tree vector_type
, vec
<tree
> elts
,
3203 unsigned int nresults
, vec
<tree
> &results
)
3205 unsigned int nelts
= elts
.length ();
3206 tree element_type
= TREE_TYPE (vector_type
);
3208 /* (1) Find a vector mode VM with integer elements of mode IM. */
3209 unsigned int nvectors
= 1;
3210 tree new_vector_type
;
3212 if (!can_duplicate_and_interleave_p (nelts
, TYPE_MODE (element_type
),
3213 &nvectors
, &new_vector_type
,
3217 /* Get a vector type that holds ELTS[0:NELTS/NELTS']. */
3218 unsigned int partial_nelts
= nelts
/ nvectors
;
3219 tree partial_vector_type
= build_vector_type (element_type
, partial_nelts
);
3221 tree_vector_builder partial_elts
;
3222 auto_vec
<tree
, 32> pieces (nvectors
* 2);
3223 pieces
.quick_grow (nvectors
* 2);
3224 for (unsigned int i
= 0; i
< nvectors
; ++i
)
3226 /* (2) Replace ELTS[0:NELTS] with ELTS'[0:NELTS'], where each element of
3227 ELTS' has mode IM. */
3228 partial_elts
.new_vector (partial_vector_type
, partial_nelts
, 1);
3229 for (unsigned int j
= 0; j
< partial_nelts
; ++j
)
3230 partial_elts
.quick_push (elts
[i
* partial_nelts
+ j
]);
3231 tree t
= gimple_build_vector (seq
, &partial_elts
);
3232 t
= gimple_build (seq
, VIEW_CONVERT_EXPR
,
3233 TREE_TYPE (new_vector_type
), t
);
3235 /* (3) Duplicate each ELTS'[I] into a vector of mode VM. */
3236 pieces
[i
] = gimple_build_vector_from_val (seq
, new_vector_type
, t
);
3239 /* (4) Use a tree of VEC_PERM_EXPRs to create a single VM with the
3240 correct byte contents.
3242 We need to repeat the following operation log2(nvectors) times:
3244 out[i * 2] = VEC_PERM_EXPR (in[i], in[i + hi_start], lo_permute);
3245 out[i * 2 + 1] = VEC_PERM_EXPR (in[i], in[i + hi_start], hi_permute);
3247 However, if each input repeats every N elements and the VF is
3248 a multiple of N * 2, the HI result is the same as the LO. */
3249 unsigned int in_start
= 0;
3250 unsigned int out_start
= nvectors
;
3251 unsigned int hi_start
= nvectors
/ 2;
3252 /* A bound on the number of outputs needed to produce NRESULTS results
3253 in the final iteration. */
3254 unsigned int noutputs_bound
= nvectors
* nresults
;
3255 for (unsigned int in_repeat
= 1; in_repeat
< nvectors
; in_repeat
*= 2)
3257 noutputs_bound
/= 2;
3258 unsigned int limit
= MIN (noutputs_bound
, nvectors
);
3259 for (unsigned int i
= 0; i
< limit
; ++i
)
3262 && multiple_p (TYPE_VECTOR_SUBPARTS (new_vector_type
),
3265 pieces
[out_start
+ i
] = pieces
[out_start
+ i
- 1];
3269 tree output
= make_ssa_name (new_vector_type
);
3270 tree input1
= pieces
[in_start
+ (i
/ 2)];
3271 tree input2
= pieces
[in_start
+ (i
/ 2) + hi_start
];
3272 gassign
*stmt
= gimple_build_assign (output
, VEC_PERM_EXPR
,
3275 gimple_seq_add_stmt (seq
, stmt
);
3276 pieces
[out_start
+ i
] = output
;
3278 std::swap (in_start
, out_start
);
3281 /* (5) Use VIEW_CONVERT_EXPR to cast the final VM to the required type. */
3282 results
.reserve (nresults
);
3283 for (unsigned int i
= 0; i
< nresults
; ++i
)
3285 results
.quick_push (gimple_build (seq
, VIEW_CONVERT_EXPR
, vector_type
,
3286 pieces
[in_start
+ i
]));
3288 results
.quick_push (results
[i
- nvectors
]);
3292 /* For constant and loop invariant defs of SLP_NODE this function returns
3293 (vector) defs (VEC_OPRNDS) that will be used in the vectorized stmts.
3294 OP_NUM determines if we gather defs for operand 0 or operand 1 of the RHS of
3295 scalar stmts. NUMBER_OF_VECTORS is the number of vector defs to create.
3296 REDUC_INDEX is the index of the reduction operand in the statements, unless
3300 vect_get_constant_vectors (tree op
, slp_tree slp_node
,
3301 vec
<tree
> *vec_oprnds
,
3302 unsigned int op_num
, unsigned int number_of_vectors
)
3304 vec
<gimple
*> stmts
= SLP_TREE_SCALAR_STMTS (slp_node
);
3305 gimple
*stmt
= stmts
[0];
3306 stmt_vec_info stmt_vinfo
= vinfo_for_stmt (stmt
);
3307 unsigned HOST_WIDE_INT nunits
;
3309 unsigned j
, number_of_places_left_in_vector
;
3312 int group_size
= stmts
.length ();
3313 unsigned int vec_num
, i
;
3314 unsigned number_of_copies
= 1;
3316 voprnds
.create (number_of_vectors
);
3317 bool constant_p
, is_store
;
3318 tree neutral_op
= NULL
;
3319 enum tree_code code
= gimple_expr_code (stmt
);
3320 gimple_seq ctor_seq
= NULL
;
3321 auto_vec
<tree
, 16> permute_results
;
3323 /* Check if vector type is a boolean vector. */
3324 if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op
))
3325 && vect_mask_constant_operand_p (stmt
, op_num
))
3327 = build_same_sized_truth_vector_type (STMT_VINFO_VECTYPE (stmt_vinfo
));
3329 vector_type
= get_vectype_for_scalar_type (TREE_TYPE (op
));
3331 if (STMT_VINFO_DATA_REF (stmt_vinfo
))
3334 op
= gimple_assign_rhs1 (stmt
);
3341 /* NUMBER_OF_COPIES is the number of times we need to use the same values in
3342 created vectors. It is greater than 1 if unrolling is performed.
3344 For example, we have two scalar operands, s1 and s2 (e.g., group of
3345 strided accesses of size two), while NUNITS is four (i.e., four scalars
3346 of this type can be packed in a vector). The output vector will contain
3347 two copies of each scalar operand: {s1, s2, s1, s2}. (NUMBER_OF_COPIES
3350 If GROUP_SIZE > NUNITS, the scalars will be split into several vectors
3351 containing the operands.
3353 For example, NUNITS is four as before, and the group size is 8
3354 (s1, s2, ..., s8). We will create two vectors {s1, s2, s3, s4} and
3355 {s5, s6, s7, s8}. */
3357 /* When using duplicate_and_interleave, we just need one element for
3358 each scalar statement. */
3359 if (!TYPE_VECTOR_SUBPARTS (vector_type
).is_constant (&nunits
))
3360 nunits
= group_size
;
3362 number_of_copies
= nunits
* number_of_vectors
/ group_size
;
3364 number_of_places_left_in_vector
= nunits
;
3366 tree_vector_builder
elts (vector_type
, nunits
, 1);
3367 elts
.quick_grow (nunits
);
3368 bool place_after_defs
= false;
3369 for (j
= 0; j
< number_of_copies
; j
++)
3371 for (i
= group_size
- 1; stmts
.iterate (i
, &stmt
); i
--)
3374 op
= gimple_assign_rhs1 (stmt
);
3381 tree cond
= gimple_assign_rhs1 (stmt
);
3382 if (TREE_CODE (cond
) == SSA_NAME
)
3383 op
= gimple_op (stmt
, op_num
+ 1);
3384 else if (op_num
== 0 || op_num
== 1)
3385 op
= TREE_OPERAND (cond
, op_num
);
3389 op
= gimple_assign_rhs2 (stmt
);
3391 op
= gimple_assign_rhs3 (stmt
);
3397 op
= gimple_call_arg (stmt
, op_num
);
3404 op
= gimple_op (stmt
, op_num
+ 1);
3405 /* Unlike the other binary operators, shifts/rotates have
3406 the shift count being int, instead of the same type as
3407 the lhs, so make sure the scalar is the right type if
3408 we are dealing with vectors of
3409 long long/long/short/char. */
3410 if (op_num
== 1 && TREE_CODE (op
) == INTEGER_CST
)
3411 op
= fold_convert (TREE_TYPE (vector_type
), op
);
3415 op
= gimple_op (stmt
, op_num
+ 1);
3420 /* Create 'vect_ = {op0,op1,...,opn}'. */
3421 number_of_places_left_in_vector
--;
3423 if (!types_compatible_p (TREE_TYPE (vector_type
), TREE_TYPE (op
)))
3425 if (CONSTANT_CLASS_P (op
))
3427 if (VECTOR_BOOLEAN_TYPE_P (vector_type
))
3429 /* Can't use VIEW_CONVERT_EXPR for booleans because
3430 of possibly different sizes of scalar value and
3432 if (integer_zerop (op
))
3433 op
= build_int_cst (TREE_TYPE (vector_type
), 0);
3434 else if (integer_onep (op
))
3435 op
= build_all_ones_cst (TREE_TYPE (vector_type
));
3440 op
= fold_unary (VIEW_CONVERT_EXPR
,
3441 TREE_TYPE (vector_type
), op
);
3442 gcc_assert (op
&& CONSTANT_CLASS_P (op
));
3446 tree new_temp
= make_ssa_name (TREE_TYPE (vector_type
));
3448 if (VECTOR_BOOLEAN_TYPE_P (vector_type
))
3451 = build_all_ones_cst (TREE_TYPE (vector_type
));
3453 = build_zero_cst (TREE_TYPE (vector_type
));
3454 gcc_assert (INTEGRAL_TYPE_P (TREE_TYPE (op
)));
3455 init_stmt
= gimple_build_assign (new_temp
, COND_EXPR
,
3461 op
= build1 (VIEW_CONVERT_EXPR
, TREE_TYPE (vector_type
),
3464 = gimple_build_assign (new_temp
, VIEW_CONVERT_EXPR
,
3467 gimple_seq_add_stmt (&ctor_seq
, init_stmt
);
3471 elts
[number_of_places_left_in_vector
] = op
;
3472 if (!CONSTANT_CLASS_P (op
))
3474 if (TREE_CODE (orig_op
) == SSA_NAME
3475 && !SSA_NAME_IS_DEFAULT_DEF (orig_op
)
3476 && STMT_VINFO_BB_VINFO (stmt_vinfo
)
3477 && (STMT_VINFO_BB_VINFO (stmt_vinfo
)->bb
3478 == gimple_bb (SSA_NAME_DEF_STMT (orig_op
))))
3479 place_after_defs
= true;
3481 if (number_of_places_left_in_vector
== 0)
3484 ? multiple_p (TYPE_VECTOR_SUBPARTS (vector_type
), nunits
)
3485 : known_eq (TYPE_VECTOR_SUBPARTS (vector_type
), nunits
))
3486 vec_cst
= gimple_build_vector (&ctor_seq
, &elts
);
3489 if (vec_oprnds
->is_empty ())
3490 duplicate_and_interleave (&ctor_seq
, vector_type
, elts
,
3493 vec_cst
= permute_results
[number_of_vectors
- j
- 1];
3496 gimple_stmt_iterator gsi
;
3497 if (place_after_defs
)
3500 (vect_find_last_scalar_stmt_in_slp (slp_node
));
3501 init
= vect_init_vector (stmt
, vec_cst
, vector_type
, &gsi
);
3504 init
= vect_init_vector (stmt
, vec_cst
, vector_type
, NULL
);
3505 if (ctor_seq
!= NULL
)
3507 gsi
= gsi_for_stmt (SSA_NAME_DEF_STMT (init
));
3508 gsi_insert_seq_before (&gsi
, ctor_seq
, GSI_SAME_STMT
);
3511 voprnds
.quick_push (init
);
3512 place_after_defs
= false;
3513 number_of_places_left_in_vector
= nunits
;
3515 elts
.new_vector (vector_type
, nunits
, 1);
3516 elts
.quick_grow (nunits
);
3521 /* Since the vectors are created in the reverse order, we should invert
3523 vec_num
= voprnds
.length ();
3524 for (j
= vec_num
; j
!= 0; j
--)
3526 vop
= voprnds
[j
- 1];
3527 vec_oprnds
->quick_push (vop
);
3532 /* In case that VF is greater than the unrolling factor needed for the SLP
3533 group of stmts, NUMBER_OF_VECTORS to be created is greater than
3534 NUMBER_OF_SCALARS/NUNITS or NUNITS/NUMBER_OF_SCALARS, and hence we have
3535 to replicate the vectors. */
3536 while (number_of_vectors
> vec_oprnds
->length ())
3538 tree neutral_vec
= NULL
;
3543 neutral_vec
= build_vector_from_val (vector_type
, neutral_op
);
3545 vec_oprnds
->quick_push (neutral_vec
);
3549 for (i
= 0; vec_oprnds
->iterate (i
, &vop
) && i
< vec_num
; i
++)
3550 vec_oprnds
->quick_push (vop
);
3556 /* Get vectorized definitions from SLP_NODE that contains corresponding
3557 vectorized def-stmts. */
3560 vect_get_slp_vect_defs (slp_tree slp_node
, vec
<tree
> *vec_oprnds
)
3563 gimple
*vec_def_stmt
;
3566 gcc_assert (SLP_TREE_VEC_STMTS (slp_node
).exists ());
3568 FOR_EACH_VEC_ELT (SLP_TREE_VEC_STMTS (slp_node
), i
, vec_def_stmt
)
3570 gcc_assert (vec_def_stmt
);
3571 if (gimple_code (vec_def_stmt
) == GIMPLE_PHI
)
3572 vec_oprnd
= gimple_phi_result (vec_def_stmt
);
3574 vec_oprnd
= gimple_get_lhs (vec_def_stmt
);
3575 vec_oprnds
->quick_push (vec_oprnd
);
3580 /* Get vectorized definitions for SLP_NODE.
3581 If the scalar definitions are loop invariants or constants, collect them and
3582 call vect_get_constant_vectors() to create vector stmts.
3583 Otherwise, the def-stmts must be already vectorized and the vectorized stmts
3584 must be stored in the corresponding child of SLP_NODE, and we call
3585 vect_get_slp_vect_defs () to retrieve them. */
/* NOTE(review): this span appears to be a corrupted extraction -- single
   statements are split across lines and several original lines are missing
   (see the gaps in the embedded line numbers, e.g. 3595 -> 3598).  The code
   text is kept byte-identical below; only comments are added.  Restore this
   function from the upstream tree-vect-slp.c before compiling.  */
3588 vect_get_slp_defs (vec
<tree
> ops
, slp_tree slp_node
,
3589 vec
<vec
<tree
> > *vec_oprnds
)
3592 int number_of_vects
= 0, i
;
3593 unsigned int child_index
= 0;
3594 HOST_WIDE_INT lhs_size_unit
, rhs_size_unit
;
3595 slp_tree child
= NULL
;
3598 bool vectorized_defs
;
/* The first scalar stmt is used further down to query the smallest scalar
   type of the statement group.  */
3600 first_stmt
= SLP_TREE_SCALAR_STMTS (slp_node
)[0];
3601 FOR_EACH_VEC_ELT (ops
, i
, oprnd
)
3603 /* For each operand we check if it has vectorized definitions in a child
3604 node or we need to create them (for invariants and constants). We
3605 check if the LHS of the first stmt of the next child matches OPRND.
3606 If it does, we found the correct child. Otherwise, we call
3607 vect_get_constant_vectors (), and not advance CHILD_INDEX in order
3608 to check this child node for the next operand. */
3609 vectorized_defs
= false;
3610 if (SLP_TREE_CHILDREN (slp_node
).length () > child_index
)
3612 child
= SLP_TREE_CHILDREN (slp_node
)[child_index
];
3614 /* We have to check both pattern and original def, if available. */
3615 if (SLP_TREE_DEF_TYPE (child
) == vect_internal_def
)
3617 gimple
*first_def
= SLP_TREE_SCALAR_STMTS (child
)[0];
3619 = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (first_def
));
/* PHIs carry their def on the PHI result rather than on a stmt LHS.  */
3622 if (gimple_code (first_def
) == GIMPLE_PHI
)
3623 first_def_op
= gimple_phi_result (first_def
);
3625 first_def_op
= gimple_get_lhs (first_def
);
3626 if (operand_equal_p (oprnd
, first_def_op
, 0)
3628 && operand_equal_p (oprnd
, gimple_get_lhs (related
), 0)))
3630 /* The number of vector defs is determined by the number of
3631 vector statements in the node from which we get those
3633 number_of_vects
= SLP_TREE_NUMBER_OF_VEC_STMTS (child
);
3634 vectorized_defs
= true;
/* No child supplied this operand: it is an invariant/constant, so the
   def count comes from the node itself, fixed up below when the RHS
   scalar type differs in size from the LHS.  */
3642 if (!vectorized_defs
)
3646 number_of_vects
= SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
);
3647 /* Number of vector stmts was calculated according to LHS in
3648 vect_schedule_slp_instance (), fix it by replacing LHS with
3649 RHS, if necessary. See vect_get_smallest_scalar_type () for
3651 vect_get_smallest_scalar_type (first_stmt
, &lhs_size_unit
,
3653 if (rhs_size_unit
!= lhs_size_unit
)
3655 number_of_vects
*= rhs_size_unit
;
3656 number_of_vects
/= lhs_size_unit
;
3661 /* Allocate memory for vectorized defs. */
3663 vec_defs
.create (number_of_vects
);
3665 /* For reduction defs we call vect_get_constant_vectors (), since we are
3666 looking for initial loop invariant values. */
3667 if (vectorized_defs
)
3668 /* The defs are already vectorized. */
3669 vect_get_slp_vect_defs (child
, &vec_defs
);
3671 /* Build vectors from scalar defs. */
3672 vect_get_constant_vectors (oprnd
, slp_node
, &vec_defs
, i
,
3675 vec_oprnds
->quick_push (vec_defs
);
3679 /* Generate vector permute statements from a list of loads in DR_CHAIN.
3680 If ANALYZE_ONLY is TRUE, only check that it is possible to create valid
3681 permute statements for the SLP node NODE of the SLP instance
3682 SLP_NODE_INSTANCE. */
/* NOTE(review): corrupted extraction -- statements split across lines and
   interior original lines missing (gaps in the embedded numbering, e.g.
   3700 -> 3703, 3772 -> 3776).  Code text kept byte-identical; comments
   only.  Restore from the upstream tree-vect-slp.c before compiling.  */
3685 vect_transform_slp_perm_load (slp_tree node
, vec
<tree
> dr_chain
,
3686 gimple_stmt_iterator
*gsi
, poly_uint64 vf
,
3687 slp_instance slp_node_instance
, bool analyze_only
,
3690 gimple
*stmt
= SLP_TREE_SCALAR_STMTS (node
)[0];
3691 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
3692 tree mask_element_type
= NULL_TREE
, mask_type
;
3694 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
3695 int group_size
= SLP_INSTANCE_GROUP_SIZE (slp_node_instance
);
3696 unsigned int mask_element
;
3698 unsigned HOST_WIDE_INT nunits
, const_vf
;
3700 if (!STMT_VINFO_GROUPED_ACCESS (stmt_info
))
/* Work from the first element of the load group from here on.  */
3703 stmt_info
= vinfo_for_stmt (DR_GROUP_FIRST_ELEMENT (stmt_info
));
3705 mode
= TYPE_MODE (vectype
);
3707 /* At the moment, all permutations are represented using per-element
3708 indices, so we can't cope with variable vector lengths or
3709 vectorization factors. */
3710 if (!TYPE_VECTOR_SUBPARTS (vectype
).is_constant (&nunits
)
3711 || !vf
.is_constant (&const_vf
))
3714 /* The generic VEC_PERM_EXPR code always uses an integral type of the
3715 same size as the vector element being permuted. */
3716 mask_element_type
= lang_hooks
.types
.type_for_mode
3717 (int_mode_for_mode (TYPE_MODE (TREE_TYPE (vectype
))).require (), 1);
3718 mask_type
= get_vectype_for_scalar_type (mask_element_type
);
3719 vec_perm_builder
mask (nunits
, nunits
, 1);
3720 mask
.quick_grow (nunits
);
3721 vec_perm_indices indices
;
3723 /* Initialize the vect stmts of NODE to properly insert the generated
3726 for (unsigned i
= SLP_TREE_VEC_STMTS (node
).length ();
3727 i
< SLP_TREE_NUMBER_OF_VEC_STMTS (node
); i
++)
3728 SLP_TREE_VEC_STMTS (node
).quick_push (NULL
);
3730 /* Generate permutation masks for every NODE. Number of masks for each NODE
3731 is equal to GROUP_SIZE.
3732 E.g., we have a group of three nodes with three loads from the same
3733 location in each node, and the vector size is 4. I.e., we have a
3734 a0b0c0a1b1c1... sequence and we need to create the following vectors:
3735 for a's: a0a0a0a1 a1a1a2a2 a2a3a3a3
3736 for b's: b0b0b0b1 b1b1b2b2 b2b3b3b3
3739 The masks for a's should be: {0,0,0,3} {3,3,6,6} {6,9,9,9}.
3740 The last mask is illegal since we assume two operands for permute
3741 operation, and the mask element values can't be outside that range.
3742 Hence, the last mask must be converted into {2,5,5,5}.
3743 For the first two permutations we need the first and the second input
3744 vectors: {a0,b0,c0,a1} and {b1,c1,a2,b2}, and for the last permutation
3745 we need the second and the third vectors: {b1,c1,a2,b2} and
3748 int vect_stmts_counter
= 0;
3749 unsigned int index
= 0;
3750 int first_vec_index
= -1;
3751 int second_vec_index
= -1;
/* Walk every vectorization-factor copy and every group member, mapping
   each loaded scalar to an input vector index and a mask element.  */
3755 for (unsigned int j
= 0; j
< const_vf
; j
++)
3757 for (int k
= 0; k
< group_size
; k
++)
3759 unsigned int i
= (SLP_TREE_LOAD_PERMUTATION (node
)[k
]
3760 + j
* DR_GROUP_SIZE (stmt_info
));
3761 vec_index
= i
/ nunits
;
3762 mask_element
= i
% nunits
;
3763 if (vec_index
== first_vec_index
3764 || first_vec_index
== -1)
3766 first_vec_index
= vec_index
;
3768 else if (vec_index
== second_vec_index
3769 || second_vec_index
== -1)
3771 second_vec_index
= vec_index
;
3772 mask_element
+= nunits
;
/* A third input vector would be required -- only acceptable while
   merely analyzing (see the gcc_assert (analyze_only) below).  */
3776 if (dump_enabled_p ())
3778 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3779 "permutation requires at "
3780 "least three vectors ");
3781 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
,
3784 gcc_assert (analyze_only
);
3788 gcc_assert (mask_element
< 2 * nunits
);
3789 if (mask_element
!= index
)
3791 mask
[index
++] = mask_element
;
3793 if (index
== nunits
&& !noop_p
)
3795 indices
.new_vector (mask
, 2, nunits
);
3796 if (!can_vec_perm_const_p (mode
, indices
))
3798 if (dump_enabled_p ())
3800 dump_printf_loc (MSG_MISSED_OPTIMIZATION
,
3802 "unsupported vect permute { ");
3803 for (i
= 0; i
< nunits
; ++i
)
3805 dump_dec (MSG_MISSED_OPTIMIZATION
, mask
[i
]);
3806 dump_printf (MSG_MISSED_OPTIMIZATION
, " ");
3808 dump_printf (MSG_MISSED_OPTIMIZATION
, "}\n");
3810 gcc_assert (analyze_only
);
/* A full mask has been accumulated; emit the VEC_PERM_EXPR, or reuse the
   input def when the mask turned out to be an identity.  */
3817 if (index
== nunits
)
3821 tree mask_vec
= NULL_TREE
;
3824 mask_vec
= vec_perm_indices_to_tree (mask_type
, indices
);
3826 if (second_vec_index
== -1)
3827 second_vec_index
= first_vec_index
;
3829 /* Generate the permute statement if necessary. */
3830 tree first_vec
= dr_chain
[first_vec_index
];
3831 tree second_vec
= dr_chain
[second_vec_index
];
3836 = vect_create_destination_var (gimple_assign_lhs (stmt
),
3838 perm_dest
= make_ssa_name (perm_dest
);
3839 perm_stmt
= gimple_build_assign (perm_dest
,
3841 first_vec
, second_vec
,
3843 vect_finish_stmt_generation (stmt
, perm_stmt
, gsi
);
3846 /* If mask was NULL_TREE generate the requested
3847 identity transform. */
3848 perm_stmt
= SSA_NAME_DEF_STMT (first_vec
);
3850 /* Store the vector statement in NODE. */
3851 SLP_TREE_VEC_STMTS (node
)[vect_stmts_counter
++] = perm_stmt
;
3855 first_vec_index
= -1;
3856 second_vec_index
= -1;
3865 /* Vectorize SLP instance tree in postorder. */
/* NOTE(review): corrupted extraction -- statements split across lines and
   interior original lines missing (gaps in the embedded numbering, e.g.
   3875 -> 3880, 3899 -> 3901, 3998 -> 4005).  Code text kept byte-identical;
   comments only.  Restore from the upstream tree-vect-slp.c before
   compiling.  */
3868 vect_schedule_slp_instance (slp_tree node
, slp_instance instance
,
3869 scalar_stmts_to_slp_tree_map_t
*bst_map
)
3872 bool grouped_store
, is_store
;
3873 gimple_stmt_iterator si
;
3874 stmt_vec_info stmt_info
;
3875 unsigned int group_size
;
/* Only internal defs carry scalar stmts to transform.  */
3880 if (SLP_TREE_DEF_TYPE (node
) != vect_internal_def
)
3883 /* See if we have already vectorized the same set of stmts and reuse their
3884 vectorized stmts. */
3885 if (slp_tree
*leader
= bst_map
->get (SLP_TREE_SCALAR_STMTS (node
)))
3887 SLP_TREE_VEC_STMTS (node
).safe_splice (SLP_TREE_VEC_STMTS (*leader
));
3891 bst_map
->put (SLP_TREE_SCALAR_STMTS (node
).copy (), node
);
/* Postorder: vectorize all children before this node.  */
3892 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), i
, child
)
3893 vect_schedule_slp_instance (child
, instance
, bst_map
)
;
3895 /* Push SLP node def-type to stmts. */
3896 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), i
, child
)
3897 if (SLP_TREE_DEF_TYPE (child
) != vect_internal_def
)
3898 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (child
), j
, stmt
)
3899 STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt
)) = SLP_TREE_DEF_TYPE (child
);
/* The first scalar stmt is the representative of the whole node.  */
3901 stmt
= SLP_TREE_SCALAR_STMTS (node
)[0];
3902 stmt_info
= vinfo_for_stmt (stmt
);
3904 /* VECTYPE is the type of the destination. */
3905 vectype
= STMT_VINFO_VECTYPE (stmt_info
);
3906 poly_uint64 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
3907 group_size
= SLP_INSTANCE_GROUP_SIZE (instance
);
3909 gcc_assert (SLP_TREE_NUMBER_OF_VEC_STMTS (node
) != 0);
3910 if (!SLP_TREE_VEC_STMTS (node
).exists ())
3911 SLP_TREE_VEC_STMTS (node
).create (SLP_TREE_NUMBER_OF_VEC_STMTS (node
));
3913 if (dump_enabled_p ())
3915 dump_printf_loc (MSG_NOTE
,vect_location
,
3916 "------>vectorizing SLP node starting from: ");
3917 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, stmt
, 0);
3920 /* Vectorized stmts go before the last scalar stmt which is where
3921 all uses are ready. */
3922 si
= gsi_for_stmt (vect_find_last_scalar_stmt_in_slp (node
));
3924 /* Mark the first element of the reduction chain as reduction to properly
3925 transform the node. In the analysis phase only the last element of the
3926 chain is marked as reduction. */
3927 if (!STMT_VINFO_GROUPED_ACCESS (stmt_info
)
3928 && REDUC_GROUP_FIRST_ELEMENT (stmt_info
)
3929 && REDUC_GROUP_FIRST_ELEMENT (stmt_info
) == stmt
)
3931 STMT_VINFO_DEF_TYPE (stmt_info
) = vect_reduction_def
;
3932 STMT_VINFO_TYPE (stmt_info
) = reduc_vec_info_type
;
3935 /* Handle two-operation SLP nodes by vectorizing the group with
3936 both operations and then performing a merge. */
3937 if (SLP_TREE_TWO_OPERATORS (node
))
3939 enum tree_code code0
= gimple_assign_rhs_code (stmt
);
3940 enum tree_code ocode
= ERROR_MARK
;
3942 vec_perm_builder
mask (group_size
, group_size
, 1);
3943 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node
), i
, ostmt
)
3944 if (gimple_assign_rhs_code (ostmt
) != code0
)
3946 mask
.quick_push (1);
3947 ocode
= gimple_assign_rhs_code (ostmt
);
3950 mask
.quick_push (0);
3951 if (ocode
!= ERROR_MARK
)
3956 tree tmask
= NULL_TREE
;
/* Vectorize the node once per opcode, stashing each result set, then
   merge the two with a constant-mask VEC_PERM_EXPR below.  */
3957 vect_transform_stmt (stmt
, &si
, &grouped_store
, node
, instance
);
3958 v0
= SLP_TREE_VEC_STMTS (node
).copy ();
3959 SLP_TREE_VEC_STMTS (node
).truncate (0);
3960 gimple_assign_set_rhs_code (stmt
, ocode
);
3961 vect_transform_stmt (stmt
, &si
, &grouped_store
, node
, instance
);
3962 gimple_assign_set_rhs_code (stmt
, code0
);
3963 v1
= SLP_TREE_VEC_STMTS (node
).copy ();
3964 SLP_TREE_VEC_STMTS (node
).truncate (0);
3965 tree meltype
= build_nonstandard_integer_type
3966 (GET_MODE_BITSIZE (SCALAR_TYPE_MODE (TREE_TYPE (vectype
))), 1);
3967 tree mvectype
= get_same_sized_vectype (meltype
, vectype
);
3969 for (j
= 0; j
< v0
.length (); ++j
)
3971 /* Enforced by vect_build_slp_tree, which rejects variable-length
3972 vectors for SLP_TREE_TWO_OPERATORS. */
3973 unsigned int const_nunits
= nunits
.to_constant ();
3974 tree_vector_builder
melts (mvectype
, const_nunits
, 1);
3975 for (l
= 0; l
< const_nunits
; ++l
)
3977 if (k
>= group_size
)
3979 tree t
= build_int_cst (meltype
,
3980 mask
[k
++] * const_nunits
+ l
);
3981 melts
.quick_push (t
);
3983 tmask
= melts
.build ();
3985 /* ??? Not all targets support a VEC_PERM_EXPR with a
3986 constant mask that would translate to a vec_merge RTX
3987 (with their vec_perm_const_ok). We can either not
3988 vectorize in that case or let veclower do its job.
3989 Unfortunately that isn't too great and at least for
3990 plus/minus we'd eventually like to match targets
3991 vector addsub instructions. */
3993 vstmt
= gimple_build_assign (make_ssa_name (vectype
),
3995 gimple_assign_lhs (v0
[j
]),
3996 gimple_assign_lhs (v1
[j
]), tmask
);
3997 vect_finish_stmt_generation (stmt
, vstmt
, &si
);
3998 SLP_TREE_VEC_STMTS (node
).quick_push (vstmt
);
4005 is_store
= vect_transform_stmt (stmt
, &si
, &grouped_store
, node
, instance
);
4007 /* Restore stmt def-types. */
4008 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), i
, child
)
4009 if (SLP_TREE_DEF_TYPE (child
) != vect_internal_def
)
4010 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (child
), j
, stmt
)
4011 STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt
)) = vect_internal_def
;
4016 /* Replace scalar calls from SLP node NODE with setting of their lhs to zero.
4017 For loop vectorization this is done in vectorizable_call, but for SLP
4018 it needs to be deferred until end of vect_schedule_slp, because multiple
4019 SLP instances may refer to the same scalar stmt. */
4022 vect_remove_slp_scalar_calls (slp_tree node
)
4024 gimple
*stmt
, *new_stmt
;
4025 gimple_stmt_iterator gsi
;
4029 stmt_vec_info stmt_info
;
4031 if (SLP_TREE_DEF_TYPE (node
) != vect_internal_def
)
4034 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), i
, child
)
4035 vect_remove_slp_scalar_calls (child
);
4037 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node
), i
, stmt
)
4039 if (!is_gimple_call (stmt
) || gimple_bb (stmt
) == NULL
)
4041 stmt_info
= vinfo_for_stmt (stmt
);
4042 if (stmt_info
== NULL_STMT_VEC_INFO
4043 || is_pattern_stmt_p (stmt_info
)
4044 || !PURE_SLP_STMT (stmt_info
))
4046 lhs
= gimple_call_lhs (stmt
);
4047 new_stmt
= gimple_build_assign (lhs
, build_zero_cst (TREE_TYPE (lhs
)));
4048 set_vinfo_for_stmt (new_stmt
, stmt_info
);
4049 set_vinfo_for_stmt (stmt
, NULL
);
4050 STMT_VINFO_STMT (stmt_info
) = new_stmt
;
4051 gsi
= gsi_for_stmt (stmt
);
4052 gsi_replace (&gsi
, new_stmt
, false);
4053 SSA_NAME_DEF_STMT (gimple_assign_lhs (new_stmt
)) = new_stmt
;
4057 /* Generate vector code for all SLP instances in the loop/basic block. */
4060 vect_schedule_slp (vec_info
*vinfo
)
4062 vec
<slp_instance
> slp_instances
;
4063 slp_instance instance
;
4065 bool is_store
= false;
4068 scalar_stmts_to_slp_tree_map_t
*bst_map
4069 = new scalar_stmts_to_slp_tree_map_t ();
4070 slp_instances
= vinfo
->slp_instances
;
4071 FOR_EACH_VEC_ELT (slp_instances
, i
, instance
)
4073 /* Schedule the tree of INSTANCE. */
4074 is_store
= vect_schedule_slp_instance (SLP_INSTANCE_TREE (instance
),
4076 if (dump_enabled_p ())
4077 dump_printf_loc (MSG_NOTE
, vect_location
,
4078 "vectorizing stmts using SLP.\n");
4082 FOR_EACH_VEC_ELT (slp_instances
, i
, instance
)
4084 slp_tree root
= SLP_INSTANCE_TREE (instance
);
4087 gimple_stmt_iterator gsi
;
4089 /* Remove scalar call stmts. Do not do this for basic-block
4090 vectorization as not all uses may be vectorized.
4091 ??? Why should this be necessary? DCE should be able to
4092 remove the stmts itself.
4093 ??? For BB vectorization we can as well remove scalar
4094 stmts starting from the SLP tree root if they have no
4096 if (is_a
<loop_vec_info
> (vinfo
))
4097 vect_remove_slp_scalar_calls (root
);
4099 for (j
= 0; SLP_TREE_SCALAR_STMTS (root
).iterate (j
, &store
)
4100 && j
< SLP_INSTANCE_GROUP_SIZE (instance
); j
++)
4102 if (!STMT_VINFO_DATA_REF (vinfo_for_stmt (store
)))
4105 if (is_pattern_stmt_p (vinfo_for_stmt (store
)))
4106 store
= STMT_VINFO_RELATED_STMT (vinfo_for_stmt (store
));
4107 /* Free the attached stmt_vec_info and remove the stmt. */
4108 gsi
= gsi_for_stmt (store
);
4109 unlink_stmt_vdef (store
);
4110 gsi_remove (&gsi
, true);
4111 release_defs (store
);
4112 free_stmt_vec_info (store
);