1 /* SLP - Basic Block Vectorization
2 Copyright (C) 2007-2020 Free Software Foundation, Inc.
3 Contributed by Dorit Naishlos <dorit@il.ibm.com>
4 and Ira Rosen <irar@il.ibm.com>
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
24 #include "coretypes.h"
30 #include "tree-pass.h"
32 #include "optabs-tree.h"
33 #include "insn-config.h"
34 #include "recog.h" /* FIXME: for insn_data */
35 #include "fold-const.h"
36 #include "stor-layout.h"
37 #include "gimple-iterator.h"
39 #include "tree-vectorizer.h"
40 #include "langhooks.h"
41 #include "gimple-walk.h"
43 #include "tree-vector-builder.h"
44 #include "vec-perm-indices.h"
45 #include "gimple-fold.h"
46 #include "internal-fn.h"
47 #include "dump-context.h"
50 static bool vectorizable_slp_permutation (vec_info
*, gimple_stmt_iterator
*,
51 slp_tree
, stmt_vector_for_cost
*);
53 /* Initialize a SLP node. */
55 _slp_tree::_slp_tree ()
57 SLP_TREE_SCALAR_STMTS (this) = vNULL
;
58 SLP_TREE_SCALAR_OPS (this) = vNULL
;
59 SLP_TREE_VEC_STMTS (this) = vNULL
;
60 SLP_TREE_VEC_DEFS (this) = vNULL
;
61 SLP_TREE_NUMBER_OF_VEC_STMTS (this) = 0;
62 SLP_TREE_CHILDREN (this) = vNULL
;
63 SLP_TREE_LOAD_PERMUTATION (this) = vNULL
;
64 SLP_TREE_LANE_PERMUTATION (this) = vNULL
;
65 SLP_TREE_DEF_TYPE (this) = vect_uninitialized_def
;
66 SLP_TREE_CODE (this) = ERROR_MARK
;
67 SLP_TREE_VECTYPE (this) = NULL_TREE
;
68 SLP_TREE_REPRESENTATIVE (this) = NULL
;
74 /* Tear down a SLP node. */
76 _slp_tree::~_slp_tree ()
78 SLP_TREE_CHILDREN (this).release ();
79 SLP_TREE_SCALAR_STMTS (this).release ();
80 SLP_TREE_SCALAR_OPS (this).release ();
81 SLP_TREE_VEC_STMTS (this).release ();
82 SLP_TREE_VEC_DEFS (this).release ();
83 SLP_TREE_LOAD_PERMUTATION (this).release ();
84 SLP_TREE_LANE_PERMUTATION (this).release ();
87 /* Recursively free the memory allocated for the SLP tree rooted at NODE.
88 FINAL_P is true if we have vectorized the instance or if we have
89 made a final decision not to vectorize the statements in any way. */
92 vect_free_slp_tree (slp_tree node
, bool final_p
)
97 if (--node
->refcnt
!= 0)
100 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), i
, child
)
101 vect_free_slp_tree (child
, final_p
);
103 /* Don't update STMT_VINFO_NUM_SLP_USES if it isn't relevant.
104 Some statements might no longer exist, after having been
105 removed by vect_transform_stmt. Updating the remaining
106 statements would be redundant. */
109 stmt_vec_info stmt_info
;
110 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node
), i
, stmt_info
)
112 gcc_assert (STMT_VINFO_NUM_SLP_USES (stmt_info
) > 0);
113 STMT_VINFO_NUM_SLP_USES (stmt_info
)--;
120 /* Free the memory allocated for the SLP instance. FINAL_P is true if we
121 have vectorized the instance or if we have made a final decision not
122 to vectorize the statements in any way. */
125 vect_free_slp_instance (slp_instance instance
, bool final_p
)
127 vect_free_slp_tree (SLP_INSTANCE_TREE (instance
), final_p
);
128 SLP_INSTANCE_LOADS (instance
).release ();
133 /* Create an SLP node for SCALAR_STMTS. */
136 vect_create_new_slp_node (vec
<stmt_vec_info
> scalar_stmts
, unsigned nops
)
138 slp_tree node
= new _slp_tree
;
139 SLP_TREE_SCALAR_STMTS (node
) = scalar_stmts
;
140 SLP_TREE_CHILDREN (node
).create (nops
);
141 SLP_TREE_DEF_TYPE (node
) = vect_internal_def
;
142 SLP_TREE_REPRESENTATIVE (node
) = scalar_stmts
[0];
143 SLP_TREE_LANES (node
) = scalar_stmts
.length ();
146 stmt_vec_info stmt_info
;
147 FOR_EACH_VEC_ELT (scalar_stmts
, i
, stmt_info
)
148 STMT_VINFO_NUM_SLP_USES (stmt_info
)++;
153 /* Create an SLP node for OPS. */
156 vect_create_new_slp_node (vec
<tree
> ops
)
158 slp_tree node
= new _slp_tree
;
159 SLP_TREE_SCALAR_OPS (node
) = ops
;
160 SLP_TREE_DEF_TYPE (node
) = vect_external_def
;
161 SLP_TREE_LANES (node
) = ops
.length ();
166 /* This structure is used in creation of an SLP tree. Each instance
167 corresponds to the same operand in a group of scalar stmts in an SLP
169 typedef struct _slp_oprnd_info
171 /* Def-stmts for the operands. */
172 vec
<stmt_vec_info
> def_stmts
;
175 /* Information about the first statement, its vector def-type, type, the
176 operand itself in case it's constant, and an indication if it's a pattern
179 enum vect_def_type first_dt
;
184 /* Allocate operands info for NOPS operands, and GROUP_SIZE def-stmts for each
186 static vec
<slp_oprnd_info
>
187 vect_create_oprnd_info (int nops
, int group_size
)
190 slp_oprnd_info oprnd_info
;
191 vec
<slp_oprnd_info
> oprnds_info
;
193 oprnds_info
.create (nops
);
194 for (i
= 0; i
< nops
; i
++)
196 oprnd_info
= XNEW (struct _slp_oprnd_info
);
197 oprnd_info
->def_stmts
.create (group_size
);
198 oprnd_info
->ops
.create (group_size
);
199 oprnd_info
->first_dt
= vect_uninitialized_def
;
200 oprnd_info
->first_op_type
= NULL_TREE
;
201 oprnd_info
->any_pattern
= false;
202 oprnds_info
.quick_push (oprnd_info
);
209 /* Free operands info. */
212 vect_free_oprnd_info (vec
<slp_oprnd_info
> &oprnds_info
)
215 slp_oprnd_info oprnd_info
;
217 FOR_EACH_VEC_ELT (oprnds_info
, i
, oprnd_info
)
219 oprnd_info
->def_stmts
.release ();
220 oprnd_info
->ops
.release ();
221 XDELETE (oprnd_info
);
224 oprnds_info
.release ();
228 /* Return true if STMTS contains a pattern statement. */
231 vect_contains_pattern_stmt_p (vec
<stmt_vec_info
> stmts
)
233 stmt_vec_info stmt_info
;
235 FOR_EACH_VEC_ELT (stmts
, i
, stmt_info
)
236 if (is_pattern_stmt_p (stmt_info
))
241 /* Return true when all lanes in the external or constant NODE have
245 vect_slp_tree_uniform_p (slp_tree node
)
247 gcc_assert (SLP_TREE_DEF_TYPE (node
) == vect_constant_def
248 || SLP_TREE_DEF_TYPE (node
) == vect_external_def
);
250 /* Pre-exsting vectors. */
251 if (SLP_TREE_SCALAR_OPS (node
).is_empty ())
255 tree op
, first
= NULL_TREE
;
256 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_OPS (node
), i
, op
)
259 else if (!operand_equal_p (first
, op
, 0))
265 /* Find the place of the data-ref in STMT_INFO in the interleaving chain
266 that starts from FIRST_STMT_INFO. Return -1 if the data-ref is not a part
270 vect_get_place_in_interleaving_chain (stmt_vec_info stmt_info
,
271 stmt_vec_info first_stmt_info
)
273 stmt_vec_info next_stmt_info
= first_stmt_info
;
276 if (first_stmt_info
!= DR_GROUP_FIRST_ELEMENT (stmt_info
))
281 if (next_stmt_info
== stmt_info
)
283 next_stmt_info
= DR_GROUP_NEXT_ELEMENT (next_stmt_info
);
285 result
+= DR_GROUP_GAP (next_stmt_info
);
287 while (next_stmt_info
);
292 /* Check whether it is possible to load COUNT elements of type ELT_TYPE
293 using the method implemented by duplicate_and_interleave. Return true
294 if so, returning the number of intermediate vectors in *NVECTORS_OUT
295 (if nonnull) and the type of each intermediate vector in *VECTOR_TYPE_OUT
299 can_duplicate_and_interleave_p (vec_info
*vinfo
, unsigned int count
,
300 tree elt_type
, unsigned int *nvectors_out
,
301 tree
*vector_type_out
,
304 tree base_vector_type
= get_vectype_for_scalar_type (vinfo
, elt_type
, count
);
305 if (!base_vector_type
|| !VECTOR_MODE_P (TYPE_MODE (base_vector_type
)))
308 machine_mode base_vector_mode
= TYPE_MODE (base_vector_type
);
309 poly_int64 elt_bytes
= count
* GET_MODE_UNIT_SIZE (base_vector_mode
);
310 unsigned int nvectors
= 1;
313 scalar_int_mode int_mode
;
314 poly_int64 elt_bits
= elt_bytes
* BITS_PER_UNIT
;
315 if (int_mode_for_size (elt_bits
, 1).exists (&int_mode
))
317 /* Get the natural vector type for this SLP group size. */
318 tree int_type
= build_nonstandard_integer_type
319 (GET_MODE_BITSIZE (int_mode
), 1);
321 = get_vectype_for_scalar_type (vinfo
, int_type
, count
);
323 && VECTOR_MODE_P (TYPE_MODE (vector_type
))
324 && known_eq (GET_MODE_SIZE (TYPE_MODE (vector_type
)),
325 GET_MODE_SIZE (base_vector_mode
)))
327 /* Try fusing consecutive sequences of COUNT / NVECTORS elements
328 together into elements of type INT_TYPE and using the result
329 to build NVECTORS vectors. */
330 poly_uint64 nelts
= GET_MODE_NUNITS (TYPE_MODE (vector_type
));
331 vec_perm_builder
sel1 (nelts
, 2, 3);
332 vec_perm_builder
sel2 (nelts
, 2, 3);
333 poly_int64 half_nelts
= exact_div (nelts
, 2);
334 for (unsigned int i
= 0; i
< 3; ++i
)
337 sel1
.quick_push (i
+ nelts
);
338 sel2
.quick_push (half_nelts
+ i
);
339 sel2
.quick_push (half_nelts
+ i
+ nelts
);
341 vec_perm_indices
indices1 (sel1
, 2, nelts
);
342 vec_perm_indices
indices2 (sel2
, 2, nelts
);
343 if (can_vec_perm_const_p (TYPE_MODE (vector_type
), indices1
)
344 && can_vec_perm_const_p (TYPE_MODE (vector_type
), indices2
))
347 *nvectors_out
= nvectors
;
349 *vector_type_out
= vector_type
;
352 permutes
[0] = vect_gen_perm_mask_checked (vector_type
,
354 permutes
[1] = vect_gen_perm_mask_checked (vector_type
,
361 if (!multiple_p (elt_bytes
, 2, &elt_bytes
))
367 /* Get the defs for the rhs of STMT (collect them in OPRNDS_INFO), check that
368 they are of a valid type and that they match the defs of the first stmt of
369 the SLP group (stored in OPRNDS_INFO). This function tries to match stmts
370 by swapping operands of STMTS[STMT_NUM] when possible. Non-zero *SWAP
371 indicates swap is required for cond_expr stmts. Specifically, *SWAP
372 is 1 if STMT is cond and operands of comparison need to be swapped;
373 *SWAP is 2 if STMT is cond and code of comparison needs to be inverted.
374 If there is any operand swap in this function, *SWAP is set to non-zero
376 If there was a fatal error return -1; if the error could be corrected by
377 swapping operands of father node of this one, return 1; if everything is
380 vect_get_and_check_slp_defs (vec_info
*vinfo
, unsigned char *swap
,
381 vec
<stmt_vec_info
> stmts
, unsigned stmt_num
,
382 vec
<slp_oprnd_info
> *oprnds_info
)
384 stmt_vec_info stmt_info
= stmts
[stmt_num
];
386 unsigned int i
, number_of_oprnds
;
387 enum vect_def_type dt
= vect_uninitialized_def
;
388 slp_oprnd_info oprnd_info
;
389 int first_op_idx
= 1;
390 unsigned int commutative_op
= -1U;
391 bool first_op_cond
= false;
392 bool first
= stmt_num
== 0;
394 if (gcall
*stmt
= dyn_cast
<gcall
*> (stmt_info
->stmt
))
396 number_of_oprnds
= gimple_call_num_args (stmt
);
398 if (gimple_call_internal_p (stmt
))
400 internal_fn ifn
= gimple_call_internal_fn (stmt
);
401 commutative_op
= first_commutative_argument (ifn
);
403 /* Masked load, only look at mask. */
404 if (ifn
== IFN_MASK_LOAD
)
406 number_of_oprnds
= 1;
407 /* Mask operand index. */
412 else if (gassign
*stmt
= dyn_cast
<gassign
*> (stmt_info
->stmt
))
414 enum tree_code code
= gimple_assign_rhs_code (stmt
);
415 number_of_oprnds
= gimple_num_ops (stmt
) - 1;
416 /* Swap can only be done for cond_expr if asked to, otherwise we
417 could result in different comparison code to the first stmt. */
418 if (code
== COND_EXPR
419 && COMPARISON_CLASS_P (gimple_assign_rhs1 (stmt
)))
421 first_op_cond
= true;
425 commutative_op
= commutative_tree_code (code
) ? 0U : -1U;
430 bool swapped
= (*swap
!= 0);
431 gcc_assert (!swapped
|| first_op_cond
);
432 for (i
= 0; i
< number_of_oprnds
; i
++)
437 /* Map indicating how operands of cond_expr should be swapped. */
438 int maps
[3][4] = {{0, 1, 2, 3}, {1, 0, 2, 3}, {0, 1, 3, 2}};
439 int *map
= maps
[*swap
];
442 oprnd
= TREE_OPERAND (gimple_op (stmt_info
->stmt
,
443 first_op_idx
), map
[i
]);
445 oprnd
= gimple_op (stmt_info
->stmt
, map
[i
]);
448 oprnd
= gimple_op (stmt_info
->stmt
, first_op_idx
+ (swapped
? !i
: i
));
449 if (TREE_CODE (oprnd
) == VIEW_CONVERT_EXPR
)
450 oprnd
= TREE_OPERAND (oprnd
, 0);
452 oprnd_info
= (*oprnds_info
)[i
];
454 stmt_vec_info def_stmt_info
;
455 if (!vect_is_simple_use (oprnd
, vinfo
, &dt
, &def_stmt_info
))
457 if (dump_enabled_p ())
458 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
459 "Build SLP failed: can't analyze def for %T\n",
465 if (def_stmt_info
&& is_pattern_stmt_p (def_stmt_info
))
466 oprnd_info
->any_pattern
= true;
470 /* For the swapping logic below force vect_reduction_def
471 for the reduction op in a SLP reduction group. */
472 if (!STMT_VINFO_DATA_REF (stmt_info
)
473 && REDUC_GROUP_FIRST_ELEMENT (stmt_info
)
474 && (int)i
== STMT_VINFO_REDUC_IDX (stmt_info
)
476 dt
= vect_reduction_def
;
477 oprnd_info
->first_dt
= dt
;
478 oprnd_info
->first_op_type
= TREE_TYPE (oprnd
);
482 /* Not first stmt of the group, check that the def-stmt/s match
483 the def-stmt/s of the first stmt. Allow different definition
484 types for reduction chains: the first stmt must be a
485 vect_reduction_def (a phi node), and the rest
486 end in the reduction chain. */
487 tree type
= TREE_TYPE (oprnd
);
488 if ((oprnd_info
->first_dt
!= dt
489 && !(oprnd_info
->first_dt
== vect_reduction_def
490 && !STMT_VINFO_DATA_REF (stmt_info
)
491 && REDUC_GROUP_FIRST_ELEMENT (stmt_info
)
493 && !STMT_VINFO_DATA_REF (def_stmt_info
)
494 && (REDUC_GROUP_FIRST_ELEMENT (def_stmt_info
)
495 == REDUC_GROUP_FIRST_ELEMENT (stmt_info
)))
496 && !((oprnd_info
->first_dt
== vect_external_def
497 || oprnd_info
->first_dt
== vect_constant_def
)
498 && (dt
== vect_external_def
499 || dt
== vect_constant_def
)))
500 || !types_compatible_p (oprnd_info
->first_op_type
, type
)
501 || (!STMT_VINFO_DATA_REF (stmt_info
)
502 && REDUC_GROUP_FIRST_ELEMENT (stmt_info
)
504 || STMT_VINFO_DATA_REF (def_stmt_info
)
505 || (REDUC_GROUP_FIRST_ELEMENT (def_stmt_info
)
506 != REDUC_GROUP_FIRST_ELEMENT (stmt_info
)))
507 != (oprnd_info
->first_dt
!= vect_reduction_def
))))
509 /* Try swapping operands if we got a mismatch. */
510 if (i
== commutative_op
&& !swapped
)
512 if (dump_enabled_p ())
513 dump_printf_loc (MSG_NOTE
, vect_location
,
514 "trying swapped operands\n");
519 if (dump_enabled_p ())
520 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
521 "Build SLP failed: different types\n");
525 if ((dt
== vect_constant_def
526 || dt
== vect_external_def
)
527 && !GET_MODE_SIZE (vinfo
->vector_mode
).is_constant ()
528 && (TREE_CODE (type
) == BOOLEAN_TYPE
529 || !can_duplicate_and_interleave_p (vinfo
, stmts
.length (),
532 if (dump_enabled_p ())
533 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
534 "Build SLP failed: invalid type of def "
535 "for variable-length SLP %T\n", oprnd
);
540 /* Check the types of the definitions. */
543 case vect_external_def
:
544 /* Make sure to demote the overall operand to external. */
545 oprnd_info
->first_dt
= vect_external_def
;
547 case vect_constant_def
:
548 oprnd_info
->def_stmts
.quick_push (NULL
);
549 oprnd_info
->ops
.quick_push (oprnd
);
552 case vect_internal_def
:
553 case vect_reduction_def
:
554 if (oprnd_info
->first_dt
== vect_reduction_def
555 && !STMT_VINFO_DATA_REF (stmt_info
)
556 && REDUC_GROUP_FIRST_ELEMENT (stmt_info
)
557 && !STMT_VINFO_DATA_REF (def_stmt_info
)
558 && (REDUC_GROUP_FIRST_ELEMENT (def_stmt_info
)
559 == REDUC_GROUP_FIRST_ELEMENT (stmt_info
)))
561 /* For a SLP reduction chain we want to duplicate the
562 reduction to each of the chain members. That gets
563 us a sane SLP graph (still the stmts are not 100%
564 correct wrt the initial values). */
566 oprnd_info
->def_stmts
.quick_push (oprnd_info
->def_stmts
[0]);
567 oprnd_info
->ops
.quick_push (oprnd_info
->ops
[0]);
571 case vect_induction_def
:
572 oprnd_info
->def_stmts
.quick_push (def_stmt_info
);
573 oprnd_info
->ops
.quick_push (oprnd
);
577 /* FORNOW: Not supported. */
578 if (dump_enabled_p ())
579 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
580 "Build SLP failed: illegal type of def %T\n",
590 if (dump_enabled_p ())
591 dump_printf_loc (MSG_NOTE
, vect_location
,
592 "swapped operands to match def types in %G",
600 /* Try to assign vector type VECTYPE to STMT_INFO for BB vectorization.
601 Return true if we can, meaning that this choice doesn't conflict with
602 existing SLP nodes that use STMT_INFO. */
605 vect_update_shared_vectype (stmt_vec_info stmt_info
, tree vectype
)
607 tree old_vectype
= STMT_VINFO_VECTYPE (stmt_info
);
608 if (old_vectype
&& useless_type_conversion_p (vectype
, old_vectype
))
611 if (STMT_VINFO_GROUPED_ACCESS (stmt_info
)
612 && DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info
)))
614 /* We maintain the invariant that if any statement in the group is
615 used, all other members of the group have the same vector type. */
616 stmt_vec_info first_info
= DR_GROUP_FIRST_ELEMENT (stmt_info
);
617 stmt_vec_info member_info
= first_info
;
618 for (; member_info
; member_info
= DR_GROUP_NEXT_ELEMENT (member_info
))
619 if (STMT_VINFO_NUM_SLP_USES (member_info
) > 0
620 || is_pattern_stmt_p (member_info
))
625 for (member_info
= first_info
; member_info
;
626 member_info
= DR_GROUP_NEXT_ELEMENT (member_info
))
627 STMT_VINFO_VECTYPE (member_info
) = vectype
;
631 else if (STMT_VINFO_NUM_SLP_USES (stmt_info
) == 0
632 && !is_pattern_stmt_p (stmt_info
))
634 STMT_VINFO_VECTYPE (stmt_info
) = vectype
;
638 if (dump_enabled_p ())
640 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
641 "Build SLP failed: incompatible vector"
642 " types for: %G", stmt_info
->stmt
);
643 dump_printf_loc (MSG_NOTE
, vect_location
,
644 " old vector type: %T\n", old_vectype
);
645 dump_printf_loc (MSG_NOTE
, vect_location
,
646 " new vector type: %T\n", vectype
);
651 /* Return true if call statements CALL1 and CALL2 are similar enough
652 to be combined into the same SLP group. */
655 compatible_calls_p (gcall
*call1
, gcall
*call2
)
657 unsigned int nargs
= gimple_call_num_args (call1
);
658 if (nargs
!= gimple_call_num_args (call2
))
661 if (gimple_call_combined_fn (call1
) != gimple_call_combined_fn (call2
))
664 if (gimple_call_internal_p (call1
))
666 if (!types_compatible_p (TREE_TYPE (gimple_call_lhs (call1
)),
667 TREE_TYPE (gimple_call_lhs (call2
))))
669 for (unsigned int i
= 0; i
< nargs
; ++i
)
670 if (!types_compatible_p (TREE_TYPE (gimple_call_arg (call1
, i
)),
671 TREE_TYPE (gimple_call_arg (call2
, i
))))
676 if (!operand_equal_p (gimple_call_fn (call1
),
677 gimple_call_fn (call2
), 0))
680 if (gimple_call_fntype (call1
) != gimple_call_fntype (call2
))
686 /* A subroutine of vect_build_slp_tree for checking VECTYPE, which is the
687 caller's attempt to find the vector type in STMT_INFO with the narrowest
688 element type. Return true if VECTYPE is nonnull and if it is valid
689 for STMT_INFO. When returning true, update MAX_NUNITS to reflect the
690 number of units in VECTYPE. GROUP_SIZE and MAX_NUNITS are as for
691 vect_build_slp_tree. */
694 vect_record_max_nunits (vec_info
*vinfo
, stmt_vec_info stmt_info
,
695 unsigned int group_size
,
696 tree vectype
, poly_uint64
*max_nunits
)
700 if (dump_enabled_p ())
701 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
702 "Build SLP failed: unsupported data-type in %G\n",
704 /* Fatal mismatch. */
708 /* If populating the vector type requires unrolling then fail
709 before adjusting *max_nunits for basic-block vectorization. */
710 poly_uint64 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
711 unsigned HOST_WIDE_INT const_nunits
;
712 if (is_a
<bb_vec_info
> (vinfo
)
713 && (!nunits
.is_constant (&const_nunits
)
714 || const_nunits
> group_size
))
716 if (dump_enabled_p ())
717 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
718 "Build SLP failed: unrolling required "
719 "in basic block SLP\n");
720 /* Fatal mismatch. */
724 /* In case of multiple types we need to detect the smallest type. */
725 vect_update_max_nunits (max_nunits
, vectype
);
729 /* Verify if the scalar stmts STMTS are isomorphic, require data
730 permutation or are of unsupported types of operation. Return
731 true if they are, otherwise return false and indicate in *MATCHES
732 which stmts are not isomorphic to the first one. If MATCHES[0]
733 is false then this indicates the comparison could not be
734 carried out or the stmts will never be vectorized by SLP.
736 Note COND_EXPR is possibly isomorphic to another one after swapping its
737 operands. Set SWAP[i] to 1 if stmt I is COND_EXPR and isomorphic to
738 the first stmt by swapping the two operands of comparison; set SWAP[i]
739 to 2 if stmt I is isormorphic to the first stmt by inverting the code
740 of comparison. Take A1 >= B1 ? X1 : Y1 as an exmple, it can be swapped
741 to (B1 <= A1 ? X1 : Y1); or be inverted to (A1 < B1) ? Y1 : X1. */
744 vect_build_slp_tree_1 (vec_info
*vinfo
, unsigned char *swap
,
745 vec
<stmt_vec_info
> stmts
, unsigned int group_size
,
746 poly_uint64
*max_nunits
, bool *matches
,
747 bool *two_operators
, tree
*node_vectype
)
750 stmt_vec_info first_stmt_info
= stmts
[0];
751 enum tree_code first_stmt_code
= ERROR_MARK
;
752 enum tree_code alt_stmt_code
= ERROR_MARK
;
753 enum tree_code rhs_code
= ERROR_MARK
;
754 enum tree_code first_cond_code
= ERROR_MARK
;
756 bool need_same_oprnds
= false;
757 tree vectype
= NULL_TREE
, first_op1
= NULL_TREE
;
760 machine_mode optab_op2_mode
;
761 machine_mode vec_mode
;
762 stmt_vec_info first_load
= NULL
, prev_first_load
= NULL
;
765 /* For every stmt in NODE find its def stmt/s. */
766 stmt_vec_info stmt_info
;
767 FOR_EACH_VEC_ELT (stmts
, i
, stmt_info
)
769 gimple
*stmt
= stmt_info
->stmt
;
773 if (dump_enabled_p ())
774 dump_printf_loc (MSG_NOTE
, vect_location
, "Build SLP for %G", stmt
);
776 /* Fail to vectorize statements marked as unvectorizable. */
777 if (!STMT_VINFO_VECTORIZABLE (stmt_info
))
779 if (dump_enabled_p ())
780 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
781 "Build SLP failed: unvectorizable statement %G",
783 /* Fatal mismatch. */
788 lhs
= gimple_get_lhs (stmt
);
789 if (lhs
== NULL_TREE
)
791 if (dump_enabled_p ())
792 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
793 "Build SLP failed: not GIMPLE_ASSIGN nor "
794 "GIMPLE_CALL %G", stmt
);
795 /* Fatal mismatch. */
801 if (!vect_get_vector_types_for_stmt (vinfo
, stmt_info
, &vectype
,
802 &nunits_vectype
, group_size
)
804 && !vect_record_max_nunits (vinfo
, stmt_info
, group_size
,
805 nunits_vectype
, max_nunits
)))
807 /* Fatal mismatch. */
812 gcc_assert (vectype
);
814 if (is_a
<bb_vec_info
> (vinfo
)
815 && !vect_update_shared_vectype (stmt_info
, vectype
))
818 gcall
*call_stmt
= dyn_cast
<gcall
*> (stmt
);
821 rhs_code
= CALL_EXPR
;
823 if (gimple_call_internal_p (stmt
, IFN_MASK_LOAD
))
825 else if ((gimple_call_internal_p (call_stmt
)
826 && (!vectorizable_internal_fn_p
827 (gimple_call_internal_fn (call_stmt
))))
828 || gimple_call_tail_p (call_stmt
)
829 || gimple_call_noreturn_p (call_stmt
)
830 || !gimple_call_nothrow_p (call_stmt
)
831 || gimple_call_chain (call_stmt
))
833 if (dump_enabled_p ())
834 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
835 "Build SLP failed: unsupported call type %G",
837 /* Fatal mismatch. */
844 rhs_code
= gimple_assign_rhs_code (stmt
);
845 load_p
= gimple_vuse (stmt
);
848 /* Check the operation. */
851 *node_vectype
= vectype
;
852 first_stmt_code
= rhs_code
;
854 /* Shift arguments should be equal in all the packed stmts for a
855 vector shift with scalar shift operand. */
856 if (rhs_code
== LSHIFT_EXPR
|| rhs_code
== RSHIFT_EXPR
857 || rhs_code
== LROTATE_EXPR
858 || rhs_code
== RROTATE_EXPR
)
860 vec_mode
= TYPE_MODE (vectype
);
862 /* First see if we have a vector/vector shift. */
863 optab
= optab_for_tree_code (rhs_code
, vectype
,
867 || optab_handler (optab
, vec_mode
) == CODE_FOR_nothing
)
869 /* No vector/vector shift, try for a vector/scalar shift. */
870 optab
= optab_for_tree_code (rhs_code
, vectype
,
875 if (dump_enabled_p ())
876 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
877 "Build SLP failed: no optab.\n");
878 /* Fatal mismatch. */
882 icode
= (int) optab_handler (optab
, vec_mode
);
883 if (icode
== CODE_FOR_nothing
)
885 if (dump_enabled_p ())
886 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
888 "op not supported by target.\n");
889 /* Fatal mismatch. */
893 optab_op2_mode
= insn_data
[icode
].operand
[2].mode
;
894 if (!VECTOR_MODE_P (optab_op2_mode
))
896 need_same_oprnds
= true;
897 first_op1
= gimple_assign_rhs2 (stmt
);
901 else if (rhs_code
== WIDEN_LSHIFT_EXPR
)
903 need_same_oprnds
= true;
904 first_op1
= gimple_assign_rhs2 (stmt
);
907 && rhs_code
== BIT_FIELD_REF
)
909 tree vec
= TREE_OPERAND (gimple_assign_rhs1 (stmt
), 0);
910 if (TREE_CODE (vec
) != SSA_NAME
911 || !types_compatible_p (vectype
, TREE_TYPE (vec
)))
913 if (dump_enabled_p ())
914 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
916 "BIT_FIELD_REF not supported\n");
917 /* Fatal mismatch. */
923 && gimple_call_internal_p (call_stmt
, IFN_DIV_POW2
))
925 need_same_oprnds
= true;
926 first_op1
= gimple_call_arg (call_stmt
, 1);
931 if (first_stmt_code
!= rhs_code
932 && alt_stmt_code
== ERROR_MARK
)
933 alt_stmt_code
= rhs_code
;
934 if (first_stmt_code
!= rhs_code
935 && (first_stmt_code
!= IMAGPART_EXPR
936 || rhs_code
!= REALPART_EXPR
)
937 && (first_stmt_code
!= REALPART_EXPR
938 || rhs_code
!= IMAGPART_EXPR
)
939 /* Handle mismatches in plus/minus by computing both
940 and merging the results. */
941 && !((first_stmt_code
== PLUS_EXPR
942 || first_stmt_code
== MINUS_EXPR
)
943 && (alt_stmt_code
== PLUS_EXPR
944 || alt_stmt_code
== MINUS_EXPR
)
945 && rhs_code
== alt_stmt_code
)
946 && !(STMT_VINFO_GROUPED_ACCESS (stmt_info
)
947 && (first_stmt_code
== ARRAY_REF
948 || first_stmt_code
== BIT_FIELD_REF
949 || first_stmt_code
== INDIRECT_REF
950 || first_stmt_code
== COMPONENT_REF
951 || first_stmt_code
== MEM_REF
)))
953 if (dump_enabled_p ())
955 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
956 "Build SLP failed: different operation "
958 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
959 "original stmt %G", first_stmt_info
->stmt
);
965 if (need_same_oprnds
)
967 tree other_op1
= (call_stmt
968 ? gimple_call_arg (call_stmt
, 1)
969 : gimple_assign_rhs2 (stmt
));
970 if (!operand_equal_p (first_op1
, other_op1
, 0))
972 if (dump_enabled_p ())
973 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
974 "Build SLP failed: different shift "
975 "arguments in %G", stmt
);
981 && first_stmt_code
== BIT_FIELD_REF
982 && (TREE_OPERAND (gimple_assign_rhs1 (first_stmt_info
->stmt
), 0)
983 != TREE_OPERAND (gimple_assign_rhs1 (stmt_info
->stmt
), 0)))
985 if (dump_enabled_p ())
986 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
987 "Build SLP failed: different BIT_FIELD_REF "
988 "arguments in %G", stmt
);
993 if (!load_p
&& rhs_code
== CALL_EXPR
)
995 if (!compatible_calls_p (as_a
<gcall
*> (stmts
[0]->stmt
),
996 as_a
<gcall
*> (stmt
)))
998 if (dump_enabled_p ())
999 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1000 "Build SLP failed: different calls in %G",
1008 /* Grouped store or load. */
1009 if (STMT_VINFO_GROUPED_ACCESS (stmt_info
))
1011 if (REFERENCE_CLASS_P (lhs
))
1019 first_load
= DR_GROUP_FIRST_ELEMENT (stmt_info
);
1020 if (prev_first_load
)
1022 /* Check that there are no loads from different interleaving
1023 chains in the same node. */
1024 if (prev_first_load
!= first_load
)
1026 if (dump_enabled_p ())
1027 dump_printf_loc (MSG_MISSED_OPTIMIZATION
,
1029 "Build SLP failed: different "
1030 "interleaving chains in one node %G",
1037 prev_first_load
= first_load
;
1039 } /* Grouped access. */
1044 /* Not grouped load. */
1045 if (dump_enabled_p ())
1046 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1047 "Build SLP failed: not grouped load %G", stmt
);
1049 /* FORNOW: Not grouped loads are not supported. */
1050 /* Fatal mismatch. */
1055 /* Not memory operation. */
1056 if (TREE_CODE_CLASS (rhs_code
) != tcc_binary
1057 && TREE_CODE_CLASS (rhs_code
) != tcc_unary
1058 && TREE_CODE_CLASS (rhs_code
) != tcc_expression
1059 && TREE_CODE_CLASS (rhs_code
) != tcc_comparison
1060 && rhs_code
!= VIEW_CONVERT_EXPR
1061 && rhs_code
!= CALL_EXPR
1062 && rhs_code
!= BIT_FIELD_REF
)
1064 if (dump_enabled_p ())
1065 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1066 "Build SLP failed: operation unsupported %G",
1068 /* Fatal mismatch. */
1073 if (rhs_code
== COND_EXPR
)
1075 tree cond_expr
= gimple_assign_rhs1 (stmt
);
1076 enum tree_code cond_code
= TREE_CODE (cond_expr
);
1077 enum tree_code swap_code
= ERROR_MARK
;
1078 enum tree_code invert_code
= ERROR_MARK
;
1081 first_cond_code
= TREE_CODE (cond_expr
);
1082 else if (TREE_CODE_CLASS (cond_code
) == tcc_comparison
)
1084 bool honor_nans
= HONOR_NANS (TREE_OPERAND (cond_expr
, 0));
1085 swap_code
= swap_tree_comparison (cond_code
);
1086 invert_code
= invert_tree_comparison (cond_code
, honor_nans
);
1089 if (first_cond_code
== cond_code
)
1091 /* Isomorphic can be achieved by swapping. */
1092 else if (first_cond_code
== swap_code
)
1094 /* Isomorphic can be achieved by inverting. */
1095 else if (first_cond_code
== invert_code
)
1099 if (dump_enabled_p ())
1100 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1101 "Build SLP failed: different"
1102 " operation %G", stmt
);
1112 for (i
= 0; i
< group_size
; ++i
)
1116 /* If we allowed a two-operation SLP node verify the target can cope
1117 with the permute we are going to use. */
1118 if (alt_stmt_code
!= ERROR_MARK
1119 && TREE_CODE_CLASS (alt_stmt_code
) != tcc_reference
)
1121 *two_operators
= true;
1127 /* Traits for the hash_set to record failed SLP builds for a stmt set.
1128 Note we never remove apart from at destruction time so we do not
1129 need a special value for deleted that differs from empty. */
1132 typedef vec
<stmt_vec_info
> value_type
;
1133 typedef vec
<stmt_vec_info
> compare_type
;
1134 static inline hashval_t
hash (value_type
);
1135 static inline bool equal (value_type existing
, value_type candidate
);
1136 static inline bool is_empty (value_type x
) { return !x
.exists (); }
1137 static inline bool is_deleted (value_type x
) { return !x
.exists (); }
1138 static const bool empty_zero_p
= true;
1139 static inline void mark_empty (value_type
&x
) { x
.release (); }
1140 static inline void mark_deleted (value_type
&x
) { x
.release (); }
1141 static inline void remove (value_type
&x
) { x
.release (); }
1144 bst_traits::hash (value_type x
)
1147 for (unsigned i
= 0; i
< x
.length (); ++i
)
1148 h
.add_int (gimple_uid (x
[i
]->stmt
));
1152 bst_traits::equal (value_type existing
, value_type candidate
)
1154 if (existing
.length () != candidate
.length ())
1156 for (unsigned i
= 0; i
< existing
.length (); ++i
)
1157 if (existing
[i
] != candidate
[i
])
1162 typedef hash_map
<vec
<gimple
*>, slp_tree
,
1163 simple_hashmap_traits
<bst_traits
, slp_tree
> >
1164 scalar_stmts_to_slp_tree_map_t
;
1167 vect_build_slp_tree_2 (vec_info
*vinfo
,
1168 vec
<stmt_vec_info
> stmts
, unsigned int group_size
,
1169 poly_uint64
*max_nunits
,
1170 bool *matches
, unsigned *npermutes
, unsigned *tree_size
,
1171 scalar_stmts_to_slp_tree_map_t
*bst_map
);
/* Build (or re-use) the SLP node for the statement sequence STMTS.
   First consult BST_MAP: on a cache hit, dump a note, take another
   reference on the cached node and fold its max_nunits into *MAX_NUNITS.
   Otherwise build the node with vect_build_slp_tree_2 and record it in
   BST_MAP keyed on a copy of STMTS.  (Several lines — braces, returns
   and part of the argument list of the _2 call — are not visible in
   this excerpt.)  */
1174 vect_build_slp_tree (vec_info
*vinfo
,
1175 vec
<stmt_vec_info
> stmts
, unsigned int group_size
,
1176 poly_uint64
*max_nunits
,
1177 bool *matches
, unsigned *npermutes
, unsigned *tree_size
,
1178 scalar_stmts_to_slp_tree_map_t
*bst_map
)
/* Cache lookup: re-use an earlier (possibly failed, i.e. NULL) result.  */
1180 if (slp_tree
*leader
= bst_map
->get (stmts
))
1182 if (dump_enabled_p ())
1183 dump_printf_loc (MSG_NOTE
, vect_location
, "re-using %sSLP tree %p\n",
1184 *leader
? "" : "failed ", *leader
);
/* The caller becomes another owner of the cached node.  */
1187 (*leader
)->refcnt
++;
1188 vect_update_max_nunits (max_nunits
, (*leader
)->max_nunits
);
/* Cache miss: build the subtree, tracking its own max_nunits.  */
1192 poly_uint64 this_max_nunits
= 1;
1193 slp_tree res
= vect_build_slp_tree_2 (vinfo
, stmts
, group_size
,
1195 matches
, npermutes
, tree_size
, bst_map
);
1198 res
->max_nunits
= this_max_nunits
;
1199 vect_update_max_nunits (max_nunits
, this_max_nunits
);
1200 /* Keep a reference for the bst_map use. */
/* The map owns its own copy of the key vector.  */
1203 bst_map
->put (stmts
.copy (), res
);
1207 /* Recursively build an SLP tree starting from NODE.
1208 Fail (and return a value not equal to zero) if def-stmts are not
1209 isomorphic, require data permutation or are of unsupported types of
1210 operation. Otherwise, return 0.
1211 The value returned is the depth in the SLP tree where a mismatch
1215 vect_build_slp_tree_2 (vec_info
*vinfo
,
1216 vec
<stmt_vec_info
> stmts
, unsigned int group_size
,
1217 poly_uint64
*max_nunits
,
1218 bool *matches
, unsigned *npermutes
, unsigned *tree_size
,
1219 scalar_stmts_to_slp_tree_map_t
*bst_map
)
1221 unsigned nops
, i
, this_tree_size
= 0;
1222 poly_uint64 this_max_nunits
= *max_nunits
;
1227 stmt_vec_info stmt_info
= stmts
[0];
1228 if (gcall
*stmt
= dyn_cast
<gcall
*> (stmt_info
->stmt
))
1229 nops
= gimple_call_num_args (stmt
);
1230 else if (gassign
*stmt
= dyn_cast
<gassign
*> (stmt_info
->stmt
))
1232 nops
= gimple_num_ops (stmt
) - 1;
1233 if (gimple_assign_rhs_code (stmt
) == COND_EXPR
)
1236 else if (is_a
<gphi
*> (stmt_info
->stmt
))
1241 /* If the SLP node is a PHI (induction or reduction), terminate
1243 if (gphi
*stmt
= dyn_cast
<gphi
*> (stmt_info
->stmt
))
1245 tree scalar_type
= TREE_TYPE (PHI_RESULT (stmt
));
1246 tree vectype
= get_vectype_for_scalar_type (vinfo
, scalar_type
);
1247 if (!vect_record_max_nunits (vinfo
, stmt_info
, group_size
, vectype
,
1251 vect_def_type def_type
= STMT_VINFO_DEF_TYPE (stmt_info
);
1252 /* Induction from different IVs is not supported. */
1253 if (def_type
== vect_induction_def
)
1255 stmt_vec_info other_info
;
1256 FOR_EACH_VEC_ELT (stmts
, i
, other_info
)
1257 if (stmt_info
!= other_info
)
1260 else if (def_type
== vect_reduction_def
1261 || def_type
== vect_double_reduction_def
1262 || def_type
== vect_nested_cycle
)
1264 /* Else def types have to match. */
1265 stmt_vec_info other_info
;
1266 FOR_EACH_VEC_ELT (stmts
, i
, other_info
)
1267 if (STMT_VINFO_DEF_TYPE (other_info
) != def_type
)
1273 node
= vect_create_new_slp_node (stmts
, 0);
1274 SLP_TREE_VECTYPE (node
) = vectype
;
1279 bool two_operators
= false;
1280 unsigned char *swap
= XALLOCAVEC (unsigned char, group_size
);
1281 tree vectype
= NULL_TREE
;
1282 if (!vect_build_slp_tree_1 (vinfo
, swap
, stmts
, group_size
,
1283 &this_max_nunits
, matches
, &two_operators
,
1287 /* If the SLP node is a load, terminate the recursion unless masked. */
1288 if (STMT_VINFO_GROUPED_ACCESS (stmt_info
)
1289 && DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info
)))
1291 if (gcall
*stmt
= dyn_cast
<gcall
*> (stmt_info
->stmt
))
1294 gcc_assert (gimple_call_internal_p (stmt
, IFN_MASK_LOAD
));
1299 *max_nunits
= this_max_nunits
;
1301 node
= vect_create_new_slp_node (stmts
, 0);
1302 SLP_TREE_VECTYPE (node
) = vectype
;
1303 /* And compute the load permutation. Whether it is actually
1304 a permutation depends on the unrolling factor which is
1306 vec
<unsigned> load_permutation
;
1308 stmt_vec_info load_info
;
1309 load_permutation
.create (group_size
);
1310 stmt_vec_info first_stmt_info
1311 = DR_GROUP_FIRST_ELEMENT (SLP_TREE_SCALAR_STMTS (node
)[0]);
1312 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node
), j
, load_info
)
1314 int load_place
= vect_get_place_in_interleaving_chain
1315 (load_info
, first_stmt_info
);
1316 gcc_assert (load_place
!= -1);
1317 load_permutation
.safe_push (load_place
);
1319 SLP_TREE_LOAD_PERMUTATION (node
) = load_permutation
;
1323 else if (gimple_assign_single_p (stmt_info
->stmt
)
1324 && !gimple_vuse (stmt_info
->stmt
)
1325 && gimple_assign_rhs_code (stmt_info
->stmt
) == BIT_FIELD_REF
)
1327 /* vect_build_slp_tree_2 determined all BIT_FIELD_REFs reference
1328 the same SSA name vector of a compatible type to vectype. */
1329 vec
<std::pair
<unsigned, unsigned> > lperm
= vNULL
;
1330 tree vec
= TREE_OPERAND (gimple_assign_rhs1 (stmt_info
->stmt
), 0);
1331 stmt_vec_info estmt_info
;
1332 FOR_EACH_VEC_ELT (stmts
, i
, estmt_info
)
1334 gassign
*estmt
= as_a
<gassign
*> (estmt_info
->stmt
);
1335 tree bfref
= gimple_assign_rhs1 (estmt
);
1337 if (!known_eq (bit_field_size (bfref
),
1338 tree_to_poly_uint64 (TYPE_SIZE (TREE_TYPE (vectype
))))
1339 || !constant_multiple_p (bit_field_offset (bfref
),
1340 bit_field_size (bfref
), &lane
))
1345 lperm
.safe_push (std::make_pair (0, (unsigned)lane
));
1347 slp_tree vnode
= vect_create_new_slp_node (vNULL
);
1348 SLP_TREE_VECTYPE (vnode
) = TREE_TYPE (vec
);
1349 SLP_TREE_VEC_DEFS (vnode
).safe_push (vec
);
1350 /* We are always building a permutation node even if it is an identity
1351 permute to shield the rest of the vectorizer from the odd node
1352 representing an actual vector without any scalar ops.
1353 ??? We could hide it completely with making the permute node
1355 node
= vect_create_new_slp_node (stmts
, 1);
1356 SLP_TREE_CODE (node
) = VEC_PERM_EXPR
;
1357 SLP_TREE_LANE_PERMUTATION (node
) = lperm
;
1358 SLP_TREE_VECTYPE (node
) = vectype
;
1359 SLP_TREE_CHILDREN (node
).quick_push (vnode
);
1363 /* Get at the operands, verifying they are compatible. */
1364 vec
<slp_oprnd_info
> oprnds_info
= vect_create_oprnd_info (nops
, group_size
);
1365 slp_oprnd_info oprnd_info
;
1366 FOR_EACH_VEC_ELT (stmts
, i
, stmt_info
)
1368 int res
= vect_get_and_check_slp_defs (vinfo
, &swap
[i
],
1369 stmts
, i
, &oprnds_info
);
1371 matches
[(res
== -1) ? 0 : i
] = false;
1375 for (i
= 0; i
< group_size
; ++i
)
1378 vect_free_oprnd_info (oprnds_info
);
1382 auto_vec
<slp_tree
, 4> children
;
1384 stmt_info
= stmts
[0];
1386 /* Create SLP_TREE nodes for the definition node/s. */
1387 FOR_EACH_VEC_ELT (oprnds_info
, i
, oprnd_info
)
1392 if (oprnd_info
->first_dt
== vect_uninitialized_def
)
1394 /* COND_EXPR have one too many eventually if the condition
1396 gcc_assert (i
== 3 && nops
== 4);
1400 if (oprnd_info
->first_dt
!= vect_internal_def
1401 && oprnd_info
->first_dt
!= vect_reduction_def
1402 && oprnd_info
->first_dt
!= vect_induction_def
)
1404 slp_tree invnode
= vect_create_new_slp_node (oprnd_info
->ops
);
1405 SLP_TREE_DEF_TYPE (invnode
) = oprnd_info
->first_dt
;
1406 oprnd_info
->ops
= vNULL
;
1407 children
.safe_push (invnode
);
1411 if ((child
= vect_build_slp_tree (vinfo
, oprnd_info
->def_stmts
,
1412 group_size
, &this_max_nunits
,
1414 &this_tree_size
, bst_map
)) != NULL
)
1416 oprnd_info
->def_stmts
= vNULL
;
1417 children
.safe_push (child
);
1421 /* If the SLP build failed fatally and we analyze a basic-block
1422 simply treat nodes we fail to build as externally defined
1423 (and thus build vectors from the scalar defs).
1424 The cost model will reject outright expensive cases.
1425 ??? This doesn't treat cases where permutation ultimatively
1426 fails (or we don't try permutation below). Ideally we'd
1427 even compute a permutation that will end up with the maximum
1429 if (is_a
<bb_vec_info
> (vinfo
)
1431 /* ??? Rejecting patterns this way doesn't work. We'd have to
1432 do extra work to cancel the pattern so the uses see the
1434 && !is_pattern_stmt_p (stmt_info
)
1435 && !oprnd_info
->any_pattern
)
1437 if (dump_enabled_p ())
1438 dump_printf_loc (MSG_NOTE
, vect_location
,
1439 "Building vector operands from scalars\n");
1441 child
= vect_create_new_slp_node (oprnd_info
->ops
);
1442 children
.safe_push (child
);
1443 oprnd_info
->ops
= vNULL
;
1444 oprnd_info
->def_stmts
= vNULL
;
1448 /* If the SLP build for operand zero failed and operand zero
1449 and one can be commutated try that for the scalar stmts
1450 that failed the match. */
1452 /* A first scalar stmt mismatch signals a fatal mismatch. */
1454 /* ??? For COND_EXPRs we can swap the comparison operands
1455 as well as the arms under some constraints. */
1457 && oprnds_info
[1]->first_dt
== vect_internal_def
1458 && is_gimple_assign (stmt_info
->stmt
)
1459 /* Swapping operands for reductions breaks assumptions later on. */
1460 && STMT_VINFO_DEF_TYPE (stmt_info
) != vect_reduction_def
1461 && STMT_VINFO_DEF_TYPE (stmt_info
) != vect_double_reduction_def
1462 /* Do so only if the number of not successful permutes was nor more
1463 than a cut-ff as re-trying the recursive match on
1464 possibly each level of the tree would expose exponential
1468 /* See whether we can swap the matching or the non-matching
1470 bool swap_not_matching
= true;
1473 for (j
= 0; j
< group_size
; ++j
)
1475 if (matches
[j
] != !swap_not_matching
)
1477 stmt_vec_info stmt_info
= stmts
[j
];
1478 /* Verify if we can swap operands of this stmt. */
1479 gassign
*stmt
= dyn_cast
<gassign
*> (stmt_info
->stmt
);
1481 || !commutative_tree_code (gimple_assign_rhs_code (stmt
)))
1483 if (!swap_not_matching
)
1485 swap_not_matching
= false;
1490 while (j
!= group_size
);
1492 /* Swap mismatched definition stmts. */
1493 if (dump_enabled_p ())
1494 dump_printf_loc (MSG_NOTE
, vect_location
,
1495 "Re-trying with swapped operands of stmts ");
1496 for (j
= 0; j
< group_size
; ++j
)
1497 if (matches
[j
] == !swap_not_matching
)
1499 std::swap (oprnds_info
[0]->def_stmts
[j
],
1500 oprnds_info
[1]->def_stmts
[j
]);
1501 std::swap (oprnds_info
[0]->ops
[j
],
1502 oprnds_info
[1]->ops
[j
]);
1503 if (dump_enabled_p ())
1504 dump_printf (MSG_NOTE
, "%d ", j
);
1506 if (dump_enabled_p ())
1507 dump_printf (MSG_NOTE
, "\n");
1508 /* And try again with scratch 'matches' ... */
1509 bool *tem
= XALLOCAVEC (bool, group_size
);
1510 if ((child
= vect_build_slp_tree (vinfo
, oprnd_info
->def_stmts
,
1511 group_size
, &this_max_nunits
,
1513 &this_tree_size
, bst_map
)) != NULL
)
1515 oprnd_info
->def_stmts
= vNULL
;
1516 children
.safe_push (child
);
1524 gcc_assert (child
== NULL
);
1525 FOR_EACH_VEC_ELT (children
, j
, child
)
1526 vect_free_slp_tree (child
, false);
1527 vect_free_oprnd_info (oprnds_info
);
1531 vect_free_oprnd_info (oprnds_info
);
1533 /* If we have all children of a child built up from uniform scalars
1534 then just throw that away, causing it built up from scalars.
1535 The exception is the SLP node for the vector store. */
1536 if (is_a
<bb_vec_info
> (vinfo
)
1537 && !STMT_VINFO_GROUPED_ACCESS (stmt_info
)
1538 /* ??? Rejecting patterns this way doesn't work. We'd have to
1539 do extra work to cancel the pattern so the uses see the
1541 && !is_pattern_stmt_p (stmt_info
))
1545 FOR_EACH_VEC_ELT (children
, j
, child
)
1546 if (SLP_TREE_DEF_TYPE (child
) == vect_internal_def
1547 || !vect_slp_tree_uniform_p (child
))
1553 FOR_EACH_VEC_ELT (children
, j
, child
)
1554 vect_free_slp_tree (child
, false);
1556 if (dump_enabled_p ())
1557 dump_printf_loc (MSG_NOTE
, vect_location
,
1558 "Building parent vector operands from "
1559 "scalars instead\n");
1564 *tree_size
+= this_tree_size
+ 1;
1565 *max_nunits
= this_max_nunits
;
1569 /* ??? We'd likely want to either cache in bst_map sth like
1570 { a+b, NULL, a+b, NULL } and { NULL, a-b, NULL, a-b } or
1571 the true { a+b, a+b, a+b, a+b } ... but there we don't have
1572 explicit stmts to put in so the keying on 'stmts' doesn't
1573 work (but we have the same issue with nodes that use 'ops'). */
1574 slp_tree one
= new _slp_tree
;
1575 slp_tree two
= new _slp_tree
;
1576 SLP_TREE_DEF_TYPE (one
) = vect_internal_def
;
1577 SLP_TREE_DEF_TYPE (two
) = vect_internal_def
;
1578 SLP_TREE_VECTYPE (one
) = vectype
;
1579 SLP_TREE_VECTYPE (two
) = vectype
;
1580 SLP_TREE_CHILDREN (one
).safe_splice (children
);
1581 SLP_TREE_CHILDREN (two
).safe_splice (children
);
1583 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (two
), i
, child
)
1586 /* Here we record the original defs since this
1587 node represents the final lane configuration. */
1588 node
= vect_create_new_slp_node (stmts
, 2);
1589 SLP_TREE_VECTYPE (node
) = vectype
;
1590 SLP_TREE_CODE (node
) = VEC_PERM_EXPR
;
1591 SLP_TREE_CHILDREN (node
).quick_push (one
);
1592 SLP_TREE_CHILDREN (node
).quick_push (two
);
1593 gassign
*stmt
= as_a
<gassign
*> (stmts
[0]->stmt
);
1594 enum tree_code code0
= gimple_assign_rhs_code (stmt
);
1595 enum tree_code ocode
= ERROR_MARK
;
1596 stmt_vec_info ostmt_info
;
1598 FOR_EACH_VEC_ELT (stmts
, i
, ostmt_info
)
1600 gassign
*ostmt
= as_a
<gassign
*> (ostmt_info
->stmt
);
1601 if (gimple_assign_rhs_code (ostmt
) != code0
)
1603 SLP_TREE_LANE_PERMUTATION (node
).safe_push (std::make_pair (1, i
));
1604 ocode
= gimple_assign_rhs_code (ostmt
);
1608 SLP_TREE_LANE_PERMUTATION (node
).safe_push (std::make_pair (0, i
));
1610 SLP_TREE_CODE (one
) = code0
;
1611 SLP_TREE_CODE (two
) = ocode
;
1612 SLP_TREE_LANES (one
) = stmts
.length ();
1613 SLP_TREE_LANES (two
) = stmts
.length ();
1614 SLP_TREE_REPRESENTATIVE (one
) = stmts
[0];
1615 SLP_TREE_REPRESENTATIVE (two
) = stmts
[j
];
1619 node
= vect_create_new_slp_node (stmts
, nops
);
1620 SLP_TREE_VECTYPE (node
) = vectype
;
1621 SLP_TREE_CHILDREN (node
).splice (children
);
1625 /* Dump a single SLP tree NODE. */
1628 vect_print_slp_tree (dump_flags_t dump_kind
, dump_location_t loc
,
1633 stmt_vec_info stmt_info
;
1636 dump_metadata_t
metadata (dump_kind
, loc
.get_impl_location ());
1637 dump_user_location_t user_loc
= loc
.get_user_location ();
1638 dump_printf_loc (metadata
, user_loc
, "node%s %p (max_nunits=%u, refcnt=%u)\n",
1639 SLP_TREE_DEF_TYPE (node
) == vect_external_def
1641 : (SLP_TREE_DEF_TYPE (node
) == vect_constant_def
1644 estimated_poly_value (node
->max_nunits
), node
->refcnt
);
1645 if (SLP_TREE_SCALAR_STMTS (node
).exists ())
1646 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node
), i
, stmt_info
)
1647 dump_printf_loc (metadata
, user_loc
, "\tstmt %u %G", i
, stmt_info
->stmt
);
1650 dump_printf_loc (metadata
, user_loc
, "\t{ ");
1651 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_OPS (node
), i
, op
)
1652 dump_printf (metadata
, "%T%s ", op
,
1653 i
< SLP_TREE_SCALAR_OPS (node
).length () - 1 ? "," : "");
1654 dump_printf (metadata
, "}\n");
1656 if (SLP_TREE_LOAD_PERMUTATION (node
).exists ())
1658 dump_printf_loc (metadata
, user_loc
, "\tload permutation {");
1659 FOR_EACH_VEC_ELT (SLP_TREE_LOAD_PERMUTATION (node
), i
, j
)
1660 dump_printf (dump_kind
, " %u", j
);
1661 dump_printf (dump_kind
, " }\n");
1663 if (SLP_TREE_LANE_PERMUTATION (node
).exists ())
1665 dump_printf_loc (metadata
, user_loc
, "\tlane permutation {");
1666 for (i
= 0; i
< SLP_TREE_LANE_PERMUTATION (node
).length (); ++i
)
1667 dump_printf (dump_kind
, " %u[%u]",
1668 SLP_TREE_LANE_PERMUTATION (node
)[i
].first
,
1669 SLP_TREE_LANE_PERMUTATION (node
)[i
].second
);
1670 dump_printf (dump_kind
, " }\n");
1672 if (SLP_TREE_CHILDREN (node
).is_empty ())
1674 dump_printf_loc (metadata
, user_loc
, "\tchildren");
1675 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), i
, child
)
1676 dump_printf (dump_kind
, " %p", (void *)child
);
1677 dump_printf (dump_kind
, "\n");
/* Debugger entry point: dump a single SLP NODE to stderr (via the debug
   dump context), with no meaningful source location.  */
1681 debug (slp_tree node
)
1683 debug_dump_context ctx
;
1684 vect_print_slp_tree (MSG_NOTE
,
1685 dump_location_t::from_location_t (UNKNOWN_LOCATION
),
/* Recursive worker: dump NODE and then each of its children, using
   VISITED to print every node of the (possibly shared) graph once.  */
1689 /* Dump a slp tree NODE using flags specified in DUMP_KIND. */
1692 vect_print_slp_graph (dump_flags_t dump_kind
, dump_location_t loc
,
1693 slp_tree node
, hash_set
<slp_tree
> &visited
)
/* Already dumped — shared subtree.  */
1698 if (visited
.add (node
))
1701 vect_print_slp_tree (dump_kind
, loc
, node
);
1703 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), i
, child
)
1704 vect_print_slp_graph (dump_kind
, loc
, child
, visited
);
/* Public entry: dump the SLP graph rooted at ENTRY with a fresh
   visited set.  */
1708 vect_print_slp_graph (dump_flags_t dump_kind
, dump_location_t loc
,
1711 hash_set
<slp_tree
> visited
;
1712 vect_print_slp_graph (dump_kind
, loc
, entry
, visited
);
/* Recursive worker: set STMT_SLP_TYPE to pure_slp on every scalar stmt
   of each internal-def node reachable from NODE, visiting shared nodes
   only once via VISITED.  External/constant nodes are skipped.  */
1715 /* Mark the tree rooted at NODE with PURE_SLP. */
1718 vect_mark_slp_stmts (slp_tree node
, hash_set
<slp_tree
> &visited
)
1721 stmt_vec_info stmt_info
;
1724 if (SLP_TREE_DEF_TYPE (node
) != vect_internal_def
)
1727 if (visited
.add (node
))
1730 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node
), i
, stmt_info
)
1731 STMT_SLP_TYPE (stmt_info
) = pure_slp
;
1733 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), i
, child
)
1734 vect_mark_slp_stmts (child
, visited
);
/* Public entry: mark the whole tree rooted at NODE pure-SLP with a
   fresh visited set.  */
1738 vect_mark_slp_stmts (slp_tree node
)
1740 hash_set
<slp_tree
> visited
;
1741 vect_mark_slp_stmts (node
, visited
);
/* Recursive worker: mark each scalar stmt of every internal-def node as
   vect_used_in_scope; asserts a stmt was previously either irrelevant
   or already used-in-scope.  VISITED guards against re-visiting shared
   nodes.  */
1744 /* Mark the statements of the tree rooted at NODE as relevant (vect_used). */
1747 vect_mark_slp_stmts_relevant (slp_tree node
, hash_set
<slp_tree
> &visited
)
1750 stmt_vec_info stmt_info
;
1753 if (SLP_TREE_DEF_TYPE (node
) != vect_internal_def
)
1756 if (visited
.add (node
))
1759 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node
), i
, stmt_info
)
1761 gcc_assert (!STMT_VINFO_RELEVANT (stmt_info
)
1762 || STMT_VINFO_RELEVANT (stmt_info
) == vect_used_in_scope
);
1763 STMT_VINFO_RELEVANT (stmt_info
) = vect_used_in_scope
;
1766 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), i
, child
)
1767 vect_mark_slp_stmts_relevant (child
, visited
);
/* Public entry: mark the tree rooted at NODE relevant with a fresh
   visited set.  */
1771 vect_mark_slp_stmts_relevant (slp_tree node
)
1773 hash_set
<slp_tree
> visited
;
1774 vect_mark_slp_stmts_relevant (node
, visited
);
1777 /* Copy the SLP subtree rooted at NODE. */
1780 slp_copy_subtree (slp_tree node
, hash_map
<slp_tree
, slp_tree
> &map
)
1785 slp_tree
©_ref
= map
.get_or_insert (node
, &existed_p
);
1789 copy_ref
= new _slp_tree
;
1790 slp_tree copy
= copy_ref
;
1791 SLP_TREE_DEF_TYPE (copy
) = SLP_TREE_DEF_TYPE (node
);
1792 SLP_TREE_VECTYPE (copy
) = SLP_TREE_VECTYPE (node
);
1793 SLP_TREE_REPRESENTATIVE (copy
) = SLP_TREE_REPRESENTATIVE (node
);
1794 SLP_TREE_LANES (copy
) = SLP_TREE_LANES (node
);
1795 copy
->max_nunits
= node
->max_nunits
;
1797 if (SLP_TREE_SCALAR_STMTS (node
).exists ())
1799 SLP_TREE_SCALAR_STMTS (copy
) = SLP_TREE_SCALAR_STMTS (node
).copy ();
1800 stmt_vec_info stmt_info
;
1801 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node
), i
, stmt_info
)
1802 STMT_VINFO_NUM_SLP_USES (stmt_info
)++;
1804 if (SLP_TREE_SCALAR_OPS (node
).exists ())
1805 SLP_TREE_SCALAR_OPS (copy
) = SLP_TREE_SCALAR_OPS (node
).copy ();
1806 if (SLP_TREE_LOAD_PERMUTATION (node
).exists ())
1807 SLP_TREE_LOAD_PERMUTATION (copy
) = SLP_TREE_LOAD_PERMUTATION (node
).copy ();
1808 if (SLP_TREE_LANE_PERMUTATION (node
).exists ())
1809 SLP_TREE_LANE_PERMUTATION (copy
) = SLP_TREE_LANE_PERMUTATION (node
).copy ();
1810 if (SLP_TREE_CHILDREN (node
).exists ())
1811 SLP_TREE_CHILDREN (copy
) = SLP_TREE_CHILDREN (node
).copy ();
1812 gcc_assert (!SLP_TREE_VEC_STMTS (node
).exists ());
1815 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (copy
), i
, child
)
1817 SLP_TREE_CHILDREN (copy
)[i
] = slp_copy_subtree (child
, map
);
1818 SLP_TREE_CHILDREN (copy
)[i
]->refcnt
++;
1823 /* Rearrange the statements of NODE according to PERMUTATION. */
1826 vect_slp_rearrange_stmts (slp_tree node
, unsigned int group_size
,
1827 vec
<unsigned> permutation
,
1828 hash_set
<slp_tree
> &visited
)
1833 if (visited
.add (node
))
1836 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), i
, child
)
1837 vect_slp_rearrange_stmts (child
, group_size
, permutation
, visited
);
1839 if (SLP_TREE_SCALAR_STMTS (node
).exists ())
1841 gcc_assert (group_size
== SLP_TREE_SCALAR_STMTS (node
).length ());
1842 vec
<stmt_vec_info
> tmp_stmts
;
1843 tmp_stmts
.create (group_size
);
1844 tmp_stmts
.quick_grow (group_size
);
1845 stmt_vec_info stmt_info
;
1846 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node
), i
, stmt_info
)
1847 tmp_stmts
[permutation
[i
]] = stmt_info
;
1848 SLP_TREE_SCALAR_STMTS (node
).release ();
1849 SLP_TREE_SCALAR_STMTS (node
) = tmp_stmts
;
1851 if (SLP_TREE_SCALAR_OPS (node
).exists ())
1853 gcc_assert (group_size
== SLP_TREE_SCALAR_OPS (node
).length ());
1855 tmp_ops
.create (group_size
);
1856 tmp_ops
.quick_grow (group_size
);
1858 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_OPS (node
), i
, op
)
1859 tmp_ops
[permutation
[i
]] = op
;
1860 SLP_TREE_SCALAR_OPS (node
).release ();
1861 SLP_TREE_SCALAR_OPS (node
) = tmp_ops
;
1863 if (SLP_TREE_LANE_PERMUTATION (node
).exists ())
1865 gcc_assert (group_size
== SLP_TREE_LANE_PERMUTATION (node
).length ());
1866 for (i
= 0; i
< group_size
; ++i
)
1867 SLP_TREE_LANE_PERMUTATION (node
)[i
].second
1868 = permutation
[SLP_TREE_LANE_PERMUTATION (node
)[i
].second
];
1873 /* Attempt to reorder stmts in a reduction chain so that we don't
1874 require any load permutation. Return true if that was possible,
1875 otherwise return false. */
1878 vect_attempt_slp_rearrange_stmts (slp_instance slp_instn
)
1882 slp_tree node
, load
;
1884 if (SLP_INSTANCE_LOADS (slp_instn
).is_empty ())
1887 /* Compare all the permutation sequences to the first one. We know
1888 that at least one load is permuted. */
1889 node
= SLP_INSTANCE_LOADS (slp_instn
)[0];
1890 if (!SLP_TREE_LOAD_PERMUTATION (node
).exists ())
1892 unsigned int group_size
= SLP_TREE_LOAD_PERMUTATION (node
).length ();
1893 for (i
= 1; SLP_INSTANCE_LOADS (slp_instn
).iterate (i
, &load
); ++i
)
1895 if (!SLP_TREE_LOAD_PERMUTATION (load
).exists ()
1896 || SLP_TREE_LOAD_PERMUTATION (load
).length () != group_size
)
1898 FOR_EACH_VEC_ELT (SLP_TREE_LOAD_PERMUTATION (load
), j
, lidx
)
1899 if (lidx
!= SLP_TREE_LOAD_PERMUTATION (node
)[j
])
1903 /* Check that the loads in the first sequence are different and there
1904 are no gaps between them. */
1905 auto_sbitmap
load_index (group_size
);
1906 bitmap_clear (load_index
);
1907 FOR_EACH_VEC_ELT (node
->load_permutation
, i
, lidx
)
1909 if (lidx
>= group_size
)
1911 if (bitmap_bit_p (load_index
, lidx
))
1914 bitmap_set_bit (load_index
, lidx
);
1916 for (i
= 0; i
< group_size
; i
++)
1917 if (!bitmap_bit_p (load_index
, i
))
1920 /* This permutation is valid for reduction. Since the order of the
1921 statements in the nodes is not important unless they are memory
1922 accesses, we can rearrange the statements in all the nodes
1923 according to the order of the loads. */
1925 /* We have to unshare the SLP tree we modify. */
1926 hash_map
<slp_tree
, slp_tree
> map
;
1927 slp_tree unshared
= slp_copy_subtree (SLP_INSTANCE_TREE (slp_instn
), map
);
1928 vect_free_slp_tree (SLP_INSTANCE_TREE (slp_instn
), false);
1930 SLP_INSTANCE_TREE (slp_instn
) = unshared
;
1931 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn
), i
, node
)
1932 SLP_INSTANCE_LOADS (slp_instn
)[i
] = *map
.get (node
);
1933 node
= SLP_INSTANCE_LOADS (slp_instn
)[0];
1935 /* Do the actual re-arrangement. */
1936 hash_set
<slp_tree
> visited
;
1937 vect_slp_rearrange_stmts (SLP_INSTANCE_TREE (slp_instn
), group_size
,
1938 node
->load_permutation
, visited
);
1940 /* We are done, no actual permutations need to be generated. */
1941 poly_uint64 unrolling_factor
= SLP_INSTANCE_UNROLLING_FACTOR (slp_instn
);
1942 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn
), i
, node
)
1944 stmt_vec_info first_stmt_info
= SLP_TREE_SCALAR_STMTS (node
)[0];
1945 first_stmt_info
= DR_GROUP_FIRST_ELEMENT (first_stmt_info
);
1946 /* But we have to keep those permutations that are required because
1947 of handling of gaps. */
1948 if (known_eq (unrolling_factor
, 1U)
1949 || (group_size
== DR_GROUP_SIZE (first_stmt_info
)
1950 && DR_GROUP_GAP (first_stmt_info
) == 0))
1951 SLP_TREE_LOAD_PERMUTATION (node
).release ();
1953 for (j
= 0; j
< SLP_TREE_LOAD_PERMUTATION (node
).length (); ++j
)
1954 SLP_TREE_LOAD_PERMUTATION (node
)[j
] = j
;
/* Recursive worker: walk the graph under NODE once (via VISITED) and
   push every leaf that is a grouped read (a load node) onto LOADS.  */
1960 /* Gather loads in the SLP graph NODE and populate the INST loads array. */
1963 vect_gather_slp_loads (vec
<slp_tree
> &loads
, slp_tree node
,
1964 hash_set
<slp_tree
> &visited
)
1966 if (visited
.add (node
))
/* Leaves only: interior nodes are handled by the recursion below.  */
1969 if (SLP_TREE_CHILDREN (node
).length () == 0)
1971 if (SLP_TREE_DEF_TYPE (node
) != vect_internal_def
)
1973 stmt_vec_info stmt_info
= SLP_TREE_SCALAR_STMTS (node
)[0];
/* A grouped data-ref access that reads memory is a load node.  */
1974 if (STMT_VINFO_GROUPED_ACCESS (stmt_info
)
1975 && DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info
)))
1976 loads
.safe_push (node
);
1982 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), i
, child
)
1983 vect_gather_slp_loads (loads
, child
, visited
);
/* Public entry: populate INST's load-node vector from the graph rooted
   at NODE.  */
1988 vect_gather_slp_loads (slp_instance inst
, slp_tree node
)
1990 hash_set
<slp_tree
> visited
;
1991 vect_gather_slp_loads (SLP_INSTANCE_LOADS (inst
), node
, visited
);
/* Return the scalar stmt of NODE that appears last in the IL, comparing
   the original (pre-pattern) stmts via get_later_stmt.  (The return
   statement is not visible in this excerpt.)  */
1995 /* Find the last store in SLP INSTANCE. */
1998 vect_find_last_scalar_stmt_in_slp (slp_tree node
)
2000 stmt_vec_info last
= NULL
;
2001 stmt_vec_info stmt_vinfo
;
2003 for (int i
= 0; SLP_TREE_SCALAR_STMTS (node
).iterate (i
, &stmt_vinfo
); i
++)
/* Compare original stmts, not pattern replacements.  */
2005 stmt_vinfo
= vect_orig_stmt (stmt_vinfo
);
2006 last
= last
? get_later_stmt (stmt_vinfo
, last
) : stmt_vinfo
;
/* Return the scalar stmt of NODE that appears first in the IL: a stmt
   becomes the new FIRST when get_later_stmt says FIRST is the later of
   the two.  (The guard for FIRST == NULL and the return statement are
   not visible in this excerpt.)  */
2012 /* Find the first stmt in NODE. */
2015 vect_find_first_scalar_stmt_in_slp (slp_tree node
)
2017 stmt_vec_info first
= NULL
;
2018 stmt_vec_info stmt_vinfo
;
2020 for (int i
= 0; SLP_TREE_SCALAR_STMTS (node
).iterate (i
, &stmt_vinfo
); i
++)
2022 stmt_vinfo
= vect_orig_stmt (stmt_vinfo
);
2024 || get_later_stmt (stmt_vinfo
, first
) == first
)
2031 /* Splits a group of stores, currently beginning at FIRST_VINFO, into
2032 two groups: one (still beginning at FIRST_VINFO) of size GROUP1_SIZE
2033 (also containing the first GROUP1_SIZE stmts, since stores are
2034 consecutive), the second containing the remainder.
2035 Return the first stmt in the second group. */
2037 static stmt_vec_info
2038 vect_split_slp_store_group (stmt_vec_info first_vinfo
, unsigned group1_size
)
2040 gcc_assert (DR_GROUP_FIRST_ELEMENT (first_vinfo
) == first_vinfo
);
2041 gcc_assert (group1_size
> 0);
2042 int group2_size
= DR_GROUP_SIZE (first_vinfo
) - group1_size
;
2043 gcc_assert (group2_size
> 0);
2044 DR_GROUP_SIZE (first_vinfo
) = group1_size
;
2046 stmt_vec_info stmt_info
= first_vinfo
;
2047 for (unsigned i
= group1_size
; i
> 1; i
--)
2049 stmt_info
= DR_GROUP_NEXT_ELEMENT (stmt_info
);
2050 gcc_assert (DR_GROUP_GAP (stmt_info
) == 1);
2052 /* STMT is now the last element of the first group. */
2053 stmt_vec_info group2
= DR_GROUP_NEXT_ELEMENT (stmt_info
);
2054 DR_GROUP_NEXT_ELEMENT (stmt_info
) = 0;
2056 DR_GROUP_SIZE (group2
) = group2_size
;
2057 for (stmt_info
= group2
; stmt_info
;
2058 stmt_info
= DR_GROUP_NEXT_ELEMENT (stmt_info
))
2060 DR_GROUP_FIRST_ELEMENT (stmt_info
) = group2
;
2061 gcc_assert (DR_GROUP_GAP (stmt_info
) == 1);
2064 /* For the second group, the DR_GROUP_GAP is that before the original group,
2065 plus skipping over the first vector. */
2066 DR_GROUP_GAP (group2
) = DR_GROUP_GAP (first_vinfo
) + group1_size
;
2068 /* DR_GROUP_GAP of the first group now has to skip over the second group too. */
2069 DR_GROUP_GAP (first_vinfo
) += group2_size
;
2071 if (dump_enabled_p ())
2072 dump_printf_loc (MSG_NOTE
, vect_location
, "Split group into %d and %d\n",
2073 group1_size
, group2_size
);
/* Unrolling factor = lcm (nunits, group_size) / group_size, i.e. the
   smallest number of group copies that fills whole vectors; exact_div
   asserts the division is exact.  */
2078 /* Calculate the unrolling factor for an SLP instance with GROUP_SIZE
2079 statements and a vector of NUNITS elements. */
2082 calculate_unrolling_factor (poly_uint64 nunits
, unsigned int group_size
)
2084 return exact_div (common_multiple (nunits
, group_size
), group_size
);
2087 /* Analyze an SLP instance starting from a group of grouped stores. Call
2088 vect_build_slp_tree to build a tree of packed stmts if possible.
2089 Return FALSE if it's impossible to SLP any stmt in the loop. */
2092 vect_analyze_slp_instance (vec_info
*vinfo
,
2093 scalar_stmts_to_slp_tree_map_t
*bst_map
,
2094 stmt_vec_info stmt_info
, unsigned max_tree_size
)
2096 slp_instance new_instance
;
2098 unsigned int group_size
;
2099 tree vectype
, scalar_type
= NULL_TREE
;
2101 struct data_reference
*dr
= STMT_VINFO_DATA_REF (stmt_info
);
2102 vec
<stmt_vec_info
> scalar_stmts
;
2103 bool constructor
= false;
2105 if (STMT_VINFO_GROUPED_ACCESS (stmt_info
))
2107 scalar_type
= TREE_TYPE (DR_REF (dr
));
2108 group_size
= DR_GROUP_SIZE (stmt_info
);
2109 vectype
= get_vectype_for_scalar_type (vinfo
, scalar_type
, group_size
);
2111 else if (!dr
&& REDUC_GROUP_FIRST_ELEMENT (stmt_info
))
2113 gcc_assert (is_a
<loop_vec_info
> (vinfo
));
2114 vectype
= STMT_VINFO_VECTYPE (stmt_info
);
2115 group_size
= REDUC_GROUP_SIZE (stmt_info
);
2117 else if (is_gimple_assign (stmt_info
->stmt
)
2118 && gimple_assign_rhs_code (stmt_info
->stmt
) == CONSTRUCTOR
)
2120 vectype
= TREE_TYPE (gimple_assign_rhs1 (stmt_info
->stmt
));
2121 group_size
= CONSTRUCTOR_NELTS (gimple_assign_rhs1 (stmt_info
->stmt
));
2126 gcc_assert (is_a
<loop_vec_info
> (vinfo
));
2127 vectype
= STMT_VINFO_VECTYPE (stmt_info
);
2128 group_size
= as_a
<loop_vec_info
> (vinfo
)->reductions
.length ();
2133 if (dump_enabled_p ())
2134 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2135 "Build SLP failed: unsupported data-type %T\n",
2140 poly_uint64 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
2142 /* Create a node (a root of the SLP tree) for the packed grouped stores. */
2143 scalar_stmts
.create (group_size
);
2144 stmt_vec_info next_info
= stmt_info
;
2145 if (STMT_VINFO_GROUPED_ACCESS (stmt_info
))
2147 /* Collect the stores and store them in SLP_TREE_SCALAR_STMTS. */
2150 scalar_stmts
.safe_push (vect_stmt_to_vectorize (next_info
));
2151 next_info
= DR_GROUP_NEXT_ELEMENT (next_info
);
2154 else if (!dr
&& REDUC_GROUP_FIRST_ELEMENT (stmt_info
))
2156 /* Collect the reduction stmts and store them in
2157 SLP_TREE_SCALAR_STMTS. */
2160 scalar_stmts
.safe_push (vect_stmt_to_vectorize (next_info
));
2161 next_info
= REDUC_GROUP_NEXT_ELEMENT (next_info
);
2163 /* Mark the first element of the reduction chain as reduction to properly
2164 transform the node. In the reduction analysis phase only the last
2165 element of the chain is marked as reduction. */
2166 STMT_VINFO_DEF_TYPE (stmt_info
)
2167 = STMT_VINFO_DEF_TYPE (scalar_stmts
.last ());
2168 STMT_VINFO_REDUC_DEF (vect_orig_stmt (stmt_info
))
2169 = STMT_VINFO_REDUC_DEF (vect_orig_stmt (scalar_stmts
.last ()));
2171 else if (constructor
)
2173 tree rhs
= gimple_assign_rhs1 (stmt_info
->stmt
);
2175 FOR_EACH_CONSTRUCTOR_VALUE (CONSTRUCTOR_ELTS (rhs
), i
, val
)
2177 if (TREE_CODE (val
) == SSA_NAME
)
2179 gimple
* def
= SSA_NAME_DEF_STMT (val
);
2180 stmt_vec_info def_info
= vinfo
->lookup_stmt (def
);
2181 /* Value is defined in another basic block. */
2184 def_info
= vect_stmt_to_vectorize (def_info
);
2185 scalar_stmts
.safe_push (def_info
);
2190 if (dump_enabled_p ())
2191 dump_printf_loc (MSG_NOTE
, vect_location
,
2192 "Analyzing vectorizable constructor: %G\n",
2197 /* Collect reduction statements. */
2198 vec
<stmt_vec_info
> reductions
= as_a
<loop_vec_info
> (vinfo
)->reductions
;
2199 for (i
= 0; reductions
.iterate (i
, &next_info
); i
++)
2200 scalar_stmts
.safe_push (next_info
);
2203 /* Build the tree for the SLP instance. */
2204 bool *matches
= XALLOCAVEC (bool, group_size
);
2205 unsigned npermutes
= 0;
2206 poly_uint64 max_nunits
= nunits
;
2207 unsigned tree_size
= 0;
2208 node
= vect_build_slp_tree (vinfo
, scalar_stmts
, group_size
,
2209 &max_nunits
, matches
, &npermutes
,
2210 &tree_size
, bst_map
);
2213 /* Calculate the unrolling factor based on the smallest type. */
2214 poly_uint64 unrolling_factor
2215 = calculate_unrolling_factor (max_nunits
, group_size
);
2217 if (maybe_ne (unrolling_factor
, 1U)
2218 && is_a
<bb_vec_info
> (vinfo
))
2220 unsigned HOST_WIDE_INT const_max_nunits
;
2221 if (!max_nunits
.is_constant (&const_max_nunits
)
2222 || const_max_nunits
> group_size
)
2224 if (dump_enabled_p ())
2225 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2226 "Build SLP failed: store group "
2227 "size not a multiple of the vector size "
2228 "in basic block SLP\n");
2229 vect_free_slp_tree (node
, false);
2232 /* Fatal mismatch. */
2234 matches
[group_size
/ const_max_nunits
* const_max_nunits
] = false;
2235 vect_free_slp_tree (node
, false);
2239 /* Create a new SLP instance. */
2240 new_instance
= XNEW (class _slp_instance
);
2241 SLP_INSTANCE_TREE (new_instance
) = node
;
2242 SLP_INSTANCE_UNROLLING_FACTOR (new_instance
) = unrolling_factor
;
2243 SLP_INSTANCE_LOADS (new_instance
) = vNULL
;
2244 SLP_INSTANCE_ROOT_STMT (new_instance
) = constructor
? stmt_info
: NULL
;
2246 vect_gather_slp_loads (new_instance
, node
);
2247 if (dump_enabled_p ())
2248 dump_printf_loc (MSG_NOTE
, vect_location
,
2249 "SLP size %u vs. limit %u.\n",
2250 tree_size
, max_tree_size
);
2252 /* Check whether any load is possibly permuted. */
2254 bool loads_permuted
= false;
2255 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (new_instance
), i
, load_node
)
2257 if (!SLP_TREE_LOAD_PERMUTATION (load_node
).exists ())
2260 stmt_vec_info load_info
;
2261 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (load_node
), j
, load_info
)
2262 if (SLP_TREE_LOAD_PERMUTATION (load_node
)[j
] != j
)
2264 loads_permuted
= true;
2269 /* If the loads and stores can be handled with load/store-lane
2270 instructions do not generate this SLP instance. */
2271 if (is_a
<loop_vec_info
> (vinfo
)
2273 && dr
&& vect_store_lanes_supported (vectype
, group_size
, false))
2276 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (new_instance
), i
, load_node
)
2278 stmt_vec_info stmt_vinfo
= DR_GROUP_FIRST_ELEMENT
2279 (SLP_TREE_SCALAR_STMTS (load_node
)[0]);
2280 /* Use SLP for strided accesses (or if we can't load-lanes). */
2281 if (STMT_VINFO_STRIDED_P (stmt_vinfo
)
2282 || ! vect_load_lanes_supported
2283 (STMT_VINFO_VECTYPE (stmt_vinfo
),
2284 DR_GROUP_SIZE (stmt_vinfo
), false))
2287 if (i
== SLP_INSTANCE_LOADS (new_instance
).length ())
2289 if (dump_enabled_p ())
2290 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2291 "Built SLP cancelled: can use "
2292 "load/store-lanes\n");
2293 vect_free_slp_instance (new_instance
, false);
2298 /* If this is a reduction chain with a conversion in front
2299 amend the SLP tree with a node for that. */
2301 && REDUC_GROUP_FIRST_ELEMENT (stmt_info
)
2302 && STMT_VINFO_DEF_TYPE (stmt_info
) != vect_reduction_def
)
2304 /* Get at the conversion stmt - we know it's the single use
2305 of the last stmt of the reduction chain. */
2306 gimple
*tem
= vect_orig_stmt (scalar_stmts
[group_size
- 1])->stmt
;
2307 use_operand_p use_p
;
2309 bool r
= single_imm_use (gimple_assign_lhs (tem
),
2312 next_info
= vinfo
->lookup_stmt (use_stmt
);
2313 next_info
= vect_stmt_to_vectorize (next_info
);
2314 scalar_stmts
= vNULL
;
2315 scalar_stmts
.create (group_size
);
2316 for (unsigned i
= 0; i
< group_size
; ++i
)
2317 scalar_stmts
.quick_push (next_info
);
2318 slp_tree conv
= vect_create_new_slp_node (scalar_stmts
, 1);
2319 SLP_TREE_VECTYPE (conv
) = STMT_VINFO_VECTYPE (next_info
);
2320 SLP_TREE_CHILDREN (conv
).quick_push (node
);
2321 SLP_INSTANCE_TREE (new_instance
) = conv
;
2322 /* We also have to fake this conversion stmt as SLP reduction
2323 group so we don't have to mess with too much code
2325 REDUC_GROUP_FIRST_ELEMENT (next_info
) = next_info
;
2326 REDUC_GROUP_NEXT_ELEMENT (next_info
) = NULL
;
2329 vinfo
->slp_instances
.safe_push (new_instance
);
2331 /* ??? We've replaced the old SLP_INSTANCE_GROUP_SIZE with
2332 the number of scalar stmts in the root in a few places.
2333 Verify that assumption holds. */
2334 gcc_assert (SLP_TREE_SCALAR_STMTS (SLP_INSTANCE_TREE (new_instance
))
2335 .length () == group_size
);
2337 if (dump_enabled_p ())
2339 dump_printf_loc (MSG_NOTE
, vect_location
,
2340 "Final SLP tree for instance:\n");
2341 vect_print_slp_graph (MSG_NOTE
, vect_location
,
2342 SLP_INSTANCE_TREE (new_instance
));
2350 /* Failed to SLP. */
2351 /* Free the allocated memory. */
2352 scalar_stmts
.release ();
2355 /* For basic block SLP, try to break the group up into multiples of the
2357 unsigned HOST_WIDE_INT const_nunits
;
2358 if (is_a
<bb_vec_info
> (vinfo
)
2359 && STMT_VINFO_GROUPED_ACCESS (stmt_info
)
2360 && DR_GROUP_FIRST_ELEMENT (stmt_info
)
2361 && nunits
.is_constant (&const_nunits
))
2363 /* We consider breaking the group only on VF boundaries from the existing
2365 for (i
= 0; i
< group_size
; i
++)
2366 if (!matches
[i
]) break;
2368 if (i
>= const_nunits
&& i
< group_size
)
2370 /* Split into two groups at the first vector boundary before i. */
2371 gcc_assert ((const_nunits
& (const_nunits
- 1)) == 0);
2372 unsigned group1_size
= i
& ~(const_nunits
- 1);
2374 stmt_vec_info rest
= vect_split_slp_store_group (stmt_info
,
2376 bool res
= vect_analyze_slp_instance (vinfo
, bst_map
, stmt_info
,
2378 /* If the first non-match was in the middle of a vector,
2379 skip the rest of that vector. */
2380 if (group1_size
< i
)
2382 i
= group1_size
+ const_nunits
;
2384 rest
= vect_split_slp_store_group (rest
, const_nunits
);
2387 res
|= vect_analyze_slp_instance (vinfo
, bst_map
,
2388 rest
, max_tree_size
);
2391 /* Even though the first vector did not all match, we might be able to SLP
2392 (some) of the remainder. FORNOW ignore this possibility. */
2399 /* Check if there are stmts in the loop can be vectorized using SLP. Build SLP
2400 trees of packed scalar stmts if SLP is possible. */
2403 vect_analyze_slp (vec_info
*vinfo
, unsigned max_tree_size
)
2406 stmt_vec_info first_element
;
2408 DUMP_VECT_SCOPE ("vect_analyze_slp");
2410 scalar_stmts_to_slp_tree_map_t
*bst_map
2411 = new scalar_stmts_to_slp_tree_map_t ();
2413 /* Find SLP sequences starting from groups of grouped stores. */
2414 FOR_EACH_VEC_ELT (vinfo
->grouped_stores
, i
, first_element
)
2415 vect_analyze_slp_instance (vinfo
, bst_map
, first_element
, max_tree_size
);
2417 if (loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
))
2419 if (loop_vinfo
->reduction_chains
.length () > 0)
2421 /* Find SLP sequences starting from reduction chains. */
2422 FOR_EACH_VEC_ELT (loop_vinfo
->reduction_chains
, i
, first_element
)
2423 if (! vect_analyze_slp_instance (vinfo
, bst_map
, first_element
,
2426 /* Dissolve reduction chain group. */
2427 stmt_vec_info vinfo
= first_element
;
2428 stmt_vec_info last
= NULL
;
2431 stmt_vec_info next
= REDUC_GROUP_NEXT_ELEMENT (vinfo
);
2432 REDUC_GROUP_FIRST_ELEMENT (vinfo
) = NULL
;
2433 REDUC_GROUP_NEXT_ELEMENT (vinfo
) = NULL
;
2437 STMT_VINFO_DEF_TYPE (first_element
) = vect_internal_def
;
2438 /* It can be still vectorized as part of an SLP reduction. */
2439 loop_vinfo
->reductions
.safe_push (last
);
2443 /* Find SLP sequences starting from groups of reductions. */
2444 if (loop_vinfo
->reductions
.length () > 1)
2445 vect_analyze_slp_instance (vinfo
, bst_map
, loop_vinfo
->reductions
[0],
2449 /* The map keeps a reference on SLP nodes built, release that. */
2450 for (scalar_stmts_to_slp_tree_map_t::iterator it
= bst_map
->begin ();
2451 it
!= bst_map
->end (); ++it
)
2453 vect_free_slp_tree ((*it
).second
, false);
2456 /* Optimize permutations in SLP reductions. */
2457 slp_instance instance
;
2458 FOR_EACH_VEC_ELT (vinfo
->slp_instances
, i
, instance
)
2460 slp_tree node
= SLP_INSTANCE_TREE (instance
);
2461 stmt_vec_info stmt_info
= SLP_TREE_SCALAR_STMTS (node
)[0];
2462 /* Reduction (there are no data-refs in the root).
2463 In reduction chain the order of the loads is not important. */
2464 if (!STMT_VINFO_DATA_REF (stmt_info
)
2465 && !REDUC_GROUP_FIRST_ELEMENT (stmt_info
))
2466 vect_attempt_slp_rearrange_stmts (instance
);
2469 /* Gather all loads in the SLP graph. */
2470 hash_set
<slp_tree
> visited
;
2471 FOR_EACH_VEC_ELT (vinfo
->slp_instances
, i
, instance
)
2472 vect_gather_slp_loads (vinfo
->slp_loads
, SLP_INSTANCE_TREE (instance
),
2475 return opt_result::success ();
2479 vect_optimize_slp (vec_info
*vinfo
)
2483 FOR_EACH_VEC_ELT (vinfo
->slp_loads
, i
, node
)
2485 if (!SLP_TREE_LOAD_PERMUTATION (node
).exists ())
2488 /* In basic block vectorization we allow any subchain of an interleaving
2490 FORNOW: not in loop SLP because of realignment complications. */
2491 if (is_a
<bb_vec_info
> (vinfo
))
2493 bool subchain_p
= true;
2494 stmt_vec_info next_load_info
= NULL
;
2495 stmt_vec_info load_info
;
2497 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node
), j
, load_info
)
2500 && (next_load_info
!= load_info
2501 || DR_GROUP_GAP (load_info
) != 1))
2506 next_load_info
= DR_GROUP_NEXT_ELEMENT (load_info
);
2510 SLP_TREE_LOAD_PERMUTATION (node
).release ();
2516 stmt_vec_info load_info
;
2517 bool this_load_permuted
= false;
2519 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node
), j
, load_info
)
2520 if (SLP_TREE_LOAD_PERMUTATION (node
)[j
] != j
)
2522 this_load_permuted
= true;
2525 stmt_vec_info first_stmt_info
2526 = DR_GROUP_FIRST_ELEMENT (SLP_TREE_SCALAR_STMTS (node
)[0]);
2527 if (!this_load_permuted
2528 /* The load requires permutation when unrolling exposes
2529 a gap either because the group is larger than the SLP
2530 group-size or because there is a gap between the groups. */
2531 && (known_eq (LOOP_VINFO_VECT_FACTOR (as_a
<loop_vec_info
> (vinfo
)), 1U)
2532 || ((SLP_TREE_LANES (node
) == DR_GROUP_SIZE (first_stmt_info
))
2533 && DR_GROUP_GAP (first_stmt_info
) == 0)))
2535 SLP_TREE_LOAD_PERMUTATION (node
).release ();
2543 /* For each possible SLP instance decide whether to SLP it and calculate overall
2544 unrolling factor needed to SLP the loop. Return TRUE if decided to SLP at
2545 least one instance. */
2548 vect_make_slp_decision (loop_vec_info loop_vinfo
)
2551 poly_uint64 unrolling_factor
= 1;
2552 vec
<slp_instance
> slp_instances
= LOOP_VINFO_SLP_INSTANCES (loop_vinfo
);
2553 slp_instance instance
;
2554 int decided_to_slp
= 0;
2556 DUMP_VECT_SCOPE ("vect_make_slp_decision");
2558 FOR_EACH_VEC_ELT (slp_instances
, i
, instance
)
2560 /* FORNOW: SLP if you can. */
2561 /* All unroll factors have the form:
2563 GET_MODE_SIZE (vinfo->vector_mode) * X
2565 for some rational X, so they must have a common multiple. */
2567 = force_common_multiple (unrolling_factor
,
2568 SLP_INSTANCE_UNROLLING_FACTOR (instance
));
2570 /* Mark all the stmts that belong to INSTANCE as PURE_SLP stmts. Later we
2571 call vect_detect_hybrid_slp () to find stmts that need hybrid SLP and
2572 loop-based vectorization. Such stmts will be marked as HYBRID. */
2573 vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance
));
2577 LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo
) = unrolling_factor
;
2579 if (decided_to_slp
&& dump_enabled_p ())
2581 dump_printf_loc (MSG_NOTE
, vect_location
,
2582 "Decided to SLP %d instances. Unrolling factor ",
2584 dump_dec (MSG_NOTE
, unrolling_factor
);
2585 dump_printf (MSG_NOTE
, "\n");
2588 return (decided_to_slp
> 0);
2591 /* Private data for vect_detect_hybrid_slp. */
2594 loop_vec_info loop_vinfo
;
2595 vec
<stmt_vec_info
> *worklist
;
2598 /* Walker for walk_gimple_op. */
2601 vect_detect_hybrid_slp (tree
*tp
, int *, void *data
)
2603 walk_stmt_info
*wi
= (walk_stmt_info
*)data
;
2604 vdhs_data
*dat
= (vdhs_data
*)wi
->info
;
2609 stmt_vec_info def_stmt_info
= dat
->loop_vinfo
->lookup_def (*tp
);
2612 def_stmt_info
= vect_stmt_to_vectorize (def_stmt_info
);
2613 if (PURE_SLP_STMT (def_stmt_info
))
2615 if (dump_enabled_p ())
2616 dump_printf_loc (MSG_NOTE
, vect_location
, "marking hybrid: %G",
2617 def_stmt_info
->stmt
);
2618 STMT_SLP_TYPE (def_stmt_info
) = hybrid
;
2619 dat
->worklist
->safe_push (def_stmt_info
);
2625 /* Find stmts that must be both vectorized and SLPed. */
2628 vect_detect_hybrid_slp (loop_vec_info loop_vinfo
)
2630 DUMP_VECT_SCOPE ("vect_detect_hybrid_slp");
2632 /* All stmts participating in SLP are marked pure_slp, all other
2633 stmts are loop_vect.
2634 First collect all loop_vect stmts into a worklist. */
2635 auto_vec
<stmt_vec_info
> worklist
;
2636 for (unsigned i
= 0; i
< LOOP_VINFO_LOOP (loop_vinfo
)->num_nodes
; ++i
)
2638 basic_block bb
= LOOP_VINFO_BBS (loop_vinfo
)[i
];
2639 for (gphi_iterator gsi
= gsi_start_phis (bb
); !gsi_end_p (gsi
);
2642 gphi
*phi
= gsi
.phi ();
2643 stmt_vec_info stmt_info
= loop_vinfo
->lookup_stmt (phi
);
2644 if (!STMT_SLP_TYPE (stmt_info
) && STMT_VINFO_RELEVANT (stmt_info
))
2645 worklist
.safe_push (stmt_info
);
2647 for (gimple_stmt_iterator gsi
= gsi_start_bb (bb
); !gsi_end_p (gsi
);
2650 gimple
*stmt
= gsi_stmt (gsi
);
2651 if (is_gimple_debug (stmt
))
2653 stmt_vec_info stmt_info
= loop_vinfo
->lookup_stmt (stmt
);
2654 if (STMT_VINFO_IN_PATTERN_P (stmt_info
))
2656 for (gimple_stmt_iterator gsi2
2657 = gsi_start (STMT_VINFO_PATTERN_DEF_SEQ (stmt_info
));
2658 !gsi_end_p (gsi2
); gsi_next (&gsi2
))
2660 stmt_vec_info patt_info
2661 = loop_vinfo
->lookup_stmt (gsi_stmt (gsi2
));
2662 if (!STMT_SLP_TYPE (patt_info
)
2663 && STMT_VINFO_RELEVANT (patt_info
))
2664 worklist
.safe_push (patt_info
);
2666 stmt_info
= STMT_VINFO_RELATED_STMT (stmt_info
);
2668 if (!STMT_SLP_TYPE (stmt_info
) && STMT_VINFO_RELEVANT (stmt_info
))
2669 worklist
.safe_push (stmt_info
);
2673 /* Now we have a worklist of non-SLP stmts, follow use->def chains and
2674 mark any SLP vectorized stmt as hybrid.
2675 ??? We're visiting def stmts N times (once for each non-SLP and
2676 once for each hybrid-SLP use). */
2679 dat
.worklist
= &worklist
;
2680 dat
.loop_vinfo
= loop_vinfo
;
2681 memset (&wi
, 0, sizeof (wi
));
2682 wi
.info
= (void *)&dat
;
2683 while (!worklist
.is_empty ())
2685 stmt_vec_info stmt_info
= worklist
.pop ();
2686 /* Since SSA operands are not set up for pattern stmts we need
2687 to use walk_gimple_op. */
2689 walk_gimple_op (stmt_info
->stmt
, vect_detect_hybrid_slp
, &wi
);
2694 /* Initialize a bb_vec_info struct for the statements between
2695 REGION_BEGIN_IN (inclusive) and REGION_END_IN (exclusive). */
2697 _bb_vec_info::_bb_vec_info (gimple_stmt_iterator region_begin_in
,
2698 gimple_stmt_iterator region_end_in
,
2699 vec_info_shared
*shared
)
2700 : vec_info (vec_info::bb
, init_cost (NULL
), shared
),
2701 bb (gsi_bb (region_begin_in
)),
2702 region_begin (region_begin_in
),
2703 region_end (region_end_in
)
2705 for (gimple
*stmt
: this->region_stmts ())
2707 gimple_set_uid (stmt
, 0);
2708 if (is_gimple_debug (stmt
))
2717 /* Free BB_VINFO struct, as well as all the stmt_vec_info structs of all the
2718 stmts in the basic block. */
2720 _bb_vec_info::~_bb_vec_info ()
2722 for (gimple
*stmt
: this->region_stmts ())
2723 /* Reset region marker. */
2724 gimple_set_uid (stmt
, -1);
2729 /* Subroutine of vect_slp_analyze_node_operations. Handle the root of NODE,
2730 given then that child nodes have already been processed, and that
2731 their def types currently match their SLP node's def type. */
2734 vect_slp_analyze_node_operations_1 (vec_info
*vinfo
, slp_tree node
,
2735 slp_instance node_instance
,
2736 stmt_vector_for_cost
*cost_vec
)
2738 stmt_vec_info stmt_info
= SLP_TREE_REPRESENTATIVE (node
);
2739 gcc_assert (STMT_SLP_TYPE (stmt_info
) != loop_vect
);
2741 /* Calculate the number of vector statements to be created for the
2742 scalar stmts in this node. For SLP reductions it is equal to the
2743 number of vector statements in the children (which has already been
2744 calculated by the recursive call). Otherwise it is the number of
2745 scalar elements in one scalar iteration (DR_GROUP_SIZE) multiplied by
2746 VF divided by the number of elements in a vector. */
2747 if (!STMT_VINFO_GROUPED_ACCESS (stmt_info
)
2748 && REDUC_GROUP_FIRST_ELEMENT (stmt_info
))
2750 for (unsigned i
= 0; i
< SLP_TREE_CHILDREN (node
).length (); ++i
)
2751 if (SLP_TREE_DEF_TYPE (SLP_TREE_CHILDREN (node
)[i
]) == vect_internal_def
)
2753 SLP_TREE_NUMBER_OF_VEC_STMTS (node
)
2754 = SLP_TREE_NUMBER_OF_VEC_STMTS (SLP_TREE_CHILDREN (node
)[i
]);
2761 if (loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
))
2762 vf
= loop_vinfo
->vectorization_factor
;
2765 unsigned int group_size
= SLP_TREE_LANES (node
);
2766 tree vectype
= SLP_TREE_VECTYPE (node
);
2767 SLP_TREE_NUMBER_OF_VEC_STMTS (node
)
2768 = vect_get_num_vectors (vf
* group_size
, vectype
);
2771 /* Handle purely internal nodes. */
2772 if (SLP_TREE_CODE (node
) == VEC_PERM_EXPR
)
2773 return vectorizable_slp_permutation (vinfo
, NULL
, node
, cost_vec
);
2776 return vect_analyze_stmt (vinfo
, stmt_info
, &dummy
,
2777 node
, node_instance
, cost_vec
);
2780 /* Try to build NODE from scalars, returning true on success.
2781 NODE_INSTANCE is the SLP instance that contains NODE. */
2784 vect_slp_convert_to_external (vec_info
*vinfo
, slp_tree node
,
2785 slp_instance node_instance
)
2787 stmt_vec_info stmt_info
;
2790 if (!is_a
<bb_vec_info
> (vinfo
)
2791 || node
== SLP_INSTANCE_TREE (node_instance
)
2792 || !SLP_TREE_SCALAR_STMTS (node
).exists ()
2793 || vect_contains_pattern_stmt_p (SLP_TREE_SCALAR_STMTS (node
)))
2796 if (dump_enabled_p ())
2797 dump_printf_loc (MSG_NOTE
, vect_location
,
2798 "Building vector operands from scalars instead\n");
2800 /* Don't remove and free the child nodes here, since they could be
2801 referenced by other structures. The analysis and scheduling phases
2802 (need to) ignore child nodes of anything that isn't vect_internal_def. */
2803 unsigned int group_size
= SLP_TREE_LANES (node
);
2804 SLP_TREE_DEF_TYPE (node
) = vect_external_def
;
2805 SLP_TREE_SCALAR_OPS (node
).safe_grow (group_size
);
2806 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node
), i
, stmt_info
)
2808 tree lhs
= gimple_get_lhs (vect_orig_stmt (stmt_info
)->stmt
);
2809 SLP_TREE_SCALAR_OPS (node
)[i
] = lhs
;
2814 /* Compute the prologue cost for invariant or constant operands represented
2818 vect_prologue_cost_for_slp (slp_tree node
,
2819 stmt_vector_for_cost
*cost_vec
)
2821 /* There's a special case of an existing vector, that costs nothing. */
2822 if (SLP_TREE_SCALAR_OPS (node
).length () == 0
2823 && !SLP_TREE_VEC_DEFS (node
).is_empty ())
2825 /* Without looking at the actual initializer a vector of
2826 constants can be implemented as load from the constant pool.
2827 When all elements are the same we can use a splat. */
2828 tree vectype
= SLP_TREE_VECTYPE (node
);
2829 unsigned group_size
= SLP_TREE_SCALAR_OPS (node
).length ();
2830 unsigned num_vects_to_check
;
2831 unsigned HOST_WIDE_INT const_nunits
;
2832 unsigned nelt_limit
;
2833 if (TYPE_VECTOR_SUBPARTS (vectype
).is_constant (&const_nunits
)
2834 && ! multiple_p (const_nunits
, group_size
))
2836 num_vects_to_check
= SLP_TREE_NUMBER_OF_VEC_STMTS (node
);
2837 nelt_limit
= const_nunits
;
2841 /* If either the vector has variable length or the vectors
2842 are composed of repeated whole groups we only need to
2843 cost construction once. All vectors will be the same. */
2844 num_vects_to_check
= 1;
2845 nelt_limit
= group_size
;
2847 tree elt
= NULL_TREE
;
2849 for (unsigned j
= 0; j
< num_vects_to_check
* nelt_limit
; ++j
)
2851 unsigned si
= j
% group_size
;
2853 elt
= SLP_TREE_SCALAR_OPS (node
)[si
];
2854 /* ??? We're just tracking whether all operands of a single
2855 vector initializer are the same, ideally we'd check if
2856 we emitted the same one already. */
2857 else if (elt
!= SLP_TREE_SCALAR_OPS (node
)[si
])
2860 if (nelt
== nelt_limit
)
2862 record_stmt_cost (cost_vec
, 1,
2863 SLP_TREE_DEF_TYPE (node
) == vect_external_def
2864 ? (elt
? scalar_to_vec
: vec_construct
)
2866 NULL
, vectype
, 0, vect_prologue
);
2872 /* Analyze statements contained in SLP tree NODE after recursively analyzing
2873 the subtree. NODE_INSTANCE contains NODE and VINFO contains INSTANCE.
2875 Return true if the operations are supported. */
2878 vect_slp_analyze_node_operations (vec_info
*vinfo
, slp_tree node
,
2879 slp_instance node_instance
,
2880 hash_set
<slp_tree
> &visited
,
2881 hash_set
<slp_tree
> &lvisited
,
2882 stmt_vector_for_cost
*cost_vec
)
2887 /* Assume we can code-generate all invariants. */
2888 if (SLP_TREE_DEF_TYPE (node
) != vect_internal_def
)
2891 /* If we already analyzed the exact same set of scalar stmts we're done.
2892 We share the generated vector stmts for those.
2893 The SLP graph is acyclic so not caching whether we failed or succeeded
2894 doesn't result in any issue since we throw away the lvisited set
2896 if (visited
.contains (node
)
2897 || lvisited
.contains (node
))
2901 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), i
, child
)
2903 res
= vect_slp_analyze_node_operations (vinfo
, child
, node_instance
,
2904 visited
, lvisited
, cost_vec
);
2911 res
= vect_slp_analyze_node_operations_1 (vinfo
, node
, node_instance
,
2914 lvisited
.add (node
);
2917 /* When the node can be vectorized cost invariant nodes it references.
2918 This is not done in DFS order to allow the refering node
2919 vectorizable_* calls to nail down the invariant nodes vector type
2920 and possibly unshare it if it needs a different vector type than
2923 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), j
, child
)
2924 if ((SLP_TREE_DEF_TYPE (child
) == vect_constant_def
2925 || SLP_TREE_DEF_TYPE (child
) == vect_external_def
)
2926 /* Perform usual caching, note code-generation still
2927 code-gens these nodes multiple times but we expect
2928 to CSE them later. */
2929 && !visited
.contains (child
)
2930 && !lvisited
.add (child
))
2932 /* ??? After auditing more code paths make a "default"
2933 and push the vector type from NODE to all children
2934 if it is not already set. */
2935 /* Compute the number of vectors to be generated. */
2936 tree vector_type
= SLP_TREE_VECTYPE (child
);
2939 /* For shifts with a scalar argument we don't need
2940 to cost or code-generate anything.
2941 ??? Represent this more explicitely. */
2942 gcc_assert ((STMT_VINFO_TYPE (SLP_TREE_REPRESENTATIVE (node
))
2943 == shift_vec_info_type
)
2947 unsigned group_size
= SLP_TREE_LANES (child
);
2949 if (loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
))
2950 vf
= loop_vinfo
->vectorization_factor
;
2951 SLP_TREE_NUMBER_OF_VEC_STMTS (child
)
2952 = vect_get_num_vectors (vf
* group_size
, vector_type
);
2953 /* And cost them. */
2954 vect_prologue_cost_for_slp (child
, cost_vec
);
2957 /* If this node or any of its children can't be vectorized, try pruning
2958 the tree here rather than felling the whole thing. */
2959 if (!res
&& vect_slp_convert_to_external (vinfo
, node
, node_instance
))
2961 /* We'll need to revisit this for invariant costing and number
2962 of vectorized stmt setting. */
2970 /* Analyze statements in SLP instances of VINFO. Return true if the
2971 operations are supported. */
2974 vect_slp_analyze_operations (vec_info
*vinfo
)
2976 slp_instance instance
;
2979 DUMP_VECT_SCOPE ("vect_slp_analyze_operations");
2981 hash_set
<slp_tree
> visited
;
2982 for (i
= 0; vinfo
->slp_instances
.iterate (i
, &instance
); )
2984 hash_set
<slp_tree
> lvisited
;
2985 stmt_vector_for_cost cost_vec
;
2986 cost_vec
.create (2);
2987 if (!vect_slp_analyze_node_operations (vinfo
,
2988 SLP_INSTANCE_TREE (instance
),
2989 instance
, visited
, lvisited
,
2991 /* Instances with a root stmt require vectorized defs for the
2993 || (SLP_INSTANCE_ROOT_STMT (instance
)
2994 && (SLP_TREE_DEF_TYPE (SLP_INSTANCE_TREE (instance
))
2995 != vect_internal_def
)))
2997 slp_tree node
= SLP_INSTANCE_TREE (instance
);
2998 stmt_vec_info stmt_info
= SLP_TREE_SCALAR_STMTS (node
)[0];
2999 if (dump_enabled_p ())
3000 dump_printf_loc (MSG_NOTE
, vect_location
,
3001 "removing SLP instance operations starting from: %G",
3003 vect_free_slp_instance (instance
, false);
3004 vinfo
->slp_instances
.ordered_remove (i
);
3005 cost_vec
.release ();
3009 for (hash_set
<slp_tree
>::iterator x
= lvisited
.begin();
3010 x
!= lvisited
.end(); ++x
)
3014 add_stmt_costs (vinfo
, vinfo
->target_cost_data
, &cost_vec
);
3015 cost_vec
.release ();
3019 return !vinfo
->slp_instances
.is_empty ();
3023 /* Compute the scalar cost of the SLP node NODE and its children
3024 and return it. Do not account defs that are marked in LIFE and
3025 update LIFE according to uses of NODE. */
3028 vect_bb_slp_scalar_cost (vec_info
*vinfo
,
3029 slp_tree node
, vec
<bool, va_heap
> *life
,
3030 stmt_vector_for_cost
*cost_vec
,
3031 hash_set
<slp_tree
> &visited
)
3034 stmt_vec_info stmt_info
;
3037 if (visited
.add (node
))
3040 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node
), i
, stmt_info
)
3042 gimple
*stmt
= stmt_info
->stmt
;
3043 ssa_op_iter op_iter
;
3044 def_operand_p def_p
;
3049 /* If there is a non-vectorized use of the defs then the scalar
3050 stmt is kept live in which case we do not account it or any
3051 required defs in the SLP children in the scalar cost. This
3052 way we make the vectorization more costly when compared to
3054 FOR_EACH_SSA_DEF_OPERAND (def_p
, stmt
, op_iter
, SSA_OP_DEF
)
3056 imm_use_iterator use_iter
;
3058 FOR_EACH_IMM_USE_STMT (use_stmt
, use_iter
, DEF_FROM_PTR (def_p
))
3059 if (!is_gimple_debug (use_stmt
))
3061 stmt_vec_info use_stmt_info
= vinfo
->lookup_stmt (use_stmt
);
3062 if (!use_stmt_info
|| !PURE_SLP_STMT (use_stmt_info
))
3065 BREAK_FROM_IMM_USE_STMT (use_iter
);
3072 /* Count scalar stmts only once. */
3073 if (gimple_visited_p (stmt
))
3075 gimple_set_visited (stmt
, true);
3077 vect_cost_for_stmt kind
;
3078 if (STMT_VINFO_DATA_REF (stmt_info
))
3080 if (DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info
)))
3083 kind
= scalar_store
;
3085 else if (vect_nop_conversion_p (stmt_info
))
3089 record_stmt_cost (cost_vec
, 1, kind
, stmt_info
, 0, vect_body
);
3092 auto_vec
<bool, 20> subtree_life
;
3093 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), i
, child
)
3095 if (SLP_TREE_DEF_TYPE (child
) == vect_internal_def
)
3097 /* Do not directly pass LIFE to the recursive call, copy it to
3098 confine changes in the callee to the current child/subtree. */
3099 if (SLP_TREE_CODE (node
) == VEC_PERM_EXPR
)
3101 subtree_life
.safe_grow_cleared (SLP_TREE_LANES (child
));
3102 for (unsigned j
= 0;
3103 j
< SLP_TREE_LANE_PERMUTATION (node
).length (); ++j
)
3105 auto perm
= SLP_TREE_LANE_PERMUTATION (node
)[j
];
3106 if (perm
.first
== i
)
3107 subtree_life
[perm
.second
] = (*life
)[j
];
3112 gcc_assert (SLP_TREE_LANES (node
) == SLP_TREE_LANES (child
));
3113 subtree_life
.safe_splice (*life
);
3115 vect_bb_slp_scalar_cost (vinfo
, child
, &subtree_life
, cost_vec
,
3117 subtree_life
.truncate (0);
3122 /* Check if vectorization of the basic block is profitable. */
3125 vect_bb_vectorization_profitable_p (bb_vec_info bb_vinfo
)
3127 vec
<slp_instance
> slp_instances
= BB_VINFO_SLP_INSTANCES (bb_vinfo
);
3128 slp_instance instance
;
3130 unsigned int vec_inside_cost
= 0, vec_outside_cost
= 0, scalar_cost
= 0;
3131 unsigned int vec_prologue_cost
= 0, vec_epilogue_cost
= 0;
3133 /* Calculate scalar cost. */
3134 stmt_vector_for_cost scalar_costs
;
3135 scalar_costs
.create (0);
3136 hash_set
<slp_tree
> visited
;
3137 FOR_EACH_VEC_ELT (slp_instances
, i
, instance
)
3139 auto_vec
<bool, 20> life
;
3140 life
.safe_grow_cleared (SLP_TREE_LANES (SLP_INSTANCE_TREE (instance
)));
3141 vect_bb_slp_scalar_cost (bb_vinfo
,
3142 SLP_INSTANCE_TREE (instance
),
3143 &life
, &scalar_costs
, visited
);
3145 /* Unset visited flag. */
3146 stmt_info_for_cost
*si
;
3147 FOR_EACH_VEC_ELT (scalar_costs
, i
, si
)
3148 gimple_set_visited (si
->stmt_info
->stmt
, false);
3150 void *target_cost_data
= init_cost (NULL
);
3151 add_stmt_costs (bb_vinfo
, target_cost_data
, &scalar_costs
);
3152 scalar_costs
.release ();
3154 finish_cost (target_cost_data
, &dummy
, &scalar_cost
, &dummy
);
3155 destroy_cost_data (target_cost_data
);
3157 /* Complete the target-specific cost calculation. */
3158 finish_cost (BB_VINFO_TARGET_COST_DATA (bb_vinfo
), &vec_prologue_cost
,
3159 &vec_inside_cost
, &vec_epilogue_cost
);
3161 vec_outside_cost
= vec_prologue_cost
+ vec_epilogue_cost
;
3163 if (dump_enabled_p ())
3165 dump_printf_loc (MSG_NOTE
, vect_location
, "Cost model analysis: \n");
3166 dump_printf (MSG_NOTE
, " Vector inside of basic block cost: %d\n",
3168 dump_printf (MSG_NOTE
, " Vector prologue cost: %d\n", vec_prologue_cost
);
3169 dump_printf (MSG_NOTE
, " Vector epilogue cost: %d\n", vec_epilogue_cost
);
3170 dump_printf (MSG_NOTE
, " Scalar cost of basic block: %d\n", scalar_cost
);
3173 /* Vectorization is profitable if its cost is more than the cost of scalar
3174 version. Note that we err on the vector side for equal cost because
3175 the cost estimate is otherwise quite pessimistic (constant uses are
3176 free on the scalar side but cost a load on the vector side for
3178 if (vec_outside_cost
+ vec_inside_cost
> scalar_cost
)
3184 /* Find any vectorizable constructors and add them to the grouped_store
3188 vect_slp_check_for_constructors (bb_vec_info bb_vinfo
)
3190 for (gimple
*stmt
: bb_vinfo
->region_stmts ())
3192 gassign
*assign
= dyn_cast
<gassign
*> (stmt
);
3193 if (!assign
|| gimple_assign_rhs_code (assign
) != CONSTRUCTOR
)
3196 tree rhs
= gimple_assign_rhs1 (assign
);
3197 if (!VECTOR_TYPE_P (TREE_TYPE (rhs
))
3198 || maybe_ne (TYPE_VECTOR_SUBPARTS (TREE_TYPE (rhs
)),
3199 CONSTRUCTOR_NELTS (rhs
))
3200 || VECTOR_TYPE_P (TREE_TYPE (CONSTRUCTOR_ELT (rhs
, 0)->value
))
3201 || uniform_vector_p (rhs
))
3204 stmt_vec_info stmt_info
= bb_vinfo
->lookup_stmt (assign
);
3205 BB_VINFO_GROUPED_STORES (bb_vinfo
).safe_push (stmt_info
);
3209 /* Check if the region described by BB_VINFO can be vectorized, returning
3210 true if so. When returning false, set FATAL to true if the same failure
3211 would prevent vectorization at other vector sizes, false if it is still
3212 worth trying other sizes. N_STMTS is the number of statements in the
3216 vect_slp_analyze_bb_1 (bb_vec_info bb_vinfo
, int n_stmts
, bool &fatal
)
3218 DUMP_VECT_SCOPE ("vect_slp_analyze_bb");
3220 slp_instance instance
;
3222 poly_uint64 min_vf
= 2;
3224 /* The first group of checks is independent of the vector size. */
3227 /* Analyze the data references. */
3229 if (!vect_analyze_data_refs (bb_vinfo
, &min_vf
, NULL
))
3231 if (dump_enabled_p ())
3232 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3233 "not vectorized: unhandled data-ref in basic "
3238 if (!vect_analyze_data_ref_accesses (bb_vinfo
))
3240 if (dump_enabled_p ())
3241 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3242 "not vectorized: unhandled data access in "
3247 vect_slp_check_for_constructors (bb_vinfo
);
3249 /* If there are no grouped stores and no constructors in the region
3250 there is no need to continue with pattern recog as vect_analyze_slp
3251 will fail anyway. */
3252 if (bb_vinfo
->grouped_stores
.is_empty ())
3254 if (dump_enabled_p ())
3255 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3256 "not vectorized: no grouped stores in "
3261 /* While the rest of the analysis below depends on it in some way. */
3264 vect_pattern_recog (bb_vinfo
);
3266 /* Check the SLP opportunities in the basic block, analyze and build SLP
3268 if (!vect_analyze_slp (bb_vinfo
, n_stmts
))
3270 if (dump_enabled_p ())
3272 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3273 "Failed to SLP the basic block.\n");
3274 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3275 "not vectorized: failed to find SLP opportunities "
3276 "in basic block.\n");
3281 /* Optimize permutations. */
3282 vect_optimize_slp (bb_vinfo
);
3284 vect_record_base_alignments (bb_vinfo
);
3286 /* Analyze and verify the alignment of data references and the
3287 dependence in the SLP instances. */
3288 for (i
= 0; BB_VINFO_SLP_INSTANCES (bb_vinfo
).iterate (i
, &instance
); )
3290 if (! vect_slp_analyze_and_verify_instance_alignment (bb_vinfo
, instance
)
3291 || ! vect_slp_analyze_instance_dependence (bb_vinfo
, instance
))
3293 slp_tree node
= SLP_INSTANCE_TREE (instance
);
3294 stmt_vec_info stmt_info
= SLP_TREE_SCALAR_STMTS (node
)[0];
3295 if (dump_enabled_p ())
3296 dump_printf_loc (MSG_NOTE
, vect_location
,
3297 "removing SLP instance operations starting from: %G",
3299 vect_free_slp_instance (instance
, false);
3300 BB_VINFO_SLP_INSTANCES (bb_vinfo
).ordered_remove (i
);
3304 /* Mark all the statements that we want to vectorize as pure SLP and
3306 vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance
));
3307 vect_mark_slp_stmts_relevant (SLP_INSTANCE_TREE (instance
));
3308 if (SLP_INSTANCE_ROOT_STMT (instance
))
3309 STMT_SLP_TYPE (SLP_INSTANCE_ROOT_STMT (instance
)) = pure_slp
;
3313 if (! BB_VINFO_SLP_INSTANCES (bb_vinfo
).length ())
3316 if (!vect_slp_analyze_operations (bb_vinfo
))
3318 if (dump_enabled_p ())
3319 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3320 "not vectorized: bad operation in basic block.\n");
3324 /* Cost model: check if the vectorization is worthwhile. */
3325 if (!unlimited_cost_model (NULL
)
3326 && !vect_bb_vectorization_profitable_p (bb_vinfo
))
3328 if (dump_enabled_p ())
3329 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3330 "not vectorized: vectorization is not "
3335 if (dump_enabled_p ())
3336 dump_printf_loc (MSG_NOTE
, vect_location
,
3337 "Basic block will be vectorized using SLP\n");
3341 /* Subroutine of vect_slp_bb. Try to vectorize the statements between
3342 REGION_BEGIN (inclusive) and REGION_END (exclusive), returning true
3343 on success. The region has N_STMTS statements and has the datarefs
3344 given by DATAREFS. */
3347 vect_slp_bb_region (gimple_stmt_iterator region_begin
,
3348 gimple_stmt_iterator region_end
,
3349 vec
<data_reference_p
> datarefs
,
3350 unsigned int n_stmts
)
3352 bb_vec_info bb_vinfo
;
3353 auto_vector_modes vector_modes
;
3355 /* Autodetect first vector size we try. */
3356 machine_mode next_vector_mode
= VOIDmode
;
3357 targetm
.vectorize
.autovectorize_vector_modes (&vector_modes
, false);
3358 unsigned int mode_i
= 0;
3360 vec_info_shared shared
;
3362 machine_mode autodetected_vector_mode
= VOIDmode
;
3365 bool vectorized
= false;
3367 bb_vinfo
= new _bb_vec_info (region_begin
, region_end
, &shared
);
3369 bool first_time_p
= shared
.datarefs
.is_empty ();
3370 BB_VINFO_DATAREFS (bb_vinfo
) = datarefs
;
3372 bb_vinfo
->shared
->save_datarefs ();
3374 bb_vinfo
->shared
->check_datarefs ();
3375 bb_vinfo
->vector_mode
= next_vector_mode
;
3377 if (vect_slp_analyze_bb_1 (bb_vinfo
, n_stmts
, fatal
)
3378 && dbg_cnt (vect_slp
))
3380 if (dump_enabled_p ())
3382 dump_printf_loc (MSG_NOTE
, vect_location
,
3383 "***** Analysis succeeded with vector mode"
3384 " %s\n", GET_MODE_NAME (bb_vinfo
->vector_mode
));
3385 dump_printf_loc (MSG_NOTE
, vect_location
, "SLPing BB part\n");
3388 bb_vinfo
->shared
->check_datarefs ();
3389 vect_schedule_slp (bb_vinfo
);
3391 unsigned HOST_WIDE_INT bytes
;
3392 if (dump_enabled_p ())
3394 if (GET_MODE_SIZE (bb_vinfo
->vector_mode
).is_constant (&bytes
))
3395 dump_printf_loc (MSG_OPTIMIZED_LOCATIONS
, vect_location
,
3396 "basic block part vectorized using %wu byte "
3397 "vectors\n", bytes
);
3399 dump_printf_loc (MSG_OPTIMIZED_LOCATIONS
, vect_location
,
3400 "basic block part vectorized using variable "
3401 "length vectors\n");
3408 if (dump_enabled_p ())
3409 dump_printf_loc (MSG_NOTE
, vect_location
,
3410 "***** Analysis failed with vector mode %s\n",
3411 GET_MODE_NAME (bb_vinfo
->vector_mode
));
3415 autodetected_vector_mode
= bb_vinfo
->vector_mode
;
3418 while (mode_i
< vector_modes
.length ()
3419 && vect_chooses_same_modes_p (bb_vinfo
, vector_modes
[mode_i
]))
3421 if (dump_enabled_p ())
3422 dump_printf_loc (MSG_NOTE
, vect_location
,
3423 "***** The result for vector mode %s would"
3425 GET_MODE_NAME (vector_modes
[mode_i
]));
3431 if (mode_i
< vector_modes
.length ()
3432 && VECTOR_MODE_P (autodetected_vector_mode
)
3433 && (related_vector_mode (vector_modes
[mode_i
],
3434 GET_MODE_INNER (autodetected_vector_mode
))
3435 == autodetected_vector_mode
)
3436 && (related_vector_mode (autodetected_vector_mode
,
3437 GET_MODE_INNER (vector_modes
[mode_i
]))
3438 == vector_modes
[mode_i
]))
3440 if (dump_enabled_p ())
3441 dump_printf_loc (MSG_NOTE
, vect_location
,
3442 "***** Skipping vector mode %s, which would"
3443 " repeat the analysis for %s\n",
3444 GET_MODE_NAME (vector_modes
[mode_i
]),
3445 GET_MODE_NAME (autodetected_vector_mode
));
3450 || mode_i
== vector_modes
.length ()
3451 || autodetected_vector_mode
== VOIDmode
3452 /* If vect_slp_analyze_bb_1 signaled that analysis for all
3453 vector sizes will fail do not bother iterating. */
3457 /* Try the next biggest vector size. */
3458 next_vector_mode
= vector_modes
[mode_i
++];
3459 if (dump_enabled_p ())
3460 dump_printf_loc (MSG_NOTE
, vect_location
,
3461 "***** Re-trying analysis with vector mode %s\n",
3462 GET_MODE_NAME (next_vector_mode
));
3466 /* Main entry for the BB vectorizer. Analyze and transform BB, returns
3467 true if anything in the basic-block was vectorized. */
3470 vect_slp_bb (basic_block bb
)
3472 gimple_stmt_iterator gsi
;
3473 bool any_vectorized
= false;
3475 gsi
= gsi_after_labels (bb
);
3476 while (!gsi_end_p (gsi
))
3478 gimple_stmt_iterator region_begin
= gsi
;
3479 vec
<data_reference_p
> datarefs
= vNULL
;
3482 for (; !gsi_end_p (gsi
); gsi_next (&gsi
))
3484 gimple
*stmt
= gsi_stmt (gsi
);
3485 if (is_gimple_debug (stmt
))
3487 /* Skip leading debug stmts. */
3488 if (gsi_stmt (region_begin
) == stmt
)
3489 gsi_next (®ion_begin
);
3494 if (gimple_location (stmt
) != UNKNOWN_LOCATION
)
3495 vect_location
= stmt
;
3497 if (!vect_find_stmt_data_reference (NULL
, stmt
, &datarefs
))
3500 if (gsi_end_p (region_begin
))
3503 /* Skip leading unhandled stmts. */
3504 if (gsi_stmt (region_begin
) == gsi_stmt (gsi
))
3510 gimple_stmt_iterator region_end
= gsi
;
3512 if (insns
> param_slp_max_insns_in_bb
)
3514 if (dump_enabled_p ())
3515 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3516 "not vectorized: too many instructions in "
3519 else if (vect_slp_bb_region (region_begin
, region_end
, datarefs
, insns
))
3520 any_vectorized
= true;
3522 if (gsi_end_p (region_end
))
3525 /* Skip the unhandled stmt. */
3529 return any_vectorized
;
3533 /* Build a variable-length vector in which the elements in ELTS are repeated
3534 to a fill NRESULTS vectors of type VECTOR_TYPE. Store the vectors in
3535 RESULTS and add any new instructions to SEQ.
3537 The approach we use is:
3539 (1) Find a vector mode VM with integer elements of mode IM.
3541 (2) Replace ELTS[0:NELTS] with ELTS'[0:NELTS'], where each element of
3542 ELTS' has mode IM. This involves creating NELTS' VIEW_CONVERT_EXPRs
3543 from small vectors to IM.
3545 (3) Duplicate each ELTS'[I] into a vector of mode VM.
3547 (4) Use a tree of interleaving VEC_PERM_EXPRs to create VMs with the
3548 correct byte contents.
3550 (5) Use VIEW_CONVERT_EXPR to cast the final VMs to the required type.
3552 We try to find the largest IM for which this sequence works, in order
3553 to cut down on the number of interleaves. */
3556 duplicate_and_interleave (vec_info
*vinfo
, gimple_seq
*seq
, tree vector_type
,
3557 vec
<tree
> elts
, unsigned int nresults
,
3560 unsigned int nelts
= elts
.length ();
3561 tree element_type
= TREE_TYPE (vector_type
);
3563 /* (1) Find a vector mode VM with integer elements of mode IM. */
3564 unsigned int nvectors
= 1;
3565 tree new_vector_type
;
3567 if (!can_duplicate_and_interleave_p (vinfo
, nelts
, element_type
,
3568 &nvectors
, &new_vector_type
,
3572 /* Get a vector type that holds ELTS[0:NELTS/NELTS']. */
3573 unsigned int partial_nelts
= nelts
/ nvectors
;
3574 tree partial_vector_type
= build_vector_type (element_type
, partial_nelts
);
3576 tree_vector_builder partial_elts
;
3577 auto_vec
<tree
, 32> pieces (nvectors
* 2);
3578 pieces
.quick_grow (nvectors
* 2);
3579 for (unsigned int i
= 0; i
< nvectors
; ++i
)
3581 /* (2) Replace ELTS[0:NELTS] with ELTS'[0:NELTS'], where each element of
3582 ELTS' has mode IM. */
3583 partial_elts
.new_vector (partial_vector_type
, partial_nelts
, 1);
3584 for (unsigned int j
= 0; j
< partial_nelts
; ++j
)
3585 partial_elts
.quick_push (elts
[i
* partial_nelts
+ j
]);
3586 tree t
= gimple_build_vector (seq
, &partial_elts
);
3587 t
= gimple_build (seq
, VIEW_CONVERT_EXPR
,
3588 TREE_TYPE (new_vector_type
), t
);
3590 /* (3) Duplicate each ELTS'[I] into a vector of mode VM. */
3591 pieces
[i
] = gimple_build_vector_from_val (seq
, new_vector_type
, t
);
3594 /* (4) Use a tree of VEC_PERM_EXPRs to create a single VM with the
3595 correct byte contents.
3597 We need to repeat the following operation log2(nvectors) times:
3599 out[i * 2] = VEC_PERM_EXPR (in[i], in[i + hi_start], lo_permute);
3600 out[i * 2 + 1] = VEC_PERM_EXPR (in[i], in[i + hi_start], hi_permute);
3602 However, if each input repeats every N elements and the VF is
3603 a multiple of N * 2, the HI result is the same as the LO. */
3604 unsigned int in_start
= 0;
3605 unsigned int out_start
= nvectors
;
3606 unsigned int hi_start
= nvectors
/ 2;
3607 /* A bound on the number of outputs needed to produce NRESULTS results
3608 in the final iteration. */
3609 unsigned int noutputs_bound
= nvectors
* nresults
;
3610 for (unsigned int in_repeat
= 1; in_repeat
< nvectors
; in_repeat
*= 2)
3612 noutputs_bound
/= 2;
3613 unsigned int limit
= MIN (noutputs_bound
, nvectors
);
3614 for (unsigned int i
= 0; i
< limit
; ++i
)
3617 && multiple_p (TYPE_VECTOR_SUBPARTS (new_vector_type
),
3620 pieces
[out_start
+ i
] = pieces
[out_start
+ i
- 1];
3624 tree output
= make_ssa_name (new_vector_type
);
3625 tree input1
= pieces
[in_start
+ (i
/ 2)];
3626 tree input2
= pieces
[in_start
+ (i
/ 2) + hi_start
];
3627 gassign
*stmt
= gimple_build_assign (output
, VEC_PERM_EXPR
,
3630 gimple_seq_add_stmt (seq
, stmt
);
3631 pieces
[out_start
+ i
] = output
;
3633 std::swap (in_start
, out_start
);
3636 /* (5) Use VIEW_CONVERT_EXPR to cast the final VM to the required type. */
3637 results
.reserve (nresults
);
3638 for (unsigned int i
= 0; i
< nresults
; ++i
)
3640 results
.quick_push (gimple_build (seq
, VIEW_CONVERT_EXPR
, vector_type
,
3641 pieces
[in_start
+ i
]));
3643 results
.quick_push (results
[i
- nvectors
]);
3647 /* For constant and loop invariant defs in OP_NODE this function creates
3648 vector defs that will be used in the vectorized stmts and stores them
3649 to SLP_TREE_VEC_DEFS of OP_NODE. */
3652 vect_create_constant_vectors (vec_info
*vinfo
, slp_tree op_node
)
3654 unsigned HOST_WIDE_INT nunits
;
3656 unsigned j
, number_of_places_left_in_vector
;
3659 int group_size
= op_node
->ops
.length ();
3660 unsigned int vec_num
, i
;
3661 unsigned number_of_copies
= 1;
3663 gimple_seq ctor_seq
= NULL
;
3664 auto_vec
<tree
, 16> permute_results
;
3666 /* We always want SLP_TREE_VECTYPE (op_node) here correctly set. */
3667 vector_type
= SLP_TREE_VECTYPE (op_node
);
3669 unsigned int number_of_vectors
= SLP_TREE_NUMBER_OF_VEC_STMTS (op_node
);
3670 SLP_TREE_VEC_DEFS (op_node
).create (number_of_vectors
);
3671 auto_vec
<tree
> voprnds (number_of_vectors
);
3673 /* NUMBER_OF_COPIES is the number of times we need to use the same values in
3674 created vectors. It is greater than 1 if unrolling is performed.
3676 For example, we have two scalar operands, s1 and s2 (e.g., group of
3677 strided accesses of size two), while NUNITS is four (i.e., four scalars
3678 of this type can be packed in a vector). The output vector will contain
3679 two copies of each scalar operand: {s1, s2, s1, s2}. (NUMBER_OF_COPIES
3682 If GROUP_SIZE > NUNITS, the scalars will be split into several vectors
3683 containing the operands.
3685 For example, NUNITS is four as before, and the group size is 8
3686 (s1, s2, ..., s8). We will create two vectors {s1, s2, s3, s4} and
3687 {s5, s6, s7, s8}. */
3689 /* When using duplicate_and_interleave, we just need one element for
3690 each scalar statement. */
3691 if (!TYPE_VECTOR_SUBPARTS (vector_type
).is_constant (&nunits
))
3692 nunits
= group_size
;
3694 number_of_copies
= nunits
* number_of_vectors
/ group_size
;
3696 number_of_places_left_in_vector
= nunits
;
3698 tree_vector_builder
elts (vector_type
, nunits
, 1);
3699 elts
.quick_grow (nunits
);
3700 stmt_vec_info insert_after
= NULL
;
3701 for (j
= 0; j
< number_of_copies
; j
++)
3704 for (i
= group_size
- 1; op_node
->ops
.iterate (i
, &op
); i
--)
3706 /* Create 'vect_ = {op0,op1,...,opn}'. */
3707 number_of_places_left_in_vector
--;
3709 if (!types_compatible_p (TREE_TYPE (vector_type
), TREE_TYPE (op
)))
3711 if (CONSTANT_CLASS_P (op
))
3713 if (VECTOR_BOOLEAN_TYPE_P (vector_type
))
3715 /* Can't use VIEW_CONVERT_EXPR for booleans because
3716 of possibly different sizes of scalar value and
3718 if (integer_zerop (op
))
3719 op
= build_int_cst (TREE_TYPE (vector_type
), 0);
3720 else if (integer_onep (op
))
3721 op
= build_all_ones_cst (TREE_TYPE (vector_type
));
3726 op
= fold_unary (VIEW_CONVERT_EXPR
,
3727 TREE_TYPE (vector_type
), op
);
3728 gcc_assert (op
&& CONSTANT_CLASS_P (op
));
3732 tree new_temp
= make_ssa_name (TREE_TYPE (vector_type
));
3734 if (VECTOR_BOOLEAN_TYPE_P (vector_type
))
3737 = build_all_ones_cst (TREE_TYPE (vector_type
));
3739 = build_zero_cst (TREE_TYPE (vector_type
));
3740 gcc_assert (INTEGRAL_TYPE_P (TREE_TYPE (op
)));
3741 init_stmt
= gimple_build_assign (new_temp
, COND_EXPR
,
3747 op
= build1 (VIEW_CONVERT_EXPR
, TREE_TYPE (vector_type
),
3750 = gimple_build_assign (new_temp
, VIEW_CONVERT_EXPR
,
3753 gimple_seq_add_stmt (&ctor_seq
, init_stmt
);
3757 elts
[number_of_places_left_in_vector
] = op
;
3758 if (!CONSTANT_CLASS_P (op
))
3760 /* For BB vectorization we have to compute an insert location
3761 when a def is inside the analyzed region since we cannot
3762 simply insert at the BB start in this case. */
3763 stmt_vec_info opdef
;
3764 if (TREE_CODE (orig_op
) == SSA_NAME
3765 && !SSA_NAME_IS_DEFAULT_DEF (orig_op
)
3766 && is_a
<bb_vec_info
> (vinfo
)
3767 && (opdef
= vinfo
->lookup_def (orig_op
)))
3770 insert_after
= opdef
;
3772 insert_after
= get_later_stmt (insert_after
, opdef
);
3775 if (number_of_places_left_in_vector
== 0)
3778 ? multiple_p (TYPE_VECTOR_SUBPARTS (vector_type
), nunits
)
3779 : known_eq (TYPE_VECTOR_SUBPARTS (vector_type
), nunits
))
3780 vec_cst
= gimple_build_vector (&ctor_seq
, &elts
);
3783 if (permute_results
.is_empty ())
3784 duplicate_and_interleave (vinfo
, &ctor_seq
, vector_type
,
3785 elts
, number_of_vectors
,
3787 vec_cst
= permute_results
[number_of_vectors
- j
- 1];
3789 if (!gimple_seq_empty_p (ctor_seq
))
3793 gimple_stmt_iterator gsi
3794 = gsi_for_stmt (insert_after
->stmt
);
3795 gsi_insert_seq_after (&gsi
, ctor_seq
,
3796 GSI_CONTINUE_LINKING
);
3799 vinfo
->insert_seq_on_entry (NULL
, ctor_seq
);
3802 voprnds
.quick_push (vec_cst
);
3803 insert_after
= NULL
;
3804 number_of_places_left_in_vector
= nunits
;
3806 elts
.new_vector (vector_type
, nunits
, 1);
3807 elts
.quick_grow (nunits
);
3812 /* Since the vectors are created in the reverse order, we should invert
3814 vec_num
= voprnds
.length ();
3815 for (j
= vec_num
; j
!= 0; j
--)
3817 vop
= voprnds
[j
- 1];
3818 SLP_TREE_VEC_DEFS (op_node
).quick_push (vop
);
3821 /* In case that VF is greater than the unrolling factor needed for the SLP
3822 group of stmts, NUMBER_OF_VECTORS to be created is greater than
3823 NUMBER_OF_SCALARS/NUNITS or NUNITS/NUMBER_OF_SCALARS, and hence we have
3824 to replicate the vectors. */
3825 while (number_of_vectors
> SLP_TREE_VEC_DEFS (op_node
).length ())
3826 for (i
= 0; SLP_TREE_VEC_DEFS (op_node
).iterate (i
, &vop
) && i
< vec_num
;
3828 SLP_TREE_VEC_DEFS (op_node
).quick_push (vop
);
3831 /* Get the Ith vectorized definition from SLP_NODE. */
3834 vect_get_slp_vect_def (slp_tree slp_node
, unsigned i
)
3836 if (SLP_TREE_VEC_STMTS (slp_node
).exists ())
3837 return gimple_get_lhs (SLP_TREE_VEC_STMTS (slp_node
)[i
]);
3839 return SLP_TREE_VEC_DEFS (slp_node
)[i
];
3842 /* Get the vectorized definitions of SLP_NODE in *VEC_DEFS. */
3845 vect_get_slp_defs (slp_tree slp_node
, vec
<tree
> *vec_defs
)
3847 vec_defs
->create (SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
));
3848 if (SLP_TREE_DEF_TYPE (slp_node
) == vect_internal_def
)
3851 gimple
*vec_def_stmt
;
3852 FOR_EACH_VEC_ELT (SLP_TREE_VEC_STMTS (slp_node
), j
, vec_def_stmt
)
3853 vec_defs
->quick_push (gimple_get_lhs (vec_def_stmt
));
3856 vec_defs
->splice (SLP_TREE_VEC_DEFS (slp_node
));
3859 /* Get N vectorized definitions for SLP_NODE. */
3862 vect_get_slp_defs (vec_info
*,
3863 slp_tree slp_node
, vec
<vec
<tree
> > *vec_oprnds
, unsigned n
)
3866 n
= SLP_TREE_CHILDREN (slp_node
).length ();
3868 for (unsigned i
= 0; i
< n
; ++i
)
3870 slp_tree child
= SLP_TREE_CHILDREN (slp_node
)[i
];
3871 vec
<tree
> vec_defs
= vNULL
;
3872 vect_get_slp_defs (child
, &vec_defs
);
3873 vec_oprnds
->quick_push (vec_defs
);
3877 /* Generate vector permute statements from a list of loads in DR_CHAIN.
3878 If ANALYZE_ONLY is TRUE, only check that it is possible to create valid
3879 permute statements for the SLP node NODE. */
3882 vect_transform_slp_perm_load (vec_info
*vinfo
,
3883 slp_tree node
, vec
<tree
> dr_chain
,
3884 gimple_stmt_iterator
*gsi
, poly_uint64 vf
,
3885 bool analyze_only
, unsigned *n_perms
)
3887 stmt_vec_info stmt_info
= SLP_TREE_SCALAR_STMTS (node
)[0];
3889 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
3890 unsigned int group_size
= SLP_TREE_SCALAR_STMTS (node
).length ();
3891 unsigned int mask_element
;
3894 if (!STMT_VINFO_GROUPED_ACCESS (stmt_info
))
3897 stmt_info
= DR_GROUP_FIRST_ELEMENT (stmt_info
);
3899 mode
= TYPE_MODE (vectype
);
3900 poly_uint64 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
3902 /* Initialize the vect stmts of NODE to properly insert the generated
3905 for (unsigned i
= SLP_TREE_VEC_STMTS (node
).length ();
3906 i
< SLP_TREE_NUMBER_OF_VEC_STMTS (node
); i
++)
3907 SLP_TREE_VEC_STMTS (node
).quick_push (NULL
);
3909 /* Generate permutation masks for every NODE. Number of masks for each NODE
3910 is equal to GROUP_SIZE.
3911 E.g., we have a group of three nodes with three loads from the same
3912 location in each node, and the vector size is 4. I.e., we have a
3913 a0b0c0a1b1c1... sequence and we need to create the following vectors:
3914 for a's: a0a0a0a1 a1a1a2a2 a2a3a3a3
3915 for b's: b0b0b0b1 b1b1b2b2 b2b3b3b3
3918 The masks for a's should be: {0,0,0,3} {3,3,6,6} {6,9,9,9}.
3919 The last mask is illegal since we assume two operands for permute
3920 operation, and the mask element values can't be outside that range.
3921 Hence, the last mask must be converted into {2,5,5,5}.
3922 For the first two permutations we need the first and the second input
3923 vectors: {a0,b0,c0,a1} and {b1,c1,a2,b2}, and for the last permutation
3924 we need the second and the third vectors: {b1,c1,a2,b2} and
3927 int vect_stmts_counter
= 0;
3928 unsigned int index
= 0;
3929 int first_vec_index
= -1;
3930 int second_vec_index
= -1;
3934 vec_perm_builder mask
;
3935 unsigned int nelts_to_build
;
3936 unsigned int nvectors_per_build
;
3937 bool repeating_p
= (group_size
== DR_GROUP_SIZE (stmt_info
)
3938 && multiple_p (nunits
, group_size
));
3941 /* A single vector contains a whole number of copies of the node, so:
3942 (a) all permutes can use the same mask; and
3943 (b) the permutes only need a single vector input. */
3944 mask
.new_vector (nunits
, group_size
, 3);
3945 nelts_to_build
= mask
.encoded_nelts ();
3946 nvectors_per_build
= SLP_TREE_VEC_STMTS (node
).length ();
3950 /* We need to construct a separate mask for each vector statement. */
3951 unsigned HOST_WIDE_INT const_nunits
, const_vf
;
3952 if (!nunits
.is_constant (&const_nunits
)
3953 || !vf
.is_constant (&const_vf
))
3955 mask
.new_vector (const_nunits
, const_nunits
, 1);
3956 nelts_to_build
= const_vf
* group_size
;
3957 nvectors_per_build
= 1;
3960 unsigned int count
= mask
.encoded_nelts ();
3961 mask
.quick_grow (count
);
3962 vec_perm_indices indices
;
3964 for (unsigned int j
= 0; j
< nelts_to_build
; j
++)
3966 unsigned int iter_num
= j
/ group_size
;
3967 unsigned int stmt_num
= j
% group_size
;
3968 unsigned int i
= (iter_num
* DR_GROUP_SIZE (stmt_info
)
3969 + SLP_TREE_LOAD_PERMUTATION (node
)[stmt_num
]);
3972 first_vec_index
= 0;
3977 /* Enforced before the loop when !repeating_p. */
3978 unsigned int const_nunits
= nunits
.to_constant ();
3979 vec_index
= i
/ const_nunits
;
3980 mask_element
= i
% const_nunits
;
3981 if (vec_index
== first_vec_index
3982 || first_vec_index
== -1)
3984 first_vec_index
= vec_index
;
3986 else if (vec_index
== second_vec_index
3987 || second_vec_index
== -1)
3989 second_vec_index
= vec_index
;
3990 mask_element
+= const_nunits
;
3994 if (dump_enabled_p ())
3995 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3996 "permutation requires at "
3997 "least three vectors %G",
3999 gcc_assert (analyze_only
);
4003 gcc_assert (mask_element
< 2 * const_nunits
);
4006 if (mask_element
!= index
)
4008 mask
[index
++] = mask_element
;
4010 if (index
== count
&& !noop_p
)
4012 indices
.new_vector (mask
, second_vec_index
== -1 ? 1 : 2, nunits
);
4013 if (!can_vec_perm_const_p (mode
, indices
))
4015 if (dump_enabled_p ())
4017 dump_printf_loc (MSG_MISSED_OPTIMIZATION
,
4019 "unsupported vect permute { ");
4020 for (i
= 0; i
< count
; ++i
)
4022 dump_dec (MSG_MISSED_OPTIMIZATION
, mask
[i
]);
4023 dump_printf (MSG_MISSED_OPTIMIZATION
, " ");
4025 dump_printf (MSG_MISSED_OPTIMIZATION
, "}\n");
4027 gcc_assert (analyze_only
);
4038 tree mask_vec
= NULL_TREE
;
4041 mask_vec
= vect_gen_perm_mask_checked (vectype
, indices
);
4043 if (second_vec_index
== -1)
4044 second_vec_index
= first_vec_index
;
4046 for (unsigned int ri
= 0; ri
< nvectors_per_build
; ++ri
)
4048 /* Generate the permute statement if necessary. */
4049 tree first_vec
= dr_chain
[first_vec_index
+ ri
];
4050 tree second_vec
= dr_chain
[second_vec_index
+ ri
];
4054 gassign
*stmt
= as_a
<gassign
*> (stmt_info
->stmt
);
4056 = vect_create_destination_var (gimple_assign_lhs (stmt
),
4058 perm_dest
= make_ssa_name (perm_dest
);
4060 = gimple_build_assign (perm_dest
, VEC_PERM_EXPR
,
4061 first_vec
, second_vec
,
4063 vect_finish_stmt_generation (vinfo
, stmt_info
, perm_stmt
,
4067 /* If mask was NULL_TREE generate the requested
4068 identity transform. */
4069 perm_stmt
= SSA_NAME_DEF_STMT (first_vec
);
4071 /* Store the vector statement in NODE. */
4072 SLP_TREE_VEC_STMTS (node
)[vect_stmts_counter
++] = perm_stmt
;
4077 first_vec_index
= -1;
4078 second_vec_index
= -1;
4087 /* Vectorize the SLP permutations in NODE as specified
4088 in SLP_TREE_LANE_PERMUTATION which is a vector of pairs of SLP
4089 child number and lane number.
4090 Interleaving of two two-lane two-child SLP subtrees (not supported):
4091 [ { 0, 0 }, { 1, 0 }, { 0, 1 }, { 1, 1 } ]
4092 A blend of two four-lane two-child SLP subtrees:
4093 [ { 0, 0 }, { 1, 1 }, { 0, 2 }, { 1, 3 } ]
4094 Highpart of a four-lane one-child SLP subtree (not supported):
4095 [ { 0, 2 }, { 0, 3 } ]
4096 Where currently only a subset is supported by code generating below. */
4099 vectorizable_slp_permutation (vec_info
*vinfo
, gimple_stmt_iterator
*gsi
,
4100 slp_tree node
, stmt_vector_for_cost
*cost_vec
)
4102 tree vectype
= SLP_TREE_VECTYPE (node
);
4104 /* ??? We currently only support all same vector input and output types
4105 while the SLP IL should really do a concat + select and thus accept
4106 arbitrary mismatches. */
4109 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), i
, child
)
4110 if (!types_compatible_p (SLP_TREE_VECTYPE (child
), vectype
))
4112 if (dump_enabled_p ())
4113 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4114 "Unsupported lane permutation\n");
4118 vec
<std::pair
<unsigned, unsigned> > &perm
= SLP_TREE_LANE_PERMUTATION (node
);
4119 gcc_assert (perm
.length () == SLP_TREE_LANES (node
));
4120 if (dump_enabled_p ())
4122 dump_printf_loc (MSG_NOTE
, vect_location
,
4123 "vectorizing permutation");
4124 for (unsigned i
= 0; i
< perm
.length (); ++i
)
4125 dump_printf (MSG_NOTE
, " op%u[%u]", perm
[i
].first
, perm
[i
].second
);
4126 dump_printf (MSG_NOTE
, "\n");
4129 poly_uint64 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
4130 if (!nunits
.is_constant ())
4132 unsigned HOST_WIDE_INT vf
= 1;
4133 if (loop_vec_info linfo
= dyn_cast
<loop_vec_info
> (vinfo
))
4134 if (!LOOP_VINFO_VECT_FACTOR (linfo
).is_constant (&vf
))
4136 unsigned olanes
= vf
* SLP_TREE_LANES (node
);
4137 gcc_assert (multiple_p (olanes
, nunits
));
4139 /* Compute the { { SLP operand, vector index}, lane } permutation sequence
4140 from the { SLP operand, scalar lane } permutation as recorded in the
4141 SLP node as intermediate step. This part should already work
4142 with SLP children with arbitrary number of lanes. */
4143 auto_vec
<std::pair
<std::pair
<unsigned, unsigned>, unsigned> > vperm
;
4144 auto_vec
<unsigned> active_lane
;
4145 vperm
.create (olanes
);
4146 active_lane
.safe_grow_cleared (SLP_TREE_CHILDREN (node
).length ());
4147 for (unsigned i
= 0; i
< vf
; ++i
)
4149 for (unsigned pi
= 0; pi
< perm
.length (); ++pi
)
4151 std::pair
<unsigned, unsigned> p
= perm
[pi
];
4152 tree vtype
= SLP_TREE_VECTYPE (SLP_TREE_CHILDREN (node
)[p
.first
]);
4153 unsigned vnunits
= TYPE_VECTOR_SUBPARTS (vtype
).to_constant ();
4154 unsigned vi
= (active_lane
[p
.first
] + p
.second
) / vnunits
;
4155 unsigned vl
= (active_lane
[p
.first
] + p
.second
) % vnunits
;
4156 vperm
.quick_push (std::make_pair (std::make_pair (p
.first
, vi
), vl
));
4158 /* Advance to the next group. */
4159 for (unsigned j
= 0; j
< SLP_TREE_CHILDREN (node
).length (); ++j
)
4160 active_lane
[j
] += SLP_TREE_LANES (SLP_TREE_CHILDREN (node
)[j
]);
4163 if (dump_enabled_p ())
4165 dump_printf_loc (MSG_NOTE
, vect_location
, "as");
4166 for (unsigned i
= 0; i
< vperm
.length (); ++i
)
4168 if (i
!= 0 && multiple_p (i
, TYPE_VECTOR_SUBPARTS (vectype
)))
4169 dump_printf (MSG_NOTE
, ",");
4170 dump_printf (MSG_NOTE
, " vops%u[%u][%u]",
4171 vperm
[i
].first
.first
, vperm
[i
].first
.second
,
4172 vperm
[i
].first
.second
);
4174 dump_printf (MSG_NOTE
, "\n");
4177 /* We can only handle two-vector permutes, everything else should
4178 be lowered on the SLP level. The following is closely inspired
4179 by vect_transform_slp_perm_load and is supposed to eventually
4181 ??? As intermediate step do code-gen in the SLP tree representation
4183 std::pair
<unsigned, unsigned> first_vec
= std::make_pair (-1U, -1U);
4184 std::pair
<unsigned, unsigned> second_vec
= std::make_pair (-1U, -1U);
4185 unsigned int const_nunits
= nunits
.to_constant ();
4186 unsigned int index
= 0;
4187 unsigned int mask_element
;
4188 vec_perm_builder mask
;
4189 mask
.new_vector (const_nunits
, const_nunits
, 1);
4190 unsigned int count
= mask
.encoded_nelts ();
4191 mask
.quick_grow (count
);
4192 vec_perm_indices indices
;
4193 unsigned nperms
= 0;
4194 for (unsigned i
= 0; i
< vperm
.length (); ++i
)
4196 mask_element
= vperm
[i
].second
;
4197 if (first_vec
.first
== -1U
4198 || first_vec
== vperm
[i
].first
)
4199 first_vec
= vperm
[i
].first
;
4200 else if (second_vec
.first
== -1U
4201 || second_vec
== vperm
[i
].first
)
4203 second_vec
= vperm
[i
].first
;
4204 mask_element
+= const_nunits
;
4208 if (dump_enabled_p ())
4209 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4210 "permutation requires at "
4211 "least three vectors");
4216 mask
[index
++] = mask_element
;
4220 indices
.new_vector (mask
, second_vec
.first
== -1U ? 1 : 2,
4222 bool identity_p
= indices
.series_p (0, 1, 0, 1);
4224 && !can_vec_perm_const_p (TYPE_MODE (vectype
), indices
))
4226 if (dump_enabled_p ())
4228 dump_printf_loc (MSG_MISSED_OPTIMIZATION
,
4230 "unsupported vect permute { ");
4231 for (i
= 0; i
< count
; ++i
)
4233 dump_dec (MSG_MISSED_OPTIMIZATION
, mask
[i
]);
4234 dump_printf (MSG_MISSED_OPTIMIZATION
, " ");
4236 dump_printf (MSG_MISSED_OPTIMIZATION
, "}\n");
4246 if (second_vec
.first
== -1U)
4247 second_vec
= first_vec
;
4249 /* Generate the permute statement if necessary. */
4250 slp_tree first_node
= SLP_TREE_CHILDREN (node
)[first_vec
.first
];
4252 = vect_get_slp_vect_def (first_node
, first_vec
.second
);
4254 tree perm_dest
= make_ssa_name (vectype
);
4257 slp_tree second_node
4258 = SLP_TREE_CHILDREN (node
)[second_vec
.first
];
4260 = vect_get_slp_vect_def (second_node
, second_vec
.second
);
4261 tree mask_vec
= vect_gen_perm_mask_checked (vectype
, indices
);
4262 perm_stmt
= gimple_build_assign (perm_dest
, VEC_PERM_EXPR
,
4263 first_def
, second_def
,
4267 /* We need a copy here in case the def was external. */
4268 perm_stmt
= gimple_build_assign (perm_dest
, first_def
);
4269 vect_finish_stmt_generation (vinfo
, NULL
, perm_stmt
, gsi
);
4270 /* Store the vector statement in NODE. */
4271 SLP_TREE_VEC_STMTS (node
).quick_push (perm_stmt
);
4275 first_vec
= std::make_pair (-1U, -1U);
4276 second_vec
= std::make_pair (-1U, -1U);
4281 record_stmt_cost (cost_vec
, nperms
, vec_perm
, NULL
, vectype
, 0, vect_body
);
4286 /* Vectorize SLP instance tree in postorder. */
4289 vect_schedule_slp_instance (vec_info
*vinfo
,
4290 slp_tree node
, slp_instance instance
)
4292 gimple_stmt_iterator si
;
4296 /* See if we have already vectorized the node in the graph of the
4298 if ((SLP_TREE_DEF_TYPE (node
) == vect_internal_def
4299 && SLP_TREE_VEC_STMTS (node
).exists ())
4300 || SLP_TREE_VEC_DEFS (node
).exists ())
4303 /* Vectorize externals and constants. */
4304 if (SLP_TREE_DEF_TYPE (node
) == vect_constant_def
4305 || SLP_TREE_DEF_TYPE (node
) == vect_external_def
)
4307 /* ??? vectorizable_shift can end up using a scalar operand which is
4308 currently denoted as !SLP_TREE_VECTYPE. No need to vectorize the
4309 node in this case. */
4310 if (!SLP_TREE_VECTYPE (node
))
4313 vect_create_constant_vectors (vinfo
, node
);
4317 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), i
, child
)
4318 vect_schedule_slp_instance (vinfo
, child
, instance
);
4320 gcc_assert (SLP_TREE_NUMBER_OF_VEC_STMTS (node
) != 0);
4321 SLP_TREE_VEC_STMTS (node
).create (SLP_TREE_NUMBER_OF_VEC_STMTS (node
));
4323 stmt_vec_info stmt_info
= SLP_TREE_REPRESENTATIVE (node
);
4324 if (dump_enabled_p ())
4325 dump_printf_loc (MSG_NOTE
, vect_location
,
4326 "------>vectorizing SLP node starting from: %G",
4329 if (STMT_VINFO_DATA_REF (stmt_info
)
4330 && SLP_TREE_CODE (node
) != VEC_PERM_EXPR
)
4332 /* Vectorized loads go before the first scalar load to make it
4333 ready early, vectorized stores go before the last scalar
4334 stmt which is where all uses are ready. */
4335 stmt_vec_info last_stmt_info
= NULL
;
4336 if (DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info
)))
4337 last_stmt_info
= vect_find_first_scalar_stmt_in_slp (node
);
4338 else /* DR_IS_WRITE */
4339 last_stmt_info
= vect_find_last_scalar_stmt_in_slp (node
);
4340 si
= gsi_for_stmt (last_stmt_info
->stmt
);
4342 else if (SLP_TREE_CHILDREN (node
).is_empty ())
4344 /* This happens for reduction and induction PHIs where we do not use the
4345 insertion iterator. */
4346 gcc_assert (STMT_VINFO_TYPE (SLP_TREE_REPRESENTATIVE (node
))
4347 == cycle_phi_info_type
4348 || (STMT_VINFO_TYPE (SLP_TREE_REPRESENTATIVE (node
))
4349 == induc_vec_info_type
));
4354 /* Emit other stmts after the children vectorized defs which is
4355 earliest possible. */
4356 gimple
*last_stmt
= NULL
;
4357 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), i
, child
)
4358 if (SLP_TREE_DEF_TYPE (child
) == vect_internal_def
)
4360 /* For fold-left reductions we are retaining the scalar
4361 reduction PHI but we still have SLP_TREE_NUM_VEC_STMTS
4362 set so the representation isn't perfect. Resort to the
4363 last scalar def here. */
4364 if (SLP_TREE_VEC_STMTS (child
).is_empty ())
4366 gcc_assert (STMT_VINFO_TYPE (SLP_TREE_REPRESENTATIVE (child
))
4367 == cycle_phi_info_type
);
4368 gphi
*phi
= as_a
<gphi
*>
4369 (vect_find_last_scalar_stmt_in_slp (child
)->stmt
);
4371 || vect_stmt_dominates_stmt_p (last_stmt
, phi
))
4374 /* We are emitting all vectorized stmts in the same place and
4375 the last one is the last.
4376 ??? Unless we have a load permutation applied and that
4377 figures to re-use an earlier generated load. */
4380 FOR_EACH_VEC_ELT (SLP_TREE_VEC_STMTS (child
), j
, vstmt
)
4382 || vect_stmt_dominates_stmt_p (last_stmt
, vstmt
))
4385 else if (!SLP_TREE_VECTYPE (child
))
4387 /* For externals we use unvectorized at all scalar defs. */
4390 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_OPS (child
), j
, def
)
4391 if (TREE_CODE (def
) == SSA_NAME
4392 && !SSA_NAME_IS_DEFAULT_DEF (def
))
4394 gimple
*stmt
= SSA_NAME_DEF_STMT (def
);
4396 || vect_stmt_dominates_stmt_p (last_stmt
, stmt
))
4402 /* For externals we have to look at all defs since their
4403 insertion place is decided per vector. */
4406 FOR_EACH_VEC_ELT (SLP_TREE_VEC_DEFS (child
), j
, vdef
)
4407 if (TREE_CODE (vdef
) == SSA_NAME
4408 && !SSA_NAME_IS_DEFAULT_DEF (vdef
))
4410 gimple
*vstmt
= SSA_NAME_DEF_STMT (vdef
);
4412 || vect_stmt_dominates_stmt_p (last_stmt
, vstmt
))
4416 /* This can happen when all children are pre-existing vectors or
4419 last_stmt
= vect_find_first_scalar_stmt_in_slp (node
)->stmt
;
4420 if (is_a
<gphi
*> (last_stmt
))
4421 si
= gsi_after_labels (gimple_bb (last_stmt
));
4424 si
= gsi_for_stmt (last_stmt
);
4429 bool done_p
= false;
4431 /* Handle purely internal nodes. */
4432 if (SLP_TREE_CODE (node
) == VEC_PERM_EXPR
)
4434 /* ??? the transform kind is stored to STMT_VINFO_TYPE which might
4435 be shared with different SLP nodes (but usually it's the same
4436 operation apart from the case the stmt is only there for denoting
4437 the actual scalar lane defs ...). So do not call vect_transform_stmt
4438 but open-code it here (partly). */
4439 bool done
= vectorizable_slp_permutation (vinfo
, &si
, node
, NULL
);
4444 vect_transform_stmt (vinfo
, stmt_info
, &si
, node
, instance
);
4447 /* Replace scalar calls from SLP node NODE with setting of their lhs to zero.
4448 For loop vectorization this is done in vectorizable_call, but for SLP
4449 it needs to be deferred until end of vect_schedule_slp, because multiple
4450 SLP instances may refer to the same scalar stmt. */
4453 vect_remove_slp_scalar_calls (vec_info
*vinfo
,
4454 slp_tree node
, hash_set
<slp_tree
> &visited
)
4457 gimple_stmt_iterator gsi
;
4461 stmt_vec_info stmt_info
;
4463 if (SLP_TREE_DEF_TYPE (node
) != vect_internal_def
)
4466 if (visited
.add (node
))
4469 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), i
, child
)
4470 vect_remove_slp_scalar_calls (vinfo
, child
, visited
);
4472 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node
), i
, stmt_info
)
4474 gcall
*stmt
= dyn_cast
<gcall
*> (stmt_info
->stmt
);
4475 if (!stmt
|| gimple_bb (stmt
) == NULL
)
4477 if (is_pattern_stmt_p (stmt_info
)
4478 || !PURE_SLP_STMT (stmt_info
))
4480 lhs
= gimple_call_lhs (stmt
);
4481 new_stmt
= gimple_build_assign (lhs
, build_zero_cst (TREE_TYPE (lhs
)));
4482 gsi
= gsi_for_stmt (stmt
);
4483 vinfo
->replace_stmt (&gsi
, stmt_info
, new_stmt
);
4484 SSA_NAME_DEF_STMT (gimple_assign_lhs (new_stmt
)) = new_stmt
;
4489 vect_remove_slp_scalar_calls (vec_info
*vinfo
, slp_tree node
)
4491 hash_set
<slp_tree
> visited
;
4492 vect_remove_slp_scalar_calls (vinfo
, node
, visited
);
4495 /* Vectorize the instance root. */
4498 vectorize_slp_instance_root_stmt (slp_tree node
, slp_instance instance
)
4500 gassign
*rstmt
= NULL
;
4502 if (SLP_TREE_NUMBER_OF_VEC_STMTS (node
) == 1)
4507 FOR_EACH_VEC_ELT (SLP_TREE_VEC_STMTS (node
), j
, child_stmt
)
4509 tree vect_lhs
= gimple_get_lhs (child_stmt
);
4510 tree root_lhs
= gimple_get_lhs (instance
->root_stmt
->stmt
);
4511 if (!useless_type_conversion_p (TREE_TYPE (root_lhs
),
4512 TREE_TYPE (vect_lhs
)))
4513 vect_lhs
= build1 (VIEW_CONVERT_EXPR
, TREE_TYPE (root_lhs
),
4515 rstmt
= gimple_build_assign (root_lhs
, vect_lhs
);
4519 else if (SLP_TREE_NUMBER_OF_VEC_STMTS (node
) > 1)
4521 int nelts
= SLP_TREE_NUMBER_OF_VEC_STMTS (node
);
4524 vec
<constructor_elt
, va_gc
> *v
;
4525 vec_alloc (v
, nelts
);
4527 FOR_EACH_VEC_ELT (SLP_TREE_VEC_STMTS (node
), j
, child_stmt
)
4529 CONSTRUCTOR_APPEND_ELT (v
,
4531 gimple_get_lhs (child_stmt
));
4533 tree lhs
= gimple_get_lhs (instance
->root_stmt
->stmt
);
4534 tree rtype
= TREE_TYPE (gimple_assign_rhs1 (instance
->root_stmt
->stmt
));
4535 tree r_constructor
= build_constructor (rtype
, v
);
4536 rstmt
= gimple_build_assign (lhs
, r_constructor
);
4541 gimple_stmt_iterator rgsi
= gsi_for_stmt (instance
->root_stmt
->stmt
);
4542 gsi_replace (&rgsi
, rstmt
, true);
4545 /* Generate vector code for all SLP instances in the loop/basic block. */
4548 vect_schedule_slp (vec_info
*vinfo
)
4550 vec
<slp_instance
> slp_instances
;
4551 slp_instance instance
;
4554 slp_instances
= vinfo
->slp_instances
;
4555 FOR_EACH_VEC_ELT (slp_instances
, i
, instance
)
4557 slp_tree node
= SLP_INSTANCE_TREE (instance
);
4558 /* Schedule the tree of INSTANCE. */
4559 vect_schedule_slp_instance (vinfo
, node
, instance
);
4561 if (SLP_INSTANCE_ROOT_STMT (instance
))
4562 vectorize_slp_instance_root_stmt (node
, instance
);
4564 if (dump_enabled_p ())
4565 dump_printf_loc (MSG_NOTE
, vect_location
,
4566 "vectorizing stmts using SLP.\n");
4569 FOR_EACH_VEC_ELT (slp_instances
, i
, instance
)
4571 slp_tree root
= SLP_INSTANCE_TREE (instance
);
4572 stmt_vec_info store_info
;
4575 /* Remove scalar call stmts. Do not do this for basic-block
4576 vectorization as not all uses may be vectorized.
4577 ??? Why should this be necessary? DCE should be able to
4578 remove the stmts itself.
4579 ??? For BB vectorization we can as well remove scalar
4580 stmts starting from the SLP tree root if they have no
4582 if (is_a
<loop_vec_info
> (vinfo
))
4583 vect_remove_slp_scalar_calls (vinfo
, root
);
4585 for (j
= 0; SLP_TREE_SCALAR_STMTS (root
).iterate (j
, &store_info
); j
++)
4587 if (!STMT_VINFO_DATA_REF (store_info
))
4590 if (SLP_INSTANCE_ROOT_STMT (instance
))
4593 store_info
= vect_orig_stmt (store_info
);
4594 /* Free the attached stmt_vec_info and remove the stmt. */
4595 vinfo
->remove_stmt (store_info
);