1 /* SLP - Basic Block Vectorization
2 Copyright (C) 2007-2020 Free Software Foundation, Inc.
3 Contributed by Dorit Naishlos <dorit@il.ibm.com>
4 and Ira Rosen <irar@il.ibm.com>
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
11 version.
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 for more details.
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
24 #include "coretypes.h"
30 #include "tree-pass.h"
32 #include "optabs-tree.h"
33 #include "insn-config.h"
34 #include "recog.h" /* FIXME: for insn_data */
35 #include "fold-const.h"
36 #include "stor-layout.h"
37 #include "gimple-iterator.h"
39 #include "tree-vectorizer.h"
40 #include "langhooks.h"
41 #include "gimple-walk.h"
43 #include "tree-vector-builder.h"
44 #include "vec-perm-indices.h"
45 #include "gimple-fold.h"
46 #include "internal-fn.h"
47 #include "dump-context.h"
50 /* Initialize a SLP node. */
52 _slp_tree::_slp_tree ()
54 SLP_TREE_SCALAR_STMTS (this) = vNULL;
55 SLP_TREE_SCALAR_OPS (this) = vNULL;
56 SLP_TREE_VEC_STMTS (this) = vNULL;
57 SLP_TREE_VEC_DEFS (this) = vNULL;
58 SLP_TREE_NUMBER_OF_VEC_STMTS (this) = 0;
59 SLP_TREE_CHILDREN (this) = vNULL;
60 SLP_TREE_LOAD_PERMUTATION (this) = vNULL;
61 SLP_TREE_TWO_OPERATORS (this) = false;
62 SLP_TREE_DEF_TYPE (this) = vect_uninitialized_def;
63 SLP_TREE_VECTYPE (this) = NULL_TREE;
64 SLP_TREE_REPRESENTATIVE (this) = NULL;
70 /* Tear down a SLP node. */
72 _slp_tree::~_slp_tree ()
74 SLP_TREE_CHILDREN (this).release ();
75 SLP_TREE_SCALAR_STMTS (this).release ();
76 SLP_TREE_SCALAR_OPS (this).release ();
77 SLP_TREE_VEC_STMTS (this).release ();
78 SLP_TREE_VEC_DEFS (this).release ();
79 SLP_TREE_LOAD_PERMUTATION (this).release ();
82 /* Recursively free the memory allocated for the SLP tree rooted at NODE.
83 FINAL_P is true if we have vectorized the instance or if we have
84 made a final decision not to vectorize the statements in any way. */
87 vect_free_slp_tree (slp_tree node, bool final_p)
92 if (--node->refcnt != 0)
95 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
96 vect_free_slp_tree (child, final_p);
98 /* Don't update STMT_VINFO_NUM_SLP_USES if it isn't relevant.
99 Some statements might no longer exist, after having been
100 removed by vect_transform_stmt. Updating the remaining
101 statements would be redundant. */
104 stmt_vec_info stmt_info;
105 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt_info)
107 gcc_assert (STMT_VINFO_NUM_SLP_USES (stmt_info) > 0);
108 STMT_VINFO_NUM_SLP_USES (stmt_info)--;
115 /* Free the memory allocated for the SLP instance. FINAL_P is true if we
116 have vectorized the instance or if we have made a final decision not
117 to vectorize the statements in any way. */
120 vect_free_slp_instance (slp_instance instance, bool final_p)
122 vect_free_slp_tree (SLP_INSTANCE_TREE (instance), final_p);
123 SLP_INSTANCE_LOADS (instance).release ();
128 /* Create an SLP node for SCALAR_STMTS. */
131 vect_create_new_slp_node (vec<stmt_vec_info> scalar_stmts, unsigned nops)
133 slp_tree node = new _slp_tree;
134 SLP_TREE_SCALAR_STMTS (node) = scalar_stmts;
135 SLP_TREE_CHILDREN (node).create (nops);
136 SLP_TREE_DEF_TYPE (node) = vect_internal_def;
137 SLP_TREE_REPRESENTATIVE (node) = scalar_stmts[0];
138 SLP_TREE_LANES (node) = scalar_stmts.length ();
141 stmt_vec_info stmt_info;
142 FOR_EACH_VEC_ELT (scalar_stmts, i, stmt_info)
143 STMT_VINFO_NUM_SLP_USES (stmt_info)++;
148 /* Create an SLP node for OPS. */
151 vect_create_new_slp_node (vec<tree> ops)
153 slp_tree node = new _slp_tree;
154 SLP_TREE_SCALAR_OPS (node) = ops;
155 SLP_TREE_DEF_TYPE (node) = vect_external_def;
156 SLP_TREE_LANES (node) = ops.length ();
161 /* This structure is used in creation of an SLP tree. Each instance
162 corresponds to the same operand in a group of scalar stmts in an SLP node. */
164 typedef struct _slp_oprnd_info
166 /* Def-stmts for the operands. */
167 vec<stmt_vec_info> def_stmts;
170 /* Information about the first statement, its vector def-type, type, the
171 operand itself in case it's constant, and an indication if it's a pattern stmt. */
174 enum vect_def_type first_dt;
179 /* Allocate operands info for NOPS operands, and GROUP_SIZE def-stmts for each operand. */
181 static vec<slp_oprnd_info>
182 vect_create_oprnd_info (int nops, int group_size)
185 slp_oprnd_info oprnd_info;
186 vec<slp_oprnd_info> oprnds_info;
188 oprnds_info.create (nops);
189 for (i = 0; i < nops; i++)
191 oprnd_info = XNEW (struct _slp_oprnd_info);
192 oprnd_info->def_stmts.create (group_size);
193 oprnd_info->ops.create (group_size);
194 oprnd_info->first_dt = vect_uninitialized_def;
195 oprnd_info->first_op_type = NULL_TREE;
196 oprnd_info->any_pattern = false;
197 oprnds_info.quick_push (oprnd_info);
204 /* Free operands info. */
207 vect_free_oprnd_info (vec<slp_oprnd_info> &oprnds_info)
210 slp_oprnd_info oprnd_info;
212 FOR_EACH_VEC_ELT (oprnds_info, i, oprnd_info)
214 oprnd_info->def_stmts.release ();
215 oprnd_info->ops.release ();
216 XDELETE (oprnd_info);
219 oprnds_info.release ();
223 /* Return true if STMTS contains a pattern statement. */
226 vect_contains_pattern_stmt_p (vec<stmt_vec_info> stmts)
228 stmt_vec_info stmt_info;
230 FOR_EACH_VEC_ELT (stmts, i, stmt_info)
231 if (is_pattern_stmt_p (stmt_info))
236 /* Find the place of the data-ref in STMT_INFO in the interleaving chain
237 that starts from FIRST_STMT_INFO. Return -1 if the data-ref is not a part
238 of the chain. */
241 vect_get_place_in_interleaving_chain (stmt_vec_info stmt_info,
242 stmt_vec_info first_stmt_info)
244 stmt_vec_info next_stmt_info = first_stmt_info;
247 if (first_stmt_info != DR_GROUP_FIRST_ELEMENT (stmt_info))
252 if (next_stmt_info == stmt_info)
254 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
256 result += DR_GROUP_GAP (next_stmt_info);
258 while (next_stmt_info);
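/* Illustration added by this edit (not in the original sources): for an
   interleaved load group built from a[0], a[2], a[3], the chain records
   a DR_GROUP_GAP of 2 before a[2] and 1 before a[3], so the places
   returned for the three statements are 0, 2 and 3 -- gaps count towards
   the position even though no statement occupies them.  */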
263 /* Check whether it is possible to load COUNT elements of type ELT_TYPE
264 using the method implemented by duplicate_and_interleave. Return true
265 if so, returning the number of intermediate vectors in *NVECTORS_OUT
266 (if nonnull) and the type of each intermediate vector in *VECTOR_TYPE_OUT
267 (if nonnull). */
270 can_duplicate_and_interleave_p (vec_info *vinfo, unsigned int count,
271 tree elt_type, unsigned int *nvectors_out,
272 tree *vector_type_out,
275 tree base_vector_type = get_vectype_for_scalar_type (vinfo, elt_type, count);
276 if (!base_vector_type || !VECTOR_MODE_P (TYPE_MODE (base_vector_type)))
279 machine_mode base_vector_mode = TYPE_MODE (base_vector_type);
280 poly_int64 elt_bytes = count * GET_MODE_UNIT_SIZE (base_vector_mode);
281 unsigned int nvectors = 1;
284 scalar_int_mode int_mode;
285 poly_int64 elt_bits = elt_bytes * BITS_PER_UNIT;
286 if (int_mode_for_size (elt_bits, 1).exists (&int_mode))
288 /* Get the natural vector type for this SLP group size. */
289 tree int_type = build_nonstandard_integer_type
290 (GET_MODE_BITSIZE (int_mode), 1);
291 tree vector_type
292 = get_vectype_for_scalar_type (vinfo, int_type, count);
293 if (vector_type
294 && VECTOR_MODE_P (TYPE_MODE (vector_type))
295 && known_eq (GET_MODE_SIZE (TYPE_MODE (vector_type)),
296 GET_MODE_SIZE (base_vector_mode)))
298 /* Try fusing consecutive sequences of COUNT / NVECTORS elements
299 together into elements of type INT_TYPE and using the result
300 to build NVECTORS vectors. */
301 poly_uint64 nelts = GET_MODE_NUNITS (TYPE_MODE (vector_type));
302 vec_perm_builder sel1 (nelts, 2, 3);
303 vec_perm_builder sel2 (nelts, 2, 3);
304 poly_int64 half_nelts = exact_div (nelts, 2);
305 for (unsigned int i = 0; i < 3; ++i)
307 sel1.quick_push (i);
308 sel1.quick_push (i + nelts);
309 sel2.quick_push (half_nelts + i);
310 sel2.quick_push (half_nelts + i + nelts);
312 vec_perm_indices indices1 (sel1, 2, nelts);
313 vec_perm_indices indices2 (sel2, 2, nelts);
314 if (can_vec_perm_const_p (TYPE_MODE (vector_type), indices1)
315 && can_vec_perm_const_p (TYPE_MODE (vector_type), indices2))
318 *nvectors_out = nvectors;
320 *vector_type_out = vector_type;
323 permutes[0] = vect_gen_perm_mask_checked (vector_type,
325 permutes[1] = vect_gen_perm_mask_checked (vector_type,
332 if (!multiple_p (elt_bytes, 2, &elt_bytes))
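/* Worked example added for illustration (values assumed, not from the
   original sources): with a vector mode of 8 elements HALF_NELTS is 4,
   and the two selectors checked above are the usual interleave-low/high
   patterns { 0, 8, 1, 9, 2, 10, 3, 11 } and { 4, 12, 5, 13, 6, 14, 7, 15 },
   encoded as two series of three elements each so the same builder also
   describes variable-length vectors.  */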
338 /* Get the defs for the rhs of STMT (collect them in OPRNDS_INFO), check that
339 they are of a valid type and that they match the defs of the first stmt of
340 the SLP group (stored in OPRNDS_INFO). This function tries to match stmts
341 by swapping operands of STMTS[STMT_NUM] when possible. Non-zero *SWAP
342 indicates swap is required for cond_expr stmts. Specifically, *SWAP
343 is 1 if STMT is cond and operands of comparison need to be swapped;
344 *SWAP is 2 if STMT is cond and code of comparison needs to be inverted.
345 If there is any operand swap in this function, *SWAP is set to non-zero
346 value.
347 If there was a fatal error return -1; if the error could be corrected by
348 swapping operands of father node of this one, return 1; if everything is
349 ok return 0. */
351 vect_get_and_check_slp_defs (vec_info *vinfo, unsigned char *swap,
352 vec<stmt_vec_info> stmts, unsigned stmt_num,
353 vec<slp_oprnd_info> *oprnds_info)
355 stmt_vec_info stmt_info = stmts[stmt_num];
357 unsigned int i, number_of_oprnds;
358 enum vect_def_type dt = vect_uninitialized_def;
359 slp_oprnd_info oprnd_info;
360 int first_op_idx = 1;
361 unsigned int commutative_op = -1U;
362 bool first_op_cond = false;
363 bool first = stmt_num == 0;
365 if (gcall *stmt = dyn_cast <gcall *> (stmt_info->stmt))
367 number_of_oprnds = gimple_call_num_args (stmt);
369 if (gimple_call_internal_p (stmt))
371 internal_fn ifn = gimple_call_internal_fn (stmt);
372 commutative_op = first_commutative_argument (ifn);
374 /* Masked load, only look at mask. */
375 if (ifn == IFN_MASK_LOAD)
377 number_of_oprnds = 1;
378 /* Mask operand index. */
383 else if (gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt))
385 enum tree_code code = gimple_assign_rhs_code (stmt);
386 number_of_oprnds = gimple_num_ops (stmt) - 1;
387 /* Swap can only be done for cond_expr if asked to, otherwise we
388 could result in different comparison code to the first stmt. */
389 if (code == COND_EXPR
390 && COMPARISON_CLASS_P (gimple_assign_rhs1 (stmt)))
392 first_op_cond = true;
396 commutative_op = commutative_tree_code (code) ? 0U : -1U;
401 bool swapped = (*swap != 0);
402 gcc_assert (!swapped || first_op_cond);
403 for (i = 0; i < number_of_oprnds; i++)
408 /* Map indicating how operands of cond_expr should be swapped. */
409 int maps[3][4] = {{0, 1, 2, 3}, {1, 0, 2, 3}, {0, 1, 3, 2}};
410 int *map = maps[*swap];
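/* Added note: maps[1] = {1, 0, 2, 3} exchanges only the two comparison
   operands (the *SWAP == 1 case), while maps[2] = {0, 1, 3, 2} keeps the
   comparison but exchanges the two selected arms (the *SWAP == 2,
   inverted-comparison case), matching the two ways a COND_EXPR can be
   isomorphic to the first statement.  */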
413 oprnd = TREE_OPERAND (gimple_op (stmt_info->stmt,
414 first_op_idx), map[i]);
416 oprnd = gimple_op (stmt_info->stmt, map[i]);
419 oprnd = gimple_op (stmt_info->stmt, first_op_idx + (swapped ? !i : i));
420 if (TREE_CODE (oprnd) == VIEW_CONVERT_EXPR)
421 oprnd = TREE_OPERAND (oprnd, 0);
423 oprnd_info = (*oprnds_info)[i];
425 stmt_vec_info def_stmt_info
;
426 if (!vect_is_simple_use (oprnd
, vinfo
, &dt
, &def_stmt_info
))
428 if (dump_enabled_p ())
429 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
430 "Build SLP failed: can't analyze def for %T\n",
436 if (def_stmt_info
&& is_pattern_stmt_p (def_stmt_info
))
437 oprnd_info
->any_pattern
= true;
441 /* For the swapping logic below force vect_reduction_def
442 for the reduction op in a SLP reduction group. */
443 if (!STMT_VINFO_DATA_REF (stmt_info
)
444 && REDUC_GROUP_FIRST_ELEMENT (stmt_info
)
445 && (int)i
== STMT_VINFO_REDUC_IDX (stmt_info
)
447 dt
= vect_reduction_def
;
448 oprnd_info
->first_dt
= dt
;
449 oprnd_info
->first_op_type
= TREE_TYPE (oprnd
);
453 /* Not first stmt of the group, check that the def-stmt/s match
454 the def-stmt/s of the first stmt. Allow different definition
455 types for reduction chains: the first stmt must be a
456 vect_reduction_def (a phi node), and the rest
457 end in the reduction chain. */
458 tree type
= TREE_TYPE (oprnd
);
459 if ((oprnd_info
->first_dt
!= dt
460 && !(oprnd_info
->first_dt
== vect_reduction_def
461 && !STMT_VINFO_DATA_REF (stmt_info
)
462 && REDUC_GROUP_FIRST_ELEMENT (stmt_info
)
464 && !STMT_VINFO_DATA_REF (def_stmt_info
)
465 && (REDUC_GROUP_FIRST_ELEMENT (def_stmt_info
)
466 == REDUC_GROUP_FIRST_ELEMENT (stmt_info
)))
467 && !((oprnd_info
->first_dt
== vect_external_def
468 || oprnd_info
->first_dt
== vect_constant_def
)
469 && (dt
== vect_external_def
470 || dt
== vect_constant_def
)))
471 || !types_compatible_p (oprnd_info
->first_op_type
, type
)
472 || (!STMT_VINFO_DATA_REF (stmt_info
)
473 && REDUC_GROUP_FIRST_ELEMENT (stmt_info
)
475 || STMT_VINFO_DATA_REF (def_stmt_info
)
476 || (REDUC_GROUP_FIRST_ELEMENT (def_stmt_info
)
477 != REDUC_GROUP_FIRST_ELEMENT (stmt_info
)))
478 != (oprnd_info
->first_dt
!= vect_reduction_def
))))
480 /* Try swapping operands if we got a mismatch. */
481 if (i
== commutative_op
&& !swapped
)
483 if (dump_enabled_p ())
484 dump_printf_loc (MSG_NOTE
, vect_location
,
485 "trying swapped operands\n");
490 if (dump_enabled_p ())
491 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
492 "Build SLP failed: different types\n");
496 if ((dt
== vect_constant_def
497 || dt
== vect_external_def
)
498 && !GET_MODE_SIZE (vinfo
->vector_mode
).is_constant ()
499 && (TREE_CODE (type
) == BOOLEAN_TYPE
500 || !can_duplicate_and_interleave_p (vinfo
, stmts
.length (),
503 if (dump_enabled_p ())
504 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
505 "Build SLP failed: invalid type of def "
506 "for variable-length SLP %T\n", oprnd
);
511 /* Check the types of the definitions. */
514 case vect_external_def
:
515 /* Make sure to demote the overall operand to external. */
516 oprnd_info
->first_dt
= vect_external_def
;
518 case vect_constant_def
:
519 oprnd_info
->def_stmts
.quick_push (NULL
);
520 oprnd_info
->ops
.quick_push (oprnd
);
523 case vect_internal_def
:
524 case vect_reduction_def
:
525 if (oprnd_info
->first_dt
== vect_reduction_def
526 && !STMT_VINFO_DATA_REF (stmt_info
)
527 && REDUC_GROUP_FIRST_ELEMENT (stmt_info
)
528 && !STMT_VINFO_DATA_REF (def_stmt_info
)
529 && (REDUC_GROUP_FIRST_ELEMENT (def_stmt_info
)
530 == REDUC_GROUP_FIRST_ELEMENT (stmt_info
)))
532 /* For a SLP reduction chain we want to duplicate the
533 reduction to each of the chain members. That gets
534 us a sane SLP graph (still the stmts are not 100%
535 correct wrt the initial values). */
537 oprnd_info
->def_stmts
.quick_push (oprnd_info
->def_stmts
[0]);
538 oprnd_info
->ops
.quick_push (oprnd_info
->ops
[0]);
542 case vect_induction_def
:
543 oprnd_info
->def_stmts
.quick_push (def_stmt_info
);
544 oprnd_info
->ops
.quick_push (oprnd
);
548 /* FORNOW: Not supported. */
549 if (dump_enabled_p ())
550 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
551 "Build SLP failed: illegal type of def %T\n",
561 if (dump_enabled_p ())
562 dump_printf_loc (MSG_NOTE
, vect_location
,
563 "swapped operands to match def types in %G",
571 /* Try to assign vector type VECTYPE to STMT_INFO for BB vectorization.
572 Return true if we can, meaning that this choice doesn't conflict with
573 existing SLP nodes that use STMT_INFO. */
576 vect_update_shared_vectype (stmt_vec_info stmt_info
, tree vectype
)
578 tree old_vectype
= STMT_VINFO_VECTYPE (stmt_info
);
579 if (old_vectype
&& useless_type_conversion_p (vectype
, old_vectype
))
582 if (STMT_VINFO_GROUPED_ACCESS (stmt_info
)
583 && DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info
)))
585 /* We maintain the invariant that if any statement in the group is
586 used, all other members of the group have the same vector type. */
587 stmt_vec_info first_info
= DR_GROUP_FIRST_ELEMENT (stmt_info
);
588 stmt_vec_info member_info
= first_info
;
589 for (; member_info
; member_info
= DR_GROUP_NEXT_ELEMENT (member_info
))
590 if (STMT_VINFO_NUM_SLP_USES (member_info
) > 0
591 || is_pattern_stmt_p (member_info
))
596 for (member_info
= first_info
; member_info
;
597 member_info
= DR_GROUP_NEXT_ELEMENT (member_info
))
598 STMT_VINFO_VECTYPE (member_info
) = vectype
;
602 else if (STMT_VINFO_NUM_SLP_USES (stmt_info
) == 0
603 && !is_pattern_stmt_p (stmt_info
))
605 STMT_VINFO_VECTYPE (stmt_info
) = vectype
;
609 if (dump_enabled_p ())
611 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
612 "Build SLP failed: incompatible vector"
613 " types for: %G", stmt_info
->stmt
);
614 dump_printf_loc (MSG_NOTE
, vect_location
,
615 " old vector type: %T\n", old_vectype
);
616 dump_printf_loc (MSG_NOTE
, vect_location
,
617 " new vector type: %T\n", vectype
);
622 /* Return true if call statements CALL1 and CALL2 are similar enough
623 to be combined into the same SLP group. */
626 compatible_calls_p (gcall
*call1
, gcall
*call2
)
628 unsigned int nargs
= gimple_call_num_args (call1
);
629 if (nargs
!= gimple_call_num_args (call2
))
632 if (gimple_call_combined_fn (call1
) != gimple_call_combined_fn (call2
))
635 if (gimple_call_internal_p (call1
))
637 if (!types_compatible_p (TREE_TYPE (gimple_call_lhs (call1
)),
638 TREE_TYPE (gimple_call_lhs (call2
))))
640 for (unsigned int i
= 0; i
< nargs
; ++i
)
641 if (!types_compatible_p (TREE_TYPE (gimple_call_arg (call1
, i
)),
642 TREE_TYPE (gimple_call_arg (call2
, i
))))
647 if (!operand_equal_p (gimple_call_fn (call1
),
648 gimple_call_fn (call2
), 0))
651 if (gimple_call_fntype (call1
) != gimple_call_fntype (call2
))
657 /* A subroutine of vect_build_slp_tree for checking VECTYPE, which is the
658 caller's attempt to find the vector type in STMT_INFO with the narrowest
659 element type. Return true if VECTYPE is nonnull and if it is valid
660 for STMT_INFO. When returning true, update MAX_NUNITS to reflect the
661 number of units in VECTYPE. GROUP_SIZE and MAX_NUNITS are as for
662 vect_build_slp_tree. */
665 vect_record_max_nunits (vec_info
*vinfo
, stmt_vec_info stmt_info
,
666 unsigned int group_size
,
667 tree vectype
, poly_uint64
*max_nunits
)
671 if (dump_enabled_p ())
672 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
673 "Build SLP failed: unsupported data-type in %G\n",
675 /* Fatal mismatch. */
679 /* If populating the vector type requires unrolling then fail
680 before adjusting *max_nunits for basic-block vectorization. */
681 poly_uint64 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
682 unsigned HOST_WIDE_INT const_nunits
;
683 if (is_a
<bb_vec_info
> (vinfo
)
684 && (!nunits
.is_constant (&const_nunits
)
685 || const_nunits
> group_size
))
687 if (dump_enabled_p ())
688 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
689 "Build SLP failed: unrolling required "
690 "in basic block SLP\n");
691 /* Fatal mismatch. */
695 /* In case of multiple types we need to detect the smallest type. */
696 vect_update_max_nunits (max_nunits
, vectype
);
700 /* STMTS is a group of GROUP_SIZE SLP statements in which some
701 statements do the same operation as the first statement and in which
702 the others do ALT_STMT_CODE. Return true if we can take one vector
703 of the first operation and one vector of the second and permute them
704 to get the required result. VECTYPE is the type of the vector that
705 would be permuted. */
708 vect_two_operations_perm_ok_p (vec<stmt_vec_info> stmts,
709 unsigned int group_size, tree vectype,
710 tree_code alt_stmt_code)
712 unsigned HOST_WIDE_INT count;
713 if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant (&count))
716 vec_perm_builder sel (count, count, 1);
717 for (unsigned int i = 0; i < count; ++i)
719 unsigned int elt = i;
720 gassign *stmt = as_a <gassign *> (stmts[i % group_size]->stmt);
721 if (gimple_assign_rhs_code (stmt) == alt_stmt_code)
722 elt += count;
723 sel.quick_push (elt);
725 vec_perm_indices indices (sel, 2, count);
726 return can_vec_perm_const_p (TYPE_MODE (vectype), indices);
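/* Example added for illustration (values assumed): with GROUP_SIZE ==
   COUNT == 4 and statements alternating PLUS_EXPR / MINUS_EXPR, where
   MINUS_EXPR is ALT_STMT_CODE, the selector built above is { 0, 5, 2, 7 }:
   lanes whose scalar stmt uses the first code are taken from the first
   vector and the others from the second, offset by COUNT -- the classic
   addsub blend.  */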
729 /* Verify if the scalar stmts STMTS are isomorphic, require data
730 permutation or are of unsupported types of operation. Return
731 true if they are, otherwise return false and indicate in *MATCHES
732 which stmts are not isomorphic to the first one. If MATCHES[0]
733 is false then this indicates the comparison could not be
734 carried out or the stmts will never be vectorized by SLP.
736 Note COND_EXPR is possibly isomorphic to another one after swapping its
737 operands. Set SWAP[i] to 1 if stmt I is COND_EXPR and isomorphic to
738 the first stmt by swapping the two operands of comparison; set SWAP[i]
739 to 2 if stmt I is isomorphic to the first stmt by inverting the code
740 of comparison. Take A1 >= B1 ? X1 : Y1 as an example: it can be swapped
741 to (B1 <= A1 ? X1 : Y1); or be inverted to (A1 < B1) ? Y1 : X1. */
744 vect_build_slp_tree_1 (vec_info
*vinfo
, unsigned char *swap
,
745 vec
<stmt_vec_info
> stmts
, unsigned int group_size
,
746 poly_uint64
*max_nunits
, bool *matches
,
750 stmt_vec_info first_stmt_info
= stmts
[0];
751 enum tree_code first_stmt_code
= ERROR_MARK
;
752 enum tree_code alt_stmt_code
= ERROR_MARK
;
753 enum tree_code rhs_code
= ERROR_MARK
;
754 enum tree_code first_cond_code
= ERROR_MARK
;
756 bool need_same_oprnds
= false;
757 tree vectype
= NULL_TREE
, first_op1
= NULL_TREE
;
760 machine_mode optab_op2_mode
;
761 machine_mode vec_mode
;
762 stmt_vec_info first_load
= NULL
, prev_first_load
= NULL
;
765 /* For every stmt in NODE find its def stmt/s. */
766 stmt_vec_info stmt_info
;
767 FOR_EACH_VEC_ELT (stmts
, i
, stmt_info
)
769 gimple
*stmt
= stmt_info
->stmt
;
773 if (dump_enabled_p ())
774 dump_printf_loc (MSG_NOTE
, vect_location
, "Build SLP for %G", stmt
);
776 /* Fail to vectorize statements marked as unvectorizable. */
777 if (!STMT_VINFO_VECTORIZABLE (stmt_info
))
779 if (dump_enabled_p ())
780 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
781 "Build SLP failed: unvectorizable statement %G",
783 /* Fatal mismatch. */
788 lhs
= gimple_get_lhs (stmt
);
789 if (lhs
== NULL_TREE
)
791 if (dump_enabled_p ())
792 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
793 "Build SLP failed: not GIMPLE_ASSIGN nor "
794 "GIMPLE_CALL %G", stmt
);
795 /* Fatal mismatch. */
801 if (!vect_get_vector_types_for_stmt (vinfo
, stmt_info
, &vectype
,
802 &nunits_vectype
, group_size
)
804 && !vect_record_max_nunits (vinfo
, stmt_info
, group_size
,
805 nunits_vectype
, max_nunits
)))
807 /* Fatal mismatch. */
812 gcc_assert (vectype
);
814 if (is_a
<bb_vec_info
> (vinfo
)
815 && !vect_update_shared_vectype (stmt_info
, vectype
))
818 gcall
*call_stmt
= dyn_cast
<gcall
*> (stmt
);
821 rhs_code
= CALL_EXPR
;
823 if (gimple_call_internal_p (stmt
, IFN_MASK_LOAD
))
825 else if ((gimple_call_internal_p (call_stmt
)
826 && (!vectorizable_internal_fn_p
827 (gimple_call_internal_fn (call_stmt
))))
828 || gimple_call_tail_p (call_stmt
)
829 || gimple_call_noreturn_p (call_stmt
)
830 || !gimple_call_nothrow_p (call_stmt
)
831 || gimple_call_chain (call_stmt
))
833 if (dump_enabled_p ())
834 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
835 "Build SLP failed: unsupported call type %G",
837 /* Fatal mismatch. */
844 rhs_code
= gimple_assign_rhs_code (stmt
);
845 load_p
= TREE_CODE_CLASS (rhs_code
) == tcc_reference
;
848 /* Check the operation. */
851 first_stmt_code
= rhs_code
;
853 /* Shift arguments should be equal in all the packed stmts for a
854 vector shift with scalar shift operand. */
855 if (rhs_code
== LSHIFT_EXPR
|| rhs_code
== RSHIFT_EXPR
856 || rhs_code
== LROTATE_EXPR
857 || rhs_code
== RROTATE_EXPR
)
859 vec_mode
= TYPE_MODE (vectype
);
861 /* First see if we have a vector/vector shift. */
862 optab
= optab_for_tree_code (rhs_code
, vectype
,
866 || optab_handler (optab
, vec_mode
) == CODE_FOR_nothing
)
868 /* No vector/vector shift, try for a vector/scalar shift. */
869 optab
= optab_for_tree_code (rhs_code
, vectype
,
874 if (dump_enabled_p ())
875 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
876 "Build SLP failed: no optab.\n");
877 /* Fatal mismatch. */
881 icode
= (int) optab_handler (optab
, vec_mode
);
882 if (icode
== CODE_FOR_nothing
)
884 if (dump_enabled_p ())
885 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
887 "op not supported by target.\n");
888 /* Fatal mismatch. */
892 optab_op2_mode
= insn_data
[icode
].operand
[2].mode
;
893 if (!VECTOR_MODE_P (optab_op2_mode
))
895 need_same_oprnds
= true;
896 first_op1
= gimple_assign_rhs2 (stmt
);
900 else if (rhs_code
== WIDEN_LSHIFT_EXPR
)
902 need_same_oprnds
= true;
903 first_op1
= gimple_assign_rhs2 (stmt
);
906 && gimple_call_internal_p (call_stmt
, IFN_DIV_POW2
))
908 need_same_oprnds
= true;
909 first_op1
= gimple_call_arg (call_stmt
, 1);
914 if (first_stmt_code
!= rhs_code
915 && alt_stmt_code
== ERROR_MARK
)
916 alt_stmt_code
= rhs_code
;
917 if (first_stmt_code
!= rhs_code
918 && (first_stmt_code
!= IMAGPART_EXPR
919 || rhs_code
!= REALPART_EXPR
)
920 && (first_stmt_code
!= REALPART_EXPR
921 || rhs_code
!= IMAGPART_EXPR
)
922 /* Handle mismatches in plus/minus by computing both
923 and merging the results. */
924 && !((first_stmt_code
== PLUS_EXPR
925 || first_stmt_code
== MINUS_EXPR
)
926 && (alt_stmt_code
== PLUS_EXPR
927 || alt_stmt_code
== MINUS_EXPR
)
928 && rhs_code
== alt_stmt_code
)
929 && !(STMT_VINFO_GROUPED_ACCESS (stmt_info
)
930 && (first_stmt_code
== ARRAY_REF
931 || first_stmt_code
== BIT_FIELD_REF
932 || first_stmt_code
== INDIRECT_REF
933 || first_stmt_code
== COMPONENT_REF
934 || first_stmt_code
== MEM_REF
)))
936 if (dump_enabled_p ())
938 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
939 "Build SLP failed: different operation "
941 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
942 "original stmt %G", first_stmt_info
->stmt
);
948 if (need_same_oprnds
)
950 tree other_op1
= (call_stmt
951 ? gimple_call_arg (call_stmt
, 1)
952 : gimple_assign_rhs2 (stmt
));
953 if (!operand_equal_p (first_op1
, other_op1
, 0))
955 if (dump_enabled_p ())
956 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
957 "Build SLP failed: different shift "
958 "arguments in %G", stmt
);
964 if (!load_p
&& rhs_code
== CALL_EXPR
)
966 if (!compatible_calls_p (as_a
<gcall
*> (stmts
[0]->stmt
),
967 as_a
<gcall
*> (stmt
)))
969 if (dump_enabled_p ())
970 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
971 "Build SLP failed: different calls in %G",
979 /* Grouped store or load. */
980 if (STMT_VINFO_GROUPED_ACCESS (stmt_info
))
982 if (REFERENCE_CLASS_P (lhs
))
990 first_load
= DR_GROUP_FIRST_ELEMENT (stmt_info
);
993 /* Check that there are no loads from different interleaving
994 chains in the same node. */
995 if (prev_first_load
!= first_load
)
997 if (dump_enabled_p ())
998 dump_printf_loc (MSG_MISSED_OPTIMIZATION
,
1000 "Build SLP failed: different "
1001 "interleaving chains in one node %G",
1008 prev_first_load
= first_load
;
1010 } /* Grouped access. */
1015 /* Not grouped load. */
1016 if (dump_enabled_p ())
1017 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1018 "Build SLP failed: not grouped load %G", stmt
);
1020 /* FORNOW: Not grouped loads are not supported. */
1021 /* Fatal mismatch. */
1026 /* Not memory operation. */
1027 if (TREE_CODE_CLASS (rhs_code
) != tcc_binary
1028 && TREE_CODE_CLASS (rhs_code
) != tcc_unary
1029 && TREE_CODE_CLASS (rhs_code
) != tcc_expression
1030 && TREE_CODE_CLASS (rhs_code
) != tcc_comparison
1031 && rhs_code
!= VIEW_CONVERT_EXPR
1032 && rhs_code
!= CALL_EXPR
)
1034 if (dump_enabled_p ())
1035 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1036 "Build SLP failed: operation unsupported %G",
1038 /* Fatal mismatch. */
1043 if (rhs_code
== COND_EXPR
)
1045 tree cond_expr
= gimple_assign_rhs1 (stmt
);
1046 enum tree_code cond_code
= TREE_CODE (cond_expr
);
1047 enum tree_code swap_code
= ERROR_MARK
;
1048 enum tree_code invert_code
= ERROR_MARK
;
1051 first_cond_code
= TREE_CODE (cond_expr
);
1052 else if (TREE_CODE_CLASS (cond_code
) == tcc_comparison
)
1054 bool honor_nans
= HONOR_NANS (TREE_OPERAND (cond_expr
, 0));
1055 swap_code
= swap_tree_comparison (cond_code
);
1056 invert_code
= invert_tree_comparison (cond_code
, honor_nans
);
1059 if (first_cond_code
== cond_code
)
1061 /* Isomorphic can be achieved by swapping. */
1062 else if (first_cond_code
== swap_code
)
1064 /* Isomorphic can be achieved by inverting. */
1065 else if (first_cond_code
== invert_code
)
1069 if (dump_enabled_p ())
1070 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1071 "Build SLP failed: different"
1072 " operation %G", stmt
);
1082 for (i
= 0; i
< group_size
; ++i
)
1086 /* If we allowed a two-operation SLP node verify the target can cope
1087 with the permute we are going to use. */
1088 if (alt_stmt_code
!= ERROR_MARK
1089 && TREE_CODE_CLASS (alt_stmt_code
) != tcc_reference
)
1091 if (!vect_two_operations_perm_ok_p (stmts
, group_size
,
1092 vectype
, alt_stmt_code
))
1094 for (i
= 0; i
< group_size
; ++i
)
1095 if (gimple_assign_rhs_code (stmts
[i
]->stmt
) == alt_stmt_code
)
1098 if (dump_enabled_p ())
1100 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1101 "Build SLP failed: different operation "
1102 "in stmt %G", stmts
[i
]->stmt
);
1103 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1104 "original stmt %G", first_stmt_info
->stmt
);
1109 *two_operators
= true;
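/* Added note (not in the original sources): when *TWO_OPERATORS is set
   the group is not rejected; the expectation, based on how
   SLP_TREE_TWO_OPERATORS is used later, is that both operations are
   emitted for the whole vector and blended with the VEC_PERM_EXPR whose
   selector was validated above.  */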
1115 /* Traits for the hash_set to record failed SLP builds for a stmt set.
1116 Note we never remove apart from at destruction time so we do not
1117 need a special value for deleted that differs from empty. */
1120 typedef vec
<stmt_vec_info
> value_type
;
1121 typedef vec
<stmt_vec_info
> compare_type
;
1122 static inline hashval_t
hash (value_type
);
1123 static inline bool equal (value_type existing
, value_type candidate
);
1124 static inline bool is_empty (value_type x
) { return !x
.exists (); }
1125 static inline bool is_deleted (value_type x
) { return !x
.exists (); }
1126 static const bool empty_zero_p
= true;
1127 static inline void mark_empty (value_type
&x
) { x
.release (); }
1128 static inline void mark_deleted (value_type
&x
) { x
.release (); }
1129 static inline void remove (value_type
&x
) { x
.release (); }
1132 bst_traits::hash (value_type x
)
1135 for (unsigned i
= 0; i
< x
.length (); ++i
)
1136 h
.add_int (gimple_uid (x
[i
]->stmt
));
1140 bst_traits::equal (value_type existing
, value_type candidate
)
1142 if (existing
.length () != candidate
.length ())
1144 for (unsigned i
= 0; i
< existing
.length (); ++i
)
1145 if (existing
[i
] != candidate
[i
])
1150 typedef hash_map
<vec
<gimple
*>, slp_tree
,
1151 simple_hashmap_traits
<bst_traits
, slp_tree
> >
1152 scalar_stmts_to_slp_tree_map_t
;
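/* Added note: vect_build_slp_tree consults a map of this type, keyed by
   the vector of scalar stmts, so that a subtree that was already built
   (or already failed) for the same stmt set is re-used by bumping its
   reference count instead of being rebuilt along every path that
   reaches it.  */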
1155 vect_build_slp_tree_2 (vec_info
*vinfo
,
1156 vec
<stmt_vec_info
> stmts
, unsigned int group_size
,
1157 poly_uint64
*max_nunits
,
1158 bool *matches
, unsigned *npermutes
, unsigned *tree_size
,
1159 scalar_stmts_to_slp_tree_map_t
*bst_map
);
1162 vect_build_slp_tree (vec_info
*vinfo
,
1163 vec
<stmt_vec_info
> stmts
, unsigned int group_size
,
1164 poly_uint64
*max_nunits
,
1165 bool *matches
, unsigned *npermutes
, unsigned *tree_size
,
1166 scalar_stmts_to_slp_tree_map_t
*bst_map
)
1168 if (slp_tree
*leader
= bst_map
->get (stmts
))
1170 if (dump_enabled_p ())
1171 dump_printf_loc (MSG_NOTE
, vect_location
, "re-using %sSLP tree %p\n",
1172 *leader
? "" : "failed ", *leader
);
1175 (*leader
)->refcnt
++;
1176 vect_update_max_nunits (max_nunits
, (*leader
)->max_nunits
);
1180 poly_uint64 this_max_nunits
= 1;
1181 slp_tree res
= vect_build_slp_tree_2 (vinfo
, stmts
, group_size
,
1183 matches
, npermutes
, tree_size
, bst_map
);
1186 res
->max_nunits
= this_max_nunits
;
1187 vect_update_max_nunits (max_nunits
, this_max_nunits
);
1188 /* Keep a reference for the bst_map use. */
1191 bst_map
->put (stmts
.copy (), res
);
1195 /* Recursively build an SLP tree starting from NODE.
1196 Fail (and return a value not equal to zero) if def-stmts are not
1197 isomorphic, require data permutation or are of unsupported types of
1198 operation. Otherwise, return 0.
1199 The value returned is the depth in the SLP tree where a mismatch
1200 was found. */
1203 vect_build_slp_tree_2 (vec_info
*vinfo
,
1204 vec
<stmt_vec_info
> stmts
, unsigned int group_size
,
1205 poly_uint64
*max_nunits
,
1206 bool *matches
, unsigned *npermutes
, unsigned *tree_size
,
1207 scalar_stmts_to_slp_tree_map_t
*bst_map
)
1209 unsigned nops
, i
, this_tree_size
= 0;
1210 poly_uint64 this_max_nunits
= *max_nunits
;
1215 stmt_vec_info stmt_info
= stmts
[0];
1216 if (gcall
*stmt
= dyn_cast
<gcall
*> (stmt_info
->stmt
))
1217 nops
= gimple_call_num_args (stmt
);
1218 else if (gassign
*stmt
= dyn_cast
<gassign
*> (stmt_info
->stmt
))
1220 nops
= gimple_num_ops (stmt
) - 1;
1221 if (gimple_assign_rhs_code (stmt
) == COND_EXPR
)
1224 else if (is_a
<gphi
*> (stmt_info
->stmt
))
1229 /* If the SLP node is a PHI (induction or reduction), terminate
1231 if (gphi
*stmt
= dyn_cast
<gphi
*> (stmt_info
->stmt
))
1233 tree scalar_type
= TREE_TYPE (PHI_RESULT (stmt
));
1234 tree vectype
= get_vectype_for_scalar_type (vinfo
, scalar_type
);
1235 if (!vect_record_max_nunits (vinfo
, stmt_info
, group_size
, vectype
,
1239 vect_def_type def_type
= STMT_VINFO_DEF_TYPE (stmt_info
);
1240 /* Induction from different IVs is not supported. */
1241 if (def_type
== vect_induction_def
)
1243 stmt_vec_info other_info
;
1244 FOR_EACH_VEC_ELT (stmts
, i
, other_info
)
1245 if (stmt_info
!= other_info
)
1248 else if (def_type
== vect_reduction_def
1249 || def_type
== vect_double_reduction_def
1250 || def_type
== vect_nested_cycle
)
1252 /* Else def types have to match. */
1253 stmt_vec_info other_info
;
1254 FOR_EACH_VEC_ELT (stmts
, i
, other_info
)
1255 if (STMT_VINFO_DEF_TYPE (other_info
) != def_type
)
1261 node
= vect_create_new_slp_node (stmts
, 0);
1266 bool two_operators
= false;
1267 unsigned char *swap
= XALLOCAVEC (unsigned char, group_size
);
1268 if (!vect_build_slp_tree_1 (vinfo
, swap
, stmts
, group_size
,
1269 &this_max_nunits
, matches
, &two_operators
))
1272 /* If the SLP node is a load, terminate the recursion unless masked. */
1273 if (STMT_VINFO_GROUPED_ACCESS (stmt_info
)
1274 && DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info
)))
1276 if (gcall
*stmt
= dyn_cast
<gcall
*> (stmt_info
->stmt
))
1279 gcc_assert (gimple_call_internal_p (stmt
, IFN_MASK_LOAD
));
1284 *max_nunits
= this_max_nunits
;
1286 node
= vect_create_new_slp_node (stmts
, 0);
1287 /* And compute the load permutation. Whether it is actually
1288 a permutation depends on the unrolling factor which is
1289 decided later. */
1290 vec
<unsigned> load_permutation
;
1292 stmt_vec_info load_info
;
1293 load_permutation
.create (group_size
);
1294 stmt_vec_info first_stmt_info
1295 = DR_GROUP_FIRST_ELEMENT (SLP_TREE_SCALAR_STMTS (node
)[0]);
1296 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node
), j
, load_info
)
1298 int load_place
= vect_get_place_in_interleaving_chain
1299 (load_info
, first_stmt_info
);
1300 gcc_assert (load_place
!= -1);
1301 load_permutation
.safe_push (load_place
);
1303 SLP_TREE_LOAD_PERMUTATION (node
) = load_permutation
;
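/* Example added for illustration (values assumed): for a node whose
   scalar loads reference a[1], a[0], a[3], a[2] in a group starting at
   a[0], the places computed above are { 1, 0, 3, 2 }; whether this is
   materialised as an actual permute is decided later, once the
   unrolling factor is known.  */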
1308 /* Get at the operands, verifying they are compatible. */
1309 vec
<slp_oprnd_info
> oprnds_info
= vect_create_oprnd_info (nops
, group_size
);
1310 slp_oprnd_info oprnd_info
;
1311 FOR_EACH_VEC_ELT (stmts
, i
, stmt_info
)
1313 int res
= vect_get_and_check_slp_defs (vinfo
, &swap
[i
],
1314 stmts
, i
, &oprnds_info
);
1316 matches
[(res
== -1) ? 0 : i
] = false;
1320 for (i
= 0; i
< group_size
; ++i
)
1323 vect_free_oprnd_info (oprnds_info
);
1327 auto_vec
<slp_tree
, 4> children
;
1329 stmt_info
= stmts
[0];
1331 /* Create SLP_TREE nodes for the definition node/s. */
1332 FOR_EACH_VEC_ELT (oprnds_info
, i
, oprnd_info
)
1337 if (oprnd_info
->first_dt
== vect_uninitialized_def
)
1339 /* COND_EXPR have one too many eventually if the condition
1341 gcc_assert (i
== 3 && nops
== 4);
1345 if (oprnd_info
->first_dt
!= vect_internal_def
1346 && oprnd_info
->first_dt
!= vect_reduction_def
1347 && oprnd_info
->first_dt
!= vect_induction_def
)
1349 slp_tree invnode
= vect_create_new_slp_node (oprnd_info
->ops
);
1350 SLP_TREE_DEF_TYPE (invnode
) = oprnd_info
->first_dt
;
1351 oprnd_info
->ops
= vNULL
;
1352 children
.safe_push (invnode
);
1356 if ((child
= vect_build_slp_tree (vinfo
, oprnd_info
->def_stmts
,
1357 group_size
, &this_max_nunits
,
1359 &this_tree_size
, bst_map
)) != NULL
)
1361 oprnd_info
->def_stmts
= vNULL
;
1362 children
.safe_push (child
);
1366 /* If the SLP build failed fatally and we analyze a basic-block
1367 simply treat nodes we fail to build as externally defined
1368 (and thus build vectors from the scalar defs).
1369 The cost model will reject outright expensive cases.
1370 ??? This doesn't treat cases where permutation ultimately
1371 fails (or we don't try permutation below). Ideally we'd
1372 even compute a permutation that will end up with the maximum
1373 SLP tree size... */
1374 if (is_a
<bb_vec_info
> (vinfo
)
1376 /* ??? Rejecting patterns this way doesn't work. We'd have to
1377 do extra work to cancel the pattern so the uses see the
1379 && !is_pattern_stmt_p (stmt_info
)
1380 && !oprnd_info
->any_pattern
)
1382 if (dump_enabled_p ())
1383 dump_printf_loc (MSG_NOTE
, vect_location
,
1384 "Building vector operands from scalars\n");
1386 child
= vect_create_new_slp_node (oprnd_info
->ops
);
1387 children
.safe_push (child
);
1388 oprnd_info
->ops
= vNULL
;
1389 oprnd_info
->def_stmts
= vNULL
;
1393 /* If the SLP build for operand zero failed and operand zero
1394 and one can be commuted try that for the scalar stmts
1395 that failed the match. */
1397 /* A first scalar stmt mismatch signals a fatal mismatch. */
1399 /* ??? For COND_EXPRs we can swap the comparison operands
1400 as well as the arms under some constraints. */
1402 && oprnds_info
[1]->first_dt
== vect_internal_def
1403 && is_gimple_assign (stmt_info
->stmt
)
1404 /* Swapping operands for reductions breaks assumptions later on. */
1405 && STMT_VINFO_DEF_TYPE (stmt_info
) != vect_reduction_def
1406 && STMT_VINFO_DEF_TYPE (stmt_info
) != vect_double_reduction_def
1407 /* Do so only if the number of not successful permutes was not more
1408 than a cut-off as re-trying the recursive match on
1409 possibly each level of the tree would expose exponential
1410 behavior. */
1413 /* See whether we can swap the matching or the non-matching
1414 stmt operands. */
1415 bool swap_not_matching
= true;
1418 for (j
= 0; j
< group_size
; ++j
)
1420 if (matches
[j
] != !swap_not_matching
)
1422 stmt_vec_info stmt_info
= stmts
[j
];
1423 /* Verify if we can swap operands of this stmt. */
1424 gassign
*stmt
= dyn_cast
<gassign
*> (stmt_info
->stmt
);
1426 || !commutative_tree_code (gimple_assign_rhs_code (stmt
)))
1428 if (!swap_not_matching
)
1430 swap_not_matching
= false;
1435 while (j
!= group_size
);
1437 /* Swap mismatched definition stmts. */
1438 if (dump_enabled_p ())
1439 dump_printf_loc (MSG_NOTE
, vect_location
,
1440 "Re-trying with swapped operands of stmts ");
1441 for (j
= 0; j
< group_size
; ++j
)
1442 if (matches
[j
] == !swap_not_matching
)
1444 std::swap (oprnds_info
[0]->def_stmts
[j
],
1445 oprnds_info
[1]->def_stmts
[j
]);
1446 std::swap (oprnds_info
[0]->ops
[j
],
1447 oprnds_info
[1]->ops
[j
]);
1448 if (dump_enabled_p ())
1449 dump_printf (MSG_NOTE
, "%d ", j
);
1451 if (dump_enabled_p ())
1452 dump_printf (MSG_NOTE
, "\n");
1453 /* And try again with scratch 'matches' ... */
1454 bool *tem
= XALLOCAVEC (bool, group_size
);
1455 if ((child
= vect_build_slp_tree (vinfo
, oprnd_info
->def_stmts
,
1456 group_size
, &this_max_nunits
,
1458 &this_tree_size
, bst_map
)) != NULL
)
1460 oprnd_info
->def_stmts
= vNULL
;
1461 children
.safe_push (child
);
1469 gcc_assert (child
== NULL
);
1470 FOR_EACH_VEC_ELT (children
, j
, child
)
1471 vect_free_slp_tree (child
, false);
1472 vect_free_oprnd_info (oprnds_info
);
1476 vect_free_oprnd_info (oprnds_info
);
1478 /* If we have all children of a non-unary child built up from
1479 scalars then just throw that away, causing it to be built up
1480 from scalars instead. */
1482 && is_a
<bb_vec_info
> (vinfo
)
1483 /* ??? Rejecting patterns this way doesn't work. We'd have to
1484 do extra work to cancel the pattern so the uses see the
1486 && !is_pattern_stmt_p (stmt_info
))
1490 FOR_EACH_VEC_ELT (children
, j
, child
)
1491 if (SLP_TREE_DEF_TYPE (child
) != vect_external_def
)
1496 FOR_EACH_VEC_ELT (children
, j
, child
)
1497 vect_free_slp_tree (child
, false);
1499 if (dump_enabled_p ())
1500 dump_printf_loc (MSG_NOTE
, vect_location
,
1501 "Building parent vector operands from "
1502 "scalars instead\n");
1507 *tree_size
+= this_tree_size
+ 1;
1508 *max_nunits
= this_max_nunits
;
1510 node
= vect_create_new_slp_node (stmts
, nops
);
1511 SLP_TREE_TWO_OPERATORS (node
) = two_operators
;
1512 SLP_TREE_CHILDREN (node
).splice (children
);
1516 /* Dump a single SLP tree NODE. */
1519 vect_print_slp_tree (dump_flags_t dump_kind
, dump_location_t loc
,
1524 stmt_vec_info stmt_info
;
1527 dump_metadata_t
metadata (dump_kind
, loc
.get_impl_location ());
1528 dump_user_location_t user_loc
= loc
.get_user_location ();
1529 dump_printf_loc (metadata
, user_loc
, "node%s %p (max_nunits=%u, refcnt=%u)\n",
1530 SLP_TREE_DEF_TYPE (node
) == vect_external_def
1532 : (SLP_TREE_DEF_TYPE (node
) == vect_constant_def
1535 estimated_poly_value (node
->max_nunits
), node
->refcnt
);
1536 if (SLP_TREE_SCALAR_STMTS (node
).exists ())
1537 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node
), i
, stmt_info
)
1538 dump_printf_loc (metadata
, user_loc
, "\tstmt %u %G", i
, stmt_info
->stmt
);
1541 dump_printf_loc (metadata
, user_loc
, "\t{ ");
1542 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_OPS (node
), i
, op
)
1543 dump_printf (metadata
, "%T%s ", op
,
1544 i
< SLP_TREE_SCALAR_OPS (node
).length () - 1 ? "," : "");
1545 dump_printf (metadata
, "}\n");
1547 if (SLP_TREE_LOAD_PERMUTATION (node
).exists ())
1549 dump_printf_loc (metadata
, user_loc
, "\tload permutation {");
1550 FOR_EACH_VEC_ELT (SLP_TREE_LOAD_PERMUTATION (node
), i
, j
)
1551 dump_printf (dump_kind
, " %u", j
);
1552 dump_printf (dump_kind
, " }\n");
1554 if (SLP_TREE_CHILDREN (node
).is_empty ())
1556 dump_printf_loc (metadata
, user_loc
, "\tchildren");
1557 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), i
, child
)
1558 dump_printf (dump_kind
, " %p", (void *)child
);
1559 dump_printf (dump_kind
, "\n");
1563 debug (slp_tree node
)
1565 debug_dump_context ctx
;
1566 vect_print_slp_tree (MSG_NOTE
,
1567 dump_location_t::from_location_t (UNKNOWN_LOCATION
),
1571 /* Dump a slp tree NODE using flags specified in DUMP_KIND. */
1574 vect_print_slp_graph (dump_flags_t dump_kind
, dump_location_t loc
,
1575 slp_tree node
, hash_set
<slp_tree
> &visited
)
1580 if (visited
.add (node
))
1583 vect_print_slp_tree (dump_kind
, loc
, node
);
1585 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), i
, child
)
1586 vect_print_slp_graph (dump_kind
, loc
, child
, visited
);
1590 vect_print_slp_graph (dump_flags_t dump_kind
, dump_location_t loc
,
1593 hash_set
<slp_tree
> visited
;
1594 vect_print_slp_graph (dump_kind
, loc
, entry
, visited
);
1597 /* Mark the tree rooted at NODE with PURE_SLP. */
1600 vect_mark_slp_stmts (slp_tree node
, hash_set
<slp_tree
> &visited
)
1603 stmt_vec_info stmt_info
;
1606 if (SLP_TREE_DEF_TYPE (node
) != vect_internal_def
)
1609 if (visited
.add (node
))
1612 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node
), i
, stmt_info
)
1613 STMT_SLP_TYPE (stmt_info
) = pure_slp
;
1615 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), i
, child
)
1616 vect_mark_slp_stmts (child
, visited
);
1620 vect_mark_slp_stmts (slp_tree node
)
1622 hash_set
<slp_tree
> visited
;
1623 vect_mark_slp_stmts (node
, visited
);
1626 /* Mark the statements of the tree rooted at NODE as relevant (vect_used). */
1629 vect_mark_slp_stmts_relevant (slp_tree node
, hash_set
<slp_tree
> &visited
)
1632 stmt_vec_info stmt_info
;
1635 if (SLP_TREE_DEF_TYPE (node
) != vect_internal_def
)
1638 if (visited
.add (node
))
1641 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node
), i
, stmt_info
)
1643 gcc_assert (!STMT_VINFO_RELEVANT (stmt_info
)
1644 || STMT_VINFO_RELEVANT (stmt_info
) == vect_used_in_scope
);
1645 STMT_VINFO_RELEVANT (stmt_info
) = vect_used_in_scope
;
1648 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), i
, child
)
1649 vect_mark_slp_stmts_relevant (child
, visited
);
1653 vect_mark_slp_stmts_relevant (slp_tree node
)
1655 hash_set
<slp_tree
> visited
;
1656 vect_mark_slp_stmts_relevant (node
, visited
);
1659 /* Copy the SLP subtree rooted at NODE. */
1662 slp_copy_subtree (slp_tree node
, hash_map
<slp_tree
, slp_tree
> &map
)
1667 slp_tree
©_ref
= map
.get_or_insert (node
, &existed_p
);
1671 copy_ref
= new _slp_tree
;
1672 slp_tree copy
= copy_ref
;
1673 SLP_TREE_DEF_TYPE (copy
) = SLP_TREE_DEF_TYPE (node
);
1674 SLP_TREE_VECTYPE (copy
) = SLP_TREE_VECTYPE (node
);
1675 SLP_TREE_REPRESENTATIVE (copy
) = SLP_TREE_REPRESENTATIVE (node
);
1676 SLP_TREE_LANES (copy
) = SLP_TREE_LANES (node
);
1677 copy
->max_nunits
= node
->max_nunits
;
1679 if (SLP_TREE_SCALAR_STMTS (node
).exists ())
1681 SLP_TREE_SCALAR_STMTS (copy
) = SLP_TREE_SCALAR_STMTS (node
).copy ();
1682 stmt_vec_info stmt_info
;
1683 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node
), i
, stmt_info
)
1684 STMT_VINFO_NUM_SLP_USES (stmt_info
)++;
1686 if (SLP_TREE_SCALAR_OPS (node
).exists ())
1687 SLP_TREE_SCALAR_OPS (copy
) = SLP_TREE_SCALAR_OPS (node
).copy ();
1688 if (SLP_TREE_LOAD_PERMUTATION (node
).exists ())
1689 SLP_TREE_LOAD_PERMUTATION (copy
) = SLP_TREE_LOAD_PERMUTATION (node
).copy ();
1690 if (SLP_TREE_CHILDREN (node
).exists ())
1691 SLP_TREE_CHILDREN (copy
) = SLP_TREE_CHILDREN (node
).copy ();
1692 gcc_assert (!SLP_TREE_VEC_STMTS (node
).exists ());
1695 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (copy
), i
, child
)
1697 SLP_TREE_CHILDREN (copy
)[i
] = slp_copy_subtree (child
, map
);
1698 SLP_TREE_CHILDREN (copy
)[i
]->refcnt
++;
1703 /* Rearrange the statements of NODE according to PERMUTATION. */
1706 vect_slp_rearrange_stmts (slp_tree node
, unsigned int group_size
,
1707 vec
<unsigned> permutation
,
1708 hash_set
<slp_tree
> &visited
)
1713 if (visited
.add (node
))
1716 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), i
, child
)
1717 vect_slp_rearrange_stmts (child
, group_size
, permutation
, visited
);
1719 if (SLP_TREE_SCALAR_STMTS (node
).exists ())
1721 gcc_assert (group_size
== SLP_TREE_SCALAR_STMTS (node
).length ());
1722 vec
<stmt_vec_info
> tmp_stmts
;
1723 tmp_stmts
.create (group_size
);
1724 tmp_stmts
.quick_grow (group_size
);
1725 stmt_vec_info stmt_info
;
1726 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node
), i
, stmt_info
)
1727 tmp_stmts
[permutation
[i
]] = stmt_info
;
1728 SLP_TREE_SCALAR_STMTS (node
).release ();
1729 SLP_TREE_SCALAR_STMTS (node
) = tmp_stmts
;
1731 if (SLP_TREE_SCALAR_OPS (node
).exists ())
1733 gcc_assert (group_size
== SLP_TREE_SCALAR_OPS (node
).length ());
1735 tmp_ops
.create (group_size
);
1736 tmp_ops
.quick_grow (group_size
);
1738 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_OPS (node
), i
, op
)
1739 tmp_ops
[permutation
[i
]] = op
;
1740 SLP_TREE_SCALAR_OPS (node
).release ();
1741 SLP_TREE_SCALAR_OPS (node
) = tmp_ops
;
1746 /* Attempt to reorder stmts in a reduction chain so that we don't
1747 require any load permutation. Return true if that was possible,
1748 otherwise return false. */
1751 vect_attempt_slp_rearrange_stmts (slp_instance slp_instn
)
1755 slp_tree node
, load
;
1757 if (SLP_INSTANCE_LOADS (slp_instn
).is_empty ())
1760 /* Compare all the permutation sequences to the first one. We know
1761 that at least one load is permuted. */
1762 node
= SLP_INSTANCE_LOADS (slp_instn
)[0];
1763 if (!SLP_TREE_LOAD_PERMUTATION (node
).exists ())
1765 unsigned int group_size
= SLP_TREE_LOAD_PERMUTATION (node
).length ();
1766 for (i
= 1; SLP_INSTANCE_LOADS (slp_instn
).iterate (i
, &load
); ++i
)
1768 if (!SLP_TREE_LOAD_PERMUTATION (load
).exists ()
1769 || SLP_TREE_LOAD_PERMUTATION (load
).length () != group_size
)
1771 FOR_EACH_VEC_ELT (SLP_TREE_LOAD_PERMUTATION (load
), j
, lidx
)
1772 if (lidx
!= SLP_TREE_LOAD_PERMUTATION (node
)[j
])
1776 /* Check that the loads in the first sequence are different and there
1777 are no gaps between them. */
1778 auto_sbitmap
load_index (group_size
);
1779 bitmap_clear (load_index
);
1780 FOR_EACH_VEC_ELT (node
->load_permutation
, i
, lidx
)
1782 if (lidx
>= group_size
)
1784 if (bitmap_bit_p (load_index
, lidx
))
1787 bitmap_set_bit (load_index
, lidx
);
1789 for (i
= 0; i
< group_size
; i
++)
1790 if (!bitmap_bit_p (load_index
, i
))
1793 /* This permutation is valid for reduction. Since the order of the
1794 statements in the nodes is not important unless they are memory
1795 accesses, we can rearrange the statements in all the nodes
1796 according to the order of the loads. */
1798 /* We have to unshare the SLP tree we modify. */
1799 hash_map
<slp_tree
, slp_tree
> map
;
1800 slp_tree unshared
= slp_copy_subtree (SLP_INSTANCE_TREE (slp_instn
), map
);
1801 vect_free_slp_tree (SLP_INSTANCE_TREE (slp_instn
), false);
1803 SLP_INSTANCE_TREE (slp_instn
) = unshared
;
1804 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn
), i
, node
)
1805 SLP_INSTANCE_LOADS (slp_instn
)[i
] = *map
.get (node
);
1806 node
= SLP_INSTANCE_LOADS (slp_instn
)[0];
1808 /* Do the actual re-arrangement. */
1809 hash_set
<slp_tree
> visited
;
1810 vect_slp_rearrange_stmts (SLP_INSTANCE_TREE (slp_instn
), group_size
,
1811 node
->load_permutation
, visited
);
1813 /* We are done, no actual permutations need to be generated. */
1814 poly_uint64 unrolling_factor
= SLP_INSTANCE_UNROLLING_FACTOR (slp_instn
);
1815 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn
), i
, node
)
1817 stmt_vec_info first_stmt_info
= SLP_TREE_SCALAR_STMTS (node
)[0];
1818 first_stmt_info
= DR_GROUP_FIRST_ELEMENT (first_stmt_info
);
1819 /* But we have to keep those permutations that are required because
1820 of handling of gaps. */
1821 if (known_eq (unrolling_factor
, 1U)
1822 || (group_size
== DR_GROUP_SIZE (first_stmt_info
)
1823 && DR_GROUP_GAP (first_stmt_info
) == 0))
1824 SLP_TREE_LOAD_PERMUTATION (node
).release ();
1826 for (j
= 0; j
< SLP_TREE_LOAD_PERMUTATION (node
).length (); ++j
)
1827 SLP_TREE_LOAD_PERMUTATION (node
)[j
] = j
;
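/* Example added for illustration (values assumed): if the four loads of
   a reduction chain come back in order { 2, 0, 3, 1 }, the scalar stmts
   of every node in the unshared tree are scattered with that same
   vector, after which the loads appear in memory order and the recorded
   load permutations can be dropped, or rewritten to the identity when a
   gap still has to be honoured.  */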
1833 /* Gather loads in the SLP graph NODE and populate the INST loads array. */
1836 vect_gather_slp_loads (vec
<slp_tree
> &loads
, slp_tree node
,
1837 hash_set
<slp_tree
> &visited
)
1839 if (visited
.add (node
))
1842 if (SLP_TREE_CHILDREN (node
).length () == 0)
1844 if (SLP_TREE_DEF_TYPE (node
) != vect_internal_def
)
1846 stmt_vec_info stmt_info
= SLP_TREE_SCALAR_STMTS (node
)[0];
1847 if (STMT_VINFO_GROUPED_ACCESS (stmt_info
)
1848 && DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info
)))
1849 loads
.safe_push (node
);
1855 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), i
, child
)
1856 vect_gather_slp_loads (loads
, child
, visited
);
1861 vect_gather_slp_loads (slp_instance inst
, slp_tree node
)
1863 hash_set
<slp_tree
> visited
;
1864 vect_gather_slp_loads (SLP_INSTANCE_LOADS (inst
), node
, visited
);
1868 /* Find the last store in SLP INSTANCE. */
1871 vect_find_last_scalar_stmt_in_slp (slp_tree node
)
1873 stmt_vec_info last
= NULL
;
1874 stmt_vec_info stmt_vinfo
;
1876 for (int i
= 0; SLP_TREE_SCALAR_STMTS (node
).iterate (i
, &stmt_vinfo
); i
++)
1878 stmt_vinfo
= vect_orig_stmt (stmt_vinfo
);
1879 last
= last
? get_later_stmt (stmt_vinfo
, last
) : stmt_vinfo
;
1885 /* Splits a group of stores, currently beginning at FIRST_VINFO, into
1886 two groups: one (still beginning at FIRST_VINFO) of size GROUP1_SIZE
1887 (also containing the first GROUP1_SIZE stmts, since stores are
1888 consecutive), the second containing the remainder.
1889 Return the first stmt in the second group. */
1891 static stmt_vec_info
1892 vect_split_slp_store_group (stmt_vec_info first_vinfo, unsigned group1_size)
1894 gcc_assert (DR_GROUP_FIRST_ELEMENT (first_vinfo) == first_vinfo);
1895 gcc_assert (group1_size > 0);
1896 int group2_size = DR_GROUP_SIZE (first_vinfo) - group1_size;
1897 gcc_assert (group2_size > 0);
1898 DR_GROUP_SIZE (first_vinfo) = group1_size;
1900 stmt_vec_info stmt_info = first_vinfo;
1901 for (unsigned i = group1_size; i > 1; i--)
1903 stmt_info = DR_GROUP_NEXT_ELEMENT (stmt_info);
1904 gcc_assert (DR_GROUP_GAP (stmt_info) == 1);
1906 /* STMT is now the last element of the first group. */
1907 stmt_vec_info group2 = DR_GROUP_NEXT_ELEMENT (stmt_info);
1908 DR_GROUP_NEXT_ELEMENT (stmt_info) = 0;
1910 DR_GROUP_SIZE (group2) = group2_size;
1911 for (stmt_info = group2; stmt_info;
1912 stmt_info = DR_GROUP_NEXT_ELEMENT (stmt_info))
1914 DR_GROUP_FIRST_ELEMENT (stmt_info) = group2;
1915 gcc_assert (DR_GROUP_GAP (stmt_info) == 1);
1918 /* For the second group, the DR_GROUP_GAP is that before the original group,
1919 plus skipping over the first vector. */
1920 DR_GROUP_GAP (group2) = DR_GROUP_GAP (first_vinfo) + group1_size;
1922 /* DR_GROUP_GAP of the first group now has to skip over the second group too. */
1923 DR_GROUP_GAP (first_vinfo) += group2_size;
1925 if (dump_enabled_p ())
1926 dump_printf_loc (MSG_NOTE, vect_location, "Split group into %d and %d\n",
1927 group1_size, group2_size);
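/* Example added for illustration (values assumed): splitting a group of
   7 consecutive stores with GROUP1_SIZE == 4 leaves a first group of 4
   whose DR_GROUP_GAP grows by the 3 elements of the second group, and a
   second group of 3 whose DR_GROUP_GAP additionally skips the first 4
   elements, so both halves still describe their place in the original
   access pattern.  */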
1932 /* Calculate the unrolling factor for an SLP instance with GROUP_SIZE
1933 statements and a vector of NUNITS elements. */
1936 calculate_unrolling_factor (poly_uint64 nunits, unsigned int group_size)
1938 return exact_div (common_multiple (nunits, group_size), group_size);
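/* Example added for illustration: with NUNITS == 4 and GROUP_SIZE == 6
   the least common multiple is 12, so the instance has to be unrolled
   by a factor of 2 before whole vectors are filled.  */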
1941 /* Analyze an SLP instance starting from a group of grouped stores. Call
1942 vect_build_slp_tree to build a tree of packed stmts if possible.
1943 Return FALSE if it's impossible to SLP any stmt in the loop. */
1946 vect_analyze_slp_instance (vec_info
*vinfo
,
1947 scalar_stmts_to_slp_tree_map_t
*bst_map
,
1948 stmt_vec_info stmt_info
, unsigned max_tree_size
)
1950 slp_instance new_instance
;
1952 unsigned int group_size
;
1953 tree vectype
, scalar_type
= NULL_TREE
;
1955 struct data_reference
*dr
= STMT_VINFO_DATA_REF (stmt_info
);
1956 vec
<stmt_vec_info
> scalar_stmts
;
1957 bool constructor
= false;
1959 if (STMT_VINFO_GROUPED_ACCESS (stmt_info
))
1961 scalar_type
= TREE_TYPE (DR_REF (dr
));
1962 group_size
= DR_GROUP_SIZE (stmt_info
);
1963 vectype
= get_vectype_for_scalar_type (vinfo
, scalar_type
, group_size
);
1965 else if (!dr
&& REDUC_GROUP_FIRST_ELEMENT (stmt_info
))
1967 gcc_assert (is_a
<loop_vec_info
> (vinfo
));
1968 vectype
= STMT_VINFO_VECTYPE (stmt_info
);
1969 group_size
= REDUC_GROUP_SIZE (stmt_info
);
1971 else if (is_gimple_assign (stmt_info
->stmt
)
1972 && gimple_assign_rhs_code (stmt_info
->stmt
) == CONSTRUCTOR
)
1974 vectype
= TREE_TYPE (gimple_assign_rhs1 (stmt_info
->stmt
));
1975 group_size
= CONSTRUCTOR_NELTS (gimple_assign_rhs1 (stmt_info
->stmt
));
1980 gcc_assert (is_a
<loop_vec_info
> (vinfo
));
1981 vectype
= STMT_VINFO_VECTYPE (stmt_info
);
1982 group_size
= as_a
<loop_vec_info
> (vinfo
)->reductions
.length ();
1987 if (dump_enabled_p ())
1988 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1989 "Build SLP failed: unsupported data-type %T\n",
1994 poly_uint64 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
1996 /* Create a node (a root of the SLP tree) for the packed grouped stores. */
1997 scalar_stmts
.create (group_size
);
1998 stmt_vec_info next_info
= stmt_info
;
1999 if (STMT_VINFO_GROUPED_ACCESS (stmt_info
))
2001 /* Collect the stores and store them in SLP_TREE_SCALAR_STMTS. */
2004 scalar_stmts
.safe_push (vect_stmt_to_vectorize (next_info
));
2005 next_info
= DR_GROUP_NEXT_ELEMENT (next_info
);
2008 else if (!dr
&& REDUC_GROUP_FIRST_ELEMENT (stmt_info
))
2010 /* Collect the reduction stmts and store them in
2011 SLP_TREE_SCALAR_STMTS. */
2014 scalar_stmts
.safe_push (vect_stmt_to_vectorize (next_info
));
2015 next_info
= REDUC_GROUP_NEXT_ELEMENT (next_info
);
2017 /* Mark the first element of the reduction chain as reduction to properly
2018 transform the node. In the reduction analysis phase only the last
2019 element of the chain is marked as reduction. */
2020 STMT_VINFO_DEF_TYPE (stmt_info
)
2021 = STMT_VINFO_DEF_TYPE (scalar_stmts
.last ());
2022 STMT_VINFO_REDUC_DEF (vect_orig_stmt (stmt_info
))
2023 = STMT_VINFO_REDUC_DEF (vect_orig_stmt (scalar_stmts
.last ()));
2025 else if (constructor
)
2027 tree rhs
= gimple_assign_rhs1 (stmt_info
->stmt
);
2029 FOR_EACH_CONSTRUCTOR_VALUE (CONSTRUCTOR_ELTS (rhs
), i
, val
)
2031 if (TREE_CODE (val
) == SSA_NAME
)
2033 gimple
* def
= SSA_NAME_DEF_STMT (val
);
2034 stmt_vec_info def_info
= vinfo
->lookup_stmt (def
);
2035 /* Value is defined in another basic block. */
2038 def_info
= vect_stmt_to_vectorize (def_info
);
2039 scalar_stmts
.safe_push (def_info
);
2044 if (dump_enabled_p ())
2045 dump_printf_loc (MSG_NOTE
, vect_location
,
2046 "Analyzing vectorizable constructor: %G\n",
2051 /* Collect reduction statements. */
2052 vec
<stmt_vec_info
> reductions
= as_a
<loop_vec_info
> (vinfo
)->reductions
;
2053 for (i
= 0; reductions
.iterate (i
, &next_info
); i
++)
2054 scalar_stmts
.safe_push (next_info
);
2057 /* Build the tree for the SLP instance. */
2058 bool *matches
= XALLOCAVEC (bool, group_size
);
2059 unsigned npermutes
= 0;
2060 poly_uint64 max_nunits
= nunits
;
2061 unsigned tree_size
= 0;
2062 node
= vect_build_slp_tree (vinfo
, scalar_stmts
, group_size
,
2063 &max_nunits
, matches
, &npermutes
,
2064 &tree_size
, bst_map
);
2067 /* Calculate the unrolling factor based on the smallest type. */
2068 poly_uint64 unrolling_factor
2069 = calculate_unrolling_factor (max_nunits
, group_size
);
2071 if (maybe_ne (unrolling_factor
, 1U)
2072 && is_a
<bb_vec_info
> (vinfo
))
2074 unsigned HOST_WIDE_INT const_max_nunits
;
2075 if (!max_nunits
.is_constant (&const_max_nunits
)
2076 || const_max_nunits
> group_size
)
2078 if (dump_enabled_p ())
2079 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2080 "Build SLP failed: store group "
2081 "size not a multiple of the vector size "
2082 "in basic block SLP\n");
2083 vect_free_slp_tree (node
, false);
2086 /* Fatal mismatch. */
2087 matches
[group_size
/ const_max_nunits
* const_max_nunits
] = false;
2088 vect_free_slp_tree (node
, false);
2092 /* Create a new SLP instance. */
2093 new_instance
= XNEW (class _slp_instance
);
2094 SLP_INSTANCE_TREE (new_instance
) = node
;
2095 SLP_INSTANCE_UNROLLING_FACTOR (new_instance
) = unrolling_factor
;
2096 SLP_INSTANCE_LOADS (new_instance
) = vNULL
;
2097 SLP_INSTANCE_ROOT_STMT (new_instance
) = constructor
? stmt_info
: NULL
;
2099 vect_gather_slp_loads (new_instance
, node
);
2100 if (dump_enabled_p ())
2101 dump_printf_loc (MSG_NOTE
, vect_location
,
2102 "SLP size %u vs. limit %u.\n",
2103 tree_size
, max_tree_size
);
2105 /* Check whether any load is possibly permuted. */
2107 bool loads_permuted
= false;
2108 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (new_instance
), i
, load_node
)
2110 if (!SLP_TREE_LOAD_PERMUTATION (load_node
).exists ())
2113 stmt_vec_info load_info
;
2114 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (load_node
), j
, load_info
)
2115 if (SLP_TREE_LOAD_PERMUTATION (load_node
)[j
] != j
)
2117 loads_permuted
= true;
2122 /* If the loads and stores can be handled with load/store-lane
2123 instructions do not generate this SLP instance. */
2124 if (is_a
<loop_vec_info
> (vinfo
)
2126 && dr
&& vect_store_lanes_supported (vectype
, group_size
, false))
2129 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (new_instance
), i
, load_node
)
2131 stmt_vec_info stmt_vinfo
= DR_GROUP_FIRST_ELEMENT
2132 (SLP_TREE_SCALAR_STMTS (load_node
)[0]);
2133 /* Use SLP for strided accesses (or if we can't load-lanes). */
2134 if (STMT_VINFO_STRIDED_P (stmt_vinfo
)
2135 || ! vect_load_lanes_supported
2136 (STMT_VINFO_VECTYPE (stmt_vinfo
),
2137 DR_GROUP_SIZE (stmt_vinfo
), false))
2140 if (i
== SLP_INSTANCE_LOADS (new_instance
).length ())
2142 if (dump_enabled_p ())
2143 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2144 "Built SLP cancelled: can use "
2145 "load/store-lanes\n");
2146 vect_free_slp_instance (new_instance
, false);
2151 /* If this is a reduction chain with a conversion in front
2152 amend the SLP tree with a node for that. */
2154 && REDUC_GROUP_FIRST_ELEMENT (stmt_info
)
2155 && STMT_VINFO_DEF_TYPE (stmt_info
) != vect_reduction_def
)
2157 /* Get at the conversion stmt - we know it's the single use
2158 of the last stmt of the reduction chain. */
2159 gimple
*tem
= vect_orig_stmt (scalar_stmts
[group_size
- 1])->stmt
;
2160 use_operand_p use_p
;
2162 bool r
= single_imm_use (gimple_assign_lhs (tem
),
2165 next_info
= vinfo
->lookup_stmt (use_stmt
);
2166 next_info
= vect_stmt_to_vectorize (next_info
);
2167 scalar_stmts
= vNULL
;
2168 scalar_stmts
.create (group_size
);
2169 for (unsigned i
= 0; i
< group_size
; ++i
)
2170 scalar_stmts
.quick_push (next_info
);
2171 slp_tree conv
= vect_create_new_slp_node (scalar_stmts
, 1);
2172 SLP_TREE_CHILDREN (conv
).quick_push (node
);
2173 SLP_INSTANCE_TREE (new_instance
) = conv
;
2174 /* We also have to fake this conversion stmt as SLP reduction
2175 group so we don't have to mess with too much code
2177 REDUC_GROUP_FIRST_ELEMENT (next_info
) = next_info
;
2178 REDUC_GROUP_NEXT_ELEMENT (next_info
) = NULL
;
2181 vinfo
->slp_instances
.safe_push (new_instance
);
2183 /* ??? We've replaced the old SLP_INSTANCE_GROUP_SIZE with
2184 the number of scalar stmts in the root in a few places.
2185 Verify that assumption holds. */
2186 gcc_assert (SLP_TREE_SCALAR_STMTS (SLP_INSTANCE_TREE (new_instance
))
2187 .length () == group_size
);
2189 if (dump_enabled_p ())
2191 dump_printf_loc (MSG_NOTE
, vect_location
,
2192 "Final SLP tree for instance:\n");
2193 vect_print_slp_graph (MSG_NOTE
, vect_location
,
2194 SLP_INSTANCE_TREE (new_instance
));
2202 /* Failed to SLP. */
2203 /* Free the allocated memory. */
2204 scalar_stmts
.release ();
2207 /* For basic block SLP, try to break the group up into multiples of the
2209 unsigned HOST_WIDE_INT const_nunits
;
2210 if (is_a
<bb_vec_info
> (vinfo
)
2211 && STMT_VINFO_GROUPED_ACCESS (stmt_info
)
2212 && DR_GROUP_FIRST_ELEMENT (stmt_info
)
2213 && nunits
.is_constant (&const_nunits
))
2215 /* We consider breaking the group only on VF boundaries from the existing
2217 for (i
= 0; i
< group_size
; i
++)
2218 if (!matches
[i
]) break;
2220 if (i
>= const_nunits
&& i
< group_size
)
2222 /* Split into two groups at the first vector boundary before i. */
2223 gcc_assert ((const_nunits
& (const_nunits
- 1)) == 0);
2224 unsigned group1_size
= i
& ~(const_nunits
- 1);
2226 stmt_vec_info rest
= vect_split_slp_store_group (stmt_info
,
2228 bool res
= vect_analyze_slp_instance (vinfo
, bst_map
, stmt_info
,
2230 /* If the first non-match was in the middle of a vector,
2231 skip the rest of that vector. */
2232 if (group1_size
< i
)
2234 i
= group1_size
+ const_nunits
;
2236 rest
= vect_split_slp_store_group (rest
, const_nunits
);
2239 res
|= vect_analyze_slp_instance (vinfo
, bst_map
,
2240 rest
, max_tree_size
);
2243 /* Even though the first vector did not all match, we might be able to SLP
2244 (some) of the remainder. FORNOW ignore this possibility. */
/* Check if there are stmts in the loop that can be vectorized using SLP.
   Build SLP trees of packed scalar stmts if SLP is possible.  */
2255 vect_analyze_slp (vec_info
*vinfo
, unsigned max_tree_size
)
2258 stmt_vec_info first_element
;
2260 DUMP_VECT_SCOPE ("vect_analyze_slp");
2262 scalar_stmts_to_slp_tree_map_t
*bst_map
2263 = new scalar_stmts_to_slp_tree_map_t ();
2265 /* Find SLP sequences starting from groups of grouped stores. */
2266 FOR_EACH_VEC_ELT (vinfo
->grouped_stores
, i
, first_element
)
2267 vect_analyze_slp_instance (vinfo
, bst_map
, first_element
, max_tree_size
);
2269 if (loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
))
2271 if (loop_vinfo
->reduction_chains
.length () > 0)
2273 /* Find SLP sequences starting from reduction chains. */
2274 FOR_EACH_VEC_ELT (loop_vinfo
->reduction_chains
, i
, first_element
)
2275 if (! vect_analyze_slp_instance (vinfo
, bst_map
, first_element
,
2278 /* Dissolve reduction chain group. */
2279 stmt_vec_info vinfo
= first_element
;
2280 stmt_vec_info last
= NULL
;
2283 stmt_vec_info next
= REDUC_GROUP_NEXT_ELEMENT (vinfo
);
2284 REDUC_GROUP_FIRST_ELEMENT (vinfo
) = NULL
;
2285 REDUC_GROUP_NEXT_ELEMENT (vinfo
) = NULL
;
2289 STMT_VINFO_DEF_TYPE (first_element
) = vect_internal_def
;
2290 /* It can be still vectorized as part of an SLP reduction. */
2291 loop_vinfo
->reductions
.safe_push (last
);
2295 /* Find SLP sequences starting from groups of reductions. */
2296 if (loop_vinfo
->reductions
.length () > 1)
2297 vect_analyze_slp_instance (vinfo
, bst_map
, loop_vinfo
->reductions
[0],
2301 /* The map keeps a reference on SLP nodes built, release that. */
2302 for (scalar_stmts_to_slp_tree_map_t::iterator it
= bst_map
->begin ();
2303 it
!= bst_map
->end (); ++it
)
2305 vect_free_slp_tree ((*it
).second
, false);
2308 /* Optimize permutations in SLP reductions. */
2309 slp_instance instance
;
2310 FOR_EACH_VEC_ELT (vinfo
->slp_instances
, i
, instance
)
2312 slp_tree node
= SLP_INSTANCE_TREE (instance
);
2313 stmt_vec_info stmt_info
= SLP_TREE_SCALAR_STMTS (node
)[0];
2314 /* Reduction (there are no data-refs in the root).
2315 In reduction chain the order of the loads is not important. */
2316 if (!STMT_VINFO_DATA_REF (stmt_info
)
2317 && !REDUC_GROUP_FIRST_ELEMENT (stmt_info
))
2318 vect_attempt_slp_rearrange_stmts (instance
);
2321 /* Gather all loads in the SLP graph. */
2322 hash_set
<slp_tree
> visited
;
2323 FOR_EACH_VEC_ELT (vinfo
->slp_instances
, i
, instance
)
2324 vect_gather_slp_loads (vinfo
->slp_loads
, SLP_INSTANCE_TREE (instance
),
2327 return opt_result::success ();
2331 vect_optimize_slp (vec_info
*vinfo
)
2335 FOR_EACH_VEC_ELT (vinfo
->slp_loads
, i
, node
)
2337 if (!SLP_TREE_LOAD_PERMUTATION (node
).exists ())
2340 /* In basic block vectorization we allow any subchain of an interleaving
2342 FORNOW: not in loop SLP because of realignment complications. */
2343 if (is_a
<bb_vec_info
> (vinfo
))
2345 bool subchain_p
= true;
2346 stmt_vec_info next_load_info
= NULL
;
2347 stmt_vec_info load_info
;
2349 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node
), j
, load_info
)
2352 && (next_load_info
!= load_info
2353 || DR_GROUP_GAP (load_info
) != 1))
2358 next_load_info
= DR_GROUP_NEXT_ELEMENT (load_info
);
2362 SLP_TREE_LOAD_PERMUTATION (node
).release ();
2368 stmt_vec_info load_info
;
2369 bool this_load_permuted
= false;
2371 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node
), j
, load_info
)
2372 if (SLP_TREE_LOAD_PERMUTATION (node
)[j
] != j
)
2374 this_load_permuted
= true;
2377 stmt_vec_info first_stmt_info
2378 = DR_GROUP_FIRST_ELEMENT (SLP_TREE_SCALAR_STMTS (node
)[0]);
2379 if (!this_load_permuted
2380 /* The load requires permutation when unrolling exposes
2381 a gap either because the group is larger than the SLP
2382 group-size or because there is a gap between the groups. */
2383 && (known_eq (LOOP_VINFO_VECT_FACTOR (as_a
<loop_vec_info
> (vinfo
)), 1U)
2384 || ((SLP_TREE_LANES (node
) == DR_GROUP_SIZE (first_stmt_info
))
2385 && DR_GROUP_GAP (first_stmt_info
) == 0)))
2387 SLP_TREE_LOAD_PERMUTATION (node
).release ();
/* For each possible SLP instance decide whether to SLP it and calculate overall
   unrolling factor needed to SLP the loop.  Return TRUE if decided to SLP at
   least one instance.  */

bool
vect_make_slp_decision (loop_vec_info loop_vinfo)
{
  unsigned int i;
  poly_uint64 unrolling_factor = 1;
  vec<slp_instance> slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
  slp_instance instance;
  int decided_to_slp = 0;

  DUMP_VECT_SCOPE ("vect_make_slp_decision");

  FOR_EACH_VEC_ELT (slp_instances, i, instance)
    {
      /* FORNOW: SLP if you can.  */
      /* All unroll factors have the form:

	   GET_MODE_SIZE (vinfo->vector_mode) * X

	 for some rational X, so they must have a common multiple.  */
      unrolling_factor
	= force_common_multiple (unrolling_factor,
				 SLP_INSTANCE_UNROLLING_FACTOR (instance));

      /* Mark all the stmts that belong to INSTANCE as PURE_SLP stmts.  Later we
	 call vect_detect_hybrid_slp () to find stmts that need hybrid SLP and
	 loop-based vectorization.  Such stmts will be marked as HYBRID.  */
      vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance));
      decided_to_slp++;
    }

  LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo) = unrolling_factor;

  if (decided_to_slp && dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location,
		       "Decided to SLP %d instances. Unrolling factor ",
		       decided_to_slp);
      dump_dec (MSG_NOTE, unrolling_factor);
      dump_printf (MSG_NOTE, "\n");
    }

  return (decided_to_slp > 0);
}
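
/* Illustrative note on the force_common_multiple call above (not from the
   original source): if one SLP instance needs an unrolling factor of 2 and
   another needs 3, the combined loop unrolling factor becomes 6, so that
   both instances operate on whole vectors.  */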
2443 /* Private data for vect_detect_hybrid_slp. */
2446 loop_vec_info loop_vinfo
;
2447 vec
<stmt_vec_info
> *worklist
;
2450 /* Walker for walk_gimple_op. */
2453 vect_detect_hybrid_slp (tree
*tp
, int *, void *data
)
2455 walk_stmt_info
*wi
= (walk_stmt_info
*)data
;
2456 vdhs_data
*dat
= (vdhs_data
*)wi
->info
;
2461 stmt_vec_info def_stmt_info
= dat
->loop_vinfo
->lookup_def (*tp
);
2464 def_stmt_info
= vect_stmt_to_vectorize (def_stmt_info
);
2465 if (PURE_SLP_STMT (def_stmt_info
))
2467 if (dump_enabled_p ())
2468 dump_printf_loc (MSG_NOTE
, vect_location
, "marking hybrid: %G",
2469 def_stmt_info
->stmt
);
2470 STMT_SLP_TYPE (def_stmt_info
) = hybrid
;
2471 dat
->worklist
->safe_push (def_stmt_info
);
2477 /* Find stmts that must be both vectorized and SLPed. */
2480 vect_detect_hybrid_slp (loop_vec_info loop_vinfo
)
2482 DUMP_VECT_SCOPE ("vect_detect_hybrid_slp");
2484 /* All stmts participating in SLP are marked pure_slp, all other
2485 stmts are loop_vect.
2486 First collect all loop_vect stmts into a worklist. */
2487 auto_vec
<stmt_vec_info
> worklist
;
2488 for (unsigned i
= 0; i
< LOOP_VINFO_LOOP (loop_vinfo
)->num_nodes
; ++i
)
2490 basic_block bb
= LOOP_VINFO_BBS (loop_vinfo
)[i
];
2491 for (gphi_iterator gsi
= gsi_start_phis (bb
); !gsi_end_p (gsi
);
2494 gphi
*phi
= gsi
.phi ();
2495 stmt_vec_info stmt_info
= loop_vinfo
->lookup_stmt (phi
);
2496 if (!STMT_SLP_TYPE (stmt_info
) && STMT_VINFO_RELEVANT (stmt_info
))
2497 worklist
.safe_push (stmt_info
);
2499 for (gimple_stmt_iterator gsi
= gsi_start_bb (bb
); !gsi_end_p (gsi
);
2502 gimple
*stmt
= gsi_stmt (gsi
);
2503 stmt_vec_info stmt_info
= loop_vinfo
->lookup_stmt (stmt
);
2504 if (STMT_VINFO_IN_PATTERN_P (stmt_info
))
2506 for (gimple_stmt_iterator gsi2
2507 = gsi_start (STMT_VINFO_PATTERN_DEF_SEQ (stmt_info
));
2508 !gsi_end_p (gsi2
); gsi_next (&gsi2
))
2510 stmt_vec_info patt_info
2511 = loop_vinfo
->lookup_stmt (gsi_stmt (gsi2
));
2512 if (!STMT_SLP_TYPE (patt_info
)
2513 && STMT_VINFO_RELEVANT (patt_info
))
2514 worklist
.safe_push (patt_info
);
2516 stmt_info
= STMT_VINFO_RELATED_STMT (stmt_info
);
2518 if (!STMT_SLP_TYPE (stmt_info
) && STMT_VINFO_RELEVANT (stmt_info
))
2519 worklist
.safe_push (stmt_info
);
2523 /* Now we have a worklist of non-SLP stmts, follow use->def chains and
2524 mark any SLP vectorized stmt as hybrid.
2525 ??? We're visiting def stmts N times (once for each non-SLP and
2526 once for each hybrid-SLP use). */
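  /* An illustrative case (not from the original comment): if t_1 = a_2 * 3
     is pure SLP but t_1 also feeds a statement that is only loop-vectorized,
     walking that statement's operands reaches t_1's definition, marks it
     hybrid, and pushes it onto the worklist so its own operands receive the
     same treatment.  */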
2529 dat
.worklist
= &worklist
;
2530 dat
.loop_vinfo
= loop_vinfo
;
2531 memset (&wi
, 0, sizeof (wi
));
2532 wi
.info
= (void *)&dat
;
2533 while (!worklist
.is_empty ())
2535 stmt_vec_info stmt_info
= worklist
.pop ();
2536 /* Since SSA operands are not set up for pattern stmts we need
2537 to use walk_gimple_op. */
2539 walk_gimple_op (stmt_info
->stmt
, vect_detect_hybrid_slp
, &wi
);
/* Initialize a bb_vec_info struct for the statements between
   REGION_BEGIN_IN (inclusive) and REGION_END_IN (exclusive).  */

_bb_vec_info::_bb_vec_info (gimple_stmt_iterator region_begin_in,
			    gimple_stmt_iterator region_end_in,
			    vec_info_shared *shared)
  : vec_info (vec_info::bb, init_cost (NULL), shared),
    bb (gsi_bb (region_begin_in)),
    region_begin (region_begin_in),
    region_end (region_end_in)
{
  gimple_stmt_iterator gsi;

  for (gsi = region_begin; gsi_stmt (gsi) != gsi_stmt (region_end);
       gsi_next (&gsi))
    {
      gimple *stmt = gsi_stmt (gsi);
      gimple_set_uid (stmt, 0);

/* Free BB_VINFO struct, as well as all the stmt_vec_info structs of all the
   stmts in the basic block.  */

_bb_vec_info::~_bb_vec_info ()
{
  for (gimple_stmt_iterator si = region_begin;
       gsi_stmt (si) != gsi_stmt (region_end); gsi_next (&si))
    /* Reset region marker.  */
    gimple_set_uid (gsi_stmt (si), -1);
2582 /* Subroutine of vect_slp_analyze_node_operations. Handle the root of NODE,
2583 given then that child nodes have already been processed, and that
2584 their def types currently match their SLP node's def type. */
2587 vect_slp_analyze_node_operations_1 (vec_info
*vinfo
, slp_tree node
,
2588 slp_instance node_instance
,
2589 stmt_vector_for_cost
*cost_vec
)
2591 stmt_vec_info stmt_info
= SLP_TREE_REPRESENTATIVE (node
);
2592 gcc_assert (STMT_SLP_TYPE (stmt_info
) != loop_vect
);
2594 /* Calculate the number of vector statements to be created for the
2595 scalar stmts in this node. For SLP reductions it is equal to the
2596 number of vector statements in the children (which has already been
2597 calculated by the recursive call). Otherwise it is the number of
2598 scalar elements in one scalar iteration (DR_GROUP_SIZE) multiplied by
2599 VF divided by the number of elements in a vector. */
2600 if (!STMT_VINFO_GROUPED_ACCESS (stmt_info
)
2601 && REDUC_GROUP_FIRST_ELEMENT (stmt_info
))
2603 for (unsigned i
= 0; i
< SLP_TREE_CHILDREN (node
).length (); ++i
)
2604 if (SLP_TREE_DEF_TYPE (SLP_TREE_CHILDREN (node
)[i
]) == vect_internal_def
)
2606 SLP_TREE_NUMBER_OF_VEC_STMTS (node
)
2607 = SLP_TREE_NUMBER_OF_VEC_STMTS (SLP_TREE_CHILDREN (node
)[i
]);
2614 if (loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
))
2615 vf
= loop_vinfo
->vectorization_factor
;
2618 unsigned int group_size
= SLP_TREE_LANES (node
);
2619 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
2620 SLP_TREE_NUMBER_OF_VEC_STMTS (node
)
2621 = vect_get_num_vectors (vf
* group_size
, vectype
);
2625 return vect_analyze_stmt (vinfo
, stmt_info
, &dummy
,
2626 node
, node_instance
, cost_vec
);
/* Try to build NODE from scalars, returning true on success.
   NODE_INSTANCE is the SLP instance that contains NODE.  */

static bool
vect_slp_convert_to_external (vec_info *vinfo, slp_tree node,
			      slp_instance node_instance)
{
  stmt_vec_info stmt_info;
  unsigned int i;

  if (!is_a <bb_vec_info> (vinfo)
      || node == SLP_INSTANCE_TREE (node_instance)
      || vect_contains_pattern_stmt_p (SLP_TREE_SCALAR_STMTS (node)))
    return false;

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "Building vector operands from scalars instead\n");

  /* Don't remove and free the child nodes here, since they could be
     referenced by other structures.  The analysis and scheduling phases
     (need to) ignore child nodes of anything that isn't vect_internal_def.  */
  unsigned int group_size = SLP_TREE_LANES (node);
  SLP_TREE_DEF_TYPE (node) = vect_external_def;
  SLP_TREE_SCALAR_OPS (node).safe_grow (group_size);
  FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt_info)
    {
      tree lhs = gimple_get_lhs (vect_orig_stmt (stmt_info)->stmt);
      SLP_TREE_SCALAR_OPS (node)[i] = lhs;
    }

  return true;
}
2662 /* Compute the prologue cost for invariant or constant operands represented
2666 vect_prologue_cost_for_slp (slp_tree node
,
2667 stmt_vector_for_cost
*cost_vec
)
2669 /* Without looking at the actual initializer a vector of
2670 constants can be implemented as load from the constant pool.
2671 When all elements are the same we can use a splat. */
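  /* Illustrative note (not from the original source): a constant operand
     vector such as { 1, 2, 3, 4 } is costed as a load from the constant
     pool, while external (non-constant) operands are costed as a splat
     when every lane is the same scalar and as an element-wise construction
     otherwise.  */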
2672 tree vectype
= SLP_TREE_VECTYPE (node
);
2673 unsigned group_size
= SLP_TREE_SCALAR_OPS (node
).length ();
2674 unsigned num_vects_to_check
;
2675 unsigned HOST_WIDE_INT const_nunits
;
2676 unsigned nelt_limit
;
2677 if (TYPE_VECTOR_SUBPARTS (vectype
).is_constant (&const_nunits
)
2678 && ! multiple_p (const_nunits
, group_size
))
2680 num_vects_to_check
= SLP_TREE_NUMBER_OF_VEC_STMTS (node
);
2681 nelt_limit
= const_nunits
;
2685 /* If either the vector has variable length or the vectors
2686 are composed of repeated whole groups we only need to
2687 cost construction once. All vectors will be the same. */
2688 num_vects_to_check
= 1;
2689 nelt_limit
= group_size
;
2691 tree elt
= NULL_TREE
;
2693 for (unsigned j
= 0; j
< num_vects_to_check
* nelt_limit
; ++j
)
2695 unsigned si
= j
% group_size
;
2697 elt
= SLP_TREE_SCALAR_OPS (node
)[si
];
2698 /* ??? We're just tracking whether all operands of a single
2699 vector initializer are the same, ideally we'd check if
2700 we emitted the same one already. */
2701 else if (elt
!= SLP_TREE_SCALAR_OPS (node
)[si
])
2704 if (nelt
== nelt_limit
)
2706 record_stmt_cost (cost_vec
, 1,
2707 SLP_TREE_DEF_TYPE (node
) == vect_external_def
2708 ? (elt
? scalar_to_vec
: vec_construct
)
2710 NULL
, vectype
, 0, vect_prologue
);
2716 /* Analyze statements contained in SLP tree NODE after recursively analyzing
2717 the subtree. NODE_INSTANCE contains NODE and VINFO contains INSTANCE.
2719 Return true if the operations are supported. */
2722 vect_slp_analyze_node_operations (vec_info
*vinfo
, slp_tree node
,
2723 slp_instance node_instance
,
2724 hash_set
<slp_tree
> &visited
,
2725 hash_set
<slp_tree
> &lvisited
,
2726 stmt_vector_for_cost
*cost_vec
)
2731 /* Assume we can code-generate all invariants. */
2732 if (SLP_TREE_DEF_TYPE (node
) != vect_internal_def
)
2735 /* If we already analyzed the exact same set of scalar stmts we're done.
2736 We share the generated vector stmts for those.
2737 The SLP graph is acyclic so not caching whether we failed or succeeded
2738 doesn't result in any issue since we throw away the lvisited set
2740 if (visited
.contains (node
)
2741 || lvisited
.add (node
))
2744 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), i
, child
)
2745 if (!vect_slp_analyze_node_operations (vinfo
, child
, node_instance
,
2746 visited
, lvisited
, cost_vec
))
2749 bool res
= vect_slp_analyze_node_operations_1 (vinfo
, node
, node_instance
,
  /* When the node can be vectorized cost invariant nodes it references.
     This is not done in DFS order to allow the referring node's
     vectorizable_* calls to nail down the invariant nodes' vector type
     and possibly unshare it if it needs a different vector type.  */
2758 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), j
, child
)
2759 if ((SLP_TREE_DEF_TYPE (child
) == vect_constant_def
2760 || SLP_TREE_DEF_TYPE (child
) == vect_external_def
)
2761 /* Perform usual caching, note code-generation still
2762 code-gens these nodes multiple times but we expect
2763 to CSE them later. */
2764 && !visited
.contains (child
)
2765 && !lvisited
.add (child
))
2767 /* ??? After auditing more code paths make a "default"
2768 and push the vector type from NODE to all children
2769 if it is not already set. */
2770 /* Compute the number of vectors to be generated. */
2771 tree vector_type
= SLP_TREE_VECTYPE (child
);
	  /* For shifts with a scalar argument we don't need
	     to cost or code-generate anything.
	     ??? Represent this more explicitly.  */
2777 gcc_assert ((STMT_VINFO_TYPE (SLP_TREE_REPRESENTATIVE (node
))
2778 == shift_vec_info_type
)
2782 unsigned group_size
= SLP_TREE_SCALAR_OPS (child
).length ();
2784 if (loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
))
2785 vf
= loop_vinfo
->vectorization_factor
;
2786 SLP_TREE_NUMBER_OF_VEC_STMTS (child
)
2787 = vect_get_num_vectors (vf
* group_size
, vector_type
);
2788 /* And cost them. */
2789 vect_prologue_cost_for_slp (child
, cost_vec
);
2792 /* If this node can't be vectorized, try pruning the tree here rather
2793 than felling the whole thing. */
2794 if (!res
&& vect_slp_convert_to_external (vinfo
, node
, node_instance
))
2796 /* We'll need to revisit this for invariant costing and number
2797 of vectorized stmt setting. */
2798 lvisited
.remove (node
);
2806 /* Analyze statements in SLP instances of VINFO. Return true if the
2807 operations are supported. */
2810 vect_slp_analyze_operations (vec_info
*vinfo
)
2812 slp_instance instance
;
2815 DUMP_VECT_SCOPE ("vect_slp_analyze_operations");
2817 hash_set
<slp_tree
> visited
;
2818 for (i
= 0; vinfo
->slp_instances
.iterate (i
, &instance
); )
2820 hash_set
<slp_tree
> lvisited
;
2821 stmt_vector_for_cost cost_vec
;
2822 cost_vec
.create (2);
2823 if (!vect_slp_analyze_node_operations (vinfo
,
2824 SLP_INSTANCE_TREE (instance
),
2825 instance
, visited
, lvisited
,
2827 /* Instances with a root stmt require vectorized defs for the
2829 || (SLP_INSTANCE_ROOT_STMT (instance
)
2830 && (SLP_TREE_DEF_TYPE (SLP_INSTANCE_TREE (instance
))
2831 != vect_internal_def
)))
2833 slp_tree node
= SLP_INSTANCE_TREE (instance
);
2834 stmt_vec_info stmt_info
= SLP_TREE_SCALAR_STMTS (node
)[0];
2835 if (dump_enabled_p ())
2836 dump_printf_loc (MSG_NOTE
, vect_location
,
2837 "removing SLP instance operations starting from: %G",
2839 vect_free_slp_instance (instance
, false);
2840 vinfo
->slp_instances
.ordered_remove (i
);
2841 cost_vec
.release ();
2845 for (hash_set
<slp_tree
>::iterator x
= lvisited
.begin();
2846 x
!= lvisited
.end(); ++x
)
2850 add_stmt_costs (vinfo
, vinfo
->target_cost_data
, &cost_vec
);
2851 cost_vec
.release ();
2855 return !vinfo
->slp_instances
.is_empty ();
2859 /* Compute the scalar cost of the SLP node NODE and its children
2860 and return it. Do not account defs that are marked in LIFE and
2861 update LIFE according to uses of NODE. */
2864 vect_bb_slp_scalar_cost (vec_info
*vinfo
,
2865 slp_tree node
, vec
<bool, va_heap
> *life
,
2866 stmt_vector_for_cost
*cost_vec
,
2867 hash_set
<slp_tree
> &visited
)
2870 stmt_vec_info stmt_info
;
2873 if (visited
.add (node
))
2876 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node
), i
, stmt_info
)
2878 gimple
*stmt
= stmt_info
->stmt
;
2879 ssa_op_iter op_iter
;
2880 def_operand_p def_p
;
2885 /* If there is a non-vectorized use of the defs then the scalar
2886 stmt is kept live in which case we do not account it or any
2887 required defs in the SLP children in the scalar cost. This
2888 way we make the vectorization more costly when compared to
2890 FOR_EACH_SSA_DEF_OPERAND (def_p
, stmt
, op_iter
, SSA_OP_DEF
)
2892 imm_use_iterator use_iter
;
2894 FOR_EACH_IMM_USE_STMT (use_stmt
, use_iter
, DEF_FROM_PTR (def_p
))
2895 if (!is_gimple_debug (use_stmt
))
2897 stmt_vec_info use_stmt_info
= vinfo
->lookup_stmt (use_stmt
);
2898 if (!use_stmt_info
|| !PURE_SLP_STMT (use_stmt_info
))
2901 BREAK_FROM_IMM_USE_STMT (use_iter
);
2908 /* Count scalar stmts only once. */
2909 if (gimple_visited_p (stmt
))
2911 gimple_set_visited (stmt
, true);
2913 vect_cost_for_stmt kind
;
2914 if (STMT_VINFO_DATA_REF (stmt_info
))
2916 if (DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info
)))
2919 kind
= scalar_store
;
2921 else if (vect_nop_conversion_p (stmt_info
))
2925 record_stmt_cost (cost_vec
, 1, kind
, stmt_info
, 0, vect_body
);
2928 auto_vec
<bool, 20> subtree_life
;
2929 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), i
, child
)
2931 if (SLP_TREE_DEF_TYPE (child
) == vect_internal_def
)
2933 /* Do not directly pass LIFE to the recursive call, copy it to
2934 confine changes in the callee to the current child/subtree. */
2935 subtree_life
.safe_splice (*life
);
2936 vect_bb_slp_scalar_cost (vinfo
, child
, &subtree_life
, cost_vec
,
2938 subtree_life
.truncate (0);
2943 /* Check if vectorization of the basic block is profitable. */
2946 vect_bb_vectorization_profitable_p (bb_vec_info bb_vinfo
)
2948 vec
<slp_instance
> slp_instances
= BB_VINFO_SLP_INSTANCES (bb_vinfo
);
2949 slp_instance instance
;
2951 unsigned int vec_inside_cost
= 0, vec_outside_cost
= 0, scalar_cost
= 0;
2952 unsigned int vec_prologue_cost
= 0, vec_epilogue_cost
= 0;
2954 /* Calculate scalar cost. */
2955 stmt_vector_for_cost scalar_costs
;
2956 scalar_costs
.create (0);
2957 hash_set
<slp_tree
> visited
;
2958 FOR_EACH_VEC_ELT (slp_instances
, i
, instance
)
2960 auto_vec
<bool, 20> life
;
2961 life
.safe_grow_cleared (SLP_TREE_LANES (SLP_INSTANCE_TREE (instance
)));
2962 vect_bb_slp_scalar_cost (bb_vinfo
,
2963 SLP_INSTANCE_TREE (instance
),
2964 &life
, &scalar_costs
, visited
);
2966 /* Unset visited flag. */
2967 stmt_info_for_cost
*si
;
2968 FOR_EACH_VEC_ELT (scalar_costs
, i
, si
)
2969 gimple_set_visited (si
->stmt_info
->stmt
, false);
2971 void *target_cost_data
= init_cost (NULL
);
2972 add_stmt_costs (bb_vinfo
, target_cost_data
, &scalar_costs
);
2973 scalar_costs
.release ();
2975 finish_cost (target_cost_data
, &dummy
, &scalar_cost
, &dummy
);
2976 destroy_cost_data (target_cost_data
);
2978 /* Complete the target-specific cost calculation. */
2979 finish_cost (BB_VINFO_TARGET_COST_DATA (bb_vinfo
), &vec_prologue_cost
,
2980 &vec_inside_cost
, &vec_epilogue_cost
);
2982 vec_outside_cost
= vec_prologue_cost
+ vec_epilogue_cost
;
2984 if (dump_enabled_p ())
2986 dump_printf_loc (MSG_NOTE
, vect_location
, "Cost model analysis: \n");
2987 dump_printf (MSG_NOTE
, " Vector inside of basic block cost: %d\n",
2989 dump_printf (MSG_NOTE
, " Vector prologue cost: %d\n", vec_prologue_cost
);
2990 dump_printf (MSG_NOTE
, " Vector epilogue cost: %d\n", vec_epilogue_cost
);
2991 dump_printf (MSG_NOTE
, " Scalar cost of basic block: %d\n", scalar_cost
);
  /* Vectorization is profitable if its cost is less than the cost of the
     scalar version.  Note that we err on the vector side for equal cost
     because the cost estimate is otherwise quite pessimistic (constant uses
     are free on the scalar side but cost a load on the vector side, for
     example).  */
  if (vec_outside_cost + vec_inside_cost > scalar_cost)
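  /* For instance (illustrative numbers only): with a vector prologue cost
     of 2, an inside cost of 4 and a scalar cost of 8, the total vector cost
     of 6 does not exceed 8, so the block is considered worth vectorizing.  */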
/* Find any vectorizable constructors and add them to the grouped_store
   array.  */

static void
vect_slp_check_for_constructors (bb_vec_info bb_vinfo)
{
  gimple_stmt_iterator gsi;

  for (gsi = bb_vinfo->region_begin;
       gsi_stmt (gsi) != gsi_stmt (bb_vinfo->region_end); gsi_next (&gsi))
    {
      gassign *stmt = dyn_cast <gassign *> (gsi_stmt (gsi));
      if (!stmt || gimple_assign_rhs_code (stmt) != CONSTRUCTOR)
	continue;

      tree rhs = gimple_assign_rhs1 (stmt);
      if (!VECTOR_TYPE_P (TREE_TYPE (rhs))
	  || maybe_ne (TYPE_VECTOR_SUBPARTS (TREE_TYPE (rhs)),
		       CONSTRUCTOR_NELTS (rhs))
	  || VECTOR_TYPE_P (TREE_TYPE (CONSTRUCTOR_ELT (rhs, 0)->value))
	  || uniform_vector_p (rhs))
	continue;

      stmt_vec_info stmt_info = bb_vinfo->lookup_stmt (stmt);
      BB_VINFO_GROUPED_STORES (bb_vinfo).safe_push (stmt_info);
    }
}
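
/* An illustrative case (hypothetical SSA names): a statement such as
     v_1 = {_2, _3, _4, _5};
   whose CONSTRUCTOR has exactly as many scalar, non-uniform elements as the
   vector type has lanes is queued above so that SLP analysis can treat it
   like a grouped store of its elements.  */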
3033 /* Check if the region described by BB_VINFO can be vectorized, returning
3034 true if so. When returning false, set FATAL to true if the same failure
3035 would prevent vectorization at other vector sizes, false if it is still
3036 worth trying other sizes. N_STMTS is the number of statements in the
3040 vect_slp_analyze_bb_1 (bb_vec_info bb_vinfo
, int n_stmts
, bool &fatal
)
3042 DUMP_VECT_SCOPE ("vect_slp_analyze_bb");
3044 slp_instance instance
;
3046 poly_uint64 min_vf
= 2;
3048 /* The first group of checks is independent of the vector size. */
3051 /* Analyze the data references. */
3053 if (!vect_analyze_data_refs (bb_vinfo
, &min_vf
, NULL
))
3055 if (dump_enabled_p ())
3056 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3057 "not vectorized: unhandled data-ref in basic "
3062 if (BB_VINFO_DATAREFS (bb_vinfo
).length () < 2)
3064 if (dump_enabled_p ())
3065 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3066 "not vectorized: not enough data-refs in "
3071 if (!vect_analyze_data_ref_accesses (bb_vinfo
))
3073 if (dump_enabled_p ())
3074 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3075 "not vectorized: unhandled data access in "
3080 vect_slp_check_for_constructors (bb_vinfo
);
3082 /* If there are no grouped stores in the region there is no need
3083 to continue with pattern recog as vect_analyze_slp will fail
3085 if (bb_vinfo
->grouped_stores
.is_empty ())
3087 if (dump_enabled_p ())
3088 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3089 "not vectorized: no grouped stores in "
3094 /* While the rest of the analysis below depends on it in some way. */
3097 vect_pattern_recog (bb_vinfo
);
3099 /* Check the SLP opportunities in the basic block, analyze and build SLP
3101 if (!vect_analyze_slp (bb_vinfo
, n_stmts
))
3103 if (dump_enabled_p ())
3105 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3106 "Failed to SLP the basic block.\n");
3107 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3108 "not vectorized: failed to find SLP opportunities "
3109 "in basic block.\n");
3114 /* Optimize permutations. */
3115 vect_optimize_slp (bb_vinfo
);
3117 vect_record_base_alignments (bb_vinfo
);
3119 /* Analyze and verify the alignment of data references and the
3120 dependence in the SLP instances. */
3121 for (i
= 0; BB_VINFO_SLP_INSTANCES (bb_vinfo
).iterate (i
, &instance
); )
3123 if (! vect_slp_analyze_and_verify_instance_alignment (bb_vinfo
, instance
)
3124 || ! vect_slp_analyze_instance_dependence (bb_vinfo
, instance
))
3126 slp_tree node
= SLP_INSTANCE_TREE (instance
);
3127 stmt_vec_info stmt_info
= SLP_TREE_SCALAR_STMTS (node
)[0];
3128 if (dump_enabled_p ())
3129 dump_printf_loc (MSG_NOTE
, vect_location
,
3130 "removing SLP instance operations starting from: %G",
3132 vect_free_slp_instance (instance
, false);
3133 BB_VINFO_SLP_INSTANCES (bb_vinfo
).ordered_remove (i
);
3137 /* Mark all the statements that we want to vectorize as pure SLP and
3139 vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance
));
3140 vect_mark_slp_stmts_relevant (SLP_INSTANCE_TREE (instance
));
3141 if (SLP_INSTANCE_ROOT_STMT (instance
))
3142 STMT_SLP_TYPE (SLP_INSTANCE_ROOT_STMT (instance
)) = pure_slp
;
3146 if (! BB_VINFO_SLP_INSTANCES (bb_vinfo
).length ())
3149 if (!vect_slp_analyze_operations (bb_vinfo
))
3151 if (dump_enabled_p ())
3152 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3153 "not vectorized: bad operation in basic block.\n");
3157 /* Cost model: check if the vectorization is worthwhile. */
3158 if (!unlimited_cost_model (NULL
)
3159 && !vect_bb_vectorization_profitable_p (bb_vinfo
))
3161 if (dump_enabled_p ())
3162 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3163 "not vectorized: vectorization is not "
3168 if (dump_enabled_p ())
3169 dump_printf_loc (MSG_NOTE
, vect_location
,
3170 "Basic block will be vectorized using SLP\n");
3174 /* Subroutine of vect_slp_bb. Try to vectorize the statements between
3175 REGION_BEGIN (inclusive) and REGION_END (exclusive), returning true
3176 on success. The region has N_STMTS statements and has the datarefs
3177 given by DATAREFS. */
3180 vect_slp_bb_region (gimple_stmt_iterator region_begin
,
3181 gimple_stmt_iterator region_end
,
3182 vec
<data_reference_p
> datarefs
,
3183 unsigned int n_stmts
)
3185 bb_vec_info bb_vinfo
;
3186 auto_vector_modes vector_modes
;
3188 /* Autodetect first vector size we try. */
3189 machine_mode next_vector_mode
= VOIDmode
;
3190 targetm
.vectorize
.autovectorize_vector_modes (&vector_modes
, false);
3191 unsigned int mode_i
= 0;
3193 vec_info_shared shared
;
3195 machine_mode autodetected_vector_mode
= VOIDmode
;
3198 bool vectorized
= false;
3200 bb_vinfo
= new _bb_vec_info (region_begin
, region_end
, &shared
);
3202 bool first_time_p
= shared
.datarefs
.is_empty ();
3203 BB_VINFO_DATAREFS (bb_vinfo
) = datarefs
;
3205 bb_vinfo
->shared
->save_datarefs ();
3207 bb_vinfo
->shared
->check_datarefs ();
3208 bb_vinfo
->vector_mode
= next_vector_mode
;
3210 if (vect_slp_analyze_bb_1 (bb_vinfo
, n_stmts
, fatal
)
3211 && dbg_cnt (vect_slp
))
3213 if (dump_enabled_p ())
3215 dump_printf_loc (MSG_NOTE
, vect_location
,
3216 "***** Analysis succeeded with vector mode"
3217 " %s\n", GET_MODE_NAME (bb_vinfo
->vector_mode
));
3218 dump_printf_loc (MSG_NOTE
, vect_location
, "SLPing BB part\n");
3221 bb_vinfo
->shared
->check_datarefs ();
3222 vect_schedule_slp (bb_vinfo
);
3224 unsigned HOST_WIDE_INT bytes
;
3225 if (dump_enabled_p ())
3227 if (GET_MODE_SIZE (bb_vinfo
->vector_mode
).is_constant (&bytes
))
3228 dump_printf_loc (MSG_OPTIMIZED_LOCATIONS
, vect_location
,
3229 "basic block part vectorized using %wu byte "
3230 "vectors\n", bytes
);
3232 dump_printf_loc (MSG_OPTIMIZED_LOCATIONS
, vect_location
,
3233 "basic block part vectorized using variable "
3234 "length vectors\n");
3241 if (dump_enabled_p ())
3242 dump_printf_loc (MSG_NOTE
, vect_location
,
3243 "***** Analysis failed with vector mode %s\n",
3244 GET_MODE_NAME (bb_vinfo
->vector_mode
));
3248 autodetected_vector_mode
= bb_vinfo
->vector_mode
;
3251 while (mode_i
< vector_modes
.length ()
3252 && vect_chooses_same_modes_p (bb_vinfo
, vector_modes
[mode_i
]))
3254 if (dump_enabled_p ())
3255 dump_printf_loc (MSG_NOTE
, vect_location
,
3256 "***** The result for vector mode %s would"
3258 GET_MODE_NAME (vector_modes
[mode_i
]));
3264 if (mode_i
< vector_modes
.length ()
3265 && VECTOR_MODE_P (autodetected_vector_mode
)
3266 && (related_vector_mode (vector_modes
[mode_i
],
3267 GET_MODE_INNER (autodetected_vector_mode
))
3268 == autodetected_vector_mode
)
3269 && (related_vector_mode (autodetected_vector_mode
,
3270 GET_MODE_INNER (vector_modes
[mode_i
]))
3271 == vector_modes
[mode_i
]))
3273 if (dump_enabled_p ())
3274 dump_printf_loc (MSG_NOTE
, vect_location
,
3275 "***** Skipping vector mode %s, which would"
3276 " repeat the analysis for %s\n",
3277 GET_MODE_NAME (vector_modes
[mode_i
]),
3278 GET_MODE_NAME (autodetected_vector_mode
));
3283 || mode_i
== vector_modes
.length ()
3284 || autodetected_vector_mode
== VOIDmode
3285 /* If vect_slp_analyze_bb_1 signaled that analysis for all
3286 vector sizes will fail do not bother iterating. */
3290 /* Try the next biggest vector size. */
3291 next_vector_mode
= vector_modes
[mode_i
++];
3292 if (dump_enabled_p ())
3293 dump_printf_loc (MSG_NOTE
, vect_location
,
3294 "***** Re-trying analysis with vector mode %s\n",
3295 GET_MODE_NAME (next_vector_mode
));
3299 /* Main entry for the BB vectorizer. Analyze and transform BB, returns
3300 true if anything in the basic-block was vectorized. */
3303 vect_slp_bb (basic_block bb
)
3305 gimple_stmt_iterator gsi
;
3306 bool any_vectorized
= false;
3308 gsi
= gsi_after_labels (bb
);
3309 while (!gsi_end_p (gsi
))
3311 gimple_stmt_iterator region_begin
= gsi
;
3312 vec
<data_reference_p
> datarefs
= vNULL
;
3315 for (; !gsi_end_p (gsi
); gsi_next (&gsi
))
3317 gimple
*stmt
= gsi_stmt (gsi
);
3318 if (is_gimple_debug (stmt
))
3322 if (gimple_location (stmt
) != UNKNOWN_LOCATION
)
3323 vect_location
= stmt
;
3325 if (!vect_find_stmt_data_reference (NULL
, stmt
, &datarefs
))
3329 /* Skip leading unhandled stmts. */
3330 if (gsi_stmt (region_begin
) == gsi_stmt (gsi
))
3336 gimple_stmt_iterator region_end
= gsi
;
3338 if (insns
> param_slp_max_insns_in_bb
)
3340 if (dump_enabled_p ())
3341 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3342 "not vectorized: too many instructions in "
3345 else if (vect_slp_bb_region (region_begin
, region_end
, datarefs
, insns
))
3346 any_vectorized
= true;
3348 if (gsi_end_p (region_end
))
3351 /* Skip the unhandled stmt. */
3355 return any_vectorized
;
/* Build a variable-length vector in which the elements in ELTS are repeated
   to fill NRESULTS vectors of type VECTOR_TYPE.  Store the vectors in
   RESULTS and add any new instructions to SEQ.
3363 The approach we use is:
3365 (1) Find a vector mode VM with integer elements of mode IM.
3367 (2) Replace ELTS[0:NELTS] with ELTS'[0:NELTS'], where each element of
3368 ELTS' has mode IM. This involves creating NELTS' VIEW_CONVERT_EXPRs
3369 from small vectors to IM.
3371 (3) Duplicate each ELTS'[I] into a vector of mode VM.
3373 (4) Use a tree of interleaving VEC_PERM_EXPRs to create VMs with the
3374 correct byte contents.
3376 (5) Use VIEW_CONVERT_EXPR to cast the final VMs to the required type.
3378 We try to find the largest IM for which this sequence works, in order
3379 to cut down on the number of interleaves. */
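
/* Illustrative example (the mode names are only for exposition, not taken
   from the original source): to fill VNx4SI vectors with the repeated pair
   { a, b }, we can pick VM = VNx2DI (IM = DImode), VIEW_CONVERT the
   two-element vector { a, b } to a single DImode value, splat that value
   into a VNx2DI vector, and VIEW_CONVERT the result back to VNx4SI, giving
   { a, b, a, b, ... } without needing any interleaving step.  */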
3382 duplicate_and_interleave (vec_info
*vinfo
, gimple_seq
*seq
, tree vector_type
,
3383 vec
<tree
> elts
, unsigned int nresults
,
3386 unsigned int nelts
= elts
.length ();
3387 tree element_type
= TREE_TYPE (vector_type
);
3389 /* (1) Find a vector mode VM with integer elements of mode IM. */
3390 unsigned int nvectors
= 1;
3391 tree new_vector_type
;
3393 if (!can_duplicate_and_interleave_p (vinfo
, nelts
, element_type
,
3394 &nvectors
, &new_vector_type
,
3398 /* Get a vector type that holds ELTS[0:NELTS/NELTS']. */
3399 unsigned int partial_nelts
= nelts
/ nvectors
;
3400 tree partial_vector_type
= build_vector_type (element_type
, partial_nelts
);
3402 tree_vector_builder partial_elts
;
3403 auto_vec
<tree
, 32> pieces (nvectors
* 2);
3404 pieces
.quick_grow (nvectors
* 2);
3405 for (unsigned int i
= 0; i
< nvectors
; ++i
)
3407 /* (2) Replace ELTS[0:NELTS] with ELTS'[0:NELTS'], where each element of
3408 ELTS' has mode IM. */
3409 partial_elts
.new_vector (partial_vector_type
, partial_nelts
, 1);
3410 for (unsigned int j
= 0; j
< partial_nelts
; ++j
)
3411 partial_elts
.quick_push (elts
[i
* partial_nelts
+ j
]);
3412 tree t
= gimple_build_vector (seq
, &partial_elts
);
3413 t
= gimple_build (seq
, VIEW_CONVERT_EXPR
,
3414 TREE_TYPE (new_vector_type
), t
);
3416 /* (3) Duplicate each ELTS'[I] into a vector of mode VM. */
3417 pieces
[i
] = gimple_build_vector_from_val (seq
, new_vector_type
, t
);
3420 /* (4) Use a tree of VEC_PERM_EXPRs to create a single VM with the
3421 correct byte contents.
3423 We need to repeat the following operation log2(nvectors) times:
3425 out[i * 2] = VEC_PERM_EXPR (in[i], in[i + hi_start], lo_permute);
3426 out[i * 2 + 1] = VEC_PERM_EXPR (in[i], in[i + hi_start], hi_permute);
3428 However, if each input repeats every N elements and the VF is
3429 a multiple of N * 2, the HI result is the same as the LO. */
3430 unsigned int in_start
= 0;
3431 unsigned int out_start
= nvectors
;
3432 unsigned int hi_start
= nvectors
/ 2;
3433 /* A bound on the number of outputs needed to produce NRESULTS results
3434 in the final iteration. */
3435 unsigned int noutputs_bound
= nvectors
* nresults
;
3436 for (unsigned int in_repeat
= 1; in_repeat
< nvectors
; in_repeat
*= 2)
3438 noutputs_bound
/= 2;
3439 unsigned int limit
= MIN (noutputs_bound
, nvectors
);
3440 for (unsigned int i
= 0; i
< limit
; ++i
)
3443 && multiple_p (TYPE_VECTOR_SUBPARTS (new_vector_type
),
3446 pieces
[out_start
+ i
] = pieces
[out_start
+ i
- 1];
3450 tree output
= make_ssa_name (new_vector_type
);
3451 tree input1
= pieces
[in_start
+ (i
/ 2)];
3452 tree input2
= pieces
[in_start
+ (i
/ 2) + hi_start
];
3453 gassign
*stmt
= gimple_build_assign (output
, VEC_PERM_EXPR
,
3456 gimple_seq_add_stmt (seq
, stmt
);
3457 pieces
[out_start
+ i
] = output
;
3459 std::swap (in_start
, out_start
);
3462 /* (5) Use VIEW_CONVERT_EXPR to cast the final VM to the required type. */
3463 results
.reserve (nresults
);
3464 for (unsigned int i
= 0; i
< nresults
; ++i
)
3466 results
.quick_push (gimple_build (seq
, VIEW_CONVERT_EXPR
, vector_type
,
3467 pieces
[in_start
+ i
]));
3469 results
.quick_push (results
[i
- nvectors
]);
3473 /* For constant and loop invariant defs in OP_NODE this function creates
3474 vector defs that will be used in the vectorized stmts and stores them
3475 to SLP_TREE_VEC_DEFS of OP_NODE. */
3478 vect_create_constant_vectors (vec_info
*vinfo
, slp_tree op_node
)
3480 unsigned HOST_WIDE_INT nunits
;
3482 unsigned j
, number_of_places_left_in_vector
;
3485 int group_size
= op_node
->ops
.length ();
3486 unsigned int vec_num
, i
;
3487 unsigned number_of_copies
= 1;
3489 gimple_seq ctor_seq
= NULL
;
3490 auto_vec
<tree
, 16> permute_results
;
3492 /* We always want SLP_TREE_VECTYPE (op_node) here correctly set. */
3493 vector_type
= SLP_TREE_VECTYPE (op_node
);
3495 unsigned int number_of_vectors
= SLP_TREE_NUMBER_OF_VEC_STMTS (op_node
);
3496 SLP_TREE_VEC_DEFS (op_node
).create (number_of_vectors
);
3497 auto_vec
<tree
> voprnds (number_of_vectors
);
3499 /* NUMBER_OF_COPIES is the number of times we need to use the same values in
3500 created vectors. It is greater than 1 if unrolling is performed.
3502 For example, we have two scalar operands, s1 and s2 (e.g., group of
3503 strided accesses of size two), while NUNITS is four (i.e., four scalars
3504 of this type can be packed in a vector). The output vector will contain
3505 two copies of each scalar operand: {s1, s2, s1, s2}. (NUMBER_OF_COPIES
3508 If GROUP_SIZE > NUNITS, the scalars will be split into several vectors
3509 containing the operands.
3511 For example, NUNITS is four as before, and the group size is 8
3512 (s1, s2, ..., s8). We will create two vectors {s1, s2, s3, s4} and
3513 {s5, s6, s7, s8}. */
3515 /* When using duplicate_and_interleave, we just need one element for
3516 each scalar statement. */
3517 if (!TYPE_VECTOR_SUBPARTS (vector_type
).is_constant (&nunits
))
3518 nunits
= group_size
;
3520 number_of_copies
= nunits
* number_of_vectors
/ group_size
;
3522 number_of_places_left_in_vector
= nunits
;
3524 tree_vector_builder
elts (vector_type
, nunits
, 1);
3525 elts
.quick_grow (nunits
);
3526 stmt_vec_info insert_after
= NULL
;
3527 for (j
= 0; j
< number_of_copies
; j
++)
3530 for (i
= group_size
- 1; op_node
->ops
.iterate (i
, &op
); i
--)
3532 /* Create 'vect_ = {op0,op1,...,opn}'. */
3533 number_of_places_left_in_vector
--;
3535 if (!types_compatible_p (TREE_TYPE (vector_type
), TREE_TYPE (op
)))
3537 if (CONSTANT_CLASS_P (op
))
3539 if (VECTOR_BOOLEAN_TYPE_P (vector_type
))
3541 /* Can't use VIEW_CONVERT_EXPR for booleans because
3542 of possibly different sizes of scalar value and
3544 if (integer_zerop (op
))
3545 op
= build_int_cst (TREE_TYPE (vector_type
), 0);
3546 else if (integer_onep (op
))
3547 op
= build_all_ones_cst (TREE_TYPE (vector_type
));
3552 op
= fold_unary (VIEW_CONVERT_EXPR
,
3553 TREE_TYPE (vector_type
), op
);
3554 gcc_assert (op
&& CONSTANT_CLASS_P (op
));
3558 tree new_temp
= make_ssa_name (TREE_TYPE (vector_type
));
3560 if (VECTOR_BOOLEAN_TYPE_P (vector_type
))
3563 = build_all_ones_cst (TREE_TYPE (vector_type
));
3565 = build_zero_cst (TREE_TYPE (vector_type
));
3566 gcc_assert (INTEGRAL_TYPE_P (TREE_TYPE (op
)));
3567 init_stmt
= gimple_build_assign (new_temp
, COND_EXPR
,
3573 op
= build1 (VIEW_CONVERT_EXPR
, TREE_TYPE (vector_type
),
3576 = gimple_build_assign (new_temp
, VIEW_CONVERT_EXPR
,
3579 gimple_seq_add_stmt (&ctor_seq
, init_stmt
);
3583 elts
[number_of_places_left_in_vector
] = op
;
3584 if (!CONSTANT_CLASS_P (op
))
3586 /* For BB vectorization we have to compute an insert location
3587 when a def is inside the analyzed region since we cannot
3588 simply insert at the BB start in this case. */
3589 stmt_vec_info opdef
;
3590 if (TREE_CODE (orig_op
) == SSA_NAME
3591 && !SSA_NAME_IS_DEFAULT_DEF (orig_op
)
3592 && is_a
<bb_vec_info
> (vinfo
)
3593 && (opdef
= vinfo
->lookup_def (orig_op
)))
3596 insert_after
= opdef
;
3598 insert_after
= get_later_stmt (insert_after
, opdef
);
3601 if (number_of_places_left_in_vector
== 0)
3604 ? multiple_p (TYPE_VECTOR_SUBPARTS (vector_type
), nunits
)
3605 : known_eq (TYPE_VECTOR_SUBPARTS (vector_type
), nunits
))
3606 vec_cst
= gimple_build_vector (&ctor_seq
, &elts
);
3609 if (permute_results
.is_empty ())
3610 duplicate_and_interleave (vinfo
, &ctor_seq
, vector_type
,
3611 elts
, number_of_vectors
,
3613 vec_cst
= permute_results
[number_of_vectors
- j
- 1];
3618 gimple_stmt_iterator gsi
= gsi_for_stmt (insert_after
->stmt
);
3619 /* vect_init_vector inserts before. */
3621 init
= vect_init_vector (vinfo
, NULL
, vec_cst
,
3625 init
= vect_init_vector (vinfo
, NULL
, vec_cst
,
3627 if (ctor_seq
!= NULL
)
3629 gimple_stmt_iterator gsi
3630 = gsi_for_stmt (SSA_NAME_DEF_STMT (init
));
3631 gsi_insert_seq_before (&gsi
, ctor_seq
, GSI_SAME_STMT
);
3634 voprnds
.quick_push (init
);
3635 insert_after
= NULL
;
3636 number_of_places_left_in_vector
= nunits
;
3638 elts
.new_vector (vector_type
, nunits
, 1);
3639 elts
.quick_grow (nunits
);
3644 /* Since the vectors are created in the reverse order, we should invert
3646 vec_num
= voprnds
.length ();
3647 for (j
= vec_num
; j
!= 0; j
--)
3649 vop
= voprnds
[j
- 1];
3650 SLP_TREE_VEC_DEFS (op_node
).quick_push (vop
);
3653 /* In case that VF is greater than the unrolling factor needed for the SLP
3654 group of stmts, NUMBER_OF_VECTORS to be created is greater than
3655 NUMBER_OF_SCALARS/NUNITS or NUNITS/NUMBER_OF_SCALARS, and hence we have
3656 to replicate the vectors. */
3657 while (number_of_vectors
> SLP_TREE_VEC_DEFS (op_node
).length ())
3658 for (i
= 0; SLP_TREE_VEC_DEFS (op_node
).iterate (i
, &vop
) && i
< vec_num
;
3660 SLP_TREE_VEC_DEFS (op_node
).quick_push (vop
);
/* Get the Ith vectorized definition from SLP_NODE.  */

tree
vect_get_slp_vect_def (slp_tree slp_node, unsigned i)
{
  if (SLP_TREE_VEC_STMTS (slp_node).exists ())
    return gimple_get_lhs (SLP_TREE_VEC_STMTS (slp_node)[i]->stmt);
  else
    return SLP_TREE_VEC_DEFS (slp_node)[i];
}
/* Get N vectorized definitions for SLP_NODE.  */

void
vect_get_slp_defs (vec_info *,
		   slp_tree slp_node, vec<vec<tree> > *vec_oprnds, unsigned n)
{
  if (n == -1U)
    n = SLP_TREE_CHILDREN (slp_node).length ();

  for (unsigned i = 0; i < n; ++i)
    {
      slp_tree child = SLP_TREE_CHILDREN (slp_node)[i];

      vec<tree> vec_defs = vNULL;

      /* For each operand we check if it has vectorized definitions in a child
	 node or we need to create them (for invariants and constants).  */
      vec_defs.create (SLP_TREE_NUMBER_OF_VEC_STMTS (child));
      if (SLP_TREE_DEF_TYPE (child) == vect_internal_def)
	{
	  unsigned j;
	  stmt_vec_info vec_def_stmt_info;
	  FOR_EACH_VEC_ELT (SLP_TREE_VEC_STMTS (child), j, vec_def_stmt_info)
	    vec_defs.quick_push (gimple_get_lhs (vec_def_stmt_info->stmt));
	}
      else
	vec_defs.splice (SLP_TREE_VEC_DEFS (child));

      vec_oprnds->quick_push (vec_defs);
    }
}
3706 /* Generate vector permute statements from a list of loads in DR_CHAIN.
3707 If ANALYZE_ONLY is TRUE, only check that it is possible to create valid
3708 permute statements for the SLP node NODE. */
3711 vect_transform_slp_perm_load (vec_info
*vinfo
,
3712 slp_tree node
, vec
<tree
> dr_chain
,
3713 gimple_stmt_iterator
*gsi
, poly_uint64 vf
,
3714 bool analyze_only
, unsigned *n_perms
)
3716 stmt_vec_info stmt_info
= SLP_TREE_SCALAR_STMTS (node
)[0];
3718 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
3719 unsigned int group_size
= SLP_TREE_SCALAR_STMTS (node
).length ();
3720 unsigned int mask_element
;
3723 if (!STMT_VINFO_GROUPED_ACCESS (stmt_info
))
3726 stmt_info
= DR_GROUP_FIRST_ELEMENT (stmt_info
);
3728 mode
= TYPE_MODE (vectype
);
3729 poly_uint64 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
3731 /* Initialize the vect stmts of NODE to properly insert the generated
3734 for (unsigned i
= SLP_TREE_VEC_STMTS (node
).length ();
3735 i
< SLP_TREE_NUMBER_OF_VEC_STMTS (node
); i
++)
3736 SLP_TREE_VEC_STMTS (node
).quick_push (NULL
);
3738 /* Generate permutation masks for every NODE. Number of masks for each NODE
3739 is equal to GROUP_SIZE.
3740 E.g., we have a group of three nodes with three loads from the same
3741 location in each node, and the vector size is 4. I.e., we have a
3742 a0b0c0a1b1c1... sequence and we need to create the following vectors:
3743 for a's: a0a0a0a1 a1a1a2a2 a2a3a3a3
3744 for b's: b0b0b0b1 b1b1b2b2 b2b3b3b3
3747 The masks for a's should be: {0,0,0,3} {3,3,6,6} {6,9,9,9}.
3748 The last mask is illegal since we assume two operands for permute
3749 operation, and the mask element values can't be outside that range.
3750 Hence, the last mask must be converted into {2,5,5,5}.
3751 For the first two permutations we need the first and the second input
3752 vectors: {a0,b0,c0,a1} and {b1,c1,a2,b2}, and for the last permutation
3753 we need the second and the third vectors: {b1,c1,a2,b2} and
3756 int vect_stmts_counter
= 0;
3757 unsigned int index
= 0;
3758 int first_vec_index
= -1;
3759 int second_vec_index
= -1;
3763 vec_perm_builder mask
;
3764 unsigned int nelts_to_build
;
3765 unsigned int nvectors_per_build
;
3766 bool repeating_p
= (group_size
== DR_GROUP_SIZE (stmt_info
)
3767 && multiple_p (nunits
, group_size
));
3770 /* A single vector contains a whole number of copies of the node, so:
3771 (a) all permutes can use the same mask; and
3772 (b) the permutes only need a single vector input. */
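      /* Illustrative example (not part of the original comment): with a
	 group of two loads, nunits of 8 and a load permutation of { 1, 0 },
	 one eight-lane input holds four whole group copies and the single
	 mask { 1, 0, 3, 2, 5, 4, 7, 6 } serves every permute.  */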
3773 mask
.new_vector (nunits
, group_size
, 3);
3774 nelts_to_build
= mask
.encoded_nelts ();
3775 nvectors_per_build
= SLP_TREE_VEC_STMTS (node
).length ();
3779 /* We need to construct a separate mask for each vector statement. */
3780 unsigned HOST_WIDE_INT const_nunits
, const_vf
;
3781 if (!nunits
.is_constant (&const_nunits
)
3782 || !vf
.is_constant (&const_vf
))
3784 mask
.new_vector (const_nunits
, const_nunits
, 1);
3785 nelts_to_build
= const_vf
* group_size
;
3786 nvectors_per_build
= 1;
3789 unsigned int count
= mask
.encoded_nelts ();
3790 mask
.quick_grow (count
);
3791 vec_perm_indices indices
;
3793 for (unsigned int j
= 0; j
< nelts_to_build
; j
++)
3795 unsigned int iter_num
= j
/ group_size
;
3796 unsigned int stmt_num
= j
% group_size
;
3797 unsigned int i
= (iter_num
* DR_GROUP_SIZE (stmt_info
)
3798 + SLP_TREE_LOAD_PERMUTATION (node
)[stmt_num
]);
3801 first_vec_index
= 0;
3806 /* Enforced before the loop when !repeating_p. */
3807 unsigned int const_nunits
= nunits
.to_constant ();
3808 vec_index
= i
/ const_nunits
;
3809 mask_element
= i
% const_nunits
;
3810 if (vec_index
== first_vec_index
3811 || first_vec_index
== -1)
3813 first_vec_index
= vec_index
;
3815 else if (vec_index
== second_vec_index
3816 || second_vec_index
== -1)
3818 second_vec_index
= vec_index
;
3819 mask_element
+= const_nunits
;
3823 if (dump_enabled_p ())
3824 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3825 "permutation requires at "
3826 "least three vectors %G",
3828 gcc_assert (analyze_only
);
3832 gcc_assert (mask_element
< 2 * const_nunits
);
3835 if (mask_element
!= index
)
3837 mask
[index
++] = mask_element
;
3839 if (index
== count
&& !noop_p
)
3841 indices
.new_vector (mask
, second_vec_index
== -1 ? 1 : 2, nunits
);
3842 if (!can_vec_perm_const_p (mode
, indices
))
3844 if (dump_enabled_p ())
3846 dump_printf_loc (MSG_MISSED_OPTIMIZATION
,
3848 "unsupported vect permute { ");
3849 for (i
= 0; i
< count
; ++i
)
3851 dump_dec (MSG_MISSED_OPTIMIZATION
, mask
[i
]);
3852 dump_printf (MSG_MISSED_OPTIMIZATION
, " ");
3854 dump_printf (MSG_MISSED_OPTIMIZATION
, "}\n");
3856 gcc_assert (analyze_only
);
3867 tree mask_vec
= NULL_TREE
;
3870 mask_vec
= vect_gen_perm_mask_checked (vectype
, indices
);
3872 if (second_vec_index
== -1)
3873 second_vec_index
= first_vec_index
;
3875 for (unsigned int ri
= 0; ri
< nvectors_per_build
; ++ri
)
3877 /* Generate the permute statement if necessary. */
3878 tree first_vec
= dr_chain
[first_vec_index
+ ri
];
3879 tree second_vec
= dr_chain
[second_vec_index
+ ri
];
3880 stmt_vec_info perm_stmt_info
;
3883 gassign
*stmt
= as_a
<gassign
*> (stmt_info
->stmt
);
3885 = vect_create_destination_var (gimple_assign_lhs (stmt
),
3887 perm_dest
= make_ssa_name (perm_dest
);
3889 = gimple_build_assign (perm_dest
, VEC_PERM_EXPR
,
3890 first_vec
, second_vec
,
3893 = vect_finish_stmt_generation (vinfo
,
3894 stmt_info
, perm_stmt
,
3898 /* If mask was NULL_TREE generate the requested
3899 identity transform. */
3900 perm_stmt_info
= vinfo
->lookup_def (first_vec
);
3902 /* Store the vector statement in NODE. */
3903 SLP_TREE_VEC_STMTS (node
)[vect_stmts_counter
++]
3909 first_vec_index
= -1;
3910 second_vec_index
= -1;
3918 /* Vectorize SLP instance tree in postorder. */
3921 vect_schedule_slp_instance (vec_info
*vinfo
,
3922 slp_tree node
, slp_instance instance
)
3924 gimple_stmt_iterator si
;
3928 /* See if we have already vectorized the node in the graph of the
3930 if ((SLP_TREE_DEF_TYPE (node
) == vect_internal_def
3931 && SLP_TREE_VEC_STMTS (node
).exists ())
3932 || SLP_TREE_VEC_DEFS (node
).exists ())
3935 /* Vectorize externals and constants. */
3936 if (SLP_TREE_DEF_TYPE (node
) == vect_constant_def
3937 || SLP_TREE_DEF_TYPE (node
) == vect_external_def
)
3939 /* ??? vectorizable_shift can end up using a scalar operand which is
3940 currently denoted as !SLP_TREE_VECTYPE. No need to vectorize the
3941 node in this case. */
3942 if (!SLP_TREE_VECTYPE (node
))
3945 vect_create_constant_vectors (vinfo
, node
);
3949 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), i
, child
)
3950 vect_schedule_slp_instance (vinfo
, child
, instance
);
3952 stmt_vec_info stmt_info
= SLP_TREE_REPRESENTATIVE (node
);
3954 /* VECTYPE is the type of the destination. */
3955 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
3956 poly_uint64 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
3957 unsigned group_size
= SLP_TREE_SCALAR_STMTS (node
).length ();
3959 gcc_assert (SLP_TREE_NUMBER_OF_VEC_STMTS (node
) != 0);
3960 SLP_TREE_VEC_STMTS (node
).create (SLP_TREE_NUMBER_OF_VEC_STMTS (node
));
3962 if (dump_enabled_p ())
3963 dump_printf_loc (MSG_NOTE
, vect_location
,
3964 "------>vectorizing SLP node starting from: %G",
3967 /* Vectorized stmts go before the last scalar stmt which is where
3968 all uses are ready. */
3969 stmt_vec_info last_stmt_info
= vect_find_last_scalar_stmt_in_slp (node
);
3970 si
= gsi_for_stmt (last_stmt_info
->stmt
);
3972 /* Handle two-operation SLP nodes by vectorizing the group with
3973 both operations and then performing a merge. */
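  /* Illustrative example (not from the original comment): a group computing
     { a0+b0, a1-b1, a2+b2, a3-b3 } is vectorized once with every lane doing
     PLUS and once with every lane doing MINUS, and a VEC_PERM_EXPR then
     picks the even lanes from the PLUS result and the odd lanes from the
     MINUS result.  */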
3974 bool done_p
= false;
3975 if (SLP_TREE_TWO_OPERATORS (node
))
3977 gassign
*stmt
= as_a
<gassign
*> (stmt_info
->stmt
);
3978 enum tree_code code0
= gimple_assign_rhs_code (stmt
);
3979 enum tree_code ocode
= ERROR_MARK
;
3980 stmt_vec_info ostmt_info
;
3981 vec_perm_builder
mask (group_size
, group_size
, 1);
3982 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node
), i
, ostmt_info
)
3984 gassign
*ostmt
= as_a
<gassign
*> (ostmt_info
->stmt
);
3985 if (gimple_assign_rhs_code (ostmt
) != code0
)
3987 mask
.quick_push (1);
3988 ocode
= gimple_assign_rhs_code (ostmt
);
3991 mask
.quick_push (0);
3993 if (ocode
!= ERROR_MARK
)
3995 vec
<stmt_vec_info
> v0
;
3996 vec
<stmt_vec_info
> v1
;
3998 tree tmask
= NULL_TREE
;
3999 vect_transform_stmt (vinfo
, stmt_info
, &si
, node
, instance
);
4000 v0
= SLP_TREE_VEC_STMTS (node
).copy ();
4001 SLP_TREE_VEC_STMTS (node
).truncate (0);
4002 gimple_assign_set_rhs_code (stmt
, ocode
);
4003 vect_transform_stmt (vinfo
, stmt_info
, &si
, node
, instance
);
4004 gimple_assign_set_rhs_code (stmt
, code0
);
4005 v1
= SLP_TREE_VEC_STMTS (node
).copy ();
4006 SLP_TREE_VEC_STMTS (node
).truncate (0);
4007 tree meltype
= build_nonstandard_integer_type
4008 (GET_MODE_BITSIZE (SCALAR_TYPE_MODE (TREE_TYPE (vectype
))), 1);
4009 tree mvectype
= get_same_sized_vectype (meltype
, vectype
);
4011 for (j
= 0; j
< v0
.length (); ++j
)
4013 /* Enforced by vect_build_slp_tree, which rejects variable-length
4014 vectors for SLP_TREE_TWO_OPERATORS. */
4015 unsigned int const_nunits
= nunits
.to_constant ();
4016 tree_vector_builder
melts (mvectype
, const_nunits
, 1);
4017 for (l
= 0; l
< const_nunits
; ++l
)
4019 if (k
>= group_size
)
4021 tree t
= build_int_cst (meltype
,
4022 mask
[k
++] * const_nunits
+ l
);
4023 melts
.quick_push (t
);
4025 tmask
= melts
.build ();
4027 /* ??? Not all targets support a VEC_PERM_EXPR with a
4028 constant mask that would translate to a vec_merge RTX
4029 (with their vec_perm_const_ok). We can either not
4030 vectorize in that case or let veclower do its job.
4031 Unfortunately that isn't too great and at least for
4032 plus/minus we'd eventually like to match targets
4033 vector addsub instructions. */
4035 vstmt
= gimple_build_assign (make_ssa_name (vectype
),
4037 gimple_assign_lhs (v0
[j
]->stmt
),
4038 gimple_assign_lhs (v1
[j
]->stmt
),
4040 SLP_TREE_VEC_STMTS (node
).quick_push
4041 (vect_finish_stmt_generation (vinfo
, stmt_info
, vstmt
, &si
));
4049 vect_transform_stmt (vinfo
, stmt_info
, &si
, node
, instance
);
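
/* Illustrative example (hypothetical, added for exposition; the SSA names
   are made up): for a two-operator group such as

     x0 = a0 + b0;  x1 = a1 - b1;  x2 = a2 + b2;  x3 = a3 - b3;

   the code above vectorizes the node once with PLUS_EXPR and once with
   MINUS_EXPR and then blends the two results.  With V4SF and the lane
   mask { 0, 1, 0, 1 } the element selector becomes
   { 0*4+0, 1*4+1, 0*4+2, 1*4+3 } = { 0, 5, 2, 7 }, i.e. roughly

     vect_add_1 = va + vb;
     vect_sub_2 = va - vb;
     vect_x_3   = VEC_PERM_EXPR <vect_add_1, vect_sub_2, { 0, 5, 2, 7 }>;

   which a target may later be able to match to a vector addsub
   instruction.  */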

/* Replace scalar calls from SLP node NODE with setting of their lhs to zero.
   For loop vectorization this is done in vectorizable_call, but for SLP
   it needs to be deferred until end of vect_schedule_slp, because multiple
   SLP instances may refer to the same scalar stmt.  */

static void
vect_remove_slp_scalar_calls (vec_info *vinfo,
                              slp_tree node, hash_set<slp_tree> &visited)
{
  gimple *new_stmt;
  gimple_stmt_iterator gsi;
  int i;
  slp_tree child;
  tree lhs;
  stmt_vec_info stmt_info;

  if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
    return;

  if (visited.add (node))
    return;

  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    vect_remove_slp_scalar_calls (vinfo, child, visited);

  FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt_info)
    {
      gcall *stmt = dyn_cast <gcall *> (stmt_info->stmt);
      if (!stmt || gimple_bb (stmt) == NULL)
        continue;
      if (is_pattern_stmt_p (stmt_info)
          || !PURE_SLP_STMT (stmt_info))
        continue;
      lhs = gimple_call_lhs (stmt);
      new_stmt = gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs)));
      gsi = gsi_for_stmt (stmt);
      vinfo->replace_stmt (&gsi, stmt_info, new_stmt);
      SSA_NAME_DEF_STMT (gimple_assign_lhs (new_stmt)) = new_stmt;
    }
}

static void
vect_remove_slp_scalar_calls (vec_info *vinfo, slp_tree node)
{
  hash_set<slp_tree> visited;
  vect_remove_slp_scalar_calls (vinfo, node, visited);
}
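
/* Illustrative example (hypothetical, added for exposition; the SSA names
   are made up): a scalar call in an SLP group, say

     x_1 = sqrtf (a_2);

   is rewritten by the routines above into

     x_1 = 0.0;

   after the vectorized call has been emitted; the zeroed scalar definition
   keeps the IL consistent until DCE deletes it.  */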

/* Vectorize the instance root.  */

void
vectorize_slp_instance_root_stmt (slp_tree node, slp_instance instance)
{
  gassign *rstmt = NULL;

  if (SLP_TREE_NUMBER_OF_VEC_STMTS (node) == 1)
    {
      stmt_vec_info child_stmt_info;
      int j;

      FOR_EACH_VEC_ELT (SLP_TREE_VEC_STMTS (node), j, child_stmt_info)
        {
          tree vect_lhs = gimple_get_lhs (child_stmt_info->stmt);
          tree root_lhs = gimple_get_lhs (instance->root_stmt->stmt);
          if (!useless_type_conversion_p (TREE_TYPE (root_lhs),
                                          TREE_TYPE (vect_lhs)))
            vect_lhs = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (root_lhs),
                               vect_lhs);
          rstmt = gimple_build_assign (root_lhs, vect_lhs);
          break;
        }
    }
  else if (SLP_TREE_NUMBER_OF_VEC_STMTS (node) > 1)
    {
      int nelts = SLP_TREE_NUMBER_OF_VEC_STMTS (node);
      stmt_vec_info child_stmt_info;
      int j;
      vec<constructor_elt, va_gc> *v;
      vec_alloc (v, nelts);

      FOR_EACH_VEC_ELT (SLP_TREE_VEC_STMTS (node), j, child_stmt_info)
        {
          CONSTRUCTOR_APPEND_ELT (v,
                                  NULL_TREE,
                                  gimple_get_lhs (child_stmt_info->stmt));
        }
      tree lhs = gimple_get_lhs (instance->root_stmt->stmt);
      tree rtype = TREE_TYPE (gimple_assign_rhs1 (instance->root_stmt->stmt));
      tree r_constructor = build_constructor (rtype, v);
      rstmt = gimple_build_assign (lhs, r_constructor);
    }

  gcc_assert (rstmt);

  gimple_stmt_iterator rgsi = gsi_for_stmt (instance->root_stmt->stmt);
  gsi_replace (&rgsi, rstmt, true);
}
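
/* Illustrative example (hypothetical, added for exposition; the SSA names
   are made up): for a BB SLP instance whose root is a vector CONSTRUCTOR
   such as

     v_5 = {a_1, b_2, c_3, d_4};

   a single vector statement has its result forwarded to the root lhs
   (through a VIEW_CONVERT_EXPR if the types differ), while multiple vector
   statements are re-assembled into a new CONSTRUCTOR of the root's type,
   e.g. v_5 = {vect_lo_8, vect_hi_9}.  */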

/* Generate vector code for all SLP instances in the loop/basic block.  */

void
vect_schedule_slp (vec_info *vinfo)
{
  vec<slp_instance> slp_instances;
  slp_instance instance;
  unsigned int i;

  slp_instances = vinfo->slp_instances;
  FOR_EACH_VEC_ELT (slp_instances, i, instance)
    {
      slp_tree node = SLP_INSTANCE_TREE (instance);
      /* Schedule the tree of INSTANCE.  */
      vect_schedule_slp_instance (vinfo, node, instance);

      if (SLP_INSTANCE_ROOT_STMT (instance))
        vectorize_slp_instance_root_stmt (node, instance);

      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "vectorizing stmts using SLP.\n");
    }

  FOR_EACH_VEC_ELT (slp_instances, i, instance)
    {
      slp_tree root = SLP_INSTANCE_TREE (instance);
      stmt_vec_info store_info;
      unsigned int j;

      /* Remove scalar call stmts.  Do not do this for basic-block
         vectorization as not all uses may be vectorized.
         ??? Why should this be necessary?  DCE should be able to
         remove the stmts itself.
         ??? For BB vectorization we can as well remove scalar
         stmts starting from the SLP tree root if they have no
         uses.  */
      if (is_a <loop_vec_info> (vinfo))
        vect_remove_slp_scalar_calls (vinfo, root);

      for (j = 0; SLP_TREE_SCALAR_STMTS (root).iterate (j, &store_info); j++)
        {
          if (!STMT_VINFO_DATA_REF (store_info))
            break;

          if (SLP_INSTANCE_ROOT_STMT (instance))
            continue;

          store_info = vect_orig_stmt (store_info);
          /* Free the attached stmt_vec_info and remove the stmt.  */
          vinfo->remove_stmt (store_info);