1 /* SLP - Basic Block Vectorization
2 Copyright (C) 2007-2020 Free Software Foundation, Inc.
3 Contributed by Dorit Naishlos <dorit@il.ibm.com>
4 and Ira Rosen <irar@il.ibm.com>
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
24 #include "coretypes.h"
30 #include "tree-pass.h"
32 #include "optabs-tree.h"
33 #include "insn-config.h"
34 #include "recog.h" /* FIXME: for insn_data */
35 #include "fold-const.h"
36 #include "stor-layout.h"
37 #include "gimple-iterator.h"
39 #include "tree-vectorizer.h"
40 #include "langhooks.h"
41 #include "gimple-walk.h"
43 #include "tree-vector-builder.h"
44 #include "vec-perm-indices.h"
45 #include "gimple-fold.h"
46 #include "internal-fn.h"
47 #include "dump-context.h"
52 static bool vectorizable_slp_permutation (vec_info
*, gimple_stmt_iterator
*,
53 slp_tree
, stmt_vector_for_cost
*);
55 /* Initialize a SLP node. */
57 _slp_tree::_slp_tree ()
59 SLP_TREE_SCALAR_STMTS (this) = vNULL
;
60 SLP_TREE_SCALAR_OPS (this) = vNULL
;
61 SLP_TREE_VEC_STMTS (this) = vNULL
;
62 SLP_TREE_VEC_DEFS (this) = vNULL
;
63 SLP_TREE_NUMBER_OF_VEC_STMTS (this) = 0;
64 SLP_TREE_CHILDREN (this) = vNULL
;
65 SLP_TREE_LOAD_PERMUTATION (this) = vNULL
;
66 SLP_TREE_LANE_PERMUTATION (this) = vNULL
;
67 SLP_TREE_DEF_TYPE (this) = vect_uninitialized_def
;
68 SLP_TREE_CODE (this) = ERROR_MARK
;
69 SLP_TREE_VECTYPE (this) = NULL_TREE
;
70 SLP_TREE_REPRESENTATIVE (this) = NULL
;
71 SLP_TREE_REF_COUNT (this) = 1;
76 /* Tear down a SLP node. */
78 _slp_tree::~_slp_tree ()
80 SLP_TREE_CHILDREN (this).release ();
81 SLP_TREE_SCALAR_STMTS (this).release ();
82 SLP_TREE_SCALAR_OPS (this).release ();
83 SLP_TREE_VEC_STMTS (this).release ();
84 SLP_TREE_VEC_DEFS (this).release ();
85 SLP_TREE_LOAD_PERMUTATION (this).release ();
86 SLP_TREE_LANE_PERMUTATION (this).release ();
89 /* Recursively free the memory allocated for the SLP tree rooted at NODE. */
92 vect_free_slp_tree (slp_tree node
)
97 if (--SLP_TREE_REF_COUNT (node
) != 0)
100 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), i
, child
)
101 vect_free_slp_tree (child
);
106 /* Return a location suitable for dumpings related to the SLP instance. */
109 _slp_instance::location () const
112 return root_stmt
->stmt
;
114 return SLP_TREE_SCALAR_STMTS (root
)[0]->stmt
;
118 /* Free the memory allocated for the SLP instance. */
121 vect_free_slp_instance (slp_instance instance
)
123 vect_free_slp_tree (SLP_INSTANCE_TREE (instance
));
124 SLP_INSTANCE_LOADS (instance
).release ();
125 instance
->subgraph_entries
.release ();
126 instance
->cost_vec
.release ();
131 /* Create an SLP node for SCALAR_STMTS. */
134 vect_create_new_slp_node (vec
<stmt_vec_info
> scalar_stmts
, unsigned nops
)
136 slp_tree node
= new _slp_tree
;
137 SLP_TREE_SCALAR_STMTS (node
) = scalar_stmts
;
138 SLP_TREE_CHILDREN (node
).create (nops
);
139 SLP_TREE_DEF_TYPE (node
) = vect_internal_def
;
140 SLP_TREE_REPRESENTATIVE (node
) = scalar_stmts
[0];
141 SLP_TREE_LANES (node
) = scalar_stmts
.length ();
145 /* Create an SLP node for OPS. */
148 vect_create_new_slp_node (vec
<tree
> ops
)
150 slp_tree node
= new _slp_tree
;
151 SLP_TREE_SCALAR_OPS (node
) = ops
;
152 SLP_TREE_DEF_TYPE (node
) = vect_external_def
;
153 SLP_TREE_LANES (node
) = ops
.length ();
158 /* This structure is used in creation of an SLP tree. Each instance
159 corresponds to the same operand in a group of scalar stmts in an SLP
161 typedef struct _slp_oprnd_info
163 /* Def-stmts for the operands. */
164 vec
<stmt_vec_info
> def_stmts
;
167 /* Information about the first statement, its vector def-type, type, the
168 operand itself in case it's constant, and an indication if it's a pattern
171 enum vect_def_type first_dt
;
176 /* Allocate operands info for NOPS operands, and GROUP_SIZE def-stmts for each
178 static vec
<slp_oprnd_info
>
179 vect_create_oprnd_info (int nops
, int group_size
)
182 slp_oprnd_info oprnd_info
;
183 vec
<slp_oprnd_info
> oprnds_info
;
185 oprnds_info
.create (nops
);
186 for (i
= 0; i
< nops
; i
++)
188 oprnd_info
= XNEW (struct _slp_oprnd_info
);
189 oprnd_info
->def_stmts
.create (group_size
);
190 oprnd_info
->ops
.create (group_size
);
191 oprnd_info
->first_dt
= vect_uninitialized_def
;
192 oprnd_info
->first_op_type
= NULL_TREE
;
193 oprnd_info
->any_pattern
= false;
194 oprnds_info
.quick_push (oprnd_info
);
201 /* Free operands info. */
204 vect_free_oprnd_info (vec
<slp_oprnd_info
> &oprnds_info
)
207 slp_oprnd_info oprnd_info
;
209 FOR_EACH_VEC_ELT (oprnds_info
, i
, oprnd_info
)
211 oprnd_info
->def_stmts
.release ();
212 oprnd_info
->ops
.release ();
213 XDELETE (oprnd_info
);
216 oprnds_info
.release ();
220 /* Return true if STMTS contains a pattern statement. */
223 vect_contains_pattern_stmt_p (vec
<stmt_vec_info
> stmts
)
225 stmt_vec_info stmt_info
;
227 FOR_EACH_VEC_ELT (stmts
, i
, stmt_info
)
228 if (is_pattern_stmt_p (stmt_info
))
233 /* Return true when all lanes in the external or constant NODE have
237 vect_slp_tree_uniform_p (slp_tree node
)
239 gcc_assert (SLP_TREE_DEF_TYPE (node
) == vect_constant_def
240 || SLP_TREE_DEF_TYPE (node
) == vect_external_def
);
242 /* Pre-exsting vectors. */
243 if (SLP_TREE_SCALAR_OPS (node
).is_empty ())
247 tree op
, first
= NULL_TREE
;
248 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_OPS (node
), i
, op
)
251 else if (!operand_equal_p (first
, op
, 0))
257 /* Find the place of the data-ref in STMT_INFO in the interleaving chain
258 that starts from FIRST_STMT_INFO. Return -1 if the data-ref is not a part
262 vect_get_place_in_interleaving_chain (stmt_vec_info stmt_info
,
263 stmt_vec_info first_stmt_info
)
265 stmt_vec_info next_stmt_info
= first_stmt_info
;
268 if (first_stmt_info
!= DR_GROUP_FIRST_ELEMENT (stmt_info
))
273 if (next_stmt_info
== stmt_info
)
275 next_stmt_info
= DR_GROUP_NEXT_ELEMENT (next_stmt_info
);
277 result
+= DR_GROUP_GAP (next_stmt_info
);
279 while (next_stmt_info
);
284 /* Check whether it is possible to load COUNT elements of type ELT_TYPE
285 using the method implemented by duplicate_and_interleave. Return true
286 if so, returning the number of intermediate vectors in *NVECTORS_OUT
287 (if nonnull) and the type of each intermediate vector in *VECTOR_TYPE_OUT
291 can_duplicate_and_interleave_p (vec_info
*vinfo
, unsigned int count
,
292 tree elt_type
, unsigned int *nvectors_out
,
293 tree
*vector_type_out
,
296 tree base_vector_type
= get_vectype_for_scalar_type (vinfo
, elt_type
, count
);
297 if (!base_vector_type
|| !VECTOR_MODE_P (TYPE_MODE (base_vector_type
)))
300 machine_mode base_vector_mode
= TYPE_MODE (base_vector_type
);
301 poly_int64 elt_bytes
= count
* GET_MODE_UNIT_SIZE (base_vector_mode
);
302 unsigned int nvectors
= 1;
305 scalar_int_mode int_mode
;
306 poly_int64 elt_bits
= elt_bytes
* BITS_PER_UNIT
;
307 if (int_mode_for_size (elt_bits
, 1).exists (&int_mode
))
309 /* Get the natural vector type for this SLP group size. */
310 tree int_type
= build_nonstandard_integer_type
311 (GET_MODE_BITSIZE (int_mode
), 1);
313 = get_vectype_for_scalar_type (vinfo
, int_type
, count
);
315 && VECTOR_MODE_P (TYPE_MODE (vector_type
))
316 && known_eq (GET_MODE_SIZE (TYPE_MODE (vector_type
)),
317 GET_MODE_SIZE (base_vector_mode
)))
319 /* Try fusing consecutive sequences of COUNT / NVECTORS elements
320 together into elements of type INT_TYPE and using the result
321 to build NVECTORS vectors. */
322 poly_uint64 nelts
= GET_MODE_NUNITS (TYPE_MODE (vector_type
));
323 vec_perm_builder
sel1 (nelts
, 2, 3);
324 vec_perm_builder
sel2 (nelts
, 2, 3);
325 poly_int64 half_nelts
= exact_div (nelts
, 2);
326 for (unsigned int i
= 0; i
< 3; ++i
)
329 sel1
.quick_push (i
+ nelts
);
330 sel2
.quick_push (half_nelts
+ i
);
331 sel2
.quick_push (half_nelts
+ i
+ nelts
);
333 vec_perm_indices
indices1 (sel1
, 2, nelts
);
334 vec_perm_indices
indices2 (sel2
, 2, nelts
);
335 if (can_vec_perm_const_p (TYPE_MODE (vector_type
), indices1
)
336 && can_vec_perm_const_p (TYPE_MODE (vector_type
), indices2
))
339 *nvectors_out
= nvectors
;
341 *vector_type_out
= vector_type
;
344 permutes
[0] = vect_gen_perm_mask_checked (vector_type
,
346 permutes
[1] = vect_gen_perm_mask_checked (vector_type
,
353 if (!multiple_p (elt_bytes
, 2, &elt_bytes
))
359 /* Return true if DTA and DTB match. */
362 vect_def_types_match (enum vect_def_type dta
, enum vect_def_type dtb
)
365 || ((dta
== vect_external_def
|| dta
== vect_constant_def
)
366 && (dtb
== vect_external_def
|| dtb
== vect_constant_def
)));
369 /* Get the defs for the rhs of STMT (collect them in OPRNDS_INFO), check that
370 they are of a valid type and that they match the defs of the first stmt of
371 the SLP group (stored in OPRNDS_INFO). This function tries to match stmts
372 by swapping operands of STMTS[STMT_NUM] when possible. Non-zero *SWAP
373 indicates swap is required for cond_expr stmts. Specifically, *SWAP
374 is 1 if STMT is cond and operands of comparison need to be swapped;
375 *SWAP is 2 if STMT is cond and code of comparison needs to be inverted.
376 If there is any operand swap in this function, *SWAP is set to non-zero
378 If there was a fatal error return -1; if the error could be corrected by
379 swapping operands of father node of this one, return 1; if everything is
382 vect_get_and_check_slp_defs (vec_info
*vinfo
, unsigned char swap
,
383 vec
<stmt_vec_info
> stmts
, unsigned stmt_num
,
384 vec
<slp_oprnd_info
> *oprnds_info
)
386 stmt_vec_info stmt_info
= stmts
[stmt_num
];
388 unsigned int i
, number_of_oprnds
;
389 enum vect_def_type dt
= vect_uninitialized_def
;
390 slp_oprnd_info oprnd_info
;
391 int first_op_idx
= 1;
392 unsigned int commutative_op
= -1U;
393 bool first_op_cond
= false;
394 bool first
= stmt_num
== 0;
396 if (gcall
*stmt
= dyn_cast
<gcall
*> (stmt_info
->stmt
))
398 number_of_oprnds
= gimple_call_num_args (stmt
);
400 if (gimple_call_internal_p (stmt
))
402 internal_fn ifn
= gimple_call_internal_fn (stmt
);
403 commutative_op
= first_commutative_argument (ifn
);
405 /* Masked load, only look at mask. */
406 if (ifn
== IFN_MASK_LOAD
)
408 number_of_oprnds
= 1;
409 /* Mask operand index. */
414 else if (gassign
*stmt
= dyn_cast
<gassign
*> (stmt_info
->stmt
))
416 enum tree_code code
= gimple_assign_rhs_code (stmt
);
417 number_of_oprnds
= gimple_num_ops (stmt
) - 1;
418 /* Swap can only be done for cond_expr if asked to, otherwise we
419 could result in different comparison code to the first stmt. */
420 if (code
== COND_EXPR
421 && COMPARISON_CLASS_P (gimple_assign_rhs1 (stmt
)))
423 first_op_cond
= true;
427 commutative_op
= commutative_tree_code (code
) ? 0U : -1U;
432 bool swapped
= (swap
!= 0);
433 gcc_assert (!swapped
|| first_op_cond
);
434 enum vect_def_type
*dts
= XALLOCAVEC (enum vect_def_type
, number_of_oprnds
);
435 for (i
= 0; i
< number_of_oprnds
; i
++)
439 /* Map indicating how operands of cond_expr should be swapped. */
440 int maps
[3][4] = {{0, 1, 2, 3}, {1, 0, 2, 3}, {0, 1, 3, 2}};
441 int *map
= maps
[swap
];
444 oprnd
= TREE_OPERAND (gimple_op (stmt_info
->stmt
,
445 first_op_idx
), map
[i
]);
447 oprnd
= gimple_op (stmt_info
->stmt
, map
[i
]);
450 oprnd
= gimple_op (stmt_info
->stmt
, first_op_idx
+ (swapped
? !i
: i
));
451 if (TREE_CODE (oprnd
) == VIEW_CONVERT_EXPR
)
452 oprnd
= TREE_OPERAND (oprnd
, 0);
454 oprnd_info
= (*oprnds_info
)[i
];
456 stmt_vec_info def_stmt_info
;
457 if (!vect_is_simple_use (oprnd
, vinfo
, &dts
[i
], &def_stmt_info
))
459 if (dump_enabled_p ())
460 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
461 "Build SLP failed: can't analyze def for %T\n",
467 if (def_stmt_info
&& is_pattern_stmt_p (def_stmt_info
))
468 oprnd_info
->any_pattern
= true;
470 oprnd_info
->def_stmts
.quick_push (def_stmt_info
);
471 oprnd_info
->ops
.quick_push (oprnd
);
475 tree type
= TREE_TYPE (oprnd
);
477 if ((dt
== vect_constant_def
478 || dt
== vect_external_def
)
479 && !GET_MODE_SIZE (vinfo
->vector_mode
).is_constant ()
480 && (TREE_CODE (type
) == BOOLEAN_TYPE
481 || !can_duplicate_and_interleave_p (vinfo
, stmts
.length (),
484 if (dump_enabled_p ())
485 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
486 "Build SLP failed: invalid type of def "
487 "for variable-length SLP %T\n", oprnd
);
491 /* For the swapping logic below force vect_reduction_def
492 for the reduction op in a SLP reduction group. */
493 if (!STMT_VINFO_DATA_REF (stmt_info
)
494 && REDUC_GROUP_FIRST_ELEMENT (stmt_info
)
495 && (int)i
== STMT_VINFO_REDUC_IDX (stmt_info
)
497 dts
[i
] = dt
= vect_reduction_def
;
499 /* Check the types of the definition. */
502 case vect_external_def
:
503 case vect_constant_def
:
504 case vect_internal_def
:
505 case vect_reduction_def
:
506 case vect_induction_def
:
510 /* FORNOW: Not supported. */
511 if (dump_enabled_p ())
512 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
513 "Build SLP failed: illegal type of def %T\n",
518 oprnd_info
->first_dt
= dt
;
519 oprnd_info
->first_op_type
= type
;
525 /* Now match the operand definition types to that of the first stmt. */
526 for (i
= 0; i
< number_of_oprnds
;)
528 oprnd_info
= (*oprnds_info
)[i
];
530 stmt_vec_info def_stmt_info
= oprnd_info
->def_stmts
[stmt_num
];
531 oprnd
= oprnd_info
->ops
[stmt_num
];
532 tree type
= TREE_TYPE (oprnd
);
534 if (!types_compatible_p (oprnd_info
->first_op_type
, type
))
536 if (dump_enabled_p ())
537 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
538 "Build SLP failed: different operand types\n");
542 /* Not first stmt of the group, check that the def-stmt/s match
543 the def-stmt/s of the first stmt. Allow different definition
544 types for reduction chains: the first stmt must be a
545 vect_reduction_def (a phi node), and the rest
546 end in the reduction chain. */
547 if ((!vect_def_types_match (oprnd_info
->first_dt
, dt
)
548 && !(oprnd_info
->first_dt
== vect_reduction_def
549 && !STMT_VINFO_DATA_REF (stmt_info
)
550 && REDUC_GROUP_FIRST_ELEMENT (stmt_info
)
552 && !STMT_VINFO_DATA_REF (def_stmt_info
)
553 && (REDUC_GROUP_FIRST_ELEMENT (def_stmt_info
)
554 == REDUC_GROUP_FIRST_ELEMENT (stmt_info
))))
555 || (!STMT_VINFO_DATA_REF (stmt_info
)
556 && REDUC_GROUP_FIRST_ELEMENT (stmt_info
)
558 || STMT_VINFO_DATA_REF (def_stmt_info
)
559 || (REDUC_GROUP_FIRST_ELEMENT (def_stmt_info
)
560 != REDUC_GROUP_FIRST_ELEMENT (stmt_info
)))
561 != (oprnd_info
->first_dt
!= vect_reduction_def
))))
563 /* Try swapping operands if we got a mismatch. For BB
564 vectorization only in case it will clearly improve things. */
565 if (i
== commutative_op
&& !swapped
566 && (!is_a
<bb_vec_info
> (vinfo
)
567 || (!vect_def_types_match ((*oprnds_info
)[i
+1]->first_dt
,
569 && (vect_def_types_match (oprnd_info
->first_dt
, dts
[i
+1])
570 || vect_def_types_match
571 ((*oprnds_info
)[i
+1]->first_dt
, dts
[i
])))))
573 if (dump_enabled_p ())
574 dump_printf_loc (MSG_NOTE
, vect_location
,
575 "trying swapped operands\n");
576 std::swap (dts
[i
], dts
[i
+1]);
577 std::swap ((*oprnds_info
)[i
]->def_stmts
[stmt_num
],
578 (*oprnds_info
)[i
+1]->def_stmts
[stmt_num
]);
579 std::swap ((*oprnds_info
)[i
]->ops
[stmt_num
],
580 (*oprnds_info
)[i
+1]->ops
[stmt_num
]);
585 if (is_a
<bb_vec_info
> (vinfo
))
587 /* Now for commutative ops we should see whether we can
588 make the other operand matching. */
589 if (dump_enabled_p ())
590 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
591 "treating operand as external\n");
592 oprnd_info
->first_dt
= dt
= vect_external_def
;
596 if (dump_enabled_p ())
597 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
598 "Build SLP failed: different types\n");
603 /* Make sure to demote the overall operand to external. */
604 if (dt
== vect_external_def
)
605 oprnd_info
->first_dt
= vect_external_def
;
606 /* For a SLP reduction chain we want to duplicate the reduction to
607 each of the chain members. That gets us a sane SLP graph (still
608 the stmts are not 100% correct wrt the initial values). */
609 else if ((dt
== vect_internal_def
610 || dt
== vect_reduction_def
)
611 && oprnd_info
->first_dt
== vect_reduction_def
612 && !STMT_VINFO_DATA_REF (stmt_info
)
613 && REDUC_GROUP_FIRST_ELEMENT (stmt_info
)
614 && !STMT_VINFO_DATA_REF (def_stmt_info
)
615 && (REDUC_GROUP_FIRST_ELEMENT (def_stmt_info
)
616 == REDUC_GROUP_FIRST_ELEMENT (stmt_info
)))
618 oprnd_info
->def_stmts
[stmt_num
] = oprnd_info
->def_stmts
[0];
619 oprnd_info
->ops
[stmt_num
] = oprnd_info
->ops
[0];
628 if (dump_enabled_p ())
629 dump_printf_loc (MSG_NOTE
, vect_location
,
630 "swapped operands to match def types in %G",
637 /* Try to assign vector type VECTYPE to STMT_INFO for BB vectorization.
638 Return true if we can, meaning that this choice doesn't conflict with
639 existing SLP nodes that use STMT_INFO. */
642 vect_update_shared_vectype (stmt_vec_info stmt_info
, tree vectype
)
644 tree old_vectype
= STMT_VINFO_VECTYPE (stmt_info
);
646 return useless_type_conversion_p (vectype
, old_vectype
);
648 if (STMT_VINFO_GROUPED_ACCESS (stmt_info
))
650 /* We maintain the invariant that if any statement in the group is
651 used, all other members of the group have the same vector type. */
652 stmt_vec_info first_info
= DR_GROUP_FIRST_ELEMENT (stmt_info
);
653 stmt_vec_info member_info
= first_info
;
654 for (; member_info
; member_info
= DR_GROUP_NEXT_ELEMENT (member_info
))
655 if (is_pattern_stmt_p (member_info
)
656 && !useless_type_conversion_p (vectype
,
657 STMT_VINFO_VECTYPE (member_info
)))
662 for (member_info
= first_info
; member_info
;
663 member_info
= DR_GROUP_NEXT_ELEMENT (member_info
))
664 STMT_VINFO_VECTYPE (member_info
) = vectype
;
668 else if (!is_pattern_stmt_p (stmt_info
))
670 STMT_VINFO_VECTYPE (stmt_info
) = vectype
;
674 if (dump_enabled_p ())
676 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
677 "Build SLP failed: incompatible vector"
678 " types for: %G", stmt_info
->stmt
);
679 dump_printf_loc (MSG_NOTE
, vect_location
,
680 " old vector type: %T\n", old_vectype
);
681 dump_printf_loc (MSG_NOTE
, vect_location
,
682 " new vector type: %T\n", vectype
);
687 /* Return true if call statements CALL1 and CALL2 are similar enough
688 to be combined into the same SLP group. */
691 compatible_calls_p (gcall
*call1
, gcall
*call2
)
693 unsigned int nargs
= gimple_call_num_args (call1
);
694 if (nargs
!= gimple_call_num_args (call2
))
697 if (gimple_call_combined_fn (call1
) != gimple_call_combined_fn (call2
))
700 if (gimple_call_internal_p (call1
))
702 if (!types_compatible_p (TREE_TYPE (gimple_call_lhs (call1
)),
703 TREE_TYPE (gimple_call_lhs (call2
))))
705 for (unsigned int i
= 0; i
< nargs
; ++i
)
706 if (!types_compatible_p (TREE_TYPE (gimple_call_arg (call1
, i
)),
707 TREE_TYPE (gimple_call_arg (call2
, i
))))
712 if (!operand_equal_p (gimple_call_fn (call1
),
713 gimple_call_fn (call2
), 0))
716 if (gimple_call_fntype (call1
) != gimple_call_fntype (call2
))
722 /* A subroutine of vect_build_slp_tree for checking VECTYPE, which is the
723 caller's attempt to find the vector type in STMT_INFO with the narrowest
724 element type. Return true if VECTYPE is nonnull and if it is valid
725 for STMT_INFO. When returning true, update MAX_NUNITS to reflect the
726 number of units in VECTYPE. GROUP_SIZE and MAX_NUNITS are as for
727 vect_build_slp_tree. */
730 vect_record_max_nunits (vec_info
*vinfo
, stmt_vec_info stmt_info
,
731 unsigned int group_size
,
732 tree vectype
, poly_uint64
*max_nunits
)
736 if (dump_enabled_p ())
737 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
738 "Build SLP failed: unsupported data-type in %G\n",
740 /* Fatal mismatch. */
744 /* If populating the vector type requires unrolling then fail
745 before adjusting *max_nunits for basic-block vectorization. */
746 poly_uint64 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
747 unsigned HOST_WIDE_INT const_nunits
;
748 if (is_a
<bb_vec_info
> (vinfo
)
749 && (!nunits
.is_constant (&const_nunits
)
750 || const_nunits
> group_size
))
752 if (dump_enabled_p ())
753 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
754 "Build SLP failed: unrolling required "
755 "in basic block SLP\n");
756 /* Fatal mismatch. */
760 /* In case of multiple types we need to detect the smallest type. */
761 vect_update_max_nunits (max_nunits
, vectype
);
765 /* Verify if the scalar stmts STMTS are isomorphic, require data
766 permutation or are of unsupported types of operation. Return
767 true if they are, otherwise return false and indicate in *MATCHES
768 which stmts are not isomorphic to the first one. If MATCHES[0]
769 is false then this indicates the comparison could not be
770 carried out or the stmts will never be vectorized by SLP.
772 Note COND_EXPR is possibly isomorphic to another one after swapping its
773 operands. Set SWAP[i] to 1 if stmt I is COND_EXPR and isomorphic to
774 the first stmt by swapping the two operands of comparison; set SWAP[i]
775 to 2 if stmt I is isormorphic to the first stmt by inverting the code
776 of comparison. Take A1 >= B1 ? X1 : Y1 as an exmple, it can be swapped
777 to (B1 <= A1 ? X1 : Y1); or be inverted to (A1 < B1) ? Y1 : X1. */
780 vect_build_slp_tree_1 (vec_info
*vinfo
, unsigned char *swap
,
781 vec
<stmt_vec_info
> stmts
, unsigned int group_size
,
782 poly_uint64
*max_nunits
, bool *matches
,
783 bool *two_operators
, tree
*node_vectype
)
786 stmt_vec_info first_stmt_info
= stmts
[0];
787 enum tree_code first_stmt_code
= ERROR_MARK
;
788 enum tree_code alt_stmt_code
= ERROR_MARK
;
789 enum tree_code rhs_code
= ERROR_MARK
;
790 enum tree_code first_cond_code
= ERROR_MARK
;
792 bool need_same_oprnds
= false;
793 tree vectype
= NULL_TREE
, first_op1
= NULL_TREE
;
796 machine_mode optab_op2_mode
;
797 machine_mode vec_mode
;
798 stmt_vec_info first_load
= NULL
, prev_first_load
= NULL
;
799 bool first_stmt_load_p
= false, load_p
= false;
801 /* For every stmt in NODE find its def stmt/s. */
802 stmt_vec_info stmt_info
;
803 FOR_EACH_VEC_ELT (stmts
, i
, stmt_info
)
805 gimple
*stmt
= stmt_info
->stmt
;
809 if (dump_enabled_p ())
810 dump_printf_loc (MSG_NOTE
, vect_location
, "Build SLP for %G", stmt
);
812 /* Fail to vectorize statements marked as unvectorizable, throw
814 if (!STMT_VINFO_VECTORIZABLE (stmt_info
)
815 || stmt_can_throw_internal (cfun
, stmt
)
816 || gimple_has_volatile_ops (stmt
))
818 if (dump_enabled_p ())
819 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
820 "Build SLP failed: unvectorizable statement %G",
822 /* ??? For BB vectorization we want to commutate operands in a way
823 to shuffle all unvectorizable defs into one operand and have
824 the other still vectorized. The following doesn't reliably
825 work for this though but it's the easiest we can do here. */
826 if (is_a
<bb_vec_info
> (vinfo
) && i
!= 0)
828 /* Fatal mismatch. */
833 lhs
= gimple_get_lhs (stmt
);
834 if (lhs
== NULL_TREE
)
836 if (dump_enabled_p ())
837 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
838 "Build SLP failed: not GIMPLE_ASSIGN nor "
839 "GIMPLE_CALL %G", stmt
);
840 if (is_a
<bb_vec_info
> (vinfo
) && i
!= 0)
842 /* Fatal mismatch. */
848 if (!vect_get_vector_types_for_stmt (vinfo
, stmt_info
, &vectype
,
849 &nunits_vectype
, group_size
)
851 && !vect_record_max_nunits (vinfo
, stmt_info
, group_size
,
852 nunits_vectype
, max_nunits
)))
854 if (is_a
<bb_vec_info
> (vinfo
) && i
!= 0)
856 /* Fatal mismatch. */
861 gcc_assert (vectype
);
863 gcall
*call_stmt
= dyn_cast
<gcall
*> (stmt
);
866 rhs_code
= CALL_EXPR
;
868 if (gimple_call_internal_p (stmt
, IFN_MASK_LOAD
))
870 else if ((gimple_call_internal_p (call_stmt
)
871 && (!vectorizable_internal_fn_p
872 (gimple_call_internal_fn (call_stmt
))))
873 || gimple_call_tail_p (call_stmt
)
874 || gimple_call_noreturn_p (call_stmt
)
875 || !gimple_call_nothrow_p (call_stmt
)
876 || gimple_call_chain (call_stmt
))
878 if (dump_enabled_p ())
879 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
880 "Build SLP failed: unsupported call type %G",
882 if (is_a
<bb_vec_info
> (vinfo
) && i
!= 0)
884 /* Fatal mismatch. */
891 rhs_code
= gimple_assign_rhs_code (stmt
);
892 load_p
= gimple_vuse (stmt
);
895 /* Check the operation. */
898 *node_vectype
= vectype
;
899 first_stmt_code
= rhs_code
;
900 first_stmt_load_p
= load_p
;
902 /* Shift arguments should be equal in all the packed stmts for a
903 vector shift with scalar shift operand. */
904 if (rhs_code
== LSHIFT_EXPR
|| rhs_code
== RSHIFT_EXPR
905 || rhs_code
== LROTATE_EXPR
906 || rhs_code
== RROTATE_EXPR
)
908 vec_mode
= TYPE_MODE (vectype
);
910 /* First see if we have a vector/vector shift. */
911 optab
= optab_for_tree_code (rhs_code
, vectype
,
915 || optab_handler (optab
, vec_mode
) == CODE_FOR_nothing
)
917 /* No vector/vector shift, try for a vector/scalar shift. */
918 optab
= optab_for_tree_code (rhs_code
, vectype
,
923 if (dump_enabled_p ())
924 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
925 "Build SLP failed: no optab.\n");
926 if (is_a
<bb_vec_info
> (vinfo
) && i
!= 0)
928 /* Fatal mismatch. */
932 icode
= (int) optab_handler (optab
, vec_mode
);
933 if (icode
== CODE_FOR_nothing
)
935 if (dump_enabled_p ())
936 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
938 "op not supported by target.\n");
939 if (is_a
<bb_vec_info
> (vinfo
) && i
!= 0)
941 /* Fatal mismatch. */
945 optab_op2_mode
= insn_data
[icode
].operand
[2].mode
;
946 if (!VECTOR_MODE_P (optab_op2_mode
))
948 need_same_oprnds
= true;
949 first_op1
= gimple_assign_rhs2 (stmt
);
953 else if (rhs_code
== WIDEN_LSHIFT_EXPR
)
955 need_same_oprnds
= true;
956 first_op1
= gimple_assign_rhs2 (stmt
);
959 && rhs_code
== BIT_FIELD_REF
)
961 tree vec
= TREE_OPERAND (gimple_assign_rhs1 (stmt
), 0);
962 if (TREE_CODE (vec
) != SSA_NAME
963 || !types_compatible_p (vectype
, TREE_TYPE (vec
)))
965 if (is_a
<bb_vec_info
> (vinfo
) && i
!= 0)
967 if (dump_enabled_p ())
968 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
970 "BIT_FIELD_REF not supported\n");
971 /* Fatal mismatch. */
977 && gimple_call_internal_p (call_stmt
, IFN_DIV_POW2
))
979 need_same_oprnds
= true;
980 first_op1
= gimple_call_arg (call_stmt
, 1);
985 if (first_stmt_code
!= rhs_code
986 && alt_stmt_code
== ERROR_MARK
)
987 alt_stmt_code
= rhs_code
;
988 if ((first_stmt_code
!= rhs_code
989 && (first_stmt_code
!= IMAGPART_EXPR
990 || rhs_code
!= REALPART_EXPR
)
991 && (first_stmt_code
!= REALPART_EXPR
992 || rhs_code
!= IMAGPART_EXPR
)
993 /* Handle mismatches in plus/minus by computing both
994 and merging the results. */
995 && !((first_stmt_code
== PLUS_EXPR
996 || first_stmt_code
== MINUS_EXPR
)
997 && (alt_stmt_code
== PLUS_EXPR
998 || alt_stmt_code
== MINUS_EXPR
)
999 && rhs_code
== alt_stmt_code
)
1000 && !(STMT_VINFO_GROUPED_ACCESS (stmt_info
)
1001 && (first_stmt_code
== ARRAY_REF
1002 || first_stmt_code
== BIT_FIELD_REF
1003 || first_stmt_code
== INDIRECT_REF
1004 || first_stmt_code
== COMPONENT_REF
1005 || first_stmt_code
== MEM_REF
)))
1006 || first_stmt_load_p
!= load_p
)
1008 if (dump_enabled_p ())
1010 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1011 "Build SLP failed: different operation "
1012 "in stmt %G", stmt
);
1013 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1014 "original stmt %G", first_stmt_info
->stmt
);
1020 if (need_same_oprnds
)
1022 tree other_op1
= (call_stmt
1023 ? gimple_call_arg (call_stmt
, 1)
1024 : gimple_assign_rhs2 (stmt
));
1025 if (!operand_equal_p (first_op1
, other_op1
, 0))
1027 if (dump_enabled_p ())
1028 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1029 "Build SLP failed: different shift "
1030 "arguments in %G", stmt
);
1036 && first_stmt_code
== BIT_FIELD_REF
1037 && (TREE_OPERAND (gimple_assign_rhs1 (first_stmt_info
->stmt
), 0)
1038 != TREE_OPERAND (gimple_assign_rhs1 (stmt_info
->stmt
), 0)))
1040 if (dump_enabled_p ())
1041 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1042 "Build SLP failed: different BIT_FIELD_REF "
1043 "arguments in %G", stmt
);
1048 if (!load_p
&& rhs_code
== CALL_EXPR
)
1050 if (!compatible_calls_p (as_a
<gcall
*> (stmts
[0]->stmt
),
1051 as_a
<gcall
*> (stmt
)))
1053 if (dump_enabled_p ())
1054 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1055 "Build SLP failed: different calls in %G",
1062 if (!types_compatible_p (vectype
, *node_vectype
))
1064 if (dump_enabled_p ())
1065 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1066 "Build SLP failed: different vector type "
1073 /* Grouped store or load. */
1074 if (STMT_VINFO_GROUPED_ACCESS (stmt_info
))
1076 if (REFERENCE_CLASS_P (lhs
))
1084 first_load
= DR_GROUP_FIRST_ELEMENT (stmt_info
);
1085 if (prev_first_load
)
1087 /* Check that there are no loads from different interleaving
1088 chains in the same node. */
1089 if (prev_first_load
!= first_load
)
1091 if (dump_enabled_p ())
1092 dump_printf_loc (MSG_MISSED_OPTIMIZATION
,
1094 "Build SLP failed: different "
1095 "interleaving chains in one node %G",
1102 prev_first_load
= first_load
;
1104 } /* Grouped access. */
1109 /* Not grouped load. */
1110 if (dump_enabled_p ())
1111 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1112 "Build SLP failed: not grouped load %G", stmt
);
1114 /* FORNOW: Not grouped loads are not supported. */
1115 if (is_a
<bb_vec_info
> (vinfo
) && i
!= 0)
1117 /* Fatal mismatch. */
1122 /* Not memory operation. */
1123 if (TREE_CODE_CLASS (rhs_code
) != tcc_binary
1124 && TREE_CODE_CLASS (rhs_code
) != tcc_unary
1125 && TREE_CODE_CLASS (rhs_code
) != tcc_expression
1126 && TREE_CODE_CLASS (rhs_code
) != tcc_comparison
1127 && rhs_code
!= VIEW_CONVERT_EXPR
1128 && rhs_code
!= CALL_EXPR
1129 && rhs_code
!= BIT_FIELD_REF
)
1131 if (dump_enabled_p ())
1132 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1133 "Build SLP failed: operation unsupported %G",
1135 if (is_a
<bb_vec_info
> (vinfo
) && i
!= 0)
1137 /* Fatal mismatch. */
1142 if (rhs_code
== COND_EXPR
)
1144 tree cond_expr
= gimple_assign_rhs1 (stmt
);
1145 enum tree_code cond_code
= TREE_CODE (cond_expr
);
1146 enum tree_code swap_code
= ERROR_MARK
;
1147 enum tree_code invert_code
= ERROR_MARK
;
1150 first_cond_code
= TREE_CODE (cond_expr
);
1151 else if (TREE_CODE_CLASS (cond_code
) == tcc_comparison
)
1153 bool honor_nans
= HONOR_NANS (TREE_OPERAND (cond_expr
, 0));
1154 swap_code
= swap_tree_comparison (cond_code
);
1155 invert_code
= invert_tree_comparison (cond_code
, honor_nans
);
1158 if (first_cond_code
== cond_code
)
1160 /* Isomorphic can be achieved by swapping. */
1161 else if (first_cond_code
== swap_code
)
1163 /* Isomorphic can be achieved by inverting. */
1164 else if (first_cond_code
== invert_code
)
1168 if (dump_enabled_p ())
1169 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1170 "Build SLP failed: different"
1171 " operation %G", stmt
);
1181 for (i
= 0; i
< group_size
; ++i
)
1185 /* If we allowed a two-operation SLP node verify the target can cope
1186 with the permute we are going to use. */
1187 if (alt_stmt_code
!= ERROR_MARK
1188 && TREE_CODE_CLASS (alt_stmt_code
) != tcc_reference
)
1190 *two_operators
= true;
1196 /* Traits for the hash_set to record failed SLP builds for a stmt set.
1197 Note we never remove apart from at destruction time so we do not
1198 need a special value for deleted that differs from empty. */
1201 typedef vec
<stmt_vec_info
> value_type
;
1202 typedef vec
<stmt_vec_info
> compare_type
;
1203 static inline hashval_t
hash (value_type
);
1204 static inline bool equal (value_type existing
, value_type candidate
);
1205 static inline bool is_empty (value_type x
) { return !x
.exists (); }
1206 static inline bool is_deleted (value_type x
) { return !x
.exists (); }
1207 static const bool empty_zero_p
= true;
1208 static inline void mark_empty (value_type
&x
) { x
.release (); }
1209 static inline void mark_deleted (value_type
&x
) { x
.release (); }
1210 static inline void remove (value_type
&x
) { x
.release (); }
1213 bst_traits::hash (value_type x
)
1216 for (unsigned i
= 0; i
< x
.length (); ++i
)
1217 h
.add_int (gimple_uid (x
[i
]->stmt
));
1221 bst_traits::equal (value_type existing
, value_type candidate
)
1223 if (existing
.length () != candidate
.length ())
1225 for (unsigned i
= 0; i
< existing
.length (); ++i
)
1226 if (existing
[i
] != candidate
[i
])
1231 typedef hash_map
<vec
<gimple
*>, slp_tree
,
1232 simple_hashmap_traits
<bst_traits
, slp_tree
> >
1233 scalar_stmts_to_slp_tree_map_t
;
1236 vect_build_slp_tree_2 (vec_info
*vinfo
,
1237 vec
<stmt_vec_info
> stmts
, unsigned int group_size
,
1238 poly_uint64
*max_nunits
,
1239 bool *matches
, unsigned *npermutes
, unsigned *tree_size
,
1240 scalar_stmts_to_slp_tree_map_t
*bst_map
);
1243 vect_build_slp_tree (vec_info
*vinfo
,
1244 vec
<stmt_vec_info
> stmts
, unsigned int group_size
,
1245 poly_uint64
*max_nunits
,
1246 bool *matches
, unsigned *npermutes
, unsigned *tree_size
,
1247 scalar_stmts_to_slp_tree_map_t
*bst_map
)
1249 if (slp_tree
*leader
= bst_map
->get (stmts
))
1251 if (dump_enabled_p ())
1252 dump_printf_loc (MSG_NOTE
, vect_location
, "re-using %sSLP tree %p\n",
1253 *leader
? "" : "failed ", *leader
);
1256 SLP_TREE_REF_COUNT (*leader
)++;
1257 vect_update_max_nunits (max_nunits
, (*leader
)->max_nunits
);
1261 poly_uint64 this_max_nunits
= 1;
1262 slp_tree res
= vect_build_slp_tree_2 (vinfo
, stmts
, group_size
,
1264 matches
, npermutes
, tree_size
, bst_map
);
1267 res
->max_nunits
= this_max_nunits
;
1268 vect_update_max_nunits (max_nunits
, this_max_nunits
);
1269 /* Keep a reference for the bst_map use. */
1270 SLP_TREE_REF_COUNT (res
)++;
1272 bst_map
->put (stmts
.copy (), res
);
1276 /* Recursively build an SLP tree starting from NODE.
1277 Fail (and return a value not equal to zero) if def-stmts are not
1278 isomorphic, require data permutation or are of unsupported types of
1279 operation. Otherwise, return 0.
1280 The value returned is the depth in the SLP tree where a mismatch
1284 vect_build_slp_tree_2 (vec_info
*vinfo
,
1285 vec
<stmt_vec_info
> stmts
, unsigned int group_size
,
1286 poly_uint64
*max_nunits
,
1287 bool *matches
, unsigned *npermutes
, unsigned *tree_size
,
1288 scalar_stmts_to_slp_tree_map_t
*bst_map
)
1290 unsigned nops
, i
, this_tree_size
= 0;
1291 poly_uint64 this_max_nunits
= *max_nunits
;
1296 stmt_vec_info stmt_info
= stmts
[0];
1297 if (gcall
*stmt
= dyn_cast
<gcall
*> (stmt_info
->stmt
))
1298 nops
= gimple_call_num_args (stmt
);
1299 else if (gassign
*stmt
= dyn_cast
<gassign
*> (stmt_info
->stmt
))
1301 nops
= gimple_num_ops (stmt
) - 1;
1302 if (gimple_assign_rhs_code (stmt
) == COND_EXPR
)
1305 else if (gphi
*phi
= dyn_cast
<gphi
*> (stmt_info
->stmt
))
1306 nops
= gimple_phi_num_args (phi
);
1310 /* If the SLP node is a PHI (induction or reduction), terminate
1312 if (gphi
*stmt
= dyn_cast
<gphi
*> (stmt_info
->stmt
))
1314 tree scalar_type
= TREE_TYPE (PHI_RESULT (stmt
));
1315 tree vectype
= get_vectype_for_scalar_type (vinfo
, scalar_type
,
1317 if (!vect_record_max_nunits (vinfo
, stmt_info
, group_size
, vectype
,
1321 vect_def_type def_type
= STMT_VINFO_DEF_TYPE (stmt_info
);
1322 /* Induction from different IVs is not supported. */
1323 if (def_type
== vect_induction_def
)
1325 stmt_vec_info other_info
;
1326 FOR_EACH_VEC_ELT (stmts
, i
, other_info
)
1327 if (stmt_info
!= other_info
)
1330 else if (def_type
== vect_reduction_def
1331 || def_type
== vect_double_reduction_def
1332 || def_type
== vect_nested_cycle
)
1334 /* Else def types have to match. */
1335 stmt_vec_info other_info
;
1336 FOR_EACH_VEC_ELT (stmts
, i
, other_info
)
1337 if (STMT_VINFO_DEF_TYPE (other_info
) != def_type
)
1343 node
= vect_create_new_slp_node (stmts
, nops
);
1344 SLP_TREE_VECTYPE (node
) = vectype
;
1349 bool two_operators
= false;
1350 unsigned char *swap
= XALLOCAVEC (unsigned char, group_size
);
1351 tree vectype
= NULL_TREE
;
1352 if (!vect_build_slp_tree_1 (vinfo
, swap
, stmts
, group_size
,
1353 &this_max_nunits
, matches
, &two_operators
,
1357 /* If the SLP node is a load, terminate the recursion unless masked. */
1358 if (STMT_VINFO_GROUPED_ACCESS (stmt_info
)
1359 && DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info
)))
1361 if (gcall
*stmt
= dyn_cast
<gcall
*> (stmt_info
->stmt
))
1364 gcc_assert (gimple_call_internal_p (stmt
, IFN_MASK_LOAD
));
1369 *max_nunits
= this_max_nunits
;
1371 node
= vect_create_new_slp_node (stmts
, 0);
1372 SLP_TREE_VECTYPE (node
) = vectype
;
1373 /* And compute the load permutation. Whether it is actually
1374 a permutation depends on the unrolling factor which is
1376 vec
<unsigned> load_permutation
;
1378 stmt_vec_info load_info
;
1379 load_permutation
.create (group_size
);
1380 stmt_vec_info first_stmt_info
1381 = DR_GROUP_FIRST_ELEMENT (SLP_TREE_SCALAR_STMTS (node
)[0]);
1382 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node
), j
, load_info
)
1384 int load_place
= vect_get_place_in_interleaving_chain
1385 (load_info
, first_stmt_info
);
1386 gcc_assert (load_place
!= -1);
1387 load_permutation
.safe_push (load_place
);
1389 SLP_TREE_LOAD_PERMUTATION (node
) = load_permutation
;
1393 else if (gimple_assign_single_p (stmt_info
->stmt
)
1394 && !gimple_vuse (stmt_info
->stmt
)
1395 && gimple_assign_rhs_code (stmt_info
->stmt
) == BIT_FIELD_REF
)
1397 /* vect_build_slp_tree_2 determined all BIT_FIELD_REFs reference
1398 the same SSA name vector of a compatible type to vectype. */
1399 vec
<std::pair
<unsigned, unsigned> > lperm
= vNULL
;
1400 tree vec
= TREE_OPERAND (gimple_assign_rhs1 (stmt_info
->stmt
), 0);
1401 stmt_vec_info estmt_info
;
1402 FOR_EACH_VEC_ELT (stmts
, i
, estmt_info
)
1404 gassign
*estmt
= as_a
<gassign
*> (estmt_info
->stmt
);
1405 tree bfref
= gimple_assign_rhs1 (estmt
);
1407 if (!known_eq (bit_field_size (bfref
),
1408 tree_to_poly_uint64 (TYPE_SIZE (TREE_TYPE (vectype
))))
1409 || !constant_multiple_p (bit_field_offset (bfref
),
1410 bit_field_size (bfref
), &lane
))
1415 lperm
.safe_push (std::make_pair (0, (unsigned)lane
));
1417 slp_tree vnode
= vect_create_new_slp_node (vNULL
);
1418 SLP_TREE_VECTYPE (vnode
) = TREE_TYPE (vec
);
1419 SLP_TREE_VEC_DEFS (vnode
).safe_push (vec
);
1420 /* We are always building a permutation node even if it is an identity
1421 permute to shield the rest of the vectorizer from the odd node
1422 representing an actual vector without any scalar ops.
1423 ??? We could hide it completely with making the permute node
1425 node
= vect_create_new_slp_node (stmts
, 1);
1426 SLP_TREE_CODE (node
) = VEC_PERM_EXPR
;
1427 SLP_TREE_LANE_PERMUTATION (node
) = lperm
;
1428 SLP_TREE_VECTYPE (node
) = vectype
;
1429 SLP_TREE_CHILDREN (node
).quick_push (vnode
);
1433 /* Get at the operands, verifying they are compatible. */
1434 vec
<slp_oprnd_info
> oprnds_info
= vect_create_oprnd_info (nops
, group_size
);
1435 slp_oprnd_info oprnd_info
;
1436 FOR_EACH_VEC_ELT (stmts
, i
, stmt_info
)
1438 int res
= vect_get_and_check_slp_defs (vinfo
, swap
[i
],
1439 stmts
, i
, &oprnds_info
);
1441 matches
[(res
== -1) ? 0 : i
] = false;
1445 for (i
= 0; i
< group_size
; ++i
)
1448 vect_free_oprnd_info (oprnds_info
);
1453 auto_vec
<slp_tree
, 4> children
;
1455 stmt_info
= stmts
[0];
1457 /* Create SLP_TREE nodes for the definition node/s. */
1458 FOR_EACH_VEC_ELT (oprnds_info
, i
, oprnd_info
)
1463 if (oprnd_info
->first_dt
== vect_uninitialized_def
)
1465 /* COND_EXPR have one too many eventually if the condition
1467 gcc_assert (i
== 3 && nops
== 4);
1471 if (oprnd_info
->first_dt
!= vect_internal_def
1472 && oprnd_info
->first_dt
!= vect_reduction_def
1473 && oprnd_info
->first_dt
!= vect_induction_def
)
1475 slp_tree invnode
= vect_create_new_slp_node (oprnd_info
->ops
);
1476 SLP_TREE_DEF_TYPE (invnode
) = oprnd_info
->first_dt
;
1477 oprnd_info
->ops
= vNULL
;
1478 children
.safe_push (invnode
);
1482 if ((child
= vect_build_slp_tree (vinfo
, oprnd_info
->def_stmts
,
1483 group_size
, &this_max_nunits
,
1485 &this_tree_size
, bst_map
)) != NULL
)
1487 oprnd_info
->def_stmts
= vNULL
;
1488 children
.safe_push (child
);
1492 /* If the SLP build for operand zero failed and operand zero
1493 and one can be commutated try that for the scalar stmts
1494 that failed the match. */
1496 /* A first scalar stmt mismatch signals a fatal mismatch. */
1498 /* ??? For COND_EXPRs we can swap the comparison operands
1499 as well as the arms under some constraints. */
1501 && oprnds_info
[1]->first_dt
== vect_internal_def
1502 && is_gimple_assign (stmt_info
->stmt
)
1503 /* Swapping operands for reductions breaks assumptions later on. */
1504 && STMT_VINFO_DEF_TYPE (stmt_info
) != vect_reduction_def
1505 && STMT_VINFO_DEF_TYPE (stmt_info
) != vect_double_reduction_def
1506 /* Do so only if the number of not successful permutes was nor more
1507 than a cut-ff as re-trying the recursive match on
1508 possibly each level of the tree would expose exponential
1512 /* See whether we can swap the matching or the non-matching
1514 bool swap_not_matching
= true;
1517 for (j
= 0; j
< group_size
; ++j
)
1519 if (matches
[j
] != !swap_not_matching
)
1521 stmt_vec_info stmt_info
= stmts
[j
];
1522 /* Verify if we can swap operands of this stmt. */
1523 gassign
*stmt
= dyn_cast
<gassign
*> (stmt_info
->stmt
);
1525 || !commutative_tree_code (gimple_assign_rhs_code (stmt
)))
1527 if (!swap_not_matching
)
1529 swap_not_matching
= false;
1534 while (j
!= group_size
);
1536 /* Swap mismatched definition stmts. */
1537 if (dump_enabled_p ())
1538 dump_printf_loc (MSG_NOTE
, vect_location
,
1539 "Re-trying with swapped operands of stmts ");
1540 for (j
= 0; j
< group_size
; ++j
)
1541 if (matches
[j
] == !swap_not_matching
)
1543 std::swap (oprnds_info
[0]->def_stmts
[j
],
1544 oprnds_info
[1]->def_stmts
[j
]);
1545 std::swap (oprnds_info
[0]->ops
[j
],
1546 oprnds_info
[1]->ops
[j
]);
1547 if (dump_enabled_p ())
1548 dump_printf (MSG_NOTE
, "%d ", j
);
1550 if (dump_enabled_p ())
1551 dump_printf (MSG_NOTE
, "\n");
1552 /* And try again with scratch 'matches' ... */
1553 bool *tem
= XALLOCAVEC (bool, group_size
);
1554 if ((child
= vect_build_slp_tree (vinfo
, oprnd_info
->def_stmts
,
1555 group_size
, &this_max_nunits
,
1557 &this_tree_size
, bst_map
)) != NULL
)
1559 oprnd_info
->def_stmts
= vNULL
;
1560 children
.safe_push (child
);
1563 /* We do not undo the swapping here since it might still be
1564 the better order for the second operand in case we build
1565 the first one from scalars below. */
1570 /* If the SLP build failed and we analyze a basic-block
1571 simply treat nodes we fail to build as externally defined
1572 (and thus build vectors from the scalar defs).
1573 The cost model will reject outright expensive cases.
1574 ??? This doesn't treat cases where permutation ultimatively
1575 fails (or we don't try permutation below). Ideally we'd
1576 even compute a permutation that will end up with the maximum
1578 if (is_a
<bb_vec_info
> (vinfo
)
1579 /* ??? Rejecting patterns this way doesn't work. We'd have to
1580 do extra work to cancel the pattern so the uses see the
1582 && !is_pattern_stmt_p (stmt_info
)
1583 && !oprnd_info
->any_pattern
)
1585 /* But if there's a leading vector sized set of matching stmts
1586 fail here so we can split the group. This matches the condition
1587 vect_analyze_slp_instance uses. */
1588 /* ??? We might want to split here and combine the results to support
1589 multiple vector sizes better. */
1590 for (j
= 0; j
< group_size
; ++j
)
1593 if (!known_ge (j
, TYPE_VECTOR_SUBPARTS (vectype
)))
1595 if (dump_enabled_p ())
1596 dump_printf_loc (MSG_NOTE
, vect_location
,
1597 "Building vector operands from scalars\n");
1599 child
= vect_create_new_slp_node (oprnd_info
->ops
);
1600 children
.safe_push (child
);
1601 oprnd_info
->ops
= vNULL
;
1602 oprnd_info
->def_stmts
= vNULL
;
1607 gcc_assert (child
== NULL
);
1608 FOR_EACH_VEC_ELT (children
, j
, child
)
1609 vect_free_slp_tree (child
);
1610 vect_free_oprnd_info (oprnds_info
);
1614 vect_free_oprnd_info (oprnds_info
);
1616 /* If we have all children of a child built up from uniform scalars
1617 or does more than one possibly expensive vector construction then
1618 just throw that away, causing it built up from scalars.
1619 The exception is the SLP node for the vector store. */
1620 if (is_a
<bb_vec_info
> (vinfo
)
1621 && !STMT_VINFO_GROUPED_ACCESS (stmt_info
)
1622 /* ??? Rejecting patterns this way doesn't work. We'd have to
1623 do extra work to cancel the pattern so the uses see the
1625 && !is_pattern_stmt_p (stmt_info
))
1629 bool all_uniform_p
= true;
1630 unsigned n_vector_builds
= 0;
1631 FOR_EACH_VEC_ELT (children
, j
, child
)
1633 if (SLP_TREE_DEF_TYPE (child
) == vect_internal_def
)
1634 all_uniform_p
= false;
1635 else if (!vect_slp_tree_uniform_p (child
))
1637 all_uniform_p
= false;
1638 if (SLP_TREE_DEF_TYPE (child
) == vect_external_def
)
1642 if (all_uniform_p
|| n_vector_builds
> 1)
1646 FOR_EACH_VEC_ELT (children
, j
, child
)
1647 vect_free_slp_tree (child
);
1649 if (dump_enabled_p ())
1650 dump_printf_loc (MSG_NOTE
, vect_location
,
1651 "Building parent vector operands from "
1652 "scalars instead\n");
1657 *tree_size
+= this_tree_size
+ 1;
1658 *max_nunits
= this_max_nunits
;
1662 /* ??? We'd likely want to either cache in bst_map sth like
1663 { a+b, NULL, a+b, NULL } and { NULL, a-b, NULL, a-b } or
1664 the true { a+b, a+b, a+b, a+b } ... but there we don't have
1665 explicit stmts to put in so the keying on 'stmts' doesn't
1666 work (but we have the same issue with nodes that use 'ops'). */
1667 slp_tree one
= new _slp_tree
;
1668 slp_tree two
= new _slp_tree
;
1669 SLP_TREE_DEF_TYPE (one
) = vect_internal_def
;
1670 SLP_TREE_DEF_TYPE (two
) = vect_internal_def
;
1671 SLP_TREE_VECTYPE (one
) = vectype
;
1672 SLP_TREE_VECTYPE (two
) = vectype
;
1673 SLP_TREE_CHILDREN (one
).safe_splice (children
);
1674 SLP_TREE_CHILDREN (two
).safe_splice (children
);
1676 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (two
), i
, child
)
1677 SLP_TREE_REF_COUNT (child
)++;
1679 /* Here we record the original defs since this
1680 node represents the final lane configuration. */
1681 node
= vect_create_new_slp_node (stmts
, 2);
1682 SLP_TREE_VECTYPE (node
) = vectype
;
1683 SLP_TREE_CODE (node
) = VEC_PERM_EXPR
;
1684 SLP_TREE_CHILDREN (node
).quick_push (one
);
1685 SLP_TREE_CHILDREN (node
).quick_push (two
);
1686 gassign
*stmt
= as_a
<gassign
*> (stmts
[0]->stmt
);
1687 enum tree_code code0
= gimple_assign_rhs_code (stmt
);
1688 enum tree_code ocode
= ERROR_MARK
;
1689 stmt_vec_info ostmt_info
;
1691 FOR_EACH_VEC_ELT (stmts
, i
, ostmt_info
)
1693 gassign
*ostmt
= as_a
<gassign
*> (ostmt_info
->stmt
);
1694 if (gimple_assign_rhs_code (ostmt
) != code0
)
1696 SLP_TREE_LANE_PERMUTATION (node
).safe_push (std::make_pair (1, i
));
1697 ocode
= gimple_assign_rhs_code (ostmt
);
1701 SLP_TREE_LANE_PERMUTATION (node
).safe_push (std::make_pair (0, i
));
1703 SLP_TREE_CODE (one
) = code0
;
1704 SLP_TREE_CODE (two
) = ocode
;
1705 SLP_TREE_LANES (one
) = stmts
.length ();
1706 SLP_TREE_LANES (two
) = stmts
.length ();
1707 SLP_TREE_REPRESENTATIVE (one
) = stmts
[0];
1708 SLP_TREE_REPRESENTATIVE (two
) = stmts
[j
];
1712 node
= vect_create_new_slp_node (stmts
, nops
);
1713 SLP_TREE_VECTYPE (node
) = vectype
;
1714 SLP_TREE_CHILDREN (node
).splice (children
);
1718 /* Dump a single SLP tree NODE. */
1721 vect_print_slp_tree (dump_flags_t dump_kind
, dump_location_t loc
,
1726 stmt_vec_info stmt_info
;
1729 dump_metadata_t
metadata (dump_kind
, loc
.get_impl_location ());
1730 dump_user_location_t user_loc
= loc
.get_user_location ();
1731 dump_printf_loc (metadata
, user_loc
, "node%s %p (max_nunits=%u, refcnt=%u)\n",
1732 SLP_TREE_DEF_TYPE (node
) == vect_external_def
1734 : (SLP_TREE_DEF_TYPE (node
) == vect_constant_def
1737 estimated_poly_value (node
->max_nunits
),
1738 SLP_TREE_REF_COUNT (node
));
1739 if (SLP_TREE_SCALAR_STMTS (node
).exists ())
1740 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node
), i
, stmt_info
)
1741 dump_printf_loc (metadata
, user_loc
, "\tstmt %u %G", i
, stmt_info
->stmt
);
1744 dump_printf_loc (metadata
, user_loc
, "\t{ ");
1745 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_OPS (node
), i
, op
)
1746 dump_printf (metadata
, "%T%s ", op
,
1747 i
< SLP_TREE_SCALAR_OPS (node
).length () - 1 ? "," : "");
1748 dump_printf (metadata
, "}\n");
1750 if (SLP_TREE_LOAD_PERMUTATION (node
).exists ())
1752 dump_printf_loc (metadata
, user_loc
, "\tload permutation {");
1753 FOR_EACH_VEC_ELT (SLP_TREE_LOAD_PERMUTATION (node
), i
, j
)
1754 dump_printf (dump_kind
, " %u", j
);
1755 dump_printf (dump_kind
, " }\n");
1757 if (SLP_TREE_LANE_PERMUTATION (node
).exists ())
1759 dump_printf_loc (metadata
, user_loc
, "\tlane permutation {");
1760 for (i
= 0; i
< SLP_TREE_LANE_PERMUTATION (node
).length (); ++i
)
1761 dump_printf (dump_kind
, " %u[%u]",
1762 SLP_TREE_LANE_PERMUTATION (node
)[i
].first
,
1763 SLP_TREE_LANE_PERMUTATION (node
)[i
].second
);
1764 dump_printf (dump_kind
, " }\n");
1766 if (SLP_TREE_CHILDREN (node
).is_empty ())
1768 dump_printf_loc (metadata
, user_loc
, "\tchildren");
1769 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), i
, child
)
1770 dump_printf (dump_kind
, " %p", (void *)child
);
1771 dump_printf (dump_kind
, "\n");
1775 debug (slp_tree node
)
1777 debug_dump_context ctx
;
1778 vect_print_slp_tree (MSG_NOTE
,
1779 dump_location_t::from_location_t (UNKNOWN_LOCATION
),
1783 /* Dump a slp tree NODE using flags specified in DUMP_KIND. */
1786 vect_print_slp_graph (dump_flags_t dump_kind
, dump_location_t loc
,
1787 slp_tree node
, hash_set
<slp_tree
> &visited
)
1792 if (visited
.add (node
))
1795 vect_print_slp_tree (dump_kind
, loc
, node
);
1797 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), i
, child
)
1798 vect_print_slp_graph (dump_kind
, loc
, child
, visited
);
1802 vect_print_slp_graph (dump_flags_t dump_kind
, dump_location_t loc
,
1805 hash_set
<slp_tree
> visited
;
1806 vect_print_slp_graph (dump_kind
, loc
, entry
, visited
);
1809 /* Mark the tree rooted at NODE with PURE_SLP. */
1812 vect_mark_slp_stmts (slp_tree node
, hash_set
<slp_tree
> &visited
)
1815 stmt_vec_info stmt_info
;
1818 if (SLP_TREE_DEF_TYPE (node
) != vect_internal_def
)
1821 if (visited
.add (node
))
1824 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node
), i
, stmt_info
)
1825 STMT_SLP_TYPE (stmt_info
) = pure_slp
;
1827 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), i
, child
)
1828 vect_mark_slp_stmts (child
, visited
);
1832 vect_mark_slp_stmts (slp_tree node
)
1834 hash_set
<slp_tree
> visited
;
1835 vect_mark_slp_stmts (node
, visited
);
1838 /* Mark the statements of the tree rooted at NODE as relevant (vect_used). */
1841 vect_mark_slp_stmts_relevant (slp_tree node
, hash_set
<slp_tree
> &visited
)
1844 stmt_vec_info stmt_info
;
1847 if (SLP_TREE_DEF_TYPE (node
) != vect_internal_def
)
1850 if (visited
.add (node
))
1853 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node
), i
, stmt_info
)
1855 gcc_assert (!STMT_VINFO_RELEVANT (stmt_info
)
1856 || STMT_VINFO_RELEVANT (stmt_info
) == vect_used_in_scope
);
1857 STMT_VINFO_RELEVANT (stmt_info
) = vect_used_in_scope
;
1860 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), i
, child
)
1861 vect_mark_slp_stmts_relevant (child
, visited
);
1865 vect_mark_slp_stmts_relevant (slp_tree node
)
1867 hash_set
<slp_tree
> visited
;
1868 vect_mark_slp_stmts_relevant (node
, visited
);
1872 /* Gather loads in the SLP graph NODE and populate the INST loads array. */
1875 vect_gather_slp_loads (vec
<slp_tree
> &loads
, slp_tree node
,
1876 hash_set
<slp_tree
> &visited
)
1878 if (visited
.add (node
))
1881 if (SLP_TREE_CHILDREN (node
).length () == 0)
1883 if (SLP_TREE_DEF_TYPE (node
) != vect_internal_def
)
1885 stmt_vec_info stmt_info
= SLP_TREE_SCALAR_STMTS (node
)[0];
1886 if (STMT_VINFO_GROUPED_ACCESS (stmt_info
)
1887 && DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info
)))
1888 loads
.safe_push (node
);
1894 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), i
, child
)
1895 vect_gather_slp_loads (loads
, child
, visited
);
1900 vect_gather_slp_loads (slp_instance inst
, slp_tree node
)
1902 hash_set
<slp_tree
> visited
;
1903 vect_gather_slp_loads (SLP_INSTANCE_LOADS (inst
), node
, visited
);
1907 /* Find the last store in SLP INSTANCE. */
1910 vect_find_last_scalar_stmt_in_slp (slp_tree node
)
1912 stmt_vec_info last
= NULL
;
1913 stmt_vec_info stmt_vinfo
;
1915 for (int i
= 0; SLP_TREE_SCALAR_STMTS (node
).iterate (i
, &stmt_vinfo
); i
++)
1917 stmt_vinfo
= vect_orig_stmt (stmt_vinfo
);
1918 last
= last
? get_later_stmt (stmt_vinfo
, last
) : stmt_vinfo
;
1924 /* Find the first stmt in NODE. */
1927 vect_find_first_scalar_stmt_in_slp (slp_tree node
)
1929 stmt_vec_info first
= NULL
;
1930 stmt_vec_info stmt_vinfo
;
1932 for (int i
= 0; SLP_TREE_SCALAR_STMTS (node
).iterate (i
, &stmt_vinfo
); i
++)
1934 stmt_vinfo
= vect_orig_stmt (stmt_vinfo
);
1936 || get_later_stmt (stmt_vinfo
, first
) == first
)
1943 /* Splits a group of stores, currently beginning at FIRST_VINFO, into
1944 two groups: one (still beginning at FIRST_VINFO) of size GROUP1_SIZE
1945 (also containing the first GROUP1_SIZE stmts, since stores are
1946 consecutive), the second containing the remainder.
1947 Return the first stmt in the second group. */
1949 static stmt_vec_info
1950 vect_split_slp_store_group (stmt_vec_info first_vinfo
, unsigned group1_size
)
1952 gcc_assert (DR_GROUP_FIRST_ELEMENT (first_vinfo
) == first_vinfo
);
1953 gcc_assert (group1_size
> 0);
1954 int group2_size
= DR_GROUP_SIZE (first_vinfo
) - group1_size
;
1955 gcc_assert (group2_size
> 0);
1956 DR_GROUP_SIZE (first_vinfo
) = group1_size
;
1958 stmt_vec_info stmt_info
= first_vinfo
;
1959 for (unsigned i
= group1_size
; i
> 1; i
--)
1961 stmt_info
= DR_GROUP_NEXT_ELEMENT (stmt_info
);
1962 gcc_assert (DR_GROUP_GAP (stmt_info
) == 1);
1964 /* STMT is now the last element of the first group. */
1965 stmt_vec_info group2
= DR_GROUP_NEXT_ELEMENT (stmt_info
);
1966 DR_GROUP_NEXT_ELEMENT (stmt_info
) = 0;
1968 DR_GROUP_SIZE (group2
) = group2_size
;
1969 for (stmt_info
= group2
; stmt_info
;
1970 stmt_info
= DR_GROUP_NEXT_ELEMENT (stmt_info
))
1972 DR_GROUP_FIRST_ELEMENT (stmt_info
) = group2
;
1973 gcc_assert (DR_GROUP_GAP (stmt_info
) == 1);
1976 /* For the second group, the DR_GROUP_GAP is that before the original group,
1977 plus skipping over the first vector. */
1978 DR_GROUP_GAP (group2
) = DR_GROUP_GAP (first_vinfo
) + group1_size
;
1980 /* DR_GROUP_GAP of the first group now has to skip over the second group too. */
1981 DR_GROUP_GAP (first_vinfo
) += group2_size
;
1983 if (dump_enabled_p ())
1984 dump_printf_loc (MSG_NOTE
, vect_location
, "Split group into %d and %d\n",
1985 group1_size
, group2_size
);
1990 /* Calculate the unrolling factor for an SLP instance with GROUP_SIZE
1991 statements and a vector of NUNITS elements. */
1994 calculate_unrolling_factor (poly_uint64 nunits
, unsigned int group_size
)
1996 return exact_div (common_multiple (nunits
, group_size
), group_size
);
1999 /* Analyze an SLP instance starting from a group of grouped stores. Call
2000 vect_build_slp_tree to build a tree of packed stmts if possible.
2001 Return FALSE if it's impossible to SLP any stmt in the loop. */
2004 vect_analyze_slp_instance (vec_info
*vinfo
,
2005 scalar_stmts_to_slp_tree_map_t
*bst_map
,
2006 stmt_vec_info stmt_info
, unsigned max_tree_size
)
2008 slp_instance new_instance
;
2010 unsigned int group_size
;
2011 tree vectype
, scalar_type
= NULL_TREE
;
2013 struct data_reference
*dr
= STMT_VINFO_DATA_REF (stmt_info
);
2014 vec
<stmt_vec_info
> scalar_stmts
;
2015 bool constructor
= false;
2017 if (is_a
<bb_vec_info
> (vinfo
))
2018 vect_location
= stmt_info
->stmt
;
2019 if (STMT_VINFO_GROUPED_ACCESS (stmt_info
))
2021 scalar_type
= TREE_TYPE (DR_REF (dr
));
2022 group_size
= DR_GROUP_SIZE (stmt_info
);
2023 vectype
= get_vectype_for_scalar_type (vinfo
, scalar_type
, group_size
);
2025 else if (!dr
&& REDUC_GROUP_FIRST_ELEMENT (stmt_info
))
2027 gcc_assert (is_a
<loop_vec_info
> (vinfo
));
2028 vectype
= STMT_VINFO_VECTYPE (stmt_info
);
2029 group_size
= REDUC_GROUP_SIZE (stmt_info
);
2031 else if (is_gimple_assign (stmt_info
->stmt
)
2032 && gimple_assign_rhs_code (stmt_info
->stmt
) == CONSTRUCTOR
)
2034 vectype
= TREE_TYPE (gimple_assign_rhs1 (stmt_info
->stmt
));
2035 group_size
= CONSTRUCTOR_NELTS (gimple_assign_rhs1 (stmt_info
->stmt
));
2040 gcc_assert (is_a
<loop_vec_info
> (vinfo
));
2041 vectype
= STMT_VINFO_VECTYPE (stmt_info
);
2042 group_size
= as_a
<loop_vec_info
> (vinfo
)->reductions
.length ();
2047 if (dump_enabled_p ())
2048 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2049 "Build SLP failed: unsupported data-type %T\n",
2054 poly_uint64 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
2056 /* Create a node (a root of the SLP tree) for the packed grouped stores. */
2057 scalar_stmts
.create (group_size
);
2058 stmt_vec_info next_info
= stmt_info
;
2059 if (STMT_VINFO_GROUPED_ACCESS (stmt_info
))
2061 /* Collect the stores and store them in SLP_TREE_SCALAR_STMTS. */
2064 scalar_stmts
.safe_push (vect_stmt_to_vectorize (next_info
));
2065 next_info
= DR_GROUP_NEXT_ELEMENT (next_info
);
2068 else if (!dr
&& REDUC_GROUP_FIRST_ELEMENT (stmt_info
))
2070 /* Collect the reduction stmts and store them in
2071 SLP_TREE_SCALAR_STMTS. */
2074 scalar_stmts
.safe_push (vect_stmt_to_vectorize (next_info
));
2075 next_info
= REDUC_GROUP_NEXT_ELEMENT (next_info
);
2077 /* Mark the first element of the reduction chain as reduction to properly
2078 transform the node. In the reduction analysis phase only the last
2079 element of the chain is marked as reduction. */
2080 STMT_VINFO_DEF_TYPE (stmt_info
)
2081 = STMT_VINFO_DEF_TYPE (scalar_stmts
.last ());
2082 STMT_VINFO_REDUC_DEF (vect_orig_stmt (stmt_info
))
2083 = STMT_VINFO_REDUC_DEF (vect_orig_stmt (scalar_stmts
.last ()));
2085 else if (constructor
)
2087 tree rhs
= gimple_assign_rhs1 (stmt_info
->stmt
);
2089 FOR_EACH_CONSTRUCTOR_VALUE (CONSTRUCTOR_ELTS (rhs
), i
, val
)
2091 if (TREE_CODE (val
) == SSA_NAME
)
2093 gimple
* def
= SSA_NAME_DEF_STMT (val
);
2094 stmt_vec_info def_info
= vinfo
->lookup_stmt (def
);
2095 /* Value is defined in another basic block. */
2098 def_info
= vect_stmt_to_vectorize (def_info
);
2099 scalar_stmts
.safe_push (def_info
);
2104 if (dump_enabled_p ())
2105 dump_printf_loc (MSG_NOTE
, vect_location
,
2106 "Analyzing vectorizable constructor: %G\n",
2111 /* Collect reduction statements. */
2112 vec
<stmt_vec_info
> reductions
= as_a
<loop_vec_info
> (vinfo
)->reductions
;
2113 for (i
= 0; reductions
.iterate (i
, &next_info
); i
++)
2114 scalar_stmts
.safe_push (next_info
);
2117 if (dump_enabled_p ())
2119 dump_printf_loc (MSG_NOTE
, vect_location
,
2120 "Starting SLP discovery for\n");
2121 for (i
= 0; i
< scalar_stmts
.length (); ++i
)
2122 dump_printf_loc (MSG_NOTE
, vect_location
,
2123 " %G", scalar_stmts
[i
]->stmt
);
2126 /* Build the tree for the SLP instance. */
2127 bool *matches
= XALLOCAVEC (bool, group_size
);
2128 unsigned npermutes
= 0;
2129 poly_uint64 max_nunits
= nunits
;
2130 unsigned tree_size
= 0;
2131 node
= vect_build_slp_tree (vinfo
, scalar_stmts
, group_size
,
2132 &max_nunits
, matches
, &npermutes
,
2133 &tree_size
, bst_map
);
2136 /* Calculate the unrolling factor based on the smallest type. */
2137 poly_uint64 unrolling_factor
2138 = calculate_unrolling_factor (max_nunits
, group_size
);
2140 if (maybe_ne (unrolling_factor
, 1U)
2141 && is_a
<bb_vec_info
> (vinfo
))
2143 unsigned HOST_WIDE_INT const_max_nunits
;
2144 if (!max_nunits
.is_constant (&const_max_nunits
)
2145 || const_max_nunits
> group_size
)
2147 if (dump_enabled_p ())
2148 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2149 "Build SLP failed: store group "
2150 "size not a multiple of the vector size "
2151 "in basic block SLP\n");
2152 vect_free_slp_tree (node
);
2155 /* Fatal mismatch. */
2156 if (dump_enabled_p ())
2157 dump_printf_loc (MSG_NOTE
, vect_location
,
2158 "SLP discovery succeeded but node needs "
2160 memset (matches
, true, group_size
);
2161 matches
[group_size
/ const_max_nunits
* const_max_nunits
] = false;
2162 vect_free_slp_tree (node
);
2166 /* Create a new SLP instance. */
2167 new_instance
= XNEW (class _slp_instance
);
2168 SLP_INSTANCE_TREE (new_instance
) = node
;
2169 SLP_INSTANCE_UNROLLING_FACTOR (new_instance
) = unrolling_factor
;
2170 SLP_INSTANCE_LOADS (new_instance
) = vNULL
;
2171 SLP_INSTANCE_ROOT_STMT (new_instance
) = constructor
? stmt_info
: NULL
;
2172 new_instance
->reduc_phis
= NULL
;
2173 new_instance
->cost_vec
= vNULL
;
2174 new_instance
->subgraph_entries
= vNULL
;
2176 vect_gather_slp_loads (new_instance
, node
);
2177 if (dump_enabled_p ())
2178 dump_printf_loc (MSG_NOTE
, vect_location
,
2179 "SLP size %u vs. limit %u.\n",
2180 tree_size
, max_tree_size
);
2182 /* Check whether any load is possibly permuted. */
2184 bool loads_permuted
= false;
2185 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (new_instance
), i
, load_node
)
2187 if (!SLP_TREE_LOAD_PERMUTATION (load_node
).exists ())
2190 stmt_vec_info load_info
;
2191 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (load_node
), j
, load_info
)
2192 if (SLP_TREE_LOAD_PERMUTATION (load_node
)[j
] != j
)
2194 loads_permuted
= true;
2199 /* If the loads and stores can be handled with load/store-lane
2200 instructions do not generate this SLP instance. */
2201 if (is_a
<loop_vec_info
> (vinfo
)
2203 && dr
&& vect_store_lanes_supported (vectype
, group_size
, false))
2206 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (new_instance
), i
, load_node
)
2208 stmt_vec_info stmt_vinfo
= DR_GROUP_FIRST_ELEMENT
2209 (SLP_TREE_SCALAR_STMTS (load_node
)[0]);
2210 /* Use SLP for strided accesses (or if we can't load-lanes). */
2211 if (STMT_VINFO_STRIDED_P (stmt_vinfo
)
2212 || ! vect_load_lanes_supported
2213 (STMT_VINFO_VECTYPE (stmt_vinfo
),
2214 DR_GROUP_SIZE (stmt_vinfo
), false))
2217 if (i
== SLP_INSTANCE_LOADS (new_instance
).length ())
2219 if (dump_enabled_p ())
2220 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2221 "Built SLP cancelled: can use "
2222 "load/store-lanes\n");
2223 vect_free_slp_instance (new_instance
);
2228 /* If this is a reduction chain with a conversion in front
2229 amend the SLP tree with a node for that. */
2231 && REDUC_GROUP_FIRST_ELEMENT (stmt_info
)
2232 && STMT_VINFO_DEF_TYPE (stmt_info
) != vect_reduction_def
)
2234 /* Get at the conversion stmt - we know it's the single use
2235 of the last stmt of the reduction chain. */
2236 gimple
*tem
= vect_orig_stmt (scalar_stmts
[group_size
- 1])->stmt
;
2237 use_operand_p use_p
;
2239 bool r
= single_imm_use (gimple_assign_lhs (tem
),
2242 next_info
= vinfo
->lookup_stmt (use_stmt
);
2243 next_info
= vect_stmt_to_vectorize (next_info
);
2244 scalar_stmts
= vNULL
;
2245 scalar_stmts
.create (group_size
);
2246 for (unsigned i
= 0; i
< group_size
; ++i
)
2247 scalar_stmts
.quick_push (next_info
);
2248 slp_tree conv
= vect_create_new_slp_node (scalar_stmts
, 1);
2249 SLP_TREE_VECTYPE (conv
) = STMT_VINFO_VECTYPE (next_info
);
2250 SLP_TREE_CHILDREN (conv
).quick_push (node
);
2251 SLP_INSTANCE_TREE (new_instance
) = conv
;
2252 /* We also have to fake this conversion stmt as SLP reduction
2253 group so we don't have to mess with too much code
2255 REDUC_GROUP_FIRST_ELEMENT (next_info
) = next_info
;
2256 REDUC_GROUP_NEXT_ELEMENT (next_info
) = NULL
;
2259 vinfo
->slp_instances
.safe_push (new_instance
);
2261 /* ??? We've replaced the old SLP_INSTANCE_GROUP_SIZE with
2262 the number of scalar stmts in the root in a few places.
2263 Verify that assumption holds. */
2264 gcc_assert (SLP_TREE_SCALAR_STMTS (SLP_INSTANCE_TREE (new_instance
))
2265 .length () == group_size
);
2267 if (dump_enabled_p ())
2269 dump_printf_loc (MSG_NOTE
, vect_location
,
2270 "Final SLP tree for instance:\n");
2271 vect_print_slp_graph (MSG_NOTE
, vect_location
,
2272 SLP_INSTANCE_TREE (new_instance
));
2280 /* Failed to SLP. */
2281 /* Free the allocated memory. */
2282 scalar_stmts
.release ();
2285 /* Try to break the group up into pieces. */
2286 unsigned HOST_WIDE_INT const_nunits
;
2287 if (STMT_VINFO_GROUPED_ACCESS (stmt_info
)
2288 && DR_IS_WRITE (STMT_VINFO_DATA_REF (stmt_info
))
2289 && nunits
.is_constant (&const_nunits
))
2291 for (i
= 0; i
< group_size
; i
++)
2295 /* For basic block SLP, try to break the group up into multiples of the
2297 if (is_a
<bb_vec_info
> (vinfo
)
2298 && (i
>= const_nunits
&& i
< group_size
))
2300 /* Split into two groups at the first vector boundary before i. */
2301 gcc_assert ((const_nunits
& (const_nunits
- 1)) == 0);
2302 unsigned group1_size
= i
& ~(const_nunits
- 1);
2304 if (dump_enabled_p ())
2305 dump_printf_loc (MSG_NOTE
, vect_location
,
2306 "Splitting SLP group at stmt %u\n", i
);
2307 stmt_vec_info rest
= vect_split_slp_store_group (stmt_info
,
2309 bool res
= vect_analyze_slp_instance (vinfo
, bst_map
, stmt_info
,
2311 /* If the first non-match was in the middle of a vector,
2312 skip the rest of that vector. Do not bother to re-analyze
2313 single stmt groups. */
2314 if (group1_size
< i
)
2316 i
= group1_size
+ const_nunits
;
2317 if (i
+ 1 < group_size
)
2318 rest
= vect_split_slp_store_group (rest
, const_nunits
);
2320 if (i
+ 1 < group_size
)
2321 res
|= vect_analyze_slp_instance (vinfo
, bst_map
,
2322 rest
, max_tree_size
);
2326 /* For loop vectorization split into arbitrary pieces of size > 1. */
2327 if (is_a
<loop_vec_info
> (vinfo
)
2328 && (i
> 1 && i
< group_size
))
2330 gcc_assert ((const_nunits
& (const_nunits
- 1)) == 0);
2331 unsigned group1_size
= i
;
2333 if (dump_enabled_p ())
2334 dump_printf_loc (MSG_NOTE
, vect_location
,
2335 "Splitting SLP group at stmt %u\n", i
);
2337 stmt_vec_info rest
= vect_split_slp_store_group (stmt_info
,
2339 /* Loop vectorization cannot handle gaps in stores, make sure
2340 the split group appears as strided. */
2341 STMT_VINFO_STRIDED_P (rest
) = 1;
2342 DR_GROUP_GAP (rest
) = 0;
2343 STMT_VINFO_STRIDED_P (stmt_info
) = 1;
2344 DR_GROUP_GAP (stmt_info
) = 0;
2346 bool res
= vect_analyze_slp_instance (vinfo
, bst_map
, stmt_info
,
2348 if (i
+ 1 < group_size
)
2349 res
|= vect_analyze_slp_instance (vinfo
, bst_map
,
2350 rest
, max_tree_size
);
2355 /* Even though the first vector did not all match, we might be able to SLP
2356 (some) of the remainder. FORNOW ignore this possibility. */
2359 /* Failed to SLP. */
2360 if (dump_enabled_p ())
2361 dump_printf_loc (MSG_NOTE
, vect_location
, "SLP discovery failed\n");
2365 /* Fill in backedge SLP children in the SLP graph. */
2368 vect_analyze_slp_backedges (vec_info
*vinfo
, slp_tree node
,
2369 scalar_stmts_to_slp_tree_map_t
*bst_map
,
2370 hash_set
<slp_tree
> &visited
)
2372 if (SLP_TREE_DEF_TYPE (node
) != vect_internal_def
2373 || visited
.add (node
))
2378 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), i
, child
)
2380 vect_analyze_slp_backedges (vinfo
, child
, bst_map
, visited
);
2382 if (gphi
*phi
= dyn_cast
<gphi
*> (SLP_TREE_REPRESENTATIVE (node
)->stmt
))
2383 for (unsigned i
= 0; i
< gimple_phi_num_args (phi
); ++i
)
2385 auto_vec
<stmt_vec_info
, 64> stmts
;
2387 stmt_vec_info phi_info
;
2388 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node
), j
, phi_info
)
2390 tree def
= gimple_phi_arg_def (as_a
<gphi
*>(phi_info
->stmt
), i
);
2391 stmt_vec_info def_info
= vinfo
->lookup_def (def
);
2394 stmts
.safe_push (vect_stmt_to_vectorize (def_info
));
2396 if (j
!= SLP_TREE_LANES (node
))
2398 slp_tree
*edge_def
= bst_map
->get (stmts
);
2401 /* ??? We're currently not recording non-backedge children
2402 of PHIs like external reduction initial values. Avoid
2403 NULL entries in SLP_TREE_CHILDREN for those and thus
2404 for now simply only record backedge defs at a
2405 SLP_TREE_CHILDREN index possibly not matching that of
2406 the corresponding PHI argument index. */
2407 SLP_TREE_CHILDREN (node
).quick_push (*edge_def
);
2408 (*edge_def
)->refcnt
++;
2413 /* Check if there are stmts in the loop can be vectorized using SLP. Build SLP
2414 trees of packed scalar stmts if SLP is possible. */
2417 vect_analyze_slp (vec_info
*vinfo
, unsigned max_tree_size
)
2420 stmt_vec_info first_element
;
2422 DUMP_VECT_SCOPE ("vect_analyze_slp");
2424 scalar_stmts_to_slp_tree_map_t
*bst_map
2425 = new scalar_stmts_to_slp_tree_map_t ();
2427 /* Find SLP sequences starting from groups of grouped stores. */
2428 FOR_EACH_VEC_ELT (vinfo
->grouped_stores
, i
, first_element
)
2429 vect_analyze_slp_instance (vinfo
, bst_map
, first_element
, max_tree_size
);
2431 if (loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
))
2433 if (loop_vinfo
->reduction_chains
.length () > 0)
2435 /* Find SLP sequences starting from reduction chains. */
2436 FOR_EACH_VEC_ELT (loop_vinfo
->reduction_chains
, i
, first_element
)
2437 if (! vect_analyze_slp_instance (vinfo
, bst_map
, first_element
,
2440 /* Dissolve reduction chain group. */
2441 stmt_vec_info vinfo
= first_element
;
2442 stmt_vec_info last
= NULL
;
2445 stmt_vec_info next
= REDUC_GROUP_NEXT_ELEMENT (vinfo
);
2446 REDUC_GROUP_FIRST_ELEMENT (vinfo
) = NULL
;
2447 REDUC_GROUP_NEXT_ELEMENT (vinfo
) = NULL
;
2451 STMT_VINFO_DEF_TYPE (first_element
) = vect_internal_def
;
2452 /* It can be still vectorized as part of an SLP reduction. */
2453 loop_vinfo
->reductions
.safe_push (last
);
2457 /* Find SLP sequences starting from groups of reductions. */
2458 if (loop_vinfo
->reductions
.length () > 1)
2459 vect_analyze_slp_instance (vinfo
, bst_map
, loop_vinfo
->reductions
[0],
2463 /* Fill in backedges. */
2464 slp_instance instance
;
2465 hash_set
<slp_tree
> visited
;
2466 FOR_EACH_VEC_ELT (vinfo
->slp_instances
, i
, instance
)
2467 vect_analyze_slp_backedges (vinfo
, SLP_INSTANCE_TREE (instance
),
2470 /* The map keeps a reference on SLP nodes built, release that. */
2471 for (scalar_stmts_to_slp_tree_map_t::iterator it
= bst_map
->begin ();
2472 it
!= bst_map
->end (); ++it
)
2474 vect_free_slp_tree ((*it
).second
);
2477 return opt_result::success ();
2480 /* Fill the vertices and leafs vector with all nodes in the SLP graph. */
2483 vect_slp_build_vertices (hash_set
<slp_tree
> &visited
, slp_tree node
,
2484 vec
<slp_tree
> &vertices
, vec
<int> &leafs
)
2489 if (visited
.add (node
))
2492 node
->vertex
= vertices
.length ();
2493 vertices
.safe_push (node
);
2494 if (SLP_TREE_CHILDREN (node
).is_empty ())
2495 leafs
.safe_push (node
->vertex
);
2497 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), i
, child
)
2498 vect_slp_build_vertices (visited
, child
, vertices
, leafs
);
2501 /* Fill the vertices and leafs vector with all nodes in the SLP graph. */
2504 vect_slp_build_vertices (vec_info
*info
, vec
<slp_tree
> &vertices
,
2507 hash_set
<slp_tree
> visited
;
2509 slp_instance instance
;
2510 FOR_EACH_VEC_ELT (info
->slp_instances
, i
, instance
)
2511 vect_slp_build_vertices (visited
, SLP_INSTANCE_TREE (instance
), vertices
,
2515 /* Apply (reverse) bijectite PERM to VEC. */
2519 vect_slp_permute (vec
<unsigned> perm
,
2520 vec
<T
> &vec
, bool reverse
)
2522 auto_vec
<T
, 64> saved
;
2523 saved
.create (vec
.length ());
2524 for (unsigned i
= 0; i
< vec
.length (); ++i
)
2525 saved
.quick_push (vec
[i
]);
2529 for (unsigned i
= 0; i
< vec
.length (); ++i
)
2530 vec
[perm
[i
]] = saved
[i
];
2531 for (unsigned i
= 0; i
< vec
.length (); ++i
)
2532 gcc_assert (vec
[perm
[i
]] == saved
[i
]);
2536 for (unsigned i
= 0; i
< vec
.length (); ++i
)
2537 vec
[i
] = saved
[perm
[i
]];
2538 for (unsigned i
= 0; i
< vec
.length (); ++i
)
2539 gcc_assert (vec
[i
] == saved
[perm
[i
]]);
2543 /* Return whether permutations PERM_A and PERM_B as recorded in the
2544 PERMS vector are equal. */
2547 vect_slp_perms_eq (const vec
<vec
<unsigned> > &perms
,
2548 int perm_a
, int perm_b
)
2550 return (perm_a
== perm_b
2551 || (perms
[perm_a
].length () == perms
[perm_b
].length ()
2552 && memcmp (&perms
[perm_a
][0], &perms
[perm_b
][0],
2553 sizeof (unsigned) * perms
[perm_a
].length ()) == 0));
2556 /* Optimize the SLP graph of VINFO. */
2559 vect_optimize_slp (vec_info
*vinfo
)
2561 if (vinfo
->slp_instances
.is_empty ())
2566 auto_vec
<slp_tree
> vertices
;
2567 auto_vec
<int> leafs
;
2568 vect_slp_build_vertices (vinfo
, vertices
, leafs
);
2570 struct graph
*slpg
= new_graph (vertices
.length ());
2571 FOR_EACH_VEC_ELT (vertices
, i
, node
)
2575 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), j
, child
)
2576 add_edge (slpg
, i
, child
->vertex
);
2579 /* Compute (reverse) postorder on the inverted graph. */
2581 graphds_dfs (slpg
, &leafs
[0], leafs
.length (), &ipo
, false, NULL
, NULL
);
2583 auto_sbitmap
n_visited (vertices
.length ());
2584 auto_sbitmap
n_materialize (vertices
.length ());
2585 auto_vec
<int> n_perm (vertices
.length ());
2586 auto_vec
<vec
<unsigned> > perms
;
2588 bitmap_clear (n_visited
);
2589 bitmap_clear (n_materialize
);
2590 n_perm
.quick_grow_cleared (vertices
.length ());
2591 perms
.safe_push (vNULL
); /* zero is no permute */
2593 /* Produce initial permutations. */
2594 for (i
= 0; i
< leafs
.length (); ++i
)
2597 slp_tree node
= vertices
[idx
];
2599 /* Handle externals and constants optimistically throughout the
2601 if (SLP_TREE_DEF_TYPE (node
) == vect_external_def
2602 || SLP_TREE_DEF_TYPE (node
) == vect_constant_def
)
2605 /* Loads are the only thing generating permutes and leafs do not
2606 change across iterations. */
2607 bitmap_set_bit (n_visited
, idx
);
2608 if (!SLP_TREE_LOAD_PERMUTATION (node
).exists ())
2611 /* If splitting out a SLP_TREE_LANE_PERMUTATION can make the
2612 node unpermuted, record this permute. */
2613 stmt_vec_info dr_stmt
= SLP_TREE_REPRESENTATIVE (node
);
2614 if (!STMT_VINFO_GROUPED_ACCESS (dr_stmt
))
2616 dr_stmt
= DR_GROUP_FIRST_ELEMENT (dr_stmt
);
2617 unsigned imin
= DR_GROUP_SIZE (dr_stmt
) + 1, imax
= 0;
2618 bool any_permute
= false;
2619 for (unsigned j
= 0; j
< SLP_TREE_LANES (node
); ++j
)
2621 unsigned idx
= SLP_TREE_LOAD_PERMUTATION (node
)[j
];
2622 imin
= MIN (imin
, idx
);
2623 imax
= MAX (imax
, idx
);
2624 if (idx
- SLP_TREE_LOAD_PERMUTATION (node
)[0] != j
)
2627 /* If there's no permute no need to split one out. */
2630 /* If the span doesn't match we'd disrupt VF computation, avoid
2632 if (imax
- imin
+ 1 != SLP_TREE_LANES (node
))
2635 /* For now only handle true permutes, like
2636 vect_attempt_slp_rearrange_stmts did. This allows us to be lazy
2637 when permuting constants and invariants keeping the permute
2639 auto_sbitmap
load_index (SLP_TREE_LANES (node
));
2640 bitmap_clear (load_index
);
2641 for (unsigned j
= 0; j
< SLP_TREE_LANES (node
); ++j
)
2642 bitmap_set_bit (load_index
, SLP_TREE_LOAD_PERMUTATION (node
)[j
] - imin
);
2644 for (j
= 0; j
< SLP_TREE_LANES (node
); ++j
)
2645 if (!bitmap_bit_p (load_index
, j
))
2647 if (j
!= SLP_TREE_LANES (node
))
2650 vec
<unsigned> perm
= vNULL
;
2651 perm
.safe_grow (SLP_TREE_LANES (node
), true);
2652 for (unsigned j
= 0; j
< SLP_TREE_LANES (node
); ++j
)
2653 perm
[j
] = SLP_TREE_LOAD_PERMUTATION (node
)[j
] - imin
;
2654 perms
.safe_push (perm
);
2655 n_perm
[idx
] = perms
.length () - 1;
2658 /* Propagate permutes along the graph and compute materialization points. */
2660 unsigned iteration
= 0;
2666 for (i
= vertices
.length (); i
> 0 ; --i
)
2669 slp_tree node
= vertices
[idx
];
2670 /* For leafs there's nothing to do - we've seeded permutes
2672 if (SLP_TREE_DEF_TYPE (node
) != vect_internal_def
)
2675 bitmap_set_bit (n_visited
, idx
);
2677 /* We cannot move a permute across a store. */
2678 if (STMT_VINFO_DATA_REF (SLP_TREE_REPRESENTATIVE (node
))
2680 (STMT_VINFO_DATA_REF (SLP_TREE_REPRESENTATIVE (node
))))
2684 for (graph_edge
*succ
= slpg
->vertices
[idx
].succ
;
2685 succ
; succ
= succ
->succ_next
)
2687 int succ_idx
= succ
->dest
;
2688 /* Handle unvisited nodes optimistically. */
2689 /* ??? But for constants once we want to handle non-bijective
2690 permutes we have to verify the permute, when unifying lanes,
2691 will not unify different constants. For example see
2692 gcc.dg/vect/bb-slp-14.c for a case that would break. */
2693 if (!bitmap_bit_p (n_visited
, succ_idx
))
2695 int succ_perm
= n_perm
[succ_idx
];
2696 /* Once we materialize succs permutation its output lanes
2697 appear unpermuted to us. */
2698 if (bitmap_bit_p (n_materialize
, succ_idx
))
2702 else if (succ_perm
== 0)
2707 else if (!vect_slp_perms_eq (perms
, perm
, succ_perm
))
2715 /* Pick up pre-computed leaf values. */
2717 else if (!vect_slp_perms_eq (perms
, perm
, n_perm
[idx
]))
2720 /* Make sure we eventually converge. */
2721 gcc_checking_assert (perm
== 0);
2724 bitmap_clear_bit (n_materialize
, idx
);
2731 /* Elide pruning at materialization points in the first
2732 iteration so every node was visited once at least. */
2736 /* Decide on permute materialization. Look whether there's
2737 a use (pred) edge that is permuted differently than us.
2738 In that case mark ourselves so the permutation is applied. */
2739 bool all_preds_permuted
= slpg
->vertices
[idx
].pred
!= NULL
;
2740 for (graph_edge
*pred
= slpg
->vertices
[idx
].pred
;
2741 pred
; pred
= pred
->pred_next
)
2743 gcc_checking_assert (bitmap_bit_p (n_visited
, pred
->src
));
2744 int pred_perm
= n_perm
[pred
->src
];
2745 if (!vect_slp_perms_eq (perms
, perm
, pred_perm
))
2747 all_preds_permuted
= false;
2751 if (!all_preds_permuted
)
2753 if (!bitmap_bit_p (n_materialize
, idx
))
2755 bitmap_set_bit (n_materialize
, idx
);
2759 while (changed
|| iteration
== 1);
2762 for (i
= 0; i
< vertices
.length (); ++i
)
2764 int perm
= n_perm
[i
];
2768 slp_tree node
= vertices
[i
];
2770 /* First permute invariant/external original successors. */
2773 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), j
, child
)
2775 if (SLP_TREE_DEF_TYPE (child
) == vect_internal_def
)
2778 /* If the vector is uniform there's nothing to do. */
2779 if (vect_slp_tree_uniform_p (child
))
2782 /* We can end up sharing some externals via two_operator
2783 handling. Be prepared to unshare those. */
2784 if (child
->refcnt
!= 1)
2786 gcc_assert (slpg
->vertices
[child
->vertex
].pred
->pred_next
);
2787 SLP_TREE_CHILDREN (node
)[j
] = child
2788 = vect_create_new_slp_node
2789 (SLP_TREE_SCALAR_OPS (child
).copy ());
2791 vect_slp_permute (perms
[perm
],
2792 SLP_TREE_SCALAR_OPS (child
), true);
2795 if (bitmap_bit_p (n_materialize
, i
))
2797 if (SLP_TREE_LOAD_PERMUTATION (node
).exists ())
2798 /* For loads simply drop the permutation, the load permutation
2799 already performs the desired permutation. */
2803 if (dump_enabled_p ())
2804 dump_printf_loc (MSG_NOTE
, vect_location
,
2805 "inserting permute node in place of %p\n",
2808 /* Make a copy of NODE and in-place change it to a
2809 VEC_PERM node to permute the lanes of the copy. */
2810 slp_tree copy
= new _slp_tree
;
2811 SLP_TREE_CHILDREN (copy
) = SLP_TREE_CHILDREN (node
);
2812 SLP_TREE_CHILDREN (node
) = vNULL
;
2813 SLP_TREE_SCALAR_STMTS (copy
)
2814 = SLP_TREE_SCALAR_STMTS (node
).copy ();
2815 vect_slp_permute (perms
[perm
],
2816 SLP_TREE_SCALAR_STMTS (copy
), true);
2817 gcc_assert (!SLP_TREE_SCALAR_OPS (node
).exists ());
2818 SLP_TREE_REPRESENTATIVE (copy
) = SLP_TREE_REPRESENTATIVE (node
);
2819 gcc_assert (!SLP_TREE_LOAD_PERMUTATION (node
).exists ());
2820 SLP_TREE_LANE_PERMUTATION (copy
)
2821 = SLP_TREE_LANE_PERMUTATION (node
);
2822 SLP_TREE_LANE_PERMUTATION (node
) = vNULL
;
2823 SLP_TREE_VECTYPE (copy
) = SLP_TREE_VECTYPE (node
);
2825 copy
->max_nunits
= node
->max_nunits
;
2826 SLP_TREE_DEF_TYPE (copy
) = SLP_TREE_DEF_TYPE (node
);
2827 SLP_TREE_LANES (copy
) = SLP_TREE_LANES (node
);
2828 SLP_TREE_CODE (copy
) = SLP_TREE_CODE (node
);
2830 /* Now turn NODE into a VEC_PERM. */
2831 SLP_TREE_CHILDREN (node
).safe_push (copy
);
2832 SLP_TREE_LANE_PERMUTATION (node
).create (SLP_TREE_LANES (node
));
2833 for (unsigned j
= 0; j
< SLP_TREE_LANES (node
); ++j
)
2834 SLP_TREE_LANE_PERMUTATION (node
)
2835 .quick_push (std::make_pair (0, perms
[perm
][j
]));
2836 SLP_TREE_CODE (node
) = VEC_PERM_EXPR
;
2841 /* Apply the reverse permutation to our stmts. */
2842 vect_slp_permute (perms
[perm
],
2843 SLP_TREE_SCALAR_STMTS (node
), true);
2844 /* And to the load permutation, which we can simply
2845 make regular by design. */
2846 if (SLP_TREE_LOAD_PERMUTATION (node
).exists ())
2848 /* ??? When we handle non-bijective permutes the idea
2849 is that we can force the load-permutation to be
2850 { min, min + 1, min + 2, ... max }. But then the
2851 scalar defs might no longer match the lane content
2852 which means wrong-code with live lane vectorization.
2853 So we possibly have to have NULL entries for those. */
2854 vect_slp_permute (perms
[perm
],
2855 SLP_TREE_LOAD_PERMUTATION (node
), true);
2860 /* Free the perms vector used for propagation. */
2861 while (!perms
.is_empty ())
2862 perms
.pop ().release ();
2866 /* Now elide load permutations that are not necessary. */
2867 for (i
= 0; i
< leafs
.length (); ++i
)
2870 if (!SLP_TREE_LOAD_PERMUTATION (node
).exists ())
2873 /* In basic block vectorization we allow any subchain of an interleaving
2875 FORNOW: not in loop SLP because of realignment complications. */
2876 if (is_a
<bb_vec_info
> (vinfo
))
2878 bool subchain_p
= true;
2879 stmt_vec_info next_load_info
= NULL
;
2880 stmt_vec_info load_info
;
2882 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node
), j
, load_info
)
2885 && (next_load_info
!= load_info
2886 || DR_GROUP_GAP (load_info
) != 1))
2891 next_load_info
= DR_GROUP_NEXT_ELEMENT (load_info
);
2895 SLP_TREE_LOAD_PERMUTATION (node
).release ();
2901 stmt_vec_info load_info
;
2902 bool this_load_permuted
= false;
2904 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node
), j
, load_info
)
2905 if (SLP_TREE_LOAD_PERMUTATION (node
)[j
] != j
)
2907 this_load_permuted
= true;
2910 stmt_vec_info first_stmt_info
2911 = DR_GROUP_FIRST_ELEMENT (SLP_TREE_SCALAR_STMTS (node
)[0]);
2912 if (!this_load_permuted
2913 /* The load requires permutation when unrolling exposes
2914 a gap either because the group is larger than the SLP
2915 group-size or because there is a gap between the groups. */
2916 && (known_eq (LOOP_VINFO_VECT_FACTOR
2917 (as_a
<loop_vec_info
> (vinfo
)), 1U)
2918 || ((SLP_TREE_LANES (node
) == DR_GROUP_SIZE (first_stmt_info
))
2919 && DR_GROUP_GAP (first_stmt_info
) == 0)))
2921 SLP_TREE_LOAD_PERMUTATION (node
).release ();
2929 /* For each possible SLP instance decide whether to SLP it and calculate overall
2930 unrolling factor needed to SLP the loop. Return TRUE if decided to SLP at
2931 least one instance. */
2934 vect_make_slp_decision (loop_vec_info loop_vinfo
)
2937 poly_uint64 unrolling_factor
= 1;
2938 vec
<slp_instance
> slp_instances
= LOOP_VINFO_SLP_INSTANCES (loop_vinfo
);
2939 slp_instance instance
;
2940 int decided_to_slp
= 0;
2942 DUMP_VECT_SCOPE ("vect_make_slp_decision");
2944 FOR_EACH_VEC_ELT (slp_instances
, i
, instance
)
2946 /* FORNOW: SLP if you can. */
2947 /* All unroll factors have the form:
2949 GET_MODE_SIZE (vinfo->vector_mode) * X
2951 for some rational X, so they must have a common multiple. */
2953 = force_common_multiple (unrolling_factor
,
2954 SLP_INSTANCE_UNROLLING_FACTOR (instance
));
2956 /* Mark all the stmts that belong to INSTANCE as PURE_SLP stmts. Later we
2957 call vect_detect_hybrid_slp () to find stmts that need hybrid SLP and
2958 loop-based vectorization. Such stmts will be marked as HYBRID. */
2959 vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance
));
2963 LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo
) = unrolling_factor
;
2965 if (decided_to_slp
&& dump_enabled_p ())
2967 dump_printf_loc (MSG_NOTE
, vect_location
,
2968 "Decided to SLP %d instances. Unrolling factor ",
2970 dump_dec (MSG_NOTE
, unrolling_factor
);
2971 dump_printf (MSG_NOTE
, "\n");
2974 return (decided_to_slp
> 0);
2977 /* Private data for vect_detect_hybrid_slp. */
2980 loop_vec_info loop_vinfo
;
2981 vec
<stmt_vec_info
> *worklist
;
2984 /* Walker for walk_gimple_op. */
2987 vect_detect_hybrid_slp (tree
*tp
, int *, void *data
)
2989 walk_stmt_info
*wi
= (walk_stmt_info
*)data
;
2990 vdhs_data
*dat
= (vdhs_data
*)wi
->info
;
2995 stmt_vec_info def_stmt_info
= dat
->loop_vinfo
->lookup_def (*tp
);
2998 def_stmt_info
= vect_stmt_to_vectorize (def_stmt_info
);
2999 if (PURE_SLP_STMT (def_stmt_info
))
3001 if (dump_enabled_p ())
3002 dump_printf_loc (MSG_NOTE
, vect_location
, "marking hybrid: %G",
3003 def_stmt_info
->stmt
);
3004 STMT_SLP_TYPE (def_stmt_info
) = hybrid
;
3005 dat
->worklist
->safe_push (def_stmt_info
);
3011 /* Find stmts that must be both vectorized and SLPed. */
3014 vect_detect_hybrid_slp (loop_vec_info loop_vinfo
)
3016 DUMP_VECT_SCOPE ("vect_detect_hybrid_slp");
3018 /* All stmts participating in SLP are marked pure_slp, all other
3019 stmts are loop_vect.
3020 First collect all loop_vect stmts into a worklist. */
3021 auto_vec
<stmt_vec_info
> worklist
;
3022 for (unsigned i
= 0; i
< LOOP_VINFO_LOOP (loop_vinfo
)->num_nodes
; ++i
)
3024 basic_block bb
= LOOP_VINFO_BBS (loop_vinfo
)[i
];
3025 for (gphi_iterator gsi
= gsi_start_phis (bb
); !gsi_end_p (gsi
);
3028 gphi
*phi
= gsi
.phi ();
3029 stmt_vec_info stmt_info
= loop_vinfo
->lookup_stmt (phi
);
3030 if (!STMT_SLP_TYPE (stmt_info
) && STMT_VINFO_RELEVANT (stmt_info
))
3031 worklist
.safe_push (stmt_info
);
3033 for (gimple_stmt_iterator gsi
= gsi_start_bb (bb
); !gsi_end_p (gsi
);
3036 gimple
*stmt
= gsi_stmt (gsi
);
3037 if (is_gimple_debug (stmt
))
3039 stmt_vec_info stmt_info
= loop_vinfo
->lookup_stmt (stmt
);
3040 if (STMT_VINFO_IN_PATTERN_P (stmt_info
))
3042 for (gimple_stmt_iterator gsi2
3043 = gsi_start (STMT_VINFO_PATTERN_DEF_SEQ (stmt_info
));
3044 !gsi_end_p (gsi2
); gsi_next (&gsi2
))
3046 stmt_vec_info patt_info
3047 = loop_vinfo
->lookup_stmt (gsi_stmt (gsi2
));
3048 if (!STMT_SLP_TYPE (patt_info
)
3049 && STMT_VINFO_RELEVANT (patt_info
))
3050 worklist
.safe_push (patt_info
);
3052 stmt_info
= STMT_VINFO_RELATED_STMT (stmt_info
);
3054 if (!STMT_SLP_TYPE (stmt_info
) && STMT_VINFO_RELEVANT (stmt_info
))
3055 worklist
.safe_push (stmt_info
);
3059 /* Now we have a worklist of non-SLP stmts, follow use->def chains and
3060 mark any SLP vectorized stmt as hybrid.
3061 ??? We're visiting def stmts N times (once for each non-SLP and
3062 once for each hybrid-SLP use). */
3065 dat
.worklist
= &worklist
;
3066 dat
.loop_vinfo
= loop_vinfo
;
3067 memset (&wi
, 0, sizeof (wi
));
3068 wi
.info
= (void *)&dat
;
3069 while (!worklist
.is_empty ())
3071 stmt_vec_info stmt_info
= worklist
.pop ();
3072 /* Since SSA operands are not set up for pattern stmts we need
3073 to use walk_gimple_op. */
3075 walk_gimple_op (stmt_info
->stmt
, vect_detect_hybrid_slp
, &wi
);
3080 /* Initialize a bb_vec_info struct for the statements in BBS basic blocks. */
3082 _bb_vec_info::_bb_vec_info (vec
<basic_block
> _bbs
, vec_info_shared
*shared
)
3083 : vec_info (vec_info::bb
, init_cost (NULL
), shared
), bbs (_bbs
)
3085 for (unsigned i
= 0; i
< bbs
.length (); ++i
)
3088 for (gphi_iterator si
= gsi_start_phis (bbs
[i
]); !gsi_end_p (si
);
3091 gphi
*phi
= si
.phi ();
3092 gimple_set_uid (phi
, 0);
3095 for (gimple_stmt_iterator gsi
= gsi_start_bb (bbs
[i
]);
3096 !gsi_end_p (gsi
); gsi_next (&gsi
))
3098 gimple
*stmt
= gsi_stmt (gsi
);
3099 gimple_set_uid (stmt
, 0);
3100 if (is_gimple_debug (stmt
))
3108 /* Free BB_VINFO struct, as well as all the stmt_vec_info structs of all the
3109 stmts in the basic block. */
3111 _bb_vec_info::~_bb_vec_info ()
3113 /* Reset region marker. */
3114 for (unsigned i
= 0; i
< bbs
.length (); ++i
)
3117 for (gphi_iterator si
= gsi_start_phis (bbs
[i
]); !gsi_end_p (si
);
3120 gphi
*phi
= si
.phi ();
3121 gimple_set_uid (phi
, -1);
3123 for (gimple_stmt_iterator gsi
= gsi_start_bb (bbs
[i
]);
3124 !gsi_end_p (gsi
); gsi_next (&gsi
))
3126 gimple
*stmt
= gsi_stmt (gsi
);
3127 gimple_set_uid (stmt
, -1);
3132 /* Subroutine of vect_slp_analyze_node_operations. Handle the root of NODE,
3133 given then that child nodes have already been processed, and that
3134 their def types currently match their SLP node's def type. */
3137 vect_slp_analyze_node_operations_1 (vec_info
*vinfo
, slp_tree node
,
3138 slp_instance node_instance
,
3139 stmt_vector_for_cost
*cost_vec
)
3141 stmt_vec_info stmt_info
= SLP_TREE_REPRESENTATIVE (node
);
3142 gcc_assert (STMT_SLP_TYPE (stmt_info
) != loop_vect
);
3144 /* Calculate the number of vector statements to be created for the
3145 scalar stmts in this node. For SLP reductions it is equal to the
3146 number of vector statements in the children (which has already been
3147 calculated by the recursive call). Otherwise it is the number of
3148 scalar elements in one scalar iteration (DR_GROUP_SIZE) multiplied by
3149 VF divided by the number of elements in a vector. */
3150 if (!STMT_VINFO_GROUPED_ACCESS (stmt_info
)
3151 && REDUC_GROUP_FIRST_ELEMENT (stmt_info
))
3153 for (unsigned i
= 0; i
< SLP_TREE_CHILDREN (node
).length (); ++i
)
3154 if (SLP_TREE_DEF_TYPE (SLP_TREE_CHILDREN (node
)[i
]) == vect_internal_def
)
3156 SLP_TREE_NUMBER_OF_VEC_STMTS (node
)
3157 = SLP_TREE_NUMBER_OF_VEC_STMTS (SLP_TREE_CHILDREN (node
)[i
]);
3164 if (loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
))
3165 vf
= loop_vinfo
->vectorization_factor
;
3168 unsigned int group_size
= SLP_TREE_LANES (node
);
3169 tree vectype
= SLP_TREE_VECTYPE (node
);
3170 SLP_TREE_NUMBER_OF_VEC_STMTS (node
)
3171 = vect_get_num_vectors (vf
* group_size
, vectype
);
3174 /* Handle purely internal nodes. */
3175 if (SLP_TREE_CODE (node
) == VEC_PERM_EXPR
)
3176 return vectorizable_slp_permutation (vinfo
, NULL
, node
, cost_vec
);
3178 if (is_a
<bb_vec_info
> (vinfo
)
3179 && !vect_update_shared_vectype (stmt_info
, SLP_TREE_VECTYPE (node
)))
3183 return vect_analyze_stmt (vinfo
, stmt_info
, &dummy
,
3184 node
, node_instance
, cost_vec
);
3187 /* Try to build NODE from scalars, returning true on success.
3188 NODE_INSTANCE is the SLP instance that contains NODE. */
3191 vect_slp_convert_to_external (vec_info
*vinfo
, slp_tree node
,
3192 slp_instance node_instance
)
3194 stmt_vec_info stmt_info
;
3197 if (!is_a
<bb_vec_info
> (vinfo
)
3198 || node
== SLP_INSTANCE_TREE (node_instance
)
3199 || !SLP_TREE_SCALAR_STMTS (node
).exists ()
3200 || vect_contains_pattern_stmt_p (SLP_TREE_SCALAR_STMTS (node
)))
3203 if (dump_enabled_p ())
3204 dump_printf_loc (MSG_NOTE
, vect_location
,
3205 "Building vector operands from scalars instead\n");
3207 /* Don't remove and free the child nodes here, since they could be
3208 referenced by other structures. The analysis and scheduling phases
3209 (need to) ignore child nodes of anything that isn't vect_internal_def. */
3210 unsigned int group_size
= SLP_TREE_LANES (node
);
3211 SLP_TREE_DEF_TYPE (node
) = vect_external_def
;
3212 SLP_TREE_SCALAR_OPS (node
).safe_grow (group_size
, true);
3213 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node
), i
, stmt_info
)
3215 tree lhs
= gimple_get_lhs (vect_orig_stmt (stmt_info
)->stmt
);
3216 SLP_TREE_SCALAR_OPS (node
)[i
] = lhs
;
3221 /* Compute the prologue cost for invariant or constant operands represented
3225 vect_prologue_cost_for_slp (slp_tree node
,
3226 stmt_vector_for_cost
*cost_vec
)
3228 /* There's a special case of an existing vector, that costs nothing. */
3229 if (SLP_TREE_SCALAR_OPS (node
).length () == 0
3230 && !SLP_TREE_VEC_DEFS (node
).is_empty ())
3232 /* Without looking at the actual initializer a vector of
3233 constants can be implemented as load from the constant pool.
3234 When all elements are the same we can use a splat. */
3235 tree vectype
= SLP_TREE_VECTYPE (node
);
3236 unsigned group_size
= SLP_TREE_SCALAR_OPS (node
).length ();
3237 unsigned num_vects_to_check
;
3238 unsigned HOST_WIDE_INT const_nunits
;
3239 unsigned nelt_limit
;
3240 if (TYPE_VECTOR_SUBPARTS (vectype
).is_constant (&const_nunits
)
3241 && ! multiple_p (const_nunits
, group_size
))
3243 num_vects_to_check
= SLP_TREE_NUMBER_OF_VEC_STMTS (node
);
3244 nelt_limit
= const_nunits
;
3248 /* If either the vector has variable length or the vectors
3249 are composed of repeated whole groups we only need to
3250 cost construction once. All vectors will be the same. */
3251 num_vects_to_check
= 1;
3252 nelt_limit
= group_size
;
3254 tree elt
= NULL_TREE
;
3256 for (unsigned j
= 0; j
< num_vects_to_check
* nelt_limit
; ++j
)
3258 unsigned si
= j
% group_size
;
3260 elt
= SLP_TREE_SCALAR_OPS (node
)[si
];
3261 /* ??? We're just tracking whether all operands of a single
3262 vector initializer are the same, ideally we'd check if
3263 we emitted the same one already. */
3264 else if (elt
!= SLP_TREE_SCALAR_OPS (node
)[si
])
3267 if (nelt
== nelt_limit
)
3269 record_stmt_cost (cost_vec
, 1,
3270 SLP_TREE_DEF_TYPE (node
) == vect_external_def
3271 ? (elt
? scalar_to_vec
: vec_construct
)
3273 NULL
, vectype
, 0, vect_prologue
);
3279 /* Analyze statements contained in SLP tree NODE after recursively analyzing
3280 the subtree. NODE_INSTANCE contains NODE and VINFO contains INSTANCE.
3282 Return true if the operations are supported. */
3285 vect_slp_analyze_node_operations (vec_info
*vinfo
, slp_tree node
,
3286 slp_instance node_instance
,
3287 hash_set
<slp_tree
> &visited
,
3288 hash_set
<slp_tree
> &lvisited
,
3289 stmt_vector_for_cost
*cost_vec
)
3294 /* Assume we can code-generate all invariants. */
3295 if (SLP_TREE_DEF_TYPE (node
) != vect_internal_def
)
3298 /* If we already analyzed the exact same set of scalar stmts we're done.
3299 We share the generated vector stmts for those. */
3300 if (visited
.contains (node
)
3301 || lvisited
.add (node
))
3305 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), i
, child
)
3307 res
= vect_slp_analyze_node_operations (vinfo
, child
, node_instance
,
3308 visited
, lvisited
, cost_vec
);
3314 res
= vect_slp_analyze_node_operations_1 (vinfo
, node
, node_instance
,
3317 lvisited
.remove (node
);
3319 /* When the node can be vectorized cost invariant nodes it references.
3320 This is not done in DFS order to allow the refering node
3321 vectorizable_* calls to nail down the invariant nodes vector type
3322 and possibly unshare it if it needs a different vector type than
3325 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), j
, child
)
3326 if ((SLP_TREE_DEF_TYPE (child
) == vect_constant_def
3327 || SLP_TREE_DEF_TYPE (child
) == vect_external_def
)
3328 /* Perform usual caching, note code-generation still
3329 code-gens these nodes multiple times but we expect
3330 to CSE them later. */
3331 && !visited
.contains (child
)
3332 && !lvisited
.add (child
))
3334 /* ??? After auditing more code paths make a "default"
3335 and push the vector type from NODE to all children
3336 if it is not already set. */
3337 /* Compute the number of vectors to be generated. */
3338 tree vector_type
= SLP_TREE_VECTYPE (child
);
3341 /* For shifts with a scalar argument we don't need
3342 to cost or code-generate anything.
3343 ??? Represent this more explicitely. */
3344 gcc_assert ((STMT_VINFO_TYPE (SLP_TREE_REPRESENTATIVE (node
))
3345 == shift_vec_info_type
)
3349 unsigned group_size
= SLP_TREE_LANES (child
);
3351 if (loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
))
3352 vf
= loop_vinfo
->vectorization_factor
;
3353 SLP_TREE_NUMBER_OF_VEC_STMTS (child
)
3354 = vect_get_num_vectors (vf
* group_size
, vector_type
);
3355 /* And cost them. */
3356 vect_prologue_cost_for_slp (child
, cost_vec
);
3359 /* If this node or any of its children can't be vectorized, try pruning
3360 the tree here rather than felling the whole thing. */
3361 if (!res
&& vect_slp_convert_to_external (vinfo
, node
, node_instance
))
3363 /* We'll need to revisit this for invariant costing and number
3364 of vectorized stmt setting. */
3372 /* Mark lanes of NODE that are live outside of the basic-block vectorized
3373 region and that can be vectorized using vectorizable_live_operation
3374 with STMT_VINFO_LIVE_P. Not handled live operations will cause the
3375 scalar code computing it to be retained. */
3378 vect_bb_slp_mark_live_stmts (bb_vec_info bb_vinfo
, slp_tree node
,
3379 slp_instance instance
,
3380 stmt_vector_for_cost
*cost_vec
,
3381 hash_set
<stmt_vec_info
> &svisited
)
3384 stmt_vec_info stmt_info
;
3385 stmt_vec_info last_stmt
= vect_find_last_scalar_stmt_in_slp (node
);
3386 bool all_visited
= true;
3387 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node
), i
, stmt_info
)
3389 if (svisited
.contains (stmt_info
))
3391 all_visited
= false;
3392 stmt_vec_info orig_stmt_info
= vect_orig_stmt (stmt_info
);
3393 if (STMT_VINFO_IN_PATTERN_P (orig_stmt_info
)
3394 && STMT_VINFO_RELATED_STMT (orig_stmt_info
) != stmt_info
)
3395 /* Only the pattern root stmt computes the original scalar value. */
3397 bool mark_visited
= true;
3398 gimple
*orig_stmt
= orig_stmt_info
->stmt
;
3399 ssa_op_iter op_iter
;
3400 def_operand_p def_p
;
3401 FOR_EACH_SSA_DEF_OPERAND (def_p
, orig_stmt
, op_iter
, SSA_OP_DEF
)
3403 imm_use_iterator use_iter
;
3405 stmt_vec_info use_stmt_info
;
3406 FOR_EACH_IMM_USE_STMT (use_stmt
, use_iter
, DEF_FROM_PTR (def_p
))
3407 if (!is_gimple_debug (use_stmt
))
3409 use_stmt_info
= bb_vinfo
->lookup_stmt (use_stmt
);
3411 || !PURE_SLP_STMT (vect_stmt_to_vectorize (use_stmt_info
)))
3413 STMT_VINFO_LIVE_P (stmt_info
) = true;
3414 if (vectorizable_live_operation (bb_vinfo
, stmt_info
,
3415 NULL
, node
, instance
, i
,
3417 /* ??? So we know we can vectorize the live stmt
3418 from one SLP node. If we cannot do so from all
3419 or none consistently we'd have to record which
3420 SLP node (and lane) we want to use for the live
3421 operation. So make sure we can code-generate
3423 mark_visited
= false;
3425 STMT_VINFO_LIVE_P (stmt_info
) = false;
3426 BREAK_FROM_IMM_USE_STMT (use_iter
);
3429 /* We have to verify whether we can insert the lane extract
3430 before all uses. The following is a conservative approximation.
3431 We cannot put this into vectorizable_live_operation because
3432 iterating over all use stmts from inside a FOR_EACH_IMM_USE_STMT
3434 Note that while the fact that we emit code for loads at the
3435 first load should make this a non-problem leafs we construct
3436 from scalars are vectorized after the last scalar def.
3437 ??? If we'd actually compute the insert location during
3438 analysis we could use sth less conservative than the last
3439 scalar stmt in the node for the dominance check. */
3440 /* ??? What remains is "live" uses in vector CTORs in the same
3441 SLP graph which is where those uses can end up code-generated
3442 right after their definition instead of close to their original
3443 use. But that would restrict us to code-generate lane-extracts
3444 from the latest stmt in a node. So we compensate for this
3445 during code-generation, simply not replacing uses for those
3446 hopefully rare cases. */
3447 if (STMT_VINFO_LIVE_P (stmt_info
))
3448 FOR_EACH_IMM_USE_STMT (use_stmt
, use_iter
, DEF_FROM_PTR (def_p
))
3449 if (!is_gimple_debug (use_stmt
)
3450 && (!(use_stmt_info
= bb_vinfo
->lookup_stmt (use_stmt
))
3451 || !PURE_SLP_STMT (vect_stmt_to_vectorize (use_stmt_info
)))
3452 && !vect_stmt_dominates_stmt_p (last_stmt
->stmt
, use_stmt
))
3454 if (dump_enabled_p ())
3455 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3456 "Cannot determine insertion place for "
3458 STMT_VINFO_LIVE_P (stmt_info
) = false;
3459 mark_visited
= true;
3463 svisited
.add (stmt_info
);
3469 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), i
, child
)
3470 if (SLP_TREE_DEF_TYPE (child
) == vect_internal_def
)
3471 vect_bb_slp_mark_live_stmts (bb_vinfo
, child
, instance
,
3472 cost_vec
, svisited
);
3475 /* Analyze statements in SLP instances of VINFO. Return true if the
3476 operations are supported. */
3479 vect_slp_analyze_operations (vec_info
*vinfo
)
3481 slp_instance instance
;
3484 DUMP_VECT_SCOPE ("vect_slp_analyze_operations");
3486 hash_set
<slp_tree
> visited
;
3487 for (i
= 0; vinfo
->slp_instances
.iterate (i
, &instance
); )
3489 hash_set
<slp_tree
> lvisited
;
3490 stmt_vector_for_cost cost_vec
;
3491 cost_vec
.create (2);
3492 if (is_a
<bb_vec_info
> (vinfo
))
3493 vect_location
= instance
->location ();
3494 if (!vect_slp_analyze_node_operations (vinfo
,
3495 SLP_INSTANCE_TREE (instance
),
3496 instance
, visited
, lvisited
,
3498 /* Instances with a root stmt require vectorized defs for the
3500 || (SLP_INSTANCE_ROOT_STMT (instance
)
3501 && (SLP_TREE_DEF_TYPE (SLP_INSTANCE_TREE (instance
))
3502 != vect_internal_def
)))
3504 slp_tree node
= SLP_INSTANCE_TREE (instance
);
3505 stmt_vec_info stmt_info
= SLP_TREE_SCALAR_STMTS (node
)[0];
3506 if (dump_enabled_p ())
3507 dump_printf_loc (MSG_NOTE
, vect_location
,
3508 "removing SLP instance operations starting from: %G",
3510 vect_free_slp_instance (instance
);
3511 vinfo
->slp_instances
.ordered_remove (i
);
3512 cost_vec
.release ();
3516 for (hash_set
<slp_tree
>::iterator x
= lvisited
.begin();
3517 x
!= lvisited
.end(); ++x
)
3521 /* For BB vectorization remember the SLP graph entry
3523 if (is_a
<bb_vec_info
> (vinfo
))
3524 instance
->cost_vec
= cost_vec
;
3527 add_stmt_costs (vinfo
, vinfo
->target_cost_data
, &cost_vec
);
3528 cost_vec
.release ();
3533 /* Compute vectorizable live stmts. */
3534 if (bb_vec_info bb_vinfo
= dyn_cast
<bb_vec_info
> (vinfo
))
3536 hash_set
<stmt_vec_info
> svisited
;
3537 for (i
= 0; vinfo
->slp_instances
.iterate (i
, &instance
); ++i
)
3539 vect_location
= instance
->location ();
3540 vect_bb_slp_mark_live_stmts (bb_vinfo
, SLP_INSTANCE_TREE (instance
),
3541 instance
, &instance
->cost_vec
, svisited
);
3545 return !vinfo
->slp_instances
.is_empty ();
3548 /* Get the SLP instance leader from INSTANCE_LEADER thereby transitively
3549 closing the eventual chain. */
3552 get_ultimate_leader (slp_instance instance
,
3553 hash_map
<slp_instance
, slp_instance
> &instance_leader
)
3555 auto_vec
<slp_instance
*, 8> chain
;
3557 while (*(tem
= instance_leader
.get (instance
)) != instance
)
3559 chain
.safe_push (tem
);
3562 while (!chain
.is_empty ())
3563 *chain
.pop () = instance
;
3567 /* Worker of vect_bb_partition_graph, recurse on NODE. */
3570 vect_bb_partition_graph_r (bb_vec_info bb_vinfo
,
3571 slp_instance instance
, slp_tree node
,
3572 hash_map
<stmt_vec_info
, slp_instance
> &stmt_to_instance
,
3573 hash_map
<slp_instance
, slp_instance
> &instance_leader
,
3574 hash_set
<slp_tree
> &visited
)
3576 stmt_vec_info stmt_info
;
3579 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node
), i
, stmt_info
)
3582 slp_instance
&stmt_instance
3583 = stmt_to_instance
.get_or_insert (stmt_info
, &existed_p
);
3586 else if (stmt_instance
!= instance
)
3588 /* If we're running into a previously marked stmt make us the
3589 leader of the current ultimate leader. This keeps the
3590 leader chain acyclic and works even when the current instance
3591 connects two previously independent graph parts. */
3592 slp_instance stmt_leader
3593 = get_ultimate_leader (stmt_instance
, instance_leader
);
3594 if (stmt_leader
!= instance
)
3595 instance_leader
.put (stmt_leader
, instance
);
3597 stmt_instance
= instance
;
3600 if (visited
.add (node
))
3604 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), i
, child
)
3605 if (SLP_TREE_DEF_TYPE (child
) == vect_internal_def
)
3606 vect_bb_partition_graph_r (bb_vinfo
, instance
, child
, stmt_to_instance
,
3607 instance_leader
, visited
);
3610 /* Partition the SLP graph into pieces that can be costed independently. */
3613 vect_bb_partition_graph (bb_vec_info bb_vinfo
)
3615 DUMP_VECT_SCOPE ("vect_bb_partition_graph");
3617 /* First walk the SLP graph assigning each involved scalar stmt a
3618 corresponding SLP graph entry and upon visiting a previously
3619 marked stmt, make the stmts leader the current SLP graph entry. */
3620 hash_map
<stmt_vec_info
, slp_instance
> stmt_to_instance
;
3621 hash_map
<slp_instance
, slp_instance
> instance_leader
;
3622 hash_set
<slp_tree
> visited
;
3623 slp_instance instance
;
3624 for (unsigned i
= 0; bb_vinfo
->slp_instances
.iterate (i
, &instance
); ++i
)
3626 instance_leader
.put (instance
, instance
);
3627 vect_bb_partition_graph_r (bb_vinfo
,
3628 instance
, SLP_INSTANCE_TREE (instance
),
3629 stmt_to_instance
, instance_leader
,
3633 /* Then collect entries to each independent subgraph. */
3634 for (unsigned i
= 0; bb_vinfo
->slp_instances
.iterate (i
, &instance
); ++i
)
3636 slp_instance leader
= get_ultimate_leader (instance
, instance_leader
);
3637 leader
->subgraph_entries
.safe_push (instance
);
3638 if (dump_enabled_p ()
3639 && leader
!= instance
)
3640 dump_printf_loc (MSG_NOTE
, vect_location
,
3641 "instance %p is leader of %p\n",
3646 /* Compute the scalar cost of the SLP node NODE and its children
3647 and return it. Do not account defs that are marked in LIFE and
3648 update LIFE according to uses of NODE. */
3651 vect_bb_slp_scalar_cost (vec_info
*vinfo
,
3652 slp_tree node
, vec
<bool, va_heap
> *life
,
3653 stmt_vector_for_cost
*cost_vec
,
3654 hash_set
<slp_tree
> &visited
)
3657 stmt_vec_info stmt_info
;
3660 if (visited
.add (node
))
3663 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node
), i
, stmt_info
)
3665 ssa_op_iter op_iter
;
3666 def_operand_p def_p
;
3671 stmt_vec_info orig_stmt_info
= vect_orig_stmt (stmt_info
);
3672 gimple
*orig_stmt
= orig_stmt_info
->stmt
;
3674 /* If there is a non-vectorized use of the defs then the scalar
3675 stmt is kept live in which case we do not account it or any
3676 required defs in the SLP children in the scalar cost. This
3677 way we make the vectorization more costly when compared to
3679 if (!STMT_VINFO_LIVE_P (stmt_info
))
3681 FOR_EACH_SSA_DEF_OPERAND (def_p
, orig_stmt
, op_iter
, SSA_OP_DEF
)
3683 imm_use_iterator use_iter
;
3685 FOR_EACH_IMM_USE_STMT (use_stmt
, use_iter
, DEF_FROM_PTR (def_p
))
3686 if (!is_gimple_debug (use_stmt
))
3688 stmt_vec_info use_stmt_info
= vinfo
->lookup_stmt (use_stmt
);
3691 (vect_stmt_to_vectorize (use_stmt_info
)))
3694 BREAK_FROM_IMM_USE_STMT (use_iter
);
3702 /* Count scalar stmts only once. */
3703 if (gimple_visited_p (orig_stmt
))
3705 gimple_set_visited (orig_stmt
, true);
3707 vect_cost_for_stmt kind
;
3708 if (STMT_VINFO_DATA_REF (orig_stmt_info
))
3710 if (DR_IS_READ (STMT_VINFO_DATA_REF (orig_stmt_info
)))
3713 kind
= scalar_store
;
3715 else if (vect_nop_conversion_p (orig_stmt_info
))
3719 record_stmt_cost (cost_vec
, 1, kind
, orig_stmt_info
, 0, vect_body
);
3722 auto_vec
<bool, 20> subtree_life
;
3723 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), i
, child
)
3725 if (SLP_TREE_DEF_TYPE (child
) == vect_internal_def
)
3727 /* Do not directly pass LIFE to the recursive call, copy it to
3728 confine changes in the callee to the current child/subtree. */
3729 if (SLP_TREE_CODE (node
) == VEC_PERM_EXPR
)
3731 subtree_life
.safe_grow_cleared (SLP_TREE_LANES (child
), true);
3732 for (unsigned j
= 0;
3733 j
< SLP_TREE_LANE_PERMUTATION (node
).length (); ++j
)
3735 auto perm
= SLP_TREE_LANE_PERMUTATION (node
)[j
];
3736 if (perm
.first
== i
)
3737 subtree_life
[perm
.second
] = (*life
)[j
];
3742 gcc_assert (SLP_TREE_LANES (node
) == SLP_TREE_LANES (child
));
3743 subtree_life
.safe_splice (*life
);
3745 vect_bb_slp_scalar_cost (vinfo
, child
, &subtree_life
, cost_vec
,
3747 subtree_life
.truncate (0);
3752 /* Check if vectorization of the basic block is profitable for the
3753 subgraph denoted by SLP_INSTANCES. */
3756 vect_bb_vectorization_profitable_p (bb_vec_info bb_vinfo
,
3757 vec
<slp_instance
> slp_instances
)
3759 slp_instance instance
;
3761 unsigned int vec_inside_cost
= 0, vec_outside_cost
= 0, scalar_cost
= 0;
3762 unsigned int vec_prologue_cost
= 0, vec_epilogue_cost
= 0;
3764 void *vect_target_cost_data
= init_cost (NULL
);
3766 /* Calculate scalar cost and sum the cost for the vector stmts
3767 previously collected. */
3768 stmt_vector_for_cost scalar_costs
;
3769 scalar_costs
.create (0);
3770 hash_set
<slp_tree
> visited
;
3771 FOR_EACH_VEC_ELT (slp_instances
, i
, instance
)
3773 auto_vec
<bool, 20> life
;
3774 life
.safe_grow_cleared (SLP_TREE_LANES (SLP_INSTANCE_TREE (instance
)),
3776 vect_bb_slp_scalar_cost (bb_vinfo
,
3777 SLP_INSTANCE_TREE (instance
),
3778 &life
, &scalar_costs
, visited
);
3779 add_stmt_costs (bb_vinfo
, vect_target_cost_data
, &instance
->cost_vec
);
3780 instance
->cost_vec
.release ();
3782 /* Unset visited flag. */
3783 stmt_info_for_cost
*si
;
3784 FOR_EACH_VEC_ELT (scalar_costs
, i
, si
)
3785 gimple_set_visited (si
->stmt_info
->stmt
, false);
3787 void *scalar_target_cost_data
= init_cost (NULL
);
3788 add_stmt_costs (bb_vinfo
, scalar_target_cost_data
, &scalar_costs
);
3789 scalar_costs
.release ();
3791 finish_cost (scalar_target_cost_data
, &dummy
, &scalar_cost
, &dummy
);
3792 destroy_cost_data (scalar_target_cost_data
);
3794 /* Complete the target-specific vector cost calculation. */
3795 finish_cost (vect_target_cost_data
, &vec_prologue_cost
,
3796 &vec_inside_cost
, &vec_epilogue_cost
);
3797 destroy_cost_data (vect_target_cost_data
);
3799 vec_outside_cost
= vec_prologue_cost
+ vec_epilogue_cost
;
3801 if (dump_enabled_p ())
3803 dump_printf_loc (MSG_NOTE
, vect_location
, "Cost model analysis: \n");
3804 dump_printf (MSG_NOTE
, " Vector inside of basic block cost: %d\n",
3806 dump_printf (MSG_NOTE
, " Vector prologue cost: %d\n", vec_prologue_cost
);
3807 dump_printf (MSG_NOTE
, " Vector epilogue cost: %d\n", vec_epilogue_cost
);
3808 dump_printf (MSG_NOTE
, " Scalar cost of basic block: %d\n", scalar_cost
);
3811 /* Vectorization is profitable if its cost is more than the cost of scalar
3812 version. Note that we err on the vector side for equal cost because
3813 the cost estimate is otherwise quite pessimistic (constant uses are
3814 free on the scalar side but cost a load on the vector side for
3816 if (vec_outside_cost
+ vec_inside_cost
> scalar_cost
)
3822 /* Find any vectorizable constructors and add them to the grouped_store
3826 vect_slp_check_for_constructors (bb_vec_info bb_vinfo
)
3828 for (unsigned i
= 0; i
< bb_vinfo
->bbs
.length (); ++i
)
3829 for (gimple_stmt_iterator gsi
= gsi_start_bb (bb_vinfo
->bbs
[i
]);
3830 !gsi_end_p (gsi
); gsi_next (&gsi
))
3832 gassign
*assign
= dyn_cast
<gassign
*> (gsi_stmt (gsi
));
3833 if (!assign
|| gimple_assign_rhs_code (assign
) != CONSTRUCTOR
)
3836 tree rhs
= gimple_assign_rhs1 (assign
);
3837 if (!VECTOR_TYPE_P (TREE_TYPE (rhs
))
3838 || maybe_ne (TYPE_VECTOR_SUBPARTS (TREE_TYPE (rhs
)),
3839 CONSTRUCTOR_NELTS (rhs
))
3840 || VECTOR_TYPE_P (TREE_TYPE (CONSTRUCTOR_ELT (rhs
, 0)->value
))
3841 || uniform_vector_p (rhs
))
3844 stmt_vec_info stmt_info
= bb_vinfo
->lookup_stmt (assign
);
3845 BB_VINFO_GROUPED_STORES (bb_vinfo
).safe_push (stmt_info
);
3849 /* Check if the region described by BB_VINFO can be vectorized, returning
3850 true if so. When returning false, set FATAL to true if the same failure
3851 would prevent vectorization at other vector sizes, false if it is still
3852 worth trying other sizes. N_STMTS is the number of statements in the
3856 vect_slp_analyze_bb_1 (bb_vec_info bb_vinfo
, int n_stmts
, bool &fatal
,
3857 vec
<int> *dataref_groups
)
3859 DUMP_VECT_SCOPE ("vect_slp_analyze_bb");
3861 slp_instance instance
;
3863 poly_uint64 min_vf
= 2;
3865 /* The first group of checks is independent of the vector size. */
3868 /* Analyze the data references. */
3870 if (!vect_analyze_data_refs (bb_vinfo
, &min_vf
, NULL
))
3872 if (dump_enabled_p ())
3873 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3874 "not vectorized: unhandled data-ref in basic "
3879 if (!vect_analyze_data_ref_accesses (bb_vinfo
, dataref_groups
))
3881 if (dump_enabled_p ())
3882 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3883 "not vectorized: unhandled data access in "
3888 vect_slp_check_for_constructors (bb_vinfo
);
3890 /* If there are no grouped stores and no constructors in the region
3891 there is no need to continue with pattern recog as vect_analyze_slp
3892 will fail anyway. */
3893 if (bb_vinfo
->grouped_stores
.is_empty ())
3895 if (dump_enabled_p ())
3896 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3897 "not vectorized: no grouped stores in "
3902 /* While the rest of the analysis below depends on it in some way. */
3905 vect_pattern_recog (bb_vinfo
);
3907 /* Check the SLP opportunities in the basic block, analyze and build SLP
3909 if (!vect_analyze_slp (bb_vinfo
, n_stmts
))
3911 if (dump_enabled_p ())
3913 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3914 "Failed to SLP the basic block.\n");
3915 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3916 "not vectorized: failed to find SLP opportunities "
3917 "in basic block.\n");
3922 /* Optimize permutations. */
3923 vect_optimize_slp (bb_vinfo
);
3925 vect_record_base_alignments (bb_vinfo
);
3927 /* Analyze and verify the alignment of data references and the
3928 dependence in the SLP instances. */
3929 for (i
= 0; BB_VINFO_SLP_INSTANCES (bb_vinfo
).iterate (i
, &instance
); )
3931 vect_location
= instance
->location ();
3932 if (! vect_slp_analyze_instance_alignment (bb_vinfo
, instance
)
3933 || ! vect_slp_analyze_instance_dependence (bb_vinfo
, instance
))
3935 slp_tree node
= SLP_INSTANCE_TREE (instance
);
3936 stmt_vec_info stmt_info
= SLP_TREE_SCALAR_STMTS (node
)[0];
3937 if (dump_enabled_p ())
3938 dump_printf_loc (MSG_NOTE
, vect_location
,
3939 "removing SLP instance operations starting from: %G",
3941 vect_free_slp_instance (instance
);
3942 BB_VINFO_SLP_INSTANCES (bb_vinfo
).ordered_remove (i
);
3946 /* Mark all the statements that we want to vectorize as pure SLP and
3948 vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance
));
3949 vect_mark_slp_stmts_relevant (SLP_INSTANCE_TREE (instance
));
3950 if (SLP_INSTANCE_ROOT_STMT (instance
))
3951 STMT_SLP_TYPE (SLP_INSTANCE_ROOT_STMT (instance
)) = pure_slp
;
3955 if (! BB_VINFO_SLP_INSTANCES (bb_vinfo
).length ())
3958 if (!vect_slp_analyze_operations (bb_vinfo
))
3960 if (dump_enabled_p ())
3961 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3962 "not vectorized: bad operation in basic block.\n");
3966 vect_bb_partition_graph (bb_vinfo
);
3971 /* Subroutine of vect_slp_bb. Try to vectorize the statements for all
3972 basic blocks in BBS, returning true on success.
3973 The region has N_STMTS statements and has the datarefs given by DATAREFS. */
3976 vect_slp_region (vec
<basic_block
> bbs
, vec
<data_reference_p
> datarefs
,
3977 vec
<int> *dataref_groups
, unsigned int n_stmts
)
3979 bb_vec_info bb_vinfo
;
3980 auto_vector_modes vector_modes
;
3982 /* Autodetect first vector size we try. */
3983 machine_mode next_vector_mode
= VOIDmode
;
3984 targetm
.vectorize
.autovectorize_vector_modes (&vector_modes
, false);
3985 unsigned int mode_i
= 0;
3987 vec_info_shared shared
;
3989 machine_mode autodetected_vector_mode
= VOIDmode
;
3992 bool vectorized
= false;
3994 bb_vinfo
= new _bb_vec_info (bbs
, &shared
);
3996 bool first_time_p
= shared
.datarefs
.is_empty ();
3997 BB_VINFO_DATAREFS (bb_vinfo
) = datarefs
;
3999 bb_vinfo
->shared
->save_datarefs ();
4001 bb_vinfo
->shared
->check_datarefs ();
4002 bb_vinfo
->vector_mode
= next_vector_mode
;
4004 if (vect_slp_analyze_bb_1 (bb_vinfo
, n_stmts
, fatal
, dataref_groups
)
4005 && dbg_cnt (vect_slp
))
4007 if (dump_enabled_p ())
4009 dump_printf_loc (MSG_NOTE
, vect_location
,
4010 "***** Analysis succeeded with vector mode"
4011 " %s\n", GET_MODE_NAME (bb_vinfo
->vector_mode
));
4012 dump_printf_loc (MSG_NOTE
, vect_location
, "SLPing BB part\n");
4015 bb_vinfo
->shared
->check_datarefs ();
4018 slp_instance instance
;
4019 FOR_EACH_VEC_ELT (BB_VINFO_SLP_INSTANCES (bb_vinfo
), i
, instance
)
4021 if (instance
->subgraph_entries
.is_empty ())
4024 vect_location
= instance
->location ();
4025 if (!unlimited_cost_model (NULL
)
4026 && !vect_bb_vectorization_profitable_p
4027 (bb_vinfo
, instance
->subgraph_entries
))
4029 if (dump_enabled_p ())
4030 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4031 "not vectorized: vectorization is not "
4036 if (!vectorized
&& dump_enabled_p ())
4037 dump_printf_loc (MSG_NOTE
, vect_location
,
4038 "Basic block will be vectorized "
4042 vect_schedule_slp (bb_vinfo
, instance
->subgraph_entries
);
4044 unsigned HOST_WIDE_INT bytes
;
4045 if (dump_enabled_p ())
4048 (bb_vinfo
->vector_mode
).is_constant (&bytes
))
4049 dump_printf_loc (MSG_OPTIMIZED_LOCATIONS
, vect_location
,
4050 "basic block part vectorized using %wu "
4051 "byte vectors\n", bytes
);
4053 dump_printf_loc (MSG_OPTIMIZED_LOCATIONS
, vect_location
,
4054 "basic block part vectorized using "
4055 "variable length vectors\n");
4061 if (dump_enabled_p ())
4062 dump_printf_loc (MSG_NOTE
, vect_location
,
4063 "***** Analysis failed with vector mode %s\n",
4064 GET_MODE_NAME (bb_vinfo
->vector_mode
));
4068 autodetected_vector_mode
= bb_vinfo
->vector_mode
;
4071 while (mode_i
< vector_modes
.length ()
4072 && vect_chooses_same_modes_p (bb_vinfo
, vector_modes
[mode_i
]))
4074 if (dump_enabled_p ())
4075 dump_printf_loc (MSG_NOTE
, vect_location
,
4076 "***** The result for vector mode %s would"
4078 GET_MODE_NAME (vector_modes
[mode_i
]));
4084 if (mode_i
< vector_modes
.length ()
4085 && VECTOR_MODE_P (autodetected_vector_mode
)
4086 && (related_vector_mode (vector_modes
[mode_i
],
4087 GET_MODE_INNER (autodetected_vector_mode
))
4088 == autodetected_vector_mode
)
4089 && (related_vector_mode (autodetected_vector_mode
,
4090 GET_MODE_INNER (vector_modes
[mode_i
]))
4091 == vector_modes
[mode_i
]))
4093 if (dump_enabled_p ())
4094 dump_printf_loc (MSG_NOTE
, vect_location
,
4095 "***** Skipping vector mode %s, which would"
4096 " repeat the analysis for %s\n",
4097 GET_MODE_NAME (vector_modes
[mode_i
]),
4098 GET_MODE_NAME (autodetected_vector_mode
));
4103 || mode_i
== vector_modes
.length ()
4104 || autodetected_vector_mode
== VOIDmode
4105 /* If vect_slp_analyze_bb_1 signaled that analysis for all
4106 vector sizes will fail do not bother iterating. */
4110 /* Try the next biggest vector size. */
4111 next_vector_mode
= vector_modes
[mode_i
++];
4112 if (dump_enabled_p ())
4113 dump_printf_loc (MSG_NOTE
, vect_location
,
4114 "***** Re-trying analysis with vector mode %s\n",
4115 GET_MODE_NAME (next_vector_mode
));
4120 /* Main entry for the BB vectorizer. Analyze and transform BBS, returns
4121 true if anything in the basic-block was vectorized. */
4124 vect_slp_bbs (vec
<basic_block
> bbs
)
4126 vec
<data_reference_p
> datarefs
= vNULL
;
4127 auto_vec
<int> dataref_groups
;
4129 int current_group
= 0;
4131 for (unsigned i
= 0; i
< bbs
.length (); i
++)
4133 basic_block bb
= bbs
[i
];
4134 for (gimple_stmt_iterator gsi
= gsi_after_labels (bb
); !gsi_end_p (gsi
);
4137 gimple
*stmt
= gsi_stmt (gsi
);
4138 if (is_gimple_debug (stmt
))
4143 if (gimple_location (stmt
) != UNKNOWN_LOCATION
)
4144 vect_location
= stmt
;
4146 if (!vect_find_stmt_data_reference (NULL
, stmt
, &datarefs
,
4147 &dataref_groups
, current_group
))
4150 if (insns
> param_slp_max_insns_in_bb
)
4152 if (dump_enabled_p ())
4153 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4154 "not vectorized: too many instructions in "
4160 return vect_slp_region (bbs
, datarefs
, &dataref_groups
, insns
);
4163 /* Main entry for the BB vectorizer. Analyze and transform BB, returns
4164 true if anything in the basic-block was vectorized. */
4167 vect_slp_bb (basic_block bb
)
4169 auto_vec
<basic_block
> bbs
;
4171 return vect_slp_bbs (bbs
);
4174 /* Main entry for the BB vectorizer. Analyze and transform BB, returns
4175 true if anything in the basic-block was vectorized. */
4178 vect_slp_function (function
*fun
)
4181 int *rpo
= XNEWVEC (int, n_basic_blocks_for_fn (fun
));
4182 unsigned n
= pre_and_rev_post_order_compute_fn (fun
, NULL
, rpo
, false);
4184 /* For the moment split the function into pieces to avoid making
4185 the iteration on the vector mode moot. Split at points we know
4186 to not handle well which is CFG merges (SLP discovery doesn't
4187 handle non-loop-header PHIs) and loop exits. Since pattern
4188 recog requires reverse iteration to visit uses before defs
4189 simply chop RPO into pieces. */
4190 auto_vec
<basic_block
> bbs
;
4191 for (unsigned i
= 0; i
< n
; i
++)
4193 basic_block bb
= BASIC_BLOCK_FOR_FN (fun
, rpo
[i
]);
4195 /* Split when a basic block has multiple predecessors or when the
4196 edge into it exits a loop (because of implementation issues with
4197 respect to placement of CTORs for externals). */
4200 if (!single_pred_p (bb
)
4201 || ((e
= single_pred_edge (bb
)),
4202 loop_exit_edge_p (e
->src
->loop_father
, e
)))
4204 /* Split when a BB is not dominated by the first block. */
4205 else if (!bbs
.is_empty ()
4206 && !dominated_by_p (CDI_DOMINATORS
, bb
, bbs
[0]))
4209 if (split
&& !bbs
.is_empty ())
4211 r
|= vect_slp_bbs (bbs
);
4213 bbs
.quick_push (bb
);
4219 if (!bbs
.is_empty ())
4220 r
|= vect_slp_bbs (bbs
);
4227 /* Build a variable-length vector in which the elements in ELTS are repeated
4228 to a fill NRESULTS vectors of type VECTOR_TYPE. Store the vectors in
4229 RESULTS and add any new instructions to SEQ.
4231 The approach we use is:
4233 (1) Find a vector mode VM with integer elements of mode IM.
4235 (2) Replace ELTS[0:NELTS] with ELTS'[0:NELTS'], where each element of
4236 ELTS' has mode IM. This involves creating NELTS' VIEW_CONVERT_EXPRs
4237 from small vectors to IM.
4239 (3) Duplicate each ELTS'[I] into a vector of mode VM.
4241 (4) Use a tree of interleaving VEC_PERM_EXPRs to create VMs with the
4242 correct byte contents.
4244 (5) Use VIEW_CONVERT_EXPR to cast the final VMs to the required type.
4246 We try to find the largest IM for which this sequence works, in order
4247 to cut down on the number of interleaves. */
4250 duplicate_and_interleave (vec_info
*vinfo
, gimple_seq
*seq
, tree vector_type
,
4251 vec
<tree
> elts
, unsigned int nresults
,
4254 unsigned int nelts
= elts
.length ();
4255 tree element_type
= TREE_TYPE (vector_type
);
4257 /* (1) Find a vector mode VM with integer elements of mode IM. */
4258 unsigned int nvectors
= 1;
4259 tree new_vector_type
;
4261 if (!can_duplicate_and_interleave_p (vinfo
, nelts
, element_type
,
4262 &nvectors
, &new_vector_type
,
4266 /* Get a vector type that holds ELTS[0:NELTS/NELTS']. */
4267 unsigned int partial_nelts
= nelts
/ nvectors
;
4268 tree partial_vector_type
= build_vector_type (element_type
, partial_nelts
);
4270 tree_vector_builder partial_elts
;
4271 auto_vec
<tree
, 32> pieces (nvectors
* 2);
4272 pieces
.quick_grow (nvectors
* 2);
4273 for (unsigned int i
= 0; i
< nvectors
; ++i
)
4275 /* (2) Replace ELTS[0:NELTS] with ELTS'[0:NELTS'], where each element of
4276 ELTS' has mode IM. */
4277 partial_elts
.new_vector (partial_vector_type
, partial_nelts
, 1);
4278 for (unsigned int j
= 0; j
< partial_nelts
; ++j
)
4279 partial_elts
.quick_push (elts
[i
* partial_nelts
+ j
]);
4280 tree t
= gimple_build_vector (seq
, &partial_elts
);
4281 t
= gimple_build (seq
, VIEW_CONVERT_EXPR
,
4282 TREE_TYPE (new_vector_type
), t
);
4284 /* (3) Duplicate each ELTS'[I] into a vector of mode VM. */
4285 pieces
[i
] = gimple_build_vector_from_val (seq
, new_vector_type
, t
);
4288 /* (4) Use a tree of VEC_PERM_EXPRs to create a single VM with the
4289 correct byte contents.
4291 We need to repeat the following operation log2(nvectors) times:
4293 out[i * 2] = VEC_PERM_EXPR (in[i], in[i + hi_start], lo_permute);
4294 out[i * 2 + 1] = VEC_PERM_EXPR (in[i], in[i + hi_start], hi_permute);
4296 However, if each input repeats every N elements and the VF is
4297 a multiple of N * 2, the HI result is the same as the LO. */
4298 unsigned int in_start
= 0;
4299 unsigned int out_start
= nvectors
;
4300 unsigned int hi_start
= nvectors
/ 2;
4301 /* A bound on the number of outputs needed to produce NRESULTS results
4302 in the final iteration. */
4303 unsigned int noutputs_bound
= nvectors
* nresults
;
4304 for (unsigned int in_repeat
= 1; in_repeat
< nvectors
; in_repeat
*= 2)
4306 noutputs_bound
/= 2;
4307 unsigned int limit
= MIN (noutputs_bound
, nvectors
);
4308 for (unsigned int i
= 0; i
< limit
; ++i
)
4311 && multiple_p (TYPE_VECTOR_SUBPARTS (new_vector_type
),
4314 pieces
[out_start
+ i
] = pieces
[out_start
+ i
- 1];
4318 tree output
= make_ssa_name (new_vector_type
);
4319 tree input1
= pieces
[in_start
+ (i
/ 2)];
4320 tree input2
= pieces
[in_start
+ (i
/ 2) + hi_start
];
4321 gassign
*stmt
= gimple_build_assign (output
, VEC_PERM_EXPR
,
4324 gimple_seq_add_stmt (seq
, stmt
);
4325 pieces
[out_start
+ i
] = output
;
4327 std::swap (in_start
, out_start
);
4330 /* (5) Use VIEW_CONVERT_EXPR to cast the final VM to the required type. */
4331 results
.reserve (nresults
);
4332 for (unsigned int i
= 0; i
< nresults
; ++i
)
4334 results
.quick_push (gimple_build (seq
, VIEW_CONVERT_EXPR
, vector_type
,
4335 pieces
[in_start
+ i
]));
4337 results
.quick_push (results
[i
- nvectors
]);
4341 /* For constant and loop invariant defs in OP_NODE this function creates
4342 vector defs that will be used in the vectorized stmts and stores them
4343 to SLP_TREE_VEC_DEFS of OP_NODE. */
4346 vect_create_constant_vectors (vec_info
*vinfo
, slp_tree op_node
)
4348 unsigned HOST_WIDE_INT nunits
;
4350 unsigned j
, number_of_places_left_in_vector
;
4353 int group_size
= op_node
->ops
.length ();
4354 unsigned int vec_num
, i
;
4355 unsigned number_of_copies
= 1;
4357 gimple_seq ctor_seq
= NULL
;
4358 auto_vec
<tree
, 16> permute_results
;
4360 /* We always want SLP_TREE_VECTYPE (op_node) here correctly set. */
4361 vector_type
= SLP_TREE_VECTYPE (op_node
);
4363 unsigned int number_of_vectors
= SLP_TREE_NUMBER_OF_VEC_STMTS (op_node
);
4364 SLP_TREE_VEC_DEFS (op_node
).create (number_of_vectors
);
4365 auto_vec
<tree
> voprnds (number_of_vectors
);
4367 /* NUMBER_OF_COPIES is the number of times we need to use the same values in
4368 created vectors. It is greater than 1 if unrolling is performed.
4370 For example, we have two scalar operands, s1 and s2 (e.g., group of
4371 strided accesses of size two), while NUNITS is four (i.e., four scalars
4372 of this type can be packed in a vector). The output vector will contain
4373 two copies of each scalar operand: {s1, s2, s1, s2}. (NUMBER_OF_COPIES
4376 If GROUP_SIZE > NUNITS, the scalars will be split into several vectors
4377 containing the operands.
4379 For example, NUNITS is four as before, and the group size is 8
4380 (s1, s2, ..., s8). We will create two vectors {s1, s2, s3, s4} and
4381 {s5, s6, s7, s8}. */
4383 /* When using duplicate_and_interleave, we just need one element for
4384 each scalar statement. */
4385 if (!TYPE_VECTOR_SUBPARTS (vector_type
).is_constant (&nunits
))
4386 nunits
= group_size
;
4388 number_of_copies
= nunits
* number_of_vectors
/ group_size
;
4390 number_of_places_left_in_vector
= nunits
;
4392 tree_vector_builder
elts (vector_type
, nunits
, 1);
4393 elts
.quick_grow (nunits
);
4394 stmt_vec_info insert_after
= NULL
;
4395 for (j
= 0; j
< number_of_copies
; j
++)
4398 for (i
= group_size
- 1; op_node
->ops
.iterate (i
, &op
); i
--)
4400 /* Create 'vect_ = {op0,op1,...,opn}'. */
4401 number_of_places_left_in_vector
--;
4403 if (!types_compatible_p (TREE_TYPE (vector_type
), TREE_TYPE (op
)))
4405 if (CONSTANT_CLASS_P (op
))
4407 if (VECTOR_BOOLEAN_TYPE_P (vector_type
))
4409 /* Can't use VIEW_CONVERT_EXPR for booleans because
4410 of possibly different sizes of scalar value and
4412 if (integer_zerop (op
))
4413 op
= build_int_cst (TREE_TYPE (vector_type
), 0);
4414 else if (integer_onep (op
))
4415 op
= build_all_ones_cst (TREE_TYPE (vector_type
));
4420 op
= fold_unary (VIEW_CONVERT_EXPR
,
4421 TREE_TYPE (vector_type
), op
);
4422 gcc_assert (op
&& CONSTANT_CLASS_P (op
));
4426 tree new_temp
= make_ssa_name (TREE_TYPE (vector_type
));
4428 if (VECTOR_BOOLEAN_TYPE_P (vector_type
))
4431 = build_all_ones_cst (TREE_TYPE (vector_type
));
4433 = build_zero_cst (TREE_TYPE (vector_type
));
4434 gcc_assert (INTEGRAL_TYPE_P (TREE_TYPE (op
)));
4435 init_stmt
= gimple_build_assign (new_temp
, COND_EXPR
,
4441 op
= build1 (VIEW_CONVERT_EXPR
, TREE_TYPE (vector_type
),
4444 = gimple_build_assign (new_temp
, VIEW_CONVERT_EXPR
,
4447 gimple_seq_add_stmt (&ctor_seq
, init_stmt
);
4451 elts
[number_of_places_left_in_vector
] = op
;
4452 if (!CONSTANT_CLASS_P (op
))
4454 /* For BB vectorization we have to compute an insert location
4455 when a def is inside the analyzed region since we cannot
4456 simply insert at the BB start in this case. */
4457 stmt_vec_info opdef
;
4458 if (TREE_CODE (orig_op
) == SSA_NAME
4459 && !SSA_NAME_IS_DEFAULT_DEF (orig_op
)
4460 && is_a
<bb_vec_info
> (vinfo
)
4461 && (opdef
= vinfo
->lookup_def (orig_op
)))
4464 insert_after
= opdef
;
4466 insert_after
= get_later_stmt (insert_after
, opdef
);
4469 if (number_of_places_left_in_vector
== 0)
4472 ? multiple_p (TYPE_VECTOR_SUBPARTS (vector_type
), nunits
)
4473 : known_eq (TYPE_VECTOR_SUBPARTS (vector_type
), nunits
))
4474 vec_cst
= gimple_build_vector (&ctor_seq
, &elts
);
4477 if (permute_results
.is_empty ())
4478 duplicate_and_interleave (vinfo
, &ctor_seq
, vector_type
,
4479 elts
, number_of_vectors
,
4481 vec_cst
= permute_results
[number_of_vectors
- j
- 1];
4483 if (!gimple_seq_empty_p (ctor_seq
))
4487 gimple_stmt_iterator gsi
;
4488 if (gimple_code (insert_after
->stmt
) == GIMPLE_PHI
)
4490 gsi
= gsi_after_labels (gimple_bb (insert_after
->stmt
));
4491 gsi_insert_seq_before (&gsi
, ctor_seq
,
4492 GSI_CONTINUE_LINKING
);
4494 else if (!stmt_ends_bb_p (insert_after
->stmt
))
4496 gsi
= gsi_for_stmt (insert_after
->stmt
);
4497 gsi_insert_seq_after (&gsi
, ctor_seq
,
4498 GSI_CONTINUE_LINKING
);
4502 /* When we want to insert after a def where the
4503 defining stmt throws then insert on the fallthru
4505 edge e
= find_fallthru_edge
4506 (gimple_bb (insert_after
->stmt
)->succs
);
4508 = gsi_insert_seq_on_edge_immediate (e
, ctor_seq
);
4509 gcc_assert (!new_bb
);
4513 vinfo
->insert_seq_on_entry (NULL
, ctor_seq
);
4516 voprnds
.quick_push (vec_cst
);
4517 insert_after
= NULL
;
4518 number_of_places_left_in_vector
= nunits
;
4520 elts
.new_vector (vector_type
, nunits
, 1);
4521 elts
.quick_grow (nunits
);
4526 /* Since the vectors are created in the reverse order, we should invert
4528 vec_num
= voprnds
.length ();
4529 for (j
= vec_num
; j
!= 0; j
--)
4531 vop
= voprnds
[j
- 1];
4532 SLP_TREE_VEC_DEFS (op_node
).quick_push (vop
);
4535 /* In case that VF is greater than the unrolling factor needed for the SLP
4536 group of stmts, NUMBER_OF_VECTORS to be created is greater than
4537 NUMBER_OF_SCALARS/NUNITS or NUNITS/NUMBER_OF_SCALARS, and hence we have
4538 to replicate the vectors. */
4539 while (number_of_vectors
> SLP_TREE_VEC_DEFS (op_node
).length ())
4540 for (i
= 0; SLP_TREE_VEC_DEFS (op_node
).iterate (i
, &vop
) && i
< vec_num
;
4542 SLP_TREE_VEC_DEFS (op_node
).quick_push (vop
);
4545 /* Get the Ith vectorized definition from SLP_NODE. */
/* Internal-def nodes store their vectorized defs as gimple statements
   (SLP_TREE_VEC_STMTS) — return the LHS of the Ith one; otherwise the
   defs were recorded directly as trees in SLP_TREE_VEC_DEFS.
   NOTE(review): the return-type line and braces of this function are
   missing from this extraction — confirm against upstream.  */
4548 vect_get_slp_vect_def (slp_tree slp_node
, unsigned i
)
4550 if (SLP_TREE_VEC_STMTS (slp_node
).exists ())
4551 return gimple_get_lhs (SLP_TREE_VEC_STMTS (slp_node
)[i
]);
4553 return SLP_TREE_VEC_DEFS (slp_node
)[i
];
4556 /* Get the vectorized definitions of SLP_NODE in *VEC_DEFS. */
/* *VEC_DEFS is created here sized to the number of vector stmts of the
   node.  For internal defs the LHS of each vector stmt is pushed; for
   constant/external defs the pre-computed SLP_TREE_VEC_DEFS vector is
   spliced in directly.  */
4559 vect_get_slp_defs (slp_tree slp_node
, vec
<tree
> *vec_defs
)
4561 vec_defs
->create (SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
));
4562 if (SLP_TREE_DEF_TYPE (slp_node
) == vect_internal_def
)
/* Internal def: collect the SSA results of the vectorized stmts.  */
4565 gimple
*vec_def_stmt
;
4566 FOR_EACH_VEC_ELT (SLP_TREE_VEC_STMTS (slp_node
), j
, vec_def_stmt
)
4567 vec_defs
->quick_push (gimple_get_lhs (vec_def_stmt
));
/* Otherwise the defs are already available as trees.  */
4570 vec_defs
->splice (SLP_TREE_VEC_DEFS (slp_node
));
4573 /* Get N vectorized definitions for SLP_NODE. */
/* Overload taking the number of operands N: for each of the first N
   children of SLP_NODE, collect that child's vectorized defs into a
   fresh vec<tree> and push it onto *VEC_OPRNDS.  The vec_info argument
   is unused (kept for interface symmetry).
   NOTE(review): a line between the signature and the assignment to N
   is absent from this extraction — presumably a guard for the
   "N not specified" case; confirm against upstream.  */
4576 vect_get_slp_defs (vec_info
*,
4577 slp_tree slp_node
, vec
<vec
<tree
> > *vec_oprnds
, unsigned n
)
4580 n
= SLP_TREE_CHILDREN (slp_node
).length ();
4582 for (unsigned i
= 0; i
< n
; ++i
)
4584 slp_tree child
= SLP_TREE_CHILDREN (slp_node
)[i
];
4585 vec
<tree
> vec_defs
= vNULL
;
4586 vect_get_slp_defs (child
, &vec_defs
);
4587 vec_oprnds
->quick_push (vec_defs
);
4591 /* Generate vector permute statements from a list of loads in DR_CHAIN.
4592 If ANALYZE_ONLY is TRUE, only check that it is possible to create valid
4593 permute statements for the SLP node NODE. */
/* Parameters:
     VINFO        - enclosing loop/bb vectorization info.
     NODE         - the SLP load node whose SLP_TREE_LOAD_PERMUTATION
                    is materialized as VEC_PERM_EXPRs.
     DR_CHAIN     - the vectorized loads the permutes select from.
     GSI          - insertion point for generated stmts.
     VF           - vectorization factor.
     ANALYZE_ONLY - analysis phase only; no code generation.
     N_PERMS      - out: number of permutes (used for costing).
   NOTE(review): this extraction is lossy — several original lines
   (e.g. the declarations of MODE/VEC_INDEX/NOOP_P, some braces and
   else-arms) are absent.  Comments below describe only what is
   visible; confirm details against upstream.  */
4596 vect_transform_slp_perm_load (vec_info
*vinfo
,
4597 slp_tree node
, vec
<tree
> dr_chain
,
4598 gimple_stmt_iterator
*gsi
, poly_uint64 vf
,
4599 bool analyze_only
, unsigned *n_perms
)
4601 stmt_vec_info stmt_info
= SLP_TREE_SCALAR_STMTS (node
)[0];
4603 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
4604 unsigned int group_size
= SLP_TREE_SCALAR_STMTS (node
).length ();
4605 unsigned int mask_element
;
4608 if (!STMT_VINFO_GROUPED_ACCESS (stmt_info
))
/* Work relative to the first element of the interleaving group.  */
4611 stmt_info
= DR_GROUP_FIRST_ELEMENT (stmt_info
);
4613 mode
= TYPE_MODE (vectype
);
4614 poly_uint64 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
4616 /* Initialize the vect stmts of NODE to properly insert the generated
4619 for (unsigned i
= SLP_TREE_VEC_STMTS (node
).length ();
4620 i
< SLP_TREE_NUMBER_OF_VEC_STMTS (node
); i
++)
4621 SLP_TREE_VEC_STMTS (node
).quick_push (NULL
);
4623 /* Generate permutation masks for every NODE. Number of masks for each NODE
4624 is equal to GROUP_SIZE.
4625 E.g., we have a group of three nodes with three loads from the same
4626 location in each node, and the vector size is 4. I.e., we have a
4627 a0b0c0a1b1c1... sequence and we need to create the following vectors:
4628 for a's: a0a0a0a1 a1a1a2a2 a2a3a3a3
4629 for b's: b0b0b0b1 b1b1b2b2 b2b3b3b3
4632 The masks for a's should be: {0,0,0,3} {3,3,6,6} {6,9,9,9}.
4633 The last mask is illegal since we assume two operands for permute
4634 operation, and the mask element values can't be outside that range.
4635 Hence, the last mask must be converted into {2,5,5,5}.
4636 For the first two permutations we need the first and the second input
4637 vectors: {a0,b0,c0,a1} and {b1,c1,a2,b2}, and for the last permutation
4638 we need the second and the third vectors: {b1,c1,a2,b2} and
4641 int vect_stmts_counter
= 0;
4642 unsigned int index
= 0;
/* Indices of the (at most two) DR_CHAIN vectors feeding the current
   permute; -1 means "not yet chosen".  */
4643 int first_vec_index
= -1;
4644 int second_vec_index
= -1;
4648 vec_perm_builder mask
;
4649 unsigned int nelts_to_build
;
4650 unsigned int nvectors_per_build
;
/* REPEATING_P: the group pattern repeats exactly within one vector, so
   a single (possibly variable-length) mask serves every vector stmt.  */
4651 bool repeating_p
= (group_size
== DR_GROUP_SIZE (stmt_info
)
4652 && multiple_p (nunits
, group_size
));
4655 /* A single vector contains a whole number of copies of the node, so:
4656 (a) all permutes can use the same mask; and
4657 (b) the permutes only need a single vector input. */
4658 mask
.new_vector (nunits
, group_size
, 3);
4659 nelts_to_build
= mask
.encoded_nelts ();
4660 nvectors_per_build
= SLP_TREE_VEC_STMTS (node
).length ();
4664 /* We need to construct a separate mask for each vector statement. */
4665 unsigned HOST_WIDE_INT const_nunits
, const_vf
;
/* Non-repeating case requires constant NUNITS and VF to enumerate
   every mask element explicitly.  */
4666 if (!nunits
.is_constant (&const_nunits
)
4667 || !vf
.is_constant (&const_vf
))
4669 mask
.new_vector (const_nunits
, const_nunits
, 1);
4670 nelts_to_build
= const_vf
* group_size
;
4671 nvectors_per_build
= 1;
4674 unsigned int count
= mask
.encoded_nelts ();
4675 mask
.quick_grow (count
);
4676 vec_perm_indices indices
;
4678 for (unsigned int j
= 0; j
< nelts_to_build
; j
++)
4680 unsigned int iter_num
= j
/ group_size
;
4681 unsigned int stmt_num
= j
% group_size
;
/* Scalar element index selected by the load permutation.  */
4682 unsigned int i
= (iter_num
* DR_GROUP_SIZE (stmt_info
)
4683 + SLP_TREE_LOAD_PERMUTATION (node
)[stmt_num
]);
4686 first_vec_index
= 0;
4691 /* Enforced before the loop when !repeating_p. */
4692 unsigned int const_nunits
= nunits
.to_constant ();
4693 vec_index
= i
/ const_nunits
;
4694 mask_element
= i
% const_nunits
;
4695 if (vec_index
== first_vec_index
4696 || first_vec_index
== -1)
4698 first_vec_index
= vec_index
;
4700 else if (vec_index
== second_vec_index
4701 || second_vec_index
== -1)
4703 second_vec_index
= vec_index
;
/* Element comes from the second input: offset into [nunits, 2*nunits).  */
4704 mask_element
+= const_nunits
;
/* A third distinct input vector would be needed — not supported by a
   two-operand VEC_PERM_EXPR.  */
4708 if (dump_enabled_p ())
4709 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4710 "permutation requires at "
4711 "least three vectors %G",
4713 gcc_assert (analyze_only
);
4717 gcc_assert (mask_element
< 2 * const_nunits
);
4720 if (mask_element
!= index
)
4722 mask
[index
++] = mask_element
;
/* Mask complete and not the identity: verify/emit the permute.  */
4724 if (index
== count
&& !noop_p
)
4726 indices
.new_vector (mask
, second_vec_index
== -1 ? 1 : 2, nunits
);
4727 if (!can_vec_perm_const_p (mode
, indices
))
4729 if (dump_enabled_p ())
4731 dump_printf_loc (MSG_MISSED_OPTIMIZATION
,
4733 "unsupported vect permute { ");
4734 for (i
= 0; i
< count
; ++i
)
4736 dump_dec (MSG_MISSED_OPTIMIZATION
, mask
[i
]);
4737 dump_printf (MSG_MISSED_OPTIMIZATION
, " ");
4739 dump_printf (MSG_MISSED_OPTIMIZATION
, "}\n");
4741 gcc_assert (analyze_only
);
4752 tree mask_vec
= NULL_TREE
;
4755 mask_vec
= vect_gen_perm_mask_checked (vectype
, indices
);
/* With only one distinct input, use it for both permute operands.  */
4757 if (second_vec_index
== -1)
4758 second_vec_index
= first_vec_index
;
4760 for (unsigned int ri
= 0; ri
< nvectors_per_build
; ++ri
)
4762 /* Generate the permute statement if necessary. */
4763 tree first_vec
= dr_chain
[first_vec_index
+ ri
];
4764 tree second_vec
= dr_chain
[second_vec_index
+ ri
];
4768 gassign
*stmt
= as_a
<gassign
*> (stmt_info
->stmt
);
4770 = vect_create_destination_var (gimple_assign_lhs (stmt
),
4772 perm_dest
= make_ssa_name (perm_dest
);
4774 = gimple_build_assign (perm_dest
, VEC_PERM_EXPR
,
4775 first_vec
, second_vec
,
4777 vect_finish_stmt_generation (vinfo
, stmt_info
, perm_stmt
,
4781 /* If mask was NULL_TREE generate the requested
4782 identity transform. */
4783 perm_stmt
= SSA_NAME_DEF_STMT (first_vec
)
;
4785 /* Store the vector statement in NODE. */
4786 SLP_TREE_VEC_STMTS (node
)[vect_stmts_counter
++] = perm_stmt
;
/* Reset input-vector tracking for the next mask.  */
4791 first_vec_index
= -1;
4792 second_vec_index
= -1;
4801 /* Vectorize the SLP permutations in NODE as specified
4802 in SLP_TREE_LANE_PERMUTATION which is a vector of pairs of SLP
4803 child number and lane number.
4804 Interleaving of two two-lane two-child SLP subtrees (not supported):
4805 [ { 0, 0 }, { 1, 0 }, { 0, 1 }, { 1, 1 } ]
4806 A blend of two four-lane two-child SLP subtrees:
4807 [ { 0, 0 }, { 1, 1 }, { 0, 2 }, { 1, 3 } ]
4808 Highpart of a four-lane one-child SLP subtree (not supported):
4809 [ { 0, 2 }, { 0, 3 } ]
4810 Where currently only a subset is supported by code generating below. */
/* When GSI is non-NULL this is the transform phase (emit stmts at GSI);
   when COST_VEC is non-NULL this is analysis (record vec_perm costs).
   NOTE(review): this extraction is lossy — some original lines (braces,
   a few statements such as the third new_vector argument and several
   else-arms) are absent; confirm details against upstream.  */
4813 vectorizable_slp_permutation (vec_info
*vinfo
, gimple_stmt_iterator
*gsi
,
4814 slp_tree node
, stmt_vector_for_cost
*cost_vec
)
4816 tree vectype
= SLP_TREE_VECTYPE (node
);
4818 /* ??? We currently only support all same vector input and output types
4819 while the SLP IL should really do a concat + select and thus accept
4820 arbitrary mismatches. */
4823 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), i
, child
)
4824 if (!types_compatible_p (SLP_TREE_VECTYPE (child
), vectype
))
4826 if (dump_enabled_p ())
4827 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4828 "Unsupported lane permutation\n");
4832 vec
<std::pair
<unsigned, unsigned> > &perm
= SLP_TREE_LANE_PERMUTATION (node
);
4833 gcc_assert (perm
.length () == SLP_TREE_LANES (node
));
4834 if (dump_enabled_p ())
4836 dump_printf_loc (MSG_NOTE
, vect_location
,
4837 "vectorizing permutation");
4838 for (unsigned i
= 0; i
< perm
.length (); ++i
)
4839 dump_printf (MSG_NOTE
, " op%u[%u]", perm
[i
].first
, perm
[i
].second
);
4840 dump_printf (MSG_NOTE
, "\n");
/* The code generation below only handles constant-length vectors and
   a constant vectorization factor.  */
4843 poly_uint64 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
4844 if (!nunits
.is_constant ())
4846 unsigned HOST_WIDE_INT vf
= 1;
4847 if (loop_vec_info linfo
= dyn_cast
<loop_vec_info
> (vinfo
))
4848 if (!LOOP_VINFO_VECT_FACTOR (linfo
).is_constant (&vf
))
/* Total number of output lanes across all unrolled copies.  */
4850 unsigned olanes
= vf
* SLP_TREE_LANES (node
);
4851 gcc_assert (multiple_p (olanes
, nunits
));
4853 /* Compute the { { SLP operand, vector index}, lane } permutation sequence
4854 from the { SLP operand, scalar lane } permutation as recorded in the
4855 SLP node as intermediate step. This part should already work
4856 with SLP children with arbitrary number of lanes. */
4857 auto_vec
<std::pair
<std::pair
<unsigned, unsigned>, unsigned> > vperm
;
4858 auto_vec
<unsigned> active_lane
;
4859 vperm
.create (olanes
);
/* ACTIVE_LANE[c]: first scalar lane of child C not yet consumed.  */
4860 active_lane
.safe_grow_cleared (SLP_TREE_CHILDREN (node
).length (), true);
4861 for (unsigned i
= 0; i
< vf
; ++i
)
4863 for (unsigned pi
= 0; pi
< perm
.length (); ++pi
)
4865 std::pair
<unsigned, unsigned> p
= perm
[pi
];
4866 tree vtype
= SLP_TREE_VECTYPE (SLP_TREE_CHILDREN (node
)[p
.first
]);
4867 unsigned vnunits
= TYPE_VECTOR_SUBPARTS (vtype
).to_constant ();
/* Split the scalar lane into (vector index, lane within vector).  */
4868 unsigned vi
= (active_lane
[p
.first
] + p
.second
) / vnunits
;
4869 unsigned vl
= (active_lane
[p
.first
] + p
.second
) % vnunits
;
4870 vperm
.quick_push (std::make_pair (std::make_pair (p
.first
, vi
), vl
));
4872 /* Advance to the next group. */
4873 for (unsigned j
= 0; j
< SLP_TREE_CHILDREN (node
).length (); ++j
)
4874 active_lane
[j
] += SLP_TREE_LANES (SLP_TREE_CHILDREN (node
)[j
]);
4877 if (dump_enabled_p ())
4879 dump_printf_loc (MSG_NOTE
, vect_location
, "as");
4880 for (unsigned i
= 0; i
< vperm
.length (); ++i
)
4882 if (i
!= 0 && multiple_p (i
, TYPE_VECTOR_SUBPARTS (vectype
)))
4883 dump_printf (MSG_NOTE
, ",");
4884 dump_printf (MSG_NOTE
, " vops%u[%u][%u]",
4885 vperm
[i
].first
.first
, vperm
[i
].first
.second
,
4886 vperm
[i
].first
.second
);
4888 dump_printf (MSG_NOTE
, "\n");
4891 /* We can only handle two-vector permutes, everything else should
4892 be lowered on the SLP level. The following is closely inspired
4893 by vect_transform_slp_perm_load and is supposed to eventually
4895 ??? As intermediate step do code-gen in the SLP tree representation
/* FIRST_VEC/SECOND_VEC identify the (at most two) {child, vector index}
   inputs of the current mask; {-1U,-1U} means "not yet chosen".  */
4897 std::pair
<unsigned, unsigned> first_vec
= std::make_pair (-1U, -1U);
4898 std::pair
<unsigned, unsigned> second_vec
= std::make_pair (-1U, -1U);
4899 unsigned int const_nunits
= nunits
.to_constant ();
4900 unsigned int index
= 0;
4901 unsigned int mask_element
;
4902 vec_perm_builder mask
;
4903 mask
.new_vector (const_nunits
, const_nunits
, 1);
4904 unsigned int count
= mask
.encoded_nelts ();
4905 mask
.quick_grow (count
);
4906 vec_perm_indices indices
;
/* NPERMS counts emitted/costed permutes for record_stmt_cost below.  */
4907 unsigned nperms
= 0;
4908 for (unsigned i
= 0; i
< vperm
.length (); ++i
)
4910 mask_element
= vperm
[i
].second
;
4911 if (first_vec
.first
== -1U
4912 || first_vec
== vperm
[i
].first
)
4913 first_vec
= vperm
[i
].first
;
4914 else if (second_vec
.first
== -1U
4915 || second_vec
== vperm
[i
].first
)
4917 second_vec
= vperm
[i
].first
;
/* Element from the second input: offset into [nunits, 2*nunits).  */
4918 mask_element
+= const_nunits
;
/* More than two distinct inputs cannot feed one VEC_PERM_EXPR.  */
4922 if (dump_enabled_p ())
4923 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4924 "permutation requires at "
4925 "least three vectors");
4930 mask
[index
++] = mask_element
;
4934 indices
.new_vector (mask
, second_vec
.first
== -1U ? 1 : 2,
/* Identity masks need no permute stmt at all.  */
4936 bool identity_p
= indices
.series_p (0, 1, 0, 1);
4938 && !can_vec_perm_const_p (TYPE_MODE (vectype
), indices
))
4940 if (dump_enabled_p ())
4942 dump_printf_loc (MSG_MISSED_OPTIMIZATION
,
4944 "unsupported vect permute { ");
4945 for (i
= 0; i
< count
; ++i
)
4947 dump_dec (MSG_MISSED_OPTIMIZATION
, mask
[i
]);
4948 dump_printf (MSG_MISSED_OPTIMIZATION
, " ");
4950 dump_printf (MSG_MISSED_OPTIMIZATION
, "}\n");
4960 if (second_vec
.first
== -1U)
4961 second_vec
= first_vec
;
4963 /* Generate the permute statement if necessary. */
4964 slp_tree first_node
= SLP_TREE_CHILDREN (node
)[first_vec
.first
];
4966 = vect_get_slp_vect_def (first_node
, first_vec
.second
);
4968 tree perm_dest
= make_ssa_name (vectype
);
4971 slp_tree second_node
4972 = SLP_TREE_CHILDREN (node
)[second_vec
.first
];
4974 = vect_get_slp_vect_def (second_node
, second_vec
.second
);
4975 tree mask_vec
= vect_gen_perm_mask_checked (vectype
, indices
);
4976 perm_stmt
= gimple_build_assign (perm_dest
, VEC_PERM_EXPR
,
4977 first_def
, second_def
,
4981 /* We need a copy here in case the def was external. */
4982 perm_stmt
= gimple_build_assign (perm_dest
, first_def
);
4983 vect_finish_stmt_generation (vinfo
, NULL
, perm_stmt
, gsi
);
4984 /* Store the vector statement in NODE. */
4985 SLP_TREE_VEC_STMTS (node
).quick_push (perm_stmt
);
/* Reset input tracking for the next output vector's mask.  */
4989 first_vec
= std::make_pair (-1U, -1U);
4990 second_vec
= std::make_pair (-1U, -1U);
/* Analysis phase: account the permutes in the cost model.  */
4995 record_stmt_cost (cost_vec
, nperms
, vec_perm
, NULL
, vectype
, 0, vect_body
);
5000 /* Vectorize SLP instance tree in postorder. */
/* Recursively code-generates NODE and its children (children first so
   their defs exist), choosing the insertion point SI per node kind:
   data refs insert at the first/last scalar stmt, reduction/induction
   PHIs use no iterator, and everything else inserts after the latest
   dominating def of its children.  VISITED guards against revisiting
   shared nodes in the SLP graph.
   NOTE(review): this extraction is lossy — braces, some else-arms and
   a few statements are absent; confirm details against upstream.  */
5003 vect_schedule_slp_instance (vec_info
*vinfo
,
5004 slp_tree node
, slp_instance instance
,
5005 hash_set
<slp_tree
> &visited
)
5007 gimple_stmt_iterator si
;
5011 /* See if we have already vectorized the node in the graph of the
5013 if ((SLP_TREE_DEF_TYPE (node
) == vect_internal_def
5014 && SLP_TREE_VEC_STMTS (node
).exists ())
5015 || SLP_TREE_VEC_DEFS (node
).exists ())
5018 /* Vectorize externals and constants. */
5019 if (SLP_TREE_DEF_TYPE (node
) == vect_constant_def
5020 || SLP_TREE_DEF_TYPE (node
) == vect_external_def
)
5022 /* ??? vectorizable_shift can end up using a scalar operand which is
5023 currently denoted as !SLP_TREE_VECTYPE. No need to vectorize the
5024 node in this case. */
5025 if (!SLP_TREE_VECTYPE (node
))
5028 vect_create_constant_vectors (vinfo
, node
);
5032 /* ??? If we'd have a way to mark backedges that would be cheaper. */
5033 if (visited
.add (node
))
/* Postorder: vectorize all children before this node.  */
5036 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), i
, child
)
5037 vect_schedule_slp_instance (vinfo
, child
, instance
, visited
);
5039 gcc_assert (SLP_TREE_NUMBER_OF_VEC_STMTS (node
) != 0);
5040 SLP_TREE_VEC_STMTS (node
).create (SLP_TREE_NUMBER_OF_VEC_STMTS (node
));
5042 stmt_vec_info stmt_info
= SLP_TREE_REPRESENTATIVE (node
);
5043 if (dump_enabled_p ())
5044 dump_printf_loc (MSG_NOTE
, vect_location
,
5045 "------>vectorizing SLP node starting from: %G",
5048 if (STMT_VINFO_DATA_REF (stmt_info
)
5049 && SLP_TREE_CODE (node
) != VEC_PERM_EXPR
)
5051 /* Vectorized loads go before the first scalar load to make it
5052 ready early, vectorized stores go before the last scalar
5053 stmt which is where all uses are ready. */
5054 stmt_vec_info last_stmt_info
= NULL
;
5055 if (DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info
)))
5056 last_stmt_info
= vect_find_first_scalar_stmt_in_slp (node
);
5057 else /* DR_IS_WRITE */
5058 last_stmt_info
= vect_find_last_scalar_stmt_in_slp (node
);
5059 si
= gsi_for_stmt (last_stmt_info
->stmt
);
5061 else if ((STMT_VINFO_TYPE (SLP_TREE_REPRESENTATIVE (node
))
5062 == cycle_phi_info_type
)
5063 || (STMT_VINFO_TYPE (SLP_TREE_REPRESENTATIVE (node
))
5064 == induc_vec_info_type
))
5066 /* For reduction and induction PHIs we do not use the
5067 insertion iterator. */
5072 /* Emit other stmts after the children vectorized defs which is
5073 earliest possible. */
/* LAST_STMT tracks the latest (by dominance) def among all children;
   the node's stmts are inserted after it.  */
5074 gimple
*last_stmt
= NULL
;
5075 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), i
, child
)
5076 if (SLP_TREE_DEF_TYPE (child
) == vect_internal_def
)
5078 /* For fold-left reductions we are retaining the scalar
5079 reduction PHI but we still have SLP_TREE_NUM_VEC_STMTS
5080 set so the representation isn't perfect. Resort to the
5081 last scalar def here. */
5082 if (SLP_TREE_VEC_STMTS (child
).is_empty ())
5084 gcc_assert (STMT_VINFO_TYPE (SLP_TREE_REPRESENTATIVE (child
))
5085 == cycle_phi_info_type
);
5086 gphi
*phi
= as_a
<gphi
*>
5087 (vect_find_last_scalar_stmt_in_slp (child
)->stmt
);
5089 || vect_stmt_dominates_stmt_p (last_stmt
, phi
))
5092 /* We are emitting all vectorized stmts in the same place and
5093 the last one is the last.
5094 ??? Unless we have a load permutation applied and that
5095 figures to re-use an earlier generated load. */
5098 FOR_EACH_VEC_ELT (SLP_TREE_VEC_STMTS (child
), j
, vstmt
)
5100 || vect_stmt_dominates_stmt_p (last_stmt
, vstmt
))
5103 else if (!SLP_TREE_VECTYPE (child
))
5105 /* For externals we use unvectorized at all scalar defs. */
5108 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_OPS (child
), j
, def
)
5109 if (TREE_CODE (def
) == SSA_NAME
5110 && !SSA_NAME_IS_DEFAULT_DEF (def
))
5112 gimple
*stmt
= SSA_NAME_DEF_STMT (def
);
5114 || vect_stmt_dominates_stmt_p (last_stmt
, stmt
))
5120 /* For externals we have to look at all defs since their
5121 insertion place is decided per vector. But beware
5122 of pre-existing vectors where we need to make sure
5123 we do not insert before the region boundary. */
5124 if (SLP_TREE_SCALAR_OPS (child
).is_empty ()
5125 && !vinfo
->lookup_def (SLP_TREE_VEC_DEFS (child
)[0]))
5126 last_stmt
= gsi_stmt (gsi_after_labels
5127 (as_a
<bb_vec_info
> (vinfo
)->bbs
[0]));
5132 FOR_EACH_VEC_ELT (SLP_TREE_VEC_DEFS (child
), j
, vdef
)
5133 if (TREE_CODE (vdef
) == SSA_NAME
5134 && !SSA_NAME_IS_DEFAULT_DEF (vdef
))
5136 gimple
*vstmt
= SSA_NAME_DEF_STMT (vdef
);
5138 || vect_stmt_dominates_stmt_p (last_stmt
, vstmt
))
5143 /* This can happen when all children are pre-existing vectors or
5146 last_stmt
= vect_find_first_scalar_stmt_in_slp (node
)->stmt
;
/* PHIs cannot be insertion points themselves — insert after labels.  */
5147 if (is_a
<gphi
*> (last_stmt
))
5148 si
= gsi_after_labels (gimple_bb (last_stmt
));
5151 si
= gsi_for_stmt (last_stmt
);
5156 bool done_p
= false;
5158 /* Handle purely internal nodes. */
5159 if (SLP_TREE_CODE (node
) == VEC_PERM_EXPR
)
5161 /* ??? the transform kind is stored to STMT_VINFO_TYPE which might
5162 be shared with different SLP nodes (but usually it's the same
5163 operation apart from the case the stmt is only there for denoting
5164 the actual scalar lane defs ...). So do not call vect_transform_stmt
5165 but open-code it here (partly). */
5166 bool done
= vectorizable_slp_permutation (vinfo
, &si
, node
, NULL
);
5171 vect_transform_stmt (vinfo
, stmt_info
, &si
, node
, instance
);
5174 /* Replace scalar calls from SLP node NODE with setting of their lhs to zero.
5175 For loop vectorization this is done in vectorizable_call, but for SLP
5176 it needs to be deferred until end of vect_schedule_slp, because multiple
5177 SLP instances may refer to the same scalar stmt. */
/* Recursive worker: walks internal-def nodes only, using VISITED to
   process each shared node once.  Pattern stmts and non-pure-SLP stmts
   are left alone.  */
5180 vect_remove_slp_scalar_calls (vec_info
*vinfo
,
5181 slp_tree node
, hash_set
<slp_tree
> &visited
)
5184 gimple_stmt_iterator gsi
;
5188 stmt_vec_info stmt_info
;
5190 if (SLP_TREE_DEF_TYPE (node
) != vect_internal_def
)
5193 if (visited
.add (node
))
5196 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), i
, child
)
5197 vect_remove_slp_scalar_calls (vinfo
, child
, visited
);
5199 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node
), i
, stmt_info
)
/* Only calls still present in the IL are candidates.  */
5201 gcall
*stmt
= dyn_cast
<gcall
*> (stmt_info
->stmt
);
5202 if (!stmt
|| gimple_bb (stmt
) == NULL
)
5204 if (is_pattern_stmt_p (stmt_info
)
5205 || !PURE_SLP_STMT (stmt_info
))
/* Replace the call with "lhs = 0" and re-point the SSA def.  */
5207 lhs
= gimple_call_lhs (stmt
);
5208 new_stmt
= gimple_build_assign (lhs
, build_zero_cst (TREE_TYPE (lhs
)));
5209 gsi
= gsi_for_stmt (stmt
);
5210 vinfo
->replace_stmt (&gsi
, stmt_info
, new_stmt
);
5211 SSA_NAME_DEF_STMT (gimple_assign_lhs (new_stmt
)) = new_stmt
;
/* Entry point: run the worker above with a fresh visited set.  */
5216 vect_remove_slp_scalar_calls (vec_info
*vinfo
, slp_tree node
)
5218 hash_set
<slp_tree
> visited
;
5219 vect_remove_slp_scalar_calls (vinfo
, node
, visited
);
5222 /* Vectorize the instance root. */
/* Replaces the instance's root stmt with an assignment of the
   vectorized result: a single vector stmt's LHS directly (with a
   VIEW_CONVERT_EXPR if the types differ), or a CONSTRUCTOR collecting
   the LHSs of multiple vector stmts.  The built RSTMT replaces the
   original root stmt in place via gsi_replace.  */
5225 vectorize_slp_instance_root_stmt (slp_tree node
, slp_instance instance
)
5227 gassign
*rstmt
= NULL
;
5229 if (SLP_TREE_NUMBER_OF_VEC_STMTS (node
) == 1)
/* Single vector result: assign it to the root's LHS.  */
5234 FOR_EACH_VEC_ELT (SLP_TREE_VEC_STMTS (node
), j
, child_stmt
)
5236 tree vect_lhs
= gimple_get_lhs (child_stmt
);
5237 tree root_lhs
= gimple_get_lhs (instance
->root_stmt
->stmt
);
5238 if (!useless_type_conversion_p (TREE_TYPE (root_lhs
),
5239 TREE_TYPE (vect_lhs
)))
5240 vect_lhs
= build1 (VIEW_CONVERT_EXPR
, TREE_TYPE (root_lhs
),
5242 rstmt
= gimple_build_assign (root_lhs
, vect_lhs
);
5246 else if (SLP_TREE_NUMBER_OF_VEC_STMTS (node
) > 1)
/* Multiple vector results: build a CONSTRUCTOR of their LHSs with the
   root's RHS type.  */
5248 int nelts
= SLP_TREE_NUMBER_OF_VEC_STMTS (node
);
5251 vec
<constructor_elt
, va_gc
> *v
;
5252 vec_alloc (v
, nelts
);
5254 FOR_EACH_VEC_ELT (SLP_TREE_VEC_STMTS (node
), j
, child_stmt
)
5256 CONSTRUCTOR_APPEND_ELT (v
,
5258 gimple_get_lhs (child_stmt
));
5260 tree lhs
= gimple_get_lhs (instance
->root_stmt
->stmt
);
5261 tree rtype
= TREE_TYPE (gimple_assign_rhs1 (instance
->root_stmt
->stmt
));
5262 tree r_constructor
= build_constructor (rtype
, v
);
5263 rstmt
= gimple_build_assign (lhs
, r_constructor
);
/* Swap the new assignment in for the original root stmt.  */
5268 gimple_stmt_iterator rgsi
= gsi_for_stmt (instance
->root_stmt
->stmt
);
5269 gsi_replace (&rgsi
, rstmt
, true);
5272 /* Generate vector code for SLP_INSTANCES in the loop/basic block. */
5275 vect_schedule_slp (vec_info
*vinfo
, vec
<slp_instance
> slp_instances
)
5277 slp_instance instance
;
5280 hash_set
<slp_tree
> visited
;
5281 FOR_EACH_VEC_ELT (slp_instances
, i
, instance
)
5283 slp_tree node
= SLP_INSTANCE_TREE (instance
);
5284 if (dump_enabled_p ())
5286 dump_printf_loc (MSG_NOTE
, vect_location
,
5287 "Vectorizing SLP tree:\n");
5288 if (SLP_INSTANCE_ROOT_STMT (instance
))
5289 dump_printf_loc (MSG_NOTE
, vect_location
, "Root stmt: %G",
5290 SLP_INSTANCE_ROOT_STMT (instance
)->stmt
);
5291 vect_print_slp_graph (MSG_NOTE
, vect_location
,
5292 SLP_INSTANCE_TREE (instance
));
5294 /* Schedule the tree of INSTANCE. */
5295 vect_schedule_slp_instance (vinfo
, node
, instance
, visited
);
5297 if (SLP_INSTANCE_ROOT_STMT (instance
))
5298 vectorize_slp_instance_root_stmt (node
, instance
);
5300 if (dump_enabled_p ())
5301 dump_printf_loc (MSG_NOTE
, vect_location
,
5302 "vectorizing stmts using SLP.\n");
5305 FOR_EACH_VEC_ELT (slp_instances
, i
, instance
)
5307 slp_tree root
= SLP_INSTANCE_TREE (instance
);
5308 stmt_vec_info store_info
;
5311 /* For reductions set the latch values of the vectorized PHIs. */
5312 if (instance
->reduc_phis
5313 && STMT_VINFO_REDUC_TYPE (SLP_TREE_REPRESENTATIVE
5314 (instance
->reduc_phis
)) != FOLD_LEFT_REDUCTION
5315 && STMT_VINFO_REDUC_TYPE (SLP_TREE_REPRESENTATIVE
5316 (instance
->reduc_phis
)) != EXTRACT_LAST_REDUCTION
)
5318 slp_tree slp_node
= root
;
5319 slp_tree phi_node
= instance
->reduc_phis
;
5320 gphi
*phi
= as_a
<gphi
*> (SLP_TREE_SCALAR_STMTS (phi_node
)[0]->stmt
);
5321 edge e
= loop_latch_edge (gimple_bb (phi
)->loop_father
);
5322 gcc_assert (SLP_TREE_VEC_STMTS (phi_node
).length ()
5323 == SLP_TREE_VEC_STMTS (slp_node
).length ());
5324 for (unsigned j
= 0; j
< SLP_TREE_VEC_STMTS (phi_node
).length (); ++j
)
5325 add_phi_arg (as_a
<gphi
*> (SLP_TREE_VEC_STMTS (phi_node
)[j
]),
5326 vect_get_slp_vect_def (slp_node
, j
),
5327 e
, gimple_phi_arg_location (phi
, e
->dest_idx
));
5330 /* Remove scalar call stmts. Do not do this for basic-block
5331 vectorization as not all uses may be vectorized.
5332 ??? Why should this be necessary? DCE should be able to
5333 remove the stmts itself.
5334 ??? For BB vectorization we can as well remove scalar
5335 stmts starting from the SLP tree root if they have no
5337 if (is_a
<loop_vec_info
> (vinfo
))
5338 vect_remove_slp_scalar_calls (vinfo
, root
);
5340 /* Remove vectorized stores original scalar stmts. */
5341 for (j
= 0; SLP_TREE_SCALAR_STMTS (root
).iterate (j
, &store_info
); j
++)
5343 if (!STMT_VINFO_DATA_REF (store_info
)
5344 || !DR_IS_WRITE (STMT_VINFO_DATA_REF (store_info
)))
5347 store_info
= vect_orig_stmt (store_info
);
5348 /* Free the attached stmt_vec_info and remove the stmt. */
5349 vinfo
->remove_stmt (store_info
);