1 /* SLP - Basic Block Vectorization
2 Copyright (C) 2007, 2008, 2009, 2010, 2011, 2012
3 Free Software Foundation, Inc.
4 Contributed by Dorit Naishlos <dorit@il.ibm.com>
5 and Ira Rosen <irar@il.ibm.com>
7 This file is part of GCC.
9 GCC is free software; you can redistribute it and/or modify it under
10 the terms of the GNU General Public License as published by the Free
11 Software Foundation; either version 3, or (at your option) any later
14 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
15 WARRANTY; without even the implied warranty of MERCHANTABILITY or
16 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
25 #include "coretypes.h"
30 #include "basic-block.h"
31 #include "tree-pretty-print.h"
32 #include "gimple-pretty-print.h"
33 #include "tree-flow.h"
34 #include "tree-dump.h"
36 #include "cfglayout.h"
40 #include "tree-vectorizer.h"
41 #include "langhooks.h"
43 /* Extract the location of the basic block in the source code.
44 Return the basic block location if it succeeds and NULL if not. */
47 find_bb_location (basic_block bb
)
50 gimple_stmt_iterator si
;
55 for (si
= gsi_start_bb (bb
); !gsi_end_p (si
); gsi_next (&si
))
58 if (gimple_location (stmt
) != UNKNOWN_LOC
)
59 return gimple_location (stmt
);
66 /* Recursively free the memory allocated for the SLP tree rooted at NODE. */
69 vect_free_slp_tree (slp_tree node
)
77 FOR_EACH_VEC_ELT (slp_void_p
, SLP_TREE_CHILDREN (node
), i
, child
)
78 vect_free_slp_tree ((slp_tree
) child
);
80 VEC_free (slp_void_p
, heap
, SLP_TREE_CHILDREN (node
));
81 VEC_free (gimple
, heap
, SLP_TREE_SCALAR_STMTS (node
));
83 if (SLP_TREE_VEC_STMTS (node
))
84 VEC_free (gimple
, heap
, SLP_TREE_VEC_STMTS (node
));
90 /* Free the memory allocated for the SLP instance. */
93 vect_free_slp_instance (slp_instance instance
)
95 vect_free_slp_tree (SLP_INSTANCE_TREE (instance
));
96 VEC_free (int, heap
, SLP_INSTANCE_LOAD_PERMUTATION (instance
));
97 VEC_free (slp_tree
, heap
, SLP_INSTANCE_LOADS (instance
));
101 /* Create an SLP node for SCALAR_STMTS. */
104 vect_create_new_slp_node (VEC (gimple
, heap
) *scalar_stmts
)
107 gimple stmt
= VEC_index (gimple
, scalar_stmts
, 0);
110 if (is_gimple_call (stmt
))
111 nops
= gimple_call_num_args (stmt
);
112 else if (is_gimple_assign (stmt
))
114 nops
= gimple_num_ops (stmt
) - 1;
115 if (gimple_assign_rhs_code (stmt
) == COND_EXPR
)
121 node
= XNEW (struct _slp_tree
);
122 SLP_TREE_SCALAR_STMTS (node
) = scalar_stmts
;
123 SLP_TREE_VEC_STMTS (node
) = NULL
;
124 SLP_TREE_CHILDREN (node
) = VEC_alloc (slp_void_p
, heap
, nops
);
125 SLP_TREE_OUTSIDE_OF_LOOP_COST (node
) = 0;
126 SLP_TREE_INSIDE_OF_LOOP_COST (node
) = 0;
132 /* Allocate operands info for NOPS operands, and GROUP_SIZE def-stmts for each
134 static VEC (slp_oprnd_info
, heap
) *
135 vect_create_oprnd_info (int nops
, int group_size
)
138 slp_oprnd_info oprnd_info
;
139 VEC (slp_oprnd_info
, heap
) *oprnds_info
;
141 oprnds_info
= VEC_alloc (slp_oprnd_info
, heap
, nops
);
142 for (i
= 0; i
< nops
; i
++)
144 oprnd_info
= XNEW (struct _slp_oprnd_info
);
145 oprnd_info
->def_stmts
= VEC_alloc (gimple
, heap
, group_size
);
146 oprnd_info
->first_dt
= vect_uninitialized_def
;
147 oprnd_info
->first_def_type
= NULL_TREE
;
148 oprnd_info
->first_const_oprnd
= NULL_TREE
;
149 oprnd_info
->first_pattern
= false;
150 VEC_quick_push (slp_oprnd_info
, oprnds_info
, oprnd_info
);
157 /* Free operands info. */
160 vect_free_oprnd_info (VEC (slp_oprnd_info
, heap
) **oprnds_info
)
163 slp_oprnd_info oprnd_info
;
165 FOR_EACH_VEC_ELT (slp_oprnd_info
, *oprnds_info
, i
, oprnd_info
)
167 VEC_free (gimple
, heap
, oprnd_info
->def_stmts
);
168 XDELETE (oprnd_info
);
171 VEC_free (slp_oprnd_info
, heap
, *oprnds_info
);
175 /* Get the defs for the rhs of STMT (collect them in OPRNDS_INFO), check that
176 they are of a valid type and that they match the defs of the first stmt of
177 the SLP group (stored in OPRNDS_INFO). */
180 vect_get_and_check_slp_defs (loop_vec_info loop_vinfo
, bb_vec_info bb_vinfo
,
181 slp_tree slp_node
, gimple stmt
,
182 int ncopies_for_cost
, bool first
,
183 VEC (slp_oprnd_info
, heap
) **oprnds_info
)
186 unsigned int i
, number_of_oprnds
;
187 tree def
, def_op0
= NULL_TREE
;
189 enum vect_def_type dt
= vect_uninitialized_def
;
190 enum vect_def_type dt_op0
= vect_uninitialized_def
;
191 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
192 tree lhs
= gimple_get_lhs (stmt
);
193 struct loop
*loop
= NULL
;
194 enum tree_code rhs_code
;
195 bool different_types
= false;
196 bool pattern
= false;
197 slp_oprnd_info oprnd_info
, oprnd0_info
, oprnd1_info
;
199 tree compare_rhs
= NULL_TREE
;
202 loop
= LOOP_VINFO_LOOP (loop_vinfo
);
204 if (is_gimple_call (stmt
))
206 number_of_oprnds
= gimple_call_num_args (stmt
);
209 else if (is_gimple_assign (stmt
))
211 number_of_oprnds
= gimple_num_ops (stmt
) - 1;
212 if (gimple_assign_rhs_code (stmt
) == COND_EXPR
)
218 for (i
= 0; i
< number_of_oprnds
; i
++)
223 compare_rhs
= NULL_TREE
;
226 oprnd
= gimple_op (stmt
, op_idx
++);
228 oprnd_info
= VEC_index (slp_oprnd_info
, *oprnds_info
, i
);
230 if (COMPARISON_CLASS_P (oprnd
))
232 compare_rhs
= TREE_OPERAND (oprnd
, 1);
233 oprnd
= TREE_OPERAND (oprnd
, 0);
236 if (!vect_is_simple_use (oprnd
, NULL
, loop_vinfo
, bb_vinfo
, &def_stmt
,
238 || (!def_stmt
&& dt
!= vect_constant_def
))
240 if (vect_print_dump_info (REPORT_SLP
))
242 fprintf (vect_dump
, "Build SLP failed: can't find def for ");
243 print_generic_expr (vect_dump
, oprnd
, TDF_SLIM
);
249 /* Check if DEF_STMT is a part of a pattern in LOOP and get the def stmt
250 from the pattern. Check that all the stmts of the node are in the
252 if (loop
&& def_stmt
&& gimple_bb (def_stmt
)
253 && flow_bb_inside_loop_p (loop
, gimple_bb (def_stmt
))
254 && vinfo_for_stmt (def_stmt
)
255 && STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (def_stmt
))
256 && !STMT_VINFO_RELEVANT (vinfo_for_stmt (def_stmt
))
257 && !STMT_VINFO_LIVE_P (vinfo_for_stmt (def_stmt
)))
260 if (!first
&& !oprnd_info
->first_pattern
)
262 if (vect_print_dump_info (REPORT_DETAILS
))
264 fprintf (vect_dump
, "Build SLP failed: some of the stmts"
265 " are in a pattern, and others are not ");
266 print_generic_expr (vect_dump
, oprnd
, TDF_SLIM
);
272 def_stmt
= STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt
));
273 dt
= STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def_stmt
));
275 if (dt
== vect_unknown_def_type
)
277 if (vect_print_dump_info (REPORT_DETAILS
))
278 fprintf (vect_dump
, "Unsupported pattern.");
282 switch (gimple_code (def_stmt
))
285 def
= gimple_phi_result (def_stmt
);
289 def
= gimple_assign_lhs (def_stmt
);
293 if (vect_print_dump_info (REPORT_DETAILS
))
294 fprintf (vect_dump
, "unsupported defining stmt: ");
301 oprnd_info
->first_dt
= dt
;
302 oprnd_info
->first_pattern
= pattern
;
305 oprnd_info
->first_def_type
= TREE_TYPE (def
);
306 oprnd_info
->first_const_oprnd
= NULL_TREE
;
310 oprnd_info
->first_def_type
= NULL_TREE
;
311 oprnd_info
->first_const_oprnd
= oprnd
;
318 /* Analyze costs (for the first stmt of the group only). */
319 if (REFERENCE_CLASS_P (lhs
))
321 vect_model_store_cost (stmt_info
, ncopies_for_cost
, false,
325 enum vect_def_type dts
[2];
327 dts
[1] = vect_uninitialized_def
;
328 /* Not memory operation (we don't call this function for
330 vect_model_simple_cost (stmt_info
, ncopies_for_cost
, dts
,
337 /* Not first stmt of the group, check that the def-stmt/s match
338 the def-stmt/s of the first stmt. Allow different definition
339 types for reduction chains: the first stmt must be a
340 vect_reduction_def (a phi node), and the rest
341 vect_internal_def. */
342 if (((oprnd_info
->first_dt
!= dt
343 && !(oprnd_info
->first_dt
== vect_reduction_def
344 && dt
== vect_internal_def
))
345 || (oprnd_info
->first_def_type
!= NULL_TREE
347 && !types_compatible_p (oprnd_info
->first_def_type
,
350 && !types_compatible_p (TREE_TYPE (oprnd_info
->first_const_oprnd
),
354 if (number_of_oprnds
!= 2)
356 if (vect_print_dump_info (REPORT_SLP
))
357 fprintf (vect_dump
, "Build SLP failed: different types ");
362 /* Try to swap operands in case of binary operation. */
364 different_types
= true;
367 oprnd0_info
= VEC_index (slp_oprnd_info
, *oprnds_info
, 0);
368 if (is_gimple_assign (stmt
)
369 && (rhs_code
= gimple_assign_rhs_code (stmt
))
370 && TREE_CODE_CLASS (rhs_code
) == tcc_binary
371 && commutative_tree_code (rhs_code
)
372 && oprnd0_info
->first_dt
== dt
373 && oprnd_info
->first_dt
== dt_op0
375 && !(oprnd0_info
->first_def_type
376 && !types_compatible_p (oprnd0_info
->first_def_type
,
378 && !(oprnd_info
->first_def_type
379 && !types_compatible_p (oprnd_info
->first_def_type
,
380 TREE_TYPE (def_op0
))))
382 if (vect_print_dump_info (REPORT_SLP
))
384 fprintf (vect_dump
, "Swapping operands of ");
385 print_gimple_stmt (vect_dump
, stmt
, 0, TDF_SLIM
);
388 swap_tree_operands (stmt
, gimple_assign_rhs1_ptr (stmt
),
389 gimple_assign_rhs2_ptr (stmt
));
393 if (vect_print_dump_info (REPORT_SLP
))
394 fprintf (vect_dump
, "Build SLP failed: different types ");
402 /* Check the types of the definitions. */
405 case vect_constant_def
:
406 case vect_external_def
:
407 case vect_reduction_def
:
410 case vect_internal_def
:
413 oprnd0_info
= VEC_index (slp_oprnd_info
, *oprnds_info
, 0);
414 oprnd1_info
= VEC_index (slp_oprnd_info
, *oprnds_info
, 0);
416 VEC_quick_push (gimple
, oprnd1_info
->def_stmts
, def_stmt
);
418 VEC_quick_push (gimple
, oprnd0_info
->def_stmts
, def_stmt
);
421 VEC_quick_push (gimple
, oprnd_info
->def_stmts
, def_stmt
);
426 /* FORNOW: Not supported. */
427 if (vect_print_dump_info (REPORT_SLP
))
429 fprintf (vect_dump
, "Build SLP failed: illegal type of def ");
430 print_generic_expr (vect_dump
, def
, TDF_SLIM
);
441 /* Recursively build an SLP tree starting from NODE.
442 Fail (and return FALSE) if def-stmts are not isomorphic, require data
443 permutation or are of unsupported types of operation. Otherwise, return
447 vect_build_slp_tree (loop_vec_info loop_vinfo
, bb_vec_info bb_vinfo
,
448 slp_tree
*node
, unsigned int group_size
,
449 int *inside_cost
, int *outside_cost
,
450 int ncopies_for_cost
, unsigned int *max_nunits
,
451 VEC (int, heap
) **load_permutation
,
452 VEC (slp_tree
, heap
) **loads
,
453 unsigned int vectorization_factor
, bool *loads_permuted
)
456 VEC (gimple
, heap
) *stmts
= SLP_TREE_SCALAR_STMTS (*node
);
457 gimple stmt
= VEC_index (gimple
, stmts
, 0);
458 enum tree_code first_stmt_code
= ERROR_MARK
, rhs_code
= ERROR_MARK
;
459 enum tree_code first_cond_code
= ERROR_MARK
;
461 bool stop_recursion
= false, need_same_oprnds
= false;
462 tree vectype
, scalar_type
, first_op1
= NULL_TREE
;
463 unsigned int ncopies
;
466 enum machine_mode optab_op2_mode
;
467 enum machine_mode vec_mode
;
468 struct data_reference
*first_dr
;
470 bool permutation
= false;
471 unsigned int load_place
;
472 gimple first_load
, prev_first_load
= NULL
;
473 VEC (slp_oprnd_info
, heap
) *oprnds_info
;
475 slp_oprnd_info oprnd_info
;
478 if (is_gimple_call (stmt
))
479 nops
= gimple_call_num_args (stmt
);
480 else if (is_gimple_assign (stmt
))
482 nops
= gimple_num_ops (stmt
) - 1;
483 if (gimple_assign_rhs_code (stmt
) == COND_EXPR
)
489 oprnds_info
= vect_create_oprnd_info (nops
, group_size
);
491 /* For every stmt in NODE find its def stmt/s. */
492 FOR_EACH_VEC_ELT (gimple
, stmts
, i
, stmt
)
494 if (vect_print_dump_info (REPORT_SLP
))
496 fprintf (vect_dump
, "Build SLP for ");
497 print_gimple_stmt (vect_dump
, stmt
, 0, TDF_SLIM
);
500 /* Fail to vectorize statements marked as unvectorizable. */
501 if (!STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (stmt
)))
503 if (vect_print_dump_info (REPORT_SLP
))
506 "Build SLP failed: unvectorizable statement ");
507 print_gimple_stmt (vect_dump
, stmt
, 0, TDF_SLIM
);
510 vect_free_oprnd_info (&oprnds_info
);
514 lhs
= gimple_get_lhs (stmt
);
515 if (lhs
== NULL_TREE
)
517 if (vect_print_dump_info (REPORT_SLP
))
520 "Build SLP failed: not GIMPLE_ASSIGN nor GIMPLE_CALL ");
521 print_gimple_stmt (vect_dump
, stmt
, 0, TDF_SLIM
);
524 vect_free_oprnd_info (&oprnds_info
);
528 if (is_gimple_assign (stmt
)
529 && gimple_assign_rhs_code (stmt
) == COND_EXPR
530 && (cond
= gimple_assign_rhs1 (stmt
))
531 && !COMPARISON_CLASS_P (cond
))
533 if (vect_print_dump_info (REPORT_SLP
))
536 "Build SLP failed: condition is not comparison ");
537 print_gimple_stmt (vect_dump
, stmt
, 0, TDF_SLIM
);
540 vect_free_oprnd_info (&oprnds_info
);
544 scalar_type
= vect_get_smallest_scalar_type (stmt
, &dummy
, &dummy
);
545 vectype
= get_vectype_for_scalar_type (scalar_type
);
548 if (vect_print_dump_info (REPORT_SLP
))
550 fprintf (vect_dump
, "Build SLP failed: unsupported data-type ");
551 print_generic_expr (vect_dump
, scalar_type
, TDF_SLIM
);
554 vect_free_oprnd_info (&oprnds_info
);
558 /* In case of multiple types we need to detect the smallest type. */
559 if (*max_nunits
< TYPE_VECTOR_SUBPARTS (vectype
))
561 *max_nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
563 vectorization_factor
= *max_nunits
;
566 ncopies
= vectorization_factor
/ TYPE_VECTOR_SUBPARTS (vectype
);
568 if (is_gimple_call (stmt
))
570 rhs_code
= CALL_EXPR
;
571 if (gimple_call_internal_p (stmt
)
572 || gimple_call_tail_p (stmt
)
573 || gimple_call_noreturn_p (stmt
)
574 || !gimple_call_nothrow_p (stmt
)
575 || gimple_call_chain (stmt
))
577 if (vect_print_dump_info (REPORT_SLP
))
580 "Build SLP failed: unsupported call type ");
581 print_gimple_stmt (vect_dump
, stmt
, 0, TDF_SLIM
);
584 vect_free_oprnd_info (&oprnds_info
);
589 rhs_code
= gimple_assign_rhs_code (stmt
);
591 /* Check the operation. */
594 first_stmt_code
= rhs_code
;
596 /* Shift arguments should be equal in all the packed stmts for a
597 vector shift with scalar shift operand. */
598 if (rhs_code
== LSHIFT_EXPR
|| rhs_code
== RSHIFT_EXPR
599 || rhs_code
== LROTATE_EXPR
600 || rhs_code
== RROTATE_EXPR
)
602 vec_mode
= TYPE_MODE (vectype
);
604 /* First see if we have a vector/vector shift. */
605 optab
= optab_for_tree_code (rhs_code
, vectype
,
609 || optab_handler (optab
, vec_mode
) == CODE_FOR_nothing
)
611 /* No vector/vector shift, try for a vector/scalar shift. */
612 optab
= optab_for_tree_code (rhs_code
, vectype
,
617 if (vect_print_dump_info (REPORT_SLP
))
618 fprintf (vect_dump
, "Build SLP failed: no optab.");
619 vect_free_oprnd_info (&oprnds_info
);
622 icode
= (int) optab_handler (optab
, vec_mode
);
623 if (icode
== CODE_FOR_nothing
)
625 if (vect_print_dump_info (REPORT_SLP
))
626 fprintf (vect_dump
, "Build SLP failed: "
627 "op not supported by target.");
628 vect_free_oprnd_info (&oprnds_info
);
631 optab_op2_mode
= insn_data
[icode
].operand
[2].mode
;
632 if (!VECTOR_MODE_P (optab_op2_mode
))
634 need_same_oprnds
= true;
635 first_op1
= gimple_assign_rhs2 (stmt
);
639 else if (rhs_code
== WIDEN_LSHIFT_EXPR
)
641 need_same_oprnds
= true;
642 first_op1
= gimple_assign_rhs2 (stmt
);
647 if (first_stmt_code
!= rhs_code
648 && (first_stmt_code
!= IMAGPART_EXPR
649 || rhs_code
!= REALPART_EXPR
)
650 && (first_stmt_code
!= REALPART_EXPR
651 || rhs_code
!= IMAGPART_EXPR
)
652 && !(STMT_VINFO_STRIDED_ACCESS (vinfo_for_stmt (stmt
))
653 && (first_stmt_code
== ARRAY_REF
654 || first_stmt_code
== INDIRECT_REF
655 || first_stmt_code
== COMPONENT_REF
656 || first_stmt_code
== MEM_REF
)))
658 if (vect_print_dump_info (REPORT_SLP
))
661 "Build SLP failed: different operation in stmt ");
662 print_gimple_stmt (vect_dump
, stmt
, 0, TDF_SLIM
);
665 vect_free_oprnd_info (&oprnds_info
);
670 && !operand_equal_p (first_op1
, gimple_assign_rhs2 (stmt
), 0))
672 if (vect_print_dump_info (REPORT_SLP
))
675 "Build SLP failed: different shift arguments in ");
676 print_gimple_stmt (vect_dump
, stmt
, 0, TDF_SLIM
);
679 vect_free_oprnd_info (&oprnds_info
);
683 if (rhs_code
== CALL_EXPR
)
685 gimple first_stmt
= VEC_index (gimple
, stmts
, 0);
686 if (gimple_call_num_args (stmt
) != nops
687 || !operand_equal_p (gimple_call_fn (first_stmt
),
688 gimple_call_fn (stmt
), 0)
689 || gimple_call_fntype (first_stmt
)
690 != gimple_call_fntype (stmt
))
692 if (vect_print_dump_info (REPORT_SLP
))
695 "Build SLP failed: different calls in ");
696 print_gimple_stmt (vect_dump
, stmt
, 0, TDF_SLIM
);
699 vect_free_oprnd_info (&oprnds_info
);
705 /* Strided store or load. */
706 if (STMT_VINFO_STRIDED_ACCESS (vinfo_for_stmt (stmt
)))
708 if (REFERENCE_CLASS_P (lhs
))
711 if (!vect_get_and_check_slp_defs (loop_vinfo
, bb_vinfo
, *node
,
712 stmt
, ncopies_for_cost
,
713 (i
== 0), &oprnds_info
))
715 vect_free_oprnd_info (&oprnds_info
);
722 /* FORNOW: Check that there is no gap between the loads. */
723 if ((GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt
)) == stmt
724 && GROUP_GAP (vinfo_for_stmt (stmt
)) != 0)
725 || (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt
)) != stmt
726 && GROUP_GAP (vinfo_for_stmt (stmt
)) != 1))
728 if (vect_print_dump_info (REPORT_SLP
))
730 fprintf (vect_dump
, "Build SLP failed: strided "
732 print_gimple_stmt (vect_dump
, stmt
, 0, TDF_SLIM
);
735 vect_free_oprnd_info (&oprnds_info
);
739 /* Check that the size of interleaved loads group is not
740 greater than the SLP group size. */
742 && GROUP_SIZE (vinfo_for_stmt (stmt
)) > ncopies
* group_size
)
744 if (vect_print_dump_info (REPORT_SLP
))
746 fprintf (vect_dump
, "Build SLP failed: the number of "
747 "interleaved loads is greater than"
748 " the SLP group size ");
749 print_gimple_stmt (vect_dump
, stmt
, 0, TDF_SLIM
);
752 vect_free_oprnd_info (&oprnds_info
);
756 first_load
= GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt
));
759 /* Check that there are no loads from different interleaving
760 chains in the same node. The only exception is complex
762 if (prev_first_load
!= first_load
763 && rhs_code
!= REALPART_EXPR
764 && rhs_code
!= IMAGPART_EXPR
)
766 if (vect_print_dump_info (REPORT_SLP
))
768 fprintf (vect_dump
, "Build SLP failed: different "
769 "interleaving chains in one node ");
770 print_gimple_stmt (vect_dump
, stmt
, 0, TDF_SLIM
);
773 vect_free_oprnd_info (&oprnds_info
);
778 prev_first_load
= first_load
;
780 if (first_load
== stmt
)
782 first_dr
= STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt
));
783 if (vect_supportable_dr_alignment (first_dr
, false)
784 == dr_unaligned_unsupported
)
786 if (vect_print_dump_info (REPORT_SLP
))
788 fprintf (vect_dump
, "Build SLP failed: unsupported "
790 print_gimple_stmt (vect_dump
, stmt
, 0, TDF_SLIM
);
793 vect_free_oprnd_info (&oprnds_info
);
797 /* Analyze costs (for the first stmt in the group). */
798 vect_model_load_cost (vinfo_for_stmt (stmt
),
799 ncopies_for_cost
, false, *node
);
802 /* Store the place of this load in the interleaving chain. In
803 case that permutation is needed we later decide if a specific
804 permutation is supported. */
805 load_place
= vect_get_place_in_interleaving_chain (stmt
,
810 VEC_safe_push (int, heap
, *load_permutation
, load_place
);
812 /* We stop the tree when we reach a group of loads. */
813 stop_recursion
= true;
816 } /* Strided access. */
819 if (TREE_CODE_CLASS (rhs_code
) == tcc_reference
)
821 /* Not strided load. */
822 if (vect_print_dump_info (REPORT_SLP
))
824 fprintf (vect_dump
, "Build SLP failed: not strided load ");
825 print_gimple_stmt (vect_dump
, stmt
, 0, TDF_SLIM
);
828 /* FORNOW: Not strided loads are not supported. */
829 vect_free_oprnd_info (&oprnds_info
);
833 /* Not memory operation. */
834 if (TREE_CODE_CLASS (rhs_code
) != tcc_binary
835 && TREE_CODE_CLASS (rhs_code
) != tcc_unary
836 && rhs_code
!= COND_EXPR
837 && rhs_code
!= CALL_EXPR
)
839 if (vect_print_dump_info (REPORT_SLP
))
841 fprintf (vect_dump
, "Build SLP failed: operation");
842 fprintf (vect_dump
, " unsupported ");
843 print_gimple_stmt (vect_dump
, stmt
, 0, TDF_SLIM
);
846 vect_free_oprnd_info (&oprnds_info
);
850 if (rhs_code
== COND_EXPR
)
852 tree cond_expr
= gimple_assign_rhs1 (stmt
);
855 first_cond_code
= TREE_CODE (cond_expr
);
856 else if (first_cond_code
!= TREE_CODE (cond_expr
))
858 if (vect_print_dump_info (REPORT_SLP
))
860 fprintf (vect_dump
, "Build SLP failed: different"
862 print_gimple_stmt (vect_dump
, stmt
, 0, TDF_SLIM
);
865 vect_free_oprnd_info (&oprnds_info
);
870 /* Find the def-stmts. */
871 if (!vect_get_and_check_slp_defs (loop_vinfo
, bb_vinfo
, *node
, stmt
,
872 ncopies_for_cost
, (i
== 0),
875 vect_free_oprnd_info (&oprnds_info
);
881 /* Add the costs of the node to the overall instance costs. */
882 *inside_cost
+= SLP_TREE_INSIDE_OF_LOOP_COST (*node
);
883 *outside_cost
+= SLP_TREE_OUTSIDE_OF_LOOP_COST (*node
);
885 /* Strided loads were reached - stop the recursion. */
888 VEC_safe_push (slp_tree
, heap
, *loads
, *node
);
892 *loads_permuted
= true;
894 += targetm
.vectorize
.builtin_vectorization_cost (vec_perm
, NULL
, 0)
899 /* We don't check here complex numbers chains, so we set
900 LOADS_PERMUTED for further check in
901 vect_supported_load_permutation_p. */
902 if (rhs_code
== REALPART_EXPR
|| rhs_code
== IMAGPART_EXPR
)
903 *loads_permuted
= true;
906 vect_free_oprnd_info (&oprnds_info
);
910 /* Create SLP_TREE nodes for the definition node/s. */
911 FOR_EACH_VEC_ELT (slp_oprnd_info
, oprnds_info
, i
, oprnd_info
)
915 if (oprnd_info
->first_dt
!= vect_internal_def
)
918 child
= vect_create_new_slp_node (oprnd_info
->def_stmts
);
920 || !vect_build_slp_tree (loop_vinfo
, bb_vinfo
, &child
, group_size
,
921 inside_cost
, outside_cost
, ncopies_for_cost
,
922 max_nunits
, load_permutation
, loads
,
923 vectorization_factor
, loads_permuted
))
926 oprnd_info
->def_stmts
= NULL
;
927 vect_free_slp_tree (child
);
928 vect_free_oprnd_info (&oprnds_info
);
932 oprnd_info
->def_stmts
= NULL
;
933 VEC_quick_push (slp_void_p
, SLP_TREE_CHILDREN (*node
), child
);
936 vect_free_oprnd_info (&oprnds_info
);
942 vect_print_slp_tree (slp_tree node
)
951 fprintf (vect_dump
, "node ");
952 FOR_EACH_VEC_ELT (gimple
, SLP_TREE_SCALAR_STMTS (node
), i
, stmt
)
954 fprintf (vect_dump
, "\n\tstmt %d ", i
);
955 print_gimple_stmt (vect_dump
, stmt
, 0, TDF_SLIM
);
957 fprintf (vect_dump
, "\n");
959 FOR_EACH_VEC_ELT (slp_void_p
, SLP_TREE_CHILDREN (node
), i
, child
)
960 vect_print_slp_tree ((slp_tree
) child
);
964 /* Mark the tree rooted at NODE with MARK (PURE_SLP or HYBRID).
965 If MARK is HYBRID, it refers to a specific stmt in NODE (the stmt at index
966 J). Otherwise, MARK is PURE_SLP and J is -1, which indicates that all the
967 stmts in NODE are to be marked. */
970 vect_mark_slp_stmts (slp_tree node
, enum slp_vect_type mark
, int j
)
979 FOR_EACH_VEC_ELT (gimple
, SLP_TREE_SCALAR_STMTS (node
), i
, stmt
)
981 STMT_SLP_TYPE (vinfo_for_stmt (stmt
)) = mark
;
983 FOR_EACH_VEC_ELT (slp_void_p
, SLP_TREE_CHILDREN (node
), i
, child
)
984 vect_mark_slp_stmts ((slp_tree
) child
, mark
, j
);
988 /* Mark the statements of the tree rooted at NODE as relevant (vect_used). */
991 vect_mark_slp_stmts_relevant (slp_tree node
)
995 stmt_vec_info stmt_info
;
1001 FOR_EACH_VEC_ELT (gimple
, SLP_TREE_SCALAR_STMTS (node
), i
, stmt
)
1003 stmt_info
= vinfo_for_stmt (stmt
);
1004 gcc_assert (!STMT_VINFO_RELEVANT (stmt_info
)
1005 || STMT_VINFO_RELEVANT (stmt_info
) == vect_used_in_scope
);
1006 STMT_VINFO_RELEVANT (stmt_info
) = vect_used_in_scope
;
1009 FOR_EACH_VEC_ELT (slp_void_p
, SLP_TREE_CHILDREN (node
), i
, child
)
1010 vect_mark_slp_stmts_relevant ((slp_tree
) child
);
1014 /* Check if the permutation required by the SLP INSTANCE is supported.
1015 Reorganize the SLP nodes stored in SLP_INSTANCE_LOADS if needed. */
1018 vect_supported_slp_permutation_p (slp_instance instance
)
1020 slp_tree node
= VEC_index (slp_tree
, SLP_INSTANCE_LOADS (instance
), 0);
1021 gimple stmt
= VEC_index (gimple
, SLP_TREE_SCALAR_STMTS (node
), 0);
1022 gimple first_load
= GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt
));
1023 VEC (slp_tree
, heap
) *sorted_loads
= NULL
;
1025 slp_tree
*tmp_loads
= NULL
;
1026 int group_size
= SLP_INSTANCE_GROUP_SIZE (instance
), i
, j
;
1029 /* FORNOW: The only supported loads permutation is loads from the same
1030 location in all the loads in the node, when the data-refs in
1031 nodes of LOADS constitute an interleaving chain.
1032 Sort the nodes according to the order of accesses in the chain. */
1033 tmp_loads
= (slp_tree
*) xmalloc (sizeof (slp_tree
) * group_size
);
1035 VEC_iterate (int, SLP_INSTANCE_LOAD_PERMUTATION (instance
), i
, index
)
1036 && VEC_iterate (slp_tree
, SLP_INSTANCE_LOADS (instance
), j
, load
);
1037 i
+= group_size
, j
++)
1039 gimple scalar_stmt
= VEC_index (gimple
, SLP_TREE_SCALAR_STMTS (load
), 0);
1040 /* Check that the loads are all in the same interleaving chain. */
1041 if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (scalar_stmt
)) != first_load
)
1043 if (vect_print_dump_info (REPORT_DETAILS
))
1045 fprintf (vect_dump
, "Build SLP failed: unsupported data "
1047 print_gimple_stmt (vect_dump
, scalar_stmt
, 0, TDF_SLIM
);
1054 tmp_loads
[index
] = load
;
1057 sorted_loads
= VEC_alloc (slp_tree
, heap
, group_size
);
1058 for (i
= 0; i
< group_size
; i
++)
1059 VEC_safe_push (slp_tree
, heap
, sorted_loads
, tmp_loads
[i
]);
1061 VEC_free (slp_tree
, heap
, SLP_INSTANCE_LOADS (instance
));
1062 SLP_INSTANCE_LOADS (instance
) = sorted_loads
;
1065 if (!vect_transform_slp_perm_load (stmt
, NULL
, NULL
,
1066 SLP_INSTANCE_UNROLLING_FACTOR (instance
),
1074 /* Rearrange the statements of NODE according to PERMUTATION. */
1077 vect_slp_rearrange_stmts (slp_tree node
, unsigned int group_size
,
1078 VEC (int, heap
) *permutation
)
1081 VEC (gimple
, heap
) *tmp_stmts
;
1082 unsigned int index
, i
;
1088 FOR_EACH_VEC_ELT (slp_void_p
, SLP_TREE_CHILDREN (node
), i
, child
)
1089 vect_slp_rearrange_stmts ((slp_tree
) child
, group_size
, permutation
);
1091 gcc_assert (group_size
== VEC_length (gimple
, SLP_TREE_SCALAR_STMTS (node
)));
1092 tmp_stmts
= VEC_alloc (gimple
, heap
, group_size
);
1094 for (i
= 0; i
< group_size
; i
++)
1095 VEC_safe_push (gimple
, heap
, tmp_stmts
, NULL
);
1097 FOR_EACH_VEC_ELT (gimple
, SLP_TREE_SCALAR_STMTS (node
), i
, stmt
)
1099 index
= VEC_index (int, permutation
, i
);
1100 VEC_replace (gimple
, tmp_stmts
, index
, stmt
);
1103 VEC_free (gimple
, heap
, SLP_TREE_SCALAR_STMTS (node
));
1104 SLP_TREE_SCALAR_STMTS (node
) = tmp_stmts
;
1108 /* Check if the required load permutation is supported.
1109 LOAD_PERMUTATION contains a list of indices of the loads.
1110 In SLP this permutation is relative to the order of strided stores that are
1111 the base of the SLP instance. */
1114 vect_supported_load_permutation_p (slp_instance slp_instn
, int group_size
,
1115 VEC (int, heap
) *load_permutation
)
1117 int i
= 0, j
, prev
= -1, next
, k
, number_of_groups
;
1118 bool supported
, bad_permutation
= false;
1120 slp_tree node
, other_complex_node
;
1121 gimple stmt
, first
= NULL
, other_node_first
, load
, next_load
, first_load
;
1122 unsigned complex_numbers
= 0;
1123 struct data_reference
*dr
;
1124 bb_vec_info bb_vinfo
;
1126 /* FORNOW: permutations are only supported in SLP. */
1130 if (vect_print_dump_info (REPORT_SLP
))
1132 fprintf (vect_dump
, "Load permutation ");
1133 FOR_EACH_VEC_ELT (int, load_permutation
, i
, next
)
1134 fprintf (vect_dump
, "%d ", next
);
1137 /* In case of reduction every load permutation is allowed, since the order
1138 of the reduction statements is not important (as opposed to the case of
1139 strided stores). The only condition we need to check is that all the
1140 load nodes are of the same size and have the same permutation (and then
1141 rearrange all the nodes of the SLP instance according to this
1144 /* Check that all the load nodes are of the same size. */
1145 FOR_EACH_VEC_ELT (slp_tree
, SLP_INSTANCE_LOADS (slp_instn
), i
, node
)
1147 if (VEC_length (gimple
, SLP_TREE_SCALAR_STMTS (node
))
1148 != (unsigned) group_size
)
1151 stmt
= VEC_index (gimple
, SLP_TREE_SCALAR_STMTS (node
), 0);
1152 if (is_gimple_assign (stmt
)
1153 && (gimple_assign_rhs_code (stmt
) == REALPART_EXPR
1154 || gimple_assign_rhs_code (stmt
) == IMAGPART_EXPR
))
1158 /* Complex operands can be swapped as following:
1159 real_c = real_b + real_a;
1160 imag_c = imag_a + imag_b;
1161 i.e., we have {real_b, imag_a} and {real_a, imag_b} instead of
1162 {real_a, imag_a} and {real_b, imag_b}. We check here that if interleaving
1163 chains are mixed, they match the above pattern. */
1164 if (complex_numbers
)
1166 FOR_EACH_VEC_ELT (slp_tree
, SLP_INSTANCE_LOADS (slp_instn
), i
, node
)
1168 FOR_EACH_VEC_ELT (gimple
, SLP_TREE_SCALAR_STMTS (node
), j
, stmt
)
1174 if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt
)) != first
)
1176 if (complex_numbers
!= 2)
1184 other_complex_node
= VEC_index (slp_tree
,
1185 SLP_INSTANCE_LOADS (slp_instn
), k
);
1186 other_node_first
= VEC_index (gimple
,
1187 SLP_TREE_SCALAR_STMTS (other_complex_node
), 0);
1189 if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt
))
1190 != other_node_first
)
1198 /* We checked that this case is ok, so there is no need to proceed with
1199 permutation tests. */
1200 if (complex_numbers
== 2)
1202 VEC_free (slp_tree
, heap
, SLP_INSTANCE_LOADS (slp_instn
));
1203 VEC_free (int, heap
, SLP_INSTANCE_LOAD_PERMUTATION (slp_instn
));
1207 node
= SLP_INSTANCE_TREE (slp_instn
);
1208 stmt
= VEC_index (gimple
, SLP_TREE_SCALAR_STMTS (node
), 0);
1209 /* LOAD_PERMUTATION is a list of indices of all the loads of the SLP
1210 instance, not all the loads belong to the same node or interleaving
1211 group. Hence, we need to divide them into groups according to
1213 number_of_groups
= VEC_length (int, load_permutation
) / group_size
;
1215 /* Reduction (there are no data-refs in the root).
1216 In reduction chain the order of the loads is important. */
1217 if (!STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt
))
1218 && !GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt
)))
1220 int first_group_load_index
;
1222 /* Compare all the permutation sequences to the first one. */
1223 for (i
= 1; i
< number_of_groups
; i
++)
1226 for (j
= i
* group_size
; j
< i
* group_size
+ group_size
; j
++)
1228 next
= VEC_index (int, load_permutation
, j
);
1229 first_group_load_index
= VEC_index (int, load_permutation
, k
);
1231 if (next
!= first_group_load_index
)
1233 bad_permutation
= true;
1240 if (bad_permutation
)
1244 if (!bad_permutation
)
1246 /* Check that the loads in the first sequence are different and there
1247 are no gaps between them. */
1248 load_index
= sbitmap_alloc (group_size
);
1249 sbitmap_zero (load_index
);
1250 for (k
= 0; k
< group_size
; k
++)
1252 first_group_load_index
= VEC_index (int, load_permutation
, k
);
1253 if (TEST_BIT (load_index
, first_group_load_index
))
1255 bad_permutation
= true;
1259 SET_BIT (load_index
, first_group_load_index
);
1262 if (!bad_permutation
)
1263 for (k
= 0; k
< group_size
; k
++)
1264 if (!TEST_BIT (load_index
, k
))
1266 bad_permutation
= true;
1270 sbitmap_free (load_index
);
1273 if (!bad_permutation
)
1275 /* This permutation is valid for reduction. Since the order of the
1276 statements in the nodes is not important unless they are memory
1277 accesses, we can rearrange the statements in all the nodes
1278 according to the order of the loads. */
1279 vect_slp_rearrange_stmts (SLP_INSTANCE_TREE (slp_instn
), group_size
,
1281 VEC_free (int, heap
, SLP_INSTANCE_LOAD_PERMUTATION (slp_instn
));
1286 /* In basic block vectorization we allow any subchain of an interleaving
1288 FORNOW: not supported in loop SLP because of realignment complications. */
1289 bb_vinfo
= STMT_VINFO_BB_VINFO (vinfo_for_stmt (stmt
));
1290 bad_permutation
= false;
1291 /* Check that for every node in the instance the loads form a subchain. */
1294 FOR_EACH_VEC_ELT (slp_tree
, SLP_INSTANCE_LOADS (slp_instn
), i
, node
)
1298 FOR_EACH_VEC_ELT (gimple
, SLP_TREE_SCALAR_STMTS (node
), j
, load
)
1301 first_load
= GROUP_FIRST_ELEMENT (vinfo_for_stmt (load
));
1303 != GROUP_FIRST_ELEMENT (vinfo_for_stmt (load
)))
1305 bad_permutation
= true;
1309 if (j
!= 0 && next_load
!= load
)
1311 bad_permutation
= true;
1315 next_load
= GROUP_NEXT_ELEMENT (vinfo_for_stmt (load
));
1318 if (bad_permutation
)
1322 /* Check that the alignment of the first load in every subchain, i.e.,
1323 the first statement in every load node, is supported. */
1324 if (!bad_permutation
)
1326 FOR_EACH_VEC_ELT (slp_tree
, SLP_INSTANCE_LOADS (slp_instn
), i
, node
)
1328 first_load
= VEC_index (gimple
, SLP_TREE_SCALAR_STMTS (node
), 0);
1330 != GROUP_FIRST_ELEMENT (vinfo_for_stmt (first_load
)))
1332 dr
= STMT_VINFO_DATA_REF (vinfo_for_stmt (first_load
));
1333 if (vect_supportable_dr_alignment (dr
, false)
1334 == dr_unaligned_unsupported
)
1336 if (vect_print_dump_info (REPORT_SLP
))
1338 fprintf (vect_dump
, "unsupported unaligned load ");
1339 print_gimple_stmt (vect_dump
, first_load
, 0,
1342 bad_permutation
= true;
1348 if (!bad_permutation
)
1350 VEC_free (int, heap
, SLP_INSTANCE_LOAD_PERMUTATION (slp_instn
));
1356 /* FORNOW: the only supported permutation is 0..01..1.. of length equal to
1357 GROUP_SIZE and where each sequence of same drs is of GROUP_SIZE length as
1358 well (unless it's reduction). */
1359 if (VEC_length (int, load_permutation
)
1360 != (unsigned int) (group_size
* group_size
))
1364 load_index
= sbitmap_alloc (group_size
);
1365 sbitmap_zero (load_index
);
1366 for (j
= 0; j
< group_size
; j
++)
1368 for (i
= j
* group_size
, k
= 0;
1369 VEC_iterate (int, load_permutation
, i
, next
) && k
< group_size
;
1372 if (i
!= j
* group_size
&& next
!= prev
)
1381 if (TEST_BIT (load_index
, prev
))
1387 SET_BIT (load_index
, prev
);
1390 for (j
= 0; j
< group_size
; j
++)
1391 if (!TEST_BIT (load_index
, j
))
1394 sbitmap_free (load_index
);
1396 if (supported
&& i
== group_size
* group_size
1397 && vect_supported_slp_permutation_p (slp_instn
))
1404 /* Find the first load in the loop that belongs to INSTANCE.
1405 When loads are in several SLP nodes, there can be a case in which the first
1406 load does not appear in the first SLP node to be transformed, causing
1407 incorrect order of statements. Since we generate all the loads together,
1408 they must be inserted before the first load of the SLP instance and not
1409 before the first load of the first node of the instance. */
1412 vect_find_first_load_in_slp_instance (slp_instance instance
)
1416 gimple first_load
= NULL
, load
;
1418 FOR_EACH_VEC_ELT (slp_tree
, SLP_INSTANCE_LOADS (instance
), i
, load_node
)
1419 FOR_EACH_VEC_ELT (gimple
, SLP_TREE_SCALAR_STMTS (load_node
), j
, load
)
1420 first_load
= get_earlier_stmt (load
, first_load
);
1426 /* Find the last store in SLP INSTANCE. */
1429 vect_find_last_store_in_slp_instance (slp_instance instance
)
1433 gimple last_store
= NULL
, store
;
1435 node
= SLP_INSTANCE_TREE (instance
);
1437 VEC_iterate (gimple
, SLP_TREE_SCALAR_STMTS (node
), i
, store
);
1439 last_store
= get_later_stmt (store
, last_store
);
1445 /* Analyze an SLP instance starting from a group of strided stores. Call
1446 vect_build_slp_tree to build a tree of packed stmts if possible.
1447 Return FALSE if it's impossible to SLP any stmt in the loop. */
1450 vect_analyze_slp_instance (loop_vec_info loop_vinfo
, bb_vec_info bb_vinfo
,
1453 slp_instance new_instance
;
1455 unsigned int group_size
= GROUP_SIZE (vinfo_for_stmt (stmt
));
1456 unsigned int unrolling_factor
= 1, nunits
;
1457 tree vectype
, scalar_type
= NULL_TREE
;
1459 unsigned int vectorization_factor
= 0;
1460 int inside_cost
= 0, outside_cost
= 0, ncopies_for_cost
, i
;
1461 unsigned int max_nunits
= 0;
1462 VEC (int, heap
) *load_permutation
;
1463 VEC (slp_tree
, heap
) *loads
;
1464 struct data_reference
*dr
= STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt
));
1465 bool loads_permuted
= false;
1466 VEC (gimple
, heap
) *scalar_stmts
;
1468 if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt
)))
1472 scalar_type
= TREE_TYPE (DR_REF (dr
));
1473 vectype
= get_vectype_for_scalar_type (scalar_type
);
1477 gcc_assert (loop_vinfo
);
1478 vectype
= STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt
));
1481 group_size
= GROUP_SIZE (vinfo_for_stmt (stmt
));
1485 gcc_assert (loop_vinfo
);
1486 vectype
= STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt
));
1487 group_size
= VEC_length (gimple
, LOOP_VINFO_REDUCTIONS (loop_vinfo
));
1492 if (vect_print_dump_info (REPORT_SLP
))
1494 fprintf (vect_dump
, "Build SLP failed: unsupported data-type ");
1495 print_generic_expr (vect_dump
, scalar_type
, TDF_SLIM
);
1501 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
1503 vectorization_factor
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
);
1505 vectorization_factor
= nunits
;
1507 /* Calculate the unrolling factor. */
1508 unrolling_factor
= least_common_multiple (nunits
, group_size
) / group_size
;
1509 if (unrolling_factor
!= 1 && !loop_vinfo
)
1511 if (vect_print_dump_info (REPORT_SLP
))
1512 fprintf (vect_dump
, "Build SLP failed: unrolling required in basic"
1518 /* Create a node (a root of the SLP tree) for the packed strided stores. */
1519 scalar_stmts
= VEC_alloc (gimple
, heap
, group_size
);
1521 if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt
)))
1523 /* Collect the stores and store them in SLP_TREE_SCALAR_STMTS. */
1526 if (STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (next
))
1527 && STMT_VINFO_RELATED_STMT (vinfo_for_stmt (next
)))
1528 VEC_safe_push (gimple
, heap
, scalar_stmts
,
1529 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (next
)));
1531 VEC_safe_push (gimple
, heap
, scalar_stmts
, next
);
1532 next
= GROUP_NEXT_ELEMENT (vinfo_for_stmt (next
));
1537 /* Collect reduction statements. */
1538 VEC (gimple
, heap
) *reductions
= LOOP_VINFO_REDUCTIONS (loop_vinfo
);
1539 for (i
= 0; VEC_iterate (gimple
, reductions
, i
, next
); i
++)
1540 VEC_safe_push (gimple
, heap
, scalar_stmts
, next
);
1543 node
= vect_create_new_slp_node (scalar_stmts
);
1545 /* Calculate the number of vector stmts to create based on the unrolling
1546 factor (number of vectors is 1 if NUNITS >= GROUP_SIZE, and is
1547 GROUP_SIZE / NUNITS otherwise. */
1548 ncopies_for_cost
= unrolling_factor
* group_size
/ nunits
;
1550 load_permutation
= VEC_alloc (int, heap
, group_size
* group_size
);
1551 loads
= VEC_alloc (slp_tree
, heap
, group_size
);
1553 /* Build the tree for the SLP instance. */
1554 if (vect_build_slp_tree (loop_vinfo
, bb_vinfo
, &node
, group_size
,
1555 &inside_cost
, &outside_cost
, ncopies_for_cost
,
1556 &max_nunits
, &load_permutation
, &loads
,
1557 vectorization_factor
, &loads_permuted
))
1559 /* Calculate the unrolling factor based on the smallest type. */
1560 if (max_nunits
> nunits
)
1561 unrolling_factor
= least_common_multiple (max_nunits
, group_size
)
1564 if (unrolling_factor
!= 1 && !loop_vinfo
)
1566 if (vect_print_dump_info (REPORT_SLP
))
1567 fprintf (vect_dump
, "Build SLP failed: unrolling required in basic"
1572 /* Create a new SLP instance. */
1573 new_instance
= XNEW (struct _slp_instance
);
1574 SLP_INSTANCE_TREE (new_instance
) = node
;
1575 SLP_INSTANCE_GROUP_SIZE (new_instance
) = group_size
;
1576 SLP_INSTANCE_UNROLLING_FACTOR (new_instance
) = unrolling_factor
;
1577 SLP_INSTANCE_OUTSIDE_OF_LOOP_COST (new_instance
) = outside_cost
;
1578 SLP_INSTANCE_INSIDE_OF_LOOP_COST (new_instance
) = inside_cost
;
1579 SLP_INSTANCE_LOADS (new_instance
) = loads
;
1580 SLP_INSTANCE_FIRST_LOAD_STMT (new_instance
) = NULL
;
1581 SLP_INSTANCE_LOAD_PERMUTATION (new_instance
) = load_permutation
;
1585 if (!vect_supported_load_permutation_p (new_instance
, group_size
,
1588 if (vect_print_dump_info (REPORT_SLP
))
1590 fprintf (vect_dump
, "Build SLP failed: unsupported load "
1592 print_gimple_stmt (vect_dump
, stmt
, 0, TDF_SLIM
);
1595 vect_free_slp_instance (new_instance
);
1599 SLP_INSTANCE_FIRST_LOAD_STMT (new_instance
)
1600 = vect_find_first_load_in_slp_instance (new_instance
);
1603 VEC_free (int, heap
, SLP_INSTANCE_LOAD_PERMUTATION (new_instance
));
1606 VEC_safe_push (slp_instance
, heap
,
1607 LOOP_VINFO_SLP_INSTANCES (loop_vinfo
),
1610 VEC_safe_push (slp_instance
, heap
, BB_VINFO_SLP_INSTANCES (bb_vinfo
),
1613 if (vect_print_dump_info (REPORT_SLP
))
1614 vect_print_slp_tree (node
);
1619 /* Failed to SLP. */
1620 /* Free the allocated memory. */
1621 vect_free_slp_tree (node
);
1622 VEC_free (int, heap
, load_permutation
);
1623 VEC_free (slp_tree
, heap
, loads
);
1629 /* Check if there are stmts in the loop can be vectorized using SLP. Build SLP
1630 trees of packed scalar stmts if SLP is possible. */
1633 vect_analyze_slp (loop_vec_info loop_vinfo
, bb_vec_info bb_vinfo
)
1636 VEC (gimple
, heap
) *strided_stores
, *reductions
= NULL
, *reduc_chains
= NULL
;
1637 gimple first_element
;
1640 if (vect_print_dump_info (REPORT_SLP
))
1641 fprintf (vect_dump
, "=== vect_analyze_slp ===");
1645 strided_stores
= LOOP_VINFO_STRIDED_STORES (loop_vinfo
);
1646 reduc_chains
= LOOP_VINFO_REDUCTION_CHAINS (loop_vinfo
);
1647 reductions
= LOOP_VINFO_REDUCTIONS (loop_vinfo
);
1650 strided_stores
= BB_VINFO_STRIDED_STORES (bb_vinfo
);
1652 /* Find SLP sequences starting from groups of strided stores. */
1653 FOR_EACH_VEC_ELT (gimple
, strided_stores
, i
, first_element
)
1654 if (vect_analyze_slp_instance (loop_vinfo
, bb_vinfo
, first_element
))
1657 if (bb_vinfo
&& !ok
)
1659 if (vect_print_dump_info (REPORT_SLP
))
1660 fprintf (vect_dump
, "Failed to SLP the basic block.");
1666 && VEC_length (gimple
, LOOP_VINFO_REDUCTION_CHAINS (loop_vinfo
)) > 0)
1668 /* Find SLP sequences starting from reduction chains. */
1669 FOR_EACH_VEC_ELT (gimple
, reduc_chains
, i
, first_element
)
1670 if (vect_analyze_slp_instance (loop_vinfo
, bb_vinfo
, first_element
))
1675 /* Don't try to vectorize SLP reductions if reduction chain was
1680 /* Find SLP sequences starting from groups of reductions. */
1681 if (loop_vinfo
&& VEC_length (gimple
, LOOP_VINFO_REDUCTIONS (loop_vinfo
)) > 1
1682 && vect_analyze_slp_instance (loop_vinfo
, bb_vinfo
,
1683 VEC_index (gimple
, reductions
, 0)))
1690 /* For each possible SLP instance decide whether to SLP it and calculate overall
1691 unrolling factor needed to SLP the loop. Return TRUE if decided to SLP at
1692 least one instance. */
1695 vect_make_slp_decision (loop_vec_info loop_vinfo
)
1697 unsigned int i
, unrolling_factor
= 1;
1698 VEC (slp_instance
, heap
) *slp_instances
= LOOP_VINFO_SLP_INSTANCES (loop_vinfo
);
1699 slp_instance instance
;
1700 int decided_to_slp
= 0;
1702 if (vect_print_dump_info (REPORT_SLP
))
1703 fprintf (vect_dump
, "=== vect_make_slp_decision ===");
1705 FOR_EACH_VEC_ELT (slp_instance
, slp_instances
, i
, instance
)
1707 /* FORNOW: SLP if you can. */
1708 if (unrolling_factor
< SLP_INSTANCE_UNROLLING_FACTOR (instance
))
1709 unrolling_factor
= SLP_INSTANCE_UNROLLING_FACTOR (instance
);
1711 /* Mark all the stmts that belong to INSTANCE as PURE_SLP stmts. Later we
1712 call vect_detect_hybrid_slp () to find stmts that need hybrid SLP and
1713 loop-based vectorization. Such stmts will be marked as HYBRID. */
1714 vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance
), pure_slp
, -1);
1718 LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo
) = unrolling_factor
;
1720 if (decided_to_slp
&& vect_print_dump_info (REPORT_SLP
))
1721 fprintf (vect_dump
, "Decided to SLP %d instances. Unrolling factor %d",
1722 decided_to_slp
, unrolling_factor
);
1724 return (decided_to_slp
> 0);
1728 /* Find stmts that must be both vectorized and SLPed (since they feed stmts that
1729 can't be SLPed) in the tree rooted at NODE. Mark such stmts as HYBRID. */
1732 vect_detect_hybrid_slp_stmts (slp_tree node
)
1735 VEC (gimple
, heap
) *stmts
= SLP_TREE_SCALAR_STMTS (node
);
1736 gimple stmt
= VEC_index (gimple
, stmts
, 0);
1737 imm_use_iterator imm_iter
;
1739 stmt_vec_info stmt_vinfo
= vinfo_for_stmt (stmt
);
1741 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_vinfo
);
1742 struct loop
*loop
= NULL
;
1743 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_vinfo
);
1744 basic_block bb
= NULL
;
1750 loop
= LOOP_VINFO_LOOP (loop_vinfo
);
1752 bb
= BB_VINFO_BB (bb_vinfo
);
1754 FOR_EACH_VEC_ELT (gimple
, SLP_TREE_SCALAR_STMTS (node
), i
, stmt
)
1755 if (PURE_SLP_STMT (vinfo_for_stmt (stmt
))
1756 && TREE_CODE (gimple_op (stmt
, 0)) == SSA_NAME
)
1757 FOR_EACH_IMM_USE_STMT (use_stmt
, imm_iter
, gimple_op (stmt
, 0))
1758 if (gimple_bb (use_stmt
)
1759 && ((loop
&& flow_bb_inside_loop_p (loop
, gimple_bb (use_stmt
)))
1760 || bb
== gimple_bb (use_stmt
))
1761 && (stmt_vinfo
= vinfo_for_stmt (use_stmt
))
1762 && !STMT_SLP_TYPE (stmt_vinfo
)
1763 && (STMT_VINFO_RELEVANT (stmt_vinfo
)
1764 || VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (stmt_vinfo
)))
1765 && !(gimple_code (use_stmt
) == GIMPLE_PHI
1766 && STMT_VINFO_DEF_TYPE (stmt_vinfo
)
1767 == vect_reduction_def
))
1768 vect_mark_slp_stmts (node
, hybrid
, i
);
1770 FOR_EACH_VEC_ELT (slp_void_p
, SLP_TREE_CHILDREN (node
), i
, child
)
1771 vect_detect_hybrid_slp_stmts ((slp_tree
) child
);
1775 /* Find stmts that must be both vectorized and SLPed. */
1778 vect_detect_hybrid_slp (loop_vec_info loop_vinfo
)
1781 VEC (slp_instance
, heap
) *slp_instances
= LOOP_VINFO_SLP_INSTANCES (loop_vinfo
);
1782 slp_instance instance
;
1784 if (vect_print_dump_info (REPORT_SLP
))
1785 fprintf (vect_dump
, "=== vect_detect_hybrid_slp ===");
1787 FOR_EACH_VEC_ELT (slp_instance
, slp_instances
, i
, instance
)
1788 vect_detect_hybrid_slp_stmts (SLP_INSTANCE_TREE (instance
));
1792 /* Create and initialize a new bb_vec_info struct for BB, as well as
1793 stmt_vec_info structs for all the stmts in it. */
1796 new_bb_vec_info (basic_block bb
)
1798 bb_vec_info res
= NULL
;
1799 gimple_stmt_iterator gsi
;
1801 res
= (bb_vec_info
) xcalloc (1, sizeof (struct _bb_vec_info
));
1802 BB_VINFO_BB (res
) = bb
;
1804 for (gsi
= gsi_start_bb (bb
); !gsi_end_p (gsi
); gsi_next (&gsi
))
1806 gimple stmt
= gsi_stmt (gsi
);
1807 gimple_set_uid (stmt
, 0);
1808 set_vinfo_for_stmt (stmt
, new_stmt_vec_info (stmt
, NULL
, res
));
1811 BB_VINFO_STRIDED_STORES (res
) = VEC_alloc (gimple
, heap
, 10);
1812 BB_VINFO_SLP_INSTANCES (res
) = VEC_alloc (slp_instance
, heap
, 2);
1819 /* Free BB_VINFO struct, as well as all the stmt_vec_info structs of all the
1820 stmts in the basic block. */
1823 destroy_bb_vec_info (bb_vec_info bb_vinfo
)
1826 gimple_stmt_iterator si
;
1831 bb
= BB_VINFO_BB (bb_vinfo
);
1833 for (si
= gsi_start_bb (bb
); !gsi_end_p (si
); gsi_next (&si
))
1835 gimple stmt
= gsi_stmt (si
);
1836 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
1839 /* Free stmt_vec_info. */
1840 free_stmt_vec_info (stmt
);
1843 free_data_refs (BB_VINFO_DATAREFS (bb_vinfo
));
1844 free_dependence_relations (BB_VINFO_DDRS (bb_vinfo
));
1845 VEC_free (gimple
, heap
, BB_VINFO_STRIDED_STORES (bb_vinfo
));
1846 VEC_free (slp_instance
, heap
, BB_VINFO_SLP_INSTANCES (bb_vinfo
));
1852 /* Analyze statements contained in SLP tree node after recursively analyzing
1853 the subtree. Return TRUE if the operations are supported. */
1856 vect_slp_analyze_node_operations (bb_vec_info bb_vinfo
, slp_tree node
)
1866 FOR_EACH_VEC_ELT (slp_void_p
, SLP_TREE_CHILDREN (node
), i
, child
)
1867 if (!vect_slp_analyze_node_operations (bb_vinfo
, (slp_tree
) child
))
1870 FOR_EACH_VEC_ELT (gimple
, SLP_TREE_SCALAR_STMTS (node
), i
, stmt
)
1872 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
1873 gcc_assert (stmt_info
);
1874 gcc_assert (PURE_SLP_STMT (stmt_info
));
1876 if (!vect_analyze_stmt (stmt
, &dummy
, node
))
1884 /* Analyze statements in SLP instances of the basic block. Return TRUE if the
1885 operations are supported. */
1888 vect_slp_analyze_operations (bb_vec_info bb_vinfo
)
1890 VEC (slp_instance
, heap
) *slp_instances
= BB_VINFO_SLP_INSTANCES (bb_vinfo
);
1891 slp_instance instance
;
1894 for (i
= 0; VEC_iterate (slp_instance
, slp_instances
, i
, instance
); )
1896 if (!vect_slp_analyze_node_operations (bb_vinfo
,
1897 SLP_INSTANCE_TREE (instance
)))
1899 vect_free_slp_instance (instance
);
1900 VEC_ordered_remove (slp_instance
, slp_instances
, i
);
1906 if (!VEC_length (slp_instance
, slp_instances
))
1912 /* Check if vectorization of the basic block is profitable. */
1915 vect_bb_vectorization_profitable_p (bb_vec_info bb_vinfo
)
1917 VEC (slp_instance
, heap
) *slp_instances
= BB_VINFO_SLP_INSTANCES (bb_vinfo
);
1918 slp_instance instance
;
1920 unsigned int vec_outside_cost
= 0, vec_inside_cost
= 0, scalar_cost
= 0;
1921 unsigned int stmt_cost
;
1923 gimple_stmt_iterator si
;
1924 basic_block bb
= BB_VINFO_BB (bb_vinfo
);
1925 stmt_vec_info stmt_info
= NULL
;
1926 tree dummy_type
= NULL
;
1929 /* Calculate vector costs. */
1930 FOR_EACH_VEC_ELT (slp_instance
, slp_instances
, i
, instance
)
1932 vec_outside_cost
+= SLP_INSTANCE_OUTSIDE_OF_LOOP_COST (instance
);
1933 vec_inside_cost
+= SLP_INSTANCE_INSIDE_OF_LOOP_COST (instance
);
1936 /* Calculate scalar cost. */
1937 for (si
= gsi_start_bb (bb
); !gsi_end_p (si
); gsi_next (&si
))
1939 stmt
= gsi_stmt (si
);
1940 stmt_info
= vinfo_for_stmt (stmt
);
1942 if (!stmt_info
|| !STMT_VINFO_VECTORIZABLE (stmt_info
)
1943 || !PURE_SLP_STMT (stmt_info
))
1946 if (STMT_VINFO_DATA_REF (stmt_info
))
1948 if (DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info
)))
1949 stmt_cost
= targetm
.vectorize
.builtin_vectorization_cost
1950 (scalar_load
, dummy_type
, dummy
);
1952 stmt_cost
= targetm
.vectorize
.builtin_vectorization_cost
1953 (scalar_store
, dummy_type
, dummy
);
1956 stmt_cost
= targetm
.vectorize
.builtin_vectorization_cost
1957 (scalar_stmt
, dummy_type
, dummy
);
1959 scalar_cost
+= stmt_cost
;
1962 if (vect_print_dump_info (REPORT_COST
))
1964 fprintf (vect_dump
, "Cost model analysis: \n");
1965 fprintf (vect_dump
, " Vector inside of basic block cost: %d\n",
1967 fprintf (vect_dump
, " Vector outside of basic block cost: %d\n",
1969 fprintf (vect_dump
, " Scalar cost of basic block: %d", scalar_cost
);
1972 /* Vectorization is profitable if its cost is less than the cost of scalar
1974 if (vec_outside_cost
+ vec_inside_cost
>= scalar_cost
)
1980 /* Check if the basic block can be vectorized. */
1983 vect_slp_analyze_bb_1 (basic_block bb
)
1985 bb_vec_info bb_vinfo
;
1986 VEC (ddr_p
, heap
) *ddrs
;
1987 VEC (slp_instance
, heap
) *slp_instances
;
1988 slp_instance instance
;
1991 int max_vf
= MAX_VECTORIZATION_FACTOR
;
1993 bb_vinfo
= new_bb_vec_info (bb
);
1997 if (!vect_analyze_data_refs (NULL
, bb_vinfo
, &min_vf
))
1999 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS
))
2000 fprintf (vect_dump
, "not vectorized: unhandled data-ref in basic "
2003 destroy_bb_vec_info (bb_vinfo
);
2007 ddrs
= BB_VINFO_DDRS (bb_vinfo
);
2008 if (!VEC_length (ddr_p
, ddrs
))
2010 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS
))
2011 fprintf (vect_dump
, "not vectorized: not enough data-refs in basic "
2014 destroy_bb_vec_info (bb_vinfo
);
2018 if (!vect_analyze_data_ref_dependences (NULL
, bb_vinfo
, &max_vf
)
2021 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS
))
2022 fprintf (vect_dump
, "not vectorized: unhandled data dependence "
2023 "in basic block.\n");
2025 destroy_bb_vec_info (bb_vinfo
);
2029 if (!vect_analyze_data_refs_alignment (NULL
, bb_vinfo
))
2031 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS
))
2032 fprintf (vect_dump
, "not vectorized: bad data alignment in basic "
2035 destroy_bb_vec_info (bb_vinfo
);
2039 if (!vect_analyze_data_ref_accesses (NULL
, bb_vinfo
))
2041 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS
))
2042 fprintf (vect_dump
, "not vectorized: unhandled data access in basic "
2045 destroy_bb_vec_info (bb_vinfo
);
2049 if (!vect_verify_datarefs_alignment (NULL
, bb_vinfo
))
2051 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS
))
2052 fprintf (vect_dump
, "not vectorized: unsupported alignment in basic "
2055 destroy_bb_vec_info (bb_vinfo
);
2059 /* Check the SLP opportunities in the basic block, analyze and build SLP
2061 if (!vect_analyze_slp (NULL
, bb_vinfo
))
2063 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS
))
2064 fprintf (vect_dump
, "not vectorized: failed to find SLP opportunities "
2065 "in basic block.\n");
2067 destroy_bb_vec_info (bb_vinfo
);
2071 slp_instances
= BB_VINFO_SLP_INSTANCES (bb_vinfo
);
2073 /* Mark all the statements that we want to vectorize as pure SLP and
2075 FOR_EACH_VEC_ELT (slp_instance
, slp_instances
, i
, instance
)
2077 vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance
), pure_slp
, -1);
2078 vect_mark_slp_stmts_relevant (SLP_INSTANCE_TREE (instance
));
2081 if (!vect_slp_analyze_operations (bb_vinfo
))
2083 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS
))
2084 fprintf (vect_dump
, "not vectorized: bad operation in basic block.\n");
2086 destroy_bb_vec_info (bb_vinfo
);
2090 /* Cost model: check if the vectorization is worthwhile. */
2091 if (flag_vect_cost_model
2092 && !vect_bb_vectorization_profitable_p (bb_vinfo
))
2094 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS
))
2095 fprintf (vect_dump
, "not vectorized: vectorization is not "
2098 destroy_bb_vec_info (bb_vinfo
);
2102 if (vect_print_dump_info (REPORT_DETAILS
))
2103 fprintf (vect_dump
, "Basic block will be vectorized using SLP\n");
2110 vect_slp_analyze_bb (basic_block bb
)
2112 bb_vec_info bb_vinfo
;
2114 gimple_stmt_iterator gsi
;
2115 unsigned int vector_sizes
;
2117 if (vect_print_dump_info (REPORT_DETAILS
))
2118 fprintf (vect_dump
, "===vect_slp_analyze_bb===\n");
2120 for (gsi
= gsi_start_bb (bb
); !gsi_end_p (gsi
); gsi_next (&gsi
))
2122 gimple stmt
= gsi_stmt (gsi
);
2123 if (!is_gimple_debug (stmt
)
2124 && !gimple_nop_p (stmt
)
2125 && gimple_code (stmt
) != GIMPLE_LABEL
)
2129 if (insns
> PARAM_VALUE (PARAM_SLP_MAX_INSNS_IN_BB
))
2131 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS
))
2132 fprintf (vect_dump
, "not vectorized: too many instructions in basic "
2138 /* Autodetect first vector size we try. */
2139 current_vector_size
= 0;
2140 vector_sizes
= targetm
.vectorize
.autovectorize_vector_sizes ();
2144 bb_vinfo
= vect_slp_analyze_bb_1 (bb
);
2148 destroy_bb_vec_info (bb_vinfo
);
2150 vector_sizes
&= ~current_vector_size
;
2151 if (vector_sizes
== 0
2152 || current_vector_size
== 0)
2155 /* Try the next biggest vector size. */
2156 current_vector_size
= 1 << floor_log2 (vector_sizes
);
2157 if (vect_print_dump_info (REPORT_DETAILS
))
2158 fprintf (vect_dump
, "***** Re-trying analysis with "
2159 "vector size %d\n", current_vector_size
);
2164 /* SLP costs are calculated according to SLP instance unrolling factor (i.e.,
2165 the number of created vector stmts depends on the unrolling factor).
2166 However, the actual number of vector stmts for every SLP node depends on
2167 VF which is set later in vect_analyze_operations (). Hence, SLP costs
2168 should be updated. In this function we assume that the inside costs
2169 calculated in vect_model_xxx_cost are linear in ncopies. */
2172 vect_update_slp_costs_according_to_vf (loop_vec_info loop_vinfo
)
2174 unsigned int i
, vf
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
);
2175 VEC (slp_instance
, heap
) *slp_instances
= LOOP_VINFO_SLP_INSTANCES (loop_vinfo
);
2176 slp_instance instance
;
2178 if (vect_print_dump_info (REPORT_SLP
))
2179 fprintf (vect_dump
, "=== vect_update_slp_costs_according_to_vf ===");
2181 FOR_EACH_VEC_ELT (slp_instance
, slp_instances
, i
, instance
)
2182 /* We assume that costs are linear in ncopies. */
2183 SLP_INSTANCE_INSIDE_OF_LOOP_COST (instance
) *= vf
2184 / SLP_INSTANCE_UNROLLING_FACTOR (instance
);
2188 /* For constant and loop invariant defs of SLP_NODE this function returns
2189 (vector) defs (VEC_OPRNDS) that will be used in the vectorized stmts.
2190 OP_NUM determines if we gather defs for operand 0 or operand 1 of the RHS of
2191 scalar stmts. NUMBER_OF_VECTORS is the number of vector defs to create.
2192 REDUC_INDEX is the index of the reduction operand in the statements, unless
2196 vect_get_constant_vectors (tree op
, slp_tree slp_node
,
2197 VEC (tree
, heap
) **vec_oprnds
,
2198 unsigned int op_num
, unsigned int number_of_vectors
,
2201 VEC (gimple
, heap
) *stmts
= SLP_TREE_SCALAR_STMTS (slp_node
);
2202 gimple stmt
= VEC_index (gimple
, stmts
, 0);
2203 stmt_vec_info stmt_vinfo
= vinfo_for_stmt (stmt
);
2207 int j
, number_of_places_left_in_vector
;
2210 int group_size
= VEC_length (gimple
, stmts
);
2211 unsigned int vec_num
, i
;
2212 int number_of_copies
= 1;
2213 VEC (tree
, heap
) *voprnds
= VEC_alloc (tree
, heap
, number_of_vectors
);
2214 bool constant_p
, is_store
;
2215 tree neutral_op
= NULL
;
2216 enum tree_code code
= gimple_expr_code (stmt
);
2220 if (STMT_VINFO_DEF_TYPE (stmt_vinfo
) == vect_reduction_def
2221 && reduc_index
!= -1)
2223 op_num
= reduc_index
- 1;
2224 op
= gimple_op (stmt
, reduc_index
);
2225 /* For additional copies (see the explanation of NUMBER_OF_COPIES below)
2226 we need either neutral operands or the original operands. See
2227 get_initial_def_for_reduction() for details. */
2230 case WIDEN_SUM_EXPR
:
2236 if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (op
)))
2237 neutral_op
= build_real (TREE_TYPE (op
), dconst0
);
2239 neutral_op
= build_int_cst (TREE_TYPE (op
), 0);
2244 if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (op
)))
2245 neutral_op
= build_real (TREE_TYPE (op
), dconst1
);
2247 neutral_op
= build_int_cst (TREE_TYPE (op
), 1);
2252 neutral_op
= build_int_cst (TREE_TYPE (op
), -1);
2257 def_stmt
= SSA_NAME_DEF_STMT (op
);
2258 loop
= (gimple_bb (stmt
))->loop_father
;
2259 neutral_op
= PHI_ARG_DEF_FROM_EDGE (def_stmt
,
2260 loop_preheader_edge (loop
));
2268 if (STMT_VINFO_DATA_REF (stmt_vinfo
))
2271 op
= gimple_assign_rhs1 (stmt
);
2278 if (CONSTANT_CLASS_P (op
))
2283 vector_type
= get_vectype_for_scalar_type (TREE_TYPE (op
));
2284 gcc_assert (vector_type
);
2285 nunits
= TYPE_VECTOR_SUBPARTS (vector_type
);
2287 /* NUMBER_OF_COPIES is the number of times we need to use the same values in
2288 created vectors. It is greater than 1 if unrolling is performed.
2290 For example, we have two scalar operands, s1 and s2 (e.g., group of
2291 strided accesses of size two), while NUNITS is four (i.e., four scalars
2292 of this type can be packed in a vector). The output vector will contain
2293 two copies of each scalar operand: {s1, s2, s1, s2}. (NUMBER_OF_COPIES
2296 If GROUP_SIZE > NUNITS, the scalars will be split into several vectors
2297 containing the operands.
2299 For example, NUNITS is four as before, and the group size is 8
2300 (s1, s2, ..., s8). We will create two vectors {s1, s2, s3, s4} and
2301 {s5, s6, s7, s8}. */
2303 number_of_copies
= least_common_multiple (nunits
, group_size
) / group_size
;
2305 number_of_places_left_in_vector
= nunits
;
2306 for (j
= 0; j
< number_of_copies
; j
++)
2308 for (i
= group_size
- 1; VEC_iterate (gimple
, stmts
, i
, stmt
); i
--)
2311 op
= gimple_assign_rhs1 (stmt
);
2317 if (op_num
== 0 || op_num
== 1)
2319 tree cond
= gimple_assign_rhs1 (stmt
);
2320 op
= TREE_OPERAND (cond
, op_num
);
2325 op
= gimple_assign_rhs2 (stmt
);
2327 op
= gimple_assign_rhs3 (stmt
);
2332 op
= gimple_call_arg (stmt
, op_num
);
2336 op
= gimple_op (stmt
, op_num
+ 1);
2340 if (reduc_index
!= -1)
2342 loop
= (gimple_bb (stmt
))->loop_father
;
2343 def_stmt
= SSA_NAME_DEF_STMT (op
);
2347 /* Get the def before the loop. In reduction chain we have only
2348 one initial value. */
2349 if ((j
!= (number_of_copies
- 1)
2350 || (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt
))
2355 op
= PHI_ARG_DEF_FROM_EDGE (def_stmt
,
2356 loop_preheader_edge (loop
));
2359 /* Create 'vect_ = {op0,op1,...,opn}'. */
2360 t
= tree_cons (NULL_TREE
, op
, t
);
2362 number_of_places_left_in_vector
--;
2364 if (number_of_places_left_in_vector
== 0)
2366 number_of_places_left_in_vector
= nunits
;
2369 vec_cst
= build_vector (vector_type
, t
);
2371 vec_cst
= build_constructor_from_list (vector_type
, t
);
2372 VEC_quick_push (tree
, voprnds
,
2373 vect_init_vector (stmt
, vec_cst
, vector_type
, NULL
));
2379 /* Since the vectors are created in the reverse order, we should invert
2381 vec_num
= VEC_length (tree
, voprnds
);
2382 for (j
= vec_num
- 1; j
>= 0; j
--)
2384 vop
= VEC_index (tree
, voprnds
, j
);
2385 VEC_quick_push (tree
, *vec_oprnds
, vop
);
2388 VEC_free (tree
, heap
, voprnds
);
2390 /* In case that VF is greater than the unrolling factor needed for the SLP
2391 group of stmts, NUMBER_OF_VECTORS to be created is greater than
2392 NUMBER_OF_SCALARS/NUNITS or NUNITS/NUMBER_OF_SCALARS, and hence we have
2393 to replicate the vectors. */
2394 while (number_of_vectors
> VEC_length (tree
, *vec_oprnds
))
2396 tree neutral_vec
= NULL
;
2401 neutral_vec
= build_vector_from_val (vector_type
, neutral_op
);
2403 VEC_quick_push (tree
, *vec_oprnds
, neutral_vec
);
2407 for (i
= 0; VEC_iterate (tree
, *vec_oprnds
, i
, vop
) && i
< vec_num
; i
++)
2408 VEC_quick_push (tree
, *vec_oprnds
, vop
);
2414 /* Get vectorized definitions from SLP_NODE that contains corresponding
2415 vectorized def-stmts. */
2418 vect_get_slp_vect_defs (slp_tree slp_node
, VEC (tree
,heap
) **vec_oprnds
)
2421 gimple vec_def_stmt
;
2424 gcc_assert (SLP_TREE_VEC_STMTS (slp_node
));
2426 FOR_EACH_VEC_ELT (gimple
, SLP_TREE_VEC_STMTS (slp_node
), i
, vec_def_stmt
)
2428 gcc_assert (vec_def_stmt
);
2429 vec_oprnd
= gimple_get_lhs (vec_def_stmt
);
2430 VEC_quick_push (tree
, *vec_oprnds
, vec_oprnd
);
2435 /* Get vectorized definitions for SLP_NODE.
2436 If the scalar definitions are loop invariants or constants, collect them and
2437 call vect_get_constant_vectors() to create vector stmts.
2438 Otherwise, the def-stmts must be already vectorized and the vectorized stmts
2439 must be stored in the corresponding child of SLP_NODE, and we call
2440 vect_get_slp_vect_defs () to retrieve them. */
2443 vect_get_slp_defs (VEC (tree
, heap
) *ops
, slp_tree slp_node
,
2444 VEC (slp_void_p
, heap
) **vec_oprnds
, int reduc_index
)
2446 gimple first_stmt
, first_def
;
2447 int number_of_vects
= 0, i
;
2448 unsigned int child_index
= 0;
2449 HOST_WIDE_INT lhs_size_unit
, rhs_size_unit
;
2450 slp_tree child
= NULL
;
2451 VEC (tree
, heap
) *vec_defs
;
2452 tree oprnd
, def_lhs
;
2453 bool vectorized_defs
;
2455 first_stmt
= VEC_index (gimple
, SLP_TREE_SCALAR_STMTS (slp_node
), 0);
2456 FOR_EACH_VEC_ELT (tree
, ops
, i
, oprnd
)
2458 /* For each operand we check if it has vectorized definitions in a child
2459 node or we need to create them (for invariants and constants). We
2460 check if the LHS of the first stmt of the next child matches OPRND.
2461 If it does, we found the correct child. Otherwise, we call
2462 vect_get_constant_vectors (), and not advance CHILD_INDEX in order
2463 to check this child node for the next operand. */
2464 vectorized_defs
= false;
2465 if (VEC_length (slp_void_p
, SLP_TREE_CHILDREN (slp_node
)) > child_index
)
2467 child
= (slp_tree
) VEC_index (slp_void_p
,
2468 SLP_TREE_CHILDREN (slp_node
),
2470 first_def
= VEC_index (gimple
, SLP_TREE_SCALAR_STMTS (child
), 0);
2472 /* In the end of a pattern sequence we have a use of the original stmt,
2473 so we need to compare OPRND with the original def. */
2474 if (is_pattern_stmt_p (vinfo_for_stmt (first_def
))
2475 && !STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (first_stmt
))
2476 && !is_pattern_stmt_p (vinfo_for_stmt (first_stmt
)))
2477 first_def
= STMT_VINFO_RELATED_STMT (vinfo_for_stmt (first_def
));
2479 if (is_gimple_call (first_def
))
2480 def_lhs
= gimple_call_lhs (first_def
);
2482 def_lhs
= gimple_assign_lhs (first_def
);
2484 if (operand_equal_p (oprnd
, def_lhs
, 0))
2486 /* The number of vector defs is determined by the number of
2487 vector statements in the node from which we get those
2489 number_of_vects
= SLP_TREE_NUMBER_OF_VEC_STMTS (child
);
2490 vectorized_defs
= true;
2495 if (!vectorized_defs
)
2499 number_of_vects
= SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
);
2500 /* Number of vector stmts was calculated according to LHS in
2501 vect_schedule_slp_instance (), fix it by replacing LHS with
2502 RHS, if necessary. See vect_get_smallest_scalar_type () for
2504 vect_get_smallest_scalar_type (first_stmt
, &lhs_size_unit
,
2506 if (rhs_size_unit
!= lhs_size_unit
)
2508 number_of_vects
*= rhs_size_unit
;
2509 number_of_vects
/= lhs_size_unit
;
2514 /* Allocate memory for vectorized defs. */
2515 vec_defs
= VEC_alloc (tree
, heap
, number_of_vects
);
2517 /* For reduction defs we call vect_get_constant_vectors (), since we are
2518 looking for initial loop invariant values. */
2519 if (vectorized_defs
&& reduc_index
== -1)
2520 /* The defs are already vectorized. */
2521 vect_get_slp_vect_defs (child
, &vec_defs
);
2523 /* Build vectors from scalar defs. */
2524 vect_get_constant_vectors (oprnd
, slp_node
, &vec_defs
, i
,
2525 number_of_vects
, reduc_index
);
2527 VEC_quick_push (slp_void_p
, *vec_oprnds
, (slp_void_p
) vec_defs
);
2529 /* For reductions, we only need initial values. */
2530 if (reduc_index
!= -1)
2536 /* Create NCOPIES permutation statements using the mask MASK_BYTES (by
2537 building a vector of type MASK_TYPE from it) and two input vectors placed in
2538 DR_CHAIN at FIRST_VEC_INDX and SECOND_VEC_INDX for the first copy and
2539 shifting by STRIDE elements of DR_CHAIN for every copy.
2540 (STRIDE is the number of vectorized stmts for NODE divided by the number of
2542 VECT_STMTS_COUNTER specifies the index in the vectorized stmts of NODE, where
2543 the created stmts must be inserted. */
2546 vect_create_mask_and_perm (gimple stmt
, gimple next_scalar_stmt
,
2547 tree mask
, int first_vec_indx
, int second_vec_indx
,
2548 gimple_stmt_iterator
*gsi
, slp_tree node
,
2549 tree vectype
, VEC(tree
,heap
) *dr_chain
,
2550 int ncopies
, int vect_stmts_counter
)
2553 gimple perm_stmt
= NULL
;
2554 stmt_vec_info next_stmt_info
;
2556 tree first_vec
, second_vec
, data_ref
;
2558 stride
= SLP_TREE_NUMBER_OF_VEC_STMTS (node
) / ncopies
;
2560 /* Initialize the vect stmts of NODE to properly insert the generated
2562 for (i
= VEC_length (gimple
, SLP_TREE_VEC_STMTS (node
));
2563 i
< (int) SLP_TREE_NUMBER_OF_VEC_STMTS (node
); i
++)
2564 VEC_quick_push (gimple
, SLP_TREE_VEC_STMTS (node
), NULL
);
2566 perm_dest
= vect_create_destination_var (gimple_assign_lhs (stmt
), vectype
);
2567 for (i
= 0; i
< ncopies
; i
++)
2569 first_vec
= VEC_index (tree
, dr_chain
, first_vec_indx
);
2570 second_vec
= VEC_index (tree
, dr_chain
, second_vec_indx
);
2572 /* Generate the permute statement. */
2573 perm_stmt
= gimple_build_assign_with_ops3 (VEC_PERM_EXPR
, perm_dest
,
2574 first_vec
, second_vec
, mask
);
2575 data_ref
= make_ssa_name (perm_dest
, perm_stmt
);
2576 gimple_set_lhs (perm_stmt
, data_ref
);
2577 vect_finish_stmt_generation (stmt
, perm_stmt
, gsi
);
2579 /* Store the vector statement in NODE. */
2580 VEC_replace (gimple
, SLP_TREE_VEC_STMTS (node
),
2581 stride
* i
+ vect_stmts_counter
, perm_stmt
);
2583 first_vec_indx
+= stride
;
2584 second_vec_indx
+= stride
;
2587 /* Mark the scalar stmt as vectorized. */
2588 next_stmt_info
= vinfo_for_stmt (next_scalar_stmt
);
2589 STMT_VINFO_VEC_STMT (next_stmt_info
) = perm_stmt
;
2593 /* Given FIRST_MASK_ELEMENT - the mask element in element representation,
2594 return in CURRENT_MASK_ELEMENT its equivalent in target specific
2595 representation. Check that the mask is valid and return FALSE if not.
2596 Return TRUE in NEED_NEXT_VECTOR if the permutation requires to move to
2597 the next vector, i.e., the current first vector is not needed. */
2600 vect_get_mask_element (gimple stmt
, int first_mask_element
, int m
,
2601 int mask_nunits
, bool only_one_vec
, int index
,
2602 unsigned char *mask
, int *current_mask_element
,
2603 bool *need_next_vector
, int *number_of_mask_fixes
,
2604 bool *mask_fixed
, bool *needs_first_vector
)
2608 /* Convert to target specific representation. */
2609 *current_mask_element
= first_mask_element
+ m
;
2610 /* Adjust the value in case it's a mask for second and third vectors. */
2611 *current_mask_element
-= mask_nunits
* (*number_of_mask_fixes
- 1);
2613 if (*current_mask_element
< mask_nunits
)
2614 *needs_first_vector
= true;
2616 /* We have only one input vector to permute but the mask accesses values in
2617 the next vector as well. */
2618 if (only_one_vec
&& *current_mask_element
>= mask_nunits
)
2620 if (vect_print_dump_info (REPORT_DETAILS
))
2622 fprintf (vect_dump
, "permutation requires at least two vectors ");
2623 print_gimple_stmt (vect_dump
, stmt
, 0, TDF_SLIM
);
2629 /* The mask requires the next vector. */
2630 if (*current_mask_element
>= mask_nunits
* 2)
2632 if (*needs_first_vector
|| *mask_fixed
)
2634 /* We either need the first vector too or have already moved to the
2635 next vector. In both cases, this permutation needs three
2637 if (vect_print_dump_info (REPORT_DETAILS
))
2639 fprintf (vect_dump
, "permutation requires at "
2640 "least three vectors ");
2641 print_gimple_stmt (vect_dump
, stmt
, 0, TDF_SLIM
);
2647 /* We move to the next vector, dropping the first one and working with
2648 the second and the third - we need to adjust the values of the mask
2650 *current_mask_element
-= mask_nunits
* *number_of_mask_fixes
;
2652 for (i
= 0; i
< index
; i
++)
2653 mask
[i
] -= mask_nunits
* *number_of_mask_fixes
;
2655 (*number_of_mask_fixes
)++;
2659 *need_next_vector
= *mask_fixed
;
2661 /* This was the last element of this mask. Start a new one. */
2662 if (index
== mask_nunits
- 1)
2664 *number_of_mask_fixes
= 1;
2665 *mask_fixed
= false;
2666 *needs_first_vector
= false;
2673 /* Generate vector permute statements from a list of loads in DR_CHAIN.
2674 If ANALYZE_ONLY is TRUE, only check that it is possible to create valid
2675 permute statements for SLP_NODE_INSTANCE. */
2677 vect_transform_slp_perm_load (gimple stmt
, VEC (tree
, heap
) *dr_chain
,
2678 gimple_stmt_iterator
*gsi
, int vf
,
2679 slp_instance slp_node_instance
, bool analyze_only
)
2681 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
2682 tree mask_element_type
= NULL_TREE
, mask_type
;
2683 int i
, j
, k
, nunits
, vec_index
= 0, scalar_index
;
2685 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
2686 gimple next_scalar_stmt
;
2687 int group_size
= SLP_INSTANCE_GROUP_SIZE (slp_node_instance
);
2688 int first_mask_element
;
2689 int index
, unroll_factor
, current_mask_element
, ncopies
;
2690 unsigned char *mask
;
2691 bool only_one_vec
= false, need_next_vector
= false;
2692 int first_vec_index
, second_vec_index
, orig_vec_stmts_num
, vect_stmts_counter
;
2693 int number_of_mask_fixes
= 1;
2694 bool mask_fixed
= false;
2695 bool needs_first_vector
= false;
2696 enum machine_mode mode
;
2698 mode
= TYPE_MODE (vectype
);
2700 if (!can_vec_perm_p (mode
, false, NULL
))
2702 if (vect_print_dump_info (REPORT_DETAILS
))
2704 fprintf (vect_dump
, "no vect permute for ");
2705 print_gimple_stmt (vect_dump
, stmt
, 0, TDF_SLIM
);
2710 /* The generic VEC_PERM_EXPR code always uses an integral type of the
2711 same size as the vector element being permuted. */
2713 = lang_hooks
.types
.type_for_size
2714 (TREE_INT_CST_LOW (TYPE_SIZE (TREE_TYPE (vectype
))), 1);
2715 mask_type
= get_vectype_for_scalar_type (mask_element_type
);
2716 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
2717 mask
= XALLOCAVEC (unsigned char, nunits
);
2718 unroll_factor
= SLP_INSTANCE_UNROLLING_FACTOR (slp_node_instance
);
2720 /* The number of vector stmts to generate based only on SLP_NODE_INSTANCE
2721 unrolling factor. */
2722 orig_vec_stmts_num
= group_size
*
2723 SLP_INSTANCE_UNROLLING_FACTOR (slp_node_instance
) / nunits
;
2724 if (orig_vec_stmts_num
== 1)
2725 only_one_vec
= true;
2727 /* Number of copies is determined by the final vectorization factor
2728 relatively to SLP_NODE_INSTANCE unrolling factor. */
2729 ncopies
= vf
/ SLP_INSTANCE_UNROLLING_FACTOR (slp_node_instance
);
2731 /* Generate permutation masks for every NODE. Number of masks for each NODE
2732 is equal to GROUP_SIZE.
2733 E.g., we have a group of three nodes with three loads from the same
2734 location in each node, and the vector size is 4. I.e., we have a
2735 a0b0c0a1b1c1... sequence and we need to create the following vectors:
2736 for a's: a0a0a0a1 a1a1a2a2 a2a3a3a3
2737 for b's: b0b0b0b1 b1b1b2b2 b2b3b3b3
2740 The masks for a's should be: {0,0,0,3} {3,3,6,6} {6,9,9,9}.
2741 The last mask is illegal since we assume two operands for permute
2742 operation, and the mask element values can't be outside that range.
2743 Hence, the last mask must be converted into {2,5,5,5}.
2744 For the first two permutations we need the first and the second input
2745 vectors: {a0,b0,c0,a1} and {b1,c1,a2,b2}, and for the last permutation
2746 we need the second and the third vectors: {b1,c1,a2,b2} and
2749 FOR_EACH_VEC_ELT (slp_tree
, SLP_INSTANCE_LOADS (slp_node_instance
), i
, node
)
2753 vect_stmts_counter
= 0;
2755 first_vec_index
= vec_index
++;
2757 second_vec_index
= first_vec_index
;
2759 second_vec_index
= vec_index
++;
2761 for (j
= 0; j
< unroll_factor
; j
++)
2763 for (k
= 0; k
< group_size
; k
++)
2765 first_mask_element
= i
+ j
* group_size
;
2766 if (!vect_get_mask_element (stmt
, first_mask_element
, 0,
2767 nunits
, only_one_vec
, index
,
2768 mask
, ¤t_mask_element
,
2770 &number_of_mask_fixes
, &mask_fixed
,
2771 &needs_first_vector
))
2773 mask
[index
++] = current_mask_element
;
2775 if (index
== nunits
)
2777 tree mask_vec
= NULL
;
2779 if (!can_vec_perm_p (mode
, false, mask
))
2781 if (vect_print_dump_info (REPORT_DETAILS
))
2783 fprintf (vect_dump
, "unsupported vect permute { ");
2784 for (i
= 0; i
< nunits
; ++i
)
2785 fprintf (vect_dump
, "%d ", mask
[i
]);
2786 fprintf (vect_dump
, "}\n");
2791 while (--index
>= 0)
2793 tree t
= build_int_cst (mask_element_type
, mask
[index
]);
2794 mask_vec
= tree_cons (NULL
, t
, mask_vec
);
2796 mask_vec
= build_vector (mask_type
, mask_vec
);
2801 if (need_next_vector
)
2803 first_vec_index
= second_vec_index
;
2804 second_vec_index
= vec_index
;
2807 next_scalar_stmt
= VEC_index (gimple
,
2808 SLP_TREE_SCALAR_STMTS (node
), scalar_index
++);
2810 vect_create_mask_and_perm (stmt
, next_scalar_stmt
,
2811 mask_vec
, first_vec_index
, second_vec_index
,
2812 gsi
, node
, vectype
, dr_chain
,
2813 ncopies
, vect_stmts_counter
++);
2825 /* Vectorize SLP instance tree in postorder. */
2828 vect_schedule_slp_instance (slp_tree node
, slp_instance instance
,
2829 unsigned int vectorization_factor
)
2832 bool strided_store
, is_store
;
2833 gimple_stmt_iterator si
;
2834 stmt_vec_info stmt_info
;
2835 unsigned int vec_stmts_size
, nunits
, group_size
;
2838 slp_tree loads_node
;
2844 FOR_EACH_VEC_ELT (slp_void_p
, SLP_TREE_CHILDREN (node
), i
, child
)
2845 vect_schedule_slp_instance ((slp_tree
) child
, instance
,
2846 vectorization_factor
);
2848 stmt
= VEC_index (gimple
, SLP_TREE_SCALAR_STMTS (node
), 0);
2849 stmt_info
= vinfo_for_stmt (stmt
);
2851 /* VECTYPE is the type of the destination. */
2852 vectype
= STMT_VINFO_VECTYPE (stmt_info
);
2853 nunits
= (unsigned int) TYPE_VECTOR_SUBPARTS (vectype
);
2854 group_size
= SLP_INSTANCE_GROUP_SIZE (instance
);
2856 /* For each SLP instance calculate number of vector stmts to be created
2857 for the scalar stmts in each node of the SLP tree. Number of vector
2858 elements in one vector iteration is the number of scalar elements in
2859 one scalar iteration (GROUP_SIZE) multiplied by VF divided by vector
2861 vec_stmts_size
= (vectorization_factor
* group_size
) / nunits
;
2863 /* In case of load permutation we have to allocate vectorized statements for
2864 all the nodes that participate in that permutation. */
2865 if (SLP_INSTANCE_LOAD_PERMUTATION (instance
))
2867 FOR_EACH_VEC_ELT (slp_tree
, SLP_INSTANCE_LOADS (instance
), i
, loads_node
)
2869 if (!SLP_TREE_VEC_STMTS (loads_node
))
2871 SLP_TREE_VEC_STMTS (loads_node
) = VEC_alloc (gimple
, heap
,
2873 SLP_TREE_NUMBER_OF_VEC_STMTS (loads_node
) = vec_stmts_size
;
2878 if (!SLP_TREE_VEC_STMTS (node
))
2880 SLP_TREE_VEC_STMTS (node
) = VEC_alloc (gimple
, heap
, vec_stmts_size
);
2881 SLP_TREE_NUMBER_OF_VEC_STMTS (node
) = vec_stmts_size
;
2884 if (vect_print_dump_info (REPORT_DETAILS
))
2886 fprintf (vect_dump
, "------>vectorizing SLP node starting from: ");
2887 print_gimple_stmt (vect_dump
, stmt
, 0, TDF_SLIM
);
2890 /* Loads should be inserted before the first load. */
2891 if (SLP_INSTANCE_FIRST_LOAD_STMT (instance
)
2892 && STMT_VINFO_STRIDED_ACCESS (stmt_info
)
2893 && !REFERENCE_CLASS_P (gimple_get_lhs (stmt
))
2894 && SLP_INSTANCE_LOAD_PERMUTATION (instance
))
2895 si
= gsi_for_stmt (SLP_INSTANCE_FIRST_LOAD_STMT (instance
));
2896 else if (is_pattern_stmt_p (stmt_info
))
2897 si
= gsi_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info
));
2899 si
= gsi_for_stmt (stmt
);
2901 /* Stores should be inserted just before the last store. */
2902 if (STMT_VINFO_STRIDED_ACCESS (stmt_info
)
2903 && REFERENCE_CLASS_P (gimple_get_lhs (stmt
)))
2905 gimple last_store
= vect_find_last_store_in_slp_instance (instance
);
2906 if (is_pattern_stmt_p (vinfo_for_stmt (last_store
)))
2907 last_store
= STMT_VINFO_RELATED_STMT (vinfo_for_stmt (last_store
));
2908 si
= gsi_for_stmt (last_store
);
2911 /* Mark the first element of the reduction chain as reduction to properly
2912 transform the node. In the analysis phase only the last element of the
2913 chain is marked as reduction. */
2914 if (GROUP_FIRST_ELEMENT (stmt_info
) && !STMT_VINFO_STRIDED_ACCESS (stmt_info
)
2915 && GROUP_FIRST_ELEMENT (stmt_info
) == stmt
)
2917 STMT_VINFO_DEF_TYPE (stmt_info
) = vect_reduction_def
;
2918 STMT_VINFO_TYPE (stmt_info
) = reduc_vec_info_type
;
2921 is_store
= vect_transform_stmt (stmt
, &si
, &strided_store
, node
, instance
);
2925 /* Replace scalar calls from SLP node NODE with setting of their lhs to zero.
2926 For loop vectorization this is done in vectorizable_call, but for SLP
2927 it needs to be deferred until end of vect_schedule_slp, because multiple
2928 SLP instances may refer to the same scalar stmt. */
2931 vect_remove_slp_scalar_calls (slp_tree node
)
2933 gimple stmt
, new_stmt
;
2934 gimple_stmt_iterator gsi
;
2938 stmt_vec_info stmt_info
;
2943 FOR_EACH_VEC_ELT (slp_void_p
, SLP_TREE_CHILDREN (node
), i
, child
)
2944 vect_remove_slp_scalar_calls ((slp_tree
) child
);
2946 FOR_EACH_VEC_ELT (gimple
, SLP_TREE_SCALAR_STMTS (node
), i
, stmt
)
2948 if (!is_gimple_call (stmt
) || gimple_bb (stmt
) == NULL
)
2950 stmt_info
= vinfo_for_stmt (stmt
);
2951 if (stmt_info
== NULL
2952 || is_pattern_stmt_p (stmt_info
)
2953 || !PURE_SLP_STMT (stmt_info
))
2955 lhs
= gimple_call_lhs (stmt
);
2956 new_stmt
= gimple_build_assign (lhs
, build_zero_cst (TREE_TYPE (lhs
)));
2957 set_vinfo_for_stmt (new_stmt
, stmt_info
);
2958 set_vinfo_for_stmt (stmt
, NULL
);
2959 STMT_VINFO_STMT (stmt_info
) = new_stmt
;
2960 gsi
= gsi_for_stmt (stmt
);
2961 gsi_replace (&gsi
, new_stmt
, false);
2962 SSA_NAME_DEF_STMT (gimple_assign_lhs (new_stmt
)) = new_stmt
;
2966 /* Generate vector code for all SLP instances in the loop/basic block. */
2969 vect_schedule_slp (loop_vec_info loop_vinfo
, bb_vec_info bb_vinfo
)
2971 VEC (slp_instance
, heap
) *slp_instances
;
2972 slp_instance instance
;
2974 bool is_store
= false;
2978 slp_instances
= LOOP_VINFO_SLP_INSTANCES (loop_vinfo
);
2979 vf
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
);
2983 slp_instances
= BB_VINFO_SLP_INSTANCES (bb_vinfo
);
2987 FOR_EACH_VEC_ELT (slp_instance
, slp_instances
, i
, instance
)
2989 /* Schedule the tree of INSTANCE. */
2990 is_store
= vect_schedule_slp_instance (SLP_INSTANCE_TREE (instance
),
2992 if (vect_print_dump_info (REPORT_VECTORIZED_LOCATIONS
)
2993 || vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS
))
2994 fprintf (vect_dump
, "vectorizing stmts using SLP.");
2997 FOR_EACH_VEC_ELT (slp_instance
, slp_instances
, i
, instance
)
2999 slp_tree root
= SLP_INSTANCE_TREE (instance
);
3002 gimple_stmt_iterator gsi
;
3004 vect_remove_slp_scalar_calls (root
);
3006 for (j
= 0; VEC_iterate (gimple
, SLP_TREE_SCALAR_STMTS (root
), j
, store
)
3007 && j
< SLP_INSTANCE_GROUP_SIZE (instance
); j
++)
3009 if (!STMT_VINFO_DATA_REF (vinfo_for_stmt (store
)))
3012 if (is_pattern_stmt_p (vinfo_for_stmt (store
)))
3013 store
= STMT_VINFO_RELATED_STMT (vinfo_for_stmt (store
));
3014 /* Free the attached stmt_vec_info and remove the stmt. */
3015 gsi
= gsi_for_stmt (store
);
3016 gsi_remove (&gsi
, true);
3017 free_stmt_vec_info (store
);
3025 /* Vectorize the basic block. */
3028 vect_slp_transform_bb (basic_block bb
)
3030 bb_vec_info bb_vinfo
= vec_info_for_bb (bb
);
3031 gimple_stmt_iterator si
;
3033 gcc_assert (bb_vinfo
);
3035 if (vect_print_dump_info (REPORT_DETAILS
))
3036 fprintf (vect_dump
, "SLPing BB\n");
3038 for (si
= gsi_start_bb (bb
); !gsi_end_p (si
); gsi_next (&si
))
3040 gimple stmt
= gsi_stmt (si
);
3041 stmt_vec_info stmt_info
;
3043 if (vect_print_dump_info (REPORT_DETAILS
))
3045 fprintf (vect_dump
, "------>SLPing statement: ");
3046 print_gimple_stmt (vect_dump
, stmt
, 0, TDF_SLIM
);
3049 stmt_info
= vinfo_for_stmt (stmt
);
3050 gcc_assert (stmt_info
);
3052 /* Schedule all the SLP instances when the first SLP stmt is reached. */
3053 if (STMT_SLP_TYPE (stmt_info
))
3055 vect_schedule_slp (NULL
, bb_vinfo
);
3060 mark_sym_for_renaming (gimple_vop (cfun
));
3061 /* The memory tags and pointers in vectorized statements need to
3062 have their SSA forms updated. FIXME, why can't this be delayed
3063 until all the loops have been transformed? */
3064 update_ssa (TODO_update_ssa
);
3066 if (vect_print_dump_info (REPORT_DETAILS
))
3067 fprintf (vect_dump
, "BASIC BLOCK VECTORIZED\n");
3069 destroy_bb_vec_info (bb_vinfo
);