2 Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012
3 Free Software Foundation, Inc.
4 Contributed by Dorit Naishlos <dorit@il.ibm.com> and
5 Ira Rosen <irar@il.ibm.com>
7 This file is part of GCC.
9 GCC is free software; you can redistribute it and/or modify it under
10 the terms of the GNU General Public License as published by the Free
11 Software Foundation; either version 3, or (at your option) any later
version.
14 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
15 WARRANTY; without even the implied warranty of MERCHANTABILITY or
16 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
for more details.
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
25 #include "coretypes.h"
29 #include "basic-block.h"
30 #include "tree-pretty-print.h"
31 #include "gimple-pretty-print.h"
32 #include "tree-flow.h"
33 #include "tree-dump.h"
35 #include "cfglayout.h"
40 #include "diagnostic-core.h"
41 #include "tree-chrec.h"
42 #include "tree-scalar-evolution.h"
43 #include "tree-vectorizer.h"
46 /* Loop Vectorization Pass.
48 This pass tries to vectorize loops.
50 For example, the vectorizer transforms the following simple loop:
52 short a[N]; short b[N]; short c[N]; int i;
58 as if it was manually vectorized by rewriting the source code into:
60 typedef int __attribute__((mode(V8HI))) v8hi;
61 short a[N]; short b[N]; short c[N]; int i;
62 v8hi *pa = (v8hi*)a, *pb = (v8hi*)b, *pc = (v8hi*)c;
65 for (i=0; i<N/8; i++){
72 The main entry to this pass is vectorize_loops(), in which
73 the vectorizer applies a set of analyses on a given set of loops,
74 followed by the actual vectorization transformation for the loops that
75 had successfully passed the analysis phase.
76 Throughout this pass we make a distinction between two types of
77 data: scalars (which are represented by SSA_NAMES), and memory references
78 ("data-refs"). These two types of data require different handling both
79 during analysis and transformation. The types of data-refs that the
80 vectorizer currently supports are ARRAY_REFS whose base is an array DECL
81 (not a pointer), and INDIRECT_REFS through pointers; both array and pointer
82 accesses are required to have a simple (consecutive) access pattern.
86 The driver for the analysis phase is vect_analyze_loop().
87 It applies a set of analyses, some of which rely on the scalar evolution
88 analyzer (scev) developed by Sebastian Pop.
90 During the analysis phase the vectorizer records some information
91 per stmt in a "stmt_vec_info" struct which is attached to each stmt in the
92 loop, as well as general information about the loop as a whole, which is
93 recorded in a "loop_vec_info" struct attached to each loop.
97 The loop transformation phase scans all the stmts in the loop, and
98 creates a vector stmt (or a sequence of stmts) for each scalar stmt S in
99 the loop that needs to be vectorized. It inserts the vector code sequence
100 just before the scalar stmt S, and records a pointer to the vector code
101 in STMT_VINFO_VEC_STMT (stmt_info) (stmt_info is the stmt_vec_info struct
102 attached to S). This pointer will be used for the vectorization of following
103 stmts which use the def of stmt S. Stmt S is removed if it writes to memory;
104 otherwise, we rely on dead code elimination for removing it.
106 For example, say stmt S1 was vectorized into stmt VS1:
109 S1: b = x[i]; STMT_VINFO_VEC_STMT (stmt_info (S1)) = VS1
112 To vectorize stmt S2, the vectorizer first finds the stmt that defines
113 the operand 'b' (S1), and gets the relevant vector def 'vb' from the
114 vector stmt VS1 pointed to by STMT_VINFO_VEC_STMT (stmt_info (S1)). The
115 resulting sequence would be:
118 S1: b = x[i]; STMT_VINFO_VEC_STMT (stmt_info (S1)) = VS1
120 S2: a = b; STMT_VINFO_VEC_STMT (stmt_info (S2)) = VS2
122 Operands that are not SSA_NAMEs, are data-refs that appear in
123 load/store operations (like 'x[i]' in S1), and are handled differently.
127 Currently the only target specific information that is used is the
128 size of the vector (in bytes) - "TARGET_VECTORIZE_UNITS_PER_SIMD_WORD".
129 Targets that can support different sizes of vectors, for now will need
130 to specify one value for "TARGET_VECTORIZE_UNITS_PER_SIMD_WORD". More
131 flexibility will be added in the future.
133 Since we only vectorize operations which vector form can be
134 expressed using existing tree codes, to verify that an operation is
135 supported, the vectorizer checks the relevant optab at the relevant
136 machine_mode (e.g, optab_handler (add_optab, V8HImode)). If
137 the value found is CODE_FOR_nothing, then there's no target support, and
138 we can't vectorize the stmt.
140 For additional information on this project see:
141 http://gcc.gnu.org/projects/tree-ssa/vectorization.html
144 /* Function vect_determine_vectorization_factor
146 Determine the vectorization factor (VF). VF is the number of data elements
147 that are operated upon in parallel in a single iteration of the vectorized
148 loop. For example, when vectorizing a loop that operates on 4byte elements,
149 on a target with vector size (VS) 16byte, the VF is set to 4, since 4
150 elements can fit in a single vector register.
152 We currently support vectorization of loops in which all types operated upon
153 are of the same size. Therefore this function currently sets VF according to
154 the size of the types operated upon, and fails if there are multiple sizes
157 VF is also the factor by which the loop iterations are strip-mined, e.g.:
164 for (i=0; i<N; i+=VF){
165 a[i:VF] = b[i:VF] + c[i:VF];
170 vect_determine_vectorization_factor (loop_vec_info loop_vinfo
)
172 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
173 basic_block
*bbs
= LOOP_VINFO_BBS (loop_vinfo
);
174 int nbbs
= loop
->num_nodes
;
175 gimple_stmt_iterator si
;
176 unsigned int vectorization_factor
= 0;
181 stmt_vec_info stmt_info
;
184 gimple stmt
, pattern_stmt
= NULL
;
185 gimple_seq pattern_def_seq
= NULL
;
186 gimple_stmt_iterator pattern_def_si
= gsi_none ();
187 bool analyze_pattern_stmt
= false;
189 if (vect_print_dump_info (REPORT_DETAILS
))
190 fprintf (vect_dump
, "=== vect_determine_vectorization_factor ===");
192 for (i
= 0; i
< nbbs
; i
++)
194 basic_block bb
= bbs
[i
];
196 for (si
= gsi_start_phis (bb
); !gsi_end_p (si
); gsi_next (&si
))
199 stmt_info
= vinfo_for_stmt (phi
);
200 if (vect_print_dump_info (REPORT_DETAILS
))
202 fprintf (vect_dump
, "==> examining phi: ");
203 print_gimple_stmt (vect_dump
, phi
, 0, TDF_SLIM
);
206 gcc_assert (stmt_info
);
208 if (STMT_VINFO_RELEVANT_P (stmt_info
))
210 gcc_assert (!STMT_VINFO_VECTYPE (stmt_info
));
211 scalar_type
= TREE_TYPE (PHI_RESULT (phi
));
213 if (vect_print_dump_info (REPORT_DETAILS
))
215 fprintf (vect_dump
, "get vectype for scalar type: ");
216 print_generic_expr (vect_dump
, scalar_type
, TDF_SLIM
);
219 vectype
= get_vectype_for_scalar_type (scalar_type
);
222 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS
))
225 "not vectorized: unsupported data-type ");
226 print_generic_expr (vect_dump
, scalar_type
, TDF_SLIM
);
230 STMT_VINFO_VECTYPE (stmt_info
) = vectype
;
232 if (vect_print_dump_info (REPORT_DETAILS
))
234 fprintf (vect_dump
, "vectype: ");
235 print_generic_expr (vect_dump
, vectype
, TDF_SLIM
);
238 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
239 if (vect_print_dump_info (REPORT_DETAILS
))
240 fprintf (vect_dump
, "nunits = %d", nunits
);
242 if (!vectorization_factor
243 || (nunits
> vectorization_factor
))
244 vectorization_factor
= nunits
;
248 for (si
= gsi_start_bb (bb
); !gsi_end_p (si
) || analyze_pattern_stmt
;)
252 if (analyze_pattern_stmt
)
255 stmt
= gsi_stmt (si
);
257 stmt_info
= vinfo_for_stmt (stmt
);
259 if (vect_print_dump_info (REPORT_DETAILS
))
261 fprintf (vect_dump
, "==> examining statement: ");
262 print_gimple_stmt (vect_dump
, stmt
, 0, TDF_SLIM
);
265 gcc_assert (stmt_info
);
267 /* Skip stmts which do not need to be vectorized. */
268 if (!STMT_VINFO_RELEVANT_P (stmt_info
)
269 && !STMT_VINFO_LIVE_P (stmt_info
))
271 if (STMT_VINFO_IN_PATTERN_P (stmt_info
)
272 && (pattern_stmt
= STMT_VINFO_RELATED_STMT (stmt_info
))
273 && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt
))
274 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt
))))
277 stmt_info
= vinfo_for_stmt (pattern_stmt
);
278 if (vect_print_dump_info (REPORT_DETAILS
))
280 fprintf (vect_dump
, "==> examining pattern statement: ");
281 print_gimple_stmt (vect_dump
, stmt
, 0, TDF_SLIM
);
286 if (vect_print_dump_info (REPORT_DETAILS
))
287 fprintf (vect_dump
, "skip.");
292 else if (STMT_VINFO_IN_PATTERN_P (stmt_info
)
293 && (pattern_stmt
= STMT_VINFO_RELATED_STMT (stmt_info
))
294 && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt
))
295 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt
))))
296 analyze_pattern_stmt
= true;
298 /* If a pattern statement has def stmts, analyze them too. */
299 if (is_pattern_stmt_p (stmt_info
))
301 if (pattern_def_seq
== NULL
)
303 pattern_def_seq
= STMT_VINFO_PATTERN_DEF_SEQ (stmt_info
);
304 pattern_def_si
= gsi_start (pattern_def_seq
);
306 else if (!gsi_end_p (pattern_def_si
))
307 gsi_next (&pattern_def_si
);
308 if (pattern_def_seq
!= NULL
)
310 gimple pattern_def_stmt
= NULL
;
311 stmt_vec_info pattern_def_stmt_info
= NULL
;
313 while (!gsi_end_p (pattern_def_si
))
315 pattern_def_stmt
= gsi_stmt (pattern_def_si
);
316 pattern_def_stmt_info
317 = vinfo_for_stmt (pattern_def_stmt
);
318 if (STMT_VINFO_RELEVANT_P (pattern_def_stmt_info
)
319 || STMT_VINFO_LIVE_P (pattern_def_stmt_info
))
321 gsi_next (&pattern_def_si
);
324 if (!gsi_end_p (pattern_def_si
))
326 if (vect_print_dump_info (REPORT_DETAILS
))
329 "==> examining pattern def stmt: ");
330 print_gimple_stmt (vect_dump
, pattern_def_stmt
, 0,
334 stmt
= pattern_def_stmt
;
335 stmt_info
= pattern_def_stmt_info
;
339 pattern_def_si
= gsi_none ();
340 analyze_pattern_stmt
= false;
344 analyze_pattern_stmt
= false;
347 if (gimple_get_lhs (stmt
) == NULL_TREE
)
349 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS
))
351 fprintf (vect_dump
, "not vectorized: irregular stmt.");
352 print_gimple_stmt (vect_dump
, stmt
, 0, TDF_SLIM
);
357 if (VECTOR_MODE_P (TYPE_MODE (gimple_expr_type (stmt
))))
359 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS
))
361 fprintf (vect_dump
, "not vectorized: vector stmt in loop:");
362 print_gimple_stmt (vect_dump
, stmt
, 0, TDF_SLIM
);
367 if (STMT_VINFO_VECTYPE (stmt_info
))
369 /* The only case when a vectype had been already set is for stmts
370 that contain a dataref, or for "pattern-stmts" (stmts
371 generated by the vectorizer to represent/replace a certain
373 gcc_assert (STMT_VINFO_DATA_REF (stmt_info
)
374 || is_pattern_stmt_p (stmt_info
)
375 || !gsi_end_p (pattern_def_si
));
376 vectype
= STMT_VINFO_VECTYPE (stmt_info
);
380 gcc_assert (!STMT_VINFO_DATA_REF (stmt_info
));
381 scalar_type
= TREE_TYPE (gimple_get_lhs (stmt
));
382 if (vect_print_dump_info (REPORT_DETAILS
))
384 fprintf (vect_dump
, "get vectype for scalar type: ");
385 print_generic_expr (vect_dump
, scalar_type
, TDF_SLIM
);
387 vectype
= get_vectype_for_scalar_type (scalar_type
);
390 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS
))
393 "not vectorized: unsupported data-type ");
394 print_generic_expr (vect_dump
, scalar_type
, TDF_SLIM
);
399 STMT_VINFO_VECTYPE (stmt_info
) = vectype
;
402 /* The vectorization factor is according to the smallest
403 scalar type (or the largest vector size, but we only
404 support one vector size per loop). */
405 scalar_type
= vect_get_smallest_scalar_type (stmt
, &dummy
,
407 if (vect_print_dump_info (REPORT_DETAILS
))
409 fprintf (vect_dump
, "get vectype for scalar type: ");
410 print_generic_expr (vect_dump
, scalar_type
, TDF_SLIM
);
412 vf_vectype
= get_vectype_for_scalar_type (scalar_type
);
415 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS
))
418 "not vectorized: unsupported data-type ");
419 print_generic_expr (vect_dump
, scalar_type
, TDF_SLIM
);
424 if ((GET_MODE_SIZE (TYPE_MODE (vectype
))
425 != GET_MODE_SIZE (TYPE_MODE (vf_vectype
))))
427 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS
))
430 "not vectorized: different sized vector "
431 "types in statement, ");
432 print_generic_expr (vect_dump
, vectype
, TDF_SLIM
);
433 fprintf (vect_dump
, " and ");
434 print_generic_expr (vect_dump
, vf_vectype
, TDF_SLIM
);
439 if (vect_print_dump_info (REPORT_DETAILS
))
441 fprintf (vect_dump
, "vectype: ");
442 print_generic_expr (vect_dump
, vf_vectype
, TDF_SLIM
);
445 nunits
= TYPE_VECTOR_SUBPARTS (vf_vectype
);
446 if (vect_print_dump_info (REPORT_DETAILS
))
447 fprintf (vect_dump
, "nunits = %d", nunits
);
449 if (!vectorization_factor
450 || (nunits
> vectorization_factor
))
451 vectorization_factor
= nunits
;
453 if (!analyze_pattern_stmt
&& gsi_end_p (pattern_def_si
))
455 pattern_def_seq
= NULL
;
461 /* TODO: Analyze cost. Decide if worth while to vectorize. */
462 if (vect_print_dump_info (REPORT_DETAILS
))
463 fprintf (vect_dump
, "vectorization factor = %d", vectorization_factor
);
464 if (vectorization_factor
<= 1)
466 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS
))
467 fprintf (vect_dump
, "not vectorized: unsupported data-type");
470 LOOP_VINFO_VECT_FACTOR (loop_vinfo
) = vectorization_factor
;
476 /* Function vect_is_simple_iv_evolution.
478 FORNOW: A simple evolution of an induction variables in the loop is
479 considered a polynomial evolution with constant step. */
482 vect_is_simple_iv_evolution (unsigned loop_nb
, tree access_fn
, tree
* init
,
487 tree evolution_part
= evolution_part_in_loop_num (access_fn
, loop_nb
);
489 /* When there is no evolution in this loop, the evolution function
491 if (evolution_part
== NULL_TREE
)
494 /* When the evolution is a polynomial of degree >= 2
495 the evolution function is not "simple". */
496 if (tree_is_chrec (evolution_part
))
499 step_expr
= evolution_part
;
500 init_expr
= unshare_expr (initial_condition_in_loop_num (access_fn
, loop_nb
));
502 if (vect_print_dump_info (REPORT_DETAILS
))
504 fprintf (vect_dump
, "step: ");
505 print_generic_expr (vect_dump
, step_expr
, TDF_SLIM
);
506 fprintf (vect_dump
, ", init: ");
507 print_generic_expr (vect_dump
, init_expr
, TDF_SLIM
);
513 if (TREE_CODE (step_expr
) != INTEGER_CST
)
515 if (vect_print_dump_info (REPORT_DETAILS
))
516 fprintf (vect_dump
, "step unknown.");
523 /* Function vect_analyze_scalar_cycles_1.
525 Examine the cross iteration def-use cycles of scalar variables
526 in LOOP. LOOP_VINFO represents the loop that is now being
527 considered for vectorization (can be LOOP, or an outer-loop
531 vect_analyze_scalar_cycles_1 (loop_vec_info loop_vinfo
, struct loop
*loop
)
533 basic_block bb
= loop
->header
;
535 VEC(gimple
,heap
) *worklist
= VEC_alloc (gimple
, heap
, 64);
536 gimple_stmt_iterator gsi
;
539 if (vect_print_dump_info (REPORT_DETAILS
))
540 fprintf (vect_dump
, "=== vect_analyze_scalar_cycles ===");
542 /* First - identify all inductions. Reduction detection assumes that all the
543 inductions have been identified, therefore, this order must not be
545 for (gsi
= gsi_start_phis (bb
); !gsi_end_p (gsi
); gsi_next (&gsi
))
547 gimple phi
= gsi_stmt (gsi
);
548 tree access_fn
= NULL
;
549 tree def
= PHI_RESULT (phi
);
550 stmt_vec_info stmt_vinfo
= vinfo_for_stmt (phi
);
552 if (vect_print_dump_info (REPORT_DETAILS
))
554 fprintf (vect_dump
, "Analyze phi: ");
555 print_gimple_stmt (vect_dump
, phi
, 0, TDF_SLIM
);
558 /* Skip virtual phi's. The data dependences that are associated with
559 virtual defs/uses (i.e., memory accesses) are analyzed elsewhere. */
560 if (!is_gimple_reg (SSA_NAME_VAR (def
)))
563 STMT_VINFO_DEF_TYPE (stmt_vinfo
) = vect_unknown_def_type
;
565 /* Analyze the evolution function. */
566 access_fn
= analyze_scalar_evolution (loop
, def
);
569 STRIP_NOPS (access_fn
);
570 if (vect_print_dump_info (REPORT_DETAILS
))
572 fprintf (vect_dump
, "Access function of PHI: ");
573 print_generic_expr (vect_dump
, access_fn
, TDF_SLIM
);
575 STMT_VINFO_LOOP_PHI_EVOLUTION_PART (stmt_vinfo
)
576 = evolution_part_in_loop_num (access_fn
, loop
->num
);
580 || !vect_is_simple_iv_evolution (loop
->num
, access_fn
, &dumy
, &dumy
))
582 VEC_safe_push (gimple
, heap
, worklist
, phi
);
586 gcc_assert (STMT_VINFO_LOOP_PHI_EVOLUTION_PART (stmt_vinfo
) != NULL_TREE
);
588 if (vect_print_dump_info (REPORT_DETAILS
))
589 fprintf (vect_dump
, "Detected induction.");
590 STMT_VINFO_DEF_TYPE (stmt_vinfo
) = vect_induction_def
;
594 /* Second - identify all reductions and nested cycles. */
595 while (VEC_length (gimple
, worklist
) > 0)
597 gimple phi
= VEC_pop (gimple
, worklist
);
598 tree def
= PHI_RESULT (phi
);
599 stmt_vec_info stmt_vinfo
= vinfo_for_stmt (phi
);
603 if (vect_print_dump_info (REPORT_DETAILS
))
605 fprintf (vect_dump
, "Analyze phi: ");
606 print_gimple_stmt (vect_dump
, phi
, 0, TDF_SLIM
);
609 gcc_assert (is_gimple_reg (SSA_NAME_VAR (def
)));
610 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo
) == vect_unknown_def_type
);
612 nested_cycle
= (loop
!= LOOP_VINFO_LOOP (loop_vinfo
));
613 reduc_stmt
= vect_force_simple_reduction (loop_vinfo
, phi
, !nested_cycle
,
619 if (vect_print_dump_info (REPORT_DETAILS
))
620 fprintf (vect_dump
, "Detected double reduction.");
622 STMT_VINFO_DEF_TYPE (stmt_vinfo
) = vect_double_reduction_def
;
623 STMT_VINFO_DEF_TYPE (vinfo_for_stmt (reduc_stmt
)) =
624 vect_double_reduction_def
;
630 if (vect_print_dump_info (REPORT_DETAILS
))
631 fprintf (vect_dump
, "Detected vectorizable nested cycle.");
633 STMT_VINFO_DEF_TYPE (stmt_vinfo
) = vect_nested_cycle
;
634 STMT_VINFO_DEF_TYPE (vinfo_for_stmt (reduc_stmt
)) =
639 if (vect_print_dump_info (REPORT_DETAILS
))
640 fprintf (vect_dump
, "Detected reduction.");
642 STMT_VINFO_DEF_TYPE (stmt_vinfo
) = vect_reduction_def
;
643 STMT_VINFO_DEF_TYPE (vinfo_for_stmt (reduc_stmt
)) =
645 /* Store the reduction cycles for possible vectorization in
647 VEC_safe_push (gimple
, heap
,
648 LOOP_VINFO_REDUCTIONS (loop_vinfo
),
654 if (vect_print_dump_info (REPORT_DETAILS
))
655 fprintf (vect_dump
, "Unknown def-use cycle pattern.");
658 VEC_free (gimple
, heap
, worklist
);
662 /* Function vect_analyze_scalar_cycles.
664 Examine the cross iteration def-use cycles of scalar variables, by
665 analyzing the loop-header PHIs of scalar variables. Classify each
666 cycle as one of the following: invariant, induction, reduction, unknown.
667 We do that for the loop represented by LOOP_VINFO, and also to its
668 inner-loop, if exists.
669 Examples for scalar cycles:
684 vect_analyze_scalar_cycles (loop_vec_info loop_vinfo
)
686 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
688 vect_analyze_scalar_cycles_1 (loop_vinfo
, loop
);
690 /* When vectorizing an outer-loop, the inner-loop is executed sequentially.
691 Reductions in such inner-loop therefore have different properties than
692 the reductions in the nest that gets vectorized:
693 1. When vectorized, they are executed in the same order as in the original
694 scalar loop, so we can't change the order of computation when
696 2. FIXME: Inner-loop reductions can be used in the inner-loop, so the
697 current checks are too strict. */
700 vect_analyze_scalar_cycles_1 (loop_vinfo
, loop
->inner
);
703 /* Function vect_get_loop_niters.
705 Determine how many iterations the loop is executed.
706 If an expression that represents the number of iterations
707 can be constructed, place it in NUMBER_OF_ITERATIONS.
708 Return the loop exit condition. */
711 vect_get_loop_niters (struct loop
*loop
, tree
*number_of_iterations
)
715 if (vect_print_dump_info (REPORT_DETAILS
))
716 fprintf (vect_dump
, "=== get_loop_niters ===");
718 niters
= number_of_exit_cond_executions (loop
);
720 if (niters
!= NULL_TREE
721 && niters
!= chrec_dont_know
)
723 *number_of_iterations
= niters
;
725 if (vect_print_dump_info (REPORT_DETAILS
))
727 fprintf (vect_dump
, "==> get_loop_niters:" );
728 print_generic_expr (vect_dump
, *number_of_iterations
, TDF_SLIM
);
732 return get_loop_exit_condition (loop
);
736 /* Function bb_in_loop_p
738 Used as predicate for dfs order traversal of the loop bbs. */
741 bb_in_loop_p (const_basic_block bb
, const void *data
)
743 const struct loop
*const loop
= (const struct loop
*)data
;
744 if (flow_bb_inside_loop_p (loop
, bb
))
750 /* Function new_loop_vec_info.
752 Create and initialize a new loop_vec_info struct for LOOP, as well as
753 stmt_vec_info structs for all the stmts in LOOP. */
756 new_loop_vec_info (struct loop
*loop
)
760 gimple_stmt_iterator si
;
761 unsigned int i
, nbbs
;
763 res
= (loop_vec_info
) xcalloc (1, sizeof (struct _loop_vec_info
));
764 LOOP_VINFO_LOOP (res
) = loop
;
766 bbs
= get_loop_body (loop
);
768 /* Create/Update stmt_info for all stmts in the loop. */
769 for (i
= 0; i
< loop
->num_nodes
; i
++)
771 basic_block bb
= bbs
[i
];
773 /* BBs in a nested inner-loop will have been already processed (because
774 we will have called vect_analyze_loop_form for any nested inner-loop).
775 Therefore, for stmts in an inner-loop we just want to update the
776 STMT_VINFO_LOOP_VINFO field of their stmt_info to point to the new
777 loop_info of the outer-loop we are currently considering to vectorize
778 (instead of the loop_info of the inner-loop).
779 For stmts in other BBs we need to create a stmt_info from scratch. */
780 if (bb
->loop_father
!= loop
)
783 gcc_assert (loop
->inner
&& bb
->loop_father
== loop
->inner
);
784 for (si
= gsi_start_phis (bb
); !gsi_end_p (si
); gsi_next (&si
))
786 gimple phi
= gsi_stmt (si
);
787 stmt_vec_info stmt_info
= vinfo_for_stmt (phi
);
788 loop_vec_info inner_loop_vinfo
=
789 STMT_VINFO_LOOP_VINFO (stmt_info
);
790 gcc_assert (loop
->inner
== LOOP_VINFO_LOOP (inner_loop_vinfo
));
791 STMT_VINFO_LOOP_VINFO (stmt_info
) = res
;
793 for (si
= gsi_start_bb (bb
); !gsi_end_p (si
); gsi_next (&si
))
795 gimple stmt
= gsi_stmt (si
);
796 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
797 loop_vec_info inner_loop_vinfo
=
798 STMT_VINFO_LOOP_VINFO (stmt_info
);
799 gcc_assert (loop
->inner
== LOOP_VINFO_LOOP (inner_loop_vinfo
));
800 STMT_VINFO_LOOP_VINFO (stmt_info
) = res
;
805 /* bb in current nest. */
806 for (si
= gsi_start_phis (bb
); !gsi_end_p (si
); gsi_next (&si
))
808 gimple phi
= gsi_stmt (si
);
809 gimple_set_uid (phi
, 0);
810 set_vinfo_for_stmt (phi
, new_stmt_vec_info (phi
, res
, NULL
));
813 for (si
= gsi_start_bb (bb
); !gsi_end_p (si
); gsi_next (&si
))
815 gimple stmt
= gsi_stmt (si
);
816 gimple_set_uid (stmt
, 0);
817 set_vinfo_for_stmt (stmt
, new_stmt_vec_info (stmt
, res
, NULL
));
822 /* CHECKME: We want to visit all BBs before their successors (except for
823 latch blocks, for which this assertion wouldn't hold). In the simple
824 case of the loop forms we allow, a dfs order of the BBs would the same
825 as reversed postorder traversal, so we are safe. */
828 bbs
= XCNEWVEC (basic_block
, loop
->num_nodes
);
829 nbbs
= dfs_enumerate_from (loop
->header
, 0, bb_in_loop_p
,
830 bbs
, loop
->num_nodes
, loop
);
831 gcc_assert (nbbs
== loop
->num_nodes
);
833 LOOP_VINFO_BBS (res
) = bbs
;
834 LOOP_VINFO_NITERS (res
) = NULL
;
835 LOOP_VINFO_NITERS_UNCHANGED (res
) = NULL
;
836 LOOP_VINFO_COST_MODEL_MIN_ITERS (res
) = 0;
837 LOOP_VINFO_VECTORIZABLE_P (res
) = 0;
838 LOOP_PEELING_FOR_ALIGNMENT (res
) = 0;
839 LOOP_VINFO_VECT_FACTOR (res
) = 0;
840 LOOP_VINFO_LOOP_NEST (res
) = VEC_alloc (loop_p
, heap
, 3);
841 LOOP_VINFO_DATAREFS (res
) = VEC_alloc (data_reference_p
, heap
, 10);
842 LOOP_VINFO_DDRS (res
) = VEC_alloc (ddr_p
, heap
, 10 * 10);
843 LOOP_VINFO_UNALIGNED_DR (res
) = NULL
;
844 LOOP_VINFO_MAY_MISALIGN_STMTS (res
) =
845 VEC_alloc (gimple
, heap
,
846 PARAM_VALUE (PARAM_VECT_MAX_VERSION_FOR_ALIGNMENT_CHECKS
));
847 LOOP_VINFO_MAY_ALIAS_DDRS (res
) =
848 VEC_alloc (ddr_p
, heap
,
849 PARAM_VALUE (PARAM_VECT_MAX_VERSION_FOR_ALIAS_CHECKS
));
850 LOOP_VINFO_GROUPED_STORES (res
) = VEC_alloc (gimple
, heap
, 10);
851 LOOP_VINFO_REDUCTIONS (res
) = VEC_alloc (gimple
, heap
, 10);
852 LOOP_VINFO_REDUCTION_CHAINS (res
) = VEC_alloc (gimple
, heap
, 10);
853 LOOP_VINFO_SLP_INSTANCES (res
) = VEC_alloc (slp_instance
, heap
, 10);
854 LOOP_VINFO_SLP_UNROLLING_FACTOR (res
) = 1;
855 LOOP_VINFO_PEELING_HTAB (res
) = NULL
;
856 LOOP_VINFO_PEELING_FOR_GAPS (res
) = false;
862 /* Function destroy_loop_vec_info.
864 Free LOOP_VINFO struct, as well as all the stmt_vec_info structs of all the
865 stmts in the loop. */
868 destroy_loop_vec_info (loop_vec_info loop_vinfo
, bool clean_stmts
)
873 gimple_stmt_iterator si
;
875 VEC (slp_instance
, heap
) *slp_instances
;
876 slp_instance instance
;
881 loop
= LOOP_VINFO_LOOP (loop_vinfo
);
883 bbs
= LOOP_VINFO_BBS (loop_vinfo
);
884 nbbs
= loop
->num_nodes
;
888 free (LOOP_VINFO_BBS (loop_vinfo
));
889 free_data_refs (LOOP_VINFO_DATAREFS (loop_vinfo
));
890 free_dependence_relations (LOOP_VINFO_DDRS (loop_vinfo
));
891 VEC_free (loop_p
, heap
, LOOP_VINFO_LOOP_NEST (loop_vinfo
));
892 VEC_free (gimple
, heap
, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo
));
893 VEC_free (ddr_p
, heap
, LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo
));
900 for (j
= 0; j
< nbbs
; j
++)
902 basic_block bb
= bbs
[j
];
903 for (si
= gsi_start_phis (bb
); !gsi_end_p (si
); gsi_next (&si
))
904 free_stmt_vec_info (gsi_stmt (si
));
906 for (si
= gsi_start_bb (bb
); !gsi_end_p (si
); )
908 gimple stmt
= gsi_stmt (si
);
909 /* Free stmt_vec_info. */
910 free_stmt_vec_info (stmt
);
915 free (LOOP_VINFO_BBS (loop_vinfo
));
916 free_data_refs (LOOP_VINFO_DATAREFS (loop_vinfo
));
917 free_dependence_relations (LOOP_VINFO_DDRS (loop_vinfo
));
918 VEC_free (loop_p
, heap
, LOOP_VINFO_LOOP_NEST (loop_vinfo
));
919 VEC_free (gimple
, heap
, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo
));
920 VEC_free (ddr_p
, heap
, LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo
));
921 slp_instances
= LOOP_VINFO_SLP_INSTANCES (loop_vinfo
);
922 FOR_EACH_VEC_ELT (slp_instance
, slp_instances
, j
, instance
)
923 vect_free_slp_instance (instance
);
925 VEC_free (slp_instance
, heap
, LOOP_VINFO_SLP_INSTANCES (loop_vinfo
));
926 VEC_free (gimple
, heap
, LOOP_VINFO_GROUPED_STORES (loop_vinfo
));
927 VEC_free (gimple
, heap
, LOOP_VINFO_REDUCTIONS (loop_vinfo
));
928 VEC_free (gimple
, heap
, LOOP_VINFO_REDUCTION_CHAINS (loop_vinfo
));
930 if (LOOP_VINFO_PEELING_HTAB (loop_vinfo
))
931 htab_delete (LOOP_VINFO_PEELING_HTAB (loop_vinfo
));
938 /* Function vect_analyze_loop_1.
940 Apply a set of analyses on LOOP, and create a loop_vec_info struct
941 for it. The different analyses will record information in the
942 loop_vec_info struct. This is a subset of the analyses applied in
943 vect_analyze_loop, to be applied on an inner-loop nested in the loop
944 that is now considered for (outer-loop) vectorization. */
947 vect_analyze_loop_1 (struct loop
*loop
)
949 loop_vec_info loop_vinfo
;
951 if (vect_print_dump_info (REPORT_DETAILS
))
952 fprintf (vect_dump
, "===== analyze_loop_nest_1 =====");
954 /* Check the CFG characteristics of the loop (nesting, entry/exit, etc. */
956 loop_vinfo
= vect_analyze_loop_form (loop
);
959 if (vect_print_dump_info (REPORT_DETAILS
))
960 fprintf (vect_dump
, "bad inner-loop form.");
968 /* Function vect_analyze_loop_form.
970 Verify that certain CFG restrictions hold, including:
971 - the loop has a pre-header
972 - the loop has a single entry and exit
973 - the loop exit condition is simple enough, and the number of iterations
974 can be analyzed (a countable loop). */
977 vect_analyze_loop_form (struct loop
*loop
)
979 loop_vec_info loop_vinfo
;
981 tree number_of_iterations
= NULL
;
982 loop_vec_info inner_loop_vinfo
= NULL
;
984 if (vect_print_dump_info (REPORT_DETAILS
))
985 fprintf (vect_dump
, "=== vect_analyze_loop_form ===");
987 /* Different restrictions apply when we are considering an inner-most loop,
988 vs. an outer (nested) loop.
989 (FORNOW. May want to relax some of these restrictions in the future). */
993 /* Inner-most loop. We currently require that the number of BBs is
994 exactly 2 (the header and latch). Vectorizable inner-most loops
1005 if (loop
->num_nodes
!= 2)
1007 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS
))
1008 fprintf (vect_dump
, "not vectorized: control flow in loop.");
1012 if (empty_block_p (loop
->header
))
1014 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS
))
1015 fprintf (vect_dump
, "not vectorized: empty loop.");
1021 struct loop
*innerloop
= loop
->inner
;
1024 /* Nested loop. We currently require that the loop is doubly-nested,
1025 contains a single inner loop, and the number of BBs is exactly 5.
1026 Vectorizable outer-loops look like this:
1038 The inner-loop has the properties expected of inner-most loops
1039 as described above. */
1041 if ((loop
->inner
)->inner
|| (loop
->inner
)->next
)
1043 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS
))
1044 fprintf (vect_dump
, "not vectorized: multiple nested loops.");
1048 /* Analyze the inner-loop. */
1049 inner_loop_vinfo
= vect_analyze_loop_1 (loop
->inner
);
1050 if (!inner_loop_vinfo
)
1052 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS
))
1053 fprintf (vect_dump
, "not vectorized: Bad inner loop.");
1057 if (!expr_invariant_in_loop_p (loop
,
1058 LOOP_VINFO_NITERS (inner_loop_vinfo
)))
1060 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS
))
1062 "not vectorized: inner-loop count not invariant.");
1063 destroy_loop_vec_info (inner_loop_vinfo
, true);
1067 if (loop
->num_nodes
!= 5)
1069 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS
))
1070 fprintf (vect_dump
, "not vectorized: control flow in loop.");
1071 destroy_loop_vec_info (inner_loop_vinfo
, true);
1075 gcc_assert (EDGE_COUNT (innerloop
->header
->preds
) == 2);
1076 entryedge
= EDGE_PRED (innerloop
->header
, 0);
1077 if (EDGE_PRED (innerloop
->header
, 0)->src
== innerloop
->latch
)
1078 entryedge
= EDGE_PRED (innerloop
->header
, 1);
1080 if (entryedge
->src
!= loop
->header
1081 || !single_exit (innerloop
)
1082 || single_exit (innerloop
)->dest
!= EDGE_PRED (loop
->latch
, 0)->src
)
1084 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS
))
1085 fprintf (vect_dump
, "not vectorized: unsupported outerloop form.");
1086 destroy_loop_vec_info (inner_loop_vinfo
, true);
1090 if (vect_print_dump_info (REPORT_DETAILS
))
1091 fprintf (vect_dump
, "Considering outer-loop vectorization.");
1094 if (!single_exit (loop
)
1095 || EDGE_COUNT (loop
->header
->preds
) != 2)
1097 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS
))
1099 if (!single_exit (loop
))
1100 fprintf (vect_dump
, "not vectorized: multiple exits.");
1101 else if (EDGE_COUNT (loop
->header
->preds
) != 2)
1102 fprintf (vect_dump
, "not vectorized: too many incoming edges.");
1104 if (inner_loop_vinfo
)
1105 destroy_loop_vec_info (inner_loop_vinfo
, true);
1109 /* We assume that the loop exit condition is at the end of the loop. i.e,
1110 that the loop is represented as a do-while (with a proper if-guard
1111 before the loop if needed), where the loop header contains all the
1112 executable statements, and the latch is empty. */
1113 if (!empty_block_p (loop
->latch
)
1114 || !gimple_seq_empty_p (phi_nodes (loop
->latch
)))
1116 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS
))
1117 fprintf (vect_dump
, "not vectorized: unexpected loop form.");
1118 if (inner_loop_vinfo
)
1119 destroy_loop_vec_info (inner_loop_vinfo
, true);
1123 /* Make sure there exists a single-predecessor exit bb: */
1124 if (!single_pred_p (single_exit (loop
)->dest
))
1126 edge e
= single_exit (loop
);
1127 if (!(e
->flags
& EDGE_ABNORMAL
))
1129 split_loop_exit_edge (e
);
1130 if (vect_print_dump_info (REPORT_DETAILS
))
1131 fprintf (vect_dump
, "split exit edge.");
1135 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS
))
1136 fprintf (vect_dump
, "not vectorized: abnormal loop exit edge.");
1137 if (inner_loop_vinfo
)
1138 destroy_loop_vec_info (inner_loop_vinfo
, true);
1143 loop_cond
= vect_get_loop_niters (loop
, &number_of_iterations
);
1146 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS
))
1147 fprintf (vect_dump
, "not vectorized: complicated exit condition.");
1148 if (inner_loop_vinfo
)
1149 destroy_loop_vec_info (inner_loop_vinfo
, true);
1153 if (!number_of_iterations
)
1155 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS
))
1157 "not vectorized: number of iterations cannot be computed.");
1158 if (inner_loop_vinfo
)
1159 destroy_loop_vec_info (inner_loop_vinfo
, true);
1163 if (chrec_contains_undetermined (number_of_iterations
))
1165 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS
))
1166 fprintf (vect_dump
, "Infinite number of iterations.");
1167 if (inner_loop_vinfo
)
1168 destroy_loop_vec_info (inner_loop_vinfo
, true);
1172 if (!NITERS_KNOWN_P (number_of_iterations
))
1174 if (vect_print_dump_info (REPORT_DETAILS
))
1176 fprintf (vect_dump
, "Symbolic number of iterations is ");
1177 print_generic_expr (vect_dump
, number_of_iterations
, TDF_DETAILS
);
1180 else if (TREE_INT_CST_LOW (number_of_iterations
) == 0)
1182 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS
))
1183 fprintf (vect_dump
, "not vectorized: number of iterations = 0.");
1184 if (inner_loop_vinfo
)
1185 destroy_loop_vec_info (inner_loop_vinfo
, false);
1189 loop_vinfo
= new_loop_vec_info (loop
);
1190 LOOP_VINFO_NITERS (loop_vinfo
) = number_of_iterations
;
1191 LOOP_VINFO_NITERS_UNCHANGED (loop_vinfo
) = number_of_iterations
;
1193 STMT_VINFO_TYPE (vinfo_for_stmt (loop_cond
)) = loop_exit_ctrl_vec_info_type
;
1195 /* CHECKME: May want to keep it around it in the future. */
1196 if (inner_loop_vinfo
)
1197 destroy_loop_vec_info (inner_loop_vinfo
, false);
1199 gcc_assert (!loop
->aux
);
1200 loop
->aux
= loop_vinfo
;
1205 /* Get cost by calling cost target builtin. */
1208 vect_get_cost (enum vect_cost_for_stmt type_of_cost
)
1210 tree dummy_type
= NULL
;
1213 return targetm
.vectorize
.builtin_vectorization_cost (type_of_cost
,
1218 /* Function vect_analyze_loop_operations.
1220 Scan the loop stmts and make sure they are all vectorizable. */
1223 vect_analyze_loop_operations (loop_vec_info loop_vinfo
, bool slp
)
1225 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
1226 basic_block
*bbs
= LOOP_VINFO_BBS (loop_vinfo
);
1227 int nbbs
= loop
->num_nodes
;
1228 gimple_stmt_iterator si
;
1229 unsigned int vectorization_factor
= 0;
1232 stmt_vec_info stmt_info
;
1233 bool need_to_vectorize
= false;
1234 int min_profitable_iters
;
1235 int min_scalar_loop_bound
;
1237 bool only_slp_in_loop
= true, ok
;
1238 HOST_WIDE_INT max_niter
;
1240 if (vect_print_dump_info (REPORT_DETAILS
))
1241 fprintf (vect_dump
, "=== vect_analyze_loop_operations ===");
1243 gcc_assert (LOOP_VINFO_VECT_FACTOR (loop_vinfo
));
1244 vectorization_factor
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
);
1247 /* If all the stmts in the loop can be SLPed, we perform only SLP, and
1248 vectorization factor of the loop is the unrolling factor required by
1249 the SLP instances. If that unrolling factor is 1, we say, that we
1250 perform pure SLP on loop - cross iteration parallelism is not
1252 for (i
= 0; i
< nbbs
; i
++)
1254 basic_block bb
= bbs
[i
];
1255 for (si
= gsi_start_bb (bb
); !gsi_end_p (si
); gsi_next (&si
))
1257 gimple stmt
= gsi_stmt (si
);
1258 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
1259 gcc_assert (stmt_info
);
1260 if ((STMT_VINFO_RELEVANT_P (stmt_info
)
1261 || VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (stmt_info
)))
1262 && !PURE_SLP_STMT (stmt_info
))
1263 /* STMT needs both SLP and loop-based vectorization. */
1264 only_slp_in_loop
= false;
1268 if (only_slp_in_loop
)
1269 vectorization_factor
= LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo
);
1271 vectorization_factor
= least_common_multiple (vectorization_factor
,
1272 LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo
));
1274 LOOP_VINFO_VECT_FACTOR (loop_vinfo
) = vectorization_factor
;
1275 if (vect_print_dump_info (REPORT_DETAILS
))
1276 fprintf (vect_dump
, "Updating vectorization factor to %d ",
1277 vectorization_factor
);
1280 for (i
= 0; i
< nbbs
; i
++)
1282 basic_block bb
= bbs
[i
];
1284 for (si
= gsi_start_phis (bb
); !gsi_end_p (si
); gsi_next (&si
))
1286 phi
= gsi_stmt (si
);
1289 stmt_info
= vinfo_for_stmt (phi
);
1290 if (vect_print_dump_info (REPORT_DETAILS
))
1292 fprintf (vect_dump
, "examining phi: ");
1293 print_gimple_stmt (vect_dump
, phi
, 0, TDF_SLIM
);
1296 /* Inner-loop loop-closed exit phi in outer-loop vectorization
1297 (i.e., a phi in the tail of the outer-loop). */
1298 if (! is_loop_header_bb_p (bb
))
1300 /* FORNOW: we currently don't support the case that these phis
1301 are not used in the outerloop (unless it is double reduction,
1302 i.e., this phi is vect_reduction_def), cause this case
1303 requires to actually do something here. */
1304 if ((!STMT_VINFO_RELEVANT_P (stmt_info
)
1305 || STMT_VINFO_LIVE_P (stmt_info
))
1306 && STMT_VINFO_DEF_TYPE (stmt_info
)
1307 != vect_double_reduction_def
)
1309 if (vect_print_dump_info (REPORT_DETAILS
))
1311 "Unsupported loop-closed phi in outer-loop.");
1315 /* If PHI is used in the outer loop, we check that its operand
1316 is defined in the inner loop. */
1317 if (STMT_VINFO_RELEVANT_P (stmt_info
))
1322 if (gimple_phi_num_args (phi
) != 1)
1325 phi_op
= PHI_ARG_DEF (phi
, 0);
1326 if (TREE_CODE (phi_op
) != SSA_NAME
)
1329 op_def_stmt
= SSA_NAME_DEF_STMT (phi_op
);
1330 if (!op_def_stmt
|| !vinfo_for_stmt (op_def_stmt
))
1333 if (STMT_VINFO_RELEVANT (vinfo_for_stmt (op_def_stmt
))
1334 != vect_used_in_outer
1335 && STMT_VINFO_RELEVANT (vinfo_for_stmt (op_def_stmt
))
1336 != vect_used_in_outer_by_reduction
)
1343 gcc_assert (stmt_info
);
1345 if (STMT_VINFO_LIVE_P (stmt_info
))
1347 /* FORNOW: not yet supported. */
1348 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS
))
1349 fprintf (vect_dump
, "not vectorized: value used after loop.");
1353 if (STMT_VINFO_RELEVANT (stmt_info
) == vect_used_in_scope
1354 && STMT_VINFO_DEF_TYPE (stmt_info
) != vect_induction_def
)
1356 /* A scalar-dependence cycle that we don't support. */
1357 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS
))
1358 fprintf (vect_dump
, "not vectorized: scalar dependence cycle.");
1362 if (STMT_VINFO_RELEVANT_P (stmt_info
))
1364 need_to_vectorize
= true;
1365 if (STMT_VINFO_DEF_TYPE (stmt_info
) == vect_induction_def
)
1366 ok
= vectorizable_induction (phi
, NULL
, NULL
);
1371 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS
))
1374 "not vectorized: relevant phi not supported: ");
1375 print_gimple_stmt (vect_dump
, phi
, 0, TDF_SLIM
);
1381 for (si
= gsi_start_bb (bb
); !gsi_end_p (si
); gsi_next (&si
))
1383 gimple stmt
= gsi_stmt (si
);
1384 if (!vect_analyze_stmt (stmt
, &need_to_vectorize
, NULL
))
1389 /* All operations in the loop are either irrelevant (deal with loop
1390 control, or dead), or only used outside the loop and can be moved
1391 out of the loop (e.g. invariants, inductions). The loop can be
1392 optimized away by scalar optimizations. We're better off not
1393 touching this loop. */
1394 if (!need_to_vectorize
)
1396 if (vect_print_dump_info (REPORT_DETAILS
))
1398 "All the computation can be taken out of the loop.");
1399 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS
))
1401 "not vectorized: redundant loop. no profit to vectorize.");
1405 if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo
)
1406 && vect_print_dump_info (REPORT_DETAILS
))
1408 "vectorization_factor = %d, niters = " HOST_WIDE_INT_PRINT_DEC
,
1409 vectorization_factor
, LOOP_VINFO_INT_NITERS (loop_vinfo
));
1411 if ((LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo
)
1412 && (LOOP_VINFO_INT_NITERS (loop_vinfo
) < vectorization_factor
))
1413 || ((max_niter
= max_stmt_executions_int (loop
)) != -1
1414 && (unsigned HOST_WIDE_INT
) max_niter
< vectorization_factor
))
1416 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS
))
1417 fprintf (vect_dump
, "not vectorized: iteration count too small.");
1418 if (vect_print_dump_info (REPORT_DETAILS
))
1419 fprintf (vect_dump
,"not vectorized: iteration count smaller than "
1420 "vectorization factor.");
1424 /* Analyze cost. Decide if worth while to vectorize. */
1426 /* Once VF is set, SLP costs should be updated since the number of created
1427 vector stmts depends on VF. */
1428 vect_update_slp_costs_according_to_vf (loop_vinfo
);
1430 min_profitable_iters
= vect_estimate_min_profitable_iters (loop_vinfo
);
1431 LOOP_VINFO_COST_MODEL_MIN_ITERS (loop_vinfo
) = min_profitable_iters
;
1433 if (min_profitable_iters
< 0)
1435 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS
))
1436 fprintf (vect_dump
, "not vectorized: vectorization not profitable.");
1437 if (vect_print_dump_info (REPORT_DETAILS
))
1438 fprintf (vect_dump
, "not vectorized: vector version will never be "
1443 min_scalar_loop_bound
= ((PARAM_VALUE (PARAM_MIN_VECT_LOOP_BOUND
)
1444 * vectorization_factor
) - 1);
1446 /* Use the cost model only if it is more conservative than user specified
1449 th
= (unsigned) min_scalar_loop_bound
;
1450 if (min_profitable_iters
1451 && (!min_scalar_loop_bound
1452 || min_profitable_iters
> min_scalar_loop_bound
))
1453 th
= (unsigned) min_profitable_iters
;
1455 if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo
)
1456 && LOOP_VINFO_INT_NITERS (loop_vinfo
) <= th
)
1458 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS
))
1459 fprintf (vect_dump
, "not vectorized: vectorization not "
1461 if (vect_print_dump_info (REPORT_DETAILS
))
1462 fprintf (vect_dump
, "not vectorized: iteration count smaller than "
1463 "user specified loop bound parameter or minimum "
1464 "profitable iterations (whichever is more conservative).");
1468 if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo
)
1469 || LOOP_VINFO_INT_NITERS (loop_vinfo
) % vectorization_factor
!= 0
1470 || LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo
))
1472 if (vect_print_dump_info (REPORT_DETAILS
))
1473 fprintf (vect_dump
, "epilog loop required.");
1474 if (!vect_can_advance_ivs_p (loop_vinfo
))
1476 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS
))
1478 "not vectorized: can't create epilog loop 1.");
1481 if (!slpeel_can_duplicate_loop_p (loop
, single_exit (loop
)))
1483 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS
))
1485 "not vectorized: can't create epilog loop 2.");
1494 /* Function vect_analyze_loop_2.
1496 Apply a set of analyses on LOOP, and create a loop_vec_info struct
1497 for it. The different analyses will record information in the
1498 loop_vec_info struct. */
1500 vect_analyze_loop_2 (loop_vec_info loop_vinfo
)
1502 bool ok
, slp
= false;
1503 int max_vf
= MAX_VECTORIZATION_FACTOR
;
1506 /* Find all data references in the loop (which correspond to vdefs/vuses)
1507 and analyze their evolution in the loop. Also adjust the minimal
1508 vectorization factor according to the loads and stores.
1510 FORNOW: Handle only simple, array references, which
1511 alignment can be forced, and aligned pointer-references. */
1513 ok
= vect_analyze_data_refs (loop_vinfo
, NULL
, &min_vf
);
1516 if (vect_print_dump_info (REPORT_DETAILS
))
1517 fprintf (vect_dump
, "bad data references.");
1521 /* Classify all cross-iteration scalar data-flow cycles.
1522 Cross-iteration cycles caused by virtual phis are analyzed separately. */
1524 vect_analyze_scalar_cycles (loop_vinfo
);
1526 vect_pattern_recog (loop_vinfo
, NULL
);
1528 /* Data-flow analysis to detect stmts that do not need to be vectorized. */
1530 ok
= vect_mark_stmts_to_be_vectorized (loop_vinfo
);
1533 if (vect_print_dump_info (REPORT_DETAILS
))
1534 fprintf (vect_dump
, "unexpected pattern.");
1538 /* Analyze data dependences between the data-refs in the loop
1539 and adjust the maximum vectorization factor according to
1541 FORNOW: fail at the first data dependence that we encounter. */
1543 ok
= vect_analyze_data_ref_dependences (loop_vinfo
, NULL
, &max_vf
);
1547 if (vect_print_dump_info (REPORT_DETAILS
))
1548 fprintf (vect_dump
, "bad data dependence.");
1552 ok
= vect_determine_vectorization_factor (loop_vinfo
);
1555 if (vect_print_dump_info (REPORT_DETAILS
))
1556 fprintf (vect_dump
, "can't determine vectorization factor.");
1559 if (max_vf
< LOOP_VINFO_VECT_FACTOR (loop_vinfo
))
1561 if (vect_print_dump_info (REPORT_DETAILS
))
1562 fprintf (vect_dump
, "bad data dependence.");
1566 /* Analyze the alignment of the data-refs in the loop.
1567 Fail if a data reference is found that cannot be vectorized. */
1569 ok
= vect_analyze_data_refs_alignment (loop_vinfo
, NULL
);
1572 if (vect_print_dump_info (REPORT_DETAILS
))
1573 fprintf (vect_dump
, "bad data alignment.");
1577 /* Analyze the access patterns of the data-refs in the loop (consecutive,
1578 complex, etc.). FORNOW: Only handle consecutive access pattern. */
1580 ok
= vect_analyze_data_ref_accesses (loop_vinfo
, NULL
);
1583 if (vect_print_dump_info (REPORT_DETAILS
))
1584 fprintf (vect_dump
, "bad data access.");
1588 /* Prune the list of ddrs to be tested at run-time by versioning for alias.
1589 It is important to call pruning after vect_analyze_data_ref_accesses,
1590 since we use grouping information gathered by interleaving analysis. */
1591 ok
= vect_prune_runtime_alias_test_list (loop_vinfo
);
1594 if (vect_print_dump_info (REPORT_DETAILS
))
1595 fprintf (vect_dump
, "too long list of versioning for alias "
1600 /* This pass will decide on using loop versioning and/or loop peeling in
1601 order to enhance the alignment of data references in the loop. */
1603 ok
= vect_enhance_data_refs_alignment (loop_vinfo
);
1606 if (vect_print_dump_info (REPORT_DETAILS
))
1607 fprintf (vect_dump
, "bad data alignment.");
1611 /* Check the SLP opportunities in the loop, analyze and build SLP trees. */
1612 ok
= vect_analyze_slp (loop_vinfo
, NULL
);
1615 /* Decide which possible SLP instances to SLP. */
1616 slp
= vect_make_slp_decision (loop_vinfo
);
1618 /* Find stmts that need to be both vectorized and SLPed. */
1619 vect_detect_hybrid_slp (loop_vinfo
);
1624 /* Scan all the operations in the loop and make sure they are
1627 ok
= vect_analyze_loop_operations (loop_vinfo
, slp
);
1630 if (vect_print_dump_info (REPORT_DETAILS
))
1631 fprintf (vect_dump
, "bad operation or unsupported loop bound.");
1638 /* Function vect_analyze_loop.
1640 Apply a set of analyses on LOOP, and create a loop_vec_info struct
1641 for it. The different analyses will record information in the
1642 loop_vec_info struct. */
1644 vect_analyze_loop (struct loop
*loop
)
1646 loop_vec_info loop_vinfo
;
1647 unsigned int vector_sizes
;
1649 /* Autodetect first vector size we try. */
1650 current_vector_size
= 0;
1651 vector_sizes
= targetm
.vectorize
.autovectorize_vector_sizes ();
1653 if (vect_print_dump_info (REPORT_DETAILS
))
1654 fprintf (vect_dump
, "===== analyze_loop_nest =====");
1656 if (loop_outer (loop
)
1657 && loop_vec_info_for_loop (loop_outer (loop
))
1658 && LOOP_VINFO_VECTORIZABLE_P (loop_vec_info_for_loop (loop_outer (loop
))))
1660 if (vect_print_dump_info (REPORT_DETAILS
))
1661 fprintf (vect_dump
, "outer-loop already vectorized.");
1667 /* Check the CFG characteristics of the loop (nesting, entry/exit). */
1668 loop_vinfo
= vect_analyze_loop_form (loop
);
1671 if (vect_print_dump_info (REPORT_DETAILS
))
1672 fprintf (vect_dump
, "bad loop form.");
1676 if (vect_analyze_loop_2 (loop_vinfo
))
1678 LOOP_VINFO_VECTORIZABLE_P (loop_vinfo
) = 1;
1683 destroy_loop_vec_info (loop_vinfo
, true);
1685 vector_sizes
&= ~current_vector_size
;
1686 if (vector_sizes
== 0
1687 || current_vector_size
== 0)
1690 /* Try the next biggest vector size. */
1691 current_vector_size
= 1 << floor_log2 (vector_sizes
);
1692 if (vect_print_dump_info (REPORT_DETAILS
))
1693 fprintf (vect_dump
, "***** Re-trying analysis with "
1694 "vector size %d\n", current_vector_size
);
1699 /* Function reduction_code_for_scalar_code
1702 CODE - tree_code of a reduction operations.
1705 REDUC_CODE - the corresponding tree-code to be used to reduce the
1706 vector of partial results into a single scalar result (which
1707 will also reside in a vector) or ERROR_MARK if the operation is
1708 a supported reduction operation, but does not have such tree-code.
1710 Return FALSE if CODE currently cannot be vectorized as reduction. */
1713 reduction_code_for_scalar_code (enum tree_code code
,
1714 enum tree_code
*reduc_code
)
1719 *reduc_code
= REDUC_MAX_EXPR
;
1723 *reduc_code
= REDUC_MIN_EXPR
;
1727 *reduc_code
= REDUC_PLUS_EXPR
;
1735 *reduc_code
= ERROR_MARK
;
1744 /* Error reporting helper for vect_is_simple_reduction below. GIMPLE statement
1745 STMT is printed with a message MSG. */
1748 report_vect_op (gimple stmt
, const char *msg
)
1750 fprintf (vect_dump
, "%s", msg
);
1751 print_gimple_stmt (vect_dump
, stmt
, 0, TDF_SLIM
);
1755 /* Detect SLP reduction of the form:
1765 PHI is the reduction phi node (#a1 = phi <a5, a0> above)
1766 FIRST_STMT is the first reduction stmt in the chain
1767 (a2 = operation (a1)).
1769 Return TRUE if a reduction chain was detected. */
1772 vect_is_slp_reduction (loop_vec_info loop_info
, gimple phi
, gimple first_stmt
)
1774 struct loop
*loop
= (gimple_bb (phi
))->loop_father
;
1775 struct loop
*vect_loop
= LOOP_VINFO_LOOP (loop_info
);
1776 enum tree_code code
;
1777 gimple current_stmt
= NULL
, loop_use_stmt
= NULL
, first
, next_stmt
;
1778 stmt_vec_info use_stmt_info
, current_stmt_info
;
1780 imm_use_iterator imm_iter
;
1781 use_operand_p use_p
;
1782 int nloop_uses
, size
= 0, n_out_of_loop_uses
;
1785 if (loop
!= vect_loop
)
1788 lhs
= PHI_RESULT (phi
);
1789 code
= gimple_assign_rhs_code (first_stmt
);
1793 n_out_of_loop_uses
= 0;
1794 FOR_EACH_IMM_USE_FAST (use_p
, imm_iter
, lhs
)
1796 gimple use_stmt
= USE_STMT (use_p
);
1797 if (is_gimple_debug (use_stmt
))
1800 use_stmt
= USE_STMT (use_p
);
1802 /* Check if we got back to the reduction phi. */
1803 if (use_stmt
== phi
)
1805 loop_use_stmt
= use_stmt
;
1810 if (flow_bb_inside_loop_p (loop
, gimple_bb (use_stmt
)))
1812 if (vinfo_for_stmt (use_stmt
)
1813 && !STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (use_stmt
)))
1815 loop_use_stmt
= use_stmt
;
1820 n_out_of_loop_uses
++;
1822 /* There are can be either a single use in the loop or two uses in
1824 if (nloop_uses
> 1 || (n_out_of_loop_uses
&& nloop_uses
))
1831 /* We reached a statement with no loop uses. */
1832 if (nloop_uses
== 0)
1835 /* This is a loop exit phi, and we haven't reached the reduction phi. */
1836 if (gimple_code (loop_use_stmt
) == GIMPLE_PHI
)
1839 if (!is_gimple_assign (loop_use_stmt
)
1840 || code
!= gimple_assign_rhs_code (loop_use_stmt
)
1841 || !flow_bb_inside_loop_p (loop
, gimple_bb (loop_use_stmt
)))
1844 /* Insert USE_STMT into reduction chain. */
1845 use_stmt_info
= vinfo_for_stmt (loop_use_stmt
);
1848 current_stmt_info
= vinfo_for_stmt (current_stmt
);
1849 GROUP_NEXT_ELEMENT (current_stmt_info
) = loop_use_stmt
;
1850 GROUP_FIRST_ELEMENT (use_stmt_info
)
1851 = GROUP_FIRST_ELEMENT (current_stmt_info
);
1854 GROUP_FIRST_ELEMENT (use_stmt_info
) = loop_use_stmt
;
1856 lhs
= gimple_assign_lhs (loop_use_stmt
);
1857 current_stmt
= loop_use_stmt
;
1861 if (!found
|| loop_use_stmt
!= phi
|| size
< 2)
1864 /* Swap the operands, if needed, to make the reduction operand be the second
1866 lhs
= PHI_RESULT (phi
);
1867 next_stmt
= GROUP_FIRST_ELEMENT (vinfo_for_stmt (current_stmt
));
1870 if (gimple_assign_rhs2 (next_stmt
) == lhs
)
1872 tree op
= gimple_assign_rhs1 (next_stmt
);
1873 gimple def_stmt
= NULL
;
1875 if (TREE_CODE (op
) == SSA_NAME
)
1876 def_stmt
= SSA_NAME_DEF_STMT (op
);
1878 /* Check that the other def is either defined in the loop
1879 ("vect_internal_def"), or it's an induction (defined by a
1880 loop-header phi-node). */
1882 && gimple_bb (def_stmt
)
1883 && flow_bb_inside_loop_p (loop
, gimple_bb (def_stmt
))
1884 && (is_gimple_assign (def_stmt
)
1885 || is_gimple_call (def_stmt
)
1886 || STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def_stmt
))
1887 == vect_induction_def
1888 || (gimple_code (def_stmt
) == GIMPLE_PHI
1889 && STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def_stmt
))
1890 == vect_internal_def
1891 && !is_loop_header_bb_p (gimple_bb (def_stmt
)))))
1893 lhs
= gimple_assign_lhs (next_stmt
);
1894 next_stmt
= GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt
));
1902 tree op
= gimple_assign_rhs2 (next_stmt
);
1903 gimple def_stmt
= NULL
;
1905 if (TREE_CODE (op
) == SSA_NAME
)
1906 def_stmt
= SSA_NAME_DEF_STMT (op
);
1908 /* Check that the other def is either defined in the loop
1909 ("vect_internal_def"), or it's an induction (defined by a
1910 loop-header phi-node). */
1912 && gimple_bb (def_stmt
)
1913 && flow_bb_inside_loop_p (loop
, gimple_bb (def_stmt
))
1914 && (is_gimple_assign (def_stmt
)
1915 || is_gimple_call (def_stmt
)
1916 || STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def_stmt
))
1917 == vect_induction_def
1918 || (gimple_code (def_stmt
) == GIMPLE_PHI
1919 && STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def_stmt
))
1920 == vect_internal_def
1921 && !is_loop_header_bb_p (gimple_bb (def_stmt
)))))
1923 if (vect_print_dump_info (REPORT_DETAILS
))
1925 fprintf (vect_dump
, "swapping oprnds: ");
1926 print_gimple_stmt (vect_dump
, next_stmt
, 0, TDF_SLIM
);
1929 swap_tree_operands (next_stmt
,
1930 gimple_assign_rhs1_ptr (next_stmt
),
1931 gimple_assign_rhs2_ptr (next_stmt
));
1932 update_stmt (next_stmt
);
1938 lhs
= gimple_assign_lhs (next_stmt
);
1939 next_stmt
= GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt
));
1942 /* Save the chain for further analysis in SLP detection. */
1943 first
= GROUP_FIRST_ELEMENT (vinfo_for_stmt (current_stmt
));
1944 VEC_safe_push (gimple
, heap
, LOOP_VINFO_REDUCTION_CHAINS (loop_info
), first
);
1945 GROUP_SIZE (vinfo_for_stmt (first
)) = size
;
1951 /* Function vect_is_simple_reduction_1
1953 (1) Detect a cross-iteration def-use cycle that represents a simple
1954 reduction computation. We look for the following pattern:
1959 a2 = operation (a3, a1)
1962 1. operation is commutative and associative and it is safe to
1963 change the order of the computation (if CHECK_REDUCTION is true)
1964 2. no uses for a2 in the loop (a2 is used out of the loop)
1965 3. no uses of a1 in the loop besides the reduction operation
1966 4. no uses of a1 outside the loop.
1968 Conditions 1,4 are tested here.
1969 Conditions 2,3 are tested in vect_mark_stmts_to_be_vectorized.
1971 (2) Detect a cross-iteration def-use cycle in nested loops, i.e.,
1972 nested cycles, if CHECK_REDUCTION is false.
1974 (3) Detect cycles of phi nodes in outer-loop vectorization, i.e., double
1978 inner loop (def of a3)
1981 If MODIFY is true it tries also to rework the code in-place to enable
1982 detection of more reduction patterns. For the time being we rewrite
1983 "res -= RHS" into "rhs += -RHS" when it seems worthwhile.
1987 vect_is_simple_reduction_1 (loop_vec_info loop_info
, gimple phi
,
1988 bool check_reduction
, bool *double_reduc
,
1991 struct loop
*loop
= (gimple_bb (phi
))->loop_father
;
1992 struct loop
*vect_loop
= LOOP_VINFO_LOOP (loop_info
);
1993 edge latch_e
= loop_latch_edge (loop
);
1994 tree loop_arg
= PHI_ARG_DEF_FROM_EDGE (phi
, latch_e
);
1995 gimple def_stmt
, def1
= NULL
, def2
= NULL
;
1996 enum tree_code orig_code
, code
;
1997 tree op1
, op2
, op3
= NULL_TREE
, op4
= NULL_TREE
;
2001 imm_use_iterator imm_iter
;
2002 use_operand_p use_p
;
2005 *double_reduc
= false;
2007 /* If CHECK_REDUCTION is true, we assume inner-most loop vectorization,
2008 otherwise, we assume outer loop vectorization. */
2009 gcc_assert ((check_reduction
&& loop
== vect_loop
)
2010 || (!check_reduction
&& flow_loop_nested_p (vect_loop
, loop
)));
2012 name
= PHI_RESULT (phi
);
2014 FOR_EACH_IMM_USE_FAST (use_p
, imm_iter
, name
)
2016 gimple use_stmt
= USE_STMT (use_p
);
2017 if (is_gimple_debug (use_stmt
))
2020 if (!flow_bb_inside_loop_p (loop
, gimple_bb (use_stmt
)))
2022 if (vect_print_dump_info (REPORT_DETAILS
))
2023 fprintf (vect_dump
, "intermediate value used outside loop.");
2028 if (vinfo_for_stmt (use_stmt
)
2029 && !is_pattern_stmt_p (vinfo_for_stmt (use_stmt
)))
2033 if (vect_print_dump_info (REPORT_DETAILS
))
2034 fprintf (vect_dump
, "reduction used in loop.");
2039 if (TREE_CODE (loop_arg
) != SSA_NAME
)
2041 if (vect_print_dump_info (REPORT_DETAILS
))
2043 fprintf (vect_dump
, "reduction: not ssa_name: ");
2044 print_generic_expr (vect_dump
, loop_arg
, TDF_SLIM
);
2049 def_stmt
= SSA_NAME_DEF_STMT (loop_arg
);
2052 if (vect_print_dump_info (REPORT_DETAILS
))
2053 fprintf (vect_dump
, "reduction: no def_stmt.");
2057 if (!is_gimple_assign (def_stmt
) && gimple_code (def_stmt
) != GIMPLE_PHI
)
2059 if (vect_print_dump_info (REPORT_DETAILS
))
2060 print_gimple_stmt (vect_dump
, def_stmt
, 0, TDF_SLIM
);
2064 if (is_gimple_assign (def_stmt
))
2066 name
= gimple_assign_lhs (def_stmt
);
2071 name
= PHI_RESULT (def_stmt
);
2076 FOR_EACH_IMM_USE_FAST (use_p
, imm_iter
, name
)
2078 gimple use_stmt
= USE_STMT (use_p
);
2079 if (is_gimple_debug (use_stmt
))
2081 if (flow_bb_inside_loop_p (loop
, gimple_bb (use_stmt
))
2082 && vinfo_for_stmt (use_stmt
)
2083 && !is_pattern_stmt_p (vinfo_for_stmt (use_stmt
)))
2087 if (vect_print_dump_info (REPORT_DETAILS
))
2088 fprintf (vect_dump
, "reduction used in loop.");
2093 /* If DEF_STMT is a phi node itself, we expect it to have a single argument
2094 defined in the inner loop. */
2097 op1
= PHI_ARG_DEF (def_stmt
, 0);
2099 if (gimple_phi_num_args (def_stmt
) != 1
2100 || TREE_CODE (op1
) != SSA_NAME
)
2102 if (vect_print_dump_info (REPORT_DETAILS
))
2103 fprintf (vect_dump
, "unsupported phi node definition.");
2108 def1
= SSA_NAME_DEF_STMT (op1
);
2109 if (flow_bb_inside_loop_p (loop
, gimple_bb (def_stmt
))
2111 && flow_bb_inside_loop_p (loop
->inner
, gimple_bb (def1
))
2112 && is_gimple_assign (def1
))
2114 if (vect_print_dump_info (REPORT_DETAILS
))
2115 report_vect_op (def_stmt
, "detected double reduction: ");
2117 *double_reduc
= true;
2124 code
= orig_code
= gimple_assign_rhs_code (def_stmt
);
2126 /* We can handle "res -= x[i]", which is non-associative by
2127 simply rewriting this into "res += -x[i]". Avoid changing
2128 gimple instruction for the first simple tests and only do this
2129 if we're allowed to change code at all. */
2130 if (code
== MINUS_EXPR
2132 && (op1
= gimple_assign_rhs1 (def_stmt
))
2133 && TREE_CODE (op1
) == SSA_NAME
2134 && SSA_NAME_DEF_STMT (op1
) == phi
)
2138 && (!commutative_tree_code (code
) || !associative_tree_code (code
)))
2140 if (vect_print_dump_info (REPORT_DETAILS
))
2141 report_vect_op (def_stmt
, "reduction: not commutative/associative: ");
2145 if (get_gimple_rhs_class (code
) != GIMPLE_BINARY_RHS
)
2147 if (code
!= COND_EXPR
)
2149 if (vect_print_dump_info (REPORT_DETAILS
))
2150 report_vect_op (def_stmt
, "reduction: not binary operation: ");
2155 op3
= gimple_assign_rhs1 (def_stmt
);
2156 if (COMPARISON_CLASS_P (op3
))
2158 op4
= TREE_OPERAND (op3
, 1);
2159 op3
= TREE_OPERAND (op3
, 0);
2162 op1
= gimple_assign_rhs2 (def_stmt
);
2163 op2
= gimple_assign_rhs3 (def_stmt
);
2165 if (TREE_CODE (op1
) != SSA_NAME
&& TREE_CODE (op2
) != SSA_NAME
)
2167 if (vect_print_dump_info (REPORT_DETAILS
))
2168 report_vect_op (def_stmt
, "reduction: uses not ssa_names: ");
2175 op1
= gimple_assign_rhs1 (def_stmt
);
2176 op2
= gimple_assign_rhs2 (def_stmt
);
2178 if (TREE_CODE (op1
) != SSA_NAME
&& TREE_CODE (op2
) != SSA_NAME
)
2180 if (vect_print_dump_info (REPORT_DETAILS
))
2181 report_vect_op (def_stmt
, "reduction: uses not ssa_names: ");
2187 type
= TREE_TYPE (gimple_assign_lhs (def_stmt
));
2188 if ((TREE_CODE (op1
) == SSA_NAME
2189 && !types_compatible_p (type
,TREE_TYPE (op1
)))
2190 || (TREE_CODE (op2
) == SSA_NAME
2191 && !types_compatible_p (type
, TREE_TYPE (op2
)))
2192 || (op3
&& TREE_CODE (op3
) == SSA_NAME
2193 && !types_compatible_p (type
, TREE_TYPE (op3
)))
2194 || (op4
&& TREE_CODE (op4
) == SSA_NAME
2195 && !types_compatible_p (type
, TREE_TYPE (op4
))))
2197 if (vect_print_dump_info (REPORT_DETAILS
))
2199 fprintf (vect_dump
, "reduction: multiple types: operation type: ");
2200 print_generic_expr (vect_dump
, type
, TDF_SLIM
);
2201 fprintf (vect_dump
, ", operands types: ");
2202 print_generic_expr (vect_dump
, TREE_TYPE (op1
), TDF_SLIM
);
2203 fprintf (vect_dump
, ",");
2204 print_generic_expr (vect_dump
, TREE_TYPE (op2
), TDF_SLIM
);
2207 fprintf (vect_dump
, ",");
2208 print_generic_expr (vect_dump
, TREE_TYPE (op3
), TDF_SLIM
);
2213 fprintf (vect_dump
, ",");
2214 print_generic_expr (vect_dump
, TREE_TYPE (op4
), TDF_SLIM
);
2221 /* Check that it's ok to change the order of the computation.
2222 Generally, when vectorizing a reduction we change the order of the
2223 computation. This may change the behavior of the program in some
2224 cases, so we need to check that this is ok. One exception is when
2225 vectorizing an outer-loop: the inner-loop is executed sequentially,
2226 and therefore vectorizing reductions in the inner-loop during
2227 outer-loop vectorization is safe. */
2229 /* CHECKME: check for !flag_finite_math_only too? */
2230 if (SCALAR_FLOAT_TYPE_P (type
) && !flag_associative_math
2233 /* Changing the order of operations changes the semantics. */
2234 if (vect_print_dump_info (REPORT_DETAILS
))
2235 report_vect_op (def_stmt
, "reduction: unsafe fp math optimization: ");
2238 else if (INTEGRAL_TYPE_P (type
) && TYPE_OVERFLOW_TRAPS (type
)
2241 /* Changing the order of operations changes the semantics. */
2242 if (vect_print_dump_info (REPORT_DETAILS
))
2243 report_vect_op (def_stmt
, "reduction: unsafe int math optimization: ");
2246 else if (SAT_FIXED_POINT_TYPE_P (type
) && check_reduction
)
2248 /* Changing the order of operations changes the semantics. */
2249 if (vect_print_dump_info (REPORT_DETAILS
))
2250 report_vect_op (def_stmt
,
2251 "reduction: unsafe fixed-point math optimization: ");
2255 /* If we detected "res -= x[i]" earlier, rewrite it into
2256 "res += -x[i]" now. If this turns out to be useless reassoc
2257 will clean it up again. */
2258 if (orig_code
== MINUS_EXPR
)
2260 tree rhs
= gimple_assign_rhs2 (def_stmt
);
2261 tree negrhs
= make_ssa_name (SSA_NAME_VAR (rhs
), NULL
);
2262 gimple negate_stmt
= gimple_build_assign_with_ops (NEGATE_EXPR
, negrhs
,
2264 gimple_stmt_iterator gsi
= gsi_for_stmt (def_stmt
);
2265 set_vinfo_for_stmt (negate_stmt
, new_stmt_vec_info (negate_stmt
,
2267 gsi_insert_before (&gsi
, negate_stmt
, GSI_NEW_STMT
);
2268 gimple_assign_set_rhs2 (def_stmt
, negrhs
);
2269 gimple_assign_set_rhs_code (def_stmt
, PLUS_EXPR
);
2270 update_stmt (def_stmt
);
2273 /* Reduction is safe. We're dealing with one of the following:
2274 1) integer arithmetic and no trapv
2275 2) floating point arithmetic, and special flags permit this optimization
2276 3) nested cycle (i.e., outer loop vectorization). */
2277 if (TREE_CODE (op1
) == SSA_NAME
)
2278 def1
= SSA_NAME_DEF_STMT (op1
);
2280 if (TREE_CODE (op2
) == SSA_NAME
)
2281 def2
= SSA_NAME_DEF_STMT (op2
);
2283 if (code
!= COND_EXPR
2284 && ((!def1
|| gimple_nop_p (def1
)) && (!def2
|| gimple_nop_p (def2
))))
2286 if (vect_print_dump_info (REPORT_DETAILS
))
2287 report_vect_op (def_stmt
, "reduction: no defs for operands: ");
2291 /* Check that one def is the reduction def, defined by PHI,
2292 the other def is either defined in the loop ("vect_internal_def"),
2293 or it's an induction (defined by a loop-header phi-node). */
2295 if (def2
&& def2
== phi
2296 && (code
== COND_EXPR
2297 || !def1
|| gimple_nop_p (def1
)
2298 || (def1
&& flow_bb_inside_loop_p (loop
, gimple_bb (def1
))
2299 && (is_gimple_assign (def1
)
2300 || is_gimple_call (def1
)
2301 || STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def1
))
2302 == vect_induction_def
2303 || (gimple_code (def1
) == GIMPLE_PHI
2304 && STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def1
))
2305 == vect_internal_def
2306 && !is_loop_header_bb_p (gimple_bb (def1
)))))))
2308 if (vect_print_dump_info (REPORT_DETAILS
))
2309 report_vect_op (def_stmt
, "detected reduction: ");
2313 if (def1
&& def1
== phi
2314 && (code
== COND_EXPR
2315 || !def2
|| gimple_nop_p (def2
)
2316 || (def2
&& flow_bb_inside_loop_p (loop
, gimple_bb (def2
))
2317 && (is_gimple_assign (def2
)
2318 || is_gimple_call (def2
)
2319 || STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def2
))
2320 == vect_induction_def
2321 || (gimple_code (def2
) == GIMPLE_PHI
2322 && STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def2
))
2323 == vect_internal_def
2324 && !is_loop_header_bb_p (gimple_bb (def2
)))))))
2326 if (check_reduction
)
2328 /* Swap operands (just for simplicity - so that the rest of the code
2329 can assume that the reduction variable is always the last (second)
2331 if (vect_print_dump_info (REPORT_DETAILS
))
2332 report_vect_op (def_stmt
,
2333 "detected reduction: need to swap operands: ");
2335 swap_tree_operands (def_stmt
, gimple_assign_rhs1_ptr (def_stmt
),
2336 gimple_assign_rhs2_ptr (def_stmt
));
2340 if (vect_print_dump_info (REPORT_DETAILS
))
2341 report_vect_op (def_stmt
, "detected reduction: ");
2347 /* Try to find SLP reduction chain. */
2348 if (check_reduction
&& vect_is_slp_reduction (loop_info
, phi
, def_stmt
))
2350 if (vect_print_dump_info (REPORT_DETAILS
))
2351 report_vect_op (def_stmt
, "reduction: detected reduction chain: ");
2356 if (vect_print_dump_info (REPORT_DETAILS
))
2357 report_vect_op (def_stmt
, "reduction: unknown pattern: ");
2362 /* Wrapper around vect_is_simple_reduction_1, that won't modify code
2363 in-place. Arguments as there. */
2366 vect_is_simple_reduction (loop_vec_info loop_info
, gimple phi
,
2367 bool check_reduction
, bool *double_reduc
)
2369 return vect_is_simple_reduction_1 (loop_info
, phi
, check_reduction
,
2370 double_reduc
, false);
2373 /* Wrapper around vect_is_simple_reduction_1, which will modify code
2374 in-place if it enables detection of more reductions. Arguments
2378 vect_force_simple_reduction (loop_vec_info loop_info
, gimple phi
,
2379 bool check_reduction
, bool *double_reduc
)
2381 return vect_is_simple_reduction_1 (loop_info
, phi
, check_reduction
,
2382 double_reduc
, true);
2385 /* Calculate the cost of one scalar iteration of the loop. */
2387 vect_get_single_scalar_iteraion_cost (loop_vec_info loop_vinfo
)
2389 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
2390 basic_block
*bbs
= LOOP_VINFO_BBS (loop_vinfo
);
2391 int nbbs
= loop
->num_nodes
, factor
, scalar_single_iter_cost
= 0;
2392 int innerloop_iters
, i
, stmt_cost
;
2394 /* Count statements in scalar loop. Using this as scalar cost for a single
2397 TODO: Add outer loop support.
2399 TODO: Consider assigning different costs to different scalar
2403 innerloop_iters
= 1;
2405 innerloop_iters
= 50; /* FIXME */
2407 for (i
= 0; i
< nbbs
; i
++)
2409 gimple_stmt_iterator si
;
2410 basic_block bb
= bbs
[i
];
2412 if (bb
->loop_father
== loop
->inner
)
2413 factor
= innerloop_iters
;
2417 for (si
= gsi_start_bb (bb
); !gsi_end_p (si
); gsi_next (&si
))
2419 gimple stmt
= gsi_stmt (si
);
2420 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
2422 if (!is_gimple_assign (stmt
) && !is_gimple_call (stmt
))
2425 /* Skip stmts that are not vectorized inside the loop. */
2427 && !STMT_VINFO_RELEVANT_P (stmt_info
)
2428 && (!STMT_VINFO_LIVE_P (stmt_info
)
2429 || !VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (stmt_info
)))
2430 && !STMT_VINFO_IN_PATTERN_P (stmt_info
))
2433 if (STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt
)))
2435 if (DR_IS_READ (STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt
))))
2436 stmt_cost
= vect_get_cost (scalar_load
);
2438 stmt_cost
= vect_get_cost (scalar_store
);
2441 stmt_cost
= vect_get_cost (scalar_stmt
);
2443 scalar_single_iter_cost
+= stmt_cost
* factor
;
2446 return scalar_single_iter_cost
;
2449 /* Calculate cost of peeling the loop PEEL_ITERS_PROLOGUE times. */
2451 vect_get_known_peeling_cost (loop_vec_info loop_vinfo
, int peel_iters_prologue
,
2452 int *peel_iters_epilogue
,
2453 int scalar_single_iter_cost
)
2455 int peel_guard_costs
= 0;
2456 int vf
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
);
2458 if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo
))
2460 *peel_iters_epilogue
= vf
/2;
2461 if (vect_print_dump_info (REPORT_COST
))
2462 fprintf (vect_dump
, "cost model: "
2463 "epilogue peel iters set to vf/2 because "
2464 "loop iterations are unknown .");
2466 /* If peeled iterations are known but number of scalar loop
2467 iterations are unknown, count a taken branch per peeled loop. */
2468 peel_guard_costs
= 2 * vect_get_cost (cond_branch_taken
);
2472 int niters
= LOOP_VINFO_INT_NITERS (loop_vinfo
);
2473 peel_iters_prologue
= niters
< peel_iters_prologue
?
2474 niters
: peel_iters_prologue
;
2475 *peel_iters_epilogue
= (niters
- peel_iters_prologue
) % vf
;
2476 /* If we need to peel for gaps, but no peeling is required, we have to
2477 peel VF iterations. */
2478 if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo
) && !*peel_iters_epilogue
)
2479 *peel_iters_epilogue
= vf
;
2482 return (peel_iters_prologue
* scalar_single_iter_cost
)
2483 + (*peel_iters_epilogue
* scalar_single_iter_cost
)
2487 /* Function vect_estimate_min_profitable_iters
2489 Return the number of iterations required for the vector version of the
2490 loop to be profitable relative to the cost of the scalar version of the
2493 TODO: Take profile info into account before making vectorization
2494 decisions, if available. */
2497 vect_estimate_min_profitable_iters (loop_vec_info loop_vinfo
)
2500 int min_profitable_iters
;
2501 int peel_iters_prologue
;
2502 int peel_iters_epilogue
;
2503 int vec_inside_cost
= 0;
2504 int vec_outside_cost
= 0;
2505 int scalar_single_iter_cost
= 0;
2506 int scalar_outside_cost
= 0;
2507 int vf
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
);
2508 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
2509 basic_block
*bbs
= LOOP_VINFO_BBS (loop_vinfo
);
2510 int nbbs
= loop
->num_nodes
;
2511 int npeel
= LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo
);
2512 int peel_guard_costs
= 0;
2513 int innerloop_iters
= 0, factor
;
2514 VEC (slp_instance
, heap
) *slp_instances
;
2515 slp_instance instance
;
2517 /* Cost model disabled. */
2518 if (!flag_vect_cost_model
)
2520 if (vect_print_dump_info (REPORT_COST
))
2521 fprintf (vect_dump
, "cost model disabled.");
2525 /* Requires loop versioning tests to handle misalignment. */
2526 if (LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo
))
2528 /* FIXME: Make cost depend on complexity of individual check. */
2530 VEC_length (gimple
, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo
));
2531 if (vect_print_dump_info (REPORT_COST
))
2532 fprintf (vect_dump
, "cost model: Adding cost of checks for loop "
2533 "versioning to treat misalignment.\n");
2536 /* Requires loop versioning with alias checks. */
2537 if (LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo
))
2539 /* FIXME: Make cost depend on complexity of individual check. */
2541 VEC_length (ddr_p
, LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo
));
2542 if (vect_print_dump_info (REPORT_COST
))
2543 fprintf (vect_dump
, "cost model: Adding cost of checks for loop "
2544 "versioning aliasing.\n");
2547 if (LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo
)
2548 || LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo
))
2549 vec_outside_cost
+= vect_get_cost (cond_branch_taken
);
2551 /* Count statements in scalar loop. Using this as scalar cost for a single
2554 TODO: Add outer loop support.
2556 TODO: Consider assigning different costs to different scalar
2561 innerloop_iters
= 50; /* FIXME */
2563 for (i
= 0; i
< nbbs
; i
++)
2565 gimple_stmt_iterator si
;
2566 basic_block bb
= bbs
[i
];
2568 if (bb
->loop_father
== loop
->inner
)
2569 factor
= innerloop_iters
;
2573 for (si
= gsi_start_bb (bb
); !gsi_end_p (si
); gsi_next (&si
))
2575 gimple stmt
= gsi_stmt (si
);
2576 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
2578 if (STMT_VINFO_IN_PATTERN_P (stmt_info
))
2580 stmt
= STMT_VINFO_RELATED_STMT (stmt_info
);
2581 stmt_info
= vinfo_for_stmt (stmt
);
2584 /* Skip stmts that are not vectorized inside the loop. */
2585 if (!STMT_VINFO_RELEVANT_P (stmt_info
)
2586 && (!STMT_VINFO_LIVE_P (stmt_info
)
2587 || !VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (stmt_info
))))
2590 vec_inside_cost
+= STMT_VINFO_INSIDE_OF_LOOP_COST (stmt_info
) * factor
;
2591 /* FIXME: for stmts in the inner-loop in outer-loop vectorization,
2592 some of the "outside" costs are generated inside the outer-loop. */
2593 vec_outside_cost
+= STMT_VINFO_OUTSIDE_OF_LOOP_COST (stmt_info
);
2594 if (is_pattern_stmt_p (stmt_info
)
2595 && STMT_VINFO_PATTERN_DEF_SEQ (stmt_info
))
2597 gimple_stmt_iterator gsi
;
2599 for (gsi
= gsi_start (STMT_VINFO_PATTERN_DEF_SEQ (stmt_info
));
2600 !gsi_end_p (gsi
); gsi_next (&gsi
))
2602 gimple pattern_def_stmt
= gsi_stmt (gsi
);
2603 stmt_vec_info pattern_def_stmt_info
2604 = vinfo_for_stmt (pattern_def_stmt
);
2605 if (STMT_VINFO_RELEVANT_P (pattern_def_stmt_info
)
2606 || STMT_VINFO_LIVE_P (pattern_def_stmt_info
))
2609 += STMT_VINFO_INSIDE_OF_LOOP_COST
2610 (pattern_def_stmt_info
) * factor
;
2612 += STMT_VINFO_OUTSIDE_OF_LOOP_COST
2613 (pattern_def_stmt_info
);
2620 scalar_single_iter_cost
= vect_get_single_scalar_iteraion_cost (loop_vinfo
);
2622 /* Add additional cost for the peeled instructions in prologue and epilogue
2625 FORNOW: If we don't know the value of peel_iters for prologue or epilogue
2626 at compile-time - we assume it's vf/2 (the worst would be vf-1).
2628 TODO: Build an expression that represents peel_iters for prologue and
2629 epilogue to be used in a run-time test. */
2633 peel_iters_prologue
= vf
/2;
2634 if (vect_print_dump_info (REPORT_COST
))
2635 fprintf (vect_dump
, "cost model: "
2636 "prologue peel iters set to vf/2.");
2638 /* If peeling for alignment is unknown, loop bound of main loop becomes
2640 peel_iters_epilogue
= vf
/2;
2641 if (vect_print_dump_info (REPORT_COST
))
2642 fprintf (vect_dump
, "cost model: "
2643 "epilogue peel iters set to vf/2 because "
2644 "peeling for alignment is unknown .");
2646 /* If peeled iterations are unknown, count a taken branch and a not taken
2647 branch per peeled loop. Even if scalar loop iterations are known,
2648 vector iterations are not known since peeled prologue iterations are
2649 not known. Hence guards remain the same. */
2650 peel_guard_costs
+= 2 * (vect_get_cost (cond_branch_taken
)
2651 + vect_get_cost (cond_branch_not_taken
));
2652 vec_outside_cost
+= (peel_iters_prologue
* scalar_single_iter_cost
)
2653 + (peel_iters_epilogue
* scalar_single_iter_cost
)
2658 peel_iters_prologue
= npeel
;
2659 vec_outside_cost
+= vect_get_known_peeling_cost (loop_vinfo
,
2660 peel_iters_prologue
, &peel_iters_epilogue
,
2661 scalar_single_iter_cost
);
2664 /* FORNOW: The scalar outside cost is incremented in one of the
2667 1. The vectorizer checks for alignment and aliasing and generates
2668 a condition that allows dynamic vectorization. A cost model
2669 check is ANDED with the versioning condition. Hence scalar code
2670 path now has the added cost of the versioning check.
2672 if (cost > th & versioning_check)
2675 Hence run-time scalar is incremented by not-taken branch cost.
2677 2. The vectorizer then checks if a prologue is required. If the
2678 cost model check was not done before during versioning, it has to
2679 be done before the prologue check.
2682 prologue = scalar_iters
2687 if (prologue == num_iters)
2690 Hence the run-time scalar cost is incremented by a taken branch,
2691 plus a not-taken branch, plus a taken branch cost.
2693 3. The vectorizer then checks if an epilogue is required. If the
2694 cost model check was not done before during prologue check, it
2695 has to be done with the epilogue check.
2701 if (prologue == num_iters)
2704 if ((cost <= th) | (scalar_iters-prologue-epilogue == 0))
2707 Hence the run-time scalar cost should be incremented by 2 taken
2710 TODO: The back end may reorder the BBS's differently and reverse
2711 conditions/branch directions. Change the estimates below to
2712 something more reasonable. */
2714 /* If the number of iterations is known and we do not do versioning, we can
2715 decide whether to vectorize at compile time. Hence the scalar version
2716 do not carry cost model guard costs. */
2717 if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo
)
2718 || LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo
)
2719 || LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo
))
2721 /* Cost model check occurs at versioning. */
2722 if (LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo
)
2723 || LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo
))
2724 scalar_outside_cost
+= vect_get_cost (cond_branch_not_taken
);
2727 /* Cost model check occurs at prologue generation. */
2728 if (LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo
) < 0)
2729 scalar_outside_cost
+= 2 * vect_get_cost (cond_branch_taken
)
2730 + vect_get_cost (cond_branch_not_taken
);
2731 /* Cost model check occurs at epilogue generation. */
2733 scalar_outside_cost
+= 2 * vect_get_cost (cond_branch_taken
);
2737 /* Add SLP costs. */
2738 slp_instances
= LOOP_VINFO_SLP_INSTANCES (loop_vinfo
);
2739 FOR_EACH_VEC_ELT (slp_instance
, slp_instances
, i
, instance
)
2741 vec_outside_cost
+= SLP_INSTANCE_OUTSIDE_OF_LOOP_COST (instance
);
2742 vec_inside_cost
+= SLP_INSTANCE_INSIDE_OF_LOOP_COST (instance
);
2745 /* Calculate number of iterations required to make the vector version
2746 profitable, relative to the loop bodies only. The following condition
2748 SIC * niters + SOC > VIC * ((niters-PL_ITERS-EP_ITERS)/VF) + VOC
2750 SIC = scalar iteration cost, VIC = vector iteration cost,
2751 VOC = vector outside cost, VF = vectorization factor,
2752 PL_ITERS = prologue iterations, EP_ITERS= epilogue iterations
2753 SOC = scalar outside cost for run time cost model check. */
2755 if ((scalar_single_iter_cost
* vf
) > vec_inside_cost
)
2757 if (vec_outside_cost
<= 0)
2758 min_profitable_iters
= 1;
2761 min_profitable_iters
= ((vec_outside_cost
- scalar_outside_cost
) * vf
2762 - vec_inside_cost
* peel_iters_prologue
2763 - vec_inside_cost
* peel_iters_epilogue
)
2764 / ((scalar_single_iter_cost
* vf
)
2767 if ((scalar_single_iter_cost
* vf
* min_profitable_iters
)
2768 <= ((vec_inside_cost
* min_profitable_iters
)
2769 + ((vec_outside_cost
- scalar_outside_cost
) * vf
)))
2770 min_profitable_iters
++;
2773 /* vector version will never be profitable. */
2776 if (vect_print_dump_info (REPORT_COST
))
2777 fprintf (vect_dump
, "cost model: the vector iteration cost = %d "
2778 "divided by the scalar iteration cost = %d "
2779 "is greater or equal to the vectorization factor = %d.",
2780 vec_inside_cost
, scalar_single_iter_cost
, vf
);
2784 if (vect_print_dump_info (REPORT_COST
))
2786 fprintf (vect_dump
, "Cost model analysis: \n");
2787 fprintf (vect_dump
, " Vector inside of loop cost: %d\n",
2789 fprintf (vect_dump
, " Vector outside of loop cost: %d\n",
2791 fprintf (vect_dump
, " Scalar iteration cost: %d\n",
2792 scalar_single_iter_cost
);
2793 fprintf (vect_dump
, " Scalar outside cost: %d\n", scalar_outside_cost
);
2794 fprintf (vect_dump
, " prologue iterations: %d\n",
2795 peel_iters_prologue
);
2796 fprintf (vect_dump
, " epilogue iterations: %d\n",
2797 peel_iters_epilogue
);
2798 fprintf (vect_dump
, " Calculated minimum iters for profitability: %d\n",
2799 min_profitable_iters
);
2802 min_profitable_iters
=
2803 min_profitable_iters
< vf
? vf
: min_profitable_iters
;
2805 /* Because the condition we create is:
2806 if (niters <= min_profitable_iters)
2807 then skip the vectorized loop. */
2808 min_profitable_iters
--;
2810 if (vect_print_dump_info (REPORT_COST
))
2811 fprintf (vect_dump
, " Profitability threshold = %d\n",
2812 min_profitable_iters
);
2814 return min_profitable_iters
;
2818 /* TODO: Close dependency between vect_model_*_cost and vectorizable_*
2819 functions. Design better to avoid maintenance issues. */
2821 /* Function vect_model_reduction_cost.
2823 Models cost for a reduction operation, including the vector ops
2824 generated within the strip-mine loop, the initial definition before
2825 the loop, and the epilogue code that must be generated. */
2828 vect_model_reduction_cost (stmt_vec_info stmt_info
, enum tree_code reduc_code
,
2832 enum tree_code code
;
2835 gimple stmt
, orig_stmt
;
2837 enum machine_mode mode
;
2838 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
2839 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
2842 /* Cost of reduction op inside loop. */
2843 STMT_VINFO_INSIDE_OF_LOOP_COST (stmt_info
)
2844 += ncopies
* vect_get_cost (vector_stmt
);
2846 stmt
= STMT_VINFO_STMT (stmt_info
);
2848 switch (get_gimple_rhs_class (gimple_assign_rhs_code (stmt
)))
2850 case GIMPLE_SINGLE_RHS
:
2851 gcc_assert (TREE_OPERAND_LENGTH (gimple_assign_rhs1 (stmt
)) == ternary_op
);
2852 reduction_op
= TREE_OPERAND (gimple_assign_rhs1 (stmt
), 2);
2854 case GIMPLE_UNARY_RHS
:
2855 reduction_op
= gimple_assign_rhs1 (stmt
);
2857 case GIMPLE_BINARY_RHS
:
2858 reduction_op
= gimple_assign_rhs2 (stmt
);
2860 case GIMPLE_TERNARY_RHS
:
2861 reduction_op
= gimple_assign_rhs3 (stmt
);
2867 vectype
= get_vectype_for_scalar_type (TREE_TYPE (reduction_op
));
2870 if (vect_print_dump_info (REPORT_COST
))
2872 fprintf (vect_dump
, "unsupported data-type ");
2873 print_generic_expr (vect_dump
, TREE_TYPE (reduction_op
), TDF_SLIM
);
2878 mode
= TYPE_MODE (vectype
);
2879 orig_stmt
= STMT_VINFO_RELATED_STMT (stmt_info
);
2882 orig_stmt
= STMT_VINFO_STMT (stmt_info
);
2884 code
= gimple_assign_rhs_code (orig_stmt
);
2886 /* Add in cost for initial definition. */
2887 outer_cost
+= vect_get_cost (scalar_to_vec
);
2889 /* Determine cost of epilogue code.
2891 We have a reduction operator that will reduce the vector in one statement.
2892 Also requires scalar extract. */
2894 if (!nested_in_vect_loop_p (loop
, orig_stmt
))
2896 if (reduc_code
!= ERROR_MARK
)
2897 outer_cost
+= vect_get_cost (vector_stmt
)
2898 + vect_get_cost (vec_to_scalar
);
2901 int vec_size_in_bits
= tree_low_cst (TYPE_SIZE (vectype
), 1);
2903 TYPE_SIZE (TREE_TYPE (gimple_assign_lhs (orig_stmt
)));
2904 int element_bitsize
= tree_low_cst (bitsize
, 1);
2905 int nelements
= vec_size_in_bits
/ element_bitsize
;
2907 optab
= optab_for_tree_code (code
, vectype
, optab_default
);
2909 /* We have a whole vector shift available. */
2910 if (VECTOR_MODE_P (mode
)
2911 && optab_handler (optab
, mode
) != CODE_FOR_nothing
2912 && optab_handler (vec_shr_optab
, mode
) != CODE_FOR_nothing
)
2913 /* Final reduction via vector shifts and the reduction operator. Also
2914 requires scalar extract. */
2915 outer_cost
+= ((exact_log2(nelements
) * 2)
2916 * vect_get_cost (vector_stmt
)
2917 + vect_get_cost (vec_to_scalar
));
2919 /* Use extracts and reduction op for final reduction. For N elements,
2920 we have N extracts and N-1 reduction ops. */
2921 outer_cost
+= ((nelements
+ nelements
- 1)
2922 * vect_get_cost (vector_stmt
));
2926 STMT_VINFO_OUTSIDE_OF_LOOP_COST (stmt_info
) = outer_cost
;
2928 if (vect_print_dump_info (REPORT_COST
))
2929 fprintf (vect_dump
, "vect_model_reduction_cost: inside_cost = %d, "
2930 "outside_cost = %d .", STMT_VINFO_INSIDE_OF_LOOP_COST (stmt_info
),
2931 STMT_VINFO_OUTSIDE_OF_LOOP_COST (stmt_info
));
2937 /* Function vect_model_induction_cost.
2939 Models cost for induction operations. */
2942 vect_model_induction_cost (stmt_vec_info stmt_info
, int ncopies
)
2944 /* loop cost for vec_loop. */
2945 STMT_VINFO_INSIDE_OF_LOOP_COST (stmt_info
)
2946 = ncopies
* vect_get_cost (vector_stmt
);
2947 /* prologue cost for vec_init and vec_step. */
2948 STMT_VINFO_OUTSIDE_OF_LOOP_COST (stmt_info
)
2949 = 2 * vect_get_cost (scalar_to_vec
);
2951 if (vect_print_dump_info (REPORT_COST
))
2952 fprintf (vect_dump
, "vect_model_induction_cost: inside_cost = %d, "
2953 "outside_cost = %d .", STMT_VINFO_INSIDE_OF_LOOP_COST (stmt_info
),
2954 STMT_VINFO_OUTSIDE_OF_LOOP_COST (stmt_info
));
2958 /* Function get_initial_def_for_induction
2961 STMT - a stmt that performs an induction operation in the loop.
2962 IV_PHI - the initial value of the induction variable
2965 Return a vector variable, initialized with the first VF values of
2966 the induction variable. E.g., for an iv with IV_PHI='X' and
2967 evolution S, for a vector of 4 units, we want to return:
2968 [X, X + S, X + 2*S, X + 3*S]. */
2971 get_initial_def_for_induction (gimple iv_phi
)
2973 stmt_vec_info stmt_vinfo
= vinfo_for_stmt (iv_phi
);
2974 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_vinfo
);
2975 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
2979 edge pe
= loop_preheader_edge (loop
);
2980 struct loop
*iv_loop
;
2982 tree vec
, vec_init
, vec_step
, t
;
2986 gimple init_stmt
, induction_phi
, new_stmt
;
2987 tree induc_def
, vec_def
, vec_dest
;
2988 tree init_expr
, step_expr
;
2989 int vf
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
);
2994 stmt_vec_info phi_info
= vinfo_for_stmt (iv_phi
);
2995 bool nested_in_vect_loop
= false;
2996 gimple_seq stmts
= NULL
;
2997 imm_use_iterator imm_iter
;
2998 use_operand_p use_p
;
3002 gimple_stmt_iterator si
;
3003 basic_block bb
= gimple_bb (iv_phi
);
3007 /* Is phi in an inner-loop, while vectorizing an enclosing outer-loop? */
3008 if (nested_in_vect_loop_p (loop
, iv_phi
))
3010 nested_in_vect_loop
= true;
3011 iv_loop
= loop
->inner
;
3015 gcc_assert (iv_loop
== (gimple_bb (iv_phi
))->loop_father
);
3017 latch_e
= loop_latch_edge (iv_loop
);
3018 loop_arg
= PHI_ARG_DEF_FROM_EDGE (iv_phi
, latch_e
);
3020 access_fn
= analyze_scalar_evolution (iv_loop
, PHI_RESULT (iv_phi
));
3021 gcc_assert (access_fn
);
3022 STRIP_NOPS (access_fn
);
3023 ok
= vect_is_simple_iv_evolution (iv_loop
->num
, access_fn
,
3024 &init_expr
, &step_expr
);
3026 pe
= loop_preheader_edge (iv_loop
);
3028 scalar_type
= TREE_TYPE (init_expr
);
3029 vectype
= get_vectype_for_scalar_type (scalar_type
);
3030 resvectype
= get_vectype_for_scalar_type (TREE_TYPE (PHI_RESULT (iv_phi
)));
3031 gcc_assert (vectype
);
3032 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
3033 ncopies
= vf
/ nunits
;
3035 gcc_assert (phi_info
);
3036 gcc_assert (ncopies
>= 1);
3038 /* Find the first insertion point in the BB. */
3039 si
= gsi_after_labels (bb
);
3041 /* Create the vector that holds the initial_value of the induction. */
3042 if (nested_in_vect_loop
)
3044 /* iv_loop is nested in the loop to be vectorized. init_expr had already
3045 been created during vectorization of previous stmts. We obtain it
3046 from the STMT_VINFO_VEC_STMT of the defining stmt. */
3047 tree iv_def
= PHI_ARG_DEF_FROM_EDGE (iv_phi
,
3048 loop_preheader_edge (iv_loop
));
3049 vec_init
= vect_get_vec_def_for_operand (iv_def
, iv_phi
, NULL
);
3053 VEC(constructor_elt
,gc
) *v
;
3055 /* iv_loop is the loop to be vectorized. Create:
3056 vec_init = [X, X+S, X+2*S, X+3*S] (S = step_expr, X = init_expr) */
3057 new_var
= vect_get_new_vect_var (scalar_type
, vect_scalar_var
, "var_");
3058 add_referenced_var (new_var
);
3060 new_name
= force_gimple_operand (init_expr
, &stmts
, false, new_var
);
3063 new_bb
= gsi_insert_seq_on_edge_immediate (pe
, stmts
);
3064 gcc_assert (!new_bb
);
3067 v
= VEC_alloc (constructor_elt
, gc
, nunits
);
3068 CONSTRUCTOR_APPEND_ELT (v
, NULL_TREE
, new_name
);
3069 for (i
= 1; i
< nunits
; i
++)
3071 /* Create: new_name_i = new_name + step_expr */
3072 enum tree_code code
= POINTER_TYPE_P (scalar_type
)
3073 ? POINTER_PLUS_EXPR
: PLUS_EXPR
;
3074 init_stmt
= gimple_build_assign_with_ops (code
, new_var
,
3075 new_name
, step_expr
);
3076 new_name
= make_ssa_name (new_var
, init_stmt
);
3077 gimple_assign_set_lhs (init_stmt
, new_name
);
3079 new_bb
= gsi_insert_on_edge_immediate (pe
, init_stmt
);
3080 gcc_assert (!new_bb
);
3082 if (vect_print_dump_info (REPORT_DETAILS
))
3084 fprintf (vect_dump
, "created new init_stmt: ");
3085 print_gimple_stmt (vect_dump
, init_stmt
, 0, TDF_SLIM
);
3087 CONSTRUCTOR_APPEND_ELT (v
, NULL_TREE
, new_name
);
3089 /* Create a vector from [new_name_0, new_name_1, ..., new_name_nunits-1] */
3090 vec
= build_constructor (vectype
, v
);
3091 vec_init
= vect_init_vector (iv_phi
, vec
, vectype
, NULL
);
3095 /* Create the vector that holds the step of the induction. */
3096 if (nested_in_vect_loop
)
3097 /* iv_loop is nested in the loop to be vectorized. Generate:
3098 vec_step = [S, S, S, S] */
3099 new_name
= step_expr
;
3102 /* iv_loop is the loop to be vectorized. Generate:
3103 vec_step = [VF*S, VF*S, VF*S, VF*S] */
3104 expr
= build_int_cst (TREE_TYPE (step_expr
), vf
);
3105 new_name
= fold_build2 (MULT_EXPR
, TREE_TYPE (step_expr
),
3109 t
= unshare_expr (new_name
);
3110 gcc_assert (CONSTANT_CLASS_P (new_name
));
3111 stepvectype
= get_vectype_for_scalar_type (TREE_TYPE (new_name
));
3112 gcc_assert (stepvectype
);
3113 vec
= build_vector_from_val (stepvectype
, t
);
3114 vec_step
= vect_init_vector (iv_phi
, vec
, stepvectype
, NULL
);
3117 /* Create the following def-use cycle:
3122 vec_iv = PHI <vec_init, vec_loop>
3126 vec_loop = vec_iv + vec_step; */
3128 /* Create the induction-phi that defines the induction-operand. */
3129 vec_dest
= vect_get_new_vect_var (vectype
, vect_simple_var
, "vec_iv_");
3130 add_referenced_var (vec_dest
);
3131 induction_phi
= create_phi_node (vec_dest
, iv_loop
->header
);
3132 set_vinfo_for_stmt (induction_phi
,
3133 new_stmt_vec_info (induction_phi
, loop_vinfo
, NULL
));
3134 induc_def
= PHI_RESULT (induction_phi
);
3136 /* Create the iv update inside the loop */
3137 new_stmt
= gimple_build_assign_with_ops (PLUS_EXPR
, vec_dest
,
3138 induc_def
, vec_step
);
3139 vec_def
= make_ssa_name (vec_dest
, new_stmt
);
3140 gimple_assign_set_lhs (new_stmt
, vec_def
);
3141 gsi_insert_before (&si
, new_stmt
, GSI_SAME_STMT
);
3142 set_vinfo_for_stmt (new_stmt
, new_stmt_vec_info (new_stmt
, loop_vinfo
,
3145 /* Set the arguments of the phi node: */
3146 add_phi_arg (induction_phi
, vec_init
, pe
, UNKNOWN_LOCATION
);
3147 add_phi_arg (induction_phi
, vec_def
, loop_latch_edge (iv_loop
),
3151 /* In case that vectorization factor (VF) is bigger than the number
3152 of elements that we can fit in a vectype (nunits), we have to generate
3153 more than one vector stmt - i.e - we need to "unroll" the
3154 vector stmt by a factor VF/nunits. For more details see documentation
3155 in vectorizable_operation. */
3159 stmt_vec_info prev_stmt_vinfo
;
3160 /* FORNOW. This restriction should be relaxed. */
3161 gcc_assert (!nested_in_vect_loop
);
3163 /* Create the vector that holds the step of the induction. */
3164 expr
= build_int_cst (TREE_TYPE (step_expr
), nunits
);
3165 new_name
= fold_build2 (MULT_EXPR
, TREE_TYPE (step_expr
),
3167 t
= unshare_expr (new_name
);
3168 gcc_assert (CONSTANT_CLASS_P (new_name
));
3169 vec
= build_vector_from_val (stepvectype
, t
);
3170 vec_step
= vect_init_vector (iv_phi
, vec
, stepvectype
, NULL
);
3172 vec_def
= induc_def
;
3173 prev_stmt_vinfo
= vinfo_for_stmt (induction_phi
);
3174 for (i
= 1; i
< ncopies
; i
++)
3176 /* vec_i = vec_prev + vec_step */
3177 new_stmt
= gimple_build_assign_with_ops (PLUS_EXPR
, vec_dest
,
3179 vec_def
= make_ssa_name (vec_dest
, new_stmt
);
3180 gimple_assign_set_lhs (new_stmt
, vec_def
);
3182 gsi_insert_before (&si
, new_stmt
, GSI_SAME_STMT
);
3183 if (!useless_type_conversion_p (resvectype
, vectype
))
3185 new_stmt
= gimple_build_assign_with_ops
3187 vect_get_new_vect_var (resvectype
, vect_simple_var
,
3189 build1 (VIEW_CONVERT_EXPR
, resvectype
,
3190 gimple_assign_lhs (new_stmt
)), NULL_TREE
);
3191 gimple_assign_set_lhs (new_stmt
,
3193 (gimple_assign_lhs (new_stmt
), new_stmt
));
3194 gsi_insert_before (&si
, new_stmt
, GSI_SAME_STMT
);
3196 set_vinfo_for_stmt (new_stmt
,
3197 new_stmt_vec_info (new_stmt
, loop_vinfo
, NULL
));
3198 STMT_VINFO_RELATED_STMT (prev_stmt_vinfo
) = new_stmt
;
3199 prev_stmt_vinfo
= vinfo_for_stmt (new_stmt
);
3203 if (nested_in_vect_loop
)
3205 /* Find the loop-closed exit-phi of the induction, and record
3206 the final vector of induction results: */
3208 FOR_EACH_IMM_USE_FAST (use_p
, imm_iter
, loop_arg
)
3210 if (!flow_bb_inside_loop_p (iv_loop
, gimple_bb (USE_STMT (use_p
))))
3212 exit_phi
= USE_STMT (use_p
);
3218 stmt_vec_info stmt_vinfo
= vinfo_for_stmt (exit_phi
);
3219 /* FORNOW. Currently not supporting the case that an inner-loop induction
3220 is not used in the outer-loop (i.e. only outside the outer-loop). */
3221 gcc_assert (STMT_VINFO_RELEVANT_P (stmt_vinfo
)
3222 && !STMT_VINFO_LIVE_P (stmt_vinfo
));
3224 STMT_VINFO_VEC_STMT (stmt_vinfo
) = new_stmt
;
3225 if (vect_print_dump_info (REPORT_DETAILS
))
3227 fprintf (vect_dump
, "vector of inductions after inner-loop:");
3228 print_gimple_stmt (vect_dump
, new_stmt
, 0, TDF_SLIM
);
3234 if (vect_print_dump_info (REPORT_DETAILS
))
3236 fprintf (vect_dump
, "transform induction: created def-use cycle: ");
3237 print_gimple_stmt (vect_dump
, induction_phi
, 0, TDF_SLIM
);
3238 fprintf (vect_dump
, "\n");
3239 print_gimple_stmt (vect_dump
, SSA_NAME_DEF_STMT (vec_def
), 0, TDF_SLIM
);
3242 STMT_VINFO_VEC_STMT (phi_info
) = induction_phi
;
3243 if (!useless_type_conversion_p (resvectype
, vectype
))
3245 new_stmt
= gimple_build_assign_with_ops
3247 vect_get_new_vect_var (resvectype
, vect_simple_var
, "vec_iv_"),
3248 build1 (VIEW_CONVERT_EXPR
, resvectype
, induc_def
), NULL_TREE
);
3249 induc_def
= make_ssa_name (gimple_assign_lhs (new_stmt
), new_stmt
);
3250 gimple_assign_set_lhs (new_stmt
, induc_def
);
3251 si
= gsi_start_bb (bb
);
3252 gsi_insert_before (&si
, new_stmt
, GSI_SAME_STMT
);
3253 set_vinfo_for_stmt (new_stmt
,
3254 new_stmt_vec_info (new_stmt
, loop_vinfo
, NULL
));
3255 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (new_stmt
))
3256 = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (induction_phi
));
3263 /* Function get_initial_def_for_reduction
3266 STMT - a stmt that performs a reduction operation in the loop.
3267 INIT_VAL - the initial value of the reduction variable
3270 ADJUSTMENT_DEF - a tree that holds a value to be added to the final result
3271 of the reduction (used for adjusting the epilog - see below).
3272 Return a vector variable, initialized according to the operation that STMT
3273 performs. This vector will be used as the initial value of the
3274 vector of partial results.
3276 Option1 (adjust in epilog): Initialize the vector as follows:
3277 add/bit or/xor: [0,0,...,0,0]
3278 mult/bit and: [1,1,...,1,1]
3279 min/max/cond_expr: [init_val,init_val,..,init_val,init_val]
3280 and when necessary (e.g. add/mult case) let the caller know
3281 that it needs to adjust the result by init_val.
3283 Option2: Initialize the vector as follows:
3284 add/bit or/xor: [init_val,0,0,...,0]
3285 mult/bit and: [init_val,1,1,...,1]
3286 min/max/cond_expr: [init_val,init_val,...,init_val]
3287 and no adjustments are needed.
3289 For example, for the following code:
3295 STMT is 's = s + a[i]', and the reduction variable is 's'.
3296 For a vector of 4 units, we want to return either [0,0,0,init_val],
3297 or [0,0,0,0] and let the caller know that it needs to adjust
3298 the result at the end by 'init_val'.
3300 FORNOW, we are using the 'adjust in epilog' scheme, because this way the
3301 initialization vector is simpler (same element in all entries), if
3302 ADJUSTMENT_DEF is not NULL, and Option2 otherwise.
3304 A cost model should help decide between these two schemes. */
3307 get_initial_def_for_reduction (gimple stmt
, tree init_val
,
3308 tree
*adjustment_def
)
3310 stmt_vec_info stmt_vinfo
= vinfo_for_stmt (stmt
);
3311 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_vinfo
);
3312 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
3313 tree scalar_type
= TREE_TYPE (init_val
);
3314 tree vectype
= get_vectype_for_scalar_type (scalar_type
);
3316 enum tree_code code
= gimple_assign_rhs_code (stmt
);
3321 bool nested_in_vect_loop
= false;
3323 REAL_VALUE_TYPE real_init_val
= dconst0
;
3324 int int_init_val
= 0;
3325 gimple def_stmt
= NULL
;
3327 gcc_assert (vectype
);
3328 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
3330 gcc_assert (POINTER_TYPE_P (scalar_type
) || INTEGRAL_TYPE_P (scalar_type
)
3331 || SCALAR_FLOAT_TYPE_P (scalar_type
));
3333 if (nested_in_vect_loop_p (loop
, stmt
))
3334 nested_in_vect_loop
= true;
3336 gcc_assert (loop
== (gimple_bb (stmt
))->loop_father
);
3338 /* In case of double reduction we only create a vector variable to be put
3339 in the reduction phi node. The actual statement creation is done in
3340 vect_create_epilog_for_reduction. */
3341 if (adjustment_def
&& nested_in_vect_loop
3342 && TREE_CODE (init_val
) == SSA_NAME
3343 && (def_stmt
= SSA_NAME_DEF_STMT (init_val
))
3344 && gimple_code (def_stmt
) == GIMPLE_PHI
3345 && flow_bb_inside_loop_p (loop
, gimple_bb (def_stmt
))
3346 && vinfo_for_stmt (def_stmt
)
3347 && STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def_stmt
))
3348 == vect_double_reduction_def
)
3350 *adjustment_def
= NULL
;
3351 return vect_create_destination_var (init_val
, vectype
);
3354 if (TREE_CONSTANT (init_val
))
3356 if (SCALAR_FLOAT_TYPE_P (scalar_type
))
3357 init_value
= build_real (scalar_type
, TREE_REAL_CST (init_val
));
3359 init_value
= build_int_cst (scalar_type
, TREE_INT_CST_LOW (init_val
));
3362 init_value
= init_val
;
3366 case WIDEN_SUM_EXPR
:
3374 /* ADJUSMENT_DEF is NULL when called from
3375 vect_create_epilog_for_reduction to vectorize double reduction. */
3378 if (nested_in_vect_loop
)
3379 *adjustment_def
= vect_get_vec_def_for_operand (init_val
, stmt
,
3382 *adjustment_def
= init_val
;
3385 if (code
== MULT_EXPR
)
3387 real_init_val
= dconst1
;
3391 if (code
== BIT_AND_EXPR
)
3394 if (SCALAR_FLOAT_TYPE_P (scalar_type
))
3395 def_for_init
= build_real (scalar_type
, real_init_val
);
3397 def_for_init
= build_int_cst (scalar_type
, int_init_val
);
3399 /* Create a vector of '0' or '1' except the first element. */
3400 elts
= XALLOCAVEC (tree
, nunits
);
3401 for (i
= nunits
- 2; i
>= 0; --i
)
3402 elts
[i
+ 1] = def_for_init
;
3404 /* Option1: the first element is '0' or '1' as well. */
3407 elts
[0] = def_for_init
;
3408 init_def
= build_vector (vectype
, elts
);
3412 /* Option2: the first element is INIT_VAL. */
3414 if (TREE_CONSTANT (init_val
))
3415 init_def
= build_vector (vectype
, elts
);
3418 VEC(constructor_elt
,gc
) *v
;
3419 v
= VEC_alloc (constructor_elt
, gc
, nunits
);
3420 CONSTRUCTOR_APPEND_ELT (v
, NULL_TREE
, init_val
);
3421 for (i
= 1; i
< nunits
; ++i
)
3422 CONSTRUCTOR_APPEND_ELT (v
, NULL_TREE
, elts
[i
]);
3423 init_def
= build_constructor (vectype
, v
);
3433 *adjustment_def
= NULL_TREE
;
3434 init_def
= vect_get_vec_def_for_operand (init_val
, stmt
, NULL
);
3438 init_def
= build_vector_from_val (vectype
, init_value
);
/* Function vect_create_epilog_for_reduction

   Create code at the loop-epilog to finalize the result of a reduction
   computation.

   VECT_DEFS is list of vector of partial results, i.e., the lhs's of vector
     reduction statements.
   STMT is the scalar reduction stmt that is being vectorized.
   NCOPIES is > 1 in case the vectorization factor (VF) is bigger than the
     number of elements that we can fit in a vectype (nunits).  In this case
     we have to generate more than one vector stmt - i.e - we need to "unroll"
     the vector stmt by a factor VF/nunits.  For more details see documentation
     in vectorizable_operation.
   REDUC_CODE is the tree-code for the epilog reduction.
   REDUCTION_PHIS is a list of the phi-nodes that carry the reduction
     computation.
   REDUC_INDEX is the index of the operand in the right hand side of the
     statement that is defined by REDUCTION_PHI.
   DOUBLE_REDUC is TRUE if double reduction phi nodes should be handled.
   SLP_NODE is an SLP node containing a group of reduction statements. The
     first one in this group is STMT.

   This function:
   1. Creates the reduction def-use cycles: sets the arguments for
      REDUCTION_PHIS:
      The loop-entry argument is the vectorized initial-value of the reduction.
      The loop-latch argument is taken from VECT_DEFS - the vector of partial
      sums.
   2. "Reduces" each vector of partial results VECT_DEFS into a single result,
      by applying the operation specified by REDUC_CODE if available, or by
      other means (whole-vector shifts or a scalar loop).
      The function also creates a new phi node at the loop exit to preserve
      loop-closed form, as illustrated below.

   The flow at the entry to this function:

        loop:
          vec_def = phi <null, null>            # REDUCTION_PHI
          VECT_DEF = vector_stmt                # vectorized form of STMT
          s_loop = scalar_stmt                  # (scalar) STMT
        loop_exit:
          s_out0 = phi <s_loop>                 # (scalar) EXIT_PHI
          use <s_out0>
          use <s_out0>

   The above is transformed by this function into:

        loop:
          vec_def = phi <vec_init, VECT_DEF>    # REDUCTION_PHI
          VECT_DEF = vector_stmt                # vectorized form of STMT
          s_loop = scalar_stmt                  # (scalar) STMT
        loop_exit:
          s_out0 = phi <s_loop>                 # (scalar) EXIT_PHI
          v_out1 = phi <VECT_DEF>               # NEW_EXIT_PHI
          v_out2 = reduce <v_out1>
          s_out3 = extract_field <v_out2, 0>
          s_out4 = adjust_result <s_out3>
          use <s_out4>
          use <s_out4>         */
3511 vect_create_epilog_for_reduction (VEC (tree
, heap
) *vect_defs
, gimple stmt
,
3512 int ncopies
, enum tree_code reduc_code
,
3513 VEC (gimple
, heap
) *reduction_phis
,
3514 int reduc_index
, bool double_reduc
,
3517 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
3518 stmt_vec_info prev_phi_info
;
3520 enum machine_mode mode
;
3521 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
3522 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
), *outer_loop
= NULL
;
3523 basic_block exit_bb
;
3526 gimple new_phi
= NULL
, phi
;
3527 gimple_stmt_iterator exit_gsi
;
3529 tree new_temp
= NULL_TREE
, new_dest
, new_name
, new_scalar_dest
;
3530 gimple epilog_stmt
= NULL
;
3531 enum tree_code code
= gimple_assign_rhs_code (stmt
);
3533 tree bitsize
, bitpos
;
3534 tree adjustment_def
= NULL
;
3535 tree vec_initial_def
= NULL
;
3536 tree reduction_op
, expr
, def
;
3537 tree orig_name
, scalar_result
;
3538 imm_use_iterator imm_iter
, phi_imm_iter
;
3539 use_operand_p use_p
, phi_use_p
;
3540 bool extract_scalar_result
= false;
3541 gimple use_stmt
, orig_stmt
, reduction_phi
= NULL
;
3542 bool nested_in_vect_loop
= false;
3543 VEC (gimple
, heap
) *new_phis
= NULL
;
3544 VEC (gimple
, heap
) *inner_phis
= NULL
;
3545 enum vect_def_type dt
= vect_unknown_def_type
;
3547 VEC (tree
, heap
) *scalar_results
= NULL
;
3548 unsigned int group_size
= 1, k
, ratio
;
3549 VEC (tree
, heap
) *vec_initial_defs
= NULL
;
3550 VEC (gimple
, heap
) *phis
;
3551 bool slp_reduc
= false;
3552 tree new_phi_result
;
3553 gimple inner_phi
= NULL
;
3556 group_size
= VEC_length (gimple
, SLP_TREE_SCALAR_STMTS (slp_node
));
3558 if (nested_in_vect_loop_p (loop
, stmt
))
3562 nested_in_vect_loop
= true;
3563 gcc_assert (!slp_node
);
3566 switch (get_gimple_rhs_class (gimple_assign_rhs_code (stmt
)))
3568 case GIMPLE_SINGLE_RHS
:
3569 gcc_assert (TREE_OPERAND_LENGTH (gimple_assign_rhs1 (stmt
))
3571 reduction_op
= TREE_OPERAND (gimple_assign_rhs1 (stmt
), reduc_index
);
3573 case GIMPLE_UNARY_RHS
:
3574 reduction_op
= gimple_assign_rhs1 (stmt
);
3576 case GIMPLE_BINARY_RHS
:
3577 reduction_op
= reduc_index
?
3578 gimple_assign_rhs2 (stmt
) : gimple_assign_rhs1 (stmt
);
3580 case GIMPLE_TERNARY_RHS
:
3581 reduction_op
= gimple_op (stmt
, reduc_index
+ 1);
3587 vectype
= get_vectype_for_scalar_type (TREE_TYPE (reduction_op
));
3588 gcc_assert (vectype
);
3589 mode
= TYPE_MODE (vectype
);
3591 /* 1. Create the reduction def-use cycle:
3592 Set the arguments of REDUCTION_PHIS, i.e., transform
3595 vec_def = phi <null, null> # REDUCTION_PHI
3596 VECT_DEF = vector_stmt # vectorized form of STMT
3602 vec_def = phi <vec_init, VECT_DEF> # REDUCTION_PHI
3603 VECT_DEF = vector_stmt # vectorized form of STMT
3606 (in case of SLP, do it for all the phis). */
3608 /* Get the loop-entry arguments. */
3610 vect_get_vec_defs (reduction_op
, NULL_TREE
, stmt
, &vec_initial_defs
,
3611 NULL
, slp_node
, reduc_index
);
3614 vec_initial_defs
= VEC_alloc (tree
, heap
, 1);
3615 /* For the case of reduction, vect_get_vec_def_for_operand returns
3616 the scalar def before the loop, that defines the initial value
3617 of the reduction variable. */
3618 vec_initial_def
= vect_get_vec_def_for_operand (reduction_op
, stmt
,
3620 VEC_quick_push (tree
, vec_initial_defs
, vec_initial_def
);
3623 /* Set phi nodes arguments. */
3624 FOR_EACH_VEC_ELT (gimple
, reduction_phis
, i
, phi
)
3626 tree vec_init_def
= VEC_index (tree
, vec_initial_defs
, i
);
3627 tree def
= VEC_index (tree
, vect_defs
, i
);
3628 for (j
= 0; j
< ncopies
; j
++)
3630 /* Set the loop-entry arg of the reduction-phi. */
3631 add_phi_arg (phi
, vec_init_def
, loop_preheader_edge (loop
),
3634 /* Set the loop-latch arg for the reduction-phi. */
3636 def
= vect_get_vec_def_for_stmt_copy (vect_unknown_def_type
, def
);
3638 add_phi_arg (phi
, def
, loop_latch_edge (loop
), UNKNOWN_LOCATION
);
3640 if (vect_print_dump_info (REPORT_DETAILS
))
3642 fprintf (vect_dump
, "transform reduction: created def-use"
3644 print_gimple_stmt (vect_dump
, phi
, 0, TDF_SLIM
);
3645 fprintf (vect_dump
, "\n");
3646 print_gimple_stmt (vect_dump
, SSA_NAME_DEF_STMT (def
), 0,
3650 phi
= STMT_VINFO_RELATED_STMT (vinfo_for_stmt (phi
));
3654 VEC_free (tree
, heap
, vec_initial_defs
);
3656 /* 2. Create epilog code.
3657 The reduction epilog code operates across the elements of the vector
3658 of partial results computed by the vectorized loop.
3659 The reduction epilog code consists of:
3661 step 1: compute the scalar result in a vector (v_out2)
3662 step 2: extract the scalar result (s_out3) from the vector (v_out2)
3663 step 3: adjust the scalar result (s_out3) if needed.
3665 Step 1 can be accomplished using one the following three schemes:
3666 (scheme 1) using reduc_code, if available.
3667 (scheme 2) using whole-vector shifts, if available.
3668 (scheme 3) using a scalar loop. In this case steps 1+2 above are
3671 The overall epilog code looks like this:
3673 s_out0 = phi <s_loop> # original EXIT_PHI
3674 v_out1 = phi <VECT_DEF> # NEW_EXIT_PHI
3675 v_out2 = reduce <v_out1> # step 1
3676 s_out3 = extract_field <v_out2, 0> # step 2
3677 s_out4 = adjust_result <s_out3> # step 3
3679 (step 3 is optional, and steps 1 and 2 may be combined).
3680 Lastly, the uses of s_out0 are replaced by s_out4. */
3683 /* 2.1 Create new loop-exit-phis to preserve loop-closed form:
3684 v_out1 = phi <VECT_DEF>
3685 Store them in NEW_PHIS. */
3687 exit_bb
= single_exit (loop
)->dest
;
3688 prev_phi_info
= NULL
;
3689 new_phis
= VEC_alloc (gimple
, heap
, VEC_length (tree
, vect_defs
));
3690 FOR_EACH_VEC_ELT (tree
, vect_defs
, i
, def
)
3692 for (j
= 0; j
< ncopies
; j
++)
3694 phi
= create_phi_node (SSA_NAME_VAR (def
), exit_bb
);
3695 set_vinfo_for_stmt (phi
, new_stmt_vec_info (phi
, loop_vinfo
, NULL
));
3697 VEC_quick_push (gimple
, new_phis
, phi
);
3700 def
= vect_get_vec_def_for_stmt_copy (dt
, def
);
3701 STMT_VINFO_RELATED_STMT (prev_phi_info
) = phi
;
3704 SET_PHI_ARG_DEF (phi
, single_exit (loop
)->dest_idx
, def
);
3705 prev_phi_info
= vinfo_for_stmt (phi
);
3709 /* The epilogue is created for the outer-loop, i.e., for the loop being
3710 vectorized. Create exit phis for the outer loop. */
3714 exit_bb
= single_exit (loop
)->dest
;
3715 inner_phis
= VEC_alloc (gimple
, heap
, VEC_length (tree
, vect_defs
));
3716 FOR_EACH_VEC_ELT (gimple
, new_phis
, i
, phi
)
3718 gimple outer_phi
= create_phi_node (SSA_NAME_VAR (PHI_RESULT (phi
)),
3720 SET_PHI_ARG_DEF (outer_phi
, single_exit (loop
)->dest_idx
,
3722 set_vinfo_for_stmt (outer_phi
, new_stmt_vec_info (outer_phi
,
3724 VEC_quick_push (gimple
, inner_phis
, phi
);
3725 VEC_replace (gimple
, new_phis
, i
, outer_phi
);
3726 prev_phi_info
= vinfo_for_stmt (outer_phi
);
3727 while (STMT_VINFO_RELATED_STMT (vinfo_for_stmt (phi
)))
3729 phi
= STMT_VINFO_RELATED_STMT (vinfo_for_stmt (phi
));
3730 outer_phi
= create_phi_node (SSA_NAME_VAR (PHI_RESULT (phi
)),
3732 SET_PHI_ARG_DEF (outer_phi
, single_exit (loop
)->dest_idx
,
3734 set_vinfo_for_stmt (outer_phi
, new_stmt_vec_info (outer_phi
,
3736 STMT_VINFO_RELATED_STMT (prev_phi_info
) = outer_phi
;
3737 prev_phi_info
= vinfo_for_stmt (outer_phi
);
3742 exit_gsi
= gsi_after_labels (exit_bb
);
3744 /* 2.2 Get the relevant tree-code to use in the epilog for schemes 2,3
3745 (i.e. when reduc_code is not available) and in the final adjustment
3746 code (if needed). Also get the original scalar reduction variable as
3747 defined in the loop. In case STMT is a "pattern-stmt" (i.e. - it
3748 represents a reduction pattern), the tree-code and scalar-def are
3749 taken from the original stmt that the pattern-stmt (STMT) replaces.
3750 Otherwise (it is a regular reduction) - the tree-code and scalar-def
3751 are taken from STMT. */
3753 orig_stmt
= STMT_VINFO_RELATED_STMT (stmt_info
);
3756 /* Regular reduction */
3761 /* Reduction pattern */
3762 stmt_vec_info stmt_vinfo
= vinfo_for_stmt (orig_stmt
);
3763 gcc_assert (STMT_VINFO_IN_PATTERN_P (stmt_vinfo
));
3764 gcc_assert (STMT_VINFO_RELATED_STMT (stmt_vinfo
) == stmt
);
3767 code
= gimple_assign_rhs_code (orig_stmt
);
3768 /* For MINUS_EXPR the initial vector is [init_val,0,...,0], therefore,
3769 partial results are added and not subtracted. */
3770 if (code
== MINUS_EXPR
)
3773 scalar_dest
= gimple_assign_lhs (orig_stmt
);
3774 scalar_type
= TREE_TYPE (scalar_dest
);
3775 scalar_results
= VEC_alloc (tree
, heap
, group_size
);
3776 new_scalar_dest
= vect_create_destination_var (scalar_dest
, NULL
);
3777 bitsize
= TYPE_SIZE (scalar_type
);
3779 /* In case this is a reduction in an inner-loop while vectorizing an outer
3780 loop - we don't need to extract a single scalar result at the end of the
3781 inner-loop (unless it is double reduction, i.e., the use of reduction is
3782 outside the outer-loop). The final vector of partial results will be used
3783 in the vectorized outer-loop, or reduced to a scalar result at the end of
3785 if (nested_in_vect_loop
&& !double_reduc
)
3786 goto vect_finalize_reduction
;
3788 /* SLP reduction without reduction chain, e.g.,
3792 b2 = operation (b1) */
3793 slp_reduc
= (slp_node
&& !GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt
)));
3795 /* In case of reduction chain, e.g.,
3798 a3 = operation (a2),
3800 we may end up with more than one vector result. Here we reduce them to
3802 if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt
)))
3804 tree first_vect
= PHI_RESULT (VEC_index (gimple
, new_phis
, 0));
3806 gimple new_vec_stmt
= NULL
;
3808 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
3809 for (k
= 1; k
< VEC_length (gimple
, new_phis
); k
++)
3811 gimple next_phi
= VEC_index (gimple
, new_phis
, k
);
3812 tree second_vect
= PHI_RESULT (next_phi
);
3814 tmp
= build2 (code
, vectype
, first_vect
, second_vect
);
3815 new_vec_stmt
= gimple_build_assign (vec_dest
, tmp
);
3816 first_vect
= make_ssa_name (vec_dest
, new_vec_stmt
);
3817 gimple_assign_set_lhs (new_vec_stmt
, first_vect
);
3818 gsi_insert_before (&exit_gsi
, new_vec_stmt
, GSI_SAME_STMT
);
3821 new_phi_result
= first_vect
;
3824 VEC_truncate (gimple
, new_phis
, 0);
3825 VEC_safe_push (gimple
, heap
, new_phis
, new_vec_stmt
);
3829 new_phi_result
= PHI_RESULT (VEC_index (gimple
, new_phis
, 0));
3831 /* 2.3 Create the reduction code, using one of the three schemes described
3832 above. In SLP we simply need to extract all the elements from the
3833 vector (without reducing them), so we use scalar shifts. */
3834 if (reduc_code
!= ERROR_MARK
&& !slp_reduc
)
3838 /*** Case 1: Create:
3839 v_out2 = reduc_expr <v_out1> */
3841 if (vect_print_dump_info (REPORT_DETAILS
))
3842 fprintf (vect_dump
, "Reduce using direct vector reduction.");
3844 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
3845 tmp
= build1 (reduc_code
, vectype
, new_phi_result
);
3846 epilog_stmt
= gimple_build_assign (vec_dest
, tmp
);
3847 new_temp
= make_ssa_name (vec_dest
, epilog_stmt
);
3848 gimple_assign_set_lhs (epilog_stmt
, new_temp
);
3849 gsi_insert_before (&exit_gsi
, epilog_stmt
, GSI_SAME_STMT
);
3851 extract_scalar_result
= true;
3855 enum tree_code shift_code
= ERROR_MARK
;
3856 bool have_whole_vector_shift
= true;
3858 int element_bitsize
= tree_low_cst (bitsize
, 1);
3859 int vec_size_in_bits
= tree_low_cst (TYPE_SIZE (vectype
), 1);
3862 if (optab_handler (vec_shr_optab
, mode
) != CODE_FOR_nothing
)
3863 shift_code
= VEC_RSHIFT_EXPR
;
3865 have_whole_vector_shift
= false;
3867 /* Regardless of whether we have a whole vector shift, if we're
3868 emulating the operation via tree-vect-generic, we don't want
3869 to use it. Only the first round of the reduction is likely
3870 to still be profitable via emulation. */
3871 /* ??? It might be better to emit a reduction tree code here, so that
3872 tree-vect-generic can expand the first round via bit tricks. */
3873 if (!VECTOR_MODE_P (mode
))
3874 have_whole_vector_shift
= false;
3877 optab optab
= optab_for_tree_code (code
, vectype
, optab_default
);
3878 if (optab_handler (optab
, mode
) == CODE_FOR_nothing
)
3879 have_whole_vector_shift
= false;
3882 if (have_whole_vector_shift
&& !slp_reduc
)
3884 /*** Case 2: Create:
3885 for (offset = VS/2; offset >= element_size; offset/=2)
3887 Create: va' = vec_shift <va, offset>
3888 Create: va = vop <va, va'>
3891 if (vect_print_dump_info (REPORT_DETAILS
))
3892 fprintf (vect_dump
, "Reduce using vector shifts");
3894 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
3895 new_temp
= new_phi_result
;
3896 for (bit_offset
= vec_size_in_bits
/2;
3897 bit_offset
>= element_bitsize
;
3900 tree bitpos
= size_int (bit_offset
);
3902 epilog_stmt
= gimple_build_assign_with_ops (shift_code
,
3903 vec_dest
, new_temp
, bitpos
);
3904 new_name
= make_ssa_name (vec_dest
, epilog_stmt
);
3905 gimple_assign_set_lhs (epilog_stmt
, new_name
);
3906 gsi_insert_before (&exit_gsi
, epilog_stmt
, GSI_SAME_STMT
);
3908 epilog_stmt
= gimple_build_assign_with_ops (code
, vec_dest
,
3909 new_name
, new_temp
);
3910 new_temp
= make_ssa_name (vec_dest
, epilog_stmt
);
3911 gimple_assign_set_lhs (epilog_stmt
, new_temp
);
3912 gsi_insert_before (&exit_gsi
, epilog_stmt
, GSI_SAME_STMT
);
3915 extract_scalar_result
= true;
3921 /*** Case 3: Create:
3922 s = extract_field <v_out2, 0>
3923 for (offset = element_size;
3924 offset < vector_size;
3925 offset += element_size;)
3927 Create: s' = extract_field <v_out2, offset>
3928 Create: s = op <s, s'> // For non SLP cases
3931 if (vect_print_dump_info (REPORT_DETAILS
))
3932 fprintf (vect_dump
, "Reduce using scalar code. ");
3934 vec_size_in_bits
= tree_low_cst (TYPE_SIZE (vectype
), 1);
3935 FOR_EACH_VEC_ELT (gimple
, new_phis
, i
, new_phi
)
3937 if (gimple_code (new_phi
) == GIMPLE_PHI
)
3938 vec_temp
= PHI_RESULT (new_phi
);
3940 vec_temp
= gimple_assign_lhs (new_phi
);
3941 rhs
= build3 (BIT_FIELD_REF
, scalar_type
, vec_temp
, bitsize
,
3943 epilog_stmt
= gimple_build_assign (new_scalar_dest
, rhs
);
3944 new_temp
= make_ssa_name (new_scalar_dest
, epilog_stmt
);
3945 gimple_assign_set_lhs (epilog_stmt
, new_temp
);
3946 gsi_insert_before (&exit_gsi
, epilog_stmt
, GSI_SAME_STMT
);
3948 /* In SLP we don't need to apply reduction operation, so we just
3949 collect s' values in SCALAR_RESULTS. */
3951 VEC_safe_push (tree
, heap
, scalar_results
, new_temp
);
3953 for (bit_offset
= element_bitsize
;
3954 bit_offset
< vec_size_in_bits
;
3955 bit_offset
+= element_bitsize
)
3957 tree bitpos
= bitsize_int (bit_offset
);
3958 tree rhs
= build3 (BIT_FIELD_REF
, scalar_type
, vec_temp
,
3961 epilog_stmt
= gimple_build_assign (new_scalar_dest
, rhs
);
3962 new_name
= make_ssa_name (new_scalar_dest
, epilog_stmt
);
3963 gimple_assign_set_lhs (epilog_stmt
, new_name
);
3964 gsi_insert_before (&exit_gsi
, epilog_stmt
, GSI_SAME_STMT
);
3968 /* In SLP we don't need to apply reduction operation, so
3969 we just collect s' values in SCALAR_RESULTS. */
3970 new_temp
= new_name
;
3971 VEC_safe_push (tree
, heap
, scalar_results
, new_name
);
3975 epilog_stmt
= gimple_build_assign_with_ops (code
,
3976 new_scalar_dest
, new_name
, new_temp
);
3977 new_temp
= make_ssa_name (new_scalar_dest
, epilog_stmt
);
3978 gimple_assign_set_lhs (epilog_stmt
, new_temp
);
3979 gsi_insert_before (&exit_gsi
, epilog_stmt
, GSI_SAME_STMT
);
3984 /* The only case where we need to reduce scalar results in SLP, is
3985 unrolling. If the size of SCALAR_RESULTS is greater than
3986 GROUP_SIZE, we reduce them combining elements modulo
3990 tree res
, first_res
, new_res
;
3993 /* Reduce multiple scalar results in case of SLP unrolling. */
3994 for (j
= group_size
; VEC_iterate (tree
, scalar_results
, j
, res
);
3997 first_res
= VEC_index (tree
, scalar_results
, j
% group_size
);
3998 new_stmt
= gimple_build_assign_with_ops (code
,
3999 new_scalar_dest
, first_res
, res
);
4000 new_res
= make_ssa_name (new_scalar_dest
, new_stmt
);
4001 gimple_assign_set_lhs (new_stmt
, new_res
);
4002 gsi_insert_before (&exit_gsi
, new_stmt
, GSI_SAME_STMT
);
4003 VEC_replace (tree
, scalar_results
, j
% group_size
, new_res
);
4007 /* Not SLP - we have one scalar to keep in SCALAR_RESULTS. */
4008 VEC_safe_push (tree
, heap
, scalar_results
, new_temp
);
4010 extract_scalar_result
= false;
4014 /* 2.4 Extract the final scalar result. Create:
4015 s_out3 = extract_field <v_out2, bitpos> */
4017 if (extract_scalar_result
)
4021 if (vect_print_dump_info (REPORT_DETAILS
))
4022 fprintf (vect_dump
, "extract scalar result");
4024 if (BYTES_BIG_ENDIAN
)
4025 bitpos
= size_binop (MULT_EXPR
,
4026 bitsize_int (TYPE_VECTOR_SUBPARTS (vectype
) - 1),
4027 TYPE_SIZE (scalar_type
));
4029 bitpos
= bitsize_zero_node
;
4031 rhs
= build3 (BIT_FIELD_REF
, scalar_type
, new_temp
, bitsize
, bitpos
);
4032 epilog_stmt
= gimple_build_assign (new_scalar_dest
, rhs
);
4033 new_temp
= make_ssa_name (new_scalar_dest
, epilog_stmt
);
4034 gimple_assign_set_lhs (epilog_stmt
, new_temp
);
4035 gsi_insert_before (&exit_gsi
, epilog_stmt
, GSI_SAME_STMT
);
4036 VEC_safe_push (tree
, heap
, scalar_results
, new_temp
);
4039 vect_finalize_reduction
:
4044 /* 2.5 Adjust the final result by the initial value of the reduction
4045 variable. (When such adjustment is not needed, then
4046 'adjustment_def' is zero). For example, if code is PLUS we create:
4047 new_temp = loop_exit_def + adjustment_def */
4051 gcc_assert (!slp_reduc
);
4052 if (nested_in_vect_loop
)
4054 new_phi
= VEC_index (gimple
, new_phis
, 0);
4055 gcc_assert (TREE_CODE (TREE_TYPE (adjustment_def
)) == VECTOR_TYPE
);
4056 expr
= build2 (code
, vectype
, PHI_RESULT (new_phi
), adjustment_def
);
4057 new_dest
= vect_create_destination_var (scalar_dest
, vectype
);
4061 new_temp
= VEC_index (tree
, scalar_results
, 0);
4062 gcc_assert (TREE_CODE (TREE_TYPE (adjustment_def
)) != VECTOR_TYPE
);
4063 expr
= build2 (code
, scalar_type
, new_temp
, adjustment_def
);
4064 new_dest
= vect_create_destination_var (scalar_dest
, scalar_type
);
4067 epilog_stmt
= gimple_build_assign (new_dest
, expr
);
4068 new_temp
= make_ssa_name (new_dest
, epilog_stmt
);
4069 gimple_assign_set_lhs (epilog_stmt
, new_temp
);
4070 SSA_NAME_DEF_STMT (new_temp
) = epilog_stmt
;
4071 gsi_insert_before (&exit_gsi
, epilog_stmt
, GSI_SAME_STMT
);
4072 if (nested_in_vect_loop
)
4074 set_vinfo_for_stmt (epilog_stmt
,
4075 new_stmt_vec_info (epilog_stmt
, loop_vinfo
,
4077 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (epilog_stmt
)) =
4078 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (new_phi
));
4081 VEC_quick_push (tree
, scalar_results
, new_temp
);
4083 VEC_replace (tree
, scalar_results
, 0, new_temp
);
4086 VEC_replace (tree
, scalar_results
, 0, new_temp
);
4088 VEC_replace (gimple
, new_phis
, 0, epilog_stmt
);
4091 /* 2.6 Handle the loop-exit phis. Replace the uses of scalar loop-exit
4092 phis with new adjusted scalar results, i.e., replace use <s_out0>
4097 s_out0 = phi <s_loop> # (scalar) EXIT_PHI
4098 v_out1 = phi <VECT_DEF> # NEW_EXIT_PHI
4099 v_out2 = reduce <v_out1>
4100 s_out3 = extract_field <v_out2, 0>
4101 s_out4 = adjust_result <s_out3>
4108 s_out0 = phi <s_loop> # (scalar) EXIT_PHI
4109 v_out1 = phi <VECT_DEF> # NEW_EXIT_PHI
4110 v_out2 = reduce <v_out1>
4111 s_out3 = extract_field <v_out2, 0>
4112 s_out4 = adjust_result <s_out3>
4117 /* In SLP reduction chain we reduce vector results into one vector if
4118 necessary, hence we set here GROUP_SIZE to 1. SCALAR_DEST is the LHS of
4119 the last stmt in the reduction chain, since we are looking for the loop
4121 if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt
)))
4123 scalar_dest
= gimple_assign_lhs (VEC_index (gimple
,
4124 SLP_TREE_SCALAR_STMTS (slp_node
),
4129 /* In SLP we may have several statements in NEW_PHIS and REDUCTION_PHIS (in
4130 case that GROUP_SIZE is greater than vectorization factor). Therefore, we
4131 need to match SCALAR_RESULTS with corresponding statements. The first
4132 (GROUP_SIZE / number of new vector stmts) scalar results correspond to
4133 the first vector stmt, etc.
4134 (RATIO is equal to (GROUP_SIZE / number of new vector stmts)). */
4135 if (group_size
> VEC_length (gimple
, new_phis
))
4137 ratio
= group_size
/ VEC_length (gimple
, new_phis
);
4138 gcc_assert (!(group_size
% VEC_length (gimple
, new_phis
)));
4143 for (k
= 0; k
< group_size
; k
++)
4147 epilog_stmt
= VEC_index (gimple
, new_phis
, k
/ ratio
);
4148 reduction_phi
= VEC_index (gimple
, reduction_phis
, k
/ ratio
);
4150 inner_phi
= VEC_index (gimple
, inner_phis
, k
/ ratio
);
4155 gimple current_stmt
= VEC_index (gimple
,
4156 SLP_TREE_SCALAR_STMTS (slp_node
), k
);
4158 orig_stmt
= STMT_VINFO_RELATED_STMT (vinfo_for_stmt (current_stmt
));
4159 /* SLP statements can't participate in patterns. */
4160 gcc_assert (!orig_stmt
);
4161 scalar_dest
= gimple_assign_lhs (current_stmt
);
4164 phis
= VEC_alloc (gimple
, heap
, 3);
4165 /* Find the loop-closed-use at the loop exit of the original scalar
4166 result. (The reduction result is expected to have two immediate uses -
4167 one at the latch block, and one at the loop exit). */
4168 FOR_EACH_IMM_USE_FAST (use_p
, imm_iter
, scalar_dest
)
4169 if (!flow_bb_inside_loop_p (loop
, gimple_bb (USE_STMT (use_p
))))
4170 VEC_safe_push (gimple
, heap
, phis
, USE_STMT (use_p
));
4172 /* We expect to have found an exit_phi because of loop-closed-ssa
4174 gcc_assert (!VEC_empty (gimple
, phis
));
4176 FOR_EACH_VEC_ELT (gimple
, phis
, i
, exit_phi
)
4180 stmt_vec_info exit_phi_vinfo
= vinfo_for_stmt (exit_phi
);
4183 /* FORNOW. Currently not supporting the case that an inner-loop
4184 reduction is not used in the outer-loop (but only outside the
4185 outer-loop), unless it is double reduction. */
4186 gcc_assert ((STMT_VINFO_RELEVANT_P (exit_phi_vinfo
)
4187 && !STMT_VINFO_LIVE_P (exit_phi_vinfo
))
4190 STMT_VINFO_VEC_STMT (exit_phi_vinfo
) = epilog_stmt
;
4192 || STMT_VINFO_DEF_TYPE (exit_phi_vinfo
)
4193 != vect_double_reduction_def
)
4196 /* Handle double reduction:
4198 stmt1: s1 = phi <s0, s2> - double reduction phi (outer loop)
4199 stmt2: s3 = phi <s1, s4> - (regular) reduc phi (inner loop)
4200 stmt3: s4 = use (s3) - (regular) reduc stmt (inner loop)
4201 stmt4: s2 = phi <s4> - double reduction stmt (outer loop)
4203 At that point the regular reduction (stmt2 and stmt3) is
4204 already vectorized, as well as the exit phi node, stmt4.
4205 Here we vectorize the phi node of double reduction, stmt1, and
4206 update all relevant statements. */
4208 /* Go through all the uses of s2 to find double reduction phi
4209 node, i.e., stmt1 above. */
4210 orig_name
= PHI_RESULT (exit_phi
);
4211 FOR_EACH_IMM_USE_STMT (use_stmt
, imm_iter
, orig_name
)
4213 stmt_vec_info use_stmt_vinfo
= vinfo_for_stmt (use_stmt
);
4214 stmt_vec_info new_phi_vinfo
;
4215 tree vect_phi_init
, preheader_arg
, vect_phi_res
, init_def
;
4216 basic_block bb
= gimple_bb (use_stmt
);
4219 /* Check that USE_STMT is really double reduction phi
4221 if (gimple_code (use_stmt
) != GIMPLE_PHI
4222 || gimple_phi_num_args (use_stmt
) != 2
4224 || STMT_VINFO_DEF_TYPE (use_stmt_vinfo
)
4225 != vect_double_reduction_def
4226 || bb
->loop_father
!= outer_loop
)
4229 /* Create vector phi node for double reduction:
4230 vs1 = phi <vs0, vs2>
4231 vs1 was created previously in this function by a call to
4232 vect_get_vec_def_for_operand and is stored in
4234 vs2 is defined by INNER_PHI, the vectorized EXIT_PHI;
4235 vs0 is created here. */
4237 /* Create vector phi node. */
4238 vect_phi
= create_phi_node (vec_initial_def
, bb
);
4239 new_phi_vinfo
= new_stmt_vec_info (vect_phi
,
4240 loop_vec_info_for_loop (outer_loop
), NULL
);
4241 set_vinfo_for_stmt (vect_phi
, new_phi_vinfo
);
4243 /* Create vs0 - initial def of the double reduction phi. */
4244 preheader_arg
= PHI_ARG_DEF_FROM_EDGE (use_stmt
,
4245 loop_preheader_edge (outer_loop
));
4246 init_def
= get_initial_def_for_reduction (stmt
,
4247 preheader_arg
, NULL
);
4248 vect_phi_init
= vect_init_vector (use_stmt
, init_def
,
4251 /* Update phi node arguments with vs0 and vs2. */
4252 add_phi_arg (vect_phi
, vect_phi_init
,
4253 loop_preheader_edge (outer_loop
),
4255 add_phi_arg (vect_phi
, PHI_RESULT (inner_phi
),
4256 loop_latch_edge (outer_loop
), UNKNOWN_LOCATION
);
4257 if (vect_print_dump_info (REPORT_DETAILS
))
4259 fprintf (vect_dump
, "created double reduction phi "
4261 print_gimple_stmt (vect_dump
, vect_phi
, 0, TDF_SLIM
);
4264 vect_phi_res
= PHI_RESULT (vect_phi
);
4266 /* Replace the use, i.e., set the correct vs1 in the regular
4267 reduction phi node. FORNOW, NCOPIES is always 1, so the
4268 loop is redundant. */
4269 use
= reduction_phi
;
4270 for (j
= 0; j
< ncopies
; j
++)
4272 edge pr_edge
= loop_preheader_edge (loop
);
4273 SET_PHI_ARG_DEF (use
, pr_edge
->dest_idx
, vect_phi_res
);
4274 use
= STMT_VINFO_RELATED_STMT (vinfo_for_stmt (use
));
4280 VEC_free (gimple
, heap
, phis
);
4281 if (nested_in_vect_loop
)
4289 phis
= VEC_alloc (gimple
, heap
, 3);
4290 /* Find the loop-closed-use at the loop exit of the original scalar
4291 result. (The reduction result is expected to have two immediate uses,
4292 one at the latch block, and one at the loop exit). For double
4293 reductions we are looking for exit phis of the outer loop. */
4294 FOR_EACH_IMM_USE_FAST (use_p
, imm_iter
, scalar_dest
)
4296 if (!flow_bb_inside_loop_p (loop
, gimple_bb (USE_STMT (use_p
))))
4297 VEC_safe_push (gimple
, heap
, phis
, USE_STMT (use_p
));
4300 if (double_reduc
&& gimple_code (USE_STMT (use_p
)) == GIMPLE_PHI
)
4302 tree phi_res
= PHI_RESULT (USE_STMT (use_p
));
4304 FOR_EACH_IMM_USE_FAST (phi_use_p
, phi_imm_iter
, phi_res
)
4306 if (!flow_bb_inside_loop_p (loop
,
4307 gimple_bb (USE_STMT (phi_use_p
))))
4308 VEC_safe_push (gimple
, heap
, phis
,
4309 USE_STMT (phi_use_p
));
4315 FOR_EACH_VEC_ELT (gimple
, phis
, i
, exit_phi
)
4317 /* Replace the uses: */
4318 orig_name
= PHI_RESULT (exit_phi
);
4319 scalar_result
= VEC_index (tree
, scalar_results
, k
);
4320 FOR_EACH_IMM_USE_STMT (use_stmt
, imm_iter
, orig_name
)
4321 FOR_EACH_IMM_USE_ON_STMT (use_p
, imm_iter
)
4322 SET_USE (use_p
, scalar_result
);
4325 VEC_free (gimple
, heap
, phis
);
4328 VEC_free (tree
, heap
, scalar_results
);
4329 VEC_free (gimple
, heap
, new_phis
);
/* Function vectorizable_reduction.

   Check if STMT performs a reduction operation that can be vectorized.
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at GSI.
   Return FALSE if not a vectorizable STMT, TRUE otherwise.

   This function also handles reduction idioms (patterns) that have been
   recognized in advance during vect_pattern_recog.  In this case, STMT may be
   of this form:
     X = pattern_expr (arg0, arg1, ..., X)
   and it's STMT_VINFO_RELATED_STMT points to the last stmt in the original
   sequence that had been detected and replaced by the pattern-stmt (STMT).

   In some cases of reduction patterns, the type of the reduction variable X is
   different than the type of the other arguments of STMT.
   In such cases, the vectype that is used when transforming STMT into a vector
   stmt is different than the vectype that is used to determine the
   vectorization factor, because it consists of a different number of elements
   than the actual number of elements that are being operated upon in parallel.

   For example, consider an accumulation of shorts into an int accumulator.
   On some targets it's possible to vectorize this pattern operating on 8
   shorts at a time (hence, the vectype for purposes of determining the
   vectorization factor should be V8HI); on the other hand, the vectype that
   is used to create the vector form is actually V4SI (the type of the result).

   Upon entry to this function, STMT_VINFO_VECTYPE records the vectype that
   indicates what is the actual level of parallelism (V8HI in the example), so
   that the right vectorization factor would be derived.  This vectype
   corresponds to the type of arguments to the reduction stmt, and should *NOT*
   be used to create the vectorized stmt.  The right vectype for the vectorized
   stmt is obtained from the type of the result X:
        get_vectype_for_scalar_type (TREE_TYPE (X))

   This means that, contrary to "regular" reductions (or "regular" stmts in
   general), the following equation:
      STMT_VINFO_VECTYPE == get_vectype_for_scalar_type (TREE_TYPE (X))
   does *NOT* necessarily hold for reduction patterns.  */
4374 vectorizable_reduction (gimple stmt
, gimple_stmt_iterator
*gsi
,
4375 gimple
*vec_stmt
, slp_tree slp_node
)
4379 tree loop_vec_def0
= NULL_TREE
, loop_vec_def1
= NULL_TREE
;
4380 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
4381 tree vectype_out
= STMT_VINFO_VECTYPE (stmt_info
);
4382 tree vectype_in
= NULL_TREE
;
4383 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
4384 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
4385 enum tree_code code
, orig_code
, epilog_reduc_code
;
4386 enum machine_mode vec_mode
;
4388 optab optab
, reduc_optab
;
4389 tree new_temp
= NULL_TREE
;
4392 enum vect_def_type dt
;
4393 gimple new_phi
= NULL
;
4397 stmt_vec_info orig_stmt_info
;
4398 tree expr
= NULL_TREE
;
4402 stmt_vec_info prev_stmt_info
, prev_phi_info
;
4403 bool single_defuse_cycle
= false;
4404 tree reduc_def
= NULL_TREE
;
4405 gimple new_stmt
= NULL
;
4408 bool nested_cycle
= false, found_nested_cycle_def
= false;
4409 gimple reduc_def_stmt
= NULL
;
4410 /* The default is that the reduction variable is the last in statement. */
4411 int reduc_index
= 2;
4412 bool double_reduc
= false, dummy
;
4414 struct loop
* def_stmt_loop
, *outer_loop
= NULL
;
4416 gimple def_arg_stmt
;
4417 VEC (tree
, heap
) *vec_oprnds0
= NULL
, *vec_oprnds1
= NULL
, *vect_defs
= NULL
;
4418 VEC (gimple
, heap
) *phis
= NULL
;
4420 tree def0
, def1
, tem
, op0
, op1
= NULL_TREE
;
4422 /* In case of reduction chain we switch to the first stmt in the chain, but
4423 we don't update STMT_INFO, since only the last stmt is marked as reduction
4424 and has reduction properties. */
4425 if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt
)))
4426 stmt
= GROUP_FIRST_ELEMENT (stmt_info
);
4428 if (nested_in_vect_loop_p (loop
, stmt
))
4432 nested_cycle
= true;
4435 /* 1. Is vectorizable reduction? */
4436 /* Not supportable if the reduction variable is used in the loop, unless
4437 it's a reduction chain. */
4438 if (STMT_VINFO_RELEVANT (stmt_info
) > vect_used_in_outer
4439 && !GROUP_FIRST_ELEMENT (stmt_info
))
4442 /* Reductions that are not used even in an enclosing outer-loop,
4443 are expected to be "live" (used out of the loop). */
4444 if (STMT_VINFO_RELEVANT (stmt_info
) == vect_unused_in_scope
4445 && !STMT_VINFO_LIVE_P (stmt_info
))
4448 /* Make sure it was already recognized as a reduction computation. */
4449 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_reduction_def
4450 && STMT_VINFO_DEF_TYPE (stmt_info
) != vect_nested_cycle
)
4453 /* 2. Has this been recognized as a reduction pattern?
4455 Check if STMT represents a pattern that has been recognized
4456 in earlier analysis stages. For stmts that represent a pattern,
4457 the STMT_VINFO_RELATED_STMT field records the last stmt in
4458 the original sequence that constitutes the pattern. */
4460 orig_stmt
= STMT_VINFO_RELATED_STMT (stmt_info
);
4463 orig_stmt_info
= vinfo_for_stmt (orig_stmt
);
4464 gcc_assert (STMT_VINFO_RELATED_STMT (orig_stmt_info
) == stmt
);
4465 gcc_assert (STMT_VINFO_IN_PATTERN_P (orig_stmt_info
));
4466 gcc_assert (!STMT_VINFO_IN_PATTERN_P (stmt_info
));
4469 /* 3. Check the operands of the operation. The first operands are defined
4470 inside the loop body. The last operand is the reduction variable,
4471 which is defined by the loop-header-phi. */
4473 gcc_assert (is_gimple_assign (stmt
));
4476 switch (get_gimple_rhs_class (gimple_assign_rhs_code (stmt
)))
4478 case GIMPLE_SINGLE_RHS
:
4479 op_type
= TREE_OPERAND_LENGTH (gimple_assign_rhs1 (stmt
));
4480 if (op_type
== ternary_op
)
4482 tree rhs
= gimple_assign_rhs1 (stmt
);
4483 ops
[0] = TREE_OPERAND (rhs
, 0);
4484 ops
[1] = TREE_OPERAND (rhs
, 1);
4485 ops
[2] = TREE_OPERAND (rhs
, 2);
4486 code
= TREE_CODE (rhs
);
4492 case GIMPLE_BINARY_RHS
:
4493 code
= gimple_assign_rhs_code (stmt
);
4494 op_type
= TREE_CODE_LENGTH (code
);
4495 gcc_assert (op_type
== binary_op
);
4496 ops
[0] = gimple_assign_rhs1 (stmt
);
4497 ops
[1] = gimple_assign_rhs2 (stmt
);
4500 case GIMPLE_TERNARY_RHS
:
4501 code
= gimple_assign_rhs_code (stmt
);
4502 op_type
= TREE_CODE_LENGTH (code
);
4503 gcc_assert (op_type
== ternary_op
);
4504 ops
[0] = gimple_assign_rhs1 (stmt
);
4505 ops
[1] = gimple_assign_rhs2 (stmt
);
4506 ops
[2] = gimple_assign_rhs3 (stmt
);
4509 case GIMPLE_UNARY_RHS
:
4516 if (code
== COND_EXPR
&& slp_node
)
4519 scalar_dest
= gimple_assign_lhs (stmt
);
4520 scalar_type
= TREE_TYPE (scalar_dest
);
4521 if (!POINTER_TYPE_P (scalar_type
) && !INTEGRAL_TYPE_P (scalar_type
)
4522 && !SCALAR_FLOAT_TYPE_P (scalar_type
))
4525 /* Do not try to vectorize bit-precision reductions. */
4526 if ((TYPE_PRECISION (scalar_type
)
4527 != GET_MODE_PRECISION (TYPE_MODE (scalar_type
))))
4530 /* All uses but the last are expected to be defined in the loop.
4531 The last use is the reduction variable. In case of nested cycle this
4532 assumption is not true: we use reduc_index to record the index of the
4533 reduction variable. */
4534 for (i
= 0; i
< op_type
-1; i
++)
4536 /* The condition of COND_EXPR is checked in vectorizable_condition(). */
4537 if (i
== 0 && code
== COND_EXPR
)
4540 is_simple_use
= vect_is_simple_use_1 (ops
[i
], stmt
, loop_vinfo
, NULL
,
4541 &def_stmt
, &def
, &dt
, &tem
);
4544 gcc_assert (is_simple_use
);
4546 if (dt
!= vect_internal_def
4547 && dt
!= vect_external_def
4548 && dt
!= vect_constant_def
4549 && dt
!= vect_induction_def
4550 && !(dt
== vect_nested_cycle
&& nested_cycle
))
4553 if (dt
== vect_nested_cycle
)
4555 found_nested_cycle_def
= true;
4556 reduc_def_stmt
= def_stmt
;
4561 is_simple_use
= vect_is_simple_use_1 (ops
[i
], stmt
, loop_vinfo
, NULL
,
4562 &def_stmt
, &def
, &dt
, &tem
);
4565 gcc_assert (is_simple_use
);
4566 gcc_assert (dt
== vect_reduction_def
4567 || dt
== vect_nested_cycle
4568 || ((dt
== vect_internal_def
|| dt
== vect_external_def
4569 || dt
== vect_constant_def
|| dt
== vect_induction_def
)
4570 && nested_cycle
&& found_nested_cycle_def
));
4571 if (!found_nested_cycle_def
)
4572 reduc_def_stmt
= def_stmt
;
4574 gcc_assert (gimple_code (reduc_def_stmt
) == GIMPLE_PHI
);
4576 gcc_assert (orig_stmt
== vect_is_simple_reduction (loop_vinfo
,
4582 gimple tmp
= vect_is_simple_reduction (loop_vinfo
, reduc_def_stmt
,
4583 !nested_cycle
, &dummy
);
4584 /* We changed STMT to be the first stmt in reduction chain, hence we
4585 check that in this case the first element in the chain is STMT. */
4586 gcc_assert (stmt
== tmp
4587 || GROUP_FIRST_ELEMENT (vinfo_for_stmt (tmp
)) == stmt
);
4590 if (STMT_VINFO_LIVE_P (vinfo_for_stmt (reduc_def_stmt
)))
4593 if (slp_node
|| PURE_SLP_STMT (stmt_info
))
4596 ncopies
= (LOOP_VINFO_VECT_FACTOR (loop_vinfo
)
4597 / TYPE_VECTOR_SUBPARTS (vectype_in
));
4599 gcc_assert (ncopies
>= 1);
4601 vec_mode
= TYPE_MODE (vectype_in
);
4603 if (code
== COND_EXPR
)
4605 if (!vectorizable_condition (stmt
, gsi
, NULL
, ops
[reduc_index
], 0, NULL
))
4607 if (vect_print_dump_info (REPORT_DETAILS
))
4608 fprintf (vect_dump
, "unsupported condition in reduction");
4615 /* 4. Supportable by target? */
4617 /* 4.1. check support for the operation in the loop */
4618 optab
= optab_for_tree_code (code
, vectype_in
, optab_default
);
4621 if (vect_print_dump_info (REPORT_DETAILS
))
4622 fprintf (vect_dump
, "no optab.");
4627 if (optab_handler (optab
, vec_mode
) == CODE_FOR_nothing
)
4629 if (vect_print_dump_info (REPORT_DETAILS
))
4630 fprintf (vect_dump
, "op not supported by target.");
4632 if (GET_MODE_SIZE (vec_mode
) != UNITS_PER_WORD
4633 || LOOP_VINFO_VECT_FACTOR (loop_vinfo
)
4634 < vect_min_worthwhile_factor (code
))
4637 if (vect_print_dump_info (REPORT_DETAILS
))
4638 fprintf (vect_dump
, "proceeding using word mode.");
4641 /* Worthwhile without SIMD support? */
4642 if (!VECTOR_MODE_P (TYPE_MODE (vectype_in
))
4643 && LOOP_VINFO_VECT_FACTOR (loop_vinfo
)
4644 < vect_min_worthwhile_factor (code
))
4646 if (vect_print_dump_info (REPORT_DETAILS
))
4647 fprintf (vect_dump
, "not worthwhile without SIMD support.");
4653 /* 4.2. Check support for the epilog operation.
4655 If STMT represents a reduction pattern, then the type of the
4656 reduction variable may be different than the type of the rest
4657 of the arguments. For example, consider the case of accumulation
4658 of shorts into an int accumulator; The original code:
4659 S1: int_a = (int) short_a;
4660 orig_stmt-> S2: int_acc = plus <int_a ,int_acc>;
4663 STMT: int_acc = widen_sum <short_a, int_acc>
4666 1. The tree-code that is used to create the vector operation in the
4667 epilog code (that reduces the partial results) is not the
4668 tree-code of STMT, but is rather the tree-code of the original
4669 stmt from the pattern that STMT is replacing. I.e, in the example
4670 above we want to use 'widen_sum' in the loop, but 'plus' in the
4672 2. The type (mode) we use to check available target support
4673 for the vector operation to be created in the *epilog*, is
4674 determined by the type of the reduction variable (in the example
4675 above we'd check this: optab_handler (plus_optab, vect_int_mode])).
4676 However the type (mode) we use to check available target support
4677 for the vector operation to be created *inside the loop*, is
4678 determined by the type of the other arguments to STMT (in the
4679 example we'd check this: optab_handler (widen_sum_optab,
4682 This is contrary to "regular" reductions, in which the types of all
4683 the arguments are the same as the type of the reduction variable.
4684 For "regular" reductions we can therefore use the same vector type
4685 (and also the same tree-code) when generating the epilog code and
4686 when generating the code inside the loop. */
4690 /* This is a reduction pattern: get the vectype from the type of the
4691 reduction variable, and get the tree-code from orig_stmt. */
4692 orig_code
= gimple_assign_rhs_code (orig_stmt
);
4693 gcc_assert (vectype_out
);
4694 vec_mode
= TYPE_MODE (vectype_out
);
4698 /* Regular reduction: use the same vectype and tree-code as used for
4699 the vector code inside the loop can be used for the epilog code. */
4705 def_bb
= gimple_bb (reduc_def_stmt
);
4706 def_stmt_loop
= def_bb
->loop_father
;
4707 def_arg
= PHI_ARG_DEF_FROM_EDGE (reduc_def_stmt
,
4708 loop_preheader_edge (def_stmt_loop
));
4709 if (TREE_CODE (def_arg
) == SSA_NAME
4710 && (def_arg_stmt
= SSA_NAME_DEF_STMT (def_arg
))
4711 && gimple_code (def_arg_stmt
) == GIMPLE_PHI
4712 && flow_bb_inside_loop_p (outer_loop
, gimple_bb (def_arg_stmt
))
4713 && vinfo_for_stmt (def_arg_stmt
)
4714 && STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def_arg_stmt
))
4715 == vect_double_reduction_def
)
4716 double_reduc
= true;
4719 epilog_reduc_code
= ERROR_MARK
;
4720 if (reduction_code_for_scalar_code (orig_code
, &epilog_reduc_code
))
4722 reduc_optab
= optab_for_tree_code (epilog_reduc_code
, vectype_out
,
4726 if (vect_print_dump_info (REPORT_DETAILS
))
4727 fprintf (vect_dump
, "no optab for reduction.");
4729 epilog_reduc_code
= ERROR_MARK
;
4733 && optab_handler (reduc_optab
, vec_mode
) == CODE_FOR_nothing
)
4735 if (vect_print_dump_info (REPORT_DETAILS
))
4736 fprintf (vect_dump
, "reduc op not supported by target.");
4738 epilog_reduc_code
= ERROR_MARK
;
4743 if (!nested_cycle
|| double_reduc
)
4745 if (vect_print_dump_info (REPORT_DETAILS
))
4746 fprintf (vect_dump
, "no reduc code for scalar code.");
4752 if (double_reduc
&& ncopies
> 1)
4754 if (vect_print_dump_info (REPORT_DETAILS
))
4755 fprintf (vect_dump
, "multiple types in double reduction");
4760 /* In case of widenning multiplication by a constant, we update the type
4761 of the constant to be the type of the other operand. We check that the
4762 constant fits the type in the pattern recognition pass. */
4763 if (code
== DOT_PROD_EXPR
4764 && !types_compatible_p (TREE_TYPE (ops
[0]), TREE_TYPE (ops
[1])))
4766 if (TREE_CODE (ops
[0]) == INTEGER_CST
)
4767 ops
[0] = fold_convert (TREE_TYPE (ops
[1]), ops
[0]);
4768 else if (TREE_CODE (ops
[1]) == INTEGER_CST
)
4769 ops
[1] = fold_convert (TREE_TYPE (ops
[0]), ops
[1]);
4772 if (vect_print_dump_info (REPORT_DETAILS
))
4773 fprintf (vect_dump
, "invalid types in dot-prod");
4779 if (!vec_stmt
) /* transformation not required. */
4781 if (!vect_model_reduction_cost (stmt_info
, epilog_reduc_code
, ncopies
))
4783 STMT_VINFO_TYPE (stmt_info
) = reduc_vec_info_type
;
4789 if (vect_print_dump_info (REPORT_DETAILS
))
4790 fprintf (vect_dump
, "transform reduction.");
4792 /* FORNOW: Multiple types are not supported for condition. */
4793 if (code
== COND_EXPR
)
4794 gcc_assert (ncopies
== 1);
4796 /* Create the destination vector */
4797 vec_dest
= vect_create_destination_var (scalar_dest
, vectype_out
);
4799 /* In case the vectorization factor (VF) is bigger than the number
4800 of elements that we can fit in a vectype (nunits), we have to generate
4801 more than one vector stmt - i.e - we need to "unroll" the
4802 vector stmt by a factor VF/nunits. For more details see documentation
4803 in vectorizable_operation. */
4805 /* If the reduction is used in an outer loop we need to generate
4806 VF intermediate results, like so (e.g. for ncopies=2):
4811 (i.e. we generate VF results in 2 registers).
4812 In this case we have a separate def-use cycle for each copy, and therefore
4813 for each copy we get the vector def for the reduction variable from the
4814 respective phi node created for this copy.
4816 Otherwise (the reduction is unused in the loop nest), we can combine
4817 together intermediate results, like so (e.g. for ncopies=2):
4821 (i.e. we generate VF/2 results in a single register).
4822 In this case for each copy we get the vector def for the reduction variable
4823 from the vectorized reduction operation generated in the previous iteration.
4826 if (STMT_VINFO_RELEVANT (stmt_info
) == vect_unused_in_scope
)
4828 single_defuse_cycle
= true;
4832 epilog_copies
= ncopies
;
4834 prev_stmt_info
= NULL
;
4835 prev_phi_info
= NULL
;
4838 vec_num
= SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
);
4839 gcc_assert (TYPE_VECTOR_SUBPARTS (vectype_out
)
4840 == TYPE_VECTOR_SUBPARTS (vectype_in
));
4845 vec_oprnds0
= VEC_alloc (tree
, heap
, 1);
4846 if (op_type
== ternary_op
)
4847 vec_oprnds1
= VEC_alloc (tree
, heap
, 1);
4850 phis
= VEC_alloc (gimple
, heap
, vec_num
);
4851 vect_defs
= VEC_alloc (tree
, heap
, vec_num
);
4853 VEC_quick_push (tree
, vect_defs
, NULL_TREE
);
4855 for (j
= 0; j
< ncopies
; j
++)
4857 if (j
== 0 || !single_defuse_cycle
)
4859 for (i
= 0; i
< vec_num
; i
++)
4861 /* Create the reduction-phi that defines the reduction
4863 new_phi
= create_phi_node (vec_dest
, loop
->header
);
4864 set_vinfo_for_stmt (new_phi
,
4865 new_stmt_vec_info (new_phi
, loop_vinfo
,
4867 if (j
== 0 || slp_node
)
4868 VEC_quick_push (gimple
, phis
, new_phi
);
4872 if (code
== COND_EXPR
)
4874 gcc_assert (!slp_node
);
4875 vectorizable_condition (stmt
, gsi
, vec_stmt
,
4876 PHI_RESULT (VEC_index (gimple
, phis
, 0)),
4878 /* Multiple types are not supported for condition. */
4885 op0
= ops
[!reduc_index
];
4886 if (op_type
== ternary_op
)
4888 if (reduc_index
== 0)
4895 vect_get_vec_defs (op0
, op1
, stmt
, &vec_oprnds0
, &vec_oprnds1
,
4899 loop_vec_def0
= vect_get_vec_def_for_operand (ops
[!reduc_index
],
4901 VEC_quick_push (tree
, vec_oprnds0
, loop_vec_def0
);
4902 if (op_type
== ternary_op
)
4904 loop_vec_def1
= vect_get_vec_def_for_operand (op1
, stmt
,
4906 VEC_quick_push (tree
, vec_oprnds1
, loop_vec_def1
);
4914 enum vect_def_type dt
;
4918 vect_is_simple_use (ops
[!reduc_index
], stmt
, loop_vinfo
, NULL
,
4919 &dummy_stmt
, &dummy
, &dt
);
4920 loop_vec_def0
= vect_get_vec_def_for_stmt_copy (dt
,
4922 VEC_replace (tree
, vec_oprnds0
, 0, loop_vec_def0
);
4923 if (op_type
== ternary_op
)
4925 vect_is_simple_use (op1
, stmt
, loop_vinfo
, NULL
, &dummy_stmt
,
4927 loop_vec_def1
= vect_get_vec_def_for_stmt_copy (dt
,
4929 VEC_replace (tree
, vec_oprnds1
, 0, loop_vec_def1
);
4933 if (single_defuse_cycle
)
4934 reduc_def
= gimple_assign_lhs (new_stmt
);
4936 STMT_VINFO_RELATED_STMT (prev_phi_info
) = new_phi
;
4939 FOR_EACH_VEC_ELT (tree
, vec_oprnds0
, i
, def0
)
4942 reduc_def
= PHI_RESULT (VEC_index (gimple
, phis
, i
));
4945 if (!single_defuse_cycle
|| j
== 0)
4946 reduc_def
= PHI_RESULT (new_phi
);
4949 def1
= ((op_type
== ternary_op
)
4950 ? VEC_index (tree
, vec_oprnds1
, i
) : NULL
);
4951 if (op_type
== binary_op
)
4953 if (reduc_index
== 0)
4954 expr
= build2 (code
, vectype_out
, reduc_def
, def0
);
4956 expr
= build2 (code
, vectype_out
, def0
, reduc_def
);
4960 if (reduc_index
== 0)
4961 expr
= build3 (code
, vectype_out
, reduc_def
, def0
, def1
);
4964 if (reduc_index
== 1)
4965 expr
= build3 (code
, vectype_out
, def0
, reduc_def
, def1
);
4967 expr
= build3 (code
, vectype_out
, def0
, def1
, reduc_def
);
4971 new_stmt
= gimple_build_assign (vec_dest
, expr
);
4972 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
4973 gimple_assign_set_lhs (new_stmt
, new_temp
);
4974 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
4978 VEC_quick_push (gimple
, SLP_TREE_VEC_STMTS (slp_node
), new_stmt
);
4979 VEC_quick_push (tree
, vect_defs
, new_temp
);
4982 VEC_replace (tree
, vect_defs
, 0, new_temp
);
4989 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
4991 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
4993 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
4994 prev_phi_info
= vinfo_for_stmt (new_phi
);
4997 /* Finalize the reduction-phi (set its arguments) and create the
4998 epilog reduction code. */
4999 if ((!single_defuse_cycle
|| code
== COND_EXPR
) && !slp_node
)
5001 new_temp
= gimple_assign_lhs (*vec_stmt
);
5002 VEC_replace (tree
, vect_defs
, 0, new_temp
);
5005 vect_create_epilog_for_reduction (vect_defs
, stmt
, epilog_copies
,
5006 epilog_reduc_code
, phis
, reduc_index
,
5007 double_reduc
, slp_node
);
5009 VEC_free (gimple
, heap
, phis
);
5010 VEC_free (tree
, heap
, vec_oprnds0
);
5012 VEC_free (tree
, heap
, vec_oprnds1
);
5017 /* Function vect_min_worthwhile_factor.
5019 For a loop where we could vectorize the operation indicated by CODE,
5020 return the minimum vectorization factor that makes it worthwhile
5021 to use generic vectors. */
5023 vect_min_worthwhile_factor (enum tree_code code
)
5044 /* Function vectorizable_induction
5046 Check if PHI performs an induction computation that can be vectorized.
5047 If VEC_STMT is also passed, vectorize the induction PHI: create a vectorized
5048 phi to replace it, put it in VEC_STMT, and add it to the same basic block.
5049 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
5052 vectorizable_induction (gimple phi
, gimple_stmt_iterator
*gsi ATTRIBUTE_UNUSED
,
5055 stmt_vec_info stmt_info
= vinfo_for_stmt (phi
);
5056 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
5057 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
5058 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
5059 int nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
5060 int ncopies
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
) / nunits
;
5063 gcc_assert (ncopies
>= 1);
5064 /* FORNOW. These restrictions should be relaxed. */
5065 if (nested_in_vect_loop_p (loop
, phi
))
5067 imm_use_iterator imm_iter
;
5068 use_operand_p use_p
;
5075 if (vect_print_dump_info (REPORT_DETAILS
))
5076 fprintf (vect_dump
, "multiple types in nested loop.");
5081 latch_e
= loop_latch_edge (loop
->inner
);
5082 loop_arg
= PHI_ARG_DEF_FROM_EDGE (phi
, latch_e
);
5083 FOR_EACH_IMM_USE_FAST (use_p
, imm_iter
, loop_arg
)
5085 if (!flow_bb_inside_loop_p (loop
->inner
,
5086 gimple_bb (USE_STMT (use_p
))))
5088 exit_phi
= USE_STMT (use_p
);
5094 stmt_vec_info exit_phi_vinfo
= vinfo_for_stmt (exit_phi
);
5095 if (!(STMT_VINFO_RELEVANT_P (exit_phi_vinfo
)
5096 && !STMT_VINFO_LIVE_P (exit_phi_vinfo
)))
5098 if (vect_print_dump_info (REPORT_DETAILS
))
5099 fprintf (vect_dump
, "inner-loop induction only used outside "
5100 "of the outer vectorized loop.");
5106 if (!STMT_VINFO_RELEVANT_P (stmt_info
))
5109 /* FORNOW: SLP not supported. */
5110 if (STMT_SLP_TYPE (stmt_info
))
5113 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_info
) == vect_induction_def
);
5115 if (gimple_code (phi
) != GIMPLE_PHI
)
5118 if (!vec_stmt
) /* transformation not required. */
5120 STMT_VINFO_TYPE (stmt_info
) = induc_vec_info_type
;
5121 if (vect_print_dump_info (REPORT_DETAILS
))
5122 fprintf (vect_dump
, "=== vectorizable_induction ===");
5123 vect_model_induction_cost (stmt_info
, ncopies
);
5129 if (vect_print_dump_info (REPORT_DETAILS
))
5130 fprintf (vect_dump
, "transform induction phi.");
5132 vec_def
= get_initial_def_for_induction (phi
);
5133 *vec_stmt
= SSA_NAME_DEF_STMT (vec_def
);
5137 /* Function vectorizable_live_operation.
5139 STMT computes a value that is used outside the loop. Check if
5140 it can be supported. */
5143 vectorizable_live_operation (gimple stmt
,
5144 gimple_stmt_iterator
*gsi ATTRIBUTE_UNUSED
,
5145 gimple
*vec_stmt ATTRIBUTE_UNUSED
)
5147 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
5148 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
5149 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
5155 enum vect_def_type dt
;
5156 enum tree_code code
;
5157 enum gimple_rhs_class rhs_class
;
5159 gcc_assert (STMT_VINFO_LIVE_P (stmt_info
));
5161 if (STMT_VINFO_DEF_TYPE (stmt_info
) == vect_reduction_def
)
5164 if (!is_gimple_assign (stmt
))
5167 if (TREE_CODE (gimple_assign_lhs (stmt
)) != SSA_NAME
)
5170 /* FORNOW. CHECKME. */
5171 if (nested_in_vect_loop_p (loop
, stmt
))
5174 code
= gimple_assign_rhs_code (stmt
);
5175 op_type
= TREE_CODE_LENGTH (code
);
5176 rhs_class
= get_gimple_rhs_class (code
);
5177 gcc_assert (rhs_class
!= GIMPLE_UNARY_RHS
|| op_type
== unary_op
);
5178 gcc_assert (rhs_class
!= GIMPLE_BINARY_RHS
|| op_type
== binary_op
);
5180 /* FORNOW: support only if all uses are invariant. This means
5181 that the scalar operations can remain in place, unvectorized.
5182 The original last scalar value that they compute will be used. */
5184 for (i
= 0; i
< op_type
; i
++)
5186 if (rhs_class
== GIMPLE_SINGLE_RHS
)
5187 op
= TREE_OPERAND (gimple_op (stmt
, 1), i
);
5189 op
= gimple_op (stmt
, i
+ 1);
5191 && !vect_is_simple_use (op
, stmt
, loop_vinfo
, NULL
, &def_stmt
, &def
,
5194 if (vect_print_dump_info (REPORT_DETAILS
))
5195 fprintf (vect_dump
, "use not simple.");
5199 if (dt
!= vect_external_def
&& dt
!= vect_constant_def
)
5203 /* No transformation is required for the cases we currently support. */
5207 /* Kill any debug uses outside LOOP of SSA names defined in STMT. */
5210 vect_loop_kill_debug_uses (struct loop
*loop
, gimple stmt
)
5212 ssa_op_iter op_iter
;
5213 imm_use_iterator imm_iter
;
5214 def_operand_p def_p
;
5217 FOR_EACH_PHI_OR_STMT_DEF (def_p
, stmt
, op_iter
, SSA_OP_DEF
)
5219 FOR_EACH_IMM_USE_STMT (ustmt
, imm_iter
, DEF_FROM_PTR (def_p
))
5223 if (!is_gimple_debug (ustmt
))
5226 bb
= gimple_bb (ustmt
);
5228 if (!flow_bb_inside_loop_p (loop
, bb
))
5230 if (gimple_debug_bind_p (ustmt
))
5232 if (vect_print_dump_info (REPORT_DETAILS
))
5233 fprintf (vect_dump
, "killing debug use");
5235 gimple_debug_bind_reset_value (ustmt
);
5236 update_stmt (ustmt
);
5245 /* Function vect_transform_loop.
5247 The analysis phase has determined that the loop is vectorizable.
5248 Vectorize the loop - created vectorized stmts to replace the scalar
5249 stmts in the loop, and update the loop exit condition. */
5252 vect_transform_loop (loop_vec_info loop_vinfo
)
5254 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
5255 basic_block
*bbs
= LOOP_VINFO_BBS (loop_vinfo
);
5256 int nbbs
= loop
->num_nodes
;
5257 gimple_stmt_iterator si
;
5260 int vectorization_factor
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
);
5262 bool slp_scheduled
= false;
5263 unsigned int nunits
;
5264 gimple stmt
, pattern_stmt
;
5265 gimple_seq pattern_def_seq
= NULL
;
5266 gimple_stmt_iterator pattern_def_si
= gsi_none ();
5267 bool transform_pattern_stmt
= false;
5268 bool check_profitability
;
5271 if (vect_print_dump_info (REPORT_DETAILS
))
5272 fprintf (vect_dump
, "=== vec_transform_loop ===");
5274 /* Use the more conservative vectorization threshold. If the number
5275 of iterations is constant assume the cost check has been performed
5276 by our caller. If the threshold makes all loops profitable that
5277 run at least the vectorization factor number of times checking
5278 is pointless, too. */
5279 th
= ((PARAM_VALUE (PARAM_MIN_VECT_LOOP_BOUND
)
5280 * LOOP_VINFO_VECT_FACTOR (loop_vinfo
)) - 1);
5281 th
= MAX (th
, LOOP_VINFO_COST_MODEL_MIN_ITERS (loop_vinfo
));
5282 if (th
>= LOOP_VINFO_VECT_FACTOR (loop_vinfo
) - 1
5283 && !LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo
))
5285 if (vect_print_dump_info (REPORT_COST
))
5287 "Profitability threshold is %d loop iterations.", th
);
5288 check_profitability
= true;
5291 /* Peel the loop if there are data refs with unknown alignment.
5292 Only one data ref with unknown store is allowed. */
5294 if (LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo
))
5296 vect_do_peeling_for_alignment (loop_vinfo
, th
, check_profitability
);
5297 check_profitability
= false;
5300 if (LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo
)
5301 || LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo
))
5303 vect_loop_versioning (loop_vinfo
, th
, check_profitability
);
5304 check_profitability
= false;
5307 /* If the loop has a symbolic number of iterations 'n' (i.e. it's not a
5308 compile time constant), or it is a constant that doesn't divide by the
5309 vectorization factor, then an epilog loop needs to be created.
5310 We therefore duplicate the loop: the original loop will be vectorized,
5311 and will compute the first (n/VF) iterations. The second copy of the loop
5312 will remain scalar and will compute the remaining (n%VF) iterations.
5313 (VF is the vectorization factor). */
5315 if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo
)
5316 || (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo
)
5317 && LOOP_VINFO_INT_NITERS (loop_vinfo
) % vectorization_factor
!= 0)
5318 || LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo
))
5319 vect_do_peeling_for_loop_bound (loop_vinfo
, &ratio
,
5320 th
, check_profitability
);
5322 ratio
= build_int_cst (TREE_TYPE (LOOP_VINFO_NITERS (loop_vinfo
)),
5323 LOOP_VINFO_INT_NITERS (loop_vinfo
) / vectorization_factor
);
5325 /* 1) Make sure the loop header has exactly two entries
5326 2) Make sure we have a preheader basic block. */
5328 gcc_assert (EDGE_COUNT (loop
->header
->preds
) == 2);
5330 split_edge (loop_preheader_edge (loop
));
5332 /* FORNOW: the vectorizer supports only loops which body consist
5333 of one basic block (header + empty latch). When the vectorizer will
5334 support more involved loop forms, the order by which the BBs are
5335 traversed need to be reconsidered. */
5337 for (i
= 0; i
< nbbs
; i
++)
5339 basic_block bb
= bbs
[i
];
5340 stmt_vec_info stmt_info
;
5343 for (si
= gsi_start_phis (bb
); !gsi_end_p (si
); gsi_next (&si
))
5345 phi
= gsi_stmt (si
);
5346 if (vect_print_dump_info (REPORT_DETAILS
))
5348 fprintf (vect_dump
, "------>vectorizing phi: ");
5349 print_gimple_stmt (vect_dump
, phi
, 0, TDF_SLIM
);
5351 stmt_info
= vinfo_for_stmt (phi
);
5355 if (MAY_HAVE_DEBUG_STMTS
&& !STMT_VINFO_LIVE_P (stmt_info
))
5356 vect_loop_kill_debug_uses (loop
, phi
);
5358 if (!STMT_VINFO_RELEVANT_P (stmt_info
)
5359 && !STMT_VINFO_LIVE_P (stmt_info
))
5362 if ((TYPE_VECTOR_SUBPARTS (STMT_VINFO_VECTYPE (stmt_info
))
5363 != (unsigned HOST_WIDE_INT
) vectorization_factor
)
5364 && vect_print_dump_info (REPORT_DETAILS
))
5365 fprintf (vect_dump
, "multiple-types.");
5367 if (STMT_VINFO_DEF_TYPE (stmt_info
) == vect_induction_def
)
5369 if (vect_print_dump_info (REPORT_DETAILS
))
5370 fprintf (vect_dump
, "transform phi.");
5371 vect_transform_stmt (phi
, NULL
, NULL
, NULL
, NULL
);
5375 pattern_stmt
= NULL
;
5376 for (si
= gsi_start_bb (bb
); !gsi_end_p (si
) || transform_pattern_stmt
;)
5380 if (transform_pattern_stmt
)
5381 stmt
= pattern_stmt
;
5383 stmt
= gsi_stmt (si
);
5385 if (vect_print_dump_info (REPORT_DETAILS
))
5387 fprintf (vect_dump
, "------>vectorizing statement: ");
5388 print_gimple_stmt (vect_dump
, stmt
, 0, TDF_SLIM
);
5391 stmt_info
= vinfo_for_stmt (stmt
);
5393 /* vector stmts created in the outer-loop during vectorization of
5394 stmts in an inner-loop may not have a stmt_info, and do not
5395 need to be vectorized. */
5402 if (MAY_HAVE_DEBUG_STMTS
&& !STMT_VINFO_LIVE_P (stmt_info
))
5403 vect_loop_kill_debug_uses (loop
, stmt
);
5405 if (!STMT_VINFO_RELEVANT_P (stmt_info
)
5406 && !STMT_VINFO_LIVE_P (stmt_info
))
5408 if (STMT_VINFO_IN_PATTERN_P (stmt_info
)
5409 && (pattern_stmt
= STMT_VINFO_RELATED_STMT (stmt_info
))
5410 && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt
))
5411 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt
))))
5413 stmt
= pattern_stmt
;
5414 stmt_info
= vinfo_for_stmt (stmt
);
5422 else if (STMT_VINFO_IN_PATTERN_P (stmt_info
)
5423 && (pattern_stmt
= STMT_VINFO_RELATED_STMT (stmt_info
))
5424 && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt
))
5425 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt
))))
5426 transform_pattern_stmt
= true;
5428 /* If pattern statement has def stmts, vectorize them too. */
5429 if (is_pattern_stmt_p (stmt_info
))
5431 if (pattern_def_seq
== NULL
)
5433 pattern_def_seq
= STMT_VINFO_PATTERN_DEF_SEQ (stmt_info
);
5434 pattern_def_si
= gsi_start (pattern_def_seq
);
5436 else if (!gsi_end_p (pattern_def_si
))
5437 gsi_next (&pattern_def_si
);
5438 if (pattern_def_seq
!= NULL
)
5440 gimple pattern_def_stmt
= NULL
;
5441 stmt_vec_info pattern_def_stmt_info
= NULL
;
5443 while (!gsi_end_p (pattern_def_si
))
5445 pattern_def_stmt
= gsi_stmt (pattern_def_si
);
5446 pattern_def_stmt_info
5447 = vinfo_for_stmt (pattern_def_stmt
);
5448 if (STMT_VINFO_RELEVANT_P (pattern_def_stmt_info
)
5449 || STMT_VINFO_LIVE_P (pattern_def_stmt_info
))
5451 gsi_next (&pattern_def_si
);
5454 if (!gsi_end_p (pattern_def_si
))
5456 if (vect_print_dump_info (REPORT_DETAILS
))
5458 fprintf (vect_dump
, "==> vectorizing pattern def"
5460 print_gimple_stmt (vect_dump
, pattern_def_stmt
, 0,
5464 stmt
= pattern_def_stmt
;
5465 stmt_info
= pattern_def_stmt_info
;
5469 pattern_def_si
= gsi_none ();
5470 transform_pattern_stmt
= false;
5474 transform_pattern_stmt
= false;
5477 gcc_assert (STMT_VINFO_VECTYPE (stmt_info
));
5478 nunits
= (unsigned int) TYPE_VECTOR_SUBPARTS (
5479 STMT_VINFO_VECTYPE (stmt_info
));
5480 if (!STMT_SLP_TYPE (stmt_info
)
5481 && nunits
!= (unsigned int) vectorization_factor
5482 && vect_print_dump_info (REPORT_DETAILS
))
5483 /* For SLP VF is set according to unrolling factor, and not to
5484 vector size, hence for SLP this print is not valid. */
5485 fprintf (vect_dump
, "multiple-types.");
5487 /* SLP. Schedule all the SLP instances when the first SLP stmt is
5489 if (STMT_SLP_TYPE (stmt_info
))
5493 slp_scheduled
= true;
5495 if (vect_print_dump_info (REPORT_DETAILS
))
5496 fprintf (vect_dump
, "=== scheduling SLP instances ===");
5498 vect_schedule_slp (loop_vinfo
, NULL
);
5501 /* Hybrid SLP stmts must be vectorized in addition to SLP. */
5502 if (!vinfo_for_stmt (stmt
) || PURE_SLP_STMT (stmt_info
))
5504 if (!transform_pattern_stmt
&& gsi_end_p (pattern_def_si
))
5506 pattern_def_seq
= NULL
;
5513 /* -------- vectorize statement ------------ */
5514 if (vect_print_dump_info (REPORT_DETAILS
))
5515 fprintf (vect_dump
, "transform statement.");
5517 grouped_store
= false;
5518 is_store
= vect_transform_stmt (stmt
, &si
, &grouped_store
, NULL
, NULL
);
5521 if (STMT_VINFO_GROUPED_ACCESS (stmt_info
))
5523 /* Interleaving. If IS_STORE is TRUE, the vectorization of the
5524 interleaving chain was completed - free all the stores in
5527 vect_remove_stores (GROUP_FIRST_ELEMENT (stmt_info
));
5532 /* Free the attached stmt_vec_info and remove the stmt. */
5533 gimple store
= gsi_stmt (si
);
5534 free_stmt_vec_info (store
);
5535 unlink_stmt_vdef (store
);
5536 gsi_remove (&si
, true);
5537 release_defs (store
);
5542 if (!transform_pattern_stmt
&& gsi_end_p (pattern_def_si
))
5544 pattern_def_seq
= NULL
;
5550 slpeel_make_loop_iterate_ntimes (loop
, ratio
);
5552 /* The memory tags and pointers in vectorized statements need to
5553 have their SSA forms updated. FIXME, why can't this be delayed
5554 until all the loops have been transformed? */
5555 update_ssa (TODO_update_ssa
);
5557 if (vect_print_dump_info (REPORT_VECTORIZED_LOCATIONS
))
5558 fprintf (vect_dump
, "LOOP VECTORIZED.");
5559 if (loop
->inner
&& vect_print_dump_info (REPORT_VECTORIZED_LOCATIONS
))
5560 fprintf (vect_dump
, "OUTER LOOP VECTORIZED.");