-
-
-/* Free stmt vectorization related info. */
-
-void
-free_stmt_vec_info (tree stmt)
-{
- stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
-
- if (!stmt_info)
- return;
-
- VEC_free (dr_p, heap, STMT_VINFO_SAME_ALIGN_REFS (stmt_info));
- free (stmt_info);
- set_stmt_info (stmt_ann (stmt), NULL);
-}
-
-
-/* Function bb_in_loop_p
-
- Used as predicate for dfs order traversal of the loop bbs. */
-
-static bool
-bb_in_loop_p (const_basic_block bb, const void *data)
-{
- const struct loop *const loop = (const struct loop *)data;
- if (flow_bb_inside_loop_p (loop, bb))
- return true;
- return false;
-}
-
-
/* Function new_loop_vec_info.

   Create and initialize a new loop_vec_info struct for LOOP, as well as
   stmt_vec_info structs for all the stmts in LOOP.

   Ownership: the returned struct and the vectors allocated below are
   released by destroy_loop_vec_info.  */

loop_vec_info
new_loop_vec_info (struct loop *loop)
{
  loop_vec_info res;
  basic_block *bbs;
  block_stmt_iterator si;
  unsigned int i, nbbs;

  /* xcalloc zero-fills, so every field not explicitly set below starts
     out as 0/NULL.  */
  res = (loop_vec_info) xcalloc (1, sizeof (struct _loop_vec_info));
  LOOP_VINFO_LOOP (res) = loop;

  bbs = get_loop_body (loop);

  /* Create/Update stmt_info for all stmts in the loop. */
  for (i = 0; i < loop->num_nodes; i++)
    {
      basic_block bb = bbs[i];
      tree phi;

      /* BBs in a nested inner-loop will have been already processed (because
         we will have called vect_analyze_loop_form for any nested inner-loop).
         Therefore, for stmts in an inner-loop we just want to update the
         STMT_VINFO_LOOP_VINFO field of their stmt_info to point to the new
         loop_info of the outer-loop we are currently considering to vectorize
         (instead of the loop_info of the inner-loop).
         For stmts in other BBs we need to create a stmt_info from scratch. */
      if (bb->loop_father != loop)
        {
          /* Inner-loop bb: every phi/stmt must already carry an info whose
             loop_vinfo is the inner loop's; re-point it at RES.  */
          gcc_assert (loop->inner && bb->loop_father == loop->inner);
          for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
            {
              stmt_vec_info stmt_info = vinfo_for_stmt (phi);
              loop_vec_info inner_loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
              gcc_assert (loop->inner == LOOP_VINFO_LOOP (inner_loop_vinfo));
              STMT_VINFO_LOOP_VINFO (stmt_info) = res;
            }
          for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
            {
              tree stmt = bsi_stmt (si);
              stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
              loop_vec_info inner_loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
              gcc_assert (loop->inner == LOOP_VINFO_LOOP (inner_loop_vinfo));
              STMT_VINFO_LOOP_VINFO (stmt_info) = res;
            }
        }
      else
        {
          /* bb in current nest: allocate a fresh stmt_vec_info for each
             phi and each statement and attach it to the annotation.  */
          for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
            {
              stmt_ann_t ann = get_stmt_ann (phi);
              set_stmt_info (ann, new_stmt_vec_info (phi, res));
            }

          for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
            {
              tree stmt = bsi_stmt (si);
              stmt_ann_t ann = stmt_ann (stmt);
              set_stmt_info (ann, new_stmt_vec_info (stmt, res));
            }
        }
    }

  /* CHECKME: We want to visit all BBs before their successors (except for
     latch blocks, for which this assertion wouldn't hold). In the simple
     case of the loop forms we allow, a dfs order of the BBs would the same
     as reversed postorder traversal, so we are safe. */

  /* Replace get_loop_body's arbitrary order with a dfs enumeration so
     later passes can rely on dominator-friendly visiting order.  */
  free (bbs);
  bbs = XCNEWVEC (basic_block, loop->num_nodes);
  nbbs = dfs_enumerate_from (loop->header, 0, bb_in_loop_p,
                             bbs, loop->num_nodes, loop);
  gcc_assert (nbbs == loop->num_nodes);

  /* Initialize the remaining fields; the vector sizes below are initial
     allocation heuristics, not hard limits.  */
  LOOP_VINFO_BBS (res) = bbs;
  LOOP_VINFO_NITERS (res) = NULL;
  LOOP_VINFO_NITERS_UNCHANGED (res) = NULL;
  LOOP_VINFO_COST_MODEL_MIN_ITERS (res) = 0;
  LOOP_VINFO_VECTORIZABLE_P (res) = 0;
  LOOP_PEELING_FOR_ALIGNMENT (res) = 0;
  LOOP_VINFO_VECT_FACTOR (res) = 0;
  LOOP_VINFO_DATAREFS (res) = VEC_alloc (data_reference_p, heap, 10);
  LOOP_VINFO_DDRS (res) = VEC_alloc (ddr_p, heap, 10 * 10);
  LOOP_VINFO_UNALIGNED_DR (res) = NULL;
  LOOP_VINFO_MAY_MISALIGN_STMTS (res) =
    VEC_alloc (tree, heap, PARAM_VALUE (PARAM_VECT_MAX_VERSION_FOR_ALIGNMENT_CHECKS));
  LOOP_VINFO_MAY_ALIAS_DDRS (res) =
    VEC_alloc (ddr_p, heap, PARAM_VALUE (PARAM_VECT_MAX_VERSION_FOR_ALIAS_CHECKS));
  LOOP_VINFO_STRIDED_STORES (res) = VEC_alloc (tree, heap, 10);
  LOOP_VINFO_SLP_INSTANCES (res) = VEC_alloc (slp_instance, heap, 10);
  LOOP_VINFO_SLP_UNROLLING_FACTOR (res) = 1;

  return res;
}
-
-
-/* Function destroy_loop_vec_info.
-
- Free LOOP_VINFO struct, as well as all the stmt_vec_info structs of all the
- stmts in the loop. */
-
-void
-destroy_loop_vec_info (loop_vec_info loop_vinfo, bool clean_stmts)
-{
- struct loop *loop;
- basic_block *bbs;
- int nbbs;
- block_stmt_iterator si;
- int j;
- VEC (slp_instance, heap) *slp_instances;
- slp_instance instance;
-
- if (!loop_vinfo)
- return;
-
- loop = LOOP_VINFO_LOOP (loop_vinfo);
-
- bbs = LOOP_VINFO_BBS (loop_vinfo);
- nbbs = loop->num_nodes;
-
- if (!clean_stmts)
- {
- free (LOOP_VINFO_BBS (loop_vinfo));
- free_data_refs (LOOP_VINFO_DATAREFS (loop_vinfo));
- free_dependence_relations (LOOP_VINFO_DDRS (loop_vinfo));
- VEC_free (tree, heap, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo));
-
- free (loop_vinfo);
- loop->aux = NULL;
- return;
- }
-
- for (j = 0; j < nbbs; j++)
- {
- basic_block bb = bbs[j];
- tree phi;
-
- for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
- free_stmt_vec_info (phi);
-
- for (si = bsi_start (bb); !bsi_end_p (si); )
- {
- tree stmt = bsi_stmt (si);
- stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
-
- if (stmt_info)
- {
- /* Check if this is a "pattern stmt" (introduced by the
- vectorizer during the pattern recognition pass). */
- bool remove_stmt_p = false;
- tree orig_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
- if (orig_stmt)
- {
- stmt_vec_info orig_stmt_info = vinfo_for_stmt (orig_stmt);
- if (orig_stmt_info
- && STMT_VINFO_IN_PATTERN_P (orig_stmt_info))
- remove_stmt_p = true;
- }
-
- /* Free stmt_vec_info. */
- free_stmt_vec_info (stmt);
-
- /* Remove dead "pattern stmts". */
- if (remove_stmt_p)
- bsi_remove (&si, true);
- }
- bsi_next (&si);
- }
- }
-
- free (LOOP_VINFO_BBS (loop_vinfo));
- free_data_refs (LOOP_VINFO_DATAREFS (loop_vinfo));
- free_dependence_relations (LOOP_VINFO_DDRS (loop_vinfo));
- VEC_free (tree, heap, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo));
- VEC_free (ddr_p, heap, LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo));
- slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
- for (j = 0; VEC_iterate (slp_instance, slp_instances, j, instance); j++)
- vect_free_slp_tree (SLP_INSTANCE_TREE (instance));
- VEC_free (slp_instance, heap, LOOP_VINFO_SLP_INSTANCES (loop_vinfo));
- VEC_free (tree, heap, LOOP_VINFO_STRIDED_STORES (loop_vinfo));
-
- free (loop_vinfo);
- loop->aux = NULL;
-}
-
-
-/* Function vect_force_dr_alignment_p.
-
- Returns whether the alignment of a DECL can be forced to be aligned
- on ALIGNMENT bit boundary. */
-
-bool
-vect_can_force_dr_alignment_p (const_tree decl, unsigned int alignment)
-{
- if (TREE_CODE (decl) != VAR_DECL)
- return false;
-
- if (DECL_EXTERNAL (decl))
- return false;
-
- if (TREE_ASM_WRITTEN (decl))
- return false;
-
- if (TREE_STATIC (decl))
- return (alignment <= MAX_OFILE_ALIGNMENT);
- else
- /* This used to be PREFERRED_STACK_BOUNDARY, however, that is not 100%
- correct until someone implements forced stack alignment. */
- return (alignment <= STACK_BOUNDARY);
-}
-
-
-/* Function get_vectype_for_scalar_type.
-
- Returns the vector type corresponding to SCALAR_TYPE as supported
- by the target. */
-
-tree
-get_vectype_for_scalar_type (tree scalar_type)
-{
- enum machine_mode inner_mode = TYPE_MODE (scalar_type);
- int nbytes = GET_MODE_SIZE (inner_mode);
- int nunits;
- tree vectype;
-
- if (nbytes == 0 || nbytes >= UNITS_PER_SIMD_WORD (inner_mode))
- return NULL_TREE;
-
- /* FORNOW: Only a single vector size per mode (UNITS_PER_SIMD_WORD)
- is expected. */
- nunits = UNITS_PER_SIMD_WORD (inner_mode) / nbytes;
-
- vectype = build_vector_type (scalar_type, nunits);
- if (vect_print_dump_info (REPORT_DETAILS))
- {
- fprintf (vect_dump, "get vectype with %d units of type ", nunits);
- print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
- }
-
- if (!vectype)
- return NULL_TREE;
-
- if (vect_print_dump_info (REPORT_DETAILS))
- {
- fprintf (vect_dump, "vectype: ");
- print_generic_expr (vect_dump, vectype, TDF_SLIM);
- }
-
- if (!VECTOR_MODE_P (TYPE_MODE (vectype))
- && !INTEGRAL_MODE_P (TYPE_MODE (vectype)))
- {
- if (vect_print_dump_info (REPORT_DETAILS))
- fprintf (vect_dump, "mode not supported by target.");
- return NULL_TREE;
- }
-
- return vectype;
-}
-
-
-/* Function vect_supportable_dr_alignment
-
- Return whether the data reference DR is supported with respect to its
- alignment. */
-
-enum dr_alignment_support
-vect_supportable_dr_alignment (struct data_reference *dr)
-{
- tree stmt = DR_STMT (dr);
- stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
- tree vectype = STMT_VINFO_VECTYPE (stmt_info);
- enum machine_mode mode = (int) TYPE_MODE (vectype);
- struct loop *vect_loop = LOOP_VINFO_LOOP (STMT_VINFO_LOOP_VINFO (stmt_info));
- bool nested_in_vect_loop = nested_in_vect_loop_p (vect_loop, stmt);
- bool invariant_in_outerloop = false;
-
- if (aligned_access_p (dr))
- return dr_aligned;
-
- if (nested_in_vect_loop)
- {
- tree outerloop_step = STMT_VINFO_DR_STEP (stmt_info);
- invariant_in_outerloop =
- (tree_int_cst_compare (outerloop_step, size_zero_node) == 0);
- }
-
- /* Possibly unaligned access. */
-
- /* We can choose between using the implicit realignment scheme (generating
- a misaligned_move stmt) and the explicit realignment scheme (generating
- aligned loads with a REALIGN_LOAD). There are two variants to the explicit
- realignment scheme: optimized, and unoptimized.
- We can optimize the realignment only if the step between consecutive
- vector loads is equal to the vector size. Since the vector memory
- accesses advance in steps of VS (Vector Size) in the vectorized loop, it
- is guaranteed that the misalignment amount remains the same throughout the
- execution of the vectorized loop. Therefore, we can create the
- "realignment token" (the permutation mask that is passed to REALIGN_LOAD)
- at the loop preheader.
-
- However, in the case of outer-loop vectorization, when vectorizing a
- memory access in the inner-loop nested within the LOOP that is now being
- vectorized, while it is guaranteed that the misalignment of the
- vectorized memory access will remain the same in different outer-loop
- iterations, it is *not* guaranteed that is will remain the same throughout
- the execution of the inner-loop. This is because the inner-loop advances
- with the original scalar step (and not in steps of VS). If the inner-loop
- step happens to be a multiple of VS, then the misalignment remains fixed
- and we can use the optimized realignment scheme. For example:
-
- for (i=0; i<N; i++)
- for (j=0; j<M; j++)
- s += a[i+j];
-
- When vectorizing the i-loop in the above example, the step between
- consecutive vector loads is 1, and so the misalignment does not remain
- fixed across the execution of the inner-loop, and the realignment cannot
- be optimized (as illustrated in the following pseudo vectorized loop):
-
- for (i=0; i<N; i+=4)
- for (j=0; j<M; j++){
- vs += vp[i+j]; // misalignment of &vp[i+j] is {0,1,2,3,0,1,2,3,...}
- // when j is {0,1,2,3,4,5,6,7,...} respectively.
- // (assuming that we start from an aligned address).
- }
-
- We therefore have to use the unoptimized realignment scheme:
-
- for (i=0; i<N; i+=4)
- for (j=k; j<M; j+=4)
- vs += vp[i+j]; // misalignment of &vp[i+j] is always k (assuming
- // that the misalignment of the initial address is
- // 0).
-
- The loop can then be vectorized as follows:
-
- for (k=0; k<4; k++){
- rt = get_realignment_token (&vp[k]);
- for (i=0; i<N; i+=4){
- v1 = vp[i+k];
- for (j=k; j<M; j+=4){
- v2 = vp[i+j+VS-1];
- va = REALIGN_LOAD <v1,v2,rt>;
- vs += va;
- v1 = v2;
- }
- }
- } */
-
- if (DR_IS_READ (dr))
- {
- if (optab_handler (vec_realign_load_optab, mode)->insn_code !=
- CODE_FOR_nothing
- && (!targetm.vectorize.builtin_mask_for_load
- || targetm.vectorize.builtin_mask_for_load ()))
- {
- tree vectype = STMT_VINFO_VECTYPE (stmt_info);
- if (nested_in_vect_loop
- && (TREE_INT_CST_LOW (DR_STEP (dr))
- != GET_MODE_SIZE (TYPE_MODE (vectype))))
- return dr_explicit_realign;
- else
- return dr_explicit_realign_optimized;
- }
-
- if (optab_handler (movmisalign_optab, mode)->insn_code !=
- CODE_FOR_nothing)
- /* Can't software pipeline the loads, but can at least do them. */
- return dr_unaligned_supported;
- }
-
- /* Unsupported. */
- return dr_unaligned_unsupported;
-}
-
-
/* Function vect_is_simple_use.

   Input:
   LOOP - the loop that is being vectorized.
   OPERAND - operand of a stmt in LOOP.
   DEF - the defining stmt in case OPERAND is an SSA_NAME.

   Returns whether a stmt with OPERAND can be vectorized.
   Supportable operands are constants, loop invariants, and operands that are
   defined by the current iteration of the loop. Unsupportable operands are
   those that are defined by a previous iteration of the loop (as is the case
   in reduction/induction computations).

   Outputs: *DEF_STMT and *DEF are filled in (NULL_TREE when not
   applicable) and *DT classifies the definition.  */

bool
vect_is_simple_use (tree operand, loop_vec_info loop_vinfo, tree *def_stmt,
                    tree *def, enum vect_def_type *dt)
{
  basic_block bb;
  stmt_vec_info stmt_vinfo;
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);

  *def_stmt = NULL_TREE;
  *def = NULL_TREE;

  if (vect_print_dump_info (REPORT_DETAILS))
    {
      fprintf (vect_dump, "vect_is_simple_use: operand ");
      print_generic_expr (vect_dump, operand, TDF_SLIM);
    }

  /* Literal constants need no def stmt at all.  */
  if (TREE_CODE (operand) == INTEGER_CST || TREE_CODE (operand) == REAL_CST)
    {
      *dt = vect_constant_def;
      return true;
    }
  /* Other gimple invariants (e.g. addresses) are their own "def".  */
  if (is_gimple_min_invariant (operand))
    {
      *def = operand;
      *dt = vect_invariant_def;
      return true;
    }

  /* Look through a PAREN_EXPR wrapper to the underlying operand.  */
  if (TREE_CODE (operand) == PAREN_EXPR)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "non-associatable copy.");
      operand = TREE_OPERAND (operand, 0);
    }
  if (TREE_CODE (operand) != SSA_NAME)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "not ssa-name.");
      return false;
    }

  *def_stmt = SSA_NAME_DEF_STMT (operand);
  if (*def_stmt == NULL_TREE )
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "no def_stmt.");
      return false;
    }

  if (vect_print_dump_info (REPORT_DETAILS))
    {
      fprintf (vect_dump, "def_stmt: ");
      print_generic_expr (vect_dump, *def_stmt, TDF_SLIM);
    }

  /* empty stmt is expected only in case of a function argument.
     (Otherwise - we expect a phi_node or a GIMPLE_MODIFY_STMT). */
  if (IS_EMPTY_STMT (*def_stmt))
    {
      tree arg = TREE_OPERAND (*def_stmt, 0);
      if (is_gimple_min_invariant (arg))
        {
          *def = operand;
          *dt = vect_invariant_def;
          return true;
        }

      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "Unexpected empty stmt.");
      return false;
    }

  /* Defs outside the loop are invariant; defs inside are classified by
     the def-type recorded during analysis.  */
  bb = bb_for_stmt (*def_stmt);
  if (!flow_bb_inside_loop_p (loop, bb))
    *dt = vect_invariant_def;
  else
    {
      stmt_vinfo = vinfo_for_stmt (*def_stmt);
      *dt = STMT_VINFO_DEF_TYPE (stmt_vinfo);
    }

  if (*dt == vect_unknown_def_type)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "Unsupported pattern.");
      return false;
    }

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "type of def: %d.",*dt);

  /* Extract the value defined by the def stmt.  */
  switch (TREE_CODE (*def_stmt))
    {
    case PHI_NODE:
      *def = PHI_RESULT (*def_stmt);
      break;

    case GIMPLE_MODIFY_STMT:
      *def = GIMPLE_STMT_OPERAND (*def_stmt, 0);
      break;

    default:
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "unsupported defining stmt: ");
      return false;
    }

  return true;
}
-
-
-/* Function supportable_widening_operation
-
- Check whether an operation represented by the code CODE is a
- widening operation that is supported by the target platform in
- vector form (i.e., when operating on arguments of type VECTYPE).
-
- Widening operations we currently support are NOP (CONVERT), FLOAT
- and WIDEN_MULT. This function checks if these operations are supported
- by the target platform either directly (via vector tree-codes), or via
- target builtins.
-
- Output:
- - CODE1 and CODE2 are codes of vector operations to be used when
- vectorizing the operation, if available.
- - DECL1 and DECL2 are decls of target builtin functions to be used
- when vectorizing the operation, if available. In this case,
- CODE1 and CODE2 are CALL_EXPR. */
-
-bool
-supportable_widening_operation (enum tree_code code, tree stmt, tree vectype,
- tree *decl1, tree *decl2,
- enum tree_code *code1, enum tree_code *code2)
-{
- stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
- loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_info);
- struct loop *vect_loop = LOOP_VINFO_LOOP (loop_info);
- bool ordered_p;
- enum machine_mode vec_mode;
- enum insn_code icode1, icode2;
- optab optab1, optab2;
- tree expr = GIMPLE_STMT_OPERAND (stmt, 1);
- tree type = TREE_TYPE (expr);
- tree wide_vectype = get_vectype_for_scalar_type (type);
- enum tree_code c1, c2;
-
- /* The result of a vectorized widening operation usually requires two vectors
- (because the widened results do not fit int one vector). The generated
- vector results would normally be expected to be generated in the same
- order as in the original scalar computation, i.e. if 8 results are
- generated in each vector iteration, they are to be organized as follows:
- vect1: [res1,res2,res3,res4], vect2: [res5,res6,res7,res8].
-
- However, in the special case that the result of the widening operation is
- used in a reduction computation only, the order doesn't matter (because
- when vectorizing a reduction we change the order of the computation).
- Some targets can take advantage of this and generate more efficient code.
- For example, targets like Altivec, that support widen_mult using a sequence
- of {mult_even,mult_odd} generate the following vectors:
- vect1: [res1,res3,res5,res7], vect2: [res2,res4,res6,res8].
-
- When vectorizing outer-loops, we execute the inner-loop sequentially
- (each vectorized inner-loop iteration contributes to VF outer-loop
- iterations in parallel). We therefore don't allow to change the order
- of the computation in the inner-loop during outer-loop vectorization. */
-
- if (STMT_VINFO_RELEVANT (stmt_info) == vect_used_by_reduction
- && !nested_in_vect_loop_p (vect_loop, stmt))
- ordered_p = false;
- else
- ordered_p = true;
-
- if (!ordered_p
- && code == WIDEN_MULT_EXPR
- && targetm.vectorize.builtin_mul_widen_even
- && targetm.vectorize.builtin_mul_widen_even (vectype)
- && targetm.vectorize.builtin_mul_widen_odd
- && targetm.vectorize.builtin_mul_widen_odd (vectype))
- {
- if (vect_print_dump_info (REPORT_DETAILS))
- fprintf (vect_dump, "Unordered widening operation detected.");
-
- *code1 = *code2 = CALL_EXPR;
- *decl1 = targetm.vectorize.builtin_mul_widen_even (vectype);
- *decl2 = targetm.vectorize.builtin_mul_widen_odd (vectype);
- return true;
- }
-
- switch (code)
- {
- case WIDEN_MULT_EXPR:
- if (BYTES_BIG_ENDIAN)
- {
- c1 = VEC_WIDEN_MULT_HI_EXPR;
- c2 = VEC_WIDEN_MULT_LO_EXPR;
- }
- else
- {
- c2 = VEC_WIDEN_MULT_HI_EXPR;
- c1 = VEC_WIDEN_MULT_LO_EXPR;
- }
- break;
-
- CASE_CONVERT:
- if (BYTES_BIG_ENDIAN)
- {
- c1 = VEC_UNPACK_HI_EXPR;
- c2 = VEC_UNPACK_LO_EXPR;
- }
- else
- {
- c2 = VEC_UNPACK_HI_EXPR;
- c1 = VEC_UNPACK_LO_EXPR;
- }
- break;
-
- case FLOAT_EXPR:
- if (BYTES_BIG_ENDIAN)
- {
- c1 = VEC_UNPACK_FLOAT_HI_EXPR;
- c2 = VEC_UNPACK_FLOAT_LO_EXPR;
- }
- else
- {
- c2 = VEC_UNPACK_FLOAT_HI_EXPR;
- c1 = VEC_UNPACK_FLOAT_LO_EXPR;
- }
- break;
-
- case FIX_TRUNC_EXPR:
- /* ??? Not yet implemented due to missing VEC_UNPACK_FIX_TRUNC_HI_EXPR/
- VEC_UNPACK_FIX_TRUNC_LO_EXPR tree codes and optabs used for
- computing the operation. */
- return false;
-
- default:
- gcc_unreachable ();
- }
-
- if (code == FIX_TRUNC_EXPR)
- {
- /* The signedness is determined from output operand. */
- optab1 = optab_for_tree_code (c1, type, optab_default);
- optab2 = optab_for_tree_code (c2, type, optab_default);
- }
- else
- {
- optab1 = optab_for_tree_code (c1, vectype, optab_default);
- optab2 = optab_for_tree_code (c2, vectype, optab_default);
- }
-
- if (!optab1 || !optab2)
- return false;
-
- vec_mode = TYPE_MODE (vectype);
- if ((icode1 = optab_handler (optab1, vec_mode)->insn_code) == CODE_FOR_nothing
- || insn_data[icode1].operand[0].mode != TYPE_MODE (wide_vectype)
- || (icode2 = optab_handler (optab2, vec_mode)->insn_code)
- == CODE_FOR_nothing
- || insn_data[icode2].operand[0].mode != TYPE_MODE (wide_vectype))
- return false;
-
- *code1 = c1;
- *code2 = c2;
- return true;
-}
-
-
-/* Function supportable_narrowing_operation
-
- Check whether an operation represented by the code CODE is a
- narrowing operation that is supported by the target platform in
- vector form (i.e., when operating on arguments of type VECTYPE).
-
- Narrowing operations we currently support are NOP (CONVERT) and
- FIX_TRUNC. This function checks if these operations are supported by
- the target platform directly via vector tree-codes.
-
- Output:
- - CODE1 is the code of a vector operation to be used when
- vectorizing the operation, if available. */
-
-bool
-supportable_narrowing_operation (enum tree_code code,
- const_tree stmt, const_tree vectype,
- enum tree_code *code1)
-{
- enum machine_mode vec_mode;
- enum insn_code icode1;
- optab optab1;
- tree expr = GIMPLE_STMT_OPERAND (stmt, 1);
- tree type = TREE_TYPE (expr);
- tree narrow_vectype = get_vectype_for_scalar_type (type);
- enum tree_code c1;
-
- switch (code)
- {
- CASE_CONVERT:
- c1 = VEC_PACK_TRUNC_EXPR;
- break;
-
- case FIX_TRUNC_EXPR:
- c1 = VEC_PACK_FIX_TRUNC_EXPR;
- break;
-
- case FLOAT_EXPR:
- /* ??? Not yet implemented due to missing VEC_PACK_FLOAT_EXPR
- tree code and optabs used for computing the operation. */
- return false;
-
- default:
- gcc_unreachable ();
- }
-
- if (code == FIX_TRUNC_EXPR)
- /* The signedness is determined from output operand. */
- optab1 = optab_for_tree_code (c1, type, optab_default);
- else
- optab1 = optab_for_tree_code (c1, vectype, optab_default);
-
- if (!optab1)
- return false;
-
- vec_mode = TYPE_MODE (vectype);
- if ((icode1 = optab_handler (optab1, vec_mode)->insn_code) == CODE_FOR_nothing
- || insn_data[icode1].operand[0].mode != TYPE_MODE (narrow_vectype))
- return false;
-
- *code1 = c1;
- return true;
-}
-
-
-/* Function reduction_code_for_scalar_code
-
- Input:
- CODE - tree_code of a reduction operations.
-
- Output:
- REDUC_CODE - the corresponding tree-code to be used to reduce the
- vector of partial results into a single scalar result (which
- will also reside in a vector).
-
- Return TRUE if a corresponding REDUC_CODE was found, FALSE otherwise. */
-
-bool
-reduction_code_for_scalar_code (enum tree_code code,
- enum tree_code *reduc_code)
-{
- switch (code)
- {
- case MAX_EXPR:
- *reduc_code = REDUC_MAX_EXPR;
- return true;
-
- case MIN_EXPR:
- *reduc_code = REDUC_MIN_EXPR;
- return true;
-
- case PLUS_EXPR:
- *reduc_code = REDUC_PLUS_EXPR;
- return true;
-
- default:
- return false;
- }
-}
-
-
/* Function vect_is_simple_reduction

   Detect a cross-iteration def-use cycle that represents a simple
   reduction computation. We look for the following pattern:

   loop_header:
     a1 = phi < a0, a2 >
     a3 = ...
     a2 = operation (a3, a1)

   such that:
   1. operation is commutative and associative and it is safe to
      change the order of the computation.
   2. no uses for a2 in the loop (a2 is used out of the loop)
   3. no uses of a1 in the loop besides the reduction operation.

   Condition 1 is tested here.
   Conditions 2,3 are tested in vect_mark_stmts_to_be_vectorized.

   Returns the reduction's defining stmt, or NULL_TREE if PHI does not
   head a recognizable reduction cycle.  */

tree
vect_is_simple_reduction (loop_vec_info loop_info, tree phi)
{
  struct loop *loop = (bb_for_stmt (phi))->loop_father;
  struct loop *vect_loop = LOOP_VINFO_LOOP (loop_info);
  edge latch_e = loop_latch_edge (loop);
  tree loop_arg = PHI_ARG_DEF_FROM_EDGE (phi, latch_e);
  tree def_stmt, def1, def2;
  enum tree_code code;
  int op_type;
  tree operation, op1, op2;
  tree type;
  int nloop_uses;
  tree name;
  imm_use_iterator imm_iter;
  use_operand_p use_p;

  /* PHI lives either in the loop being vectorized or in a loop nested
     inside it (outer-loop vectorization).  */
  gcc_assert (loop == vect_loop || flow_loop_nested_p (vect_loop, loop));

  /* The phi result may be used in the loop only by the reduction stmt
     itself (pattern stmts don't count).  */
  name = PHI_RESULT (phi);
  nloop_uses = 0;
  FOR_EACH_IMM_USE_FAST (use_p, imm_iter, name)
    {
      tree use_stmt = USE_STMT (use_p);
      if (flow_bb_inside_loop_p (loop, bb_for_stmt (use_stmt))
          && vinfo_for_stmt (use_stmt)
          && !is_pattern_stmt_p (vinfo_for_stmt (use_stmt)))
        nloop_uses++;
      if (nloop_uses > 1)
        {
          if (vect_print_dump_info (REPORT_DETAILS))
            fprintf (vect_dump, "reduction used in loop.");
          return NULL_TREE;
        }
    }

  if (TREE_CODE (loop_arg) != SSA_NAME)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        {
          fprintf (vect_dump, "reduction: not ssa_name: ");
          print_generic_expr (vect_dump, loop_arg, TDF_SLIM);
        }
      return NULL_TREE;
    }

  def_stmt = SSA_NAME_DEF_STMT (loop_arg);
  if (!def_stmt)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "reduction: no def_stmt.");
      return NULL_TREE;
    }

  if (TREE_CODE (def_stmt) != GIMPLE_MODIFY_STMT)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        print_generic_expr (vect_dump, def_stmt, TDF_SLIM);
      return NULL_TREE;
    }

  /* Likewise, the value fed back through the latch may have at most one
     in-loop use (the phi itself is not counted — it has no vinfo yet
     or is a pattern stmt; same filter as above).  */
  name = GIMPLE_STMT_OPERAND (def_stmt, 0);
  nloop_uses = 0;
  FOR_EACH_IMM_USE_FAST (use_p, imm_iter, name)
    {
      tree use_stmt = USE_STMT (use_p);
      if (flow_bb_inside_loop_p (loop, bb_for_stmt (use_stmt))
          && vinfo_for_stmt (use_stmt)
          && !is_pattern_stmt_p (vinfo_for_stmt (use_stmt)))
        nloop_uses++;
      if (nloop_uses > 1)
        {
          if (vect_print_dump_info (REPORT_DETAILS))
            fprintf (vect_dump, "reduction used in loop.");
          return NULL_TREE;
        }
    }

  /* Condition 1: the operation must be reorderable.  */
  operation = GIMPLE_STMT_OPERAND (def_stmt, 1);
  code = TREE_CODE (operation);
  if (!commutative_tree_code (code) || !associative_tree_code (code))
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        {
          fprintf (vect_dump, "reduction: not commutative/associative: ");
          print_generic_expr (vect_dump, operation, TDF_SLIM);
        }
      return NULL_TREE;
    }

  op_type = TREE_OPERAND_LENGTH (operation);
  if (op_type != binary_op)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        {
          fprintf (vect_dump, "reduction: not binary operation: ");
          print_generic_expr (vect_dump, operation, TDF_SLIM);
        }
      return NULL_TREE;
    }

  op1 = TREE_OPERAND (operation, 0);
  op2 = TREE_OPERAND (operation, 1);
  if (TREE_CODE (op1) != SSA_NAME || TREE_CODE (op2) != SSA_NAME)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        {
          fprintf (vect_dump, "reduction: uses not ssa_names: ");
          print_generic_expr (vect_dump, operation, TDF_SLIM);
        }
      return NULL_TREE;
    }

  /* Check that it's ok to change the order of the computation. */
  type = TREE_TYPE (operation);
  if (TYPE_MAIN_VARIANT (type) != TYPE_MAIN_VARIANT (TREE_TYPE (op1))
      || TYPE_MAIN_VARIANT (type) != TYPE_MAIN_VARIANT (TREE_TYPE (op2)))
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        {
          fprintf (vect_dump, "reduction: multiple types: operation type: ");
          print_generic_expr (vect_dump, type, TDF_SLIM);
          fprintf (vect_dump, ", operands types: ");
          print_generic_expr (vect_dump, TREE_TYPE (op1), TDF_SLIM);
          fprintf (vect_dump, ",");
          print_generic_expr (vect_dump, TREE_TYPE (op2), TDF_SLIM);
        }
      return NULL_TREE;
    }

  /* Generally, when vectorizing a reduction we change the order of the
     computation. This may change the behavior of the program in some
     cases, so we need to check that this is ok. One exception is when
     vectorizing an outer-loop: the inner-loop is executed sequentially,
     and therefore vectorizing reductions in the inner-loop during
     outer-loop vectorization is safe. */

  /* CHECKME: check for !flag_finite_math_only too? */
  if (SCALAR_FLOAT_TYPE_P (type) && !flag_associative_math
      && !nested_in_vect_loop_p (vect_loop, def_stmt))
    {
      /* Changing the order of operations changes the semantics. */
      if (vect_print_dump_info (REPORT_DETAILS))
        {
          fprintf (vect_dump, "reduction: unsafe fp math optimization: ");
          print_generic_expr (vect_dump, operation, TDF_SLIM);
        }
      return NULL_TREE;
    }
  else if (INTEGRAL_TYPE_P (type) && TYPE_OVERFLOW_TRAPS (type)
           && !nested_in_vect_loop_p (vect_loop, def_stmt))
    {
      /* Changing the order of operations changes the semantics. */
      if (vect_print_dump_info (REPORT_DETAILS))
        {
          fprintf (vect_dump, "reduction: unsafe int math optimization: ");
          print_generic_expr (vect_dump, operation, TDF_SLIM);
        }
      return NULL_TREE;
    }
  else if (SAT_FIXED_POINT_TYPE_P (type))
    {
      /* Changing the order of operations changes the semantics. */
      if (vect_print_dump_info (REPORT_DETAILS))
        {
          fprintf (vect_dump, "reduction: unsafe fixed-point math optimization: ");
          print_generic_expr (vect_dump, operation, TDF_SLIM);
        }
      return NULL_TREE;
    }

  /* reduction is safe. we're dealing with one of the following:
     1) integer arithmetic and no trapv
     2) floating point arithmetic, and special flags permit this optimization.
   */
  def1 = SSA_NAME_DEF_STMT (op1);
  def2 = SSA_NAME_DEF_STMT (op2);
  if (!def1 || !def2 || IS_EMPTY_STMT (def1) || IS_EMPTY_STMT (def2))
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        {
          fprintf (vect_dump, "reduction: no defs for operands: ");
          print_generic_expr (vect_dump, operation, TDF_SLIM);
        }
      return NULL_TREE;
    }


  /* Check that one def is the reduction def, defined by PHI,
     the other def is either defined in the loop ("vect_loop_def"),
     or it's an induction (defined by a loop-header phi-node). */

  if (def2 == phi
      && flow_bb_inside_loop_p (loop, bb_for_stmt (def1))
      && (TREE_CODE (def1) == GIMPLE_MODIFY_STMT
          || STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def1)) == vect_induction_def
          || (TREE_CODE (def1) == PHI_NODE
              && STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def1)) == vect_loop_def
              && !is_loop_header_bb_p (bb_for_stmt (def1)))))
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        {
          fprintf (vect_dump, "detected reduction:");
          print_generic_expr (vect_dump, operation, TDF_SLIM);
        }
      return def_stmt;
    }
  else if (def1 == phi
           && flow_bb_inside_loop_p (loop, bb_for_stmt (def2))
           && (TREE_CODE (def2) == GIMPLE_MODIFY_STMT
               || STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def2)) == vect_induction_def
               || (TREE_CODE (def2) == PHI_NODE
                   && STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def2)) == vect_loop_def
                   && !is_loop_header_bb_p (bb_for_stmt (def2)))))
    {
      /* Swap operands (just for simplicity - so that the rest of the code
         can assume that the reduction variable is always the last (second)
         argument). */
      if (vect_print_dump_info (REPORT_DETAILS))
        {
          fprintf (vect_dump, "detected reduction: need to swap operands:");
          print_generic_expr (vect_dump, operation, TDF_SLIM);
        }
      swap_tree_operands (def_stmt, &TREE_OPERAND (operation, 0),
                          &TREE_OPERAND (operation, 1));
      return def_stmt;
    }
  else
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        {
          fprintf (vect_dump, "reduction: unknown pattern.");
          print_generic_expr (vect_dump, operation, TDF_SLIM);
        }
      return NULL_TREE;
    }
}
-
-
-/* Function vect_is_simple_iv_evolution.
-
- FORNOW: A simple evolution of an induction variables in the loop is
- considered a polynomial evolution with constant step. */
-
-bool
-vect_is_simple_iv_evolution (unsigned loop_nb, tree access_fn, tree * init,
- tree * step)
-{
- tree init_expr;
- tree step_expr;
- tree evolution_part = evolution_part_in_loop_num (access_fn, loop_nb);
-
- /* When there is no evolution in this loop, the evolution function
- is not "simple". */
- if (evolution_part == NULL_TREE)
- return false;
-
- /* When the evolution is a polynomial of degree >= 2
- the evolution function is not "simple". */
- if (tree_is_chrec (evolution_part))
- return false;
-
- step_expr = evolution_part;
- init_expr = unshare_expr (initial_condition_in_loop_num (access_fn, loop_nb));
-
- if (vect_print_dump_info (REPORT_DETAILS))
- {
- fprintf (vect_dump, "step: ");
- print_generic_expr (vect_dump, step_expr, TDF_SLIM);
- fprintf (vect_dump, ", init: ");
- print_generic_expr (vect_dump, init_expr, TDF_SLIM);
- }
-
- *init = init_expr;
- *step = step_expr;
-
- if (TREE_CODE (step_expr) != INTEGER_CST)
- {
- if (vect_print_dump_info (REPORT_DETAILS))
- fprintf (vect_dump, "step unknown.");
- return false;
- }
-
- return true;
-}
-
-
/* Function vectorize_loops.

   Entry Point to loop vectorization phase.  Analyzes every natural loop,
   transforms the vectorizable ones, and returns TODO flags for the pass
   manager (TODO_cleanup_cfg when at least one loop was vectorized).  */

unsigned
vectorize_loops (void)
{
  unsigned int i;
  unsigned int num_vectorized_loops = 0;
  unsigned int vect_loops_num;
  loop_iterator li;
  struct loop *loop;

  vect_loops_num = number_of_loops ();

  /* Bail out if there are no loops. */
  if (vect_loops_num <= 1)
    return 0;

  /* Fix the verbosity level if not defined explicitly by the user. */
  vect_set_dump_settings ();

  /* Allocate the bitmap that records which virtual variables that
     need to be renamed. */
  vect_memsyms_to_rename = BITMAP_ALLOC (NULL);

  /* ----------- Analyze loops. ----------- */

  /* If some loop was duplicated, it gets bigger number
     than all previously defined loops. This fact allows us to run
     only over initial loops skipping newly generated ones. */
  FOR_EACH_LOOP (li, loop, 0)
    {
      loop_vec_info loop_vinfo;

      vect_loop_location = find_loop_location (loop);
      loop_vinfo = vect_analyze_loop (loop);
      /* Stash the analysis result on the loop; the cleanup pass below
         retrieves and destroys it via loop->aux.  */
      loop->aux = loop_vinfo;

      if (!loop_vinfo || !LOOP_VINFO_VECTORIZABLE_P (loop_vinfo))
        continue;

      vect_transform_loop (loop_vinfo);
      num_vectorized_loops++;
    }
  vect_loop_location = UNKNOWN_LOC;

  statistics_counter_event (cfun, "Vectorized loops", num_vectorized_loops);
  if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS)
      || (vect_print_dump_info (REPORT_VECTORIZED_LOOPS)
          && num_vectorized_loops > 0))
    fprintf (vect_dump, "vectorized %u loops in function.\n",
             num_vectorized_loops);

  /* ----------- Finalize. ----------- */

  BITMAP_FREE (vect_memsyms_to_rename);

  /* Destroy the loop_vec_infos created during analysis; start at 1
     because loop 0 is the function body pseudo-loop.  */
  for (i = 1; i < vect_loops_num; i++)
    {
      loop_vec_info loop_vinfo;

      loop = get_loop (i);
      if (!loop)
        continue;
      loop_vinfo = loop->aux;
      destroy_loop_vec_info (loop_vinfo, true);
      loop->aux = NULL;
    }

  return num_vectorized_loops > 0 ? TODO_cleanup_cfg : 0;
}
-
-/* Increase alignment of global arrays to improve vectorization potential.
- TODO:
- - Consider also structs that have an array field.
- - Use ipa analysis to prune arrays that can't be vectorized?
- This should involve global alignment analysis and in the future also
- array padding. */
-
-static unsigned int
-increase_alignment (void)
-{
- struct varpool_node *vnode;
-
- /* Increase the alignment of all global arrays for vectorization. */
- for (vnode = varpool_nodes_queue;
- vnode;
- vnode = vnode->next_needed)
- {
- tree vectype, decl = vnode->decl;
- unsigned int alignment;
-
- if (TREE_CODE (TREE_TYPE (decl)) != ARRAY_TYPE)
- continue;
- vectype = get_vectype_for_scalar_type (TREE_TYPE (TREE_TYPE (decl)));
- if (!vectype)
- continue;
- alignment = TYPE_ALIGN (vectype);
- if (DECL_ALIGN (decl) >= alignment)
- continue;
-
- if (vect_can_force_dr_alignment_p (decl, alignment))
- {
- DECL_ALIGN (decl) = TYPE_ALIGN (vectype);
- DECL_USER_ALIGN (decl) = 1;
- if (dump_file)
- {
- fprintf (dump_file, "Increasing alignment of decl: ");
- print_generic_expr (dump_file, decl, TDF_SLIM);
- }
- }
- }
- return 0;
-}
-
-static bool
-gate_increase_alignment (void)
-{
- return flag_section_anchors && flag_tree_vectorize;
-}
-
/* Pass descriptor for the IPA pass that raises the alignment of global
   arrays (see increase_alignment above).  Registered with the pass
   manager; gated by gate_increase_alignment.  */

struct simple_ipa_opt_pass pass_ipa_increase_alignment =
{
 {
  SIMPLE_IPA_PASS,
  "increase_alignment",	        	/* name */
  gate_increase_alignment,		/* gate */
  increase_alignment,	        	/* execute */
  NULL,					/* sub */
  NULL,					/* next */
  0,					/* static_pass_number */
  0,					/* tv_id */
  0,					/* properties_required */
  0,					/* properties_provided */
  0,					/* properties_destroyed */
  0,					/* todo_flags_start */
  0					/* todo_flags_finish */
 }
};