From b3372425ecf3e225d7a2b3c73e061e11498b6f74 Mon Sep 17 00:00:00 2001 From: Kewen Lin Date: Thu, 4 Jun 2020 10:09:01 +0800 Subject: [PATCH] vect: Rename fully_masked_p to using_partial_vectors_p Power supports vector memory access with length (in bytes) instructions. Like existing fully masking for SVE, it is another approach to vectorize the loop using partially-populated vectors. As Richard Sandiford suggested, this patch is to update the existing fully_masked_p field to using_partial_vectors_p. Introduce one macro LOOP_VINFO_USING_PARTIAL_VECTORS_P for partial vectorization checking usage, update the LOOP_VINFO_FULLY_MASKED_P with LOOP_VINFO_USING_PARTIAL_VECTORS_P && !masks.is_empty() and still use it for mask-based partial vectors approach specific checks. Bootstrapped/regtested on aarch64-linux-gnu. gcc/ChangeLog: * tree-vect-loop-manip.c (vect_set_loop_condition): Rename LOOP_VINFO_FULLY_MASKED_P to LOOP_VINFO_USING_PARTIAL_VECTORS_P. (vect_gen_vector_loop_niters): Likewise. (vect_do_peeling): Likewise. * tree-vect-loop.c (_loop_vec_info::_loop_vec_info): Rename fully_masked_p to using_partial_vectors_p. (vect_analyze_loop_costing): Rename LOOP_VINFO_FULLY_MASKED_P to LOOP_VINFO_USING_PARTIAL_VECTORS_P. (determine_peel_for_niter): Likewise. (vect_estimate_min_profitable_iters): Likewise. (vect_transform_loop): Likewise. * tree-vectorizer.h (LOOP_VINFO_FULLY_MASKED_P): Updated. (LOOP_VINFO_USING_PARTIAL_VECTORS_P): New macro. --- gcc/tree-vect-loop-manip.c | 6 +++--- gcc/tree-vect-loop.c | 32 +++++++++++++++++--------------- gcc/tree-vectorizer.h | 11 ++++++++--- 3 files changed, 28 insertions(+), 21 deletions(-) diff --git a/gcc/tree-vect-loop-manip.c b/gcc/tree-vect-loop-manip.c index 8c5e696b995..4bb1c19e8d8 100644 --- a/gcc/tree-vect-loop-manip.c +++ b/gcc/tree-vect-loop-manip.c @@ -912,7 +912,7 @@ vect_set_loop_condition (class loop *loop, loop_vec_info loop_vinfo, gcond *orig_cond = get_loop_exit_condition (loop); gimple_stmt_iterator loop_cond_gsi = gsi_for_stmt (orig_cond); - if (loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)) + if (loop_vinfo && LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo)) cond_stmt = vect_set_loop_condition_masked (loop, loop_vinfo, niters, final_iv, niters_maybe_zero, loop_cond_gsi); @@ -1939,7 +1939,7 @@ vect_gen_vector_loop_niters (loop_vec_info loop_vinfo, tree niters, unsigned HOST_WIDE_INT const_vf; if (vf.is_constant (&const_vf) - && !LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)) + && !LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo)) { /* Create: niters >> log2(vf) */ /* If it's known that niters == number of latch executions + 1 doesn't @@ -2471,7 +2471,7 @@ vect_do_peeling (loop_vec_info loop_vinfo, tree niters, tree nitersm1, poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo); poly_uint64 bound_epilog = 0; - if (!LOOP_VINFO_FULLY_MASKED_P (loop_vinfo) + if (!LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo) && LOOP_VINFO_PEELING_FOR_NITER (loop_vinfo)) bound_epilog += vf - 1; if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo)) diff --git a/gcc/tree-vect-loop.c b/gcc/tree-vect-loop.c index cec903de047..a9a32e55063 100644 --- a/gcc/tree-vect-loop.c +++ b/gcc/tree-vect-loop.c @@ -815,7 +815,7 @@ _loop_vec_info::_loop_vec_info (class loop *loop_in, vec_info_shared *shared) vec_inside_cost (0), vectorizable (false), can_use_partial_vectors_p (true), - fully_masked_p (false), + using_partial_vectors_p (false), peeling_for_gaps (false), peeling_for_niter (false), no_data_dependencies (false), @@ -1634,9 +1634,9 @@ vect_analyze_loop_costing (loop_vec_info loop_vinfo) class loop *loop = LOOP_VINFO_LOOP (loop_vinfo); unsigned int assumed_vf = vect_vf_for_cost (loop_vinfo); - /* Only fully-masked loops can have iteration counts less than the - vectorization factor. */ - if (!LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)) + /* Only loops that can handle partially-populated vectors can have iteration + counts less than the vectorization factor. */ + if (!LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo)) { HOST_WIDE_INT max_niter; @@ -1872,7 +1872,7 @@ determine_peel_for_niter (loop_vec_info loop_vinfo) th = LOOP_VINFO_COST_MODEL_THRESHOLD (LOOP_VINFO_ORIG_LOOP_INFO (loop_vinfo)); - if (LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)) + if (LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo)) /* The main loop handles all iterations. */ LOOP_VINFO_PEELING_FOR_NITER (loop_vinfo) = false; else if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo) @@ -2146,7 +2146,7 @@ start_over: /* Decide whether to use a fully-masked loop for this vectorization factor. */ - LOOP_VINFO_FULLY_MASKED_P (loop_vinfo) + LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo) = (LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) && vect_verify_full_masking (loop_vinfo)); if (dump_enabled_p ()) @@ -2164,7 +2164,7 @@ start_over: enough iterations for vectorization. */ if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) && LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo) - && !LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)) + && !LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo)) { poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo); tree scalar_niters = LOOP_VINFO_NITERSM1 (loop_vinfo); @@ -2175,10 +2175,11 @@ start_over: " support peeling for gaps.\n"); } - /* If we're vectorizing an epilogue loop, we either need a fully-masked - loop or a loop that has a lower VF than the main loop. */ + /* If we're vectorizing an epilogue loop, the vectorized loop either needs + to be able to handle fewer than VF scalars, or needs to have a lower VF + than the main loop. */ if (LOOP_VINFO_EPILOGUE_P (loop_vinfo) - && !LOOP_VINFO_FULLY_MASKED_P (loop_vinfo) + && !LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo) && maybe_ge (LOOP_VINFO_VECT_FACTOR (loop_vinfo), LOOP_VINFO_VECT_FACTOR (orig_loop_vinfo))) return opt_result::failure_at (vect_location, @@ -2249,7 +2250,7 @@ start_over: } /* Niters for at least one iteration of vectorized loop. */ - if (!LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)) + if (!LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo)) niters_th += LOOP_VINFO_VECT_FACTOR (loop_vinfo); /* One additional iteration because of peeling for gap. */ if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo)) @@ -3826,7 +3827,7 @@ vect_estimate_min_profitable_iters (loop_vec_info loop_vinfo, " Calculated minimum iters for profitability: %d\n", min_profitable_iters); - if (!LOOP_VINFO_FULLY_MASKED_P (loop_vinfo) + if (!LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo) && min_profitable_iters < (assumed_vf + peel_iters_prologue)) /* We want the vectorized loop to execute at least once. */ min_profitable_iters = assumed_vf + peel_iters_prologue; @@ -8582,7 +8583,7 @@ vect_transform_loop (loop_vec_info loop_vinfo, gimple *loop_vectorized_call) if (niters_vector == NULL_TREE) { if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo) - && !LOOP_VINFO_FULLY_MASKED_P (loop_vinfo) + && !LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo) && known_eq (lowest_vf, vf)) { niters_vector @@ -8751,7 +8752,8 @@ vect_transform_loop (loop_vec_info loop_vinfo, gimple *loop_vectorized_call) /* True if the final iteration might not handle a full vector's worth of scalar iterations. */ - bool final_iter_may_be_partial = LOOP_VINFO_FULLY_MASKED_P (loop_vinfo); + bool final_iter_may_be_partial + = LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo); /* The minimum number of iterations performed by the epilogue. This is 1 when peeling for gaps because we always need a final scalar iteration. */ @@ -8762,7 +8764,7 @@ vect_transform_loop (loop_vec_info loop_vinfo, gimple *loop_vectorized_call) int bias_for_lowest = 1 - min_epilogue_iters; int bias_for_assumed = bias_for_lowest; int alignment_npeels = LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo); - if (alignment_npeels && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)) + if (alignment_npeels && LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo)) { /* When the amount of peeling is known at compile time, the first iteration will have exactly alignment_npeels active elements. diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h index c459280c051..9be4d1599ee 100644 --- a/gcc/tree-vectorizer.h +++ b/gcc/tree-vectorizer.h @@ -637,8 +637,9 @@ public: fewer than VF scalars. */ bool can_use_partial_vectors_p; - /* True if have decided to use a fully-masked loop. */ - bool fully_masked_p; + /* True if we've decided to use partially-populated vectors, so that + the vector loop can handle fewer than VF scalars. */ + bool using_partial_vectors_p; /* When we have grouped data accesses with gaps, we may introduce invalid memory accesses. We peel the last iteration of the loop to prevent @@ -702,7 +703,7 @@ public: #define LOOP_VINFO_VERSIONING_THRESHOLD(L) (L)->versioning_threshold #define LOOP_VINFO_VECTORIZABLE_P(L) (L)->vectorizable #define LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P(L) (L)->can_use_partial_vectors_p -#define LOOP_VINFO_FULLY_MASKED_P(L) (L)->fully_masked_p +#define LOOP_VINFO_USING_PARTIAL_VECTORS_P(L) (L)->using_partial_vectors_p #define LOOP_VINFO_VECT_FACTOR(L) (L)->vectorization_factor #define LOOP_VINFO_MAX_VECT_FACTOR(L) (L)->max_vectorization_factor #define LOOP_VINFO_MASKS(L) (L)->masks @@ -739,6 +740,10 @@ public: #define LOOP_VINFO_ORIG_LOOP_INFO(L) (L)->orig_loop_info #define LOOP_VINFO_SIMD_IF_COND(L) (L)->simd_if_cond +#define LOOP_VINFO_FULLY_MASKED_P(L) \ + (LOOP_VINFO_USING_PARTIAL_VECTORS_P (L) \ + && !LOOP_VINFO_MASKS (L).is_empty ()) + #define LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT(L) \ ((L)->may_misalign_stmts.length () > 0) #define LOOP_REQUIRES_VERSIONING_FOR_ALIAS(L) \ -- 2.30.2