From 9adee3052156ae36271be28e3ad47dd975c26f42 Mon Sep 17 00:00:00 2001 From: Richard Sandiford Date: Fri, 4 Aug 2017 10:40:35 +0000 Subject: [PATCH] Use base inequality for some vector alias checks This patch checks whether two data references x and y cannot partially overlap and so are independent whenever &x != &y. We can then use this in the vectoriser to optimise alias checks. gcc/ 2016-08-04 Richard Sandiford * hash-traits.h (pair_hash): New struct. * tree-data-ref.h (data_dependence_relation): Add object_a and object_b fields. (DDR_OBJECT_A, DDR_OBJECT_B): New macros. * tree-data-ref.c (initialize_data_dependence_relation): Initialize DDR_OBJECT_A and DDR_OBJECT_B. * tree-vectorizer.h (vec_object_pair): New type. (_loop_vec_info): Add a check_unequal_addrs field. (LOOP_VINFO_CHECK_UNEQUAL_ADDRS): New macro. (LOOP_REQUIRES_VERSIONING_FOR_ALIAS): Return true if there is an entry in check_unequal_addrs. Check comp_alias_ddrs instead of may_alias_ddrs. * tree-vect-loop.c (destroy_loop_vec_info): Release LOOP_VINFO_CHECK_UNEQUAL_ADDRS. (vect_analyze_loop_2): Likewise, when restarting. (vect_estimate_min_profitable_iters): Estimate the cost of LOOP_VINFO_CHECK_UNEQUAL_ADDRS. * tree-vect-data-refs.c: Include tree-hash-traits.h. (vect_prune_runtime_alias_test_list): Try to handle conflicts using LOOP_VINFO_CHECK_UNEQUAL_ADDRS, if the data dependence allows. Count such tests in the final summary. * tree-vect-loop-manip.c (chain_cond_expr): New function. (vect_create_cond_for_align_checks): Use it. (vect_create_cond_for_unequal_addrs): New function. (vect_loop_versioning): Call it. gcc/testsuite/ * gcc.dg/vect/vect-alias-check-6.c: New test. From-SVN: r250868 --- gcc/hash-traits.h | 70 +++++++++++++++++++ .../gcc.dg/vect/vect-alias-check-6.c | 23 ++++++ gcc/tree-data-ref.c | 9 +++ gcc/tree-data-ref.h | 9 +++ gcc/tree-vect-data-refs.c | 39 +++++++++-- gcc/tree-vect-loop-manip.c | 45 ++++++++++-- gcc/tree-vect-loop.c | 8 +++ gcc/tree-vectorizer.h | 11 ++- 8 files changed, 200 insertions(+), 14 deletions(-) create mode 100644 gcc/testsuite/gcc.dg/vect/vect-alias-check-6.c diff --git a/gcc/hash-traits.h b/gcc/hash-traits.h index f1bfd9faa8d..a5c4f103474 100644 --- a/gcc/hash-traits.h +++ b/gcc/hash-traits.h @@ -301,6 +301,76 @@ struct ggc_cache_ptr_hash : pointer_hash , ggc_cache_remove {}; struct nofree_string_hash : string_hash, typed_noop_remove {}; +/* Traits for pairs of values, using the first to record empty and + deleted slots. */ + +template +struct pair_hash +{ + typedef std::pair value_type; + typedef std::pair compare_type; + + static inline hashval_t hash (const value_type &); + static inline bool equal (const value_type &, const compare_type &); + static inline void remove (value_type &); + static inline void mark_deleted (value_type &); + static inline void mark_empty (value_type &); + static inline bool is_deleted (const value_type &); + static inline bool is_empty (const value_type &); +}; + +template +inline hashval_t +pair_hash ::hash (const value_type &x) +{ + return iterative_hash_hashval_t (T1::hash (x.first), T2::hash (x.second)); +} + +template +inline bool +pair_hash ::equal (const value_type &x, const compare_type &y) +{ + return T1::equal (x.first, y.first) && T2::equal (x.second, y.second); +} + +template +inline void +pair_hash ::remove (value_type &x) +{ + T1::remove (x.first); + T2::remove (x.second); +} + +template +inline void +pair_hash ::mark_deleted (value_type &x) +{ + T1::mark_deleted (x.first); +} + +template +inline void +pair_hash ::mark_empty (value_type &x) +{ + T1::mark_empty (x.first); +} + +template +inline bool +pair_hash ::is_deleted (const value_type &x) +{ + return T1::is_deleted (x.first); +} + +template +inline bool +pair_hash ::is_empty (const value_type &x) +{ + return T1::is_empty (x.first); +} + template struct default_hash_traits : T {}; template diff --git a/gcc/testsuite/gcc.dg/vect/vect-alias-check-6.c b/gcc/testsuite/gcc.dg/vect/vect-alias-check-6.c new file mode 100644 index 00000000000..0993c69a5bd --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/vect-alias-check-6.c @@ -0,0 +1,23 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target vect_int } */ + +#define N 16 + +struct s { int x[N]; }; + +void +f1 (struct s *a, struct s *b) +{ + for (int i = 0; i < N - 1; ++i) + a->x[i + 1] += b->x[i]; +} + +void +f2 (struct s *a, struct s *b) +{ + for (int i = 0; i < N; ++i) + a->x[i] += b->x[N - i - 1]; +} + +/* { dg-final { scan-tree-dump-times {checking that [^\n]* and [^\n]* have different addresses} 2 "vect" } } */ +/* { dg-final { scan-tree-dump-times "LOOP VECTORIZED" 2 "vect" } } */ diff --git a/gcc/tree-data-ref.c b/gcc/tree-data-ref.c index 619a651486b..4956c030f10 100644 --- a/gcc/tree-data-ref.c +++ b/gcc/tree-data-ref.c @@ -2506,6 +2506,15 @@ initialize_data_dependence_relation (struct data_reference *a, } DDR_COULD_BE_INDEPENDENT_P (res) = true; + if (!loop_nest.exists () + || (object_address_invariant_in_loop_p (loop_nest[0], + full_seq.object_a) + && object_address_invariant_in_loop_p (loop_nest[0], + full_seq.object_b))) + { + DDR_OBJECT_A (res) = full_seq.object_a; + DDR_OBJECT_B (res) = full_seq.object_b; + } } DDR_AFFINE_P (res) = true; diff --git a/gcc/tree-data-ref.h b/gcc/tree-data-ref.h index ef02df7b179..0f22d9dfe2a 100644 --- a/gcc/tree-data-ref.h +++ b/gcc/tree-data-ref.h @@ -309,6 +309,13 @@ struct data_dependence_relation but the analyzer cannot be more specific. */ tree are_dependent; + /* If nonnull, COULD_BE_INDEPENDENT_P is true and the accesses are + independent when the runtime addresses of OBJECT_A and OBJECT_B + are different. The addresses of both objects are invariant in the + loop nest. */ + tree object_a; + tree object_b; + /* For each subscript in the dependence test, there is an element in this array. This is the attribute that labels the edge A->B of the data_dependence_relation. */ @@ -372,6 +379,8 @@ typedef struct data_dependence_relation *ddr_p; #define DDR_B(DDR) (DDR)->b #define DDR_AFFINE_P(DDR) (DDR)->affine_p #define DDR_ARE_DEPENDENT(DDR) (DDR)->are_dependent +#define DDR_OBJECT_A(DDR) (DDR)->object_a +#define DDR_OBJECT_B(DDR) (DDR)->object_b #define DDR_SUBSCRIPTS(DDR) (DDR)->subscripts #define DDR_SUBSCRIPT(DDR, I) DDR_SUBSCRIPTS (DDR)[I] #define DDR_NUM_SUBSCRIPTS(DDR) DDR_SUBSCRIPTS (DDR).length () diff --git a/gcc/tree-vect-data-refs.c b/gcc/tree-vect-data-refs.c index 377cb90bbb0..a91e304327b 100644 --- a/gcc/tree-vect-data-refs.c +++ b/gcc/tree-vect-data-refs.c @@ -51,6 +51,7 @@ along with GCC; see the file COPYING3. If not see #include "builtins.h" #include "params.h" #include "tree-cfg.h" +#include "tree-hash-traits.h" /* Return true if load- or store-lanes optab OPTAB is implemented for COUNT vectors of type VECTYPE. NAME is the name of OPTAB. */ @@ -2986,10 +2987,14 @@ dependence_distance_ge_vf (data_dependence_relation *ddr, bool vect_prune_runtime_alias_test_list (loop_vec_info loop_vinfo) { - vec may_alias_ddrs = - LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo); - vec& comp_alias_ddrs = - LOOP_VINFO_COMP_ALIAS_DDRS (loop_vinfo); + typedef pair_hash tree_pair_hash; + hash_set compared_objects; + + vec may_alias_ddrs = LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo); + vec &comp_alias_ddrs + = LOOP_VINFO_COMP_ALIAS_DDRS (loop_vinfo); + vec &check_unequal_addrs + = LOOP_VINFO_CHECK_UNEQUAL_ADDRS (loop_vinfo); int vect_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo); tree scalar_loop_iters = LOOP_VINFO_NITERS (loop_vinfo); @@ -3024,6 +3029,24 @@ vect_prune_runtime_alias_test_list (loop_vec_info loop_vinfo) if (dependence_distance_ge_vf (ddr, loop_depth, vect_factor)) continue; + if (DDR_OBJECT_A (ddr)) + { + vec_object_pair new_pair (DDR_OBJECT_A (ddr), DDR_OBJECT_B (ddr)); + if (!compared_objects.add (new_pair)) + { + if (dump_enabled_p ()) + { + dump_printf_loc (MSG_NOTE, vect_location, "checking that "); + dump_generic_expr (MSG_NOTE, TDF_SLIM, new_pair.first); + dump_printf (MSG_NOTE, " and "); + dump_generic_expr (MSG_NOTE, TDF_SLIM, new_pair.second); + dump_printf (MSG_NOTE, " have different addresses\n"); + } + LOOP_VINFO_CHECK_UNEQUAL_ADDRS (loop_vinfo).safe_push (new_pair); + } + continue; + } + dr_a = DDR_A (ddr); stmt_a = DR_STMT (DDR_A (ddr)); dr_group_first_a = GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt_a)); @@ -3085,11 +3108,13 @@ vect_prune_runtime_alias_test_list (loop_vec_info loop_vinfo) prune_runtime_alias_test_list (&comp_alias_ddrs, (unsigned HOST_WIDE_INT) vect_factor); + + unsigned int count = (comp_alias_ddrs.length () + + check_unequal_addrs.length ()); dump_printf_loc (MSG_NOTE, vect_location, "improved number of alias checks from %d to %d\n", - may_alias_ddrs.length (), comp_alias_ddrs.length ()); - if ((int) comp_alias_ddrs.length () > - PARAM_VALUE (PARAM_VECT_MAX_VERSION_FOR_ALIAS_CHECKS)) + may_alias_ddrs.length (), count); + if ((int) count > PARAM_VALUE (PARAM_VECT_MAX_VERSION_FOR_ALIAS_CHECKS)) { if (dump_enabled_p ()) dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, diff --git a/gcc/tree-vect-loop-manip.c b/gcc/tree-vect-loop-manip.c index 97080a61ff7..f78e4b420c3 100644 --- a/gcc/tree-vect-loop-manip.c +++ b/gcc/tree-vect-loop-manip.c @@ -1944,6 +1944,19 @@ vect_create_cond_for_niters_checks (loop_vec_info loop_vinfo, tree *cond_expr) *cond_expr = part_cond_expr; } +/* Set *COND_EXPR to a tree that is true when both the original *COND_EXPR + and PART_COND_EXPR are true. Treat a null *COND_EXPR as "true". */ + +static void +chain_cond_expr (tree *cond_expr, tree part_cond_expr) +{ + if (*cond_expr) + *cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node, + *cond_expr, part_cond_expr); + else + *cond_expr = part_cond_expr; +} + /* Function vect_create_cond_for_align_checks. Create a conditional expression that represents the alignment checks for @@ -2054,11 +2067,28 @@ vect_create_cond_for_align_checks (loop_vec_info loop_vinfo, ptrsize_zero = build_int_cst (int_ptrsize_type, 0); part_cond_expr = fold_build2 (EQ_EXPR, boolean_type_node, and_tmp_name, ptrsize_zero); - if (*cond_expr) - *cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node, - *cond_expr, part_cond_expr); - else - *cond_expr = part_cond_expr; + chain_cond_expr (cond_expr, part_cond_expr); +} + +/* If LOOP_VINFO_CHECK_UNEQUAL_ADDRS contains , ..., , + create a tree representation of: (&A1 != &B1) && ... && (&An != &Bn). + Set *COND_EXPR to a tree that is true when both the original *COND_EXPR + and this new condition are true. Treat a null *COND_EXPR as "true". */ + +static void +vect_create_cond_for_unequal_addrs (loop_vec_info loop_vinfo, tree *cond_expr) +{ + vec pairs = LOOP_VINFO_CHECK_UNEQUAL_ADDRS (loop_vinfo); + unsigned int i; + vec_object_pair *pair; + FOR_EACH_VEC_ELT (pairs, i, pair) + { + tree addr1 = build_fold_addr_expr (pair->first); + tree addr2 = build_fold_addr_expr (pair->second); + tree part_cond_expr = fold_build2 (NE_EXPR, boolean_type_node, + addr1, addr2); + chain_cond_expr (cond_expr, part_cond_expr); + } } /* Function vect_create_cond_for_alias_checks. @@ -2158,7 +2188,10 @@ vect_loop_versioning (loop_vec_info loop_vinfo, &cond_expr_stmt_list); if (version_alias) - vect_create_cond_for_alias_checks (loop_vinfo, &cond_expr); + { + vect_create_cond_for_unequal_addrs (loop_vinfo, &cond_expr); + vect_create_cond_for_alias_checks (loop_vinfo, &cond_expr); + } cond_expr = force_gimple_operand_1 (cond_expr, &gimplify_stmt_list, is_gimple_condexpr, NULL_TREE); diff --git a/gcc/tree-vect-loop.c b/gcc/tree-vect-loop.c index 8740a7573ac..9723f134a3c 100644 --- a/gcc/tree-vect-loop.c +++ b/gcc/tree-vect-loop.c @@ -1275,6 +1275,8 @@ destroy_loop_vec_info (loop_vec_info loop_vinfo, bool clean_stmts) destroy_cost_data (LOOP_VINFO_TARGET_COST_DATA (loop_vinfo)); loop_vinfo->scalar_cost_vec.release (); + LOOP_VINFO_CHECK_UNEQUAL_ADDRS (loop_vinfo).release (); + free (loop_vinfo); loop->aux = NULL; } @@ -2343,6 +2345,7 @@ again: } /* Free optimized alias test DDRS. */ LOOP_VINFO_COMP_ALIAS_DDRS (loop_vinfo).release (); + LOOP_VINFO_CHECK_UNEQUAL_ADDRS (loop_vinfo).release (); /* Reset target cost data. */ destroy_cost_data (LOOP_VINFO_TARGET_COST_DATA (loop_vinfo)); LOOP_VINFO_TARGET_COST_DATA (loop_vinfo) @@ -3434,6 +3437,11 @@ vect_estimate_min_profitable_iters (loop_vec_info loop_vinfo, unsigned len = LOOP_VINFO_COMP_ALIAS_DDRS (loop_vinfo).length (); (void) add_stmt_cost (target_cost_data, len, vector_stmt, NULL, 0, vect_prologue); + len = LOOP_VINFO_CHECK_UNEQUAL_ADDRS (loop_vinfo).length (); + if (len) + /* Count LEN - 1 ANDs and LEN comparisons. */ + (void) add_stmt_cost (target_cost_data, len * 2 - 1, scalar_stmt, + NULL, 0, vect_prologue); dump_printf (MSG_NOTE, "cost model: Adding cost of checks for loop " "versioning aliasing.\n"); diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h index cae0668bb45..01c416c7cbf 100644 --- a/gcc/tree-vectorizer.h +++ b/gcc/tree-vectorizer.h @@ -149,6 +149,10 @@ typedef struct _slp_instance { +/* Describes two objects whose addresses must be unequal for the vectorized + loop to be valid. */ +typedef std::pair vec_object_pair; + /* Vectorizer state common between loop and basic-block vectorization. */ struct vec_info { enum { bb, loop } kind; @@ -245,6 +249,9 @@ typedef struct _loop_vec_info : public vec_info { lengths from which the run-time aliasing check is built. */ vec comp_alias_ddrs; + /* Check that the addresses of each pair of objects is unequal. */ + vec check_unequal_addrs; + /* Statements in the loop that have data references that are candidates for a runtime (loop versioning) misalignment check. */ vec may_misalign_stmts; @@ -339,6 +346,7 @@ typedef struct _loop_vec_info : public vec_info { #define LOOP_VINFO_MAY_MISALIGN_STMTS(L) (L)->may_misalign_stmts #define LOOP_VINFO_MAY_ALIAS_DDRS(L) (L)->may_alias_ddrs #define LOOP_VINFO_COMP_ALIAS_DDRS(L) (L)->comp_alias_ddrs +#define LOOP_VINFO_CHECK_UNEQUAL_ADDRS(L) (L)->check_unequal_addrs #define LOOP_VINFO_GROUPED_STORES(L) (L)->grouped_stores #define LOOP_VINFO_SLP_INSTANCES(L) (L)->slp_instances #define LOOP_VINFO_SLP_UNROLLING_FACTOR(L) (L)->slp_unrolling_factor @@ -358,7 +366,8 @@ typedef struct _loop_vec_info : public vec_info { #define LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT(L) \ ((L)->may_misalign_stmts.length () > 0) #define LOOP_REQUIRES_VERSIONING_FOR_ALIAS(L) \ - ((L)->comp_alias_ddrs.length () > 0) + ((L)->comp_alias_ddrs.length () > 0 \ + || (L)->check_unequal_addrs.length () > 0) #define LOOP_REQUIRES_VERSIONING_FOR_NITERS(L) \ (LOOP_VINFO_NITERS_ASSUMPTIONS (L)) #define LOOP_REQUIRES_VERSIONING(L) \ -- 2.30.2