From: Richard Sandiford Date: Sat, 16 Nov 2019 11:43:31 +0000 (+0000) Subject: Use a single comparison for index-based alias checks X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=f9d6338bd15ce1fae36bf25d3a0545e9678ddc58;p=gcc.git Use a single comparison for index-based alias checks This patch rewrites the index-based alias checks to use conditions of the form: (unsigned T) (a - b + bias) <= limit E.g. before the patch: struct s { int x[100]; }; void f1 (struct s *s1, int a, int b) { for (int i = 0; i < 32; ++i) s1->x[i + a] += s1->x[i + b]; } used: add w3, w1, 3 cmp w3, w2 add w3, w2, 3 ccmp w1, w3, 0, ge ble .L2 whereas after the patch it uses: sub w3, w1, w2 add w3, w3, 3 cmp w3, 6 bls .L2 The patch also fixes the seg_len1 and seg_len2 negation for cases in which seg_len is a "negative unsigned" value narrower than 64 bits, like it is for 32-bit targets. Previously we'd end up with values like 0xffffffff000000001 instead of 1. 2019-11-16 Richard Sandiford gcc/ * tree-data-ref.c (create_intersect_range_checks_index): Rewrite the index tests to have the form (unsigned T) (B - A + bias) <= limit. gcc/testsuite/ * gcc.dg/vect/vect-alias-check-18.c: New test. * gcc.dg/vect/vect-alias-check-19.c: Likewise. * gcc.dg/vect/vect-alias-check-20.c: Likewise. From-SVN: r278354 --- diff --git a/gcc/ChangeLog b/gcc/ChangeLog index c194a0aa58c..e99cfe70d10 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,8 @@ +2019-11-16 Richard Sandiford + + * tree-data-ref.c (create_intersect_range_checks_index): Rewrite + the index tests to have the form (unsigned T) (B - A + bias) <= limit. + 2019-11-16 Richard Sandiford * tree-data-ref.c (create_intersect_range_checks_index) diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index c701507ec3e..ef5bd71af8e 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,9 @@ +2019-11-16 Richard Sandiford + + * gcc.dg/vect/vect-alias-check-18.c: New test. + * gcc.dg/vect/vect-alias-check-19.c: Likewise. + * gcc.dg/vect/vect-alias-check-20.c: Likewise. + 2019-11-16 Richard Sandiford * gcc.dg/vect/vect-alias-check-1.c: Test for the type of alias check. diff --git a/gcc/testsuite/gcc.dg/vect/vect-alias-check-18.c b/gcc/testsuite/gcc.dg/vect/vect-alias-check-18.c new file mode 100644 index 00000000000..e9fd31a1400 --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/vect-alias-check-18.c @@ -0,0 +1,64 @@ +#define N 200 +#define DIST 32 + +typedef signed char sc; +typedef unsigned char uc; +typedef signed short ss; +typedef unsigned short us; +typedef int si; +typedef unsigned int ui; +typedef signed long long sll; +typedef unsigned long long ull; + +#define FOR_EACH_TYPE(M) \ + M (sc) M (uc) \ + M (ss) M (us) \ + M (si) M (ui) \ + M (sll) M (ull) \ + M (float) M (double) + +#define TEST_VALUE(I) ((I) * 11 / 2) + +#define ADD_TEST(TYPE) \ + TYPE a_##TYPE[N * 2]; \ + void __attribute__((noinline, noclone)) \ + test_##TYPE (int x, int y) \ + { \ + for (int i = 0; i < N; ++i) \ + a_##TYPE[x - i] += a_##TYPE[y - i]; \ + } + +#define DO_TEST(TYPE) \ + for (int i = 0; i < DIST * 2; ++i) \ + { \ + for (int j = 0; j < N + DIST * 2; ++j) \ + a_##TYPE[j] = TEST_VALUE (j); \ + test_##TYPE (i + N - 1, DIST + N - 1); \ + for (int j = 0; j < N + DIST * 2; ++j) \ + { \ + TYPE expected; \ + if (j < i || j >= i + N) \ + expected = TEST_VALUE (j); \ + else if (i >= DIST) \ + expected = ((TYPE) TEST_VALUE (j) \ + + (TYPE) TEST_VALUE (j + DIST - i)); \ + else \ + expected = ((TYPE) TEST_VALUE (j) \ + + a_##TYPE[j + DIST - i]); \ + if (expected != a_##TYPE[j]) \ + __builtin_abort (); \ + } \ + } + +FOR_EACH_TYPE (ADD_TEST) + +int +main (void) +{ + FOR_EACH_TYPE (DO_TEST) + return 0; +} + +/* { dg-final { scan-tree-dump {flags: *WAR\n} "vect" { target vect_int } } } */ +/* { dg-final { scan-tree-dump "using an index-based overlap test" "vect" } } */ +/* { dg-final { scan-tree-dump-not "using an address-based" "vect" } } */ diff --git a/gcc/testsuite/gcc.dg/vect/vect-alias-check-19.c b/gcc/testsuite/gcc.dg/vect/vect-alias-check-19.c new file mode 100644 index 00000000000..583e296f7f1 --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/vect-alias-check-19.c @@ -0,0 +1,62 @@ +#define N 200 +#define DIST 32 + +typedef signed char sc; +typedef unsigned char uc; +typedef signed short ss; +typedef unsigned short us; +typedef int si; +typedef unsigned int ui; +typedef signed long long sll; +typedef unsigned long long ull; + +#define FOR_EACH_TYPE(M) \ + M (sc) M (uc) \ + M (ss) M (us) \ + M (si) M (ui) \ + M (sll) M (ull) \ + M (float) M (double) + +#define ADD_TEST(TYPE) \ + TYPE a_##TYPE[N * 2]; \ + void __attribute__((noinline, noclone)) \ + test_##TYPE (int x, int y) \ + { \ + for (int i = 0; i < N; ++i) \ + { \ + a_##TYPE[i + x] = i; \ + a_##TYPE[i + y] = 42 - i * 2; \ + } \ + } + +#define DO_TEST(TYPE) \ + for (int i = 0; i < DIST * 2; ++i) \ + { \ + __builtin_memset (a_##TYPE, 0, sizeof (a_##TYPE)); \ + test_##TYPE (DIST, i); \ + for (int j = 0; j < N + DIST * 2; ++j) \ + { \ + TYPE expected = 0; \ + if (i > DIST && j >= i && j < i + N) \ + expected = 42 - (j - i) * 2; \ + if (j >= DIST && j < DIST + N) \ + expected = j - DIST; \ + if (i <= DIST && j >= i && j < i + N) \ + expected = 42 - (j - i) * 2; \ + if (expected != a_##TYPE[j]) \ + __builtin_abort (); \ + } \ + } + +FOR_EACH_TYPE (ADD_TEST) + +int +main (void) +{ + FOR_EACH_TYPE (DO_TEST) + return 0; +} + +/* { dg-final { scan-tree-dump {flags: *WAW\n} "vect" { target vect_int } } } */ +/* { dg-final { scan-tree-dump "using an index-based overlap test" "vect" } } */ +/* { dg-final { scan-tree-dump-not "using an address-based" "vect" } } */ diff --git a/gcc/testsuite/gcc.dg/vect/vect-alias-check-20.c b/gcc/testsuite/gcc.dg/vect/vect-alias-check-20.c new file mode 100644 index 00000000000..8a699ebfda8 --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/vect-alias-check-20.c @@ -0,0 +1,66 @@ +#define N 200 +#define DIST 32 + +typedef signed char sc; +typedef unsigned char uc; +typedef signed short ss; +typedef unsigned short us; +typedef int si; +typedef unsigned int ui; +typedef signed long long sll; +typedef unsigned long long ull; + +#define FOR_EACH_TYPE(M) \ + M (sc) M (uc) \ + M (ss) M (us) \ + M (si) M (ui) \ + M (sll) M (ull) \ + M (float) M (double) + +#define TEST_VALUE(I) ((I) * 11 / 2) + +#define ADD_TEST(TYPE) \ + TYPE a_##TYPE[N * 2]; \ + TYPE __attribute__((noinline, noclone)) \ + test_##TYPE (int x, int y) \ + { \ + TYPE res = 0; \ + for (int i = 0; i < N; ++i) \ + { \ + a_##TYPE[i + x] = i; \ + res += a_##TYPE[i + y]; \ + } \ + return res; \ + } + +#define DO_TEST(TYPE) \ + for (int i = 0; i < DIST * 2; ++i) \ + { \ + for (int j = 0; j < N + DIST * 2; ++j) \ + a_##TYPE[j] = TEST_VALUE (j); \ + TYPE res = test_##TYPE (DIST, i); \ + for (int j = 0; j < N; ++j) \ + if (a_##TYPE[j + DIST] != (TYPE) j) \ + __builtin_abort (); \ + TYPE expected_res = 0; \ + for (int j = i; j < i + N; ++j) \ + if (i <= DIST && j >= DIST && j < DIST + N) \ + expected_res += j - DIST; \ + else \ + expected_res += TEST_VALUE (j); \ + if (expected_res != res) \ + __builtin_abort (); \ + } + +FOR_EACH_TYPE (ADD_TEST) + +int +main (void) +{ + FOR_EACH_TYPE (DO_TEST) + return 0; +} + +/* { dg-final { scan-tree-dump {flags: *RAW\n} "vect" { target vect_int } } } */ +/* { dg-final { scan-tree-dump "using an index-based overlap test" "vect" } } */ +/* { dg-final { scan-tree-dump-not "using an address-based" "vect" } } */ diff --git a/gcc/tree-data-ref.c b/gcc/tree-data-ref.c index 527cb5eca9c..4dfa33404e3 100644 --- a/gcc/tree-data-ref.c +++ b/gcc/tree-data-ref.c @@ -1743,7 +1743,9 @@ prune_runtime_alias_test_list (vec *alias_pairs, We can create expression based on index rather than address: - (i_0 + 4 < j_0 || j_0 + 4 < i_0) + (unsigned) (i_0 - j_0 + 3) <= 6 + + i.e. the indices are less than 4 apart. Note evolution step of index needs to be considered in comparison. */ @@ -1780,15 +1782,8 @@ create_intersect_range_checks_index (class loop *loop, tree *cond_expr, if (neg_step) { abs_step = -abs_step; - seg_len1 = -seg_len1; - seg_len2 = -seg_len2; - } - else - { - /* Include the access size in the length, so that we only have one - tree addition below. */ - seg_len1 += dr_a.access_size; - seg_len2 += dr_b.access_size; + seg_len1 = (-wi::to_poly_wide (dr_a.seg_len)).force_uhwi (); + seg_len2 = (-wi::to_poly_wide (dr_b.seg_len)).force_uhwi (); } /* Infer the number of iterations with which the memory segment is accessed @@ -1802,16 +1797,13 @@ create_intersect_range_checks_index (class loop *loop, tree *cond_expr, || !can_div_trunc_p (seg_len2 + abs_step - 1, abs_step, &niter_len2)) return false; - poly_uint64 niter_access1 = 0, niter_access2 = 0; - if (neg_step) - { - /* Divide each access size by the byte step, rounding up. */ - if (!can_div_trunc_p (dr_a.access_size - abs_step - 1, - abs_step, &niter_access1) - || !can_div_trunc_p (dr_b.access_size + abs_step - 1, - abs_step, &niter_access2)) - return false; - } + /* Divide each access size by the byte step, rounding up. */ + poly_uint64 niter_access1, niter_access2; + if (!can_div_trunc_p (dr_a.access_size + abs_step - 1, + abs_step, &niter_access1) + || !can_div_trunc_p (dr_b.access_size + abs_step - 1, + abs_step, &niter_access2)) + return false; unsigned int i; for (i = 0; i < DR_NUM_DIMENSIONS (dr_a.dr); i++) @@ -1851,38 +1843,87 @@ create_intersect_range_checks_index (class loop *loop, tree *cond_expr, index of data reference. Like segment length, index length is linear function of the number of iterations with index_step as the coefficient, i.e, niter_len * idx_step. */ - tree idx_len1 = fold_build2 (MULT_EXPR, TREE_TYPE (min1), idx_step, - build_int_cst (TREE_TYPE (min1), - niter_len1)); - tree idx_len2 = fold_build2 (MULT_EXPR, TREE_TYPE (min2), idx_step, - build_int_cst (TREE_TYPE (min2), - niter_len2)); - tree max1 = fold_build2 (PLUS_EXPR, TREE_TYPE (min1), min1, idx_len1); - tree max2 = fold_build2 (PLUS_EXPR, TREE_TYPE (min2), min2, idx_len2); - /* Adjust ranges for negative step. */ + offset_int abs_idx_step = offset_int::from (wi::to_wide (idx_step), + SIGNED); if (neg_step) - { - /* IDX_LEN1 and IDX_LEN2 are negative in this case. */ - std::swap (min1, max1); - std::swap (min2, max2); - - /* As with the lengths just calculated, we've measured the access - sizes in iterations, so multiply them by the index step. */ - tree idx_access1 - = fold_build2 (MULT_EXPR, TREE_TYPE (min1), idx_step, - build_int_cst (TREE_TYPE (min1), niter_access1)); - tree idx_access2 - = fold_build2 (MULT_EXPR, TREE_TYPE (min2), idx_step, - build_int_cst (TREE_TYPE (min2), niter_access2)); - - /* MINUS_EXPR because the above values are negative. */ - max1 = fold_build2 (MINUS_EXPR, TREE_TYPE (max1), max1, idx_access1); - max2 = fold_build2 (MINUS_EXPR, TREE_TYPE (max2), max2, idx_access2); - } - tree part_cond_expr - = fold_build2 (TRUTH_OR_EXPR, boolean_type_node, - fold_build2 (LE_EXPR, boolean_type_node, max1, min2), - fold_build2 (LE_EXPR, boolean_type_node, max2, min1)); + abs_idx_step = -abs_idx_step; + poly_offset_int idx_len1 = abs_idx_step * niter_len1; + poly_offset_int idx_len2 = abs_idx_step * niter_len2; + poly_offset_int idx_access1 = abs_idx_step * niter_access1; + poly_offset_int idx_access2 = abs_idx_step * niter_access2; + + gcc_assert (known_ge (idx_len1, 0) + && known_ge (idx_len2, 0) + && known_ge (idx_access1, 0) + && known_ge (idx_access2, 0)); + + /* Each access has the following pattern, with lengths measured + in units of INDEX: + + <-- idx_len --> + <--- A: -ve step ---> + +-----+-------+-----+-------+-----+ + | n-1 | ..... | 0 | ..... | n-1 | + +-----+-------+-----+-------+-----+ + <--- B: +ve step ---> + <-- idx_len --> + | + min + + where "n" is the number of scalar iterations covered by the segment + and where each access spans idx_access units. + + A is the range of bytes accessed when the step is negative, + B is the range when the step is positive. + + When checking for general overlap, we need to test whether + the range: + + [min1 + low_offset1, min2 + high_offset1 + idx_access1 - 1] + + overlaps: + + [min2 + low_offset2, min2 + high_offset2 + idx_access2 - 1] + + where: + + low_offsetN = +ve step ? 0 : -idx_lenN; + high_offsetN = +ve step ? idx_lenN : 0; + + This is equivalent to testing whether: + + min1 + low_offset1 <= min2 + high_offset2 + idx_access2 - 1 + && min2 + low_offset2 <= min1 + high_offset1 + idx_access1 - 1 + + Converting this into a single test, there is an overlap if: + + 0 <= min2 - min1 + bias <= limit + + where bias = high_offset2 + idx_access2 - 1 - low_offset1 + limit = (high_offset1 - low_offset1 + idx_access1 - 1) + + (high_offset2 - low_offset2 + idx_access2 - 1) + i.e. limit = idx_len1 + idx_access1 - 1 + idx_len2 + idx_access2 - 1 + + Combining the tests requires limit to be computable in an unsigned + form of the index type; if it isn't, we fall back to the usual + pointer-based checks. */ + poly_offset_int limit = (idx_len1 + idx_access1 - 1 + + idx_len2 + idx_access2 - 1); + tree utype = unsigned_type_for (TREE_TYPE (min1)); + if (!wi::fits_to_tree_p (limit, utype)) + return false; + + poly_offset_int low_offset1 = neg_step ? -idx_len1 : 0; + poly_offset_int high_offset2 = neg_step ? 0 : idx_len2; + poly_offset_int bias = high_offset2 + idx_access2 - 1 - low_offset1; + + tree subject = fold_build2 (MINUS_EXPR, utype, + fold_convert (utype, min2), + fold_convert (utype, min1)); + subject = fold_build2 (PLUS_EXPR, utype, subject, + wide_int_to_tree (utype, bias)); + tree part_cond_expr = fold_build2 (GT_EXPR, boolean_type_node, subject, + wide_int_to_tree (utype, limit)); if (*cond_expr) *cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node, *cond_expr, part_cond_expr);