+2018-01-13 Richard Sandiford <richard.sandiford@linaro.org>
+ Alan Hayward <alan.hayward@arm.com>
+ David Sherwood <david.sherwood@arm.com>
+
+ * tree-vectorizer.h (vec_lower_bound): New structure.
+ (_loop_vec_info): Add check_nonzero and lower_bounds.
+ (LOOP_VINFO_CHECK_NONZERO): New macro.
+ (LOOP_VINFO_LOWER_BOUNDS): Likewise.
+ (LOOP_REQUIRES_VERSIONING_FOR_ALIAS): Check lower_bounds too.
+ * tree-data-ref.h (dr_with_seg_len): Add access_size and align
+ fields. Make seg_len the distance travelled, not including the
+ access size.
+ (dr_direction_indicator): Declare.
+ (dr_zero_step_indicator): Likewise.
+ (dr_known_forward_stride_p): Likewise.
+ * tree-data-ref.c: Include stringpool.h, tree-vrp.h and
+ tree-ssanames.h.
+ (runtime_alias_check_p): Allow runtime alias checks with
+ variable strides.
+ (operator ==): Compare access_size and align.
+ (prune_runtime_alias_test_list): Rework for new distinction between
+ the access_size and seg_len.
+ (create_intersect_range_checks_index): Likewise. Cope with polynomial
+ segment lengths.
+ (get_segment_min_max): New function.
+ (create_intersect_range_checks): Use it.
+ (dr_step_indicator): New function.
+ (dr_direction_indicator): Likewise.
+ (dr_zero_step_indicator): Likewise.
+ (dr_known_forward_stride_p): Likewise.
+ * tree-loop-distribution.c (data_ref_segment_size): Return
+ DR_STEP * (niters - 1).
+ (compute_alias_check_pairs): Update call to the dr_with_seg_len
+ constructor.
+ * tree-vect-data-refs.c (vect_check_nonzero_value): New function.
+ (vect_preserves_scalar_order_p): New function, split out from...
+ (vect_analyze_data_ref_dependence): ...here. Check for zero steps.
+ (vect_vfa_segment_size): Return DR_STEP * (length_factor - 1).
+ (vect_vfa_access_size): New function.
+ (vect_vfa_align): Likewise.
+ (vect_compile_time_alias): Take access_size_a and access_size_b
+ arguments.
+ (dump_lower_bound): New function.
+ (vect_check_lower_bound): Likewise.
+ (vect_small_gap_p): Likewise.
+ (vectorizable_with_step_bound_p): Likewise.
+ (vect_prune_runtime_alias_test_list): Ignore cross-iteration
+ dependencies if the vectorization factor is 1. Convert the checks
+ for nonzero steps into checks on the bounds of DR_STEP. Try using
+ a bounds check for variable steps if the minimum required step is
+ relatively small. Update calls to the dr_with_seg_len
+ constructor and to vect_compile_time_alias.
+ * tree-vect-loop-manip.c (vect_create_cond_for_lower_bounds): New
+ function.
+ (vect_loop_versioning): Call it.
+ * tree-vect-loop.c (vect_analyze_loop_2): Clear LOOP_VINFO_LOWER_BOUNDS
+ when retrying.
+ (vect_estimate_min_profitable_iters): Account for any bounds checks.
+
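The net effect is that the vectorizer can now emit runtime alias checks for
references whose strides are not compile-time constants.  A minimal sketch of
the kind of loop this enables, modelled on the strided_load_8.c and
var_stride tests below (names illustrative):

  void
  f (int *x, int *y, int n)
  {
    /* DR_STEP for both references is n * 4, an SSA name rather than an
       INTEGER_CST, so runtime_alias_check_p previously rejected the
       versioning check needed to vectorize this loop.  */
    for (int i = 0; i < 256; ++i)
      x[i * n] += y[i * n];
  }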
2018-01-13 Richard Sandiford <richard.sandiford@linaro.org>
Alan Hayward <alan.hayward@arm.com>
David Sherwood <david.sherwood@arm.com>
+2018-01-13 Richard Sandiford <richard.sandiford@linaro.org>
+ Alan Hayward <alan.hayward@arm.com>
+ David Sherwood <david.sherwood@arm.com>
+
+ * gcc.dg/vect/bb-slp-cond-1.c: Expect loop vectorization rather
+ than SLP vectorization.
+ * gcc.dg/vect/vect-alias-check-10.c: New test.
+ * gcc.dg/vect/vect-alias-check-11.c: Likewise.
+ * gcc.dg/vect/vect-alias-check-12.c: Likewise.
+ * gcc.dg/vect/vect-alias-check-8.c: Likewise.
+ * gcc.dg/vect/vect-alias-check-9.c: Likewise.
+ * gcc.target/aarch64/sve/strided_load_8.c: Likewise.
+ * gcc.target/aarch64/sve/var_stride_1.c: Likewise.
+ * gcc.target/aarch64/sve/var_stride_1.h: Likewise.
+ * gcc.target/aarch64/sve/var_stride_1_run.c: Likewise.
+ * gcc.target/aarch64/sve/var_stride_2.c: Likewise.
+ * gcc.target/aarch64/sve/var_stride_2_run.c: Likewise.
+ * gcc.target/aarch64/sve/var_stride_3.c: Likewise.
+ * gcc.target/aarch64/sve/var_stride_3_run.c: Likewise.
+ * gcc.target/aarch64/sve/var_stride_4.c: Likewise.
+ * gcc.target/aarch64/sve/var_stride_4_run.c: Likewise.
+ * gcc.target/aarch64/sve/var_stride_5.c: Likewise.
+ * gcc.target/aarch64/sve/var_stride_5_run.c: Likewise.
+ * gcc.target/aarch64/sve/var_stride_6.c: Likewise.
+ * gcc.target/aarch64/sve/var_stride_6_run.c: Likewise.
+ * gcc.target/aarch64/sve/var_stride_7.c: Likewise.
+ * gcc.target/aarch64/sve/var_stride_7_run.c: Likewise.
+ * gcc.target/aarch64/sve/var_stride_8.c: Likewise.
+ * gcc.target/aarch64/sve/var_stride_8_run.c: Likewise.
+ * gfortran.dg/vect/vect-alias-check-1.F90: Likewise.
+
2018-01-13 Richard Sandiford <richard.sandiford@linaro.org>
Alan Hayward <alan.hayward@arm.com>
David Sherwood <david.sherwood@arm.com>
return 0;
}
-/* Basic blocks of if-converted loops are vectorized from within the loop
- vectorizer pass. In this case it is really a deficiency in loop
- vectorization data dependence analysis that causes us to require
- basic block vectorization in the first place. */
-
-/* { dg-final { scan-tree-dump-times "basic block vectorized" 1 "vect" { target vect_element_align } } } */
+/* { dg-final { scan-tree-dump {(no need for alias check [^\n]* when VF is 1|no alias between [^\n]* when [^\n]* is outside \(-16, 16\))} "vect" { target vect_element_align } } } */
+/* { dg-final { scan-tree-dump-times "loop vectorized" 1 "vect" { target vect_element_align } } } */
--- /dev/null
+/* { dg-do run } */
+
+#define N 87
+#define M 6
+
+typedef signed char sc;
+typedef unsigned char uc;
+typedef signed short ss;
+typedef unsigned short us;
+typedef int si;
+typedef unsigned int ui;
+typedef signed long long sll;
+typedef unsigned long long ull;
+
+#define FOR_EACH_TYPE(M) \
+ M (sc) M (uc) \
+ M (ss) M (us) \
+ M (si) M (ui) \
+ M (sll) M (ull) \
+ M (float) M (double)
+
+#define TEST_VALUE(I) ((I) * 5 / 2)
+
+#define ADD_TEST(TYPE) \
+ void __attribute__((noinline, noclone)) \
+ test_##TYPE (TYPE *a, int step) \
+ { \
+ for (int i = 0; i < N; ++i) \
+ { \
+ a[i * step + 0] = a[i * step + 0] + 1; \
+ a[i * step + 1] = a[i * step + 1] + 2; \
+ a[i * step + 2] = a[i * step + 2] + 4; \
+ a[i * step + 3] = a[i * step + 3] + 8; \
+ } \
+ } \
+ void __attribute__((noinline, noclone)) \
+ ref_##TYPE (TYPE *a, int step) \
+ { \
+ for (int i = 0; i < N; ++i) \
+ { \
+ a[i * step + 0] = a[i * step + 0] + 1; \
+ a[i * step + 1] = a[i * step + 1] + 2; \
+ a[i * step + 2] = a[i * step + 2] + 4; \
+ a[i * step + 3] = a[i * step + 3] + 8; \
+ asm volatile (""); \
+ } \
+ }
+
+#define DO_TEST(TYPE) \
+ for (int j = -M; j <= M; ++j) \
+ { \
+ TYPE a[N * M], b[N * M]; \
+ for (int i = 0; i < N * M; ++i) \
+ a[i] = b[i] = TEST_VALUE (i); \
+ int offset = (j < 0 ? N * M - 4 : 0); \
+ test_##TYPE (a + offset, j); \
+ ref_##TYPE (b + offset, j); \
+ if (__builtin_memcmp (a, b, sizeof (a)) != 0) \
+ __builtin_abort (); \
+ }
+
+FOR_EACH_TYPE (ADD_TEST)
+
+int
+main (void)
+{
+ FOR_EACH_TYPE (DO_TEST)
+ return 0;
+}
--- /dev/null
+/* { dg-do run } */
+
+#define N 87
+#define M 6
+
+typedef signed char sc;
+typedef unsigned char uc;
+typedef signed short ss;
+typedef unsigned short us;
+typedef int si;
+typedef unsigned int ui;
+typedef signed long long sll;
+typedef unsigned long long ull;
+
+#define FOR_EACH_TYPE(M) \
+ M (sc) M (uc) \
+ M (ss) M (us) \
+ M (si) M (ui) \
+ M (sll) M (ull) \
+ M (float) M (double)
+
+#define TEST_VALUE1(I) ((I) * 5 / 2)
+#define TEST_VALUE2(I) ((I) * 11 / 5)
+
+#define ADD_TEST(TYPE) \
+ void __attribute__((noinline, noclone)) \
+ test_##TYPE (TYPE *restrict a, TYPE *restrict b, \
+ int step) \
+ { \
+ for (int i = 0; i < N; ++i) \
+ { \
+ TYPE r1 = a[i * step + 0] += 1; \
+ a[i * step + 1] += 2; \
+ a[i * step + 2] += 4; \
+ a[i * step + 3] += 8; \
+ b[i] += r1; \
+ } \
+ } \
+ \
+ void __attribute__((noinline, noclone)) \
+ ref_##TYPE (TYPE *restrict a, TYPE *restrict b, \
+ int step) \
+ { \
+ for (int i = 0; i < N; ++i) \
+ { \
+ TYPE r1 = a[i * step + 0] += 1; \
+ a[i * step + 1] += 2; \
+ a[i * step + 2] += 4; \
+ a[i * step + 3] += 8; \
+ b[i] += r1; \
+ asm volatile (""); \
+ } \
+ }
+
+#define DO_TEST(TYPE) \
+ for (int j = -M; j <= M; ++j) \
+ { \
+ TYPE a1[N * M], a2[N * M], b1[N], b2[N]; \
+ for (int i = 0; i < N * M; ++i) \
+ a1[i] = a2[i] = TEST_VALUE1 (i); \
+ for (int i = 0; i < N; ++i) \
+ b1[i] = b2[i] = TEST_VALUE2 (i); \
+ int offset = (j < 0 ? N * M - 4 : 0); \
+ test_##TYPE (a1 + offset, b1, j); \
+ ref_##TYPE (a2 + offset, b2, j); \
+ if (__builtin_memcmp (a1, a2, sizeof (a1)) != 0) \
+ __builtin_abort (); \
+ if (__builtin_memcmp (b1, b2, sizeof (b1)) != 0) \
+ __builtin_abort (); \
+ }
+
+FOR_EACH_TYPE (ADD_TEST)
+
+int
+main (void)
+{
+ FOR_EACH_TYPE (DO_TEST)
+ return 0;
+}
+
+/* { dg-final { scan-tree-dump {no alias between [^\n]* when [^\n]* step[^ ]* is outside \(-2, 2\)} "vect" { target vect_int } } } */
+/* { dg-final { scan-tree-dump {no alias between [^\n]* when [^\n]* step[^ ]* is outside \(-3, 3\)} "vect" { target vect_int } } } */
+/* { dg-final { scan-tree-dump {no alias between [^\n]* when [^\n]* step[^ ]* is outside \(-4, 4\)} "vect" { target vect_int } } } */
+/* { dg-final { scan-tree-dump {run-time check [^\n]* abs \([^*]*\) >= 4} "vect" { target vect_int } } } */
+
+/* { dg-final { scan-tree-dump {no alias between [^\n]* when [^\n]* step[^ ]* \* 2[)]* is outside \(-4, 4\)} "vect" { target vect_int } } } */
+/* { dg-final { scan-tree-dump {no alias between [^\n]* when [^\n]* step[^ ]* \* 2[)]* is outside \(-6, 6\)} "vect" { target vect_int } } } */
+/* { dg-final { scan-tree-dump {no alias between [^\n]* when [^\n]* step[^ ]* \* 2[)]* is outside \(-8, 8\)} "vect" { target vect_int } } } */
+/* { dg-final { scan-tree-dump {run-time check [^\n]* abs \([^*]* \* 2[)]* >= 8} "vect" { target vect_int } } } */
+
+/* { dg-final { scan-tree-dump {no alias between [^\n]* when [^\n]* step[^ ]* \* 4[)]* is outside \(-8, 8\)} "vect" { target { vect_int || vect_float } } } } */
+/* { dg-final { scan-tree-dump {no alias between [^\n]* when [^\n]* step[^ ]* \* 4[)]* is outside \(-12, 12\)} "vect" { target { vect_int || vect_float } } } } */
+/* { dg-final { scan-tree-dump {no alias between [^\n]* when [^\n]* step[^ ]* \* 4[)]* is outside \(-16, 16\)} "vect" { target { vect_int || vect_float } } } } */
+/* { dg-final { scan-tree-dump {run-time check [^\n]* abs \([^*]* \* 4[)]* >= 16} "vect" { target { vect_int || vect_float } } } } */
+
+/* { dg-final { scan-tree-dump {no alias between [^\n]* when [^\n]* step[^ ]* \* 8[)]* is outside \(-16, 16\)} "vect" { target vect_double } } } */
+/* { dg-final { scan-tree-dump {no alias between [^\n]* when [^\n]* step[^ ]* \* 8[)]* is outside \(-24, 24\)} "vect" { target vect_double } } } */
+/* { dg-final { scan-tree-dump {no alias between [^\n]* when [^\n]* step[^ ]* \* 8[)]* is outside \(-32, 32\)} "vect" { target vect_double } } } */
+/* { dg-final { scan-tree-dump {run-time check [^\n]* abs \([^*]* \* 8[)]* >= 32} "vect" { target vect_double } } } */
--- /dev/null
+/* { dg-do run } */
+
+#define N 87
+#define M 7
+
+typedef signed char sc;
+typedef unsigned char uc;
+typedef signed short ss;
+typedef unsigned short us;
+typedef int si;
+typedef unsigned int ui;
+typedef signed long long sll;
+typedef unsigned long long ull;
+
+#define FOR_EACH_TYPE(M) \
+ M (sc) M (uc) \
+ M (ss) M (us) \
+ M (si) M (ui) \
+ M (sll) M (ull) \
+ M (float) M (double)
+
+#define TEST_VALUE1(I) ((I) * 5 / 2)
+#define TEST_VALUE2(I) ((I) * 11 / 5)
+
+#define ADD_TEST(TYPE) \
+ void __attribute__((noinline, noclone)) \
+ test_##TYPE (TYPE *restrict a, TYPE *restrict b, \
+ int step) \
+ { \
+ step = step & M; \
+ for (int i = 0; i < N; ++i) \
+ { \
+ TYPE r1 = a[i * step + 0] += 1; \
+ a[i * step + 1] += 2; \
+ a[i * step + 2] += 4; \
+ a[i * step + 3] += 8; \
+ b[i] += r1; \
+ } \
+ } \
+ \
+ void __attribute__((noinline, noclone)) \
+ ref_##TYPE (TYPE *restrict a, TYPE *restrict b, \
+ int step) \
+ { \
+ for (unsigned short i = 0; i < N; ++i) \
+ { \
+ TYPE r1 = a[i * step + 0] += 1; \
+ a[i * step + 1] += 2; \
+ a[i * step + 2] += 4; \
+ a[i * step + 3] += 8; \
+ b[i] += r1; \
+ asm volatile (""); \
+ } \
+ }
+
+#define DO_TEST(TYPE) \
+ for (int j = 0; j <= M; ++j) \
+ { \
+ TYPE a1[N * M], a2[N * M], b1[N], b2[N]; \
+ for (int i = 0; i < N * M; ++i) \
+ a1[i] = a2[i] = TEST_VALUE1 (i); \
+ for (int i = 0; i < N; ++i) \
+ b1[i] = b2[i] = TEST_VALUE2 (i); \
+ test_##TYPE (a1, b1, j); \
+ ref_##TYPE (a2, b2, j); \
+ if (__builtin_memcmp (a1, a2, sizeof (a1)) != 0) \
+ __builtin_abort (); \
+ if (__builtin_memcmp (b1, b2, sizeof (b1)) != 0) \
+ __builtin_abort (); \
+ }
+
+FOR_EACH_TYPE (ADD_TEST)
+
+int
+main (void)
+{
+ FOR_EACH_TYPE (DO_TEST)
+ return 0;
+}
+
+/* { dg-final { scan-tree-dump {no alias between [^\n]* when [^\n]* [_a-z][^ ]* is outside \[0, 2\)} "vect" { target vect_int } } } */
+/* { dg-final { scan-tree-dump {no alias between [^\n]* when [^\n]* [_a-z][^ ]* is outside \[0, 3\)} "vect" { target vect_int } } } */
+/* { dg-final { scan-tree-dump {no alias between [^\n]* when [^\n]* [_a-z][^ ]* is outside \[0, 4\)} "vect" { target vect_int } } } */
+/* { dg-final { scan-tree-dump {run-time check [^\n]* unsigned \([^*]*\) >= 4} "vect" { target vect_int } } } */
+
+/* { dg-final { scan-tree-dump {no alias between [^\n]* when [^\n]* [_a-z][^ ]* \* 2[)]* is outside \[0, 4\)} "vect" { target vect_int } } } */
+/* { dg-final { scan-tree-dump {no alias between [^\n]* when [^\n]* [_a-z][^ ]* \* 2[)]* is outside \[0, 6\)} "vect" { target vect_int } } } */
+/* { dg-final { scan-tree-dump {no alias between [^\n]* when [^\n]* [_a-z][^ ]* \* 2[)]* is outside \[0, 8\)} "vect" { target vect_int } } } */
+/* { dg-final { scan-tree-dump {run-time check [^\n]* unsigned \([^*]* \* 2[)]* >= 8} "vect" { target vect_int } } } */
+
+/* { dg-final { scan-tree-dump {no alias between [^\n]* when [^\n]* [_a-z][^ ]* \* 4[)]* is outside \[0, 8\)} "vect" { target { vect_int || vect_float } }} } */
+/* { dg-final { scan-tree-dump {no alias between [^\n]* when [^\n]* [_a-z][^ ]* \* 4[)]* is outside \[0, 12\)} "vect" { target { vect_int || vect_float } }} } */
+/* { dg-final { scan-tree-dump {no alias between [^\n]* when [^\n]* [_a-z][^ ]* \* 4[)]* is outside \[0, 16\)} "vect" { target { vect_int || vect_float } }} } */
+/* { dg-final { scan-tree-dump {run-time check [^\n]* unsigned \([^*]* \* 4[)]* >= 16} "vect" { target { vect_int || vect_float } }} } */
+
+/* { dg-final { scan-tree-dump {no alias between [^\n]* when [^\n]* [_a-z][^ ]* \* 8[)]* is outside \[0, 16\)} "vect" { target vect_double } } } */
+/* { dg-final { scan-tree-dump {no alias between [^\n]* when [^\n]* [_a-z][^ ]* \* 8[)]* is outside \[0, 24\)} "vect" { target vect_double } } } */
+/* { dg-final { scan-tree-dump {no alias between [^\n]* when [^\n]* [_a-z][^ ]* \* 8[)]* is outside \[0, 32\)} "vect" { target vect_double } } } */
+/* { dg-final { scan-tree-dump {run-time check [^\n]* unsigned \([^*]* \* 8[)]* >= 32} "vect" { target vect_double } } } */
--- /dev/null
+/* { dg-do run } */
+
+#define N 200
+#define DIST 32
+
+typedef signed char sc;
+typedef unsigned char uc;
+typedef signed short ss;
+typedef unsigned short us;
+typedef int si;
+typedef unsigned int ui;
+typedef signed long long sll;
+typedef unsigned long long ull;
+
+#define FOR_EACH_TYPE(M) \
+ M (sc) M (uc) \
+ M (ss) M (us) \
+ M (si) M (ui) \
+ M (sll) M (ull) \
+ M (float) M (double)
+
+#define TEST_VALUE(I) ((I) * 5 / 2)
+
+#define ADD_TEST(TYPE) \
+ TYPE a_##TYPE[N * 2]; \
+ void __attribute__((noinline, noclone)) \
+ test_##TYPE (int x, int y) \
+ { \
+ for (int i = 0; i < N; ++i) \
+ a_##TYPE[i + x] += a_##TYPE[i + y]; \
+ }
+
+#define DO_TEST(TYPE) \
+ for (int i = 0; i < DIST * 2; ++i) \
+ { \
+ for (int j = 0; j < N + DIST * 2; ++j) \
+ a_##TYPE[j] = TEST_VALUE (j); \
+ test_##TYPE (i, DIST); \
+ for (int j = 0; j < N + DIST * 2; ++j) \
+ { \
+ TYPE expected; \
+ if (j < i || j >= i + N) \
+ expected = TEST_VALUE (j); \
+ else if (i <= DIST) \
+ expected = ((TYPE) TEST_VALUE (j) \
+ + (TYPE) TEST_VALUE (j - i + DIST)); \
+ else \
+ expected = ((TYPE) TEST_VALUE (j) \
+ + a_##TYPE[j - i + DIST]); \
+ if (expected != a_##TYPE[j]) \
+ __builtin_abort (); \
+ } \
+ }
+
+FOR_EACH_TYPE (ADD_TEST)
+
+int
+main (void)
+{
+ FOR_EACH_TYPE (DO_TEST)
+ return 0;
+}
--- /dev/null
+/* { dg-do run } */
+
+#define N 200
+#define M 4
+
+typedef signed char sc;
+typedef unsigned char uc;
+typedef signed short ss;
+typedef unsigned short us;
+typedef int si;
+typedef unsigned int ui;
+typedef signed long long sll;
+typedef unsigned long long ull;
+
+#define FOR_EACH_TYPE(M) \
+ M (sc) M (uc) \
+ M (ss) M (us) \
+ M (si) M (ui) \
+ M (sll) M (ull) \
+ M (float) M (double)
+
+#define TEST_VALUE(I) ((I) * 5 / 2)
+
+#define ADD_TEST(TYPE) \
+ void __attribute__((noinline, noclone)) \
+ test_##TYPE (TYPE *a, TYPE *b) \
+ { \
+ for (int i = 0; i < N; i += 2) \
+ { \
+ a[i + 0] = b[i + 0] + 2; \
+ a[i + 1] = b[i + 1] + 3; \
+ } \
+ }
+
+#define DO_TEST(TYPE) \
+ for (int j = 1; j < M; ++j) \
+ { \
+ TYPE a[N + M]; \
+ for (int i = 0; i < N + M; ++i) \
+ a[i] = TEST_VALUE (i); \
+ test_##TYPE (a + j, a); \
+ for (int i = 0; i < N; i += 2) \
+ if (a[i + j] != (TYPE) (a[i] + 2) \
+ || a[i + j + 1] != (TYPE) (a[i + 1] + 3)) \
+ __builtin_abort (); \
+ }
+
+FOR_EACH_TYPE (ADD_TEST)
+
+int
+main (void)
+{
+ FOR_EACH_TYPE (DO_TEST)
+ return 0;
+}
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-O2 -ftree-vectorize" } */
+
+void
+foo (double *x, int m)
+{
+ for (int i = 0; i < 256; ++i)
+ x[i * m] += x[i * m];
+}
+
+/* { dg-final { scan-assembler-times {\tcbz\tw1,} 1 } } */
+/* { dg-final { scan-assembler-times {\tld1d\tz[0-9]+\.d, } 1 } } */
+/* { dg-final { scan-assembler-times {\tst1d\tz[0-9]+\.d, } 1 } } */
+/* { dg-final { scan-assembler-times {\tldr\t} 1 } } */
+/* { dg-final { scan-assembler-times {\tstr\t} 1 } } */
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-O2 -ftree-vectorize" } */
+
+#define TYPE int
+#define SIZE 257
+
+void __attribute__ ((weak))
+f (TYPE *x, TYPE *y, unsigned short n, long m __attribute__((unused)))
+{
+ for (int i = 0; i < SIZE; ++i)
+ x[i * n] += y[i * n];
+}
+
+/* { dg-final { scan-assembler {\tld1w\tz[0-9]+} } } */
+/* { dg-final { scan-assembler {\tst1w\tz[0-9]+} } } */
+/* { dg-final { scan-assembler {\tldr\tw[0-9]+} } } */
+/* { dg-final { scan-assembler {\tstr\tw[0-9]+} } } */
+/* Should multiply by (VF-1)*4 rather than (257-1)*4. */
+/* { dg-final { scan-assembler-not {, 1024} } } */
+/* { dg-final { scan-assembler-not {\t.bfiz\t} } } */
+/* { dg-final { scan-assembler-not {lsl[^\n]*[, ]10} } } */
+/* { dg-final { scan-assembler-not {\tcmp\tx[0-9]+, 0} } } */
+/* { dg-final { scan-assembler-not {\tcmp\tw[0-9]+, 0} } } */
+/* { dg-final { scan-assembler-not {\tcsel\tx[0-9]+} } } */
+/* Two range checks and a check for n being zero. */
+/* { dg-final { scan-assembler-times {\tcmp\t} 1 } } */
+/* { dg-final { scan-assembler-times {\tccmp\t} 2 } } */
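The final two directives encode the expected shape of the versioning
condition: a nonzero check on n combined with two segment range checks,
giving one cmp and two ccmps.  Schematically (a sketch, not the literal
GIMPLE; seg_* stand for the bounds computed by get_segment_min_max):

  if (n != 0
      && (seg_x_max < seg_y_min || seg_y_max < seg_x_min))
    /* vectorized loop */
  else
    /* scalar loop */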
--- /dev/null
+extern void abort (void) __attribute__ ((noreturn));
+
+#define MARGIN 6
+
+void __attribute__ ((weak, optimize ("no-tree-vectorize")))
+test (int n, int m, int offset)
+{
+ int abs_n = (n < 0 ? -n : n);
+ int abs_m = (m < 0 ? -m : m);
+ int max_i = (abs_n > abs_m ? abs_n : abs_m);
+ int abs_offset = (offset < 0 ? -offset : offset);
+ int size = MARGIN * 2 + max_i * SIZE + abs_offset;
+ TYPE *array = (TYPE *) __builtin_alloca (size * sizeof (TYPE));
+ for (int i = 0; i < size; ++i)
+ array[i] = i;
+ int base_x = offset < 0 ? MARGIN - offset : MARGIN;
+ int base_y = offset < 0 ? MARGIN : MARGIN + offset;
+ int start_x = n < 0 ? base_x - n * (SIZE - 1) : base_x;
+ int start_y = m < 0 ? base_y - m * (SIZE - 1) : base_y;
+ f (&array[start_x], &array[start_y], n, m);
+ int j = 0;
+ int start = (n < 0 ? size - 1 : 0);
+ int end = (n < 0 ? -1 : size);
+ int inc = (n < 0 ? -1 : 1);
+ for (int i = start; i != end; i += inc)
+ {
+ if (j == SIZE || i != start_x + j * n)
+ {
+ if (array[i] != i)
+ abort ();
+ }
+ else if (n == 0)
+ {
+ TYPE sum = i;
+ for (; j < SIZE; j++)
+ {
+ int next_y = start_y + j * m;
+ if (n >= 0 ? next_y < i : next_y > i)
+ sum += array[next_y];
+ else if (next_y == i)
+ sum += sum;
+ else
+ sum += next_y;
+ }
+ if (array[i] != sum)
+ abort ();
+ }
+ else
+ {
+ int next_y = start_y + j * m;
+ TYPE base = i;
+ if (n >= 0 ? next_y < i : next_y > i)
+ base += array[next_y];
+ else
+ base += next_y;
+ if (array[i] != base)
+ abort ();
+ j += 1;
+ }
+ }
+}
--- /dev/null
+/* { dg-do run { target { aarch64_sve_hw } } } */
+/* { dg-options "-O2 -ftree-vectorize" } */
+
+#include "var_stride_1.c"
+#include "var_stride_1.h"
+
+int
+main (void)
+{
+ for (int n = 0; n < 10; ++n)
+ for (int offset = -33; offset <= 33; ++offset)
+ test (n, n, offset);
+ return 0;
+}
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-O2 -ftree-vectorize" } */
+
+#define TYPE int
+#define SIZE 257
+
+void __attribute__ ((weak))
+f (TYPE *x, TYPE *y, unsigned short n, unsigned short m)
+{
+ for (int i = 0; i < SIZE; ++i)
+ x[i * n] += y[i * m];
+}
+
+/* { dg-final { scan-assembler {\tld1w\tz[0-9]+} } } */
+/* { dg-final { scan-assembler {\tst1w\tz[0-9]+} } } */
+/* { dg-final { scan-assembler {\tldr\tw[0-9]+} } } */
+/* { dg-final { scan-assembler {\tstr\tw[0-9]+} } } */
+/* Should multiply by (257-1)*4 rather than (VF-1)*4. */
+/* { dg-final { scan-assembler-times {\tadd\tx[0-9]+, x[0-9]+, x[0-9]+, lsl 10\n} 2 } } */
+/* { dg-final { scan-assembler-not {\tcmp\tx[0-9]+, 0} } } */
+/* { dg-final { scan-assembler-not {\tcmp\tw[0-9]+, 0} } } */
+/* { dg-final { scan-assembler-not {\tcsel\tx[0-9]+} } } */
+/* Two range checks and a check for n being zero. (m being zero is OK.) */
+/* { dg-final { scan-assembler-times {\tcmp\t} 1 } } */
+/* { dg-final { scan-assembler-times {\tccmp\t} 2 } } */
--- /dev/null
+/* { dg-do run { target { aarch64_sve_hw } } } */
+/* { dg-options "-O2 -ftree-vectorize" } */
+
+#include "var_stride_2.c"
+#include "var_stride_1.h"
+
+int
+main (void)
+{
+ for (int n = 0; n < 10; ++n)
+ for (int m = 0; m < 10; ++m)
+ for (int offset = -17; offset <= 17; ++offset)
+ {
+ test (n, m, offset);
+ test (n, m, offset + n * (SIZE - 1));
+ }
+ return 0;
+}
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-O2 -ftree-vectorize" } */
+
+#define TYPE int
+#define SIZE 257
+
+void __attribute__ ((weak))
+f (TYPE *x, TYPE *y, int n, long m __attribute__((unused)))
+{
+ for (int i = 0; i < SIZE; ++i)
+ x[i * n] += y[i * n];
+}
+
+/* { dg-final { scan-assembler {\tld1w\tz[0-9]+} } } */
+/* { dg-final { scan-assembler {\tst1w\tz[0-9]+} } } */
+/* { dg-final { scan-assembler {\tldr\tw[0-9]+} } } */
+/* { dg-final { scan-assembler {\tstr\tw[0-9]+} } } */
+/* Should multiply by (VF-1)*4 rather than (257-1)*4. */
+/* { dg-final { scan-assembler-not {, 1024} } } */
+/* { dg-final { scan-assembler-not {\t.bfiz\t} } } */
+/* { dg-final { scan-assembler-not {lsl[^\n]*[, ]10} } } */
+/* { dg-final { scan-assembler-not {\tcmp\tx[0-9]+, 0} } } */
+/* { dg-final { scan-assembler {\tcmp\tw2, 0} } } */
+/* { dg-final { scan-assembler-times {\tcsel\tx[0-9]+} 2 } } */
+/* Two range checks and a check for n being zero. */
+/* { dg-final { scan-assembler {\tcmp\t} } } */
+/* { dg-final { scan-assembler-times {\tccmp\t} 2 } } */
--- /dev/null
+/* { dg-do run { target { aarch64_sve_hw } } } */
+/* { dg-options "-O2 -ftree-vectorize" } */
+
+#include "var_stride_3.c"
+#include "var_stride_1.h"
+
+int
+main (void)
+{
+ for (int n = -10; n < 10; ++n)
+ for (int offset = -33; offset <= 33; ++offset)
+ test (n, n, offset);
+ return 0;
+}
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-O2 -ftree-vectorize" } */
+
+#define TYPE int
+#define SIZE 257
+
+void __attribute__ ((weak))
+f (TYPE *x, TYPE *y, int n, int m)
+{
+ for (int i = 0; i < SIZE; ++i)
+ x[i * n] += y[i * m];
+}
+
+/* { dg-final { scan-assembler {\tld1w\tz[0-9]+} } } */
+/* { dg-final { scan-assembler {\tst1w\tz[0-9]+} } } */
+/* { dg-final { scan-assembler {\tldr\tw[0-9]+} } } */
+/* { dg-final { scan-assembler {\tstr\tw[0-9]+} } } */
+/* Should multiply by (257-1)*4 rather than (VF-1)*4. */
+/* { dg-final { scan-assembler-times {\tlsl\tx[0-9]+, x[0-9]+, 10\n} 2 } } */
+/* { dg-final { scan-assembler {\tcmp\tw2, 0} } } */
+/* { dg-final { scan-assembler {\tcmp\tw3, 0} } } */
+/* { dg-final { scan-assembler-times {\tcsel\tx[0-9]+} 4 } } */
+/* Two range checks and a check for n being zero. (m being zero is OK.) */
+/* { dg-final { scan-assembler {\tcmp\t} } } */
+/* { dg-final { scan-assembler-times {\tccmp\t} 2 } } */
--- /dev/null
+/* { dg-do run { target { aarch64_sve_hw } } } */
+/* { dg-options "-O2 -ftree-vectorize" } */
+
+#include "var_stride_4.c"
+#include "var_stride_1.h"
+
+int
+main (void)
+{
+ for (int n = -10; n < 10; ++n)
+ for (int m = -10; m < 10; ++m)
+ for (int offset = -17; offset <= 17; ++offset)
+ {
+ test (n, m, offset);
+ test (n, m, offset + n * (SIZE - 1));
+ }
+ return 0;
+}
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-O2 -ftree-vectorize" } */
+
+#define TYPE double
+#define SIZE 257
+
+void __attribute__ ((weak))
+f (TYPE *x, TYPE *y, long n, long m __attribute__((unused)))
+{
+ for (int i = 0; i < SIZE; ++i)
+ x[i * n] += y[i * n];
+}
+
+/* { dg-final { scan-assembler {\tld1d\tz[0-9]+} } } */
+/* { dg-final { scan-assembler {\tst1d\tz[0-9]+} } } */
+/* { dg-final { scan-assembler {\tldr\td[0-9]+} } } */
+/* { dg-final { scan-assembler {\tstr\td[0-9]+} } } */
+/* Should multiply by (VF-1)*8 rather than (257-1)*8. */
+/* { dg-final { scan-assembler-not {, 2048} } } */
+/* { dg-final { scan-assembler-not {\t.bfiz\t} } } */
+/* { dg-final { scan-assembler-not {lsl[^\n]*[, ]11} } } */
+/* { dg-final { scan-assembler {\tcmp\tx[0-9]+, 0} } } */
+/* { dg-final { scan-assembler-not {\tcmp\tw[0-9]+, 0} } } */
+/* { dg-final { scan-assembler-times {\tcsel\tx[0-9]+} 2 } } */
+/* Two range checks and a check for n being zero. */
+/* { dg-final { scan-assembler {\tcmp\t} } } */
+/* { dg-final { scan-assembler-times {\tccmp\t} 2 } } */
--- /dev/null
+/* { dg-do run { target { aarch64_sve_hw } } } */
+/* { dg-options "-O2 -ftree-vectorize" } */
+
+#include "var_stride_5.c"
+#include "var_stride_1.h"
+
+int
+main (void)
+{
+ for (int n = -10; n < 10; ++n)
+ for (int offset = -33; offset <= 33; ++offset)
+ test (n, n, offset);
+ return 0;
+}
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-O2 -ftree-vectorize" } */
+
+#define TYPE long
+#define SIZE 257
+
+void __attribute__ ((weak))
+f (TYPE *x, TYPE *y, long n, long m)
+{
+ for (int i = 0; i < SIZE; ++i)
+ x[i * n] += y[i * m];
+}
+
+/* { dg-final { scan-assembler {\tld1d\tz[0-9]+} } } */
+/* { dg-final { scan-assembler {\tst1d\tz[0-9]+} } } */
+/* { dg-final { scan-assembler {\tldr\tx[0-9]+} } } */
+/* { dg-final { scan-assembler {\tstr\tx[0-9]+} } } */
+/* Should multiply by (257-1)*8 rather than (VF-1)*8. */
+/* { dg-final { scan-assembler-times {lsl\tx[0-9]+, x[0-9]+, 11} 2 } } */
+/* { dg-final { scan-assembler {\tcmp\tx[0-9]+, 0} } } */
+/* { dg-final { scan-assembler-not {\tcmp\tw[0-9]+, 0} } } */
+/* { dg-final { scan-assembler-times {\tcsel\tx[0-9]+} 4 } } */
+/* Two range checks and a check for n being zero. (m being zero is OK.) */
+/* { dg-final { scan-assembler {\tcmp\t} } } */
+/* { dg-final { scan-assembler-times {\tccmp\t} 2 } } */
--- /dev/null
+/* { dg-do run { target { aarch64_sve_hw } } } */
+/* { dg-options "-O2 -ftree-vectorize" } */
+
+#include "var_stride_6.c"
+#include "var_stride_1.h"
+
+int
+main (void)
+{
+ for (int n = -10; n < 10; ++n)
+ for (int m = -10; m < 10; ++m)
+ for (int offset = -17; offset <= 17; ++offset)
+ {
+ test (n, m, offset);
+ test (n, m, offset + n * (SIZE - 1));
+ }
+ return 0;
+}
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-O2 -ftree-vectorize" } */
+
+#define TYPE double
+#define SIZE 257
+
+void __attribute__ ((weak))
+f (TYPE *x, TYPE *y, long n, long m __attribute__((unused)))
+{
+ for (int i = 0; i < SIZE; ++i)
+ x[i * n] += y[i];
+}
+
+/* { dg-final { scan-assembler {\tld1d\tz[0-9]+} } } */
+/* { dg-final { scan-assembler {\tst1d\tz[0-9]+} } } */
+/* { dg-final { scan-assembler {\tldr\td[0-9]+} } } */
+/* { dg-final { scan-assembler {\tstr\td[0-9]+} } } */
+/* Should multiply by (257-1)*8 rather than (VF-1)*8. */
+/* { dg-final { scan-assembler-times {\tadd\tx[0-9]+, x1, 2048} 1 } } */
+/* { dg-final { scan-assembler-times {lsl\tx[0-9]+, x[0-9]+, 11} 1 } } */
+/* { dg-final { scan-assembler {\tcmp\tx[0-9]+, 0} } } */
+/* { dg-final { scan-assembler-not {\tcmp\tw[0-9]+, 0} } } */
+/* { dg-final { scan-assembler-times {\tcsel\tx[0-9]+} 2 } } */
+/* Two range checks and a check for n being zero. */
+/* { dg-final { scan-assembler {\tcmp\t} } } */
+/* { dg-final { scan-assembler-times {\tccmp\t} 2 } } */
--- /dev/null
+/* { dg-do run { target { aarch64_sve_hw } } } */
+/* { dg-options "-O2 -ftree-vectorize" } */
+
+#include "var_stride_7.c"
+#include "var_stride_1.h"
+
+int
+main (void)
+{
+ for (int n = -10; n < 10; ++n)
+ for (int offset = -33; offset <= 33; ++offset)
+ test (n, 1, offset);
+ return 0;
+}
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-O2 -ftree-vectorize" } */
+
+#define TYPE long
+#define SIZE 257
+
+void
+f (TYPE *x, TYPE *y, long n __attribute__((unused)), long m)
+{
+ for (int i = 0; i < SIZE; ++i)
+ x[i] += y[i * m];
+}
+
+/* { dg-final { scan-assembler {\tld1d\tz[0-9]+} } } */
+/* { dg-final { scan-assembler {\tst1d\tz[0-9]+} } } */
+/* { dg-final { scan-assembler {\tldr\tx[0-9]+} } } */
+/* { dg-final { scan-assembler {\tstr\tx[0-9]+} } } */
+/* Should multiply by (257-1)*8 rather than (VF-1)*8. */
+/* { dg-final { scan-assembler-times {\tadd\tx[0-9]+, x0, 2048} 1 } } */
+/* { dg-final { scan-assembler-times {lsl\tx[0-9]+, x[0-9]+, 11} 1 } } */
+/* { dg-final { scan-assembler {\tcmp\tx[0-9]+, 0} } } */
+/* { dg-final { scan-assembler-not {\tcmp\tw[0-9]+, 0} } } */
+/* { dg-final { scan-assembler-times {\tcsel\tx[0-9]+} 2 } } */
+/* Two range checks only; doesn't matter whether n is zero. */
+/* { dg-final { scan-assembler {\tcmp\t} } } */
+/* { dg-final { scan-assembler-times {\tccmp\t} 1 } } */
--- /dev/null
+/* { dg-do run { target { aarch64_sve_hw } } } */
+/* { dg-options "-O2 -ftree-vectorize" } */
+
+#include "var_stride_8.c"
+#include "var_stride_1.h"
+
+int
+main (void)
+{
+ for (int n = -10; n < 10; ++n)
+ for (int offset = -33; offset <= 33; ++offset)
+ test (1, n, offset);
+ return 0;
+}
--- /dev/null
+! { dg-do run }
+! { dg-additional-options "-fno-inline" }
+
+#define N 200
+
+#define TEST_VALUE(I) ((I) * 5 / 2)
+
+subroutine setup(a)
+ real :: a(N)
+ do i = 1, N
+ a(i) = TEST_VALUE(i)
+ end do
+end subroutine
+
+subroutine check(a, x, gap)
+ real :: a(N), temp, x
+ integer :: gap
+ do i = 1, N - gap
+ temp = a(i + gap) + x
+ if (a(i) /= temp) call abort
+ end do
+ do i = N - gap + 1, N
+ temp = TEST_VALUE(i)
+ if (a(i) /= temp) call abort
+ end do
+end subroutine
+
+subroutine testa(a, x, base, n)
+ real :: a(n), x
+ integer :: base, n
+ do i = n, 2, -1
+ a(base + i - 1) = a(base + i) + x
+ end do
+end subroutine testa
+
+subroutine testb(a, x, base, n)
+ real :: a(n), x
+ integer :: base
+ do i = n, 4, -1
+ a(base + i - 3) = a(base + i) + x
+ end do
+end subroutine testb
+
+subroutine testc(a, x, base, n)
+ real :: a(n), x
+ integer :: base
+ do i = n, 8, -1
+ a(base + i - 7) = a(base + i) + x
+ end do
+end subroutine testc
+
+subroutine testd(a, x, base, n)
+ real :: a(n), x
+ integer :: base
+ do i = n, 16, -1
+ a(base + i - 15) = a(base + i) + x
+ end do
+end subroutine testd
+
+subroutine teste(a, x, base, n)
+ real :: a(n), x
+ integer :: base
+ do i = n, 32, -1
+ a(base + i - 31) = a(base + i) + x
+ end do
+end subroutine teste
+
+subroutine testf(a, x, base, n)
+ real :: a(n), x
+ integer :: base
+ do i = n, 64, -1
+ a(base + i - 63) = a(base + i) + x
+ end do
+end subroutine testf
+
+program main
+ real :: a(N)
+
+ call setup(a)
+ call testa(a, 91.0, 0, N)
+ call check(a, 91.0, 1)
+
+ call setup(a)
+ call testb(a, 55.0, 0, N)
+ call check(a, 55.0, 3)
+
+ call setup(a)
+ call testc(a, 72.0, 0, N)
+ call check(a, 72.0, 7)
+
+ call setup(a)
+ call testd(a, 69.0, 0, N)
+ call check(a, 69.0, 15)
+
+ call setup(a)
+ call teste(a, 44.0, 0, N)
+ call check(a, 44.0, 31)
+
+ call setup(a)
+ call testf(a, 39.0, 0, N)
+ call check(a, 39.0, 63)
+end program
#include "tree-affine.h"
#include "params.h"
#include "builtins.h"
+#include "stringpool.h"
+#include "tree-vrp.h"
+#include "tree-ssanames.h"
static struct datadep_stats
{
return false;
}
- /* FORNOW: We don't support creating runtime alias tests for non-constant
- step. */
- if (TREE_CODE (DR_STEP (DDR_A (ddr))) != INTEGER_CST
- || TREE_CODE (DR_STEP (DDR_B (ddr))) != INTEGER_CST)
- {
- if (dump_enabled_p ())
- dump_printf (MSG_MISSED_OPTIMIZATION,
- "runtime alias check not supported for non-constant "
- "step\n");
- return false;
- }
-
return true;
}
operator == (const dr_with_seg_len& d1,
const dr_with_seg_len& d2)
{
- return operand_equal_p (DR_BASE_ADDRESS (d1.dr),
- DR_BASE_ADDRESS (d2.dr), 0)
- && data_ref_compare_tree (DR_OFFSET (d1.dr), DR_OFFSET (d2.dr)) == 0
- && data_ref_compare_tree (DR_INIT (d1.dr), DR_INIT (d2.dr)) == 0
- && data_ref_compare_tree (d1.seg_len, d2.seg_len) == 0;
+ return (operand_equal_p (DR_BASE_ADDRESS (d1.dr),
+ DR_BASE_ADDRESS (d2.dr), 0)
+ && data_ref_compare_tree (DR_OFFSET (d1.dr), DR_OFFSET (d2.dr)) == 0
+ && data_ref_compare_tree (DR_INIT (d1.dr), DR_INIT (d2.dr)) == 0
+ && data_ref_compare_tree (d1.seg_len, d2.seg_len) == 0
+ && known_eq (d1.access_size, d2.access_size)
+ && d1.align == d2.align);
}
/* Comparison function for sorting objects of dr_with_seg_len_pair_t
void
prune_runtime_alias_test_list (vec<dr_with_seg_len_pair_t> *alias_pairs,
- poly_uint64 factor)
+ poly_uint64)
{
/* Sort the collected data ref pairs so that we can scan them once to
combine all possible aliasing checks. */
}
poly_int64 init_a1, init_a2;
+ /* Only consider cases in which the distance between the initial
+ DR_A1 and the initial DR_A2 is known at compile time. */
if (!operand_equal_p (DR_BASE_ADDRESS (dr_a1->dr),
DR_BASE_ADDRESS (dr_a2->dr), 0)
|| !operand_equal_p (DR_OFFSET (dr_a1->dr),
std::swap (init_a1, init_a2);
}
- /* Only merge const step data references. */
- poly_int64 step_a1, step_a2;
- if (!poly_int_tree_p (DR_STEP (dr_a1->dr), &step_a1)
- || !poly_int_tree_p (DR_STEP (dr_a2->dr), &step_a2))
- continue;
+ /* Work out what the segment length would be if we did combine
+ DR_A1 and DR_A2:
- bool neg_step = maybe_lt (step_a1, 0) || maybe_lt (step_a2, 0);
+ - If DR_A1 and DR_A2 have equal lengths, that length is
+ also the combined length.
- /* DR_A1 and DR_A2 must go in the same direction. */
- if (neg_step && (maybe_gt (step_a1, 0) || maybe_gt (step_a2, 0)))
- continue;
+ - If DR_A1 and DR_A2 both have negative "lengths", the combined
+ length is the lower bound on those lengths.
- poly_uint64 seg_len_a1 = 0, seg_len_a2 = 0;
- bool const_seg_len_a1 = poly_int_tree_p (dr_a1->seg_len,
- &seg_len_a1);
- bool const_seg_len_a2 = poly_int_tree_p (dr_a2->seg_len,
- &seg_len_a2);
-
- /* We need to compute merged segment length at compilation time for
- dr_a1 and dr_a2, which is impossible if either one has non-const
- segment length. */
- if ((!const_seg_len_a1 || !const_seg_len_a2)
- && maybe_ne (step_a1, step_a2))
- continue;
+ - If DR_A1 and DR_A2 both have positive lengths, the combined
+ length is the upper bound on those lengths.
- bool do_remove = false;
- poly_uint64 diff = init_a2 - init_a1;
- poly_uint64 min_seg_len_b;
- tree new_seg_len;
+ Other cases are unlikely to give a useful combination.
- if (!poly_int_tree_p (dr_b1->seg_len, &min_seg_len_b))
+ The lengths both have sizetype, so the sign is taken from
+ the step instead. */
+ if (!operand_equal_p (dr_a1->seg_len, dr_a2->seg_len, 0))
{
- tree step_b = DR_STEP (dr_b1->dr);
- if (!tree_fits_shwi_p (step_b))
+ poly_uint64 seg_len_a1, seg_len_a2;
+ if (!poly_int_tree_p (dr_a1->seg_len, &seg_len_a1)
+ || !poly_int_tree_p (dr_a2->seg_len, &seg_len_a2))
continue;
- min_seg_len_b = factor * abs_hwi (tree_to_shwi (step_b));
- }
-
- /* Now we try to merge alias check dr_a1 & dr_b and dr_a2 & dr_b.
-
- Case A:
- check if the following condition is satisfied:
-
- DIFF - SEGMENT_LENGTH_A < SEGMENT_LENGTH_B
- where DIFF = DR_A2_INIT - DR_A1_INIT. However,
- SEGMENT_LENGTH_A or SEGMENT_LENGTH_B may not be constant so we
- have to make a best estimation. We can get the minimum value
- of SEGMENT_LENGTH_B as a constant, represented by MIN_SEG_LEN_B,
- then either of the following two conditions can guarantee the
- one above:
+ tree indicator_a = dr_direction_indicator (dr_a1->dr);
+ if (TREE_CODE (indicator_a) != INTEGER_CST)
+ continue;
- 1: DIFF <= MIN_SEG_LEN_B
- 2: DIFF - SEGMENT_LENGTH_A < MIN_SEG_LEN_B
- Because DIFF - SEGMENT_LENGTH_A is done in sizetype, we need
- to take care of wrapping behavior in it.
+ tree indicator_b = dr_direction_indicator (dr_a2->dr);
+ if (TREE_CODE (indicator_b) != INTEGER_CST)
+ continue;
- Case B:
- If the left segment does not extend beyond the start of the
- right segment the new segment length is that of the right
- plus the segment distance. The condition is like:
+ int sign_a = tree_int_cst_sgn (indicator_a);
+ int sign_b = tree_int_cst_sgn (indicator_b);
- DIFF >= SEGMENT_LENGTH_A ;SEGMENT_LENGTH_A is a constant.
+ poly_uint64 new_seg_len;
+ if (sign_a <= 0 && sign_b <= 0)
+ new_seg_len = lower_bound (seg_len_a1, seg_len_a2);
+ else if (sign_a >= 0 && sign_b >= 0)
+ new_seg_len = upper_bound (seg_len_a1, seg_len_a2);
+ else
+ continue;
- Note 1: Case A.2 and B combined together effectively merges every
- dr_a1 & dr_b and dr_a2 & dr_b when SEGMENT_LENGTH_A is const.
+ dr_a1->seg_len = build_int_cst (TREE_TYPE (dr_a1->seg_len),
+ new_seg_len);
+ dr_a1->align = MIN (dr_a1->align, known_alignment (new_seg_len));
+ }
- Note 2: Above description is based on positive DR_STEP, we need to
- take care of negative DR_STEP for wrapping behavior. See PR80815
- for more information. */
- if (neg_step)
- {
- /* Adjust diff according to access size of both references. */
- diff += tree_to_poly_uint64
- (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr_a2->dr))));
- diff -= tree_to_poly_uint64
- (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr_a1->dr))));
- /* Case A.1. */
- if (known_le (diff, min_seg_len_b)
- /* Case A.2 and B combined. */
- || const_seg_len_a2)
- {
- if (const_seg_len_a1 || const_seg_len_a2)
- new_seg_len
- = build_int_cstu (sizetype,
- lower_bound (seg_len_a1 - diff,
- seg_len_a2));
- else
- new_seg_len
- = size_binop (MINUS_EXPR, dr_a2->seg_len,
- build_int_cstu (sizetype, diff));
+ /* This is always positive due to the swap above. */
+ poly_uint64 diff = init_a2 - init_a1;
- dr_a2->seg_len = new_seg_len;
- do_remove = true;
- }
- }
- else
+ /* The new check will start at DR_A1. Make sure that its access
+ size encompasses the initial DR_A2. */
+ if (maybe_lt (dr_a1->access_size, diff + dr_a2->access_size))
{
- /* Case A.1. */
- if (known_le (diff, min_seg_len_b)
- /* Case A.2 and B combined. */
- || const_seg_len_a1)
- {
- if (const_seg_len_a1 && const_seg_len_a2)
- new_seg_len
- = build_int_cstu (sizetype,
- upper_bound (seg_len_a2 + diff,
- seg_len_a1));
- else
- new_seg_len
- = size_binop (PLUS_EXPR, dr_a2->seg_len,
- build_int_cstu (sizetype, diff));
-
- dr_a1->seg_len = new_seg_len;
- do_remove = true;
- }
+ dr_a1->access_size = upper_bound (dr_a1->access_size,
+ diff + dr_a2->access_size);
+ unsigned int new_align = known_alignment (dr_a1->access_size);
+ dr_a1->align = MIN (dr_a1->align, new_align);
}
-
- if (do_remove)
+ if (dump_enabled_p ())
{
- if (dump_enabled_p ())
- {
- dump_printf (MSG_NOTE, "merging ranges for ");
- dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dr_a1->dr));
- dump_printf (MSG_NOTE, ", ");
- dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dr_b1->dr));
- dump_printf (MSG_NOTE, " and ");
- dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dr_a2->dr));
- dump_printf (MSG_NOTE, ", ");
- dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dr_b2->dr));
- dump_printf (MSG_NOTE, "\n");
- }
- alias_pairs->ordered_remove (neg_step ? i - 1 : i);
- i--;
+ dump_printf (MSG_NOTE, "merging ranges for ");
+ dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dr_a1->dr));
+ dump_printf (MSG_NOTE, ", ");
+ dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dr_b1->dr));
+ dump_printf (MSG_NOTE, " and ");
+ dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dr_a2->dr));
+ dump_printf (MSG_NOTE, ", ");
+ dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dr_b2->dr));
+ dump_printf (MSG_NOTE, "\n");
}
+ alias_pairs->ordered_remove (i);
+ i--;
}
}
}
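A worked example of the merge above, with hypothetical compile-time values:

  /* DR_A1 and DR_A2 share a base: init_a1 = 0, init_a2 = 8, both with
     seg_len = 64, access_size = 4 and nonnegative direction indicators.
       equal seg_lens      -> combined seg_len stays 64
       diff                =  init_a2 - init_a1 = 8
       maybe_lt (4, 8 + 4) -> grow the access size
       access_size         =  upper_bound (4, 8 + 4) = 12
       align               =  MIN (align, known_alignment (12)) = 4
     A single check of DR_A1 against DR_B now covers DR_A2 as well.  */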
|| DR_NUM_DIMENSIONS (dr_a.dr) != DR_NUM_DIMENSIONS (dr_b.dr))
return false;
- if (!tree_fits_uhwi_p (dr_a.seg_len) || !tree_fits_uhwi_p (dr_b.seg_len))
+ poly_uint64 seg_len1, seg_len2;
+ if (!poly_int_tree_p (dr_a.seg_len, &seg_len1)
+ || !poly_int_tree_p (dr_b.seg_len, &seg_len2))
return false;
if (!tree_fits_shwi_p (DR_STEP (dr_a.dr)))
gcc_assert (TREE_CODE (DR_STEP (dr_a.dr)) == INTEGER_CST);
bool neg_step = tree_int_cst_compare (DR_STEP (dr_a.dr), size_zero_node) < 0;
- unsigned HOST_WIDE_INT abs_step
- = absu_hwi (tree_to_shwi (DR_STEP (dr_a.dr)));
+ unsigned HOST_WIDE_INT abs_step = tree_to_shwi (DR_STEP (dr_a.dr));
+ if (neg_step)
+ {
+ abs_step = -abs_step;
+ seg_len1 = -seg_len1;
+ seg_len2 = -seg_len2;
+ }
+ else
+ {
+ /* Include the access size in the length, so that we only have one
+ tree addition below. */
+ seg_len1 += dr_a.access_size;
+ seg_len2 += dr_b.access_size;
+ }
- unsigned HOST_WIDE_INT seg_len1 = tree_to_uhwi (dr_a.seg_len);
- unsigned HOST_WIDE_INT seg_len2 = tree_to_uhwi (dr_b.seg_len);
/* Infer the number of iterations with which the memory segment is accessed
by DR. In other words, alias is checked if memory segment accessed by
DR_A in some iterations intersects with memory segment accessed by DR_B
in the same number of iterations.
Note segment length is a linear function of number of iterations with
DR_STEP as the coefficient. */
- unsigned HOST_WIDE_INT niter_len1 = (seg_len1 + abs_step - 1) / abs_step;
- unsigned HOST_WIDE_INT niter_len2 = (seg_len2 + abs_step - 1) / abs_step;
+ poly_uint64 niter_len1, niter_len2;
+ if (!can_div_trunc_p (seg_len1 + abs_step - 1, abs_step, &niter_len1)
+ || !can_div_trunc_p (seg_len2 + abs_step - 1, abs_step, &niter_len2))
+ return false;
+
+ poly_uint64 niter_access1 = 0, niter_access2 = 0;
+ if (neg_step)
+ {
+ /* Divide each access size by the byte step, rounding up. */
+ if (!can_div_trunc_p (dr_a.access_size + abs_step - 1,
+ abs_step, &niter_access1)
+ || !can_div_trunc_p (dr_b.access_size + abs_step - 1,
+ abs_step, &niter_access2))
+ return false;
+ }
unsigned int i;
for (i = 0; i < DR_NUM_DIMENSIONS (dr_a.dr); i++)
/* Adjust ranges for negative step. */
if (neg_step)
{
- min1 = fold_build2 (MINUS_EXPR, TREE_TYPE (min1), max1, idx_step);
- max1 = fold_build2 (MINUS_EXPR, TREE_TYPE (min1),
- CHREC_LEFT (access1), idx_step);
- min2 = fold_build2 (MINUS_EXPR, TREE_TYPE (min2), max2, idx_step);
- max2 = fold_build2 (MINUS_EXPR, TREE_TYPE (min2),
- CHREC_LEFT (access2), idx_step);
+ /* IDX_LEN1 and IDX_LEN2 are negative in this case. */
+ std::swap (min1, max1);
+ std::swap (min2, max2);
+
+ /* As with the lengths just calculated, we've measured the access
+ sizes in iterations, so multiply them by the index step. */
+ tree idx_access1
+ = fold_build2 (MULT_EXPR, TREE_TYPE (min1), idx_step,
+ build_int_cst (TREE_TYPE (min1), niter_access1));
+ tree idx_access2
+ = fold_build2 (MULT_EXPR, TREE_TYPE (min2), idx_step,
+ build_int_cst (TREE_TYPE (min2), niter_access2));
+
+ /* MINUS_EXPR because the above values are negative. */
+ max1 = fold_build2 (MINUS_EXPR, TREE_TYPE (max1), max1, idx_access1);
+ max2 = fold_build2 (MINUS_EXPR, TREE_TYPE (max2), max2, idx_access2);
}
tree part_cond_expr
= fold_build2 (TRUTH_OR_EXPR, boolean_type_node,
return true;
}
+/* If ALIGN is nonzero, set up *SEG_MIN_OUT and *SEG_MAX_OUT so that for
+   every address ADDR accessed by D:
+
+     *SEG_MIN_OUT <= ADDR (== ADDR & -ALIGN) <= *SEG_MAX_OUT
+
+   In this case, every element accessed by D is aligned to at least
+   ALIGN bytes.
+
+   If ALIGN is zero then instead set *SEG_MIN_OUT and *SEG_MAX_OUT so that:
+
+     *SEG_MIN_OUT <= ADDR < *SEG_MAX_OUT.  */
+
+static void
+get_segment_min_max (const dr_with_seg_len &d, tree *seg_min_out,
+ tree *seg_max_out, HOST_WIDE_INT align)
+{
+ /* Each access has the following pattern:
+
+     <- |seg_len| ->
+     <--- A: -ve step --->
+     +-----+-------+-----+-------+-----+
+     | n-1 | ..... |  0  | ..... | n-1 |
+     +-----+-------+-----+-------+-----+
+                   <--- B: +ve step --->
+                   <- |seg_len| ->
+                      |
+                 base address
+
+ where "n" is the number of scalar iterations covered by the segment.
+ (This should be VF for a particular pair if we know that both steps
+ are the same, otherwise it will be the full number of scalar loop
+ iterations.)
+
+ A is the range of bytes accessed when the step is negative,
+ B is the range when the step is positive.
+
+ If the access size is "access_size" bytes, the lowest addressed byte is:
+
+ base + (step < 0 ? seg_len : 0) [LB]
+
+ and the highest addressed byte is always below:
+
+ base + (step < 0 ? 0 : seg_len) + access_size [UB]
+
+ Thus:
+
+ LB <= ADDR < UB
+
+ If ALIGN is nonzero, all three values are aligned to at least ALIGN
+ bytes, so:
+
+ LB <= ADDR <= UB - ALIGN
+
+ where "- ALIGN" folds naturally with the "+ access_size" and often
+ cancels it out.
+
+ We don't try to simplify LB and UB beyond this (e.g. by using
+ MIN and MAX based on whether seg_len rather than the stride is
+ negative) because it is possible for the absolute size of the
+ segment to overflow the range of a ssize_t.
+
+ Keeping the pointer_plus outside of the cond_expr should allow
+ the cond_exprs to be shared with other alias checks. */
+ tree indicator = dr_direction_indicator (d.dr);
+ tree neg_step = fold_build2 (LT_EXPR, boolean_type_node,
+ fold_convert (ssizetype, indicator),
+ ssize_int (0));
+ tree addr_base = fold_build_pointer_plus (DR_BASE_ADDRESS (d.dr),
+ DR_OFFSET (d.dr));
+ addr_base = fold_build_pointer_plus (addr_base, DR_INIT (d.dr));
+ tree seg_len = fold_convert (sizetype, d.seg_len);
+
+ tree min_reach = fold_build3 (COND_EXPR, sizetype, neg_step,
+ seg_len, size_zero_node);
+ tree max_reach = fold_build3 (COND_EXPR, sizetype, neg_step,
+ size_zero_node, seg_len);
+ max_reach = fold_build2 (PLUS_EXPR, sizetype, max_reach,
+ size_int (d.access_size - align));
+
+ *seg_min_out = fold_build_pointer_plus (addr_base, min_reach);
+ *seg_max_out = fold_build_pointer_plus (addr_base, max_reach);
+}
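For concreteness, the bounds this produces for some hypothetical inputs:

  /* Positive step, base address B, seg_len = 12, access_size = 4:
       align == 0:  [B, B + 16)  exclusive maximum; the caller compares
                                 with LE_EXPR
       align == 4:  [B, B + 12]  inclusive maximum; the caller compares
                                 with LT_EXPR, and "+ access_size" and
                                 "- align" cancel out
     Negative step (seg_len = -12 in sizetype arithmetic):
       min = B - 12, max = B + 4 - align.  */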
+
/* Given two data references and segment lengths described by DR_A and DR_B,
create expression checking if the two addresses ranges intersect with
each other:
if (create_intersect_range_checks_index (loop, cond_expr, dr_a, dr_b))
return;
- tree segment_length_a = dr_a.seg_len;
- tree segment_length_b = dr_b.seg_len;
- tree addr_base_a = DR_BASE_ADDRESS (dr_a.dr);
- tree addr_base_b = DR_BASE_ADDRESS (dr_b.dr);
- tree offset_a = DR_OFFSET (dr_a.dr), offset_b = DR_OFFSET (dr_b.dr);
-
- offset_a = fold_build2 (PLUS_EXPR, TREE_TYPE (offset_a),
- offset_a, DR_INIT (dr_a.dr));
- offset_b = fold_build2 (PLUS_EXPR, TREE_TYPE (offset_b),
- offset_b, DR_INIT (dr_b.dr));
- addr_base_a = fold_build_pointer_plus (addr_base_a, offset_a);
- addr_base_b = fold_build_pointer_plus (addr_base_b, offset_b);
-
- tree seg_a_min = addr_base_a;
- tree seg_a_max = fold_build_pointer_plus (addr_base_a, segment_length_a);
- /* For negative step, we need to adjust address range by TYPE_SIZE_UNIT
- bytes, e.g., int a[3] -> a[1] range is [a+4, a+16) instead of
- [a, a+12) */
- if (tree_int_cst_compare (DR_STEP (dr_a.dr), size_zero_node) < 0)
+ unsigned HOST_WIDE_INT min_align;
+ tree_code cmp_code;
+ if (TREE_CODE (DR_STEP (dr_a.dr)) == INTEGER_CST
+ && TREE_CODE (DR_STEP (dr_b.dr)) == INTEGER_CST)
{
- tree unit_size = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr_a.dr)));
- seg_a_min = fold_build_pointer_plus (seg_a_max, unit_size);
- seg_a_max = fold_build_pointer_plus (addr_base_a, unit_size);
+ /* In this case adding access_size to seg_len is likely to give
+ a simple X * step, where X is either the number of scalar
+ iterations or the vectorization factor. We're better off
+ keeping that, rather than subtracting an alignment from it.
+
+ In this case the maximum values are exclusive and so there is
+ no alias if the maximum of one segment equals the minimum
+ of another. */
+ min_align = 0;
+ cmp_code = LE_EXPR;
}
-
- tree seg_b_min = addr_base_b;
- tree seg_b_max = fold_build_pointer_plus (addr_base_b, segment_length_b);
- if (tree_int_cst_compare (DR_STEP (dr_b.dr), size_zero_node) < 0)
+ else
{
- tree unit_size = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr_b.dr)));
- seg_b_min = fold_build_pointer_plus (seg_b_max, unit_size);
- seg_b_max = fold_build_pointer_plus (addr_base_b, unit_size);
+ /* Calculate the minimum alignment shared by all four pointers,
+ then arrange for this alignment to be subtracted from the
+ exclusive maximum values to get inclusive maximum values.
+ This "- min_align" is cumulative with a "+ access_size"
+ in the calculation of the maximum values. In the best
+ (and common) case, the two cancel each other out, leaving
+ us with an inclusive bound based only on seg_len. In the
+ worst case we're simply adding a smaller number than before.
+
+ Because the maximum values are inclusive, there is an alias
+ if the maximum value of one segment is equal to the minimum
+ value of the other. */
+ min_align = MIN (dr_a.align, dr_b.align);
+ cmp_code = LT_EXPR;
}
+
+ tree seg_a_min, seg_a_max, seg_b_min, seg_b_max;
+ get_segment_min_max (dr_a, &seg_a_min, &seg_a_max, min_align);
+ get_segment_min_max (dr_b, &seg_b_min, &seg_b_max, min_align);
+
*cond_expr
= fold_build2 (TRUTH_OR_EXPR, boolean_type_node,
- fold_build2 (LE_EXPR, boolean_type_node, seg_a_max, seg_b_min),
- fold_build2 (LE_EXPR, boolean_type_node, seg_b_max, seg_a_min));
+ fold_build2 (cmp_code, boolean_type_node, seg_a_max, seg_b_min),
+ fold_build2 (cmp_code, boolean_type_node, seg_b_max, seg_a_min));
}
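So the emitted condition is true when the two segments cannot overlap;
schematically:

  *cond_expr = (seg_a_max CMP seg_b_min) || (seg_b_max CMP seg_a_min)

where CMP is <= when both steps are compile-time constants (exclusive
maxima) and < otherwise (inclusive maxima).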
/* Create a conditional expression that represents the run-time checks for
free_data_ref (dr);
datarefs.release ();
}
+
+/* Common routine implementing both dr_direction_indicator and
+ dr_zero_step_indicator. Return USEFUL_MIN if the indicator is known
+ to be >= USEFUL_MIN and -1 if the indicator is known to be negative.
+ Return the step as the indicator otherwise. */
+
+static tree
+dr_step_indicator (struct data_reference *dr, int useful_min)
+{
+ tree step = DR_STEP (dr);
+ STRIP_NOPS (step);
+ /* Look for cases where the step is scaled by a positive constant
+ integer, which will often be the access size. If the multiplication
+ doesn't change the sign (due to overflow effects) then we can
+ test the unscaled value instead. */
+ if (TREE_CODE (step) == MULT_EXPR
+ && TREE_CODE (TREE_OPERAND (step, 1)) == INTEGER_CST
+ && tree_int_cst_sgn (TREE_OPERAND (step, 1)) > 0)
+ {
+ tree factor = TREE_OPERAND (step, 1);
+ step = TREE_OPERAND (step, 0);
+
+ /* Strip widening and truncating conversions as well as nops. */
+ if (CONVERT_EXPR_P (step)
+ && INTEGRAL_TYPE_P (TREE_TYPE (TREE_OPERAND (step, 0))))
+ step = TREE_OPERAND (step, 0);
+ tree type = TREE_TYPE (step);
+
+ /* Get the range of step values that would not cause overflow. */
+ widest_int minv = (wi::to_widest (TYPE_MIN_VALUE (ssizetype))
+ / wi::to_widest (factor));
+ widest_int maxv = (wi::to_widest (TYPE_MAX_VALUE (ssizetype))
+ / wi::to_widest (factor));
+
+ /* Get the range of values that the unconverted step actually has. */
+ wide_int step_min, step_max;
+ if (TREE_CODE (step) != SSA_NAME
+ || get_range_info (step, &step_min, &step_max) != VR_RANGE)
+ {
+ step_min = wi::to_wide (TYPE_MIN_VALUE (type));
+ step_max = wi::to_wide (TYPE_MAX_VALUE (type));
+ }
+
+ /* Check whether the unconverted step has an acceptable range. */
+ signop sgn = TYPE_SIGN (type);
+ if (wi::les_p (minv, widest_int::from (step_min, sgn))
+ && wi::ges_p (maxv, widest_int::from (step_max, sgn)))
+ {
+ if (wi::ge_p (step_min, useful_min, sgn))
+ return ssize_int (useful_min);
+ else if (wi::lt_p (step_max, 0, sgn))
+ return ssize_int (-1);
+ else
+ return fold_convert (ssizetype, step);
+ }
+ }
+ return DR_STEP (dr);
+}
+
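As an example of the stripping logic, matching the var_stride_1.c test above:
with 4-byte elements and an unsigned short n, DR_STEP is (ssizetype) n * 4.

  /* factor = 4; the unscaled step n has range [0, 65535], which cannot
     overflow ssizetype when rescaled by 4, so n itself is tested:
       dr_direction_indicator (useful_min = 0): step_min 0 >= 0
         -> ssize_int (0), i.e. the step is known to be nonnegative
       dr_zero_step_indicator (useful_min = 1): 0 < 1 and 65535 >= 0
         -> (ssizetype) n, so a runtime nonzero check on n is needed.  */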
+/* Return a value that is negative iff DR has a negative step. */
+
+tree
+dr_direction_indicator (struct data_reference *dr)
+{
+ return dr_step_indicator (dr, 0);
+}
+
+/* Return a value that is zero iff DR has a zero step. */
+
+tree
+dr_zero_step_indicator (struct data_reference *dr)
+{
+ return dr_step_indicator (dr, 1);
+}
+
+/* Return true if DR is known to have a nonnegative (but possibly zero)
+ step. */
+
+bool
+dr_known_forward_stride_p (struct data_reference *dr)
+{
+ tree indicator = dr_direction_indicator (dr);
+ tree neg_step_val = fold_binary (LT_EXPR, boolean_type_node,
+ fold_convert (ssizetype, indicator),
+ ssize_int (0));
+ return neg_step_val && integer_zerop (neg_step_val);
+}
struct dr_with_seg_len
{
- dr_with_seg_len (data_reference_p d, tree len)
- : dr (d), seg_len (len) {}
+ dr_with_seg_len (data_reference_p d, tree len, unsigned HOST_WIDE_INT size,
+ unsigned int a)
+ : dr (d), seg_len (len), access_size (size), align (a) {}
data_reference_p dr;
+ /* The offset of the last access that needs to be checked minus
+ the offset of the first. */
tree seg_len;
+ /* A value that, when added to abs (SEG_LEN), gives the total number of
+ bytes in the segment. */
+ poly_uint64 access_size;
+ /* The minimum common alignment of DR's start address, SEG_LEN and
+ ACCESS_SIZE. */
+ unsigned int align;
};
/* This struct contains two dr_with_seg_len objects with aliasing data
poly_uint64);
extern void create_runtime_alias_checks (struct loop *,
vec<dr_with_seg_len_pair_t> *, tree*);
+extern tree dr_direction_indicator (struct data_reference *);
+extern tree dr_zero_step_indicator (struct data_reference *);
+extern bool dr_known_forward_stride_p (struct data_reference *);
+
/* Return true when the base objects of data references A and B are
the same memory object. */
static tree
data_ref_segment_size (struct data_reference *dr, tree niters)
{
- tree segment_length;
-
- if (integer_zerop (DR_STEP (dr)))
- segment_length = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr)));
- else
- segment_length = size_binop (MULT_EXPR,
- fold_convert (sizetype, DR_STEP (dr)),
- fold_convert (sizetype, niters));
-
- return segment_length;
+ niters = size_binop (MINUS_EXPR,
+ fold_convert (sizetype, niters),
+ size_one_node);
+ return size_binop (MULT_EXPR,
+ fold_convert (sizetype, DR_STEP (dr)),
+ fold_convert (sizetype, niters));
}
/* Return true if LOOP's latch is dominated by statement for data reference
else
seg_length_b = data_ref_segment_size (dr_b, niters);
+ unsigned HOST_WIDE_INT access_size_a
+ = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr_a))));
+ unsigned HOST_WIDE_INT access_size_b
+ = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr_b))));
+ unsigned int align_a = TYPE_ALIGN_UNIT (TREE_TYPE (DR_REF (dr_a)));
+ unsigned int align_b = TYPE_ALIGN_UNIT (TREE_TYPE (DR_REF (dr_b)));
+
dr_with_seg_len_pair_t dr_with_seg_len_pair
- (dr_with_seg_len (dr_a, seg_length_a),
- dr_with_seg_len (dr_b, seg_length_b));
+ (dr_with_seg_len (dr_a, seg_length_a, access_size_a, align_a),
+ dr_with_seg_len (dr_b, seg_length_b, access_size_b, align_b));
/* Canonicalize pairs by sorting the two DR members. */
if (comp_res > 0)
return true;
}
+/* Record that loop LOOP_VINFO needs to check that VALUE is nonzero. */
+
+static void
+vect_check_nonzero_value (loop_vec_info loop_vinfo, tree value)
+{
+ vec<tree> checks = LOOP_VINFO_CHECK_NONZERO (loop_vinfo);
+ for (unsigned int i = 0; i < checks.length (); ++i)
+ if (checks[i] == value)
+ return;
+
+ if (dump_enabled_p ())
+ {
+ dump_printf_loc (MSG_NOTE, vect_location, "need run-time check that ");
+ dump_generic_expr (MSG_NOTE, TDF_SLIM, value);
+ dump_printf (MSG_NOTE, " is nonzero\n");
+ }
+ LOOP_VINFO_CHECK_NONZERO (loop_vinfo).safe_push (value);
+}
+
+/* Return true if we know that the order of vectorized STMT_A and
+ vectorized STMT_B will be the same as the order of STMT_A and STMT_B.
+ At least one of the statements is a write. */
+
+static bool
+vect_preserves_scalar_order_p (gimple *stmt_a, gimple *stmt_b)
+{
+ stmt_vec_info stmtinfo_a = vinfo_for_stmt (stmt_a);
+ stmt_vec_info stmtinfo_b = vinfo_for_stmt (stmt_b);
+
+ /* Single statements are always kept in their original order. */
+ if (!STMT_VINFO_GROUPED_ACCESS (stmtinfo_a)
+ && !STMT_VINFO_GROUPED_ACCESS (stmtinfo_b))
+ return true;
+
+ /* STMT_A and STMT_B belong to overlapping groups. All loads in a
+ group are emitted at the position of the first scalar load and all
+ stores in a group are emitted at the position of the last scalar store.
+ Thus writes will happen no earlier than their current position
+ (but could happen later) while reads will happen no later than their
+ current position (but could happen earlier). Reordering is therefore
+ only possible if the first access is a write. */
+ gimple *earlier_stmt = get_earlier_stmt (stmt_a, stmt_b);
+ return !DR_IS_WRITE (STMT_VINFO_DATA_REF (vinfo_for_stmt (earlier_stmt)));
+}
/* A subroutine of vect_analyze_data_ref_dependence. Handle
DDR_COULD_BE_INDEPENDENT_P ddr DDR that has a known set of dependence
... = a[i];
a[i+1] = ...;
where loads from the group interleave with the store. */
- if (STMT_VINFO_GROUPED_ACCESS (stmtinfo_a)
- || STMT_VINFO_GROUPED_ACCESS (stmtinfo_b))
+ if (!vect_preserves_scalar_order_p (DR_STMT (dra), DR_STMT (drb)))
+ {
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+ "READ_WRITE dependence in interleaving.\n");
+ return true;
+ }
+
+ if (!loop->force_vectorize)
{
- gimple *earlier_stmt;
- earlier_stmt = get_earlier_stmt (DR_STMT (dra), DR_STMT (drb));
- if (DR_IS_WRITE
- (STMT_VINFO_DATA_REF (vinfo_for_stmt (earlier_stmt))))
+ tree indicator = dr_zero_step_indicator (dra);
+ if (TREE_CODE (indicator) != INTEGER_CST)
+ vect_check_nonzero_value (loop_vinfo, indicator);
+ else if (integer_zerop (indicator))
{
if (dump_enabled_p ())
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
- "READ_WRITE dependence in interleaving."
- "\n");
+ "access also has a zero step\n");
return true;
}
}
-
continue;
}
/* Function vect_vfa_segment_size.
- Create an expression that computes the size of segment
- that will be accessed for a data reference. The functions takes into
- account that realignment loads may access one more vector.
-
Input:
DR: The data reference.
LENGTH_FACTOR: segment length to consider.
- Return an expression whose value is the size of segment which will be
- accessed by DR. */
+ Return a value suitable for the dr_with_seg_len::seg_len field.
+ This is the "distance travelled" by the pointer from the first
+ iteration in the segment to the last. Note that it does not include
+ the size of the access; in effect it only describes the first byte. */
static tree
vect_vfa_segment_size (struct data_reference *dr, tree length_factor)
{
- tree segment_length;
+ length_factor = size_binop (MINUS_EXPR,
+ fold_convert (sizetype, length_factor),
+ size_one_node);
+ return size_binop (MULT_EXPR, fold_convert (sizetype, DR_STEP (dr)),
+ length_factor);
+}
- if (integer_zerop (DR_STEP (dr)))
- segment_length = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr)));
- else
- segment_length = size_binop (MULT_EXPR,
- fold_convert (sizetype, DR_STEP (dr)),
- fold_convert (sizetype, length_factor));
+/* Return a value that, when added to abs (vect_vfa_segment_size (dr)),
+ gives the worst-case number of bytes covered by the segment. */
- if (vect_supportable_dr_alignment (dr, false)
- == dr_explicit_realign_optimized)
+static unsigned HOST_WIDE_INT
+vect_vfa_access_size (data_reference *dr)
+{
+ stmt_vec_info stmt_vinfo = vinfo_for_stmt (DR_STMT (dr));
+ tree ref_type = TREE_TYPE (DR_REF (dr));
+ unsigned HOST_WIDE_INT ref_size = tree_to_uhwi (TYPE_SIZE_UNIT (ref_type));
+ unsigned HOST_WIDE_INT access_size = ref_size;
+ if (GROUP_FIRST_ELEMENT (stmt_vinfo))
{
- tree vector_size = TYPE_SIZE_UNIT
- (STMT_VINFO_VECTYPE (vinfo_for_stmt (DR_STMT (dr))));
-
- segment_length = size_binop (PLUS_EXPR, segment_length, vector_size);
+ gcc_assert (GROUP_FIRST_ELEMENT (stmt_vinfo) == DR_STMT (dr));
+ access_size *= GROUP_SIZE (stmt_vinfo) - GROUP_GAP (stmt_vinfo);
+ }
+ if (STMT_VINFO_VEC_STMT (stmt_vinfo)
+ && (vect_supportable_dr_alignment (dr, false)
+ == dr_explicit_realign_optimized))
+ {
+ /* We might access a full vector's worth. */
+ tree vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
+ access_size += tree_to_uhwi (TYPE_SIZE_UNIT (vectype)) - ref_size;
}
- return segment_length;
+ return access_size;
+}
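
(Illustration, not part of the patch.) A compact model of the computation above, with hypothetical values throughout:

  #include <stdio.h>

  /* Worst-case bytes accessed from one segment position: REF_SIZE bytes
     per scalar ref, GROUP_SIZE refs in the group, GAP unused elements
     at the end of the group.  */
  static unsigned long
  access_size (unsigned long ref_size, unsigned int group_size,
               unsigned int gap)
  {
    return ref_size * (group_size - gap);
  }

  int
  main (void)
  {
    /* A group of 4 int accesses with one skipped trailing element
       covers at most 3 * 4 = 12 bytes.  */
    printf ("%lu\n", access_size (4, 4, 1));
    return 0;
  }
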
+
+/* Get the minimum alignment for all the scalar accesses that DR describes. */
+
+static unsigned int
+vect_vfa_align (const data_reference *dr)
+{
+ return TYPE_ALIGN_UNIT (TREE_TYPE (DR_REF (dr)));
}
/* Function vect_compile_time_alias.
Given data references A and B with equal base and offset, see whether
the alias relation can be decided at compilation time. Return 1 if
it can and the references alias, 0 if it can and the references do
- not alias, and -1 if we cannot decide at compile time. SEGMENT_LENGTH_A
- and SEGMENT_LENGTH_B are the memory lengths accessed by A and B
- respectively. */
+ not alias, and -1 if we cannot decide at compile time. SEGMENT_LENGTH_A,
+ SEGMENT_LENGTH_B, ACCESS_SIZE_A and ACCESS_SIZE_B are the equivalent
+ of dr_with_seg_len::{seg_len,access_size} for A and B. */
static int
vect_compile_time_alias (struct data_reference *a, struct data_reference *b,
- tree segment_length_a, tree segment_length_b)
+ tree segment_length_a, tree segment_length_b,
+ unsigned HOST_WIDE_INT access_size_a,
+ unsigned HOST_WIDE_INT access_size_b)
{
poly_offset_int offset_a = wi::to_poly_offset (DR_INIT (a));
poly_offset_int offset_b = wi::to_poly_offset (DR_INIT (b));
if (tree_int_cst_compare (DR_STEP (a), size_zero_node) < 0)
{
const_length_a = (-wi::to_poly_wide (segment_length_a)).force_uhwi ();
- offset_a = (offset_a + vect_get_scalar_dr_size (a)) - const_length_a;
+ offset_a = (offset_a + access_size_a) - const_length_a;
}
else
const_length_a = tree_to_poly_uint64 (segment_length_a);
if (tree_int_cst_compare (DR_STEP (b), size_zero_node) < 0)
{
const_length_b = (-wi::to_poly_wide (segment_length_b)).force_uhwi ();
- offset_b = (offset_b + vect_get_scalar_dr_size (b)) - const_length_b;
+ offset_b = (offset_b + access_size_b) - const_length_b;
}
else
const_length_b = tree_to_poly_uint64 (segment_length_b);
+ const_length_a += access_size_a;
+ const_length_b += access_size_b;
+
if (ranges_known_overlap_p (offset_a, const_length_a,
offset_b, const_length_b))
return 1;
return true;
}
+/* Dump LOWER_BOUND using flags DUMP_KIND. Dumps are known to be enabled. */
+
+static void
+dump_lower_bound (int dump_kind, const vec_lower_bound &lower_bound)
+{
+ dump_printf (dump_kind, "%s (", lower_bound.unsigned_p ? "unsigned" : "abs");
+ dump_generic_expr (dump_kind, TDF_SLIM, lower_bound.expr);
+ dump_printf (dump_kind, ") >= ");
+ dump_dec (dump_kind, lower_bound.min_value);
+}
+
+/* Record that the vectorized loop requires the vec_lower_bound described
+ by EXPR, UNSIGNED_P and MIN_VALUE. */
+
+static void
+vect_check_lower_bound (loop_vec_info loop_vinfo, tree expr, bool unsigned_p,
+ poly_uint64 min_value)
+{
+ vec<vec_lower_bound> lower_bounds = LOOP_VINFO_LOWER_BOUNDS (loop_vinfo);
+ for (unsigned int i = 0; i < lower_bounds.length (); ++i)
+ if (operand_equal_p (lower_bounds[i].expr, expr, 0))
+ {
+ unsigned_p &= lower_bounds[i].unsigned_p;
+ min_value = upper_bound (lower_bounds[i].min_value, min_value);
+ if (lower_bounds[i].unsigned_p != unsigned_p
+ || maybe_lt (lower_bounds[i].min_value, min_value))
+ {
+ lower_bounds[i].unsigned_p = unsigned_p;
+ lower_bounds[i].min_value = min_value;
+ if (dump_enabled_p ())
+ {
+ dump_printf_loc (MSG_NOTE, vect_location,
+ "updating run-time check to ");
+ dump_lower_bound (MSG_NOTE, lower_bounds[i]);
+ dump_printf (MSG_NOTE, "\n");
+ }
+ }
+ return;
+ }
+
+ vec_lower_bound lower_bound (expr, unsigned_p, min_value);
+ if (dump_enabled_p ())
+ {
+ dump_printf_loc (MSG_NOTE, vect_location, "need a run-time check that ");
+ dump_lower_bound (MSG_NOTE, lower_bound);
+ dump_printf (MSG_NOTE, "\n");
+ }
+ LOOP_VINFO_LOWER_BOUNDS (loop_vinfo).safe_push (lower_bound);
+}
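
(Worked example, not part of the patch.) Merging an existing unsigned bound expr >= 4 with a new signed requirement abs (expr) >= 8 yields the single check abs (expr) >= 8: unsigned_p is ANDed and min_value is maximized before the entry is updated.
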
+
+/* Return true if it's unlikely that the step of the vectorized form of DR
+ will span fewer than GAP bytes. */
+
+static bool
+vect_small_gap_p (loop_vec_info loop_vinfo, data_reference *dr, poly_int64 gap)
+{
+ stmt_vec_info stmt_info = vinfo_for_stmt (DR_STMT (dr));
+ HOST_WIDE_INT count
+ = estimated_poly_value (LOOP_VINFO_VECT_FACTOR (loop_vinfo));
+ if (GROUP_FIRST_ELEMENT (stmt_info))
+ count *= GROUP_SIZE (vinfo_for_stmt (GROUP_FIRST_ELEMENT (stmt_info)));
+ return estimated_poly_value (gap) <= count * vect_get_scalar_dr_size (dr);
+}
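
(Illustration, not part of the patch.) The heuristic treats a gap as small when one vector iteration's worth of scalar accesses is likely to span it; a sketch with hypothetical numbers:

  #include <stdbool.h>
  #include <stdio.h>

  /* Gap is "small" if ESTIMATED_VF * GROUP_SIZE scalar accesses of
     SCALAR_SIZE bytes are likely to cover it (simplified model).  */
  static bool
  small_gap_p (long estimated_vf, long group_size, long scalar_size,
               long gap)
  {
    return gap <= estimated_vf * group_size * scalar_size;
  }

  int
  main (void)
  {
    /* VF 4, group of 2, 4-byte elements: gaps up to 32 bytes are small.  */
    printf ("%d\n", small_gap_p (4, 2, 4, 8));   /* prints 1 */
    printf ("%d\n", small_gap_p (4, 2, 4, 64));  /* prints 0 */
    return 0;
  }
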
+
+/* Return true if we know that there is no alias between DR_A and DR_B
+ when abs (DR_STEP (DR_A)) >= N for some N. When returning true, set
+ *LOWER_BOUND_OUT to this N. */
+
+static bool
+vectorizable_with_step_bound_p (data_reference *dr_a, data_reference *dr_b,
+ poly_uint64 *lower_bound_out)
+{
+ /* Check that there is a constant gap of known sign between DR_A
+ and DR_B. */
+ poly_int64 init_a, init_b;
+ if (!operand_equal_p (DR_BASE_ADDRESS (dr_a), DR_BASE_ADDRESS (dr_b), 0)
+ || !operand_equal_p (DR_OFFSET (dr_a), DR_OFFSET (dr_b), 0)
+ || !operand_equal_p (DR_STEP (dr_a), DR_STEP (dr_b), 0)
+ || !poly_int_tree_p (DR_INIT (dr_a), &init_a)
+ || !poly_int_tree_p (DR_INIT (dr_b), &init_b)
+ || !ordered_p (init_a, init_b))
+ return false;
+
+ /* Sort DR_A and DR_B by the address they access. */
+ if (maybe_lt (init_b, init_a))
+ {
+ std::swap (init_a, init_b);
+ std::swap (dr_a, dr_b);
+ }
+
+ /* If the two accesses could be dependent within a scalar iteration,
+ make sure that we'd retain their order. */
+ if (maybe_gt (init_a + vect_get_scalar_dr_size (dr_a), init_b)
+ && !vect_preserves_scalar_order_p (DR_STMT (dr_a), DR_STMT (dr_b)))
+ return false;
+
+ /* There is no alias if abs (DR_STEP) is greater than or equal to
+ the bytes spanned by the combination of the two accesses. */
+ *lower_bound_out = init_b + vect_get_scalar_dr_size (dr_b) - init_a;
+ return true;
+}
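
(Worked example, not part of the patch.) For hypothetical references x[i * n] and x[i * n + 1] with 4-byte elements, the bases, offsets and steps match, init_a = 0 and init_b = 4, so:

  lower_bound = init_b + size_b - init_a = 4 + 4 - 0 = 8

and the two references cannot alias whenever abs (step) = 4 * |n| >= 8, i.e. whenever |n| >= 2.
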
+
/* Function vect_prune_runtime_alias_test_list.
Prune a list of ddrs to be tested at run-time by versioning for alias.
dump_printf_loc (MSG_NOTE, vect_location,
"=== vect_prune_runtime_alias_test_list ===\n");
+ /* Step values are irrelevant for aliasing if the number of vector
+ iterations is equal to the number of scalar iterations (which can
+ happen for fully-SLP loops). */
+ bool ignore_step_p = known_eq (LOOP_VINFO_VECT_FACTOR (loop_vinfo), 1U);
+
+ if (!ignore_step_p)
+ {
+ /* Convert the checks for nonzero steps into bound tests. */
+ tree value;
+ FOR_EACH_VEC_ELT (LOOP_VINFO_CHECK_NONZERO (loop_vinfo), i, value)
+ vect_check_lower_bound (loop_vinfo, value, true, 1);
+ }
+
if (may_alias_ddrs.is_empty ())
return true;
FOR_EACH_VEC_ELT (may_alias_ddrs, i, ddr)
{
int comp_res;
+ poly_uint64 lower_bound;
struct data_reference *dr_a, *dr_b;
gimple *dr_group_first_a, *dr_group_first_b;
tree segment_length_a, segment_length_b;
+ unsigned HOST_WIDE_INT access_size_a, access_size_b;
+ unsigned int align_a, align_b;
gimple *stmt_a, *stmt_b;
/* Ignore the alias if the VF we chose ended up being no greater
dr_a = DDR_A (ddr);
stmt_a = DR_STMT (DDR_A (ddr));
+
+ dr_b = DDR_B (ddr);
+ stmt_b = DR_STMT (DDR_B (ddr));
+
+ /* Skip the pair if inter-iteration dependencies are irrelevant
+ and intra-iteration dependencies are guaranteed to be honored. */
+ if (ignore_step_p
+ && (vect_preserves_scalar_order_p (stmt_a, stmt_b)
+ || vectorizable_with_step_bound_p (dr_a, dr_b, &lower_bound)))
+ {
+ if (dump_enabled_p ())
+ {
+ dump_printf_loc (MSG_NOTE, vect_location,
+ "no need for alias check between ");
+ dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dr_a));
+ dump_printf (MSG_NOTE, " and ");
+ dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dr_b));
+ dump_printf (MSG_NOTE, " when VF is 1\n");
+ }
+ continue;
+ }
+
+ /* See whether we can handle the alias using a bounds check on
+ the step, and whether that's likely to be the best approach.
+ (It might not be, for example, if the minimum step is much larger
+ than the number of bytes handled by one vector iteration.) */
+ if (!ignore_step_p
+ && TREE_CODE (DR_STEP (dr_a)) != INTEGER_CST
+ && vectorizable_with_step_bound_p (dr_a, dr_b, &lower_bound)
+ && (vect_small_gap_p (loop_vinfo, dr_a, lower_bound)
+ || vect_small_gap_p (loop_vinfo, dr_b, lower_bound)))
+ {
+ bool unsigned_p = dr_known_forward_stride_p (dr_a);
+ if (dump_enabled_p ())
+ {
+ dump_printf_loc (MSG_NOTE, vect_location, "no alias between ");
+ dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dr_a));
+ dump_printf (MSG_NOTE, " and ");
+ dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dr_b));
+ dump_printf (MSG_NOTE, " when the step ");
+ dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_STEP (dr_a));
+ dump_printf (MSG_NOTE, " is outside ");
+ if (unsigned_p)
+ dump_printf (MSG_NOTE, "[0");
+ else
+ {
+ dump_printf (MSG_NOTE, "(");
+ dump_dec (MSG_NOTE, poly_int64 (-lower_bound));
+ }
+ dump_printf (MSG_NOTE, ", ");
+ dump_dec (MSG_NOTE, lower_bound);
+ dump_printf (MSG_NOTE, ")\n");
+ }
+ vect_check_lower_bound (loop_vinfo, DR_STEP (dr_a), unsigned_p,
+ lower_bound);
+ continue;
+ }
+
dr_group_first_a = GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt_a));
if (dr_group_first_a)
{
dr_a = STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt_a));
}
- dr_b = DDR_B (ddr);
- stmt_b = DR_STMT (DDR_B (ddr));
dr_group_first_b = GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt_b));
if (dr_group_first_b)
{
dr_b = STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt_b));
}
- if (!operand_equal_p (DR_STEP (dr_a), DR_STEP (dr_b), 0))
- length_factor = scalar_loop_iters;
+ if (ignore_step_p)
+ {
+ segment_length_a = size_zero_node;
+ segment_length_b = size_zero_node;
+ }
else
- length_factor = size_int (vect_factor);
- segment_length_a = vect_vfa_segment_size (dr_a, length_factor);
- segment_length_b = vect_vfa_segment_size (dr_b, length_factor);
+ {
+ if (!operand_equal_p (DR_STEP (dr_a), DR_STEP (dr_b), 0))
+ length_factor = scalar_loop_iters;
+ else
+ length_factor = size_int (vect_factor);
+ segment_length_a = vect_vfa_segment_size (dr_a, length_factor);
+ segment_length_b = vect_vfa_segment_size (dr_b, length_factor);
+ }
+ access_size_a = vect_vfa_access_size (dr_a);
+ access_size_b = vect_vfa_access_size (dr_b);
+ align_a = vect_vfa_align (dr_a);
+ align_b = vect_vfa_align (dr_b);
comp_res = data_ref_compare_tree (DR_BASE_ADDRESS (dr_a),
DR_BASE_ADDRESS (dr_b));
{
int res = vect_compile_time_alias (dr_a, dr_b,
segment_length_a,
- segment_length_b);
+ segment_length_b,
+ access_size_a,
+ access_size_b);
+ if (res >= 0 && dump_enabled_p ())
+ {
+ dump_printf_loc (MSG_NOTE, vect_location,
+ "can tell at compile time that ");
+ dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dr_a));
+ dump_printf (MSG_NOTE, " and ");
+ dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dr_b));
+ if (res == 0)
+ dump_printf (MSG_NOTE, " do not alias\n");
+ else
+ dump_printf (MSG_NOTE, " alias\n");
+ }
+
if (res == 0)
continue;
}
dr_with_seg_len_pair_t dr_with_seg_len_pair
- (dr_with_seg_len (dr_a, segment_length_a),
- dr_with_seg_len (dr_b, segment_length_b));
+ (dr_with_seg_len (dr_a, segment_length_a, access_size_a, align_a),
+ dr_with_seg_len (dr_b, segment_length_b, access_size_b, align_b));
/* Canonicalize pairs by sorting the two DR members. */
if (comp_res > 0)
unsigned int count = (comp_alias_ddrs.length ()
+ check_unequal_addrs.length ());
+
dump_printf_loc (MSG_NOTE, vect_location,
"improved number of alias checks from %d to %d\n",
may_alias_ddrs.length (), count);
}
}
+/* Create an expression that is true when all lower-bound conditions for
+ the vectorized loop are met. Chain this condition with *COND_EXPR. */
+
+static void
+vect_create_cond_for_lower_bounds (loop_vec_info loop_vinfo, tree *cond_expr)
+{
+ vec<vec_lower_bound> lower_bounds = LOOP_VINFO_LOWER_BOUNDS (loop_vinfo);
+ for (unsigned int i = 0; i < lower_bounds.length (); ++i)
+ {
+ tree expr = lower_bounds[i].expr;
+ tree type = unsigned_type_for (TREE_TYPE (expr));
+ expr = fold_convert (type, expr);
+ poly_uint64 bound = lower_bounds[i].min_value;
+ if (!lower_bounds[i].unsigned_p)
+ {
+ expr = fold_build2 (PLUS_EXPR, type, expr,
+ build_int_cstu (type, bound - 1));
+ bound += bound - 1;
+ }
+ tree part_cond_expr = fold_build2 (GE_EXPR, boolean_type_node, expr,
+ build_int_cstu (type, bound));
+ chain_cond_expr (cond_expr, part_cond_expr);
+ }
+}
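
(Illustration, not part of the patch.) For signed expressions the code uses a bias trick: (unsigned) expr + (bound - 1) >= 2 * bound - 1 holds exactly when expr >= bound or expr <= -bound, thanks to unsigned wrap-around. A sketch that checks this by exhaustion for a hypothetical bound of 8:

  #include <stdio.h>

  /* Model of the versioning condition built above.  */
  static int
  abs_ge_bound (int step, unsigned int bound)
  {
    return (unsigned int) step + (bound - 1) >= 2 * bound - 1;
  }

  int
  main (void)
  {
    for (int step = -20; step <= 20; ++step)
      if (abs_ge_bound (step, 8) != (step >= 8 || step <= -8))
        printf ("mismatch at %d\n", step);
    printf ("done\n");  /* prints "done" with no mismatches */
    return 0;
  }
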
+
/* Function vect_create_cond_for_alias_checks.
Create a conditional expression that represents the run-time checks for
if (version_alias)
{
vect_create_cond_for_unequal_addrs (loop_vinfo, &cond_expr);
+ vect_create_cond_for_lower_bounds (loop_vinfo, &cond_expr);
vect_create_cond_for_alias_checks (loop_vinfo, &cond_expr);
}
}
}
/* Free optimized alias test DDRS. */
+ LOOP_VINFO_LOWER_BOUNDS (loop_vinfo).truncate (0);
LOOP_VINFO_COMP_ALIAS_DDRS (loop_vinfo).release ();
LOOP_VINFO_CHECK_UNEQUAL_ADDRS (loop_vinfo).release ();
/* Reset target cost data. */
/* Count LEN - 1 ANDs and LEN comparisons. */
(void) add_stmt_cost (target_cost_data, len * 2 - 1, scalar_stmt,
NULL, 0, vect_prologue);
+ len = LOOP_VINFO_LOWER_BOUNDS (loop_vinfo).length ();
+ if (len)
+ {
+ /* Count LEN - 1 ANDs and LEN comparisons. */
+ unsigned int nstmts = len * 2 - 1;
+ /* +1 for each bias that needs adding. */
+ for (unsigned int i = 0; i < len; ++i)
+ if (!LOOP_VINFO_LOWER_BOUNDS (loop_vinfo)[i].unsigned_p)
+ nstmts += 1;
+ (void) add_stmt_cost (target_cost_data, nstmts, scalar_stmt,
+ NULL, 0, vect_prologue);
+ }
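
(Worked example, not part of the patch.) Three recorded lower bounds, one of them signed, would cost (3 * 2 - 1) + 1 = 6 scalar statements in the prologue under this accounting.
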
dump_printf (MSG_NOTE,
"cost model: Adding cost of checks for loop "
"versioning aliasing.\n");
loop to be valid. */
typedef std::pair<tree, tree> vec_object_pair;
+/* Records that vectorization is only possible if abs (EXPR) >= MIN_VALUE.
+ UNSIGNED_P is true if we can assume that abs (EXPR) == EXPR. */
+struct vec_lower_bound {
+ vec_lower_bound () {}
+ vec_lower_bound (tree e, bool u, poly_uint64 m)
+ : expr (e), unsigned_p (u), min_value (m) {}
+
+ tree expr;
+ bool unsigned_p;
+ poly_uint64 min_value;
+};
+
/* Vectorizer state common between loop and basic-block vectorization. */
struct vec_info {
enum vec_kind { bb, loop };
/* Check that the addresses of each pair of objects is unequal. */
auto_vec<vec_object_pair> check_unequal_addrs;
+ /* List of values that are required to be nonzero. This is used to check
+ whether things like "x[i * n] += 1;" are safe and eventually gets added
+ to the checks for lower bounds below. */
+ auto_vec<tree> check_nonzero;
+
+ /* List of values that need to be checked for a minimum value. */
+ auto_vec<vec_lower_bound> lower_bounds;
+
/* Statements in the loop that have data references that are candidates for a
runtime (loop versioning) misalignment check. */
auto_vec<gimple *> may_misalign_stmts;
#define LOOP_VINFO_MAY_ALIAS_DDRS(L) (L)->may_alias_ddrs
#define LOOP_VINFO_COMP_ALIAS_DDRS(L) (L)->comp_alias_ddrs
#define LOOP_VINFO_CHECK_UNEQUAL_ADDRS(L) (L)->check_unequal_addrs
+#define LOOP_VINFO_CHECK_NONZERO(L) (L)->check_nonzero
+#define LOOP_VINFO_LOWER_BOUNDS(L) (L)->lower_bounds
#define LOOP_VINFO_GROUPED_STORES(L) (L)->grouped_stores
#define LOOP_VINFO_SLP_INSTANCES(L) (L)->slp_instances
#define LOOP_VINFO_SLP_UNROLLING_FACTOR(L) (L)->slp_unrolling_factor
((L)->may_misalign_stmts.length () > 0)
#define LOOP_REQUIRES_VERSIONING_FOR_ALIAS(L) \
((L)->comp_alias_ddrs.length () > 0 \
- || (L)->check_unequal_addrs.length () > 0)
+ || (L)->check_unequal_addrs.length () > 0 \
+ || (L)->lower_bounds.length () > 0)
#define LOOP_REQUIRES_VERSIONING_FOR_NITERS(L) \
(LOOP_VINFO_NITERS_ASSUMPTIONS (L))
#define LOOP_REQUIRES_VERSIONING(L) \