+2018-09-20 Richard Sandiford <richard.sandiford@arm.com>
+
+ PR tree-optimization/87288
+ * tree-vect-loop.c (vect_analyze_loop_2): Take PEELING_FOR_GAPS
+ into account when determining PEELING_FOR_NITERS.
+
2018-09-20 Richard Sandiford <richard.sandiford@arm.com>

 PR tree-optimization/86877
+2018-09-20 Richard Sandiford <richard.sandiford@arm.com>
+
+ PR tree-optimization/87288
+ * gcc.dg/vect/pr87288-1.c: New test.
+ * gcc.dg/vect/pr87288-2.c: Likewise.
+ * gcc.dg/vect/pr87288-3.c: Likewise.
+
2018-09-20 Richard Sandiford <richard.sandiford@arm.com>

 PR tree-optimization/86877
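The three tests added below all use the same basic loop shape: a grouped access in which only the even elements of b[] are read, so the load group has a gap and the vectorizer must peel the final iteration (PEELING_FOR_GAPS) to avoid reading past the end of b[]. A minimal sketch of that shape, with illustrative names rather than anything taken from the tests themselves:

/* Illustrative sketch only; identifiers are hypothetical.  Only the
   even elements of b[] are read, so the load group has a gap and the
   last iteration must be peeled for gaps.  When the trip count is a
   multiple of the vectorization factor, this gap peel is what the old
   PEELING_FOR_NITERS logic failed to account for.  */
void
f (int *restrict a, int *restrict b, int n)
{
  for (int i = 0; i < n; ++i)
    {
      a[i * 2] = b[i * 2] + 1;
      a[i * 2 + 1] = 1;
    }
}

pr87288-1.c runs this shape with a variable trip count, pr87288-2.c with compile-time-constant multiples of N, and pr87288-3.c with those counts plus one.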
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/pr87288-1.c
+#include "tree-vect.h"
+
+#define N (VECTOR_BITS / 32)
+#define MAX_COUNT 4
+
+void __attribute__ ((noipa))
+run (int *restrict a, int *restrict b, int count)
+{
+ for (int i = 0; i < count * N; ++i)
+ {
+ a[i * 2] = b[i * 2] + count;
+ a[i * 2 + 1] = count;
+ }
+}
+
+void __attribute__ ((noipa))
+check (int *restrict a, int count)
+{
+ for (int i = 0; i < count * N; ++i)
+ if (a[i * 2] != i * 41 + count || a[i * 2 + 1] != count)
+ __builtin_abort ();
+ if (a[count * 2 * N] != 999)
+ __builtin_abort ();
+}
+
+int a[N * MAX_COUNT * 2 + 1], b[N * MAX_COUNT * 2];
+
+int
+main (void)
+{
+ check_vect ();
+
+ for (int i = 0; i < N * MAX_COUNT; ++i)
+ {
+ b[i * 2] = i * 41;
+ asm volatile ("" ::: "memory");
+ }
+
+ for (int i = 0; i <= MAX_COUNT; ++i)
+ {
+ a[i * 2 * N] = 999;
+ run (a, b, i);
+ check (a, i);
+ }
+
+ return 0;
+}
+
+/* { dg-final { scan-tree-dump-times {LOOP VECTORIZED} 1 "vect" { target { { vect_int && vect_perm } && vect_element_align } } } } */
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/pr87288-2.c
+#include "tree-vect.h"
+
+#define N (VECTOR_BITS / 32)
+#define MAX_COUNT 4
+
+#define RUN_COUNT(COUNT) \
+ void __attribute__ ((noipa)) \
+ run_##COUNT (int *restrict a, int *restrict b) \
+ { \
+ for (int i = 0; i < N * COUNT; ++i) \
+ { \
+ a[i * 2] = b[i * 2] + COUNT; \
+ a[i * 2 + 1] = COUNT; \
+ } \
+ }
+
+RUN_COUNT (1)
+RUN_COUNT (2)
+RUN_COUNT (3)
+RUN_COUNT (4)
+
+void __attribute__ ((noipa))
+check (int *restrict a, int count)
+{
+ for (int i = 0; i < count * N; ++i)
+ if (a[i * 2] != i * 41 + count || a[i * 2 + 1] != count)
+ __builtin_abort ();
+ if (a[count * 2 * N] != 999)
+ __builtin_abort ();
+}
+
+int a[N * MAX_COUNT * 2 + 1], b[N * MAX_COUNT * 2];
+
+int
+main (void)
+{
+ check_vect ();
+
+ for (int i = 0; i < N * MAX_COUNT; ++i)
+ {
+ b[i * 2] = i * 41;
+ asm volatile ("" ::: "memory");
+ }
+
+ a[N * 2] = 999;
+ run_1 (a, b);
+ check (a, 1);
+
+ a[N * 4] = 999;
+ run_2 (a, b);
+ check (a, 2);
+
+ a[N * 6] = 999;
+ run_3 (a, b);
+ check (a, 3);
+
+ a[N * 8] = 999;
+ run_4 (a, b);
+ check (a, 4);
+
+ return 0;
+}
+
+/* { dg-final { scan-tree-dump {LOOP VECTORIZED} "vect" { target { { vect_int && vect_perm } && vect_element_align } } } } */
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/pr87288-3.c
+#include "tree-vect.h"
+
+#define N (VECTOR_BITS / 32)
+#define MAX_COUNT 4
+
+#define RUN_COUNT(COUNT) \
+ void __attribute__ ((noipa)) \
+ run_##COUNT (int *restrict a, int *restrict b) \
+ { \
+ for (int i = 0; i < N * COUNT + 1; ++i) \
+ { \
+ a[i * 2] = b[i * 2] + COUNT; \
+ a[i * 2 + 1] = COUNT; \
+ } \
+ }
+
+RUN_COUNT (1)
+RUN_COUNT (2)
+RUN_COUNT (3)
+RUN_COUNT (4)
+
+void __attribute__ ((noipa))
+check (int *restrict a, int count)
+{
+ for (int i = 0; i < count * N + 1; ++i)
+ if (a[i * 2] != i * 41 + count || a[i * 2 + 1] != count)
+ __builtin_abort ();
+ if (a[count * 2 * N + 2] != 999)
+ __builtin_abort ();
+}
+
+int a[N * MAX_COUNT * 2 + 3], b[N * MAX_COUNT * 2 + 2];
+
+int
+main (void)
+{
+ check_vect ();
+
+ for (int i = 0; i < N * MAX_COUNT + 1; ++i)
+ {
+ b[i * 2] = i * 41;
+ asm volatile ("" ::: "memory");
+ }
+
+ a[N * 2 + 2] = 999;
+ run_1 (a, b);
+ check (a, 1);
+
+ a[N * 4 + 2] = 999;
+ run_2 (a, b);
+ check (a, 2);
+
+ a[N * 6 + 2] = 999;
+ run_3 (a, b);
+ check (a, 3);
+
+ a[N * 8 + 2] = 999;
+ run_4 (a, b);
+ check (a, 4);
+
+ return 0;
+}
+
+/* { dg-final { scan-tree-dump {LOOP VECTORIZED} "vect" { target { { vect_int && vect_perm } && vect_element_align } } } } */
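For reference, the arithmetic that the tree-vect-loop.c change below implements: when the iteration count is known at compile time, the iterations peeled for alignment and the single iteration peeled for gaps must both be subtracted before checking divisibility by the vectorization factor. A standalone sketch with hypothetical names (the real code does this with multiple_p on the LOOP_VINFO fields):

/* Standalone sketch of the epilogue decision; these names are
   hypothetical and do not appear in GCC.  */
#include <assert.h>

static int
needs_epilogue (unsigned int niters, unsigned int vf,
		unsigned int peel_for_alignment, int peel_for_gaps)
{
  /* Iterations peeled off the main loop for reasons other than the
     iteration count itself.  */
  unsigned int peel_niter = peel_for_alignment + (peel_for_gaps ? 1 : 0);
  return (niters - peel_niter) % vf != 0;
}

int
main (void)
{
  /* E.g. niters = 16, VF = 8, no alignment peeling: without the gap
     iteration, 16 % 8 == 0 wrongly suggested no epilogue was needed;
     accounting for it, 15 % 8 != 0, so peeling for niters is required.  */
  assert (needs_epilogue (16, 8, 0, 1));
  assert (!needs_epilogue (16, 8, 0, 0));
  return 0;
}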
/* The main loop handles all iterations. */
LOOP_VINFO_PEELING_FOR_NITER (loop_vinfo) = false;
else if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
- && LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo) > 0)
+ && LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo) >= 0)
{
- if (!multiple_p (LOOP_VINFO_INT_NITERS (loop_vinfo)
- - LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo),
+ /* Work out the (constant) number of iterations that need to be
+ peeled for reasons other than niters. */
+ unsigned int peel_niter = LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo);
+ if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo))
+ peel_niter += 1;
+ if (!multiple_p (LOOP_VINFO_INT_NITERS (loop_vinfo) - peel_niter,
LOOP_VINFO_VECT_FACTOR (loop_vinfo)))
LOOP_VINFO_PEELING_FOR_NITER (loop_vinfo) = true;
}
else if (LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo)
+ /* ??? When peeling for gaps but not alignment, we could
+ try to check whether the (variable) niters is known to be
+ VF * N + 1. That's something of a niche case though. */
+ || LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo)
|| !LOOP_VINFO_VECT_FACTOR (loop_vinfo).is_constant (&const_vf)
|| ((tree_ctz (LOOP_VINFO_NITERS (loop_vinfo))
< (unsigned) exact_log2 (const_vf))