+2015-09-04 Andrey Turetskiy <andrey.turetskiy@intel.com>
+ Petr Murzin <petr.murzin@intel.com>
+ Kirill Yukhin <kirill.yukhin@intel.com>
+
+ * gcc.target/i386/avx512f-scatter-1.c: New.
+ * gcc.target/i386/avx512f-scatter-2.c: Ditto.
+ * gcc.target/i386/avx512f-scatter-3.c: Ditto.
+
2015-09-04 Janne Blomqvist <jb@gcc.gnu.org>
* gfortran.dg/read_dir.f90: Delete empty directory when closing
--- /dev/null
+/* { dg-do run } */
+/* { dg-require-effective-target avx512f } */
+/* { dg-options "-O3 -mavx512f" } */
+
+#define AVX512F
+
+#include "avx512f-check.h"
+
+#define N 1024
+float vf1[N], vf2[2*N+16];
+double vd1[N], vd2[2*N+16];
+int vi1[N], vi2[2*N+16], k[N];
+long vl1[N], vl2[2*N+16], l[N];
+
+__attribute__((noinline, noclone)) void
+f1 (void)
+{
+ int i;
+ for (i = 0; i < N; i++)
+ vf2[k[i]] = vf1[i];
+}
+
+__attribute__((noinline, noclone)) void
+f2 (void)
+{
+ int i;
+ for (i = 0; i < N; i++)
+ vi2[k[i]] = vi1[i];
+}
+
+__attribute__((noinline, noclone)) void
+f3 (int x)
+{
+ int i;
+ for (i = 0; i < N; i++)
+ vf2[k[i] + x] = vf1[i];
+}
+
+__attribute__((noinline, noclone)) void
+f4 (int x)
+{
+ int i;
+ for (i = 0; i < N; i++)
+ vi2[k[i] + x] = vi1[i];
+}
+
+__attribute__((noinline, noclone)) void
+f5 (void)
+{
+ int i;
+ for (i = 0; i < N; i++)
+ vd2[k[i]] = vd1[i];
+}
+
+__attribute__((noinline, noclone)) void
+f6 (void)
+{
+ int i;
+ for (i = 0; i < N; i++)
+ vl2[k[i]] = vl1[i];
+}
+
+__attribute__((noinline, noclone)) void
+f7 (int x)
+{
+ int i;
+ for (i = 0; i < N; i++)
+ vd2[k[i] + x] = vd1[i];
+}
+
+__attribute__((noinline, noclone)) void
+f8 (int x)
+{
+ int i;
+ for (i = 0; i < N; i++)
+ vl2[k[i] + x] = vl1[i];
+}
+
+__attribute__((noinline, noclone)) void
+f9 (void)
+{
+ int i;
+ for (i = 0; i < N; i++)
+ vf2[l[i]] = vf1[i];
+}
+
+__attribute__((noinline, noclone)) void
+f10 (void)
+{
+ int i;
+ for (i = 0; i < N; i++)
+ vi2[l[i]] = vi1[i];
+}
+
+__attribute__((noinline, noclone)) void
+f11 (long x)
+{
+ int i;
+ for (i = 0; i < N; i++)
+ vf2[l[i] + x] = vf1[i];
+}
+
+__attribute__((noinline, noclone)) void
+f12 (long x)
+{
+ int i;
+ for (i = 0; i < N; i++)
+ vi2[l[i] + x] = vi1[i];
+}
+
+__attribute__((noinline, noclone)) void
+f13 (void)
+{
+ int i;
+ for (i = 0; i < N; i++)
+ vd2[l[i]] = vd1[i];
+}
+
+__attribute__((noinline, noclone)) void
+f14 (void)
+{
+ int i;
+ for (i = 0; i < N; i++)
+ vl2[l[i]] = vl1[i];
+}
+
+__attribute__((noinline, noclone)) void
+f15 (long x)
+{
+ int i;
+ for (i = 0; i < N; i++)
+ vd2[l[i] + x] = vd1[i];
+}
+
+__attribute__((noinline, noclone)) void
+f16 (long x)
+{
+ int i;
+ for (i = 0; i < N; i++)
+ vl2[l[i] + x] = vl1[i];
+}
+
+static void
+avx512f_test (void)
+{
+ int i;
+
+ for (i = 0; i < N; i++)
+ {
+ asm ("");
+ vf1[i] = 17.0f + i;
+ vd1[i] = 19.0 + i;
+ vi1[i] = 21 + i;
+ vl1[i] = 23L + i;
+ }
+ for (i = 0; i < N; i++)
+ {
+ asm ("");
+ k[i] = (i % 2) ? (N / 2 + i) : (N / 2 - i / 2);
+ l[i] = 2 * i + i % 2;
+ }
+
+ f1 ();
+ f2 ();
+ for (i = 0; i < N; i++)
+ if (vf2[(i % 2) ? (N / 2 + i) : (N / 2 - i / 2)] != i + 17
+ || vi2[(i % 2) ? (N / 2 + i) : (N / 2 - i / 2)] != i + 21)
+ abort ();
+
+ f3 (12);
+ f4 (14);
+ for (i = 0; i < N; i++)
+ if (vf2[((i % 2) ? (N / 2 + i) : (N / 2 - i / 2)) + 12] != i + 17
+ || vi2[((i % 2) ? (N / 2 + i) : (N / 2 - i / 2)) + 14] != i + 21)
+ abort ();
+
+ f5 ();
+ f6 ();
+ for (i = 0; i < N; i++)
+ if (vd2[(i % 2) ? (N / 2 + i) : (N / 2 - i / 2)] != i + 19
+ || vl2[(i % 2) ? (N / 2 + i) : (N / 2 - i / 2)] != i + 23)
+ abort ();
+
+ f7 (7);
+ f8 (9);
+ for (i = 0; i < N; i++)
+ if (vd2[((i % 2) ? (N / 2 + i) : (N / 2 - i / 2)) + 7] != i + 19
+ || vl2[((i % 2) ? (N / 2 + i) : (N / 2 - i / 2)) + 9] != i + 23)
+ abort ();
+
+ f9 ();
+ f10 ();
+ for (i = 0; i < N; i++)
+ if (vf2[2 * i + i % 2] != i + 17
+ || vi2[2 * i + i % 2] != i + 21)
+ abort ();
+
+ f11 (2);
+ f12 (4);
+ for (i = 0; i < N; i++)
+ if (vf2[2 * i + i % 2 + 2] != i + 17
+ || vi2[2 * i + i % 2 + 4] != i + 21)
+ abort ();
+
+ f13 ();
+ f14 ();
+ for (i = 0; i < N; i++)
+ if (vd2[2 * i + i % 2] != i + 19
+ || vl2[2 * i + i % 2] != i + 23)
+ abort ();
+
+ f15 (13);
+ f16 (15);
+ for (i = 0; i < N; i++)
+ if (vd2[2 * i + i % 2 + 13] != i + 19
+ || vl2[2 * i + i % 2 + 15] != i + 23)
+ abort ();
+}
--- /dev/null
+/* { dg-do run } */
+/* { dg-require-effective-target avx512f } */
+/* { dg-options "-O3 -mavx512f" } */
+
+#define AVX512F
+
+#include "avx512f-check.h"
+
+#define N 1024
+float vf1[N], vf2[2*N+16];
+double vd1[N], vd2[2*N+16];
+int k[N];
+long l[N];
+short n[2*N+16];
+
+__attribute__((noinline, noclone)) void
+f1 (void)
+{
+ int i;
+ for (i = 0; i < N; i++)
+ vf2[k[i]] = vf1[i];
+}
+
+__attribute__((noinline, noclone)) void
+f2 (void)
+{
+ int i;
+ for (i = 0; i < N; i++)
+ n[k[i]] = (int) vf1[i];
+}
+
+__attribute__((noinline, noclone)) void
+f3 (int x)
+{
+ int i;
+ for (i = 0; i < N; i++)
+ vf2[k[i] + x] = vf1[i];
+}
+
+__attribute__((noinline, noclone)) void
+f4 (int x)
+{
+ int i;
+ for (i = 0; i < N; i++)
+ n[k[i] + x] = (int) vf1[i];
+}
+
+__attribute__((noinline, noclone)) void
+f5 (void)
+{
+ int i;
+ for (i = 0; i < N; i++)
+ vd2[k[i]] = vd1[i];
+}
+
+__attribute__((noinline, noclone)) void
+f6 (void)
+{
+ int i;
+ for (i = 0; i < N; i++)
+ n[k[i]] = (int) vd1[i];
+}
+
+__attribute__((noinline, noclone)) void
+f7 (int x)
+{
+ int i;
+ for (i = 0; i < N; i++)
+ vd2[k[i] + x] = vd1[i];
+}
+
+__attribute__((noinline, noclone)) void
+f8 (int x)
+{
+ int i;
+ for (i = 0; i < N; i++)
+ n[k[i] + x] = vd1[i];
+}
+
+__attribute__((noinline, noclone)) void
+f9 (void)
+{
+ int i;
+ for (i = 0; i < N; i++)
+ vf2[l[i]] = vf1[i];
+}
+
+__attribute__((noinline, noclone)) void
+f10 (void)
+{
+ int i;
+ for (i = 0; i < N; i++)
+ n[l[i]] = (int) vf1[i];
+}
+
+__attribute__((noinline, noclone)) void
+f11 (long x)
+{
+ int i;
+ for (i = 0; i < N; i++)
+ vf2[l[i] + x] = vf1[i];
+}
+
+__attribute__((noinline, noclone)) void
+f12 (long x)
+{
+ int i;
+ for (i = 0; i < N; i++)
+ n[l[i] + x] = (int) vf1[i];
+}
+
+__attribute__((noinline, noclone)) void
+f13 (void)
+{
+ int i;
+ for (i = 0; i < N; i++)
+ vd2[l[i]] = vd1[i];
+}
+
+__attribute__((noinline, noclone)) void
+f14 (void)
+{
+ int i;
+ for (i = 0; i < N; i++)
+ n[l[i]] = (int) vd1[i];
+}
+
+__attribute__((noinline, noclone)) void
+f15 (long x)
+{
+ int i;
+ for (i = 0; i < N; i++)
+ vd2[l[i] + x] = vd1[i];
+}
+
+__attribute__((noinline, noclone)) void
+f16 (long x)
+{
+ int i;
+ for (i = 0; i < N; i++)
+ n[l[i] + x] = (int) vd1[i];
+}
+
+static void
+avx512f_test (void)
+{
+ int i;
+
+ for (i = 0; i < N; i++)
+ {
+ asm ("");
+ vf1[i] = 17.0f + i;
+ vd1[i] = 19.0 + i;
+ }
+ for (i = 0; i < N; i++)
+ {
+ asm ("");
+ k[i] = (i % 2) ? (N / 2 + i) : (N / 2 - i / 2);
+ l[i] = 2 * i + i % 2;
+ }
+
+ f1 ();
+ f2 ();
+ for (i = 0; i < N; i++)
+ if (vf2[(i % 2) ? (N / 2 + i) : (N / 2 - i / 2)] != i + 17
+ || n[(i % 2) ? (N / 2 + i) : (N / 2 - i / 2)] != i + 17)
+ abort ();
+
+ f3 (12);
+ f4 (14);
+ for (i = 0; i < N; i++)
+ if (vf2[((i % 2) ? (N / 2 + i) : (N / 2 - i / 2)) + 12] != i + 17
+ || n[((i % 2) ? (N / 2 + i) : (N / 2 - i / 2)) + 14] != i + 17)
+ abort ();
+
+ f5 ();
+ f6 ();
+ for (i = 0; i < N; i++)
+ if (vd2[(i % 2) ? (N / 2 + i) : (N / 2 - i / 2)] != i + 19
+ || n[(i % 2) ? (N / 2 + i) : (N / 2 - i / 2)] != i + 19)
+ abort ();
+
+ f7 (7);
+ f8 (9);
+ for (i = 0; i < N; i++)
+ if (vd2[((i % 2) ? (N / 2 + i) : (N / 2 - i / 2)) + 7] != i + 19
+ || n[((i % 2) ? (N / 2 + i) : (N / 2 - i / 2)) + 9] != i + 19)
+ abort ();
+
+ f9 ();
+ f10 ();
+ for (i = 0; i < N; i++)
+ if (vf2[2 * i + i % 2] != i + 17
+ || n[2 * i + i % 2] != i + 17)
+ abort ();
+
+ f11 (2);
+ f12 (4);
+ for (i = 0; i < N; i++)
+ if (vf2[2 * i + i % 2 + 2] != i + 17
+ || n[2 * i + i % 2 + 4] != i + 17)
+ abort ();
+
+ f13 ();
+ f14 ();
+ for (i = 0; i < N; i++)
+ if (vd2[2 * i + i % 2] != i + 19
+ || n[2 * i + i % 2] != i + 19)
+ abort ();
+
+ f15 (13);
+ f16 (15);
+ for (i = 0; i < N; i++)
+ if (vd2[2 * i + i % 2 + 13] != i + 19
+ || n[2 * i + i % 2 + 15] != i + 19)
+ abort ();
+}
--- /dev/null
+/* { dg-do run } */
+/* { dg-require-effective-target avx512f } */
+/* { dg-options "-O3 -mavx512f" } */
+
+#define AVX512F
+
+#include "avx512f-check.h"
+
+#define N 1024
+int a[N], b[N];
+
+__attribute__((noinline, noclone)) void
+foo (float *__restrict p, float *__restrict q,
+ int s1, int s2, int s3)
+{
+ int i;
+ for (i = 0; i < (N / 8); i++)
+ p[a[i] * s1 + b[i] * s2 + s3] = q[i];
+}
+
+static void
+avx512f_test (void)
+{
+ int i;
+ float c[N], d[N];
+ for (i = 0; i < N; i++)
+ {
+ a[i] = (i * 7) & (N / 8 - 1);
+ b[i] = (i * 13) & (N / 8 - 1);
+ c[i] = 179.13 + i;
+ }
+ foo (d, c, 3, 2, 4);
+ for (i = 0; i < (N / 8); i++)
+ if (d[a[i] * 3 + b[i] * 2 + 4] != (float) (179.13 + i))
+ abort ();
+}