i386.c (ix86_expand_vec_perm): Handle one-operand permutation for TARGET_AVX512F.
author Yuri Rumyantsev <ysrumyan@gmail.com>
Mon, 4 Jul 2016 14:06:27 +0000 (14:06 +0000)
committer Kirill Yukhin <kyukhin@gcc.gnu.org>
Mon, 4 Jul 2016 14:06:27 +0000 (14:06 +0000)
gcc/
* config/i386/i386.c (ix86_expand_vec_perm): Handle one-operand
permutation for TARGET_AVX512F.
(ix86_expand_vec_one_operand_perm_avx512): New function.
(expand_vec_perm_1): Invoke introduced function.
* tree-vect-loop.c (vect_transform_loop): Clear the safelen value
since it may not be valid after vectorization.

gcc/testsuite/
* gcc.target/i386/avx512f-vect-perm-1.c: New test.
* gcc.target/i386/avx512f-vect-perm-2.c: New test.

From-SVN: r237982

gcc/ChangeLog
gcc/config/i386/i386.c
gcc/testsuite/ChangeLog
gcc/testsuite/gcc.target/i386/avx512f-vect-perm-1.c [new file with mode: 0644]
gcc/testsuite/gcc.target/i386/avx512f-vect-perm-2.c [new file with mode: 0644]

index c021b34409bf5f049c818d8548acdcbed6877d50..76911ff6b8c36af2d7eb8622c08485555a580a43 100644 (file)
@@ -1,3 +1,12 @@
+2016-07-04  Yuri Rumyantsev  <ysrumyan@gmail.com>
+
+       * config/i386/i386.c (ix86_expand_vec_perm): Handle one-operand
+       permutation for TARGET_AVX512F.
+       (ix86_expand_vec_one_operand_perm_avx512): New function.
+       (expand_vec_perm_1): Invoke introduced function.
+       * tree-vect-loop.c (vect_transform_loop): Clear the safelen value
+       since it may not be valid after vectorization.
+
 2016-07-04  Ramana Radhakrishnan  <ramana.radhakrishnan@arm.com>
 
        PR target/63874
index 70b13c81bc8713ea51f79b4c510e080482a4c237..9eaf4144965a186c990938b78c37ca57e06b7d78 100644 (file)
@@ -24307,6 +24307,33 @@ ix86_expand_vec_perm (rtx operands[])
   e = GET_MODE_UNIT_SIZE (mode);
   gcc_assert (w <= 64);
 
+  if (TARGET_AVX512F && one_operand_shuffle)
+    {
+      rtx (*gen) (rtx, rtx, rtx) = NULL;
+      switch (mode)
+       {
+       case V16SImode:
+         gen =gen_avx512f_permvarv16si;
+         break;
+       case V16SFmode:
+         gen = gen_avx512f_permvarv16sf;
+         break;
+       case V8DImode:
+         gen = gen_avx512f_permvarv8di;
+         break;
+       case V8DFmode:
+         gen = gen_avx512f_permvarv8df;
+         break;
+       default:
+         break;
+       }
+      if (gen != NULL)
+       {
+         emit_insn (gen (target, op0, mask));
+         return;
+       }
+    }
+
   if (ix86_expand_vec_perm_vpermi2 (target, op0, mask, op1, NULL))
     return;
 
@@ -50444,6 +50471,52 @@ canonicalize_vector_int_perm (const struct expand_vec_perm_d *d,
   return true;
 }
 
+/* Try to expand a one-operand permutation with a constant mask as a
+   single AVX512F variable-permute instruction.
+
+   D describes the permutation: D->op0 and D->op1 are the inputs,
+   D->target receives the result, and D->perm[0 .. D->nelt - 1] holds
+   the constant selector indices.
+
+   Return false, emitting nothing, unless D->op0 and D->op1 are the
+   same rtx, TARGET_AVX512F is enabled and the mode of D->op0 is one of
+   V16SI, V16SF, V8DI or V8DF.  Otherwise return true; the permute insn
+   is emitted only when D->testing_p is clear.
+
+   NOTE(review): testing_p is taken to be the "only report whether the
+   permutation is expandable" flag of struct expand_vec_perm_d, which is
+   declared outside this excerpt -- confirm against the declaration.  */
+
+static bool
+ix86_expand_vec_one_operand_perm_avx512 (struct expand_vec_perm_d *d)
+{
+  machine_mode mode = GET_MODE (d->op0);
+  machine_mode maskmode = mode;
+  rtx (*gen) (rtx, rtx, rtx) = NULL;
+  rtx mask;
+  rtx vec[64];
+
+  if (!rtx_equal_p (d->op0, d->op1))
+    return false;
+
+  if (!TARGET_AVX512F)
+    return false;
+
+  /* Pick the permvar generator for MODE.  The selector handed to the
+     floating-point variants is built in the integer vector mode with
+     the same element count.  */
+  switch (mode)
+    {
+    case V16SImode:
+      gen = gen_avx512f_permvarv16si;
+      break;
+    case V16SFmode:
+      gen = gen_avx512f_permvarv16sf;
+      maskmode = V16SImode;
+      break;
+    case V8DImode:
+      gen = gen_avx512f_permvarv8di;
+      break;
+    case V8DFmode:
+      gen = gen_avx512f_permvarv8df;
+      maskmode = V8DImode;
+      break;
+    default:
+      return false;
+    }
+
+  /* The permutation is known to be expandable at this point.  When the
+     caller is only probing, do not build the mask constant, force it
+     into a register, or emit the insn.  */
+  if (d->testing_p)
+    return true;
+
+  /* Materialize the constant selector as a CONST_VECTOR of MASKMODE and
+     force it into a register before handing it to the generator.  */
+  for (int i = 0; i < d->nelt; ++i)
+    vec[i] = GEN_INT (d->perm[i]);
+  mask = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (d->nelt, vec));
+  emit_insn (gen (d->target, d->op0, force_reg (maskmode, mask)));
+  return true;
+}
+
+
 /* A subroutine of ix86_expand_vec_perm_builtin_1.  Try to instantiate D
    in a single instruction.  */
 
@@ -50611,6 +50684,10 @@ expand_vec_perm_1 (struct expand_vec_perm_d *d)
   if (expand_vec_perm_palignr (d, true))
     return true;
 
+  /* Try the AVX512F vperm{s,d} instructions.  */
+  if (ix86_expand_vec_one_operand_perm_avx512 (d))
+    return true;
+
   /* Try the AVX512F vpermi2 instructions.  */
   if (ix86_expand_vec_perm_vpermi2 (NULL_RTX, NULL_RTX, NULL_RTX, NULL_RTX, d))
     return true;
index 23e010ed834af32f0ddf98c67833924858045946..95d850ae69dac828c0bf4dcf94d01573d656de12 100644 (file)
@@ -1,3 +1,8 @@
+2016-07-04  Yuri Rumyantsev  <ysrumyan@gmail.com>
+
+       * gcc.target/i386/avx512f-vect-perm-1.c: New test.
+       * gcc.target/i386/avx512f-vect-perm-2.c: New test.
+
 2016-07-04  Ramana Radhakrishnan  <ramana.radhakrishnan@arm.com>
 
        PR target/63874
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vect-perm-1.c b/gcc/testsuite/gcc.target/i386/avx512f-vect-perm-1.c
new file mode 100644 (file)
index 0000000..ea6760d
--- /dev/null
@@ -0,0 +1,14 @@
+/* { dg-do compile } */\r
+/* { dg-options "-O2 -ftree-vectorize -ffast-math -march=knl" } */\r
+/* { dg-final { scan-assembler-times "vpermps\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */\r
+\r
+#define N 1024\r
+float f1[N] __attribute__ ((__aligned__(__BIGGEST_ALIGNMENT__)));\r
+float f2[N] __attribute__ ((__aligned__(__BIGGEST_ALIGNMENT__)));\r
+\r
+/* Add the elements of f2, read in reverse order, to f1.  The index is\r
+   N-1-j rather than N-j so that the first iteration reads the last\r
+   element f2[N-1] instead of f2[N], which is one past the end of the\r
+   array.  */\r
+void foo (void)\r
+{\r
+  int j;\r
+  for (j=0; j<N; j++)\r
+    f1[j] += f2[N-1-j];\r
+}\r
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vect-perm-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vect-perm-2.c
new file mode 100644 (file)
index 0000000..29d00d7
--- /dev/null
@@ -0,0 +1,14 @@
+/* { dg-do compile } */\r
+/* { dg-options "-O2 -ftree-vectorize -ffast-math -march=knl" } */\r
+/* { dg-final { scan-assembler-times "vpermpd\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */\r
+\r
+#define N 1024\r
+double d1[N] __attribute__ ((__aligned__(__BIGGEST_ALIGNMENT__)));\r
+double d2[N] __attribute__ ((__aligned__(__BIGGEST_ALIGNMENT__)));\r
+\r
+/* Add the elements of d2, read in reverse order, to d1.  The index is\r
+   N-1-j rather than N-j so that the first iteration reads the last\r
+   element d2[N-1] instead of d2[N], which is one past the end of the\r
+   array.  */\r
+void foo (void)\r
+{\r
+  int j;\r
+  for (j=0; j<N; j++)\r
+    d1[j] += d2[N-1-j];\r
+}\r