Update extract_even_odd w/ AVX-512BW insns.
author Kirill Yukhin <kirill.yukhin@intel.com>
Fri, 2 Oct 2015 14:36:41 +0000 (14:36 +0000)
committer Kirill Yukhin <kyukhin@gcc.gnu.org>
Fri, 2 Oct 2015 14:36:41 +0000 (14:36 +0000)
gcc/
* config/i386/i386.c (expand_vec_perm_even_odd_trunc): New.
(expand_vec_perm_even_odd_1): Handle V64QImode.
(ix86_expand_vec_perm_const_1): Try expansion with
expand_vec_perm_even_odd_trunc as well.
* config/i386/sse.md (VI124_AVX512F): Rename to ...
(define_mode_iterator VI124_AVX2_24_AVX512F_1_AVX512BW): This. Extend
to V64QI.
(define_mode_iterator VI248_AVX2_8_AVX512F): Rename to ...
(define_mode_iterator VI248_AVX2_8_AVX512F_24_AVX512BW): This. Extend
to V32HI and V16SI.
(define_insn "avx512bw_<code>v32hiv32qi2"): Unhide pattern name.
(define_expand "vec_pack_trunc_<mode>"): Update iterator name.
(define_expand "vec_unpacks_lo_<mode>"): Ditto.
(define_expand "vec_unpacks_hi_<mode>"): Ditto.
(define_expand "vec_unpacku_lo_<mode>"): Ditto.
(define_expand "vec_unpacku_hi_<mode>"): Ditto.

gcc/testsuite/
* gcc.target/i386/vect-pack-trunc-1.c: New test.
* gcc.target/i386/vect-pack-trunc-2.c: Ditto.
* gcc.target/i386/vect-perm-even-1.c: Ditto.
* gcc.target/i386/vect-perm-odd-1.c: Ditto.
* gcc.target/i386/vect-unpack-1.c: Ditto.
* gcc.target/i386/vect-unpack-2.c: Ditto.

From-SVN: r228394

gcc/ChangeLog
gcc/config/i386/i386.c
gcc/config/i386/sse.md
gcc/testsuite/ChangeLog
gcc/testsuite/gcc.target/i386/vect-pack-trunc-1.c [new file with mode: 0644]
gcc/testsuite/gcc.target/i386/vect-pack-trunc-2.c [new file with mode: 0644]
gcc/testsuite/gcc.target/i386/vect-perm-even-1.c [new file with mode: 0644]
gcc/testsuite/gcc.target/i386/vect-perm-odd-1.c [new file with mode: 0644]
gcc/testsuite/gcc.target/i386/vect-unpack-1.c [new file with mode: 0644]
gcc/testsuite/gcc.target/i386/vect-unpack-2.c [new file with mode: 0644]

index 3adc4cfb9286c25bcbe50faaaf7c55198a90994f..152872207da37186f9d83f6ca8748aa917329432 100644 (file)
@@ -1,3 +1,22 @@
+2015-10-02  Kirill Yukhin  <kirill.yukhin@intel.com>
+
+       * config/i386/i386.c (expand_vec_perm_even_odd_trunc): New.
+       (expand_vec_perm_even_odd_1): Handle V64QImode.
+       (ix86_expand_vec_perm_const_1): Try expansion with
+       expand_vec_perm_even_odd_trunc as well.
+       * config/i386/sse.md (VI124_AVX512F): Rename to ...
+       (define_mode_iterator VI124_AVX2_24_AVX512F_1_AVX512BW): This. Extend
+       to V64QI.
+       (define_mode_iterator VI248_AVX2_8_AVX512F): Rename to ...
+       (define_mode_iterator VI248_AVX2_8_AVX512F_24_AVX512BW): This. Extend
+       to V32HI and V16SI.
+       (define_insn "avx512bw_<code>v32hiv32qi2"): Unhide pattern name.
+       (define_expand "vec_pack_trunc_<mode>"): Update iterator name.
+       (define_expand "vec_unpacks_lo_<mode>"): Ditto.
+       (define_expand "vec_unpacks_hi_<mode>"): Ditto.
+       (define_expand "vec_unpacku_lo_<mode>"): Ditto.
+       (define_expand "vec_unpacku_hi_<mode>"): Ditto.
+
 2015-10-02  Kirill Yukhin  <kirill.yukhin@intel.com>
 
        * doc/invoke.texi: Mention -mavx512vl, -mavx512bw, -mavx512dq,
index cfeba76e8f288c1127b315d6ca5d4286e7359e51..1ccc33e525c51e370fd47e8aba0655a0931f42aa 100644 (file)
@@ -50171,6 +50171,62 @@ expand_vec_perm_even_odd_pack (struct expand_vec_perm_d *d)
   return true;
 }
 
+/* A subroutine of expand_vec_perm_even_odd_1.  Implement extract-even
+   and extract-odd permutations of two V64QI operands: "even" uses two
+   "truncs" and one "concat" insn; "odd" adds two "shifts" in front.
+   Return true if D is such a permutation (emitting code unless
+   D->testing_p).  Have already failed all two instruction sequences.  */
+
+static bool
+expand_vec_perm_even_odd_trunc (struct expand_vec_perm_d *d)
+{
+  rtx t1, t2, t3, t4;
+  unsigned i, odd, nelt = d->nelt;
+
+  if (!TARGET_AVX512BW
+      || d->one_operand_p
+      || d->vmode != V64QImode)
+    return false;
+
+  /* Check that permutation is even (odd == 0) or odd (odd == 1).  */
+  odd = d->perm[0];
+  if (odd > 1)
+    return false;
+
+  for (i = 1; i < nelt; ++i)
+    if (d->perm[i] != 2 * i + odd)
+      return false;
+
+  if (d->testing_p)
+    return true;
+
+  /* For odd, shift each word right by 8 so truncation keeps its high byte.  */
+  if (odd)
+    {
+      t1 = gen_reg_rtx (V32HImode);
+      t2 = gen_reg_rtx (V32HImode);
+      emit_insn (gen_lshrv32hi3 (t1,
+                                gen_lowpart (V32HImode, d->op0),
+                                GEN_INT (8)));
+      emit_insn (gen_lshrv32hi3 (t2,
+                                gen_lowpart (V32HImode, d->op1),
+                                GEN_INT (8)));
+    }
+  else
+    {
+      t1 = gen_lowpart (V32HImode, d->op0);
+      t2 = gen_lowpart (V32HImode, d->op1);
+    }
+
+  t3 = gen_reg_rtx (V32QImode);
+  t4 = gen_reg_rtx (V32QImode);
+  emit_insn (gen_avx512bw_truncatev32hiv32qi2 (t3, t1));
+  emit_insn (gen_avx512bw_truncatev32hiv32qi2 (t4, t2));
+  emit_insn (gen_avx_vec_concatv64qi (d->target, t3, t4));
+
+  return true;
+}
+
 /* A subroutine of ix86_expand_vec_perm_builtin_1.  Implement extract-even
    and extract-odd permutations.  */
 
@@ -50273,6 +50329,9 @@ expand_vec_perm_even_odd_1 (struct expand_vec_perm_d *d, unsigned odd)
     case V32QImode:
       return expand_vec_perm_even_odd_pack (d);
 
+    case V64QImode:
+      return expand_vec_perm_even_odd_trunc (d);
+
     case V4DImode:
       if (!TARGET_AVX2)
        {
@@ -50734,6 +50793,8 @@ ix86_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
 
   /* Try sequences of four instructions.  */
 
+  if (expand_vec_perm_even_odd_trunc (d))
+    return true;
   if (expand_vec_perm_vpshufb2_vpermq (d))
     return true;
 
index 4eefb4529b87980e2bf231117fda2b448d10ddb8..013681ca91818ff5c67da7b173994e4203001c42 100644 (file)
   [(V16HI "TARGET_AVX2") V8HI
    (V8SI "TARGET_AVX2") V4SI])
 
-(define_mode_iterator VI124_AVX512F
-  [(V32QI "TARGET_AVX2") V16QI
+(define_mode_iterator VI124_AVX2_24_AVX512F_1_AVX512BW
+  [(V64QI "TARGET_AVX512BW") (V32QI "TARGET_AVX2") V16QI
    (V32HI "TARGET_AVX512F") (V16HI "TARGET_AVX2") V8HI
    (V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX2") V4SI])
 
   [(V8SI "TARGET_AVX2") V4SI
    (V4DI "TARGET_AVX2") V2DI])
 
-(define_mode_iterator VI248_AVX2_8_AVX512F
-  [(V16HI "TARGET_AVX2") V8HI
-   (V8SI "TARGET_AVX2") V4SI
+(define_mode_iterator VI248_AVX2_8_AVX512F_24_AVX512BW
+  [(V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX2") V8HI
+   (V16SI "TARGET_AVX512BW") (V8SI "TARGET_AVX2") V4SI
    (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX2") V2DI])
 
 (define_mode_iterator VI248_AVX512BW_AVX512VL
       (match_operand:<avx512fmaskmode> 2 "register_operand")))]
   "TARGET_AVX512F")
 
-(define_insn "*avx512bw_<code>v32hiv32qi2"
+(define_insn "avx512bw_<code>v32hiv32qi2"
   [(set (match_operand:V32QI 0 "nonimmediate_operand" "=v,m")
        (any_truncate:V32QI
            (match_operand:V32HI 1 "register_operand" "v,v")))]
 
 (define_expand "vec_pack_trunc_<mode>"
   [(match_operand:<ssepackmode> 0 "register_operand")
-   (match_operand:VI248_AVX2_8_AVX512F 1 "register_operand")
-   (match_operand:VI248_AVX2_8_AVX512F 2 "register_operand")]
+   (match_operand:VI248_AVX2_8_AVX512F_24_AVX512BW 1 "register_operand")
+   (match_operand:VI248_AVX2_8_AVX512F_24_AVX512BW 2 "register_operand")]
   "TARGET_SSE2"
 {
   rtx op1 = gen_lowpart (<ssepackmode>mode, operands[1]);
 
 (define_expand "vec_unpacks_lo_<mode>"
   [(match_operand:<sseunpackmode> 0 "register_operand")
-   (match_operand:VI124_AVX512F 1 "register_operand")]
+   (match_operand:VI124_AVX2_24_AVX512F_1_AVX512BW 1 "register_operand")]
   "TARGET_SSE2"
   "ix86_expand_sse_unpack (operands[0], operands[1], false, false); DONE;")
 
 (define_expand "vec_unpacks_hi_<mode>"
   [(match_operand:<sseunpackmode> 0 "register_operand")
-   (match_operand:VI124_AVX512F 1 "register_operand")]
+   (match_operand:VI124_AVX2_24_AVX512F_1_AVX512BW 1 "register_operand")]
   "TARGET_SSE2"
   "ix86_expand_sse_unpack (operands[0], operands[1], false, true); DONE;")
 
 (define_expand "vec_unpacku_lo_<mode>"
   [(match_operand:<sseunpackmode> 0 "register_operand")
-   (match_operand:VI124_AVX512F 1 "register_operand")]
+   (match_operand:VI124_AVX2_24_AVX512F_1_AVX512BW 1 "register_operand")]
   "TARGET_SSE2"
   "ix86_expand_sse_unpack (operands[0], operands[1], true, false); DONE;")
 
 (define_expand "vec_unpacku_hi_<mode>"
   [(match_operand:<sseunpackmode> 0 "register_operand")
-   (match_operand:VI124_AVX512F 1 "register_operand")]
+   (match_operand:VI124_AVX2_24_AVX512F_1_AVX512BW 1 "register_operand")]
   "TARGET_SSE2"
   "ix86_expand_sse_unpack (operands[0], operands[1], true, true); DONE;")
 
index ced7d33bfaf5eeee1cedfde1e5dcbbf21918aab4..efea8e42a49ca05b498bc6694fe631247a66bd4e 100644 (file)
@@ -1,3 +1,12 @@
+2015-10-02  Kirill Yukhin  <kirill.yukhin@intel.com>
+
+       * gcc.target/i386/vect-pack-trunc-1.c: New test.
+       * gcc.target/i386/vect-pack-trunc-2.c: Ditto.
+       * gcc.target/i386/vect-perm-even-1.c: Ditto.
+       * gcc.target/i386/vect-perm-odd-1.c: Ditto.
+       * gcc.target/i386/vect-unpack-1.c: Ditto.
+       * gcc.target/i386/vect-unpack-2.c: Ditto.
+
 2015-10-02  Marek Polacek  <polacek@redhat.com>
 
        PR c/64249
diff --git a/gcc/testsuite/gcc.target/i386/vect-pack-trunc-1.c b/gcc/testsuite/gcc.target/i386/vect-pack-trunc-1.c
new file mode 100644 (file)
index 0000000..774d4bd
--- /dev/null
@@ -0,0 +1,28 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -ftree-vectorize -ffast-math -mavx512bw -save-temps" } */
+
+#include "avx512bw-check.h"
+
+#define N 400
+unsigned char yy[10000];
+/* Store each index I in [0, S), converted to unsigned char, into yy[I].  */
+void
+__attribute__ ((noinline)) foo (unsigned short s)
+{
+   unsigned short i;
+   for (i = 0; i < s; i++)
+     yy[i] = (unsigned char) i;
+}
+/* Run foo on N elements and abort on any mismatch.  Presumably the entry point called by avx512bw-check.h -- confirm.  */
+void
+avx512bw_test ()
+{
+  unsigned short i;
+  foo (N);
+  /* Recompute each expected value and compare with what foo stored.  */
+  for (i = 0; i < N; i++)
+    if ( (unsigned char)i != yy [i] )
+      abort ();
+}
+
+/* { dg-final { scan-assembler-times "vpmovwb\[ \\t\]+\[^\n\]*%zmm" 2 } } */
diff --git a/gcc/testsuite/gcc.target/i386/vect-pack-trunc-2.c b/gcc/testsuite/gcc.target/i386/vect-pack-trunc-2.c
new file mode 100644 (file)
index 0000000..a1a075f
--- /dev/null
@@ -0,0 +1,27 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -ftree-vectorize -ffast-math -mavx512bw -save-temps" } */
+
+#include "avx512bw-check.h"
+
+#define N 400
+unsigned short yy[10000];
+/* Store each index I in [0, S), converted to unsigned short, into yy[I].  */
+void
+__attribute__ ((noinline)) foo (unsigned int s)
+{
+   unsigned int i;
+   for (i = 0; i < s; i++)
+     yy[i] = (unsigned short) i;
+}
+/* Run foo on N elements and abort on any mismatch.  Presumably the entry point called by avx512bw-check.h -- confirm.  */
+void
+avx512bw_test ()
+{
+  unsigned int i;
+  foo (N);
+  for (i = 0; i < N; i++)
+    if ( (unsigned short)i != yy [i] )
+      abort ();
+}
+
+/* { dg-final { scan-assembler-times "vpermi2w\[ \\t\]+\[^\n\]*%zmm" 1 } } */
diff --git a/gcc/testsuite/gcc.target/i386/vect-perm-even-1.c b/gcc/testsuite/gcc.target/i386/vect-perm-even-1.c
new file mode 100644 (file)
index 0000000..a2ff73d
--- /dev/null
@@ -0,0 +1,33 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -ftree-vectorize -ffast-math -mavx512bw -save-temps" } */
+
+#include "avx512bw-check.h"
+
+#define N 400
+unsigned char yy[10000];
+unsigned char xx[10000];
+/* Copy the odd-indexed elements xx[2*I + 1] into yy[I] for I in [0, S).  */
+void
+__attribute__ ((noinline)) foo (unsigned short s)
+{
+   unsigned short i;
+   for (i = 0; i < s; i++)
+     yy[i] = xx [i*2 + 1];
+}
+/* Fill xx, run foo on N elements, abort on mismatch.  Presumably the entry point called by avx512bw-check.h -- confirm.  */
+void
+avx512bw_test ()
+{
+  unsigned short i;
+  unsigned char j = 0;
+  for (i = 0; i < 2 * N + 1; i++, j++)
+    xx [i] = j;
+
+  foo (N);
+  /* xx[k] holds (unsigned char) k, so yy[i] must be (unsigned char) (2*i+1).  */
+  for (i = 0; i < N; i++)
+    if ( (unsigned char)(2*i+1) != yy [i] )
+      abort ();
+}
+
+/* { dg-final { scan-assembler-times "vpmovwb\[ \\t\]+\[^\n\]*%zmm" 2 } } */
diff --git a/gcc/testsuite/gcc.target/i386/vect-perm-odd-1.c b/gcc/testsuite/gcc.target/i386/vect-perm-odd-1.c
new file mode 100644 (file)
index 0000000..65f1a80
--- /dev/null
@@ -0,0 +1,45 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -ftree-vectorize -ffast-math -mavx512bw -save-temps" } */
+
+#include "avx512bw-check.h"
+
+#define N 400
+/* Two-byte record with one unsigned char "real" and one "imag" member.  */
+typedef struct
+{
+  unsigned char real;
+  unsigned char imag;
+} complex8_t;
+/* For each I in [0, LEN), store X[I].imag + X[I].real into A[I].  */
+void
+__attribute__ ((noinline)) foo (unsigned char *a,
+                               complex8_t *x, unsigned len)
+{
+  unsigned i;
+  for (i = 0; i < len; i++)
+    a[i] = x[i].imag + x[i].real;
+}
+/* Build N records, run foo, abort on mismatch.  Presumably the entry point called by avx512bw-check.h -- confirm.  */
+void
+avx512bw_test ()
+{
+  unsigned short i;
+  unsigned char j = 0;
+  complex8_t x [N];
+  unsigned char a [N];
+
+  for (i = 0; i < N; i++, j++)
+    {
+      x [i].real = j;
+      x [i].imag = j;
+    }
+
+  foo (a, x, N);
+  /* Both members held the same J, so each sum must be (unsigned char) (J+J).  */
+  j = 0;
+  for (i = 0; i < N; i++, j++)
+    if ( a[i] != (unsigned char)(j+j) )
+      abort ();
+}
+
+/* { dg-final { scan-assembler-times "vpmovwb\[ \\t\]+\[^\n\]*%zmm" 4 } } */
diff --git a/gcc/testsuite/gcc.target/i386/vect-unpack-1.c b/gcc/testsuite/gcc.target/i386/vect-unpack-1.c
new file mode 100644 (file)
index 0000000..eedca47
--- /dev/null
@@ -0,0 +1,27 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -ftree-vectorize -ffast-math -mavx512bw -save-temps" } */
+
+#include "avx512bw-check.h"
+
+#define N 255
+unsigned int yy[10000];
+/* Store each unsigned char index I in [0, S), widened to unsigned int, into yy[I].  */
+void
+__attribute__ ((noinline)) foo (unsigned char s)
+{
+   unsigned char i;
+   for (i = 0; i < s; i++)
+     yy[i] = (unsigned int) i;
+}
+/* Run foo on N elements and abort on any mismatch.  Presumably the entry point called by avx512bw-check.h -- confirm.  */
+void
+avx512bw_test ()
+{
+  unsigned char i;
+  foo (N);
+  for (i = 0; i < N; i++)
+    if ( (unsigned int)i != yy [i] )
+      abort ();
+}
+
+/* { dg-final { scan-assembler-times "vpmovzxbw\[ \\t\]+\[^\n\]*%zmm" 2 } } */
diff --git a/gcc/testsuite/gcc.target/i386/vect-unpack-2.c b/gcc/testsuite/gcc.target/i386/vect-unpack-2.c
new file mode 100644 (file)
index 0000000..b825f0c
--- /dev/null
@@ -0,0 +1,27 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -ftree-vectorize -ffast-math -mavx512bw -save-temps" } */
+
+#include "avx512bw-check.h"
+
+#define N 120
+signed int yy[10000];
+/* Store each signed char index I in [0, S), widened to signed int, into yy[I].  */
+void
+__attribute__ ((noinline)) foo (signed char s)
+{
+   signed char i;
+   for (i = 0; i < s; i++)
+     yy[i] = (signed int) i;
+}
+/* Run foo on N elements and abort on any mismatch.  Presumably the entry point called by avx512bw-check.h -- confirm.  */
+void
+avx512bw_test ()
+{
+  signed char i;
+  foo (N);
+  for (i = 0; i < N; i++)
+    if ( (signed int)i != yy [i] )
+      abort ();
+}
+
+/* { dg-final { scan-assembler-times "vpmovsxbw\[ \\t\]+\[^\n\]*%zmm" 2 } } */