re PR middle-end/85090 (wrong code with -O2 -fno-tree-dominator-opts -mavx512f -fira-algorithm=priority)
author Jakub Jelinek <jakub@gcc.gnu.org>
Sun, 1 Apr 2018 06:05:01 +0000 (08:05 +0200)
committer Jakub Jelinek <jakub@gcc.gnu.org>
Sun, 1 Apr 2018 06:05:01 +0000 (08:05 +0200)
PR middle-end/85090
* config/i386/sse.md (V): Add V64QI and V32HI for TARGET_AVX512F.
(V_128_256): New mode iterator.
(*avx512dq_vextract<shuffletype>64x2_1 splitter): New define_split.
(*avx512f_vextract<shuffletype>32x4_1 splitter): Likewise.
(xop_pcmov_<mode><avxsizesuffix>): Use V_128_256 mode iterator instead
of V.
* config/i386/i386.c (ix86_expand_vector_set): Improve V32HImode and
V64QImode expansion for !TARGET_AVX512BW && TARGET_AVX512F.

* gcc.target/i386/avx512f-pr85090-1.c: New test.
* gcc.target/i386/avx512f-pr85090-2.c: New test.
* gcc.target/i386/avx512f-pr85090-3.c: New test.
* gcc.target/i386/avx512bw-pr85090-2.c: New test.
* gcc.target/i386/avx512bw-pr85090-3.c: New test.

From-SVN: r258994

gcc/ChangeLog
gcc/config/i386/i386.c
gcc/config/i386/sse.md
gcc/testsuite/ChangeLog
gcc/testsuite/gcc.target/i386/avx512bw-pr85090-2.c [new file with mode: 0644]
gcc/testsuite/gcc.target/i386/avx512bw-pr85090-3.c [new file with mode: 0644]
gcc/testsuite/gcc.target/i386/avx512f-pr85090-1.c [new file with mode: 0644]
gcc/testsuite/gcc.target/i386/avx512f-pr85090-2.c [new file with mode: 0644]
gcc/testsuite/gcc.target/i386/avx512f-pr85090-3.c [new file with mode: 0644]

index 56a63a044b9113fdd4ca2dbd2234cf920391d0f0..ad75aa20ec2c49c35681805d2edcc66eab2bf221 100644 (file)
@@ -1,3 +1,15 @@
+2018-04-01  Jakub Jelinek  <jakub@redhat.com>
+
+       PR middle-end/85090
+       * config/i386/sse.md (V): Add V64QI and V32HI for TARGET_AVX512F.
+       (V_128_256): New mode iterator.
+       (*avx512dq_vextract<shuffletype>64x2_1 splitter): New define_split.
+       (*avx512f_vextract<shuffletype>32x4_1 splitter): Likewise.
+       (xop_pcmov_<mode><avxsizesuffix>): Use V_128_256 mode iterator instead
+       of V.
+       * config/i386/i386.c (ix86_expand_vector_set): Improve V32HImode and
+       V64QImode expansion for !TARGET_AVX512BW && TARGET_AVX512F.
+
 2018-03-31  Segher Boessenkool  <segher@kernel.crashing.org>
 
        PR target/83315
        PR target/84807
        * config/i386/i386.opt: Replace Enforcment with Enforcement.
 
-2018-03-10  Alexandre Oliva <aoliva@redhat.com>
+2018-03-10  Alexandre Oliva  <aoliva@redhat.com>
 
        PR debug/84620
        * dwarf2out.h (dw_val_class): Add dw_val_class_symview.
        (builtin_access::generic_overlap): Be prepared to handle non-array
        base objects.
 
-2018-03-09  Alexandre Oliva <aoliva@redhat.com>
+2018-03-09  Alexandre Oliva  <aoliva@redhat.com>
 
        PR rtl-optimization/84682
        * lra-constraints.c (process_address_1): Check is_address flag
        * doc/gcov.texi: Document usage of profile files.
        * gcov-io.h: Document changes in the format.
 
-2018-03-08  Alexandre Oliva <aoliva@redhat.com>
+2018-03-08  Alexandre Oliva  <aoliva@redhat.com>
 
        PR debug/84404
        PR debug/84408
index 2b2896f7ac6b1c8c7ad96877615eaa16482a3255..337545f127ccd1019bc4c110e89d5cb7383e99f7 100644 (file)
@@ -44085,21 +44085,69 @@ half:
       break;
 
     case E_V32HImode:
-      if (TARGET_AVX512F && TARGET_AVX512BW)
+      if (TARGET_AVX512BW)
        {
          mmode = SImode;
          gen_blendm = gen_avx512bw_blendmv32hi;
        }
+      else if (TARGET_AVX512F)
+       {
+         half_mode = E_V8HImode;
+         n = 8;
+         goto quarter;
+       }
       break;
 
     case E_V64QImode:
-      if (TARGET_AVX512F && TARGET_AVX512BW)
+      if (TARGET_AVX512BW)
        {
          mmode = DImode;
          gen_blendm = gen_avx512bw_blendmv64qi;
        }
+      else if (TARGET_AVX512F)
+       {
+         half_mode = E_V16QImode;
+         n = 16;
+         goto quarter;
+       }
       break;
 
+quarter:
+      /* Compute offset.  */
+      i = elt / n;
+      elt %= n;
+
+      gcc_assert (i <= 3);
+
+      {
+       /* Extract the quarter.  */
+       tmp = gen_reg_rtx (V4SImode);
+       rtx tmp2 = gen_lowpart (V16SImode, target);
+       rtx mask = gen_reg_rtx (QImode);
+
+       emit_move_insn (mask, constm1_rtx);
+       emit_insn (gen_avx512f_vextracti32x4_mask (tmp, tmp2, GEN_INT (i),
+                                                  tmp, mask));
+
+       tmp2 = gen_reg_rtx (half_mode);
+       emit_move_insn (tmp2, gen_lowpart (half_mode, tmp));
+       tmp = tmp2;
+
+       /* Put val in tmp at elt.  */
+       ix86_expand_vector_set (false, tmp, val, elt);
+
+       /* Put it back.  */
+       tmp2 = gen_reg_rtx (V16SImode);
+       rtx tmp3 = gen_lowpart (V16SImode, target);
+       mask = gen_reg_rtx (HImode);
+       emit_move_insn (mask, constm1_rtx);
+       tmp = gen_lowpart (V4SImode, tmp);
+       emit_insn (gen_avx512f_vinserti32x4_mask (tmp2, tmp3, tmp, GEN_INT (i),
+                                                 tmp3, mask));
+       emit_move_insn (target, gen_lowpart (mode, tmp2));
+      }
+      return;
+
     default:
       break;
     }
index 795d08a87a7c889fed5998026c85b9a7f427c39d..34c007f8f43ca4013059e7592e51f1e68951e097 100644 (file)
 
 ;; All vector modes
 (define_mode_iterator V
-  [(V32QI "TARGET_AVX") V16QI
-   (V16HI "TARGET_AVX") V8HI
+  [(V64QI "TARGET_AVX512F") (V32QI "TARGET_AVX") V16QI
+   (V32HI "TARGET_AVX512F") (V16HI "TARGET_AVX") V8HI
    (V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX") V4SI
    (V8DI "TARGET_AVX512F")  (V4DI "TARGET_AVX") V2DI
    (V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF
 (define_mode_iterator V_256
   [V32QI V16HI V8SI V4DI V8SF V4DF])
 
+;; All 128bit and 256bit vector modes
+(define_mode_iterator V_128_256
+  [V32QI V16QI V16HI V8HI V8SI V4SI V4DI V2DI V8SF V4SF V4DF V2DF])
+
 ;; All 512bit vector modes
 (define_mode_iterator V_512 [V64QI V32HI V16SI V8DI V16SF V8DF])
 
    (set_attr "prefix" "evex")
    (set_attr "mode" "<sseinsnmode>")])
 
+(define_split
+  [(set (match_operand:<ssequartermode> 0 "nonimmediate_operand")
+       (vec_select:<ssequartermode>
+         (match_operand:V8FI 1 "register_operand")
+         (parallel [(const_int 0) (const_int 1)])))]
+  "TARGET_AVX512DQ && reload_completed"
+  [(set (match_dup 0) (match_dup 1))]
+  "operands[1] = gen_lowpart (<ssequartermode>mode, operands[1]);")
+
 (define_insn "<mask_codefor>avx512f_vextract<shuffletype>32x4_1<mask_name>"
   [(set (match_operand:<ssequartermode> 0 "<store_mask_predicate>" "=<store_mask_constraint>")
        (vec_select:<ssequartermode>
    (set_attr "prefix" "evex")
    (set_attr "mode" "<sseinsnmode>")])
 
+(define_split
+  [(set (match_operand:<ssequartermode> 0 "nonimmediate_operand")
+       (vec_select:<ssequartermode>
+         (match_operand:V16FI 1 "register_operand")
+         (parallel [(const_int 0) (const_int 1)
+                    (const_int 2) (const_int 3)])))]
+  "TARGET_AVX512F && reload_completed"
+  [(set (match_dup 0) (match_dup 1))]
+  "operands[1] = gen_lowpart (<ssequartermode>mode, operands[1]);")
+
 (define_mode_attr extract_type_2
   [(V16SF "avx512dq") (V16SI "avx512dq") (V8DF "avx512f") (V8DI "avx512f")])
 
 
 ;; XOP parallel XMM conditional moves
 (define_insn "xop_pcmov_<mode><avxsizesuffix>"
-  [(set (match_operand:V 0 "register_operand" "=x,x")
-       (if_then_else:V
-         (match_operand:V 3 "nonimmediate_operand" "x,m")
-         (match_operand:V 1 "register_operand" "x,x")
-         (match_operand:V 2 "nonimmediate_operand" "xm,x")))]
+  [(set (match_operand:V_128_256 0 "register_operand" "=x,x")
+       (if_then_else:V_128_256
+         (match_operand:V_128_256 3 "nonimmediate_operand" "x,m")
+         (match_operand:V_128_256 1 "register_operand" "x,x")
+         (match_operand:V_128_256 2 "nonimmediate_operand" "xm,x")))]
   "TARGET_XOP"
   "vpcmov\t{%3, %2, %1, %0|%0, %1, %2, %3}"
   [(set_attr "type" "sse4arg")])
index d3044eac963d2830cbfeb9fa4793dd8cb2a5202e..7fddf8c271a29cba07e8d87fb2c837242cbedc92 100644 (file)
@@ -1,4 +1,13 @@
-2018-03-31  Alexandre Oliva <aoliva@redhat.com>
+2018-04-01  Jakub Jelinek  <jakub@redhat.com>
+
+       PR middle-end/85090
+       * gcc.target/i386/avx512f-pr85090-1.c: New test.
+       * gcc.target/i386/avx512f-pr85090-2.c: New test.
+       * gcc.target/i386/avx512f-pr85090-3.c: New test.
+       * gcc.target/i386/avx512bw-pr85090-2.c: New test.
+       * gcc.target/i386/avx512bw-pr85090-3.c: New test.
+
+2018-03-31  Alexandre Oliva  <aoliva@redhat.com>
 
        PR c++/85027
        * g++.dg/pr85027.C: New.
        PR sanitizer/85081
        * g++.dg/asan/pr85081.C: New test.
 
-2018-03-28  Alexandre Oliva <aoliva@redhat.com>
+2018-03-28  Alexandre Oliva  <aoliva@redhat.com>
 
        PR c++/84789
        * g++.dg/template/pr84789.C: Adjust for testing with
        PR sanitizer/85029
        * g++.dg/ubsan/pr85029.C: New test.
 
-2018-03-23  Alexandre Oliva <aoliva@redhat.com>
+2018-03-23  Alexandre Oliva  <aoliva@redhat.com>
 
        PR c++/71251
        * g++.dg/cpp0x/pr71251.C: New.
 
        * gcc.dg/builtin-tgmath-3.c: New test.
 
-2018-03-21  Alexandre Oliva <aoliva@redhat.com>
+2018-03-21  Alexandre Oliva  <aoliva@redhat.com>
 
        PR c++/71965
        * g++.dg/concepts/pr71965.C: New.
        * gcc.dg/Wrestrict-10.c: New test.
        * gcc.dg/Wrestrict-11.c: New test.
 
-2018-03-09  Alexandre Oliva <aoliva@redhat.com>
+2018-03-09  Alexandre Oliva  <aoliva@redhat.com>
 
        PR rtl-optimization/84682
        * gcc.dg/torture/pr84682-1.c: New.
        * gcc.dg/torture/pr84746.c: New testcase.
 
 
-2018-03-08  Alexandre Oliva <aoliva@redhat.com>
+2018-03-08  Alexandre Oliva  <aoliva@redhat.com>
        PR debug/84404
        PR debug/84408
        * gcc.dg/graphite/pr84404.c: New.
        PR tree-optimization/84687
        * gcc.dg/pr84687.c: New test.
 
-2018-03-06  Alexandre Oliva <aoliva@redhat.com>
+2018-03-06  Alexandre Oliva  <aoliva@redhat.com>
 
        PR c++/84231
        * g++.dg/pr84231.C: New.
diff --git a/gcc/testsuite/gcc.target/i386/avx512bw-pr85090-2.c b/gcc/testsuite/gcc.target/i386/avx512bw-pr85090-2.c
new file mode 100644 (file)
index 0000000..b93ae4b
--- /dev/null
@@ -0,0 +1,35 @@
+/* PR middle-end/85090 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -mavx512bw -mtune=intel -masm=att" } */
+
+typedef short V __attribute__((vector_size (64)));
+
+V
+f1 (V x, int y)
+{
+  x[0] = y;
+  return x;
+}
+
+V
+f2 (V x, int y)
+{
+  x[7] = y;
+  return x;
+}
+
+V
+f3 (V x, int y)
+{
+  x[11] = y;
+  return x;
+}
+
+V
+f4 (V x, int y)
+{
+  x[29] = y;
+  return x;
+}
+
+/* { dg-final { scan-assembler-times "vpbroadcastw\t" 4 } } */
diff --git a/gcc/testsuite/gcc.target/i386/avx512bw-pr85090-3.c b/gcc/testsuite/gcc.target/i386/avx512bw-pr85090-3.c
new file mode 100644 (file)
index 0000000..9e32a31
--- /dev/null
@@ -0,0 +1,35 @@
+/* PR middle-end/85090 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -mavx512bw -mtune=intel -masm=att" } */
+
+typedef signed char V __attribute__((vector_size (64)));
+
+V
+f1 (V x, int y)
+{
+  x[0] = y;
+  return x;
+}
+
+V
+f2 (V x, int y)
+{
+  x[15] = y;
+  return x;
+}
+
+V
+f3 (V x, int y)
+{
+  x[22] = y;
+  return x;
+}
+
+V
+f4 (V x, int y)
+{
+  x[59] = y;
+  return x;
+}
+
+/* { dg-final { scan-assembler-times "vpbroadcastb\t" 4 } } */
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-pr85090-1.c b/gcc/testsuite/gcc.target/i386/avx512f-pr85090-1.c
new file mode 100644 (file)
index 0000000..f5c0fee
--- /dev/null
@@ -0,0 +1,35 @@
+/* PR middle-end/85090 */
+/* { dg-do run { target int128 } } */
+/* { dg-require-effective-target avx512f } */
+/* { dg-options "-O2 -fno-tree-dominator-opts -mavx512f -fira-algorithm=priority" } */
+
+#include "avx512f-check.h"
+
+typedef unsigned short U __attribute__ ((vector_size (64)));
+typedef unsigned int V __attribute__ ((vector_size (64)));
+typedef unsigned __int128 W __attribute__ ((vector_size (64)));
+
+V h;
+W d, e, g;
+U f;
+
+static __attribute__((noipa)) U
+foo (U i)
+{
+  f >>= ((U)d > f) & 1;
+  i[0] <<= 1;
+  e = (7 & -d) << (7 & -(g & 7));
+  return i;
+}
+
+void
+avx512f_test (void)
+{
+  U x;
+  for (unsigned i = 0; i < 32; i++)
+    x[i] = i;
+  x = foo (x);
+  for (unsigned i = 0; i < 32; i++)
+    if (x[i] != i)
+      abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-pr85090-2.c b/gcc/testsuite/gcc.target/i386/avx512f-pr85090-2.c
new file mode 100644 (file)
index 0000000..968d2f5
--- /dev/null
@@ -0,0 +1,37 @@
+/* PR middle-end/85090 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -mavx512f -mno-avx512bw -mtune=intel -masm=att" } */
+
+typedef short V __attribute__((vector_size (64)));
+
+V
+f1 (V x, int y)
+{
+  x[0] = y;
+  return x;
+}
+
+V
+f2 (V x, int y)
+{
+  x[7] = y;
+  return x;
+}
+
+V
+f3 (V x, int y)
+{
+  x[11] = y;
+  return x;
+}
+
+V
+f4 (V x, int y)
+{
+  x[29] = y;
+  return x;
+}
+
+/* { dg-final { scan-assembler-times "vpinsrw\t" 4 } } */
+/* { dg-final { scan-assembler-times "vextracti32x4\t" 2 } } */
+/* { dg-final { scan-assembler-times "vinserti32x4\t" 4 } } */
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-pr85090-3.c b/gcc/testsuite/gcc.target/i386/avx512f-pr85090-3.c
new file mode 100644 (file)
index 0000000..ffe5154
--- /dev/null
@@ -0,0 +1,37 @@
+/* PR middle-end/85090 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -mavx512f -mno-avx512bw -mtune=intel -masm=att" } */
+
+typedef signed char V __attribute__((vector_size (64)));
+
+V
+f1 (V x, int y)
+{
+  x[0] = y;
+  return x;
+}
+
+V
+f2 (V x, int y)
+{
+  x[15] = y;
+  return x;
+}
+
+V
+f3 (V x, int y)
+{
+  x[22] = y;
+  return x;
+}
+
+V
+f4 (V x, int y)
+{
+  x[59] = y;
+  return x;
+}
+
+/* { dg-final { scan-assembler-times "vpinsrb\t" 4 } } */
+/* { dg-final { scan-assembler-times "vextracti32x4\t" 2 } } */
+/* { dg-final { scan-assembler-times "vinserti32x4\t" 4 } } */