re PR target/88513 (FAIL: gcc.target/i386/pr59591-1.c)
author Jakub Jelinek <jakub@redhat.com>
Tue, 18 Dec 2018 11:22:00 +0000 (12:22 +0100)
committer Jakub Jelinek <jakub@gcc.gnu.org>
Tue, 18 Dec 2018 11:22:00 +0000 (12:22 +0100)
PR target/88513
PR target/88514
* optabs.def (vec_pack_sbool_trunc_optab, vec_unpacks_sbool_hi_optab,
vec_unpacks_sbool_lo_optab): New optabs.
* optabs.c (expand_widen_pattern_expr): Use vec_unpacks_sbool_*_optab
and pass additional argument if both input and target have the same
scalar mode of VECTOR_BOOLEAN_TYPE_P vectors.
* expr.c (expand_expr_real_2) <case VEC_PACK_TRUNC_EXPR>: Handle
VECTOR_BOOLEAN_TYPE_P pack where result has the same scalar mode
as the operands using vec_pack_sbool_trunc_optab.
* tree-vect-stmts.c (supportable_widening_operation): Use
vec_unpacks_sbool_{lo,hi}_optab for VECTOR_BOOLEAN_TYPE_P conversions
where both wider_vectype and vectype have the same scalar mode.
(supportable_narrowing_operation): Similarly use
vec_pack_sbool_trunc_optab if narrow_vectype and vectype have the same
scalar mode.
* config/i386/i386.c (ix86_get_builtin)
<case IX86_BUILTIN_GATHER3ALTDIV8SF>: Check for VECTOR_MODE_P
rather than non-VOIDmode.
* config/i386/sse.md (vec_pack_trunc_qi, vec_pack_trunc_<mode>):
Remove useless ()s around "register_operand", formatting fixes.
(vec_pack_sbool_trunc_qi, vec_unpacks_sbool_lo_qi,
vec_unpacks_sbool_hi_qi): New expanders.
* doc/md.texi (vec_pack_sbool_trunc_M, vec_unpacks_sbool_hi_M,
vec_unpacks_sbool_lo_M): Document.

* gcc.target/i386/avx512f-pr88513-1.c: New test.
* gcc.target/i386/avx512f-pr88513-2.c: New test.
* gcc.target/i386/avx512vl-pr88464-1.c: New test.
* gcc.target/i386/avx512vl-pr88464-2.c: New test.
* gcc.target/i386/avx512vl-pr88464-3.c: New test.
* gcc.target/i386/avx512vl-pr88464-4.c: New test.
* gcc.target/i386/avx512vl-pr88513-1.c: New test.
* gcc.target/i386/avx512vl-pr88513-2.c: New test.
* gcc.target/i386/avx512vl-pr88513-3.c: New test.
* gcc.target/i386/avx512vl-pr88513-4.c: New test.
* gcc.target/i386/avx512vl-pr88514-1.c: New test.
* gcc.target/i386/avx512vl-pr88514-2.c: New test.
* gcc.target/i386/avx512vl-pr88514-3.c: New test.

From-SVN: r267228

22 files changed:
gcc/ChangeLog
gcc/config/i386/i386.c
gcc/config/i386/sse.md
gcc/doc/md.texi
gcc/expr.c
gcc/optabs.c
gcc/optabs.def
gcc/testsuite/ChangeLog
gcc/testsuite/gcc.target/i386/avx512f-pr88513-1.c [new file with mode: 0644]
gcc/testsuite/gcc.target/i386/avx512f-pr88513-2.c [new file with mode: 0644]
gcc/testsuite/gcc.target/i386/avx512vl-pr88464-1.c [new file with mode: 0644]
gcc/testsuite/gcc.target/i386/avx512vl-pr88464-2.c [new file with mode: 0644]
gcc/testsuite/gcc.target/i386/avx512vl-pr88464-3.c [new file with mode: 0644]
gcc/testsuite/gcc.target/i386/avx512vl-pr88464-4.c [new file with mode: 0644]
gcc/testsuite/gcc.target/i386/avx512vl-pr88513-1.c [new file with mode: 0644]
gcc/testsuite/gcc.target/i386/avx512vl-pr88513-2.c [new file with mode: 0644]
gcc/testsuite/gcc.target/i386/avx512vl-pr88513-3.c [new file with mode: 0644]
gcc/testsuite/gcc.target/i386/avx512vl-pr88513-4.c [new file with mode: 0644]
gcc/testsuite/gcc.target/i386/avx512vl-pr88514-1.c [new file with mode: 0644]
gcc/testsuite/gcc.target/i386/avx512vl-pr88514-2.c [new file with mode: 0644]
gcc/testsuite/gcc.target/i386/avx512vl-pr88514-3.c [new file with mode: 0644]
gcc/tree-vect-stmts.c

index e24bb94baa0dc84a9f22c2b14bc2e2df35141d9f..6f7c79d31a11e33cdf1f525991b60cdb4c7a115f 100644 (file)
@@ -1,3 +1,31 @@
+2018-12-18  Jakub Jelinek  <jakub@redhat.com>
+
+       PR target/88513
+       PR target/88514
+       * optabs.def (vec_pack_sbool_trunc_optab, vec_unpacks_sbool_hi_optab,
+       vec_unpacks_sbool_lo_optab): New optabs.
+       * optabs.c (expand_widen_pattern_expr): Use vec_unpacks_sbool_*_optab
+       and pass additional argument if both input and target have the same
+       scalar mode of VECTOR_BOOLEAN_TYPE_P vectors.
+       * expr.c (expand_expr_real_2) <case VEC_PACK_TRUNC_EXPR>: Handle
+       VECTOR_BOOLEAN_TYPE_P pack where result has the same scalar mode
+       as the operands using vec_pack_sbool_trunc_optab.
+       * tree-vect-stmts.c (supportable_widening_operation): Use
+       vec_unpacks_sbool_{lo,hi}_optab for VECTOR_BOOLEAN_TYPE_P conversions
+       where both wider_vectype and vectype have the same scalar mode.
+       (supportable_narrowing_operation): Similarly use
+       vec_pack_sbool_trunc_optab if narrow_vectype and vectype have the same
+       scalar mode.
+       * config/i386/i386.c (ix86_get_builtin)
+       <case IX86_BUILTIN_GATHER3ALTDIV8SF>: Check for VECTOR_MODE_P
+       rather than non-VOIDmode.
+       * config/i386/sse.md (vec_pack_trunc_qi, vec_pack_trunc_<mode>):
+       Remove useless ()s around "register_operand", formatting fixes.
+       (vec_pack_sbool_trunc_qi, vec_unpacks_sbool_lo_qi,
+       vec_unpacks_sbool_hi_qi): New expanders.
+       * doc/md.texi (vec_pack_sbool_trunc_M, vec_unpacks_sbool_hi_M,
+       vec_unpacks_sbool_lo_M): Document.
+
 2018-12-18  Jozef Lawrynowicz  <jozef.l@mittosystems.com>
 
        * combine.c (update_rsp_from_reg_equal): Only look for the nonzero bits
index 1a4c407040ac03ed085800befe7f72490dcfa8e8..1c36e12c79c801ad8d64bc17ab5e17865ea89a53 100644 (file)
@@ -37625,7 +37625,7 @@ rdseed_step:
            op0 = copy_to_mode_reg (GET_MODE (op0), op0);
          emit_insn (gen (half, op0));
          op0 = half;
-         if (GET_MODE (op3) != VOIDmode)
+         if (VECTOR_MODE_P (GET_MODE (op3)))
            {
              half = gen_reg_rtx (mode0);
              if (!nonimmediate_operand (op3, GET_MODE (op3)))
index 877759c826efeca18d120e791cfb9b701c8bd882..3786afdf5e8087ad260aeb25a0e88c5323e64838 100644 (file)
 })
 
 (define_expand "vec_pack_trunc_qi"
-  [(set (match_operand:HI 0 ("register_operand"))
-        (ior:HI (ashift:HI (zero_extend:HI (match_operand:QI 2 ("register_operand")))
+  [(set (match_operand:HI 0 "register_operand")
+       (ior:HI (ashift:HI (zero_extend:HI (match_operand:QI 2 "register_operand"))
                            (const_int 8))
-                (zero_extend:HI (match_operand:QI 1 ("register_operand")))))]
+               (zero_extend:HI (match_operand:QI 1 "register_operand"))))]
   "TARGET_AVX512F")
 
 (define_expand "vec_pack_trunc_<mode>"
-  [(set (match_operand:<DOUBLEMASKMODE> 0 ("register_operand"))
-        (ior:<DOUBLEMASKMODE> (ashift:<DOUBLEMASKMODE> (zero_extend:<DOUBLEMASKMODE> (match_operand:SWI24 2 ("register_operand")))
-                           (match_dup 3))
-                (zero_extend:<DOUBLEMASKMODE> (match_operand:SWI24 1 ("register_operand")))))]
+  [(set (match_operand:<DOUBLEMASKMODE> 0 "register_operand")
+       (ior:<DOUBLEMASKMODE>
+         (ashift:<DOUBLEMASKMODE>
+           (zero_extend:<DOUBLEMASKMODE>
+             (match_operand:SWI24 2 "register_operand"))
+           (match_dup 3))
+         (zero_extend:<DOUBLEMASKMODE>
+           (match_operand:SWI24 1 "register_operand"))))]
   "TARGET_AVX512BW"
 {
   operands[3] = GEN_INT (GET_MODE_BITSIZE (<MODE>mode));
 })
 
+(define_expand "vec_pack_sbool_trunc_qi"
+  [(match_operand:QI 0 "register_operand")
+   (match_operand:QI 1 "register_operand")
+   (match_operand:QI 2 "register_operand")
+   (match_operand:QI 3 "const_int_operand")]
+  "TARGET_AVX512F"
+{
+  HOST_WIDE_INT nunits = INTVAL (operands[3]);
+  rtx mask, tem1, tem2;
+  if (nunits != 8 && nunits != 4)
+    FAIL;
+  mask = gen_reg_rtx (QImode);
+  emit_move_insn (mask, GEN_INT ((1 << (nunits / 2)) - 1));
+  tem1 = gen_reg_rtx (QImode);
+  emit_insn (gen_kandqi (tem1, operands[1], mask));
+  if (TARGET_AVX512DQ)
+    {
+      tem2 = gen_reg_rtx (QImode);
+      emit_insn (gen_kashiftqi (tem2, operands[2],
+                               GEN_INT (nunits / 2)));
+    }
+  else
+    {
+      tem2 = gen_reg_rtx (HImode);
+      emit_insn (gen_kashifthi (tem2, lowpart_subreg (HImode, operands[2],
+                                                     QImode),
+                               GEN_INT (nunits / 2)));
+      tem2 = lowpart_subreg (QImode, tem2, HImode);
+    }
+  emit_insn (gen_kiorqi (operands[0], tem1, tem2));
+  DONE;
+})
+
 (define_insn "<sse2_avx2>_packsswb<mask_name>"
   [(set (match_operand:VI1_AVX512 0 "register_operand" "=x,x,v")
        (vec_concat:VI1_AVX512
   "TARGET_SSE2"
   "ix86_expand_sse_unpack (operands[0], operands[1], true, false); DONE;")
 
+(define_expand "vec_unpacks_sbool_lo_qi"
+  [(match_operand:QI 0 "register_operand")
+   (match_operand:QI 1 "register_operand")
+   (match_operand:QI 2 "const_int_operand")]
+  "TARGET_AVX512F"
+{
+  if (INTVAL (operands[2]) != 8 && INTVAL (operands[2]) != 4)
+    FAIL;
+  emit_move_insn (operands[0], operands[1]);
+  DONE;
+})
+
 (define_expand "vec_unpacks_lo_hi"
   [(set (subreg:HI (match_operand:QI 0 "register_operand") 0)
         (match_operand:HI 1 "register_operand"))]
   "TARGET_SSE2"
   "ix86_expand_sse_unpack (operands[0], operands[1], true, true); DONE;")
 
+(define_expand "vec_unpacks_sbool_hi_qi"
+  [(match_operand:QI 0 "register_operand")
+   (match_operand:QI 1 "register_operand")
+   (match_operand:QI 2 "const_int_operand")]
+  "TARGET_AVX512F"
+{
+  HOST_WIDE_INT nunits = INTVAL (operands[2]);
+  if (nunits != 8 && nunits != 4)
+    FAIL;
+  if (TARGET_AVX512DQ)
+    emit_insn (gen_klshiftrtqi (operands[0], operands[1],
+                               GEN_INT (nunits / 2)));
+  else
+    {
+      rtx tem = gen_reg_rtx (HImode);
+      emit_insn (gen_klshiftrthi (tem, lowpart_subreg (HImode, operands[1],
+                                                      QImode),
+                                 GEN_INT (nunits / 2)));
+      emit_move_insn (operands[0], lowpart_subreg (QImode, tem, HImode));
+    }
+  DONE;
+})
+
 (define_expand "vec_unpacks_hi_hi"
   [(parallel
      [(set (subreg:HI (match_operand:QI 0 "register_operand") 0)
index aa90576b34d15d0821ada4bf0c537043eaa1cf96..197b6ba339f6f1f87c2883d58a36dc4df56f5b6e 100644 (file)
@@ -5428,6 +5428,16 @@ are vectors of the same mode having N integral or floating point elements
 of size S@.  Operand 0 is the resulting vector in which 2*N elements of
 size S/2 are concatenated after narrowing them down using truncation.
 
+@cindex @code{vec_pack_sbool_trunc_@var{m}} instruction pattern
+@item @samp{vec_pack_sbool_trunc_@var{m}}
+Narrow and merge the elements of two vectors.  Operands 1 and 2 are vectors
+of the same type having N boolean elements.  Operand 0 is the resulting
+vector in which 2*N elements are concatenated.  The last operand (operand 3)
+is the number of elements in the output vector, 2*N, as a @code{CONST_INT}.
+This instruction pattern is used when all the vector input and output
+operands have the same scalar mode @var{m} and thus using
+@code{vec_pack_trunc_@var{m}} would be ambiguous.
+
 @cindex @code{vec_pack_ssat_@var{m}} instruction pattern
 @cindex @code{vec_pack_usat_@var{m}} instruction pattern
 @item @samp{vec_pack_ssat_@var{m}}, @samp{vec_pack_usat_@var{m}}
@@ -5470,6 +5480,17 @@ integral elements.  The input vector (operand 1) has N elements of size S.
 Widen (promote) the high/low elements of the vector using zero extension and
 place the resulting N/2 values of size 2*S in the output vector (operand 0).
 
+@cindex @code{vec_unpacks_sbool_hi_@var{m}} instruction pattern
+@cindex @code{vec_unpacks_sbool_lo_@var{m}} instruction pattern
+@item @samp{vec_unpacks_sbool_hi_@var{m}}, @samp{vec_unpacks_sbool_lo_@var{m}}
+Extract the high/low part of a vector of boolean elements that have scalar
+mode @var{m}.  The input vector (operand 1) has N elements, the output
+vector (operand 0) has N/2 elements.  The last operand (operand 2) is the
+number of elements of the input vector, N, as a @code{CONST_INT}.  These
+patterns are used if both the input and output vectors have the same scalar
+mode @var{m} and thus using @code{vec_unpacks_hi_@var{m}} or
+@code{vec_unpacks_lo_@var{m}} would be ambiguous.
+
 @cindex @code{vec_unpacks_float_hi_@var{m}} instruction pattern
 @cindex @code{vec_unpacks_float_lo_@var{m}} instruction pattern
 @cindex @code{vec_unpacku_float_hi_@var{m}} instruction pattern
index b4a2133ebce3f7481b3d62d8b58da42c43cda871..fe3647f0ac77e1a6bb2bb88ba368c00b21b3a135 100644 (file)
@@ -9493,12 +9493,34 @@ expand_expr_real_2 (sepops ops, rtx target, machine_mode tmode,
       gcc_assert (target);
       return target;
 
-    case VEC_PACK_TRUNC_EXPR:
     case VEC_PACK_SAT_EXPR:
     case VEC_PACK_FIX_TRUNC_EXPR:
       mode = TYPE_MODE (TREE_TYPE (treeop0));
       goto binop;
 
+    case VEC_PACK_TRUNC_EXPR:
+      if (VECTOR_BOOLEAN_TYPE_P (type)
+         && VECTOR_BOOLEAN_TYPE_P (TREE_TYPE (treeop0))
+         && mode == TYPE_MODE (TREE_TYPE (treeop0))
+         && SCALAR_INT_MODE_P (mode))
+       {
+         struct expand_operand eops[4];
+         machine_mode imode = TYPE_MODE (TREE_TYPE (treeop0));
+         expand_operands (treeop0, treeop1,
+                          subtarget, &op0, &op1, EXPAND_NORMAL);
+         this_optab = vec_pack_sbool_trunc_optab;
+         enum insn_code icode = optab_handler (this_optab, imode);
+         create_output_operand (&eops[0], target, mode);
+         create_convert_operand_from (&eops[1], op0, imode, false);
+         create_convert_operand_from (&eops[2], op1, imode, false);
+         temp = GEN_INT (TYPE_VECTOR_SUBPARTS (type).to_constant ());
+         create_input_operand (&eops[3], temp, imode);
+         expand_insn (icode, 4, eops);
+         return eops[0].value;
+       }
+      mode = TYPE_MODE (TREE_TYPE (treeop0));
+      goto binop;
+
     case VEC_PACK_FLOAT_EXPR:
       mode = TYPE_MODE (TREE_TYPE (treeop0));
       expand_operands (treeop0, treeop1,
index 1f87e4288168c7d49fd55c48345e5e30cf295eb8..68270bdea98c227a8c95cf56cc0043f5af1caeec 100644 (file)
@@ -256,6 +256,7 @@ expand_widen_pattern_expr (sepops ops, rtx op0, rtx op1, rtx wide_op,
   enum insn_code icode;
   int nops = TREE_CODE_LENGTH (ops->code);
   int op;
+  bool sbool = false;
 
   oprnd0 = ops->op0;
   tmode0 = TYPE_MODE (TREE_TYPE (oprnd0));
@@ -265,6 +266,22 @@ expand_widen_pattern_expr (sepops ops, rtx op0, rtx op1, rtx wide_op,
        for these ops.  */
     widen_pattern_optab
       = optab_for_tree_code (ops->code, ops->type, optab_default);
+  else if ((ops->code == VEC_UNPACK_HI_EXPR
+           || ops->code == VEC_UNPACK_LO_EXPR)
+          && VECTOR_BOOLEAN_TYPE_P (ops->type)
+          && VECTOR_BOOLEAN_TYPE_P (TREE_TYPE (oprnd0))
+          && TYPE_MODE (ops->type) == TYPE_MODE (TREE_TYPE (oprnd0))
+          && SCALAR_INT_MODE_P (TYPE_MODE (ops->type)))
+    {
+      /* For VEC_UNPACK_{LO,HI}_EXPR if the mode of op0 and result is
+        the same scalar mode for VECTOR_BOOLEAN_TYPE_P vectors, use
+        vec_unpacks_sbool_{lo,hi}_optab, so that we can pass the number
+        of elements in the input vector to the pattern.  */
+      widen_pattern_optab
+       = (ops->code == VEC_UNPACK_HI_EXPR
+          ? vec_unpacks_sbool_hi_optab : vec_unpacks_sbool_lo_optab);
+      sbool = true;
+    }
   else
     widen_pattern_optab
       = optab_for_tree_code (ops->code, TREE_TYPE (oprnd0), optab_default);
@@ -282,6 +299,12 @@ expand_widen_pattern_expr (sepops ops, rtx op0, rtx op1, rtx wide_op,
       oprnd1 = ops->op1;
       tmode1 = TYPE_MODE (TREE_TYPE (oprnd1));
     }
+  else if (sbool)
+    {
+      nops = 2;
+      op1 = GEN_INT (TYPE_VECTOR_SUBPARTS (TREE_TYPE (oprnd0)).to_constant ());
+      tmode1 = tmode0;
+    }
 
   /* The last operand is of a wider mode than the rest of the operands.  */
   if (nops == 2)
index 007212f63674e0ce74c878cdab942ce042f16549..3ede65bdaf4abc49053e41f24202d679322239b3 100644 (file)
@@ -338,6 +338,7 @@ OPTAB_D (vec_pack_sfix_trunc_optab, "vec_pack_sfix_trunc_$a")
 OPTAB_D (vec_pack_ssat_optab, "vec_pack_ssat_$a")
 OPTAB_D (vec_pack_trunc_optab, "vec_pack_trunc_$a")
 OPTAB_D (vec_pack_ufix_trunc_optab, "vec_pack_ufix_trunc_$a")
+OPTAB_D (vec_pack_sbool_trunc_optab, "vec_pack_sbool_trunc_$a")
 OPTAB_D (vec_pack_usat_optab, "vec_pack_usat_$a")
 OPTAB_D (vec_packs_float_optab, "vec_packs_float_$a")
 OPTAB_D (vec_packu_float_optab, "vec_packu_float_$a")
@@ -353,6 +354,8 @@ OPTAB_D (vec_unpacks_float_hi_optab, "vec_unpacks_float_hi_$a")
 OPTAB_D (vec_unpacks_float_lo_optab, "vec_unpacks_float_lo_$a")
 OPTAB_D (vec_unpacks_hi_optab, "vec_unpacks_hi_$a")
 OPTAB_D (vec_unpacks_lo_optab, "vec_unpacks_lo_$a")
+OPTAB_D (vec_unpacks_sbool_hi_optab, "vec_unpacks_sbool_hi_$a")
+OPTAB_D (vec_unpacks_sbool_lo_optab, "vec_unpacks_sbool_lo_$a")
 OPTAB_D (vec_unpacku_float_hi_optab, "vec_unpacku_float_hi_$a")
 OPTAB_D (vec_unpacku_float_lo_optab, "vec_unpacku_float_lo_$a")
 OPTAB_D (vec_unpacku_hi_optab, "vec_unpacku_hi_$a")
index 4f6358b9384273a077344e479711737a41cec2ad..1ed54f4566d50d96610360bbf95e469989e50bb6 100644 (file)
@@ -1,3 +1,21 @@
+2018-12-18  Jakub Jelinek  <jakub@redhat.com>
+
+       PR target/88513
+       PR target/88514
+       * gcc.target/i386/avx512f-pr88513-1.c: New test.
+       * gcc.target/i386/avx512f-pr88513-2.c: New test.
+       * gcc.target/i386/avx512vl-pr88464-1.c: New test.
+       * gcc.target/i386/avx512vl-pr88464-2.c: New test.
+       * gcc.target/i386/avx512vl-pr88464-3.c: New test.
+       * gcc.target/i386/avx512vl-pr88464-4.c: New test.
+       * gcc.target/i386/avx512vl-pr88513-1.c: New test.
+       * gcc.target/i386/avx512vl-pr88513-2.c: New test.
+       * gcc.target/i386/avx512vl-pr88513-3.c: New test.
+       * gcc.target/i386/avx512vl-pr88513-4.c: New test.
+       * gcc.target/i386/avx512vl-pr88514-1.c: New test.
+       * gcc.target/i386/avx512vl-pr88514-2.c: New test.
+       * gcc.target/i386/avx512vl-pr88514-3.c: New test.
+
 2018-12-18  Wei Xiao  <wei3.xiao@intel.com>
 
        * g++.target/i386/mv16.C: Handle new march.
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-pr88513-1.c b/gcc/testsuite/gcc.target/i386/avx512f-pr88513-1.c
new file mode 100644 (file)
index 0000000..12bf709
--- /dev/null
@@ -0,0 +1,16 @@
+/* PR target/88513 */
+/* { dg-do run } */
+/* { dg-options "-O2 -fopenmp-simd -mavx512f -mtune=intel -mprefer-vector-width=512 -fno-vect-cost-model" } */
+/* { dg-require-effective-target avx512f } */
+
+#define CHECK_H "avx512f-check.h"
+
+#include "../../gcc.dg/vect/pr59591-1.c"
+
+#include CHECK_H
+
+static void
+test_512 (void)
+{
+  bar ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-pr88513-2.c b/gcc/testsuite/gcc.target/i386/avx512f-pr88513-2.c
new file mode 100644 (file)
index 0000000..9f4c279
--- /dev/null
@@ -0,0 +1,16 @@
+/* PR target/88513 */
+/* { dg-do run } */
+/* { dg-options "-O2 -fopenmp-simd -mavx512f -mtune=intel -mprefer-vector-width=512 -fno-vect-cost-model" } */
+/* { dg-require-effective-target avx512f } */
+
+#define CHECK_H "avx512f-check.h"
+
+#include "../../gcc.dg/vect/pr59591-2.c"
+
+#include CHECK_H
+
+static void
+test_512 (void)
+{
+  bar ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-pr88464-1.c b/gcc/testsuite/gcc.target/i386/avx512vl-pr88464-1.c
new file mode 100644 (file)
index 0000000..55a28dd
--- /dev/null
@@ -0,0 +1,7 @@
+/* PR tree-optimization/88464 */
+/* { dg-do compile } */
+/* { dg-options "-O3 -mavx512vl -mprefer-vector-width=256 -mtune=skylake-avx512 -fdump-tree-vect-details" } */
+/* { dg-final { scan-tree-dump-times "loop vectorized using 32 byte vectors" 4 "vect" } } */
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops in function" 4 "vect" } } */
+
+#include "avx512f-pr88464-1.c"
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-pr88464-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-pr88464-2.c
new file mode 100644 (file)
index 0000000..b5c8205
--- /dev/null
@@ -0,0 +1,20 @@
+/* PR tree-optimization/88464 */
+/* { dg-do run { target { avx512vl } } } */
+/* { dg-options "-O3 -mavx512vl -mprefer-vector-width=256 -mtune=skylake-avx512" } */
+
+#define AVX512VL
+#define AVX512F_LEN 512
+#define AVX512F_LEN_HALF 256
+
+#include "avx512f-pr88464-2.c"
+
+static void
+test_256 (void)
+{
+  avx512f_test ();
+}
+
+static void
+test_128 (void)
+{
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-pr88464-3.c b/gcc/testsuite/gcc.target/i386/avx512vl-pr88464-3.c
new file mode 100644 (file)
index 0000000..6b0c8a8
--- /dev/null
@@ -0,0 +1,7 @@
+/* PR tree-optimization/88464 */
+/* { dg-do compile } */
+/* { dg-options "-O3 -mavx512vl -mprefer-vector-width=128 -mtune=skylake-avx512 -fdump-tree-vect-details" } */
+/* { dg-final { scan-tree-dump-times "loop vectorized using 16 byte vectors" 4 "vect" } } */
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops in function" 4 "vect" } } */
+
+#include "avx512f-pr88464-1.c"
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-pr88464-4.c b/gcc/testsuite/gcc.target/i386/avx512vl-pr88464-4.c
new file mode 100644 (file)
index 0000000..7df6ce3
--- /dev/null
@@ -0,0 +1,20 @@
+/* PR tree-optimization/88464 */
+/* { dg-do run { target { avx512vl } } } */
+/* { dg-options "-O3 -mavx512vl -mprefer-vector-width=128 -mtune=skylake-avx512" } */
+
+#define AVX512VL
+#define AVX512F_LEN 512
+#define AVX512F_LEN_HALF 256
+
+#include "avx512f-pr88464-2.c"
+
+static void
+test_256 (void)
+{
+  avx512f_test ();
+}
+
+static void
+test_128 (void)
+{
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-pr88513-1.c b/gcc/testsuite/gcc.target/i386/avx512vl-pr88513-1.c
new file mode 100644 (file)
index 0000000..a5d144f
--- /dev/null
@@ -0,0 +1,24 @@
+/* PR target/88513 */
+/* { dg-do run } */
+/* { dg-options "-O2 -fopenmp-simd -mavx512vl -mtune=intel -mprefer-vector-width=128 -fno-vect-cost-model" } */
+/* { dg-require-effective-target avx512vl } */
+
+#define AVX512VL
+#define AVX512F_LEN 512
+#define AVX512F_LEN_HALF 256
+#define CHECK_H "avx512f-check.h"
+
+#include "../../gcc.dg/vect/pr59591-1.c"
+
+#include CHECK_H
+
+static void
+test_256 (void)
+{
+  bar ();
+}
+
+static void
+test_128 (void)
+{
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-pr88513-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-pr88513-2.c
new file mode 100644 (file)
index 0000000..6eef7de
--- /dev/null
@@ -0,0 +1,24 @@
+/* PR target/88513 */
+/* { dg-do run } */
+/* { dg-options "-O2 -fopenmp-simd -mavx512vl -mtune=intel -mprefer-vector-width=128 -fno-vect-cost-model" } */
+/* { dg-require-effective-target avx512vl } */
+
+#define AVX512VL
+#define AVX512F_LEN 512
+#define AVX512F_LEN_HALF 256
+#define CHECK_H "avx512f-check.h"
+
+#include "../../gcc.dg/vect/pr59591-2.c"
+
+#include CHECK_H
+
+static void
+test_256 (void)
+{
+  bar ();
+}
+
+static void
+test_128 (void)
+{
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-pr88513-3.c b/gcc/testsuite/gcc.target/i386/avx512vl-pr88513-3.c
new file mode 100644 (file)
index 0000000..8846820
--- /dev/null
@@ -0,0 +1,24 @@
+/* PR target/88513 */
+/* { dg-do run } */
+/* { dg-options "-O2 -fopenmp-simd -mavx512vl -mtune=intel -mprefer-vector-width=256 -fno-vect-cost-model" } */
+/* { dg-require-effective-target avx512vl } */
+
+#define AVX512VL
+#define AVX512F_LEN 512
+#define AVX512F_LEN_HALF 256
+#define CHECK_H "avx512f-check.h"
+
+#include "../../gcc.dg/vect/pr59591-1.c"
+
+#include CHECK_H
+
+static void
+test_256 (void)
+{
+  bar ();
+}
+
+static void
+test_128 (void)
+{
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-pr88513-4.c b/gcc/testsuite/gcc.target/i386/avx512vl-pr88513-4.c
new file mode 100644 (file)
index 0000000..1f0ae18
--- /dev/null
@@ -0,0 +1,24 @@
+/* PR target/88513 */
+/* { dg-do run } */
+/* { dg-options "-O2 -fopenmp-simd -mavx512vl -mtune=intel -mprefer-vector-width=256 -fno-vect-cost-model" } */
+/* { dg-require-effective-target avx512vl } */
+
+#define AVX512VL
+#define AVX512F_LEN 512
+#define AVX512F_LEN_HALF 256
+#define CHECK_H "avx512f-check.h"
+
+#include "../../gcc.dg/vect/pr59591-2.c"
+
+#include CHECK_H
+
+static void
+test_256 (void)
+{
+  bar ();
+}
+
+static void
+test_128 (void)
+{
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-pr88514-1.c b/gcc/testsuite/gcc.target/i386/avx512vl-pr88514-1.c
new file mode 100644 (file)
index 0000000..ba5b5da
--- /dev/null
@@ -0,0 +1,5 @@
+/* PR target/88514 */
+/* { dg-do assemble { target avx512vl } } */
+/* { dg-options "-Ofast -mavx512vl -mtune=intel -mprefer-vector-width=128" } */
+
+#include "avx512vl-pr79299-1.c"
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-pr88514-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-pr88514-2.c
new file mode 100644 (file)
index 0000000..6128390
--- /dev/null
@@ -0,0 +1,5 @@
+/* PR target/88514 */
+/* { dg-do assemble { target avx512vl } } */
+/* { dg-options "-Ofast -mavx512vl -mtune=intel -mprefer-vector-width=256" } */
+
+#include "avx512vl-pr79299-1.c"
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-pr88514-3.c b/gcc/testsuite/gcc.target/i386/avx512vl-pr88514-3.c
new file mode 100644 (file)
index 0000000..6614741
--- /dev/null
@@ -0,0 +1,5 @@
+/* PR target/88514 */
+/* { dg-do assemble { target avx512vl } } */
+/* { dg-options "-Ofast -mavx512vl -mtune=intel -mprefer-vector-width=512" } */
+
+#include "avx512vl-pr79299-1.c"
index 589e018d1e223a56d926401344d580f3937698fe..7aa774a0f29bfbfddcb73373a3bc636b31dd3d99 100644 (file)
@@ -10313,6 +10313,17 @@ supportable_widening_operation (enum tree_code code, stmt_vec_info stmt_info,
       optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
       optab2 = optab_for_tree_code (c2, vectype_out, optab_default);
     }
+  else if (CONVERT_EXPR_CODE_P (code)
+          && VECTOR_BOOLEAN_TYPE_P (wide_vectype)
+          && VECTOR_BOOLEAN_TYPE_P (vectype)
+          && TYPE_MODE (wide_vectype) == TYPE_MODE (vectype)
+          && SCALAR_INT_MODE_P (TYPE_MODE (vectype)))
+    {
+      /* If the input and result modes are the same, a different optab
+        is needed where we pass in the number of units in vectype.  */
+      optab1 = vec_unpacks_sbool_lo_optab;
+      optab2 = vec_unpacks_sbool_hi_optab;
+    }
   else
     {
       optab1 = optab_for_tree_code (c1, vectype, optab_default);
@@ -10332,12 +10343,16 @@ supportable_widening_operation (enum tree_code code, stmt_vec_info stmt_info,
 
   if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
       && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
+    {
+      if (!VECTOR_BOOLEAN_TYPE_P (vectype))
+       return true;
       /* For scalar masks we may have different boolean
         vector types having the same QImode.  Thus we
         add additional check for elements number.  */
-    return (!VECTOR_BOOLEAN_TYPE_P (vectype)
-           || known_eq (TYPE_VECTOR_SUBPARTS (vectype),
-                        TYPE_VECTOR_SUBPARTS (wide_vectype) * 2));
+      if (known_eq (TYPE_VECTOR_SUBPARTS (vectype),
+                   TYPE_VECTOR_SUBPARTS (wide_vectype) * 2))
+       return true;
+    }
 
   /* Check if it's a multi-step conversion that can be done using intermediate
      types.  */
@@ -10367,8 +10382,21 @@ supportable_widening_operation (enum tree_code code, stmt_vec_info stmt_info,
          = lang_hooks.types.type_for_mode (intermediate_mode,
                                            TYPE_UNSIGNED (prev_type));
 
-      optab3 = optab_for_tree_code (c1, intermediate_type, optab_default);
-      optab4 = optab_for_tree_code (c2, intermediate_type, optab_default);
+      if (VECTOR_BOOLEAN_TYPE_P (intermediate_type)
+         && VECTOR_BOOLEAN_TYPE_P (prev_type)
+         && intermediate_mode == prev_mode
+         && SCALAR_INT_MODE_P (prev_mode))
+       {
+         /* If the input and result modes are the same, a different optab
+            is needed where we pass in the number of units in vectype.  */
+         optab3 = vec_unpacks_sbool_lo_optab;
+         optab4 = vec_unpacks_sbool_hi_optab;
+       }
+      else
+       {
+         optab3 = optab_for_tree_code (c1, intermediate_type, optab_default);
+         optab4 = optab_for_tree_code (c2, intermediate_type, optab_default);
+       }
 
       if (!optab3 || !optab4
           || (icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing
@@ -10386,9 +10414,13 @@ supportable_widening_operation (enum tree_code code, stmt_vec_info stmt_info,
 
       if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
          && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
-       return (!VECTOR_BOOLEAN_TYPE_P (vectype)
-               || known_eq (TYPE_VECTOR_SUBPARTS (intermediate_type),
-                            TYPE_VECTOR_SUBPARTS (wide_vectype) * 2));
+       {
+         if (!VECTOR_BOOLEAN_TYPE_P (vectype))
+           return true;
+         if (known_eq (TYPE_VECTOR_SUBPARTS (intermediate_type),
+                       TYPE_VECTOR_SUBPARTS (wide_vectype) * 2))
+           return true;
+       }
 
       prev_type = intermediate_type;
       prev_mode = intermediate_mode;
@@ -10441,26 +10473,30 @@ supportable_narrowing_operation (enum tree_code code,
     {
     CASE_CONVERT:
       c1 = VEC_PACK_TRUNC_EXPR;
+      if (VECTOR_BOOLEAN_TYPE_P (narrow_vectype)
+         && VECTOR_BOOLEAN_TYPE_P (vectype)
+         && TYPE_MODE (narrow_vectype) == TYPE_MODE (vectype)
+         && SCALAR_INT_MODE_P (TYPE_MODE (vectype)))
+       optab1 = vec_pack_sbool_trunc_optab;
+      else
+       optab1 = optab_for_tree_code (c1, vectype, optab_default);
       break;
 
     case FIX_TRUNC_EXPR:
       c1 = VEC_PACK_FIX_TRUNC_EXPR;
+      /* The signedness is determined from output operand.  */
+      optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
       break;
 
     case FLOAT_EXPR:
       c1 = VEC_PACK_FLOAT_EXPR;
+      optab1 = optab_for_tree_code (c1, vectype, optab_default);
       break;
 
     default:
       gcc_unreachable ();
     }
 
-  if (code == FIX_TRUNC_EXPR)
-    /* The signedness is determined from output operand.  */
-    optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
-  else
-    optab1 = optab_for_tree_code (c1, vectype, optab_default);
-
   if (!optab1)
     return false;
 
@@ -10471,12 +10507,16 @@ supportable_narrowing_operation (enum tree_code code,
   *code1 = c1;
 
   if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
-    /* For scalar masks we may have different boolean
-       vector types having the same QImode.  Thus we
-       add additional check for elements number.  */
-    return (!VECTOR_BOOLEAN_TYPE_P (vectype)
-           || known_eq (TYPE_VECTOR_SUBPARTS (vectype) * 2,
-                        TYPE_VECTOR_SUBPARTS (narrow_vectype)));
+    {
+      if (!VECTOR_BOOLEAN_TYPE_P (vectype))
+       return true;
+      /* For scalar masks we may have different boolean
+        vector types having the same QImode.  Thus we
+        add additional check for elements number.  */
+      if (known_eq (TYPE_VECTOR_SUBPARTS (vectype) * 2,
+                   TYPE_VECTOR_SUBPARTS (narrow_vectype)))
+       return true;
+    }
 
   if (code == FLOAT_EXPR)
     return false;
@@ -10528,9 +10568,15 @@ supportable_narrowing_operation (enum tree_code code,
       else
        intermediate_type
          = lang_hooks.types.type_for_mode (intermediate_mode, uns);
-      interm_optab
-       = optab_for_tree_code (VEC_PACK_TRUNC_EXPR, intermediate_type,
-                              optab_default);
+      if (VECTOR_BOOLEAN_TYPE_P (intermediate_type)
+         && VECTOR_BOOLEAN_TYPE_P (prev_type)
+         && intermediate_mode == prev_mode
+         && SCALAR_INT_MODE_P (prev_mode))
+       interm_optab = vec_pack_sbool_trunc_optab;
+      else
+       interm_optab
+         = optab_for_tree_code (VEC_PACK_TRUNC_EXPR, intermediate_type,
+                                optab_default);
       if (!interm_optab
          || ((icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing)
          || insn_data[icode1].operand[0].mode != intermediate_mode
@@ -10542,9 +10588,13 @@ supportable_narrowing_operation (enum tree_code code,
       (*multi_step_cvt)++;
 
       if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
-       return (!VECTOR_BOOLEAN_TYPE_P (vectype)
-               || known_eq (TYPE_VECTOR_SUBPARTS (intermediate_type) * 2,
-                            TYPE_VECTOR_SUBPARTS (narrow_vectype)));
+       {
+         if (!VECTOR_BOOLEAN_TYPE_P (vectype))
+           return true;
+         if (known_eq (TYPE_VECTOR_SUBPARTS (intermediate_type) * 2,
+                       TYPE_VECTOR_SUBPARTS (narrow_vectype)))
+           return true;
+       }
 
       prev_mode = intermediate_mode;
       prev_type = intermediate_type;