From 4714942e0fbd2aa23f12f88d80b93e9b6ff4c041 Mon Sep 17 00:00:00 2001 From: Jakub Jelinek Date: Tue, 18 Dec 2018 12:22:00 +0100 Subject: [PATCH] re PR target/88513 (FAIL: gcc.target/i386/pr59591-1.c) PR target/88513 PR target/88514 * optabs.def (vec_pack_sbool_trunc_optab, vec_unpacks_sbool_hi_optab, vec_unpacks_sbool_lo_optab): New optabs. * optabs.c (expand_widen_pattern_expr): Use vec_unpacks_sbool_*_optab and pass additional argument if both input and target have the same scalar mode of VECTOR_BOOLEAN_TYPE_P vectors. * expr.c (expand_expr_real_2) <case VEC_PACK_TRUNC_EXPR>: Handle VECTOR_BOOLEAN_TYPE_P pack where result has the same scalar mode as the operands using vec_pack_sbool_trunc_optab. * tree-vect-stmts.c (supportable_widening_operation): Use vec_unpacks_sbool_{lo,hi}_optab for VECTOR_BOOLEAN_TYPE_P conversions where both wider_vectype and vectype have the same scalar mode. (supportable_narrowing_operation): Similarly use vec_pack_sbool_trunc_optab if narrow_vectype and vectype have the same scalar mode. * config/i386/i386.c (ix86_get_builtin) : Check for VECTOR_MODE_P rather than non-VOIDmode. * config/i386/sse.md (vec_pack_trunc_qi, vec_pack_trunc_<mode>): Remove useless ()s around "register_operand", formatting fixes. (vec_pack_sbool_trunc_qi, vec_unpacks_sbool_lo_qi, vec_unpacks_sbool_hi_qi): New expanders. * doc/md.texi (vec_pack_sbool_trunc_M, vec_unpacks_sbool_hi_M, vec_unpacks_sbool_lo_M): Document. * gcc.target/i386/avx512f-pr88513-1.c: New test. * gcc.target/i386/avx512f-pr88513-2.c: New test. * gcc.target/i386/avx512vl-pr88464-1.c: New test. * gcc.target/i386/avx512vl-pr88464-2.c: New test. * gcc.target/i386/avx512vl-pr88464-3.c: New test. * gcc.target/i386/avx512vl-pr88464-4.c: New test. * gcc.target/i386/avx512vl-pr88513-1.c: New test. * gcc.target/i386/avx512vl-pr88513-2.c: New test. * gcc.target/i386/avx512vl-pr88513-3.c: New test. * gcc.target/i386/avx512vl-pr88513-4.c: New test. * gcc.target/i386/avx512vl-pr88514-1.c: New test. 
* gcc.target/i386/avx512vl-pr88514-2.c: New test. * gcc.target/i386/avx512vl-pr88514-3.c: New test. From-SVN: r267228 --- gcc/ChangeLog | 28 +++++ gcc/config/i386/i386.c | 2 +- gcc/config/i386/sse.md | 86 +++++++++++++-- gcc/doc/md.texi | 21 ++++ gcc/expr.c | 24 ++++- gcc/optabs.c | 23 ++++ gcc/optabs.def | 3 + gcc/testsuite/ChangeLog | 18 ++++ .../gcc.target/i386/avx512f-pr88513-1.c | 16 +++ .../gcc.target/i386/avx512f-pr88513-2.c | 16 +++ .../gcc.target/i386/avx512vl-pr88464-1.c | 7 ++ .../gcc.target/i386/avx512vl-pr88464-2.c | 20 ++++ .../gcc.target/i386/avx512vl-pr88464-3.c | 7 ++ .../gcc.target/i386/avx512vl-pr88464-4.c | 20 ++++ .../gcc.target/i386/avx512vl-pr88513-1.c | 24 +++++ .../gcc.target/i386/avx512vl-pr88513-2.c | 24 +++++ .../gcc.target/i386/avx512vl-pr88513-3.c | 24 +++++ .../gcc.target/i386/avx512vl-pr88513-4.c | 24 +++++ .../gcc.target/i386/avx512vl-pr88514-1.c | 5 + .../gcc.target/i386/avx512vl-pr88514-2.c | 5 + .../gcc.target/i386/avx512vl-pr88514-3.c | 5 + gcc/tree-vect-stmts.c | 102 +++++++++++++----- 22 files changed, 469 insertions(+), 35 deletions(-) create mode 100644 gcc/testsuite/gcc.target/i386/avx512f-pr88513-1.c create mode 100644 gcc/testsuite/gcc.target/i386/avx512f-pr88513-2.c create mode 100644 gcc/testsuite/gcc.target/i386/avx512vl-pr88464-1.c create mode 100644 gcc/testsuite/gcc.target/i386/avx512vl-pr88464-2.c create mode 100644 gcc/testsuite/gcc.target/i386/avx512vl-pr88464-3.c create mode 100644 gcc/testsuite/gcc.target/i386/avx512vl-pr88464-4.c create mode 100644 gcc/testsuite/gcc.target/i386/avx512vl-pr88513-1.c create mode 100644 gcc/testsuite/gcc.target/i386/avx512vl-pr88513-2.c create mode 100644 gcc/testsuite/gcc.target/i386/avx512vl-pr88513-3.c create mode 100644 gcc/testsuite/gcc.target/i386/avx512vl-pr88513-4.c create mode 100644 gcc/testsuite/gcc.target/i386/avx512vl-pr88514-1.c create mode 100644 gcc/testsuite/gcc.target/i386/avx512vl-pr88514-2.c create mode 100644 gcc/testsuite/gcc.target/i386/avx512vl-pr88514-3.c 
diff --git a/gcc/ChangeLog b/gcc/ChangeLog index e24bb94baa0..6f7c79d31a1 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,31 @@ +2018-12-18 Jakub Jelinek + + PR target/88513 + PR target/88514 + * optabs.def (vec_pack_sbool_trunc_optab, vec_unpacks_sbool_hi_optab, + vec_unpacks_sbool_lo_optab): New optabs. + * optabs.c (expand_widen_pattern_expr): Use vec_unpacks_sbool_*_optab + and pass additional argument if both input and target have the same + scalar mode of VECTOR_BOOLEAN_TYPE_P vectors. + * expr.c (expand_expr_real_2) <case VEC_PACK_TRUNC_EXPR>: Handle + VECTOR_BOOLEAN_TYPE_P pack where result has the same scalar mode + as the operands using vec_pack_sbool_trunc_optab. + * tree-vect-stmts.c (supportable_widening_operation): Use + vec_unpacks_sbool_{lo,hi}_optab for VECTOR_BOOLEAN_TYPE_P conversions + where both wider_vectype and vectype have the same scalar mode. + (supportable_narrowing_operation): Similarly use + vec_pack_sbool_trunc_optab if narrow_vectype and vectype have the same + scalar mode. + * config/i386/i386.c (ix86_get_builtin) + : Check for VECTOR_MODE_P + rather than non-VOIDmode. + * config/i386/sse.md (vec_pack_trunc_qi, vec_pack_trunc_<mode>): + Remove useless ()s around "register_operand", formatting fixes. + (vec_pack_sbool_trunc_qi, vec_unpacks_sbool_lo_qi, + vec_unpacks_sbool_hi_qi): New expanders. + * doc/md.texi (vec_pack_sbool_trunc_M, vec_unpacks_sbool_hi_M, + vec_unpacks_sbool_lo_M): Document. 
+ 2018-12-18 Jozef Lawrynowicz * combine.c (update_rsp_from_reg_equal): Only look for the nonzero bits diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c index 1a4c407040a..1c36e12c79c 100644 --- a/gcc/config/i386/i386.c +++ b/gcc/config/i386/i386.c @@ -37625,7 +37625,7 @@ rdseed_step: op0 = copy_to_mode_reg (GET_MODE (op0), op0); emit_insn (gen (half, op0)); op0 = half; - if (GET_MODE (op3) != VOIDmode) + if (VECTOR_MODE_P (GET_MODE (op3))) { half = gen_reg_rtx (mode0); if (!nonimmediate_operand (op3, GET_MODE (op3))) diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md index 877759c826e..3786afdf5e8 100644 --- a/gcc/config/i386/sse.md +++ b/gcc/config/i386/sse.md @@ -12435,22 +12435,59 @@ }) (define_expand "vec_pack_trunc_qi" - [(set (match_operand:HI 0 ("register_operand")) - (ior:HI (ashift:HI (zero_extend:HI (match_operand:QI 2 ("register_operand"))) + [(set (match_operand:HI 0 "register_operand") + (ior:HI (ashift:HI (zero_extend:HI (match_operand:QI 2 "register_operand")) (const_int 8)) - (zero_extend:HI (match_operand:QI 1 ("register_operand")))))] + (zero_extend:HI (match_operand:QI 1 "register_operand"))))] "TARGET_AVX512F") (define_expand "vec_pack_trunc_" - [(set (match_operand: 0 ("register_operand")) - (ior: (ashift: (zero_extend: (match_operand:SWI24 2 ("register_operand"))) - (match_dup 3)) - (zero_extend: (match_operand:SWI24 1 ("register_operand")))))] + [(set (match_operand: 0 "register_operand") + (ior: + (ashift: + (zero_extend: + (match_operand:SWI24 2 "register_operand")) + (match_dup 3)) + (zero_extend: + (match_operand:SWI24 1 "register_operand"))))] "TARGET_AVX512BW" { operands[3] = GEN_INT (GET_MODE_BITSIZE (mode)); }) +(define_expand "vec_pack_sbool_trunc_qi" + [(match_operand:QI 0 "register_operand") + (match_operand:QI 1 "register_operand") + (match_operand:QI 2 "register_operand") + (match_operand:QI 3 "const_int_operand")] + "TARGET_AVX512F" +{ + HOST_WIDE_INT nunits = INTVAL (operands[3]); + rtx mask, tem1, tem2; + 
if (nunits != 8 && nunits != 4) + FAIL; + mask = gen_reg_rtx (QImode); + emit_move_insn (mask, GEN_INT ((1 << (nunits / 2)) - 1)); + tem1 = gen_reg_rtx (QImode); + emit_insn (gen_kandqi (tem1, operands[1], mask)); + if (TARGET_AVX512DQ) + { + tem2 = gen_reg_rtx (QImode); + emit_insn (gen_kashiftqi (tem2, operands[2], + GEN_INT (nunits / 2))); + } + else + { + tem2 = gen_reg_rtx (HImode); + emit_insn (gen_kashifthi (tem2, lowpart_subreg (HImode, operands[2], + QImode), + GEN_INT (nunits / 2))); + tem2 = lowpart_subreg (QImode, tem2, HImode); + } + emit_insn (gen_kiorqi (operands[0], tem1, tem2)); + DONE; +}) + (define_insn "_packsswb" [(set (match_operand:VI1_AVX512 0 "register_operand" "=x,x,v") (vec_concat:VI1_AVX512 @@ -14603,6 +14640,18 @@ "TARGET_SSE2" "ix86_expand_sse_unpack (operands[0], operands[1], true, false); DONE;") +(define_expand "vec_unpacks_sbool_lo_qi" + [(match_operand:QI 0 "register_operand") + (match_operand:QI 1 "register_operand") + (match_operand:QI 2 "const_int_operand")] + "TARGET_AVX512F" +{ + if (INTVAL (operands[2]) != 8 && INTVAL (operands[2]) != 4) + FAIL; + emit_move_insn (operands[0], operands[1]); + DONE; +}) + (define_expand "vec_unpacks_lo_hi" [(set (subreg:HI (match_operand:QI 0 "register_operand") 0) (match_operand:HI 1 "register_operand"))] @@ -14624,6 +14673,29 @@ "TARGET_SSE2" "ix86_expand_sse_unpack (operands[0], operands[1], true, true); DONE;") +(define_expand "vec_unpacks_sbool_hi_qi" + [(match_operand:QI 0 "register_operand") + (match_operand:QI 1 "register_operand") + (match_operand:QI 2 "const_int_operand")] + "TARGET_AVX512F" +{ + HOST_WIDE_INT nunits = INTVAL (operands[2]); + if (nunits != 8 && nunits != 4) + FAIL; + if (TARGET_AVX512DQ) + emit_insn (gen_klshiftrtqi (operands[0], operands[1], + GEN_INT (nunits / 2))); + else + { + rtx tem = gen_reg_rtx (HImode); + emit_insn (gen_klshiftrthi (tem, lowpart_subreg (HImode, operands[1], + QImode), + GEN_INT (nunits / 2))); + emit_move_insn (operands[0], lowpart_subreg 
(QImode, tem, HImode)); + } + DONE; +}) + (define_expand "vec_unpacks_hi_hi" [(parallel [(set (subreg:HI (match_operand:QI 0 "register_operand") 0) diff --git a/gcc/doc/md.texi b/gcc/doc/md.texi index aa90576b34d..197b6ba339f 100644 --- a/gcc/doc/md.texi +++ b/gcc/doc/md.texi @@ -5428,6 +5428,16 @@ are vectors of the same mode having N integral or floating point elements of size S@. Operand 0 is the resulting vector in which 2*N elements of size N/2 are concatenated after narrowing them down using truncation. +@cindex @code{vec_pack_sbool_trunc_@var{m}} instruction pattern +@item @samp{vec_pack_sbool_trunc_@var{m}} +Narrow and merge the elements of two vectors. Operands 1 and 2 are vectors +of the same type having N boolean elements. Operand 0 is the resulting +vector in which 2*N elements are concatenated. The last operand (operand 3) +is the number of elements in the output vector 2*N as a @code{CONST_INT}. +This instruction pattern is used when all the vector input and output +operands have the same scalar mode @var{m} and thus using +@code{vec_pack_trunc_@var{m}} would be ambiguous. + @cindex @code{vec_pack_ssat_@var{m}} instruction pattern @cindex @code{vec_pack_usat_@var{m}} instruction pattern @item @samp{vec_pack_ssat_@var{m}}, @samp{vec_pack_usat_@var{m}} @@ -5470,6 +5480,17 @@ integral elements. The input vector (operand 1) has N elements of size S. Widen (promote) the high/low elements of the vector using zero extension and place the resulting N/2 values of size 2*S in the output vector (operand 0). +@cindex @code{vec_unpacks_sbool_hi_@var{m}} instruction pattern +@cindex @code{vec_unpacks_sbool_lo_@var{m}} instruction pattern +@item @samp{vec_unpacks_sbool_hi_@var{m}}, @samp{vec_unpacks_sbool_lo_@var{m}} +Extract the high/low part of a vector of boolean elements that have scalar +mode @var{m}. The input vector (operand 1) has N elements, the output +vector (operand 0) has N/2 elements. 
The last operand (operand 2) is the +number of elements of the input vector N as a @code{CONST_INT}. These +patterns are used if both the input and output vectors have the same scalar +mode @var{m} and thus using @code{vec_unpacks_hi_@var{m}} or +@code{vec_unpacks_lo_@var{m}} would be ambiguous. + @cindex @code{vec_unpacks_float_hi_@var{m}} instruction pattern @cindex @code{vec_unpacks_float_lo_@var{m}} instruction pattern @cindex @code{vec_unpacku_float_hi_@var{m}} instruction pattern diff --git a/gcc/expr.c b/gcc/expr.c index b4a2133ebce..fe3647f0ac7 100644 --- a/gcc/expr.c +++ b/gcc/expr.c @@ -9493,12 +9493,34 @@ expand_expr_real_2 (sepops ops, rtx target, machine_mode tmode, gcc_assert (target); return target; - case VEC_PACK_TRUNC_EXPR: case VEC_PACK_SAT_EXPR: case VEC_PACK_FIX_TRUNC_EXPR: mode = TYPE_MODE (TREE_TYPE (treeop0)); goto binop; + case VEC_PACK_TRUNC_EXPR: + if (VECTOR_BOOLEAN_TYPE_P (type) + && VECTOR_BOOLEAN_TYPE_P (TREE_TYPE (treeop0)) + && mode == TYPE_MODE (TREE_TYPE (treeop0)) + && SCALAR_INT_MODE_P (mode)) + { + struct expand_operand eops[4]; + machine_mode imode = TYPE_MODE (TREE_TYPE (treeop0)); + expand_operands (treeop0, treeop1, + subtarget, &op0, &op1, EXPAND_NORMAL); + this_optab = vec_pack_sbool_trunc_optab; + enum insn_code icode = optab_handler (this_optab, imode); + create_output_operand (&eops[0], target, mode); + create_convert_operand_from (&eops[1], op0, imode, false); + create_convert_operand_from (&eops[2], op1, imode, false); + temp = GEN_INT (TYPE_VECTOR_SUBPARTS (type).to_constant ()); + create_input_operand (&eops[3], temp, imode); + expand_insn (icode, 4, eops); + return eops[0].value; + } + mode = TYPE_MODE (TREE_TYPE (treeop0)); + goto binop; + case VEC_PACK_FLOAT_EXPR: mode = TYPE_MODE (TREE_TYPE (treeop0)); expand_operands (treeop0, treeop1, diff --git a/gcc/optabs.c b/gcc/optabs.c index 1f87e428816..68270bdea98 100644 --- a/gcc/optabs.c +++ b/gcc/optabs.c @@ -256,6 +256,7 @@ expand_widen_pattern_expr (sepops ops, 
rtx op0, rtx op1, rtx wide_op, enum insn_code icode; int nops = TREE_CODE_LENGTH (ops->code); int op; + bool sbool = false; oprnd0 = ops->op0; tmode0 = TYPE_MODE (TREE_TYPE (oprnd0)); @@ -265,6 +266,22 @@ expand_widen_pattern_expr (sepops ops, rtx op0, rtx op1, rtx wide_op, for these ops. */ widen_pattern_optab = optab_for_tree_code (ops->code, ops->type, optab_default); + else if ((ops->code == VEC_UNPACK_HI_EXPR + || ops->code == VEC_UNPACK_LO_EXPR) + && VECTOR_BOOLEAN_TYPE_P (ops->type) + && VECTOR_BOOLEAN_TYPE_P (TREE_TYPE (oprnd0)) + && TYPE_MODE (ops->type) == TYPE_MODE (TREE_TYPE (oprnd0)) + && SCALAR_INT_MODE_P (TYPE_MODE (ops->type))) + { + /* For VEC_UNPACK_{LO,HI}_EXPR if the mode of op0 and result is + the same scalar mode for VECTOR_BOOLEAN_TYPE_P vectors, use + vec_unpacks_sbool_{lo,hi}_optab, so that we can pass in + the pattern number of elements in the wider vector. */ + widen_pattern_optab + = (ops->code == VEC_UNPACK_HI_EXPR + ? vec_unpacks_sbool_hi_optab : vec_unpacks_sbool_lo_optab); + sbool = true; + } else widen_pattern_optab = optab_for_tree_code (ops->code, TREE_TYPE (oprnd0), optab_default); @@ -282,6 +299,12 @@ expand_widen_pattern_expr (sepops ops, rtx op0, rtx op1, rtx wide_op, oprnd1 = ops->op1; tmode1 = TYPE_MODE (TREE_TYPE (oprnd1)); } + else if (sbool) + { + nops = 2; + op1 = GEN_INT (TYPE_VECTOR_SUBPARTS (TREE_TYPE (oprnd0)).to_constant ()); + tmode1 = tmode0; + } /* The last operand is of a wider mode than the rest of the operands. 
*/ if (nops == 2) diff --git a/gcc/optabs.def b/gcc/optabs.def index 007212f6367..3ede65bdaf4 100644 --- a/gcc/optabs.def +++ b/gcc/optabs.def @@ -338,6 +338,7 @@ OPTAB_D (vec_pack_sfix_trunc_optab, "vec_pack_sfix_trunc_$a") OPTAB_D (vec_pack_ssat_optab, "vec_pack_ssat_$a") OPTAB_D (vec_pack_trunc_optab, "vec_pack_trunc_$a") OPTAB_D (vec_pack_ufix_trunc_optab, "vec_pack_ufix_trunc_$a") +OPTAB_D (vec_pack_sbool_trunc_optab, "vec_pack_sbool_trunc_$a") OPTAB_D (vec_pack_usat_optab, "vec_pack_usat_$a") OPTAB_D (vec_packs_float_optab, "vec_packs_float_$a") OPTAB_D (vec_packu_float_optab, "vec_packu_float_$a") @@ -353,6 +354,8 @@ OPTAB_D (vec_unpacks_float_hi_optab, "vec_unpacks_float_hi_$a") OPTAB_D (vec_unpacks_float_lo_optab, "vec_unpacks_float_lo_$a") OPTAB_D (vec_unpacks_hi_optab, "vec_unpacks_hi_$a") OPTAB_D (vec_unpacks_lo_optab, "vec_unpacks_lo_$a") +OPTAB_D (vec_unpacks_sbool_hi_optab, "vec_unpacks_sbool_hi_$a") +OPTAB_D (vec_unpacks_sbool_lo_optab, "vec_unpacks_sbool_lo_$a") OPTAB_D (vec_unpacku_float_hi_optab, "vec_unpacku_float_hi_$a") OPTAB_D (vec_unpacku_float_lo_optab, "vec_unpacku_float_lo_$a") OPTAB_D (vec_unpacku_hi_optab, "vec_unpacku_hi_$a") diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index 4f6358b9384..1ed54f4566d 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,21 @@ +2018-12-18 Jakub Jelinek + + PR target/88513 + PR target/88514 + * gcc.target/i386/avx512f-pr88513-1.c: New test. + * gcc.target/i386/avx512f-pr88513-2.c: New test. + * gcc.target/i386/avx512vl-pr88464-1.c: New test. + * gcc.target/i386/avx512vl-pr88464-2.c: New test. + * gcc.target/i386/avx512vl-pr88464-3.c: New test. + * gcc.target/i386/avx512vl-pr88464-4.c: New test. + * gcc.target/i386/avx512vl-pr88513-1.c: New test. + * gcc.target/i386/avx512vl-pr88513-2.c: New test. + * gcc.target/i386/avx512vl-pr88513-3.c: New test. + * gcc.target/i386/avx512vl-pr88513-4.c: New test. + * gcc.target/i386/avx512vl-pr88514-1.c: New test. 
+ * gcc.target/i386/avx512vl-pr88514-2.c: New test. + * gcc.target/i386/avx512vl-pr88514-3.c: New test. + 2018-12-18 Wei Xiao * g++.target/i386/mv16.C: Handle new march. diff --git a/gcc/testsuite/gcc.target/i386/avx512f-pr88513-1.c b/gcc/testsuite/gcc.target/i386/avx512f-pr88513-1.c new file mode 100644 index 00000000000..12bf70985dd --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512f-pr88513-1.c @@ -0,0 +1,16 @@ +/* PR target/88513 */ +/* { dg-do run } */ +/* { dg-options "-O2 -fopenmp-simd -mavx512f -mtune=intel -mprefer-vector-width=512 -fno-vect-cost-model" } */ +/* { dg-require-effective-target avx512f } */ + +#define CHECK_H "avx512f-check.h" + +#include "../../gcc.dg/vect/pr59591-1.c" + +#include CHECK_H + +static void +test_512 (void) +{ + bar (); +} diff --git a/gcc/testsuite/gcc.target/i386/avx512f-pr88513-2.c b/gcc/testsuite/gcc.target/i386/avx512f-pr88513-2.c new file mode 100644 index 00000000000..9f4c2792a42 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512f-pr88513-2.c @@ -0,0 +1,16 @@ +/* PR target/88513 */ +/* { dg-do run } */ +/* { dg-options "-O2 -fopenmp-simd -mavx512f -mtune=intel -mprefer-vector-width=512 -fno-vect-cost-model" } */ +/* { dg-require-effective-target avx512f } */ + +#define CHECK_H "avx512f-check.h" + +#include "../../gcc.dg/vect/pr59591-2.c" + +#include CHECK_H + +static void +test_512 (void) +{ + bar (); +} diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-pr88464-1.c b/gcc/testsuite/gcc.target/i386/avx512vl-pr88464-1.c new file mode 100644 index 00000000000..55a28dddbf8 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512vl-pr88464-1.c @@ -0,0 +1,7 @@ +/* PR tree-optimization/88464 */ +/* { dg-do compile } */ +/* { dg-options "-O3 -mavx512vl -mprefer-vector-width=256 -mtune=skylake-avx512 -fdump-tree-vect-details" } */ +/* { dg-final { scan-tree-dump-times "loop vectorized using 32 byte vectors" 4 "vect" } } */ +/* { dg-final { scan-tree-dump-times "vectorized 1 loops in function" 4 "vect" } } */ + 
+#include "avx512f-pr88464-1.c" diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-pr88464-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-pr88464-2.c new file mode 100644 index 00000000000..b5c8205e524 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512vl-pr88464-2.c @@ -0,0 +1,20 @@ +/* PR tree-optimization/88464 */ +/* { dg-do run { target { avx512vl } } } */ +/* { dg-options "-O3 -mavx512vl -mprefer-vector-width=256 -mtune=skylake-avx512" } */ + +#define AVX512VL +#define AVX512F_LEN 512 +#define AVX512F_LEN_HALF 256 + +#include "avx512f-pr88464-2.c" + +static void +test_256 (void) +{ + avx512f_test (); +} + +static void +test_128 (void) +{ +} diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-pr88464-3.c b/gcc/testsuite/gcc.target/i386/avx512vl-pr88464-3.c new file mode 100644 index 00000000000..6b0c8a85957 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512vl-pr88464-3.c @@ -0,0 +1,7 @@ +/* PR tree-optimization/88464 */ +/* { dg-do compile } */ +/* { dg-options "-O3 -mavx512vl -mprefer-vector-width=128 -mtune=skylake-avx512 -fdump-tree-vect-details" } */ +/* { dg-final { scan-tree-dump-times "loop vectorized using 16 byte vectors" 4 "vect" } } */ +/* { dg-final { scan-tree-dump-times "vectorized 1 loops in function" 4 "vect" } } */ + +#include "avx512f-pr88464-1.c" diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-pr88464-4.c b/gcc/testsuite/gcc.target/i386/avx512vl-pr88464-4.c new file mode 100644 index 00000000000..7df6ce3fadc --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512vl-pr88464-4.c @@ -0,0 +1,20 @@ +/* PR tree-optimization/88464 */ +/* { dg-do run { target { avx512vl } } } */ +/* { dg-options "-O3 -mavx512vl -mprefer-vector-width=128 -mtune=skylake-avx512" } */ + +#define AVX512VL +#define AVX512F_LEN 512 +#define AVX512F_LEN_HALF 256 + +#include "avx512f-pr88464-2.c" + +static void +test_256 (void) +{ + avx512f_test (); +} + +static void +test_128 (void) +{ +} diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-pr88513-1.c 
b/gcc/testsuite/gcc.target/i386/avx512vl-pr88513-1.c new file mode 100644 index 00000000000..a5d144ff838 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512vl-pr88513-1.c @@ -0,0 +1,24 @@ +/* PR target/88513 */ +/* { dg-do run } */ +/* { dg-options "-O2 -fopenmp-simd -mavx512vl -mtune=intel -mprefer-vector-width=128 -fno-vect-cost-model" } */ +/* { dg-require-effective-target avx512vl } */ + +#define AVX512VL +#define AVX512F_LEN 512 +#define AVX512F_LEN_HALF 256 +#define CHECK_H "avx512f-check.h" + +#include "../../gcc.dg/vect/pr59591-1.c" + +#include CHECK_H + +static void +test_256 (void) +{ + bar (); +} + +static void +test_128 (void) +{ +} diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-pr88513-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-pr88513-2.c new file mode 100644 index 00000000000..6eef7dedfa2 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512vl-pr88513-2.c @@ -0,0 +1,24 @@ +/* PR target/88513 */ +/* { dg-do run } */ +/* { dg-options "-O2 -fopenmp-simd -mavx512vl -mtune=intel -mprefer-vector-width=128 -fno-vect-cost-model" } */ +/* { dg-require-effective-target avx512vl } */ + +#define AVX512VL +#define AVX512F_LEN 512 +#define AVX512F_LEN_HALF 256 +#define CHECK_H "avx512f-check.h" + +#include "../../gcc.dg/vect/pr59591-2.c" + +#include CHECK_H + +static void +test_256 (void) +{ + bar (); +} + +static void +test_128 (void) +{ +} diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-pr88513-3.c b/gcc/testsuite/gcc.target/i386/avx512vl-pr88513-3.c new file mode 100644 index 00000000000..884682011db --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512vl-pr88513-3.c @@ -0,0 +1,24 @@ +/* PR target/88513 */ +/* { dg-do run } */ +/* { dg-options "-O2 -fopenmp-simd -mavx512vl -mtune=intel -mprefer-vector-width=256 -fno-vect-cost-model" } */ +/* { dg-require-effective-target avx512vl } */ + +#define AVX512VL +#define AVX512F_LEN 512 +#define AVX512F_LEN_HALF 256 +#define CHECK_H "avx512f-check.h" + +#include "../../gcc.dg/vect/pr59591-1.c" + 
+#include CHECK_H + +static void +test_256 (void) +{ + bar (); +} + +static void +test_128 (void) +{ +} diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-pr88513-4.c b/gcc/testsuite/gcc.target/i386/avx512vl-pr88513-4.c new file mode 100644 index 00000000000..1f0ae18cbad --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512vl-pr88513-4.c @@ -0,0 +1,24 @@ +/* PR target/88513 */ +/* { dg-do run } */ +/* { dg-options "-O2 -fopenmp-simd -mavx512vl -mtune=intel -mprefer-vector-width=256 -fno-vect-cost-model" } */ +/* { dg-require-effective-target avx512vl } */ + +#define AVX512VL +#define AVX512F_LEN 512 +#define AVX512F_LEN_HALF 256 +#define CHECK_H "avx512f-check.h" + +#include "../../gcc.dg/vect/pr59591-2.c" + +#include CHECK_H + +static void +test_256 (void) +{ + bar (); +} + +static void +test_128 (void) +{ +} diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-pr88514-1.c b/gcc/testsuite/gcc.target/i386/avx512vl-pr88514-1.c new file mode 100644 index 00000000000..ba5b5dadf1f --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512vl-pr88514-1.c @@ -0,0 +1,5 @@ +/* PR target/88514 */ +/* { dg-do assemble { target avx512vl } } */ +/* { dg-options "-Ofast -mavx512vl -mtune=intel -mprefer-vector-width=128" } */ + +#include "avx512vl-pr79299-1.c" diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-pr88514-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-pr88514-2.c new file mode 100644 index 00000000000..6128390d43f --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512vl-pr88514-2.c @@ -0,0 +1,5 @@ +/* PR target/88514 */ +/* { dg-do assemble { target avx512vl } } */ +/* { dg-options "-Ofast -mavx512vl -mtune=intel -mprefer-vector-width=256" } */ + +#include "avx512vl-pr79299-1.c" diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-pr88514-3.c b/gcc/testsuite/gcc.target/i386/avx512vl-pr88514-3.c new file mode 100644 index 00000000000..6614741b1b9 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512vl-pr88514-3.c @@ -0,0 +1,5 @@ +/* PR target/88514 */ +/* { dg-do 
assemble { target avx512vl } } */ +/* { dg-options "-Ofast -mavx512vl -mtune=intel -mprefer-vector-width=512" } */ + +#include "avx512vl-pr79299-1.c" diff --git a/gcc/tree-vect-stmts.c b/gcc/tree-vect-stmts.c index 589e018d1e2..7aa774a0f29 100644 --- a/gcc/tree-vect-stmts.c +++ b/gcc/tree-vect-stmts.c @@ -10313,6 +10313,17 @@ supportable_widening_operation (enum tree_code code, stmt_vec_info stmt_info, optab1 = optab_for_tree_code (c1, vectype_out, optab_default); optab2 = optab_for_tree_code (c2, vectype_out, optab_default); } + else if (CONVERT_EXPR_CODE_P (code) + && VECTOR_BOOLEAN_TYPE_P (wide_vectype) + && VECTOR_BOOLEAN_TYPE_P (vectype) + && TYPE_MODE (wide_vectype) == TYPE_MODE (vectype) + && SCALAR_INT_MODE_P (TYPE_MODE (vectype))) + { + /* If the input and result modes are the same, a different optab + is needed where we pass in the number of units in vectype. */ + optab1 = vec_unpacks_sbool_lo_optab; + optab2 = vec_unpacks_sbool_hi_optab; + } else { optab1 = optab_for_tree_code (c1, vectype, optab_default); @@ -10332,12 +10343,16 @@ supportable_widening_operation (enum tree_code code, stmt_vec_info stmt_info, if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype) && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype)) + { + if (!VECTOR_BOOLEAN_TYPE_P (vectype)) + return true; /* For scalar masks we may have different boolean vector types having the same QImode. Thus we add additional check for elements number. */ - return (!VECTOR_BOOLEAN_TYPE_P (vectype) - || known_eq (TYPE_VECTOR_SUBPARTS (vectype), - TYPE_VECTOR_SUBPARTS (wide_vectype) * 2)); + if (known_eq (TYPE_VECTOR_SUBPARTS (vectype), + TYPE_VECTOR_SUBPARTS (wide_vectype) * 2)) + return true; + } /* Check if it's a multi-step conversion that can be done using intermediate types. 
*/ @@ -10367,8 +10382,21 @@ supportable_widening_operation (enum tree_code code, stmt_vec_info stmt_info, = lang_hooks.types.type_for_mode (intermediate_mode, TYPE_UNSIGNED (prev_type)); - optab3 = optab_for_tree_code (c1, intermediate_type, optab_default); - optab4 = optab_for_tree_code (c2, intermediate_type, optab_default); + if (VECTOR_BOOLEAN_TYPE_P (intermediate_type) + && VECTOR_BOOLEAN_TYPE_P (prev_type) + && intermediate_mode == prev_mode + && SCALAR_INT_MODE_P (prev_mode)) + { + /* If the input and result modes are the same, a different optab + is needed where we pass in the number of units in vectype. */ + optab3 = vec_unpacks_sbool_lo_optab; + optab4 = vec_unpacks_sbool_hi_optab; + } + else + { + optab3 = optab_for_tree_code (c1, intermediate_type, optab_default); + optab4 = optab_for_tree_code (c2, intermediate_type, optab_default); + } if (!optab3 || !optab4 || (icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing @@ -10386,9 +10414,13 @@ supportable_widening_operation (enum tree_code code, stmt_vec_info stmt_info, if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype) && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype)) - return (!VECTOR_BOOLEAN_TYPE_P (vectype) - || known_eq (TYPE_VECTOR_SUBPARTS (intermediate_type), - TYPE_VECTOR_SUBPARTS (wide_vectype) * 2)); + { + if (!VECTOR_BOOLEAN_TYPE_P (vectype)) + return true; + if (known_eq (TYPE_VECTOR_SUBPARTS (intermediate_type), + TYPE_VECTOR_SUBPARTS (wide_vectype) * 2)) + return true; + } prev_type = intermediate_type; prev_mode = intermediate_mode; @@ -10441,26 +10473,30 @@ supportable_narrowing_operation (enum tree_code code, { CASE_CONVERT: c1 = VEC_PACK_TRUNC_EXPR; + if (VECTOR_BOOLEAN_TYPE_P (narrow_vectype) + && VECTOR_BOOLEAN_TYPE_P (vectype) + && TYPE_MODE (narrow_vectype) == TYPE_MODE (vectype) + && SCALAR_INT_MODE_P (TYPE_MODE (vectype))) + optab1 = vec_pack_sbool_trunc_optab; + else + optab1 = optab_for_tree_code (c1, vectype, optab_default); break; case 
FIX_TRUNC_EXPR: c1 = VEC_PACK_FIX_TRUNC_EXPR; + /* The signedness is determined from output operand. */ + optab1 = optab_for_tree_code (c1, vectype_out, optab_default); break; case FLOAT_EXPR: c1 = VEC_PACK_FLOAT_EXPR; + optab1 = optab_for_tree_code (c1, vectype, optab_default); break; default: gcc_unreachable (); } - if (code == FIX_TRUNC_EXPR) - /* The signedness is determined from output operand. */ - optab1 = optab_for_tree_code (c1, vectype_out, optab_default); - else - optab1 = optab_for_tree_code (c1, vectype, optab_default); - if (!optab1) return false; @@ -10471,12 +10507,16 @@ supportable_narrowing_operation (enum tree_code code, *code1 = c1; if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype)) - /* For scalar masks we may have different boolean - vector types having the same QImode. Thus we - add additional check for elements number. */ - return (!VECTOR_BOOLEAN_TYPE_P (vectype) - || known_eq (TYPE_VECTOR_SUBPARTS (vectype) * 2, - TYPE_VECTOR_SUBPARTS (narrow_vectype))); + { + if (!VECTOR_BOOLEAN_TYPE_P (vectype)) + return true; + /* For scalar masks we may have different boolean + vector types having the same QImode. Thus we + add additional check for elements number. 
*/ + if (known_eq (TYPE_VECTOR_SUBPARTS (vectype) * 2, + TYPE_VECTOR_SUBPARTS (narrow_vectype))) + return true; + } if (code == FLOAT_EXPR) return false; @@ -10528,9 +10568,15 @@ supportable_narrowing_operation (enum tree_code code, else intermediate_type = lang_hooks.types.type_for_mode (intermediate_mode, uns); - interm_optab - = optab_for_tree_code (VEC_PACK_TRUNC_EXPR, intermediate_type, - optab_default); + if (VECTOR_BOOLEAN_TYPE_P (intermediate_type) + && VECTOR_BOOLEAN_TYPE_P (prev_type) + && intermediate_mode == prev_mode + && SCALAR_INT_MODE_P (prev_mode)) + interm_optab = vec_pack_sbool_trunc_optab; + else + interm_optab + = optab_for_tree_code (VEC_PACK_TRUNC_EXPR, intermediate_type, + optab_default); if (!interm_optab || ((icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing) || insn_data[icode1].operand[0].mode != intermediate_mode @@ -10542,9 +10588,13 @@ supportable_narrowing_operation (enum tree_code code, (*multi_step_cvt)++; if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype)) - return (!VECTOR_BOOLEAN_TYPE_P (vectype) - || known_eq (TYPE_VECTOR_SUBPARTS (intermediate_type) * 2, - TYPE_VECTOR_SUBPARTS (narrow_vectype))); + { + if (!VECTOR_BOOLEAN_TYPE_P (vectype)) + return true; + if (known_eq (TYPE_VECTOR_SUBPARTS (intermediate_type) * 2, + TYPE_VECTOR_SUBPARTS (narrow_vectype))) + return true; + } prev_mode = intermediate_mode; prev_type = intermediate_type; -- 2.30.2