From ee78c20e74d30284fee36e22a64e86e45e676029 Mon Sep 17 00:00:00 2001 From: liuhongt Date: Fri, 18 Dec 2020 15:56:06 +0800 Subject: [PATCH] Lower AVX512 vector comparison to AVX version when dest is vector. gcc/ChangeLog: PR target/96891 PR target/98348 * config/i386/sse.md (VI_128_256): New mode iterator. (*avx_cmp3_1, *avx_cmp3_2, *avx_cmp3_3, *avx_cmp3_4, *avx2_eq3, *avx2_pcmp3_1, *avx2_pcmp3_2, *avx2_gt3): New define_insn_and_split to lower avx512 vector comparison to avx version when dest is vector. (*_cmp3,*_cmp3,*_ucmp3): define_insn_and_split for negating the comparison result. * config/i386/predicates.md (float_vector_all_ones_operand): New predicate. * config/i386/i386-expand.c (ix86_expand_sse_movcc): Use general NOT operator without UNSPEC_MASKOP. gcc/testsuite/ChangeLog: PR target/96891 PR target/98348 * gcc.target/i386/avx512bw-pr96891-1.c: New test. * gcc.target/i386/avx512f-pr96891-1.c: New test. * gcc.target/i386/avx512f-pr96891-2.c: New test. * gcc.target/i386/avx512f-pr96891-3.c: New test. * g++.target/i386/avx512f-pr96891-1.C: New test. * gcc.target/i386/bitwise_mask_op-3.c: Adjust testcase. --- gcc/config/i386/i386-expand.c | 14 +- gcc/config/i386/predicates.md | 47 ++++ gcc/config/i386/sse.md | 261 +++++++++++++++++- .../g++.target/i386/avx512f-pr96891-1.C | 37 +++ .../gcc.target/i386/avx512bw-pr96891-1.c | 75 +++++ .../gcc.target/i386/avx512f-pr96891-1.c | 40 +++ .../gcc.target/i386/avx512f-pr96891-2.c | 30 ++ .../gcc.target/i386/avx512f-pr96891-3.c | 39 +++ .../gcc.target/i386/bitwise_mask_op-3.c | 1 - 9 files changed, 531 insertions(+), 13 deletions(-) create mode 100644 gcc/testsuite/g++.target/i386/avx512f-pr96891-1.C create mode 100644 gcc/testsuite/gcc.target/i386/avx512bw-pr96891-1.c create mode 100644 gcc/testsuite/gcc.target/i386/avx512f-pr96891-1.c create mode 100644 gcc/testsuite/gcc.target/i386/avx512f-pr96891-2.c create mode 100644 gcc/testsuite/gcc.target/i386/avx512f-pr96891-3.c diff --git a/gcc/config/i386/i386-expand.c b/gcc/config/i386/i386-expand.c index 280645f60d5..d64b4acc7dc 100644 --- a/gcc/config/i386/i386-expand.c +++ b/gcc/config/i386/i386-expand.c @@ -3568,17 +3568,11 @@ ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false) ? force_reg (mode, op_false) : op_false); if (op_true == CONST0_RTX (mode)) { - rtx (*gen_not) (rtx, rtx); - switch (cmpmode) - { - case E_QImode: gen_not = gen_knotqi; break; - case E_HImode: gen_not = gen_knothi; break; - case E_SImode: gen_not = gen_knotsi; break; - case E_DImode: gen_not = gen_knotdi; break; - default: gcc_unreachable (); - } rtx n = gen_reg_rtx (cmpmode); - emit_insn (gen_not (n, cmp)); + if (cmpmode == E_DImode && !TARGET_64BIT) + emit_insn (gen_knotdi (n, cmp)); + else + emit_insn (gen_rtx_SET (n, gen_rtx_fmt_e (NOT, cmpmode, cmp))); cmp = n; /* Reverse op_true op_false. */ std::swap (op_true, op_false); diff --git a/gcc/config/i386/predicates.md b/gcc/config/i386/predicates.md index 0a3ab4dce68..ee42ba2922b 100644 --- a/gcc/config/i386/predicates.md +++ b/gcc/config/i386/predicates.md @@ -1069,6 +1069,53 @@ return true; }) +/* Return true if operand is a float vector constant that is all ones. */ +(define_predicate "float_vector_all_ones_operand" + (match_code "const_vector,mem") +{ + mode = GET_MODE (op); + if (!FLOAT_MODE_P (mode) + || (MEM_P (op) + && (!SYMBOL_REF_P (XEXP (op, 0)) + || !CONSTANT_POOL_ADDRESS_P (XEXP (op, 0))))) + return false; + + if (MEM_P (op)) + { + op = get_pool_constant (XEXP (op, 0)); + if (GET_CODE (op) != CONST_VECTOR) + return false; + + if (GET_MODE (op) != mode + && INTEGRAL_MODE_P (GET_MODE (op)) + && op == CONSTM1_RTX (GET_MODE (op))) + return true; + } + + rtx first = XVECEXP (op, 0, 0); + for (int i = 1; i != GET_MODE_NUNITS (GET_MODE (op)); i++) + { + rtx tmp = XVECEXP (op, 0, i); + if (!rtx_equal_p (tmp, first)) + return false; + } + if (GET_MODE (first) == E_SFmode) + { + long l; + REAL_VALUE_TO_TARGET_SINGLE (*CONST_DOUBLE_REAL_VALUE (first), l); + return (l & 0xffffffff) == 0xffffffff; + } + else if (GET_MODE (first) == E_DFmode) + { + long l[2]; + REAL_VALUE_TO_TARGET_DOUBLE (*CONST_DOUBLE_REAL_VALUE (first), l); + return ((l[0] & 0xffffffff) == 0xffffffff + && (l[1] & 0xffffffff) == 0xffffffff); + } + else + return false; +}) + /* Return true if operand is a vector constant that is all ones. */ (define_predicate "vector_all_ones_operand" (and (match_code "const_vector") diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md index 42d4c447182..9683a991fb2 100644 --- a/gcc/config/i386/sse.md +++ b/gcc/config/i386/sse.md @@ -648,6 +648,9 @@ ;; All 256bit vector integer modes (define_mode_iterator VI_256 [V32QI V16HI V8SI V4DI]) +;; All 128 and 256bit vector integer modes +(define_mode_iterator VI_128_256 [V16QI V8HI V4SI V2DI V32QI V16HI V8SI V4DI]) + ;; Various 128bit vector integer mode combinations (define_mode_iterator VI12_128 [V16QI V8HI]) (define_mode_iterator VI14_128 [V16QI V4SI]) @@ -2965,6 +2968,102 @@ (set_attr "prefix" "vex") (set_attr "mode" "")]) +(define_insn_and_split "*avx_cmp3_1" + [(set (match_operand: 0 "register_operand") + (vec_merge: + (match_operand: 1 "vector_all_ones_operand") + (match_operand: 2 "const0_operand") + (unspec: + [(match_operand:VF_128_256 3 "register_operand") + (match_operand:VF_128_256 4 "nonimmediate_operand") + (match_operand:SI 5 "const_0_to_31_operand")] + UNSPEC_PCMP)))] + "TARGET_AVX512VL && ix86_pre_reload_split ()" + "#" + "&& 1" + [(set (match_dup 6) + (unspec:VF_128_256 + [(match_dup 3) + (match_dup 4) + (match_dup 5)] + UNSPEC_PCMP)) + (set (match_dup 0) (match_dup 7))] +{ + operands[6] = gen_reg_rtx (mode); + operands[7] + = lowpart_subreg (GET_MODE (operands[0]), operands[6], mode); +}) + +(define_insn_and_split "*avx_cmp3_2" + [(set (match_operand: 0 "register_operand") + (vec_merge: + (match_operand: 1 "vector_all_ones_operand") + (match_operand: 2 "const0_operand") + (not: + (unspec: + [(match_operand:VF_128_256 3 "register_operand") + (match_operand:VF_128_256 4 "nonimmediate_operand") + (match_operand:SI 5 "const_0_to_31_operand")] + UNSPEC_PCMP))))] + "TARGET_AVX512VL && ix86_pre_reload_split ()" + "#" + "&& 1" + [(set (match_dup 6) + (unspec:VF_128_256 + [(match_dup 3) + (match_dup 4) + (match_dup 5)] + UNSPEC_PCMP)) + (set (match_dup 0) (match_dup 7))] +{ + operands[5] = GEN_INT (INTVAL (operands[5]) ^ 4); + operands[6] = gen_reg_rtx (mode); + operands[7] + = lowpart_subreg (GET_MODE (operands[0]), operands[6], mode); +}) + +(define_insn_and_split "*avx_cmp3_3" + [(set (match_operand:VF_128_256 0 "register_operand") + (vec_merge:VF_128_256 + (match_operand:VF_128_256 1 "float_vector_all_ones_operand") + (match_operand:VF_128_256 2 "const0_operand") + (unspec: + [(match_operand:VF_128_256 3 "register_operand") + (match_operand:VF_128_256 4 "nonimmediate_operand") + (match_operand:SI 5 "const_0_to_31_operand")] + UNSPEC_PCMP)))] + "TARGET_AVX512VL && ix86_pre_reload_split ()" + "#" + "&& 1" + [(set (match_dup 0) + (unspec:VF_128_256 + [(match_dup 3) + (match_dup 4) + (match_dup 5)] + UNSPEC_PCMP))]) + +(define_insn_and_split "*avx_cmp3_4" + [(set (match_operand:VF_128_256 0 "register_operand") + (vec_merge:VF_128_256 + (match_operand:VF_128_256 1 "float_vector_all_ones_operand") + (match_operand:VF_128_256 2 "const0_operand") + (not: + (unspec: + [(match_operand:VF_128_256 3 "register_operand") + (match_operand:VF_128_256 4 "nonimmediate_operand") + (match_operand:SI 5 "const_0_to_31_operand")] + UNSPEC_PCMP))))] + "TARGET_AVX512VL && ix86_pre_reload_split ()" + "#" + "&& 1" + [(set (match_dup 0) + (unspec:VF_128_256 + [(match_dup 3) + (match_dup 4) + (match_dup 5)] + UNSPEC_PCMP))] + "operands[5] = GEN_INT (INTVAL (operands[5]) ^ 4);") + (define_insn "avx_vmcmp3" [(set (match_operand:VF_128 0 "register_operand" "=x") (vec_merge:VF_128 @@ -3056,6 +3155,25 @@ (set_attr "prefix" "evex") (set_attr "mode" "")]) +(define_insn_and_split "*_cmp3" + [(set (match_operand: 0 "register_operand") + (not: + (unspec: + [(match_operand:V48_AVX512VL 1 "register_operand") + (match_operand:V48_AVX512VL 2 "nonimmediate_operand") + (match_operand:SI 3 "" "n")] + UNSPEC_PCMP)))] + "TARGET_AVX512F && ix86_pre_reload_split ()" + "#" + "&& 1" + [(set (match_dup 0) + (unspec: + [(match_dup 1) + (match_dup 2) + (match_dup 4)] + UNSPEC_PCMP))] + "operands[4] = GEN_INT (INTVAL (operands[3]) ^ 4);") + (define_insn "_cmp3" [(set (match_operand: 0 "register_operand" "=k") (unspec: @@ -3070,6 +3188,28 @@ (set_attr "prefix" "evex") (set_attr "mode" "")]) +(define_int_iterator UNSPEC_PCMP_ITER + [UNSPEC_PCMP UNSPEC_UNSIGNED_PCMP]) + +(define_insn_and_split "*_cmp3" + [(set (match_operand: 0 "register_operand") + (not: + (unspec: + [(match_operand:VI12_AVX512VL 1 "register_operand") + (match_operand:VI12_AVX512VL 2 "nonimmediate_operand") + (match_operand:SI 3 "")] + UNSPEC_PCMP_ITER)))] + "TARGET_AVX512BW && ix86_pre_reload_split ()" + "#" + "&& 1" + [(set (match_dup 0) + (unspec: + [(match_dup 1) + (match_dup 2) + (match_dup 4)] + UNSPEC_PCMP_ITER))] + "operands[4] = GEN_INT (INTVAL (operands[3]) ^ 4);") + (define_insn "_ucmp3" [(set (match_operand: 0 "register_operand" "=k") (unspec: @@ -3098,8 +3238,24 @@ (set_attr "prefix" "evex") (set_attr "mode" "")]) -(define_int_iterator UNSPEC_PCMP_ITER - [UNSPEC_PCMP UNSPEC_UNSIGNED_PCMP]) +(define_insn_and_split "*_ucmp3" + [(set (match_operand: 0 "register_operand") + (not: + (unspec: + [(match_operand:VI48_AVX512VL 1 "register_operand") + (match_operand:VI48_AVX512VL 2 "nonimmediate_operand") + (match_operand:SI 3 "const_0_to_7_operand")] + UNSPEC_UNSIGNED_PCMP)))] + "TARGET_AVX512F && ix86_pre_reload_split ()" + "#" + "&& 1" + [(set (match_dup 0) + (unspec: + [(match_dup 1) + (match_dup 2) + (match_dup 4)] + UNSPEC_UNSIGNED_PCMP))] + "operands[4] = GEN_INT (INTVAL (operands[3]) ^ 4);") (define_int_attr pcmp_signed_mask [(UNSPEC_PCMP "3") (UNSPEC_UNSIGNED_PCMP "1")]) @@ -12733,6 +12889,89 @@ (set_attr "prefix" "vex") (set_attr "mode" "OI")]) +(define_insn_and_split "*avx2_eq3" + [(set (match_operand:VI_128_256 0 "register_operand") + (vec_merge:VI_128_256 + (match_operand:VI_128_256 1 "vector_all_ones_operand") + (match_operand:VI_128_256 2 "const0_operand") + (unspec: + [(match_operand:VI_128_256 3 "nonimmediate_operand") + (match_operand:VI_128_256 4 "nonimmediate_operand")] + UNSPEC_MASKED_EQ)))] + "TARGET_AVX512VL && ix86_pre_reload_split () + && !(MEM_P (operands[3]) && MEM_P (operands[4]))" + "#" + "&& 1" + [(set (match_dup 0) + (eq:VI_128_256 + (match_dup 3) + (match_dup 4)))]) + +(define_insn_and_split "*avx2_pcmp3_1" + [(set (match_operand:VI_128_256 0 "register_operand") + (vec_merge:VI_128_256 + (match_operand:VI_128_256 1 "vector_all_ones_operand") + (match_operand:VI_128_256 2 "const0_operand") + (unspec: + [(match_operand:VI_128_256 3 "nonimmediate_operand") + (match_operand:VI_128_256 4 "nonimmediate_operand") + (match_operand:SI 5 "const_0_to_7_operand")] + UNSPEC_PCMP)))] + "TARGET_AVX512VL && ix86_pre_reload_split () + /* EQ is commutative. */ + && ((INTVAL (operands[5]) == 0 + && !(MEM_P (operands[3]) && MEM_P (operands[4]))) + /* NLE aka GT, 3 must be register. */ + || (INTVAL (operands[5]) == 6 + && !MEM_P (operands[3])) + /* LT, 4 must be register and we swap operands. */ + || (INTVAL (operands[5]) == 1 + && !MEM_P (operands[4])))" + "#" + "&& 1" + [(const_int 0)] +{ + if (INTVAL (operands[5]) == 1) + std::swap (operands[3], operands[4]); + enum rtx_code code = INTVAL (operands[5]) ? GT : EQ; + emit_move_insn (operands[0], gen_rtx_fmt_ee (code, mode, + operands[3], operands[4])); + DONE; +}) + +(define_insn_and_split "*avx2_pcmp3_2" + [(set (match_operand:VI_128_256 0 "register_operand") + (vec_merge:VI_128_256 + (match_operand:VI_128_256 1 "vector_all_ones_operand") + (match_operand:VI_128_256 2 "const0_operand") + (not: + (unspec: + [(match_operand:VI_128_256 3 "nonimmediate_operand") + (match_operand:VI_128_256 4 "nonimmediate_operand") + (match_operand:SI 5 "const_0_to_7_operand")] + UNSPEC_PCMP))))] + "TARGET_AVX512VL && ix86_pre_reload_split () + /* NE is commutative. */ + && ((INTVAL (operands[5]) == 4 + && !(MEM_P (operands[3]) && MEM_P (operands[4]))) + /* LE, 3 must be register. */ + || (INTVAL (operands[5]) == 2 + && !MEM_P (operands[3])) + /* NLT aka GE, 4 must be register and we swap operands. */ + || (INTVAL (operands[5]) == 5 + && !MEM_P (operands[4])))" + "#" + "&& 1" + [(const_int 0)] +{ + if (INTVAL (operands[5]) == 5) + std::swap (operands[3], operands[4]); + enum rtx_code code = INTVAL (operands[5]) != 4 ? GT : EQ; + emit_move_insn (operands[0], gen_rtx_fmt_ee (code, mode, + operands[3], operands[4])); + DONE; +}) + (define_expand "_eq3" [(set (match_operand: 0 "register_operand") (unspec: @@ -12857,6 +13096,24 @@ (set_attr "prefix" "vex") (set_attr "mode" "OI")]) +(define_insn_and_split "*avx2_gt3" + [(set (match_operand:VI_128_256 0 "register_operand") + (vec_merge:VI_128_256 + (match_operand:VI_128_256 1 "vector_all_ones_operand") + (match_operand:VI_128_256 2 "const0_operand") + (unspec: + [(match_operand:VI_128_256 3 "register_operand") + (match_operand:VI_128_256 4 "nonimmediate_operand")] + UNSPEC_MASKED_GT)))] + "TARGET_AVX512VL + && ix86_pre_reload_split ()" + "#" + "&& 1" + [(set (match_dup 0) + (gt:VI_128_256 + (match_dup 3) + (match_dup 4)))]) + (define_insn "_gt3" [(set (match_operand: 0 "register_operand" "=k") (unspec: diff --git a/gcc/testsuite/g++.target/i386/avx512f-pr96891-1.C b/gcc/testsuite/g++.target/i386/avx512f-pr96891-1.C new file mode 100644 index 00000000000..969a085b900 --- /dev/null +++ b/gcc/testsuite/g++.target/i386/avx512f-pr96891-1.C @@ -0,0 +1,37 @@ +/* { dg-do compile } */ +/* { dg-options "-mavx512vl -O2" } */ +/* { dg-final { scan-assembler-not "%k\[0-7\]" } } */ + +typedef float v4sf __attribute__ ((vector_size (16))); +typedef float v8sf __attribute__ ((vector_size (32))); +typedef double v2df __attribute__ ((vector_size (16))); +typedef double v4df __attribute__ ((vector_size (32))); + + +v4sf +foo_v4sf (v4sf x) +{ + const union U { unsigned u; float f; } u = { -1U }; + return x > 0.0f ? u.f : 0.0f; +} + +v8sf +foo_v8sf (v8sf x) +{ + const union U { unsigned u; float f; } u = { -1U }; + return x > 0.0f ? u.f : 0.0f; +} + +v2df +foo_v2df (v2df x) +{ + const union U { unsigned long long u; double df; } u = { -1ULL }; + return x > 0.0 ? u.df : 0.0; +} + +v4df +foo_v4df (v4df x) +{ + const union U { unsigned long long u; double df; } u = { -1ULL }; + return x > 0.0 ? u.df : 0.0; +} diff --git a/gcc/testsuite/gcc.target/i386/avx512bw-pr96891-1.c b/gcc/testsuite/gcc.target/i386/avx512bw-pr96891-1.c new file mode 100644 index 00000000000..d899cebd0d6 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512bw-pr96891-1.c @@ -0,0 +1,75 @@ +/* { dg-do compile } */ +/* { dg-options "-mavx512bw -mavx512vl -O2" } */ +/* { dg-final { scan-assembler-not "%k\[0-7\]" } } */ + +typedef char v16qi __attribute__ ((vector_size (16))); +typedef char v32qi __attribute__ ((vector_size (32))); +typedef short v8hi __attribute__ ((vector_size (16))); +typedef short v16hi __attribute__ ((vector_size (32))); +typedef int v4si __attribute__ ((vector_size (16))); +typedef int v8si __attribute__ ((vector_size (32))); +typedef long long v2di __attribute__ ((vector_size (16))); +typedef long long v4di __attribute__ ((vector_size (32))); + +#define FOO(VTYPE, OPNAME, OP) \ + VTYPE \ + foo_##VTYPE##_##OPNAME (VTYPE a, VTYPE b) \ + { \ + return a OP b; \ + } \ + +#define FOO1(VTYPE, OPNAME, OP) \ + VTYPE \ + foo1_##VTYPE##_##OPNAME (VTYPE a, VTYPE b) \ + { \ + return ~(a OP b); \ + } \ + +FOO (v16qi, eq, ==) +FOO1 (v16qi, neq, !=) +FOO (v16qi, gt, >) +FOO (v16qi, lt, <) +FOO1 (v16qi, le, <=) +FOO1 (v16qi, ge, >=) +FOO (v32qi, eq, ==) +FOO1 (v32qi, neq, !=) +FOO (v32qi, gt, >) +FOO (v32qi, lt, <) +FOO1 (v32qi, le, <=) +FOO1 (v32qi, ge, >=) +FOO (v8hi, eq, ==) +FOO1 (v8hi, neq, !=) +FOO (v8hi, gt, >) +FOO (v8hi, lt, <) +FOO1 (v8hi, le, <=) +FOO1 (v8hi, ge, >=) +FOO (v16hi, eq, ==) +FOO1 (v16hi, neq, !=) +FOO (v16hi, gt, >) +FOO (v16hi, lt, <) +FOO1 (v16hi, le, <=) +FOO1 (v16hi, ge, >=) +FOO (v4si, eq, ==) +FOO1 (v4si, neq, !=) +FOO (v4si, gt, >) +FOO (v4si, lt, <) +FOO1 (v4si, le, <=) +FOO1 (v4si, ge, >=) +FOO (v8si, eq, ==) +FOO1 (v8si, neq, !=) +FOO (v8si, gt, >) +FOO (v8si, lt, <) +FOO1 (v8si, le, <=) +FOO1 (v8si, ge, >=) +FOO (v2di, eq, ==) +FOO1 (v2di, neq, !=) +FOO (v2di, gt, >) +FOO (v2di, lt, <) +FOO1 (v2di, le, <=) +FOO1 (v2di, ge, >=) +FOO (v4di, eq, ==) +FOO1 (v4di, neq, !=) +FOO (v4di, gt, >) +FOO (v4di, lt, >) +FOO1 (v4di, le, <=) +FOO1 (v4di, ge, >=) diff --git a/gcc/testsuite/gcc.target/i386/avx512f-pr96891-1.c b/gcc/testsuite/gcc.target/i386/avx512f-pr96891-1.c new file mode 100644 index 00000000000..48ba943e151 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512f-pr96891-1.c @@ -0,0 +1,40 @@ +/* { dg-do compile } */ +/* { dg-options "-mavx512vl -O2" } */ +/* { dg-final { scan-assembler-not "%k\[0-7\]" } } */ + +typedef float v4sf __attribute__ ((vector_size (16))); +typedef float v8sf __attribute__ ((vector_size (32))); +typedef double v2df __attribute__ ((vector_size (16))); +typedef double v4df __attribute__ ((vector_size (32))); + +#define FOO(VTYPE, OPNAME, OP) \ + VTYPE \ + foo_##VTYPE##_##OPNAME (VTYPE a, VTYPE b) \ + { \ + return a OP b; \ + } \ + +FOO (v4sf, eq, ==) +FOO (v4sf, neq, !=) +FOO (v4sf, gt, >) +FOO (v4sf, ge, >=) +FOO (v4sf, lt, <) +FOO (v4sf, le, <=) +FOO (v8sf, eq, ==) +FOO (v8sf, neq, !=) +FOO (v8sf, gt, >) +FOO (v8sf, ge, >=) +FOO (v8sf, lt, <) +FOO (v8sf, le, <=) +FOO (v2df, eq, ==) +FOO (v2df, neq, !=) +FOO (v2df, gt, >) +FOO (v2df, ge, >=) +FOO (v2df, lt, <) +FOO (v2df, le, <=) +FOO (v4df, eq, ==) +FOO (v4df, neq, !=) +FOO (v4df, gt, >) +FOO (v4df, ge, >=) +FOO (v4df, lt, <) +FOO (v4df, le, <=) diff --git a/gcc/testsuite/gcc.target/i386/avx512f-pr96891-2.c b/gcc/testsuite/gcc.target/i386/avx512f-pr96891-2.c new file mode 100644 index 00000000000..5192a00e0f4 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512f-pr96891-2.c @@ -0,0 +1,30 @@ +/* { dg-do compile } */ +/* { dg-options "-mavx512vl -mavx512bw -mavx512dq -O2" } */ +/* { dg-final { scan-assembler-not "%k\[0-7\]" } } */ + +#include + +#define FOO(VTYPE,PREFIX,SUFFIX,OPNAME,MASK,LEN) \ + VTYPE \ + foo_##LEN##_##SUFFIX##_##OPNAME (VTYPE a, VTYPE b) \ + { \ + MASK m = _mm##PREFIX##_cmp##OPNAME##_##SUFFIX##_mask (a, b); \ + return _mm##PREFIX##_movm_##SUFFIX (m); \ + } \ + +FOO (__m128i,, epi8, eq, __mmask16, 128); +FOO (__m128i,, epi16, eq, __mmask8, 128); +FOO (__m128i,, epi32, eq, __mmask8, 128); +FOO (__m128i,, epi64, eq, __mmask8, 128); +FOO (__m128i,, epi8, gt, __mmask16, 128); +FOO (__m128i,, epi16, gt, __mmask8, 128); +FOO (__m128i,, epi32, gt, __mmask8, 128); +FOO (__m128i,, epi64, gt, __mmask8, 128); +FOO (__m256i, 256, epi8, eq, __mmask32, 256); +FOO (__m256i, 256, epi16, eq, __mmask16, 256); +FOO (__m256i, 256, epi32, eq, __mmask8, 256); +FOO (__m256i, 256, epi64, eq, __mmask8, 256); +FOO (__m256i, 256, epi8, gt, __mmask32, 256); +FOO (__m256i, 256, epi16, gt, __mmask16, 256); +FOO (__m256i, 256, epi32, gt, __mmask8, 256); +FOO (__m256i, 256, epi64, gt, __mmask8, 256); diff --git a/gcc/testsuite/gcc.target/i386/avx512f-pr96891-3.c b/gcc/testsuite/gcc.target/i386/avx512f-pr96891-3.c new file mode 100644 index 00000000000..1cf18f2407b --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512f-pr96891-3.c @@ -0,0 +1,39 @@ +/* { dg-do compile } */ +/* { dg-options "-mavx512vl -mavx512bw -mavx512dq -O2 -masm=att" } */ +/* { dg-final { scan-assembler-not {not[bwlqd]\]} } } */ +/* { dg-final { scan-assembler-times {(?n)vpcmp[bwdq][ \t]*\$5} 4} } */ +/* { dg-final { scan-assembler-times {(?n)vpcmp[bwdq][ \t]*\$6} 4} } */ +/* { dg-final { scan-assembler-times {(?n)vpcmp[bwdq][ \t]*\$7} 4} } */ +/* { dg-final { scan-assembler-times {(?n)vcmpp[sd][ \t]*\$5} 2} } */ +/* { dg-final { scan-assembler-times {(?n)vcmpp[sd][ \t]*\$6} 2} } */ +/* { dg-final { scan-assembler-times {(?n)vcmpp[sd][ \t]*\$7} 2} } */ + +#include + +#define FOO(VTYPE,PREFIX,SUFFIX,MASK,LEN,CMPIMM) \ + MASK \ + foo_##LEN##_##SUFFIX##_##CMPIMM (VTYPE a, VTYPE b) \ + { \ + MASK m = _mm##PREFIX##_cmp_##SUFFIX##_mask (a, b, CMPIMM); \ + return ~m; \ + } \ + +FOO (__m128i,, epi8, __mmask16, 128, 1); +FOO (__m128i,, epi16, __mmask8, 128, 1); +FOO (__m128i,, epi32, __mmask8, 128, 1); +FOO (__m128i,, epi64, __mmask8, 128, 1); +FOO (__m256i, 256, epi8, __mmask32, 256, 2); +FOO (__m256i, 256, epi16, __mmask16, 256, 2); +FOO (__m256i, 256, epi32, __mmask8, 256, 2); +FOO (__m256i, 256, epi64, __mmask8, 256, 2); +FOO (__m512i, 512, epi8, __mmask64, 512, 3); +FOO (__m512i, 512, epi16, __mmask32, 512, 3); +FOO (__m512i, 512, epi32, __mmask16, 512, 3); +FOO (__m512i, 512, epi64, __mmask8, 512, 3); + +FOO (__m128,, ps, __mmask8, 128, 1); +FOO (__m128d,, pd, __mmask8, 128, 1); +FOO (__m256, 256, ps, __mmask8, 256, 2); +FOO (__m256d, 256, pd, __mmask8, 256, 2); +FOO (__m512, 512, ps, __mmask16, 512, 3); +FOO (__m512d, 512, pd, __mmask8, 512, 3); diff --git a/gcc/testsuite/gcc.target/i386/bitwise_mask_op-3.c b/gcc/testsuite/gcc.target/i386/bitwise_mask_op-3.c index 18bf4f0d768..4a9078615aa 100644 --- a/gcc/testsuite/gcc.target/i386/bitwise_mask_op-3.c +++ b/gcc/testsuite/gcc.target/i386/bitwise_mask_op-3.c @@ -40,5 +40,4 @@ foo_andnb (__m512i a, __m512i b) foo = m1 & ~m2; } -/* { dg-final { scan-assembler-times "knotb\[\t \]" "1" } } */ /* { dg-final { scan-assembler-times "kmovb\[\t \]" "4"} } */ -- 2.30.2