From 8b905e9b0c09530c0f660563540257f3d181c2ac Mon Sep 17 00:00:00 2001 From: Hongtao Liu Date: Mon, 9 Dec 2019 04:16:24 +0000 Subject: [PATCH] Enable mask movement for VCOND_EXPR under avx512f for 128/256-bit vector when integer mask is available. Changelog gcc/ PR target/92686 * config/i386/sse.md (*_cmp3, *_cmp3, *_ucmp3, *_ucmp3): New. * config/i386/i386.c (ix86_print_operand): New operand substitution. * config/i386/i386-expand.c (ix86_valid_mask_cmp_mode): New function. (ix86_expand_sse_cmp): Relax condition for integer mask from 512-bit vector to all 128/256/512-bit vector. Delete code gen for avx512f compare patterns since we have generic pattern now. (ix86_expand_sse_movcc): Adjust condition and codegen for maskcmp. (ix86_expand_int_sse_cmp): Don't canonicalize the comparison when corresponding vector compare is available. gcc/testsuite/ * gcc.target/i386/pr92686.inc: New file. * gcc.target/i386/avx512bw-pr92686-vpcmp-1.c: New test. * gcc.target/i386/avx512bw-pr92686-vpcmp-intelasm-1.c: Ditto. * gcc.target/i386/avx512bw-pr92686-vpcmp-2.c: Ditto. * gcc.target/i386/avx512vl-pr92686-vpcmp-1.c: Ditto. * gcc.target/i386/avx512vl-pr92686-vpcmp-intelasm-1.c: Ditto. * gcc.target/i386/avx512vl-pr92686-vpcmp-2.c: Ditto. * gcc.target/i386/avx512bw-pr92686-movcc-1.c: Ditto. * gcc.target/i386/avx512bw-pr92686-movcc-2.c: Ditto. * gcc.target/i386/avx512vl-pr92686-movcc-1.c: Ditto. * gcc.target/i386/avx512vl-pr92686-movcc-2.c: Ditto. * gcc.target/i386/avx512vl-pr88547-1.c: Adjust testcase. * gcc.target/i386/pr88547-1.c: Ditto. 
From-SVN: r279107 --- gcc/ChangeLog | 19 ++ gcc/config/i386/i386-expand.c | 165 +++++---------- gcc/config/i386/i386.c | 34 ++++ gcc/config/i386/sse.md | 48 +++++ gcc/testsuite/ChangeLog | 16 ++ .../i386/avx512bw-pr92686-movcc-1.c | 133 ++++++++++++ .../i386/avx512bw-pr92686-movcc-2.c | 102 ++++++++++ .../i386/avx512bw-pr92686-vpcmp-1.c | 112 +++++++++++ .../i386/avx512bw-pr92686-vpcmp-2.c | 90 +++++++++ .../i386/avx512bw-pr92686-vpcmp-intelasm-1.c | 110 ++++++++++ .../gcc.target/i386/avx512vl-pr88547-1.c | 8 +- .../i386/avx512vl-pr92686-movcc-1.c | 133 ++++++++++++ .../i386/avx512vl-pr92686-movcc-2.c | 102 ++++++++++ .../i386/avx512vl-pr92686-vpcmp-1.c | 112 +++++++++++ .../i386/avx512vl-pr92686-vpcmp-2.c | 91 +++++++++ .../i386/avx512vl-pr92686-vpcmp-intelasm-1.c | 110 ++++++++++ gcc/testsuite/gcc.target/i386/pr88547-1.c | 16 +- gcc/testsuite/gcc.target/i386/pr92686.inc | 189 ++++++++++++++++++ 18 files changed, 1462 insertions(+), 128 deletions(-) create mode 100644 gcc/testsuite/gcc.target/i386/avx512bw-pr92686-movcc-1.c create mode 100644 gcc/testsuite/gcc.target/i386/avx512bw-pr92686-movcc-2.c create mode 100644 gcc/testsuite/gcc.target/i386/avx512bw-pr92686-vpcmp-1.c create mode 100644 gcc/testsuite/gcc.target/i386/avx512bw-pr92686-vpcmp-2.c create mode 100644 gcc/testsuite/gcc.target/i386/avx512bw-pr92686-vpcmp-intelasm-1.c create mode 100644 gcc/testsuite/gcc.target/i386/avx512vl-pr92686-movcc-1.c create mode 100644 gcc/testsuite/gcc.target/i386/avx512vl-pr92686-movcc-2.c create mode 100644 gcc/testsuite/gcc.target/i386/avx512vl-pr92686-vpcmp-1.c create mode 100644 gcc/testsuite/gcc.target/i386/avx512vl-pr92686-vpcmp-2.c create mode 100644 gcc/testsuite/gcc.target/i386/avx512vl-pr92686-vpcmp-intelasm-1.c create mode 100644 gcc/testsuite/gcc.target/i386/pr92686.inc diff --git a/gcc/ChangeLog b/gcc/ChangeLog index e84fb302fd5..9155667222a 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,22 @@ +2019-12-09 Hongtao Liu + + PR target/92686 + * 
config/i386/sse.md + (*_cmp3, + *_cmp3, + *_ucmp3, + *_ucmp3): New. + * config/i386/i386.c (ix86_print_operand): New operand substitution. + * config/i386/i386-expand.c (ix86_valid_mask_cmp_mode): + New function. + (ix86_expand_sse_cmp): Relax condition for integer mask from + 512-bit vector to all 128/256/512-bit vector. Delete code gen + for avx512f compare patterns since we have generic pattern now. + (ix86_expand_sse_movcc): Adjust condition and codegen for + maskcmp. + (ix86_expand_int_sse_cmp): Don't canonicalize the comparison + when corresponding vector compare is available. + 2019-12-08 Sandra Loosemore Revert: diff --git a/gcc/config/i386/i386-expand.c b/gcc/config/i386/i386-expand.c index 1ff1153e105..ff3c24cc5b7 100644 --- a/gcc/config/i386/i386-expand.c +++ b/gcc/config/i386/i386-expand.c @@ -3422,6 +3422,29 @@ ix86_expand_sse_fp_minmax (rtx dest, enum rtx_code code, rtx cmp_op0, return true; } +/* Return true if MODE is valid for vector compare to mask register. + Same result for conditional vector move with mask register. */ +static bool +ix86_valid_mask_cmp_mode (machine_mode mode) +{ + /* XOP has its own vector conditional movement. */ + if (TARGET_XOP) + return false; + + /* AVX512F is needed for mask operation. */ + if (!(TARGET_AVX512F && VECTOR_MODE_P (mode))) + return false; + + /* AVX512BW is needed for vector QI/HImode, + AVX512VL is needed for 128/256-bit vector. */ + machine_mode inner_mode = GET_MODE_INNER (mode); + int vector_size = GET_MODE_SIZE (mode); + if ((inner_mode == QImode || inner_mode == HImode) && !TARGET_AVX512BW) + return false; + + return vector_size == 64 || TARGET_AVX512VL; +} + /* Expand an SSE comparison. Return the register with the result. 
*/ static rtx @@ -3438,11 +3461,11 @@ ix86_expand_sse_cmp (rtx dest, enum rtx_code code, rtx cmp_op0, rtx cmp_op1, bool maskcmp = false; rtx x; - if (GET_MODE_SIZE (cmp_ops_mode) == 64) + if (ix86_valid_mask_cmp_mode (cmp_ops_mode)) { unsigned int nbits = GET_MODE_NUNITS (cmp_ops_mode); - cmp_mode = int_mode_for_size (nbits, 0).require (); maskcmp = true; + cmp_mode = nbits > 8 ? int_mode_for_size (nbits, 0).require () : E_QImode; } else cmp_mode = cmp_ops_mode; @@ -3461,37 +3484,6 @@ ix86_expand_sse_cmp (rtx dest, enum rtx_code code, rtx cmp_op0, rtx cmp_op1, || (op_false && reg_overlap_mentioned_p (dest, op_false))) dest = gen_reg_rtx (maskcmp ? cmp_mode : mode); - /* Compare patterns for int modes are unspec in AVX512F only. */ - if (maskcmp && (code == GT || code == EQ)) - { - rtx (*gen)(rtx, rtx, rtx); - - switch (cmp_ops_mode) - { - case E_V64QImode: - gcc_assert (TARGET_AVX512BW); - gen = code == GT ? gen_avx512bw_gtv64qi3 : gen_avx512bw_eqv64qi3_1; - break; - case E_V32HImode: - gcc_assert (TARGET_AVX512BW); - gen = code == GT ? gen_avx512bw_gtv32hi3 : gen_avx512bw_eqv32hi3_1; - break; - case E_V16SImode: - gen = code == GT ? gen_avx512f_gtv16si3 : gen_avx512f_eqv16si3_1; - break; - case E_V8DImode: - gen = code == GT ? gen_avx512f_gtv8di3 : gen_avx512f_eqv8di3_1; - break; - default: - gen = NULL; - } - - if (gen) - { - emit_insn (gen (dest, cmp_op0, cmp_op1)); - return dest; - } - } x = gen_rtx_fmt_ee (code, cmp_mode, cmp_op0, cmp_op1); if (cmp_mode != mode && !maskcmp) @@ -3515,7 +3507,7 @@ ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false) machine_mode cmpmode = GET_MODE (cmp); /* In AVX512F the result of comparison is an integer mask. 
*/ - bool maskcmp = (mode != cmpmode && TARGET_AVX512F); + bool maskcmp = mode != cmpmode && ix86_valid_mask_cmp_mode (mode); rtx t2, t3, x; @@ -3529,85 +3521,34 @@ ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false) if (maskcmp) { - rtx (*gen) (rtx, rtx) = NULL; - if ((op_true == CONST0_RTX (mode) - && vector_all_ones_operand (op_false, mode)) - || (op_false == CONST0_RTX (mode) - && vector_all_ones_operand (op_true, mode))) - switch (mode) - { - case E_V64QImode: - if (TARGET_AVX512BW) - gen = gen_avx512bw_cvtmask2bv64qi; - break; - case E_V32QImode: - if (TARGET_AVX512VL && TARGET_AVX512BW) - gen = gen_avx512vl_cvtmask2bv32qi; - break; - case E_V16QImode: - if (TARGET_AVX512VL && TARGET_AVX512BW) - gen = gen_avx512vl_cvtmask2bv16qi; - break; - case E_V32HImode: - if (TARGET_AVX512BW) - gen = gen_avx512bw_cvtmask2wv32hi; - break; - case E_V16HImode: - if (TARGET_AVX512VL && TARGET_AVX512BW) - gen = gen_avx512vl_cvtmask2wv16hi; - break; - case E_V8HImode: - if (TARGET_AVX512VL && TARGET_AVX512BW) - gen = gen_avx512vl_cvtmask2wv8hi; - break; - case E_V16SImode: - if (TARGET_AVX512DQ) - gen = gen_avx512f_cvtmask2dv16si; - break; - case E_V8SImode: - if (TARGET_AVX512VL && TARGET_AVX512DQ) - gen = gen_avx512vl_cvtmask2dv8si; - break; - case E_V4SImode: - if (TARGET_AVX512VL && TARGET_AVX512DQ) - gen = gen_avx512vl_cvtmask2dv4si; - break; - case E_V8DImode: - if (TARGET_AVX512DQ) - gen = gen_avx512f_cvtmask2qv8di; - break; - case E_V4DImode: - if (TARGET_AVX512VL && TARGET_AVX512DQ) - gen = gen_avx512vl_cvtmask2qv4di; - break; - case E_V2DImode: - if (TARGET_AVX512VL && TARGET_AVX512DQ) - gen = gen_avx512vl_cvtmask2qv2di; - break; - default: - break; - } - if (gen && SCALAR_INT_MODE_P (cmpmode)) - { - cmp = force_reg (cmpmode, cmp); - if (op_true == CONST0_RTX (mode)) + /* Using vector move with mask register. */ + cmp = force_reg (cmpmode, cmp); + /* Optimize for mask zero. */ + op_true = (op_true != CONST0_RTX (mode) + ? 
force_reg (mode, op_true) : op_true); + op_false = (op_false != CONST0_RTX (mode) + ? force_reg (mode, op_false) : op_false); + if (op_true == CONST0_RTX (mode)) + { + rtx (*gen_not) (rtx, rtx); + switch (cmpmode) { - rtx (*gen_not) (rtx, rtx); - switch (cmpmode) - { - case E_QImode: gen_not = gen_knotqi; break; - case E_HImode: gen_not = gen_knothi; break; - case E_SImode: gen_not = gen_knotsi; break; - case E_DImode: gen_not = gen_knotdi; break; - default: gcc_unreachable (); - } - rtx n = gen_reg_rtx (cmpmode); - emit_insn (gen_not (n, cmp)); - cmp = n; + case E_QImode: gen_not = gen_knotqi; break; + case E_HImode: gen_not = gen_knothi; break; + case E_SImode: gen_not = gen_knotsi; break; + case E_DImode: gen_not = gen_knotdi; break; + default: gcc_unreachable (); } - emit_insn (gen (dest, cmp)); - return; + rtx n = gen_reg_rtx (cmpmode); + emit_insn (gen_not (n, cmp)); + cmp = n; + /* Reverse op_true op_false. */ + std::swap (op_true, op_false); } + + rtx vec_merge = gen_rtx_VEC_MERGE (mode, op_true, op_false, cmp); + emit_insn (gen_rtx_SET (dest, vec_merge)); + return; } else if (vector_all_ones_operand (op_true, mode) && op_false == CONST0_RTX (mode)) @@ -4068,6 +4009,10 @@ ix86_expand_int_sse_cmp (rtx dest, enum rtx_code code, rtx cop0, rtx cop1, && (mode == V16QImode || mode == V8HImode || mode == V4SImode || mode == V2DImode)) ; + /* AVX512F supports all of the comparisons + on all 128/256/512-bit vector int types. */ + else if (ix86_valid_mask_cmp_mode (mode)) + ; else { /* Canonicalize the comparison to EQ, GT, GTU. 
*/ diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c index 04cbbd532c0..99d60bc9813 100644 --- a/gcc/config/i386/i386.c +++ b/gcc/config/i386/i386.c @@ -12468,6 +12468,40 @@ ix86_print_operand (FILE *file, rtx x, int code) } return; + case 'I': + if (ASSEMBLER_DIALECT == ASM_ATT) + putc ('$', file); + switch (GET_CODE (x)) + { + case EQ: + putc ('0', file); + break; + case NE: + putc ('4', file); + break; + case GE: + case GEU: + putc ('5', file); + break; + case GT: + case GTU: + putc ('6', file); + break; + case LE: + case LEU: + putc ('2', file); + break; + case LT: + case LTU: + putc ('1', file); + break; + default: + output_operand_lossage ("operand is not a condition code, " + "invalid operand code 'I'"); + return; + } + return; + case 'Y': switch (GET_CODE (x)) { diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md index fb43cafaad0..bbceb8b83ad 100644 --- a/gcc/config/i386/sse.md +++ b/gcc/config/i386/sse.md @@ -3050,6 +3050,18 @@ (set_attr "prefix" "evex") (set_attr "mode" "")]) +(define_insn "*_cmp3" + [(set (match_operand: 0 "register_operand" "=k") + (match_operator: 3 "ix86_comparison_int_operator" + [(match_operand:VI48_AVX512VL 1 "register_operand" "v") + (match_operand:VI48_AVX512VL 2 "nonimmediate_operand" "")]))] + "TARGET_AVX512F && " + "vpcmp\t{%I3, %2, %1, %0|%0, %1, %2, %I3}" + [(set_attr "type" "ssecmp") + (set_attr "length_immediate" "1") + (set_attr "prefix" "evex") + (set_attr "mode" "")]) + (define_insn "_cmp3" [(set (match_operand: 0 "register_operand" "=k") (unspec: @@ -3064,6 +3076,18 @@ (set_attr "prefix" "evex") (set_attr "mode" "")]) +(define_insn "*_cmp3" + [(set (match_operand: 0 "register_operand" "=k") + (match_operator: 3 "ix86_comparison_int_operator" + [(match_operand:VI12_AVX512VL 1 "register_operand" "v") + (match_operand:VI12_AVX512VL 2 "nonimmediate_operand" "vm")]))] + "TARGET_AVX512BW" + "vpcmp\t{%I3, %2, %1, %0|%0, %1, %2, %I3}" + [(set_attr "type" "ssecmp") + (set_attr "length_immediate" "1") + 
(set_attr "prefix" "evex") + (set_attr "mode" "")]) + (define_insn "_ucmp3" [(set (match_operand: 0 "register_operand" "=k") (unspec: @@ -3078,6 +3102,18 @@ (set_attr "prefix" "evex") (set_attr "mode" "")]) +(define_insn "*_ucmp3" + [(set (match_operand: 0 "register_operand" "=k") + (match_operator: 3 "ix86_comparison_uns_operator" + [(match_operand:VI12_AVX512VL 1 "register_operand" "v") + (match_operand:VI12_AVX512VL 2 "nonimmediate_operand" "vm")]))] + "TARGET_AVX512BW" + "vpcmpu\t{%I3, %2, %1, %0|%0, %1, %2, %I3}" + [(set_attr "type" "ssecmp") + (set_attr "length_immediate" "1") + (set_attr "prefix" "evex") + (set_attr "mode" "")]) + (define_insn "_ucmp3" [(set (match_operand: 0 "register_operand" "=k") (unspec: @@ -3092,6 +3128,18 @@ (set_attr "prefix" "evex") (set_attr "mode" "")]) +(define_insn "*_ucmp3" + [(set (match_operand: 0 "register_operand" "=k") + (match_operator: 3 "ix86_comparison_uns_operator" + [(match_operand:VI48_AVX512VL 1 "register_operand" "v") + (match_operand:VI48_AVX512VL 2 "nonimmediate_operand" "vm")]))] + "TARGET_AVX512F" + "vpcmpu\t{%I3, %2, %1, %0|%0, %1, %2, %I3}" + [(set_attr "type" "ssecmp") + (set_attr "length_immediate" "1") + (set_attr "prefix" "evex") + (set_attr "mode" "")]) + (define_insn "avx512f_vmcmp3" [(set (match_operand: 0 "register_operand" "=k") (and: diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index d069d0ca2c3..c6094cc3913 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,19 @@ +2019-12-09 Hongtao Liu + + * gcc.target/i386/pr92686.inc: New file. + * gcc.target/i386/avx512bw-pr92686-vpcmp-1.c: New test. + * gcc.target/i386/avx512bw-pr92686-vpcmp-intelasm-1.c: Ditto. + * gcc.target/i386/avx512bw-pr92686-vpcmp-2.c: Ditto. + * gcc.target/i386/avx512vl-pr92686-vpcmp-1.c: Ditto. + * gcc.target/i386/avx512vl-pr92686-vpcmp-intelasm-1.c: Ditto. + * gcc.target/i386/avx512vl-pr92686-vpcmp-2.c: Ditto. + * gcc.target/i386/avx512bw-pr92686-movcc-1.c: Ditto. 
+ * gcc.target/i386/avx512bw-pr92686-movcc-2.c: Ditto. + * gcc.target/i386/avx512vl-pr92686-movcc-1.c: Ditto. + * gcc.target/i386/avx512vl-pr92686-movcc-2.c: Ditto. + * gcc.target/i386/avx512vl-pr88547-1.c: Adjust testcase. + * gcc.target/i386/pr88547-1.c: Ditto. + 2019-12-08 Andrew Pinski * gcc.c-torture/execute/bswap-3.c: New test. diff --git a/gcc/testsuite/gcc.target/i386/avx512bw-pr92686-movcc-1.c b/gcc/testsuite/gcc.target/i386/avx512bw-pr92686-movcc-1.c new file mode 100644 index 00000000000..2a89077ed03 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512bw-pr92686-movcc-1.c @@ -0,0 +1,133 @@ +/* PR target/92686 */ +/* { dg-do compile } */ +/* { dg-options "-Ofast -mavx512bw -mno-avx512dq -mno-avx512vl -mno-xop -mprefer-vector-width=512" } */ +/* { dg-final { scan-assembler-times "vpcmp\[bwdq\]\[\t ]" 8 } } */ +/* { dg-final { scan-assembler-times "vpcmpu\[bwdq\]\[\t ]" 8 } } */ +/* { dg-final { scan-assembler-times "vmovdq\[au\]8\[^\{\n\]*%zmm0+\[^\n\]*\{%k\[1-7\]\}" 4 } } */ +/* { dg-final { scan-assembler-times "vmovdq\[au\]16\[^\{\n\]*%zmm0+\[^\n\]*\{%k\[1-7\]\}" 4 } } */ +/* { dg-final { scan-assembler-times "vmovdq\[au\]32\[^\{\n\]*%zmm0+\[^\n\]*\{%k\[1-7\]\}" 4 } } */ +/* { dg-final { scan-assembler-times "vmovdq\[au\]64\[^\{\n\]*%zmm0+\[^\n\]*\{%k\[1-7\]\}" 4 } } */ + +__attribute__((noipa)) void +f1 (char *__restrict dst, char *__restrict src1, char *__restrict src2) +{ + for (int i = 0; i != 64; i++) + dst[i] = src1[i] >= src2[i] ? src1[i] : dst[i]; +} + +__attribute__((noipa)) void +f2 (unsigned char *__restrict dst, unsigned char *__restrict src1, + unsigned char *__restrict src2) +{ + for (int i = 0; i != 64; i++) + dst[i] = src1[i] >= src2[i] ? src1[i] : dst[i]; +} + +__attribute__((noipa)) void +f3 (char *__restrict dst, char *__restrict src1, char *__restrict src2) +{ + for (int i = 0; i != 64; i++) + dst[i] = src1[i] <= src2[i] ? 
src1[i] : dst[i]; +} + +__attribute__((noipa)) void +f4 (unsigned char *__restrict dst, unsigned char *__restrict src1, + unsigned char *__restrict src2) +{ + for (int i = 0; i != 64; i++) + dst[i] = src1[i] <= src2[i] ? src1[i] : dst[i]; +} + +__attribute__((noipa)) void +f5 (short *__restrict dst, short *__restrict src1, short *__restrict src2) +{ + for (int i = 0; i != 32; i++) + dst[i] = src1[i] >= src2[i] ? src1[i] : dst[i]; +} + +__attribute__((noipa)) void +f6 (unsigned short *__restrict dst, unsigned short *__restrict src1, + unsigned short *__restrict src2) +{ + for (int i = 0; i != 32; i++) + dst[i] = src1[i] >= src2[i] ? src1[i] : dst[i]; +} + +__attribute__((noipa)) void +f7 (short *__restrict dst, short *__restrict src1, short *__restrict src2) +{ + for (int i = 0; i != 32; i++) + dst[i] = src1[i] <= src2[i] ? src1[i] : dst[i]; +} + +__attribute__((noipa)) void +f8 (unsigned short *__restrict dst, unsigned short *__restrict src1, + unsigned short *__restrict src2) +{ + for (int i = 0; i != 32; i++) + dst[i] = src1[i] <= src2[i] ? src1[i] : dst[i]; +} + +__attribute__((noipa)) void +f9 (int *__restrict dst, int *__restrict src1, int *__restrict src2) +{ + for (int i = 0; i != 16; i++) + dst[i] = src1[i] >= src2[i] ? src1[i] : dst[i]; +} + +__attribute__((noipa)) void +f10 (unsigned int *__restrict dst, unsigned int *__restrict src1, + unsigned int *__restrict src2) +{ + for (int i = 0; i != 16; i++) + dst[i] = src1[i] >= src2[i] ? src1[i] : dst[i]; +} + +__attribute__((noipa)) void +f11 (int *__restrict dst, int *__restrict src1, int *__restrict src2) +{ + for (int i = 0; i != 16; i++) + dst[i] = src1[i] <= src2[i] ? src1[i] : dst[i]; +} + +__attribute__((noipa)) void +f12 (unsigned int *__restrict dst, unsigned int *__restrict src1, + unsigned int *__restrict src2) +{ + for (int i = 0; i != 16; i++) + dst[i] = src1[i] <= src2[i] ? 
src1[i] : dst[i]; +} + +__attribute__((noipa)) void +f13 (long long int *__restrict dst, long long int *__restrict src1, + long long int *__restrict src2) +{ + for (int i = 0; i != 8; i++) + dst[i] = src1[i] >= src2[i] ? src1[i] : dst[i]; +} + +__attribute__((noipa)) void +f14 (unsigned long long int *__restrict dst, + unsigned long long int *__restrict src1, + unsigned long long int *__restrict src2) +{ + for (int i = 0; i != 8; i++) + dst[i] = src1[i] >= src2[i] ? src1[i] : dst[i]; +} + +__attribute__((noipa)) void +f15 (long long int *__restrict dst, long long int *__restrict src1, + long long int *__restrict src2) +{ + for (int i = 0; i != 8; i++) + dst[i] = src1[i] <= src2[i] ? src1[i] : dst[i]; +} + +__attribute__((noipa)) void +f16 (unsigned long long int *__restrict dst, + unsigned long long int *__restrict src1, + unsigned long long int *__restrict src2) +{ + for (int i = 0; i != 8; i++) + dst[i] = src1[i] <= src2[i] ? src1[i] : dst[i]; +} diff --git a/gcc/testsuite/gcc.target/i386/avx512bw-pr92686-movcc-2.c b/gcc/testsuite/gcc.target/i386/avx512bw-pr92686-movcc-2.c new file mode 100644 index 00000000000..53a7da12329 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512bw-pr92686-movcc-2.c @@ -0,0 +1,102 @@ +/* { dg-do run } */ +/* { dg-require-effective-target avx512bw } */ +/* { dg-require-effective-target avx512vl } */ +/* { dg-options "-Ofast -mavx512bw -mavx512vl -mprefer-vector-width=256" } */ + +#ifndef CHECK +#define CHECK "avx512f-helper.h" +#endif + +#include CHECK + +#ifndef TEST +#define TEST avx512bw_test +#endif + +#include "avx512bw-pr92686-movcc-1.c" +#include "pr92686.inc" + +#define NUM 512 + + +#define TEST_SIGNED(vtype, type, N, fn, fn2, op) \ +do \ + { \ + type dst[NUM], src1[NUM], src2[NUM]; \ + int i, j, sign = 1; \ + type res[N]; \ + for (i = 0; i < NUM; i++) \ + { \ + src1[i] = i * i * sign; \ + src2[i] = (i + 20) * sign; \ + dst[i] = i * i * i + 100; \ + sign = -sign; \ + } \ + for (i = 0; i < NUM; i += N) \ + { \ + for (j = 0; 
j < N; j++) \ + res[j] = dst[i + j]; \ + fn (&dst[i], &src1[i], &src2[i]); \ + for (j = 0; j < N; j++) \ + { \ + res[j] = fn2 (res[j], src1[i + j], \ + src2[i+ j], op); \ + if (res[j] != dst[i+ j]) \ + abort(); \ + } \ + } \ + } \ +while (0) + +#define TEST_UNSIGNED(vtype, type, N, fn, fn2, op) \ +do \ + { \ + type dst[NUM], src1[NUM], src2[NUM]; \ + int i,j; \ + type res[N]; \ + \ + for (i = 0; i < NUM; i++) \ + { \ + src1[i] = i * i; \ + src2[i] = i + 20; \ + dst[i] = i * i * i + 100; \ + if ((i % 4)) \ + src2[i] |= (1ULL << (sizeof (type) \ + * __CHAR_BIT__ - 1)); \ + } \ + for (i = 0; i < NUM; i += N) \ + { \ + for (j = 0; j < N; j++) \ + res[j] = dst[i + j]; \ + fn (&dst[i], &src1[i], &src2[i]); \ + for (j = 0; j < N; j++) \ + { \ + res[j] = fn2 (res[j], src1[i + j], \ + src2[i + j], op); \ + if (res[j] != dst[i + j]) \ + abort(); \ + } \ + } \ + } \ +while (0) + +static void +TEST (void) +{ + TEST_SIGNED (v64qi, signed char, 64, f1, cmpb, 5); + TEST_UNSIGNED (v64uqi, unsigned char, 64, f2, cmpub, 5); + TEST_SIGNED (v64qi, signed char, 64, f3, cmpb, 2); + TEST_UNSIGNED (v64uqi, unsigned char, 64, f4, cmpub, 2); + TEST_SIGNED (v32hi, short int, 32, f5, cmpw, 5); + TEST_UNSIGNED (v32uhi, unsigned short int, 32, f6, cmpuw, 5); + TEST_SIGNED (v32hi, short int, 32, f7, cmpw, 2); + TEST_UNSIGNED (v32uhi, unsigned short int, 32, f8, cmpuw, 2); + TEST_SIGNED (v16si, int, 16, f9, cmpd, 5); + TEST_UNSIGNED (v16usi, unsigned int, 16, f10, cmpud, 5); + TEST_SIGNED (v16si, int, 16, f11, cmpd, 2); + TEST_UNSIGNED (v16usi, unsigned int, 16, f12, cmpud, 2); + TEST_SIGNED (v8di, long long int, 8, f13, cmpq, 5); + TEST_UNSIGNED (v8udi, unsigned long long int, 8, f14, cmpuq, 5); + TEST_SIGNED (v8di, long long int, 8, f15, cmpq, 2); + TEST_UNSIGNED (v8udi, unsigned long long int, 8, f16, cmpuq, 2); +} diff --git a/gcc/testsuite/gcc.target/i386/avx512bw-pr92686-vpcmp-1.c b/gcc/testsuite/gcc.target/i386/avx512bw-pr92686-vpcmp-1.c new file mode 100644 index 00000000000..4fd3b369d0f 
--- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512bw-pr92686-vpcmp-1.c @@ -0,0 +1,112 @@ +/* PR target/92686 */ +/* { dg-do compile } */ +/* { dg-options "-O2 -mavx512bw -mno-avx512dq -mno-avx512vl -mno-xop" } */ +/* { dg-final { scan-assembler-times "vpcmp\[bwdq\]\[\t ]" 8 } } */ +/* { dg-final { scan-assembler-times "vpcmpu\[bwdq\]\[\t ]" 8 } } */ +/* { dg-final { scan-assembler-times "vpmovm2\[bw\]\[\t ]" 8 } } */ +/* { dg-final { scan-assembler-times "vpternlog\[dq\]\[\t ]" 8 } } */ + +typedef char v64qi __attribute__((vector_size(64))); +typedef unsigned char v64uqi __attribute__((vector_size(64))); +typedef short v32hi __attribute__((vector_size(64))); +typedef unsigned short v32uhi __attribute__((vector_size(64))); +typedef int v16si __attribute__((vector_size(64))); +typedef unsigned v16usi __attribute__((vector_size(64))); +typedef long long v8di __attribute__((vector_size(64))); +typedef unsigned long long v8udi __attribute__((vector_size(64))); + +__attribute__((noipa)) v64qi +f1 (v64qi x, v64qi y) +{ + return x >= y; +} + +__attribute__((noipa)) v64uqi +f2 (v64uqi x, v64uqi y) +{ + return x >= y; +} + +__attribute__((noipa)) v64qi +f3 (v64qi x, v64qi y) +{ + return x <= y; +} + +__attribute__((noipa)) v64uqi +f4 (v64uqi x, v64uqi y) +{ + return x <= y; +} + +__attribute__((noipa)) v32hi +f5 (v32hi x, v32hi y) +{ + return x >= y; +} + +__attribute__((noipa)) v32uhi +f6 (v32uhi x, v32uhi y) +{ + return x >= y; +} + +__attribute__((noipa)) v32hi +f7 (v32hi x, v32hi y) +{ + return x <= y; +} + +__attribute__((noipa)) v32uhi +f8 (v32uhi x, v32uhi y) +{ + return x <= y; +} + +__attribute__((noipa)) v16si +f9 (v16si x, v16si y) +{ + return x >= y; +} + +__attribute__((noipa)) v16usi +f10 (v16usi x, v16usi y) +{ + return x >= y; +} + +__attribute__((noipa)) v16si +f11 (v16si x, v16si y) +{ + return x <= y; +} + +__attribute__((noipa)) v16usi +f12 (v16usi x, v16usi y) +{ + return x <= y; +} + +__attribute__((noipa)) v8di +f13 (v8di x, v8di y) +{ + return x 
>= y; +} + +__attribute__((noipa)) v8udi +f14 (v8udi x, v8udi y) +{ + return x >= y; +} + +__attribute__((noipa)) v8di +f15 (v8di x, v8di y) +{ + return x <= y; +} + +__attribute__((noipa)) v8udi +f16 (v8udi x, v8udi y) +{ + return x <= y; +} diff --git a/gcc/testsuite/gcc.target/i386/avx512bw-pr92686-vpcmp-2.c b/gcc/testsuite/gcc.target/i386/avx512bw-pr92686-vpcmp-2.c new file mode 100644 index 00000000000..0ea5b56eccf --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512bw-pr92686-vpcmp-2.c @@ -0,0 +1,90 @@ +/* { dg-do run } */ +/* { dg-require-effective-target avx512bw } */ +/* { dg-options "-O2 -mavx512bw" } */ + +#ifndef CHECK +#define CHECK "avx512f-helper.h" +#endif + +#include CHECK + +#ifndef TEST +#define TEST avx512bw_test +#endif + +#include "avx512bw-pr92686-vpcmp-1.c" + +#define NUM 512 + +#define TEST_SIGNED(vtype, type, N, fn, op) \ +do \ + { \ + union { vtype x[NUM / N]; type i[NUM]; } dst, src1, src2; \ + int i, sign = 1; \ + type res; \ + for (i = 0; i < NUM; i++) \ + { \ + src1.i[i] = i * i * sign; \ + src2.i[i] = (i + 20) * sign; \ + sign = -sign; \ + } \ + for (i = 0; i < NUM; i += N) \ + dst.x[i / N] = fn (src1.x[i / N], src2.x[i / N]); \ + \ + for (i = 0; i < NUM; i++) \ + { \ + res = src1.i[i] op src2.i[i] ? -1 : 0; \ + if (res != dst.i[i]) \ + abort (); \ + } \ + } \ +while (0) + +#define TEST_UNSIGNED(vtype, type, N, fn, op) \ +do \ + { \ + union { vtype x[NUM / N]; type i[NUM]; } dst, src1, src2; \ + int i; \ + type res; \ + \ + for (i = 0; i < NUM; i++) \ + { \ + src1.i[i] = i * i; \ + src2.i[i] = i + 20; \ + if ((i % 4)) \ + src2.i[i] |= (1ULL << (sizeof (type) \ + * __CHAR_BIT__ - 1)); \ + } \ + \ + for (i = 0; i < NUM; i += N) \ + dst.x[i / N] = fn (src1.x[i / N], src2.x[i / N]); \ + \ + for (i = 0; i < NUM; i++) \ + { \ + res = src1.i[i] op src2.i[i] ? 
-1 : 0; \ + if (res != dst.i[i]) \ + abort (); \ + } \ + } \ +while (0) + +static void +TEST (void) +{ + TEST_SIGNED (v64qi, signed char, 64, f1, >=); + TEST_UNSIGNED (v64uqi, unsigned char, 64, f2, >=); + TEST_SIGNED (v64qi, signed char, 64, f3, <=); + TEST_UNSIGNED (v64uqi, unsigned char, 64, f4, <=); + TEST_SIGNED (v32hi, short int, 32, f5, >=); + TEST_UNSIGNED (v32uhi, unsigned short int, 32, f6, >=); + TEST_SIGNED (v32hi, short int, 32, f7, <=); + TEST_UNSIGNED (v32uhi, unsigned short int, 32, f8, <=); + TEST_SIGNED (v16si, int, 16, f9, >=); + TEST_UNSIGNED (v16usi, unsigned int, 16, f10, >=); + TEST_SIGNED (v16si, int, 16, f11, <=); + TEST_UNSIGNED (v16usi, unsigned int, 16, f12, <=); + TEST_SIGNED (v8di, long long int, 8, f13, >=); + TEST_UNSIGNED (v8udi, unsigned long long int, 8, f14, >=); + TEST_SIGNED (v8di, long long int, 8, f15, <=); + TEST_UNSIGNED (v8udi, unsigned long long int, 8, f16, <=); +} diff --git a/gcc/testsuite/gcc.target/i386/avx512bw-pr92686-vpcmp-intelasm-1.c b/gcc/testsuite/gcc.target/i386/avx512bw-pr92686-vpcmp-intelasm-1.c new file mode 100644 index 00000000000..23c785ce9da --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512bw-pr92686-vpcmp-intelasm-1.c @@ -0,0 +1,110 @@ +/* PR target/92686 */ +/* { dg-do assemble } */ +/* { dg-options "-O2 -mavx512bw -mno-avx512dq -mno-avx512vl -mno-xop -masm=intel" } */ +/* { dg-require-effective-target avx512bw } */ + + +typedef char v64qi __attribute__((vector_size(64))); +typedef unsigned char v64uqi __attribute__((vector_size(64))); +typedef short v32hi __attribute__((vector_size(64))); +typedef unsigned short v32uhi __attribute__((vector_size(64))); +typedef int v16si __attribute__((vector_size(64))); +typedef unsigned v16usi __attribute__((vector_size(64))); +typedef long long v8di __attribute__((vector_size(64))); +typedef unsigned long long v8udi __attribute__((vector_size(64))); + +__attribute__((noipa)) v64qi +f1 (v64qi x, v64qi y) +{ + return x >= y; +} + +__attribute__((noipa)) 
v64uqi +f2 (v64uqi x, v64uqi y) +{ + return x >= y; +} + +__attribute__((noipa)) v64qi +f3 (v64qi x, v64qi y) +{ + return x <= y; +} + +__attribute__((noipa)) v64uqi +f4 (v64uqi x, v64uqi y) +{ + return x <= y; +} + +__attribute__((noipa)) v32hi +f5 (v32hi x, v32hi y) +{ + return x >= y; +} + +__attribute__((noipa)) v32uhi +f6 (v32uhi x, v32uhi y) +{ + return x >= y; +} + +__attribute__((noipa)) v32hi +f7 (v32hi x, v32hi y) +{ + return x <= y; +} + +__attribute__((noipa)) v32uhi +f8 (v32uhi x, v32uhi y) +{ + return x <= y; +} + +__attribute__((noipa)) v16si +f9 (v16si x, v16si y) +{ + return x >= y; +} + +__attribute__((noipa)) v16usi +f10 (v16usi x, v16usi y) +{ + return x >= y; +} + +__attribute__((noipa)) v16si +f11 (v16si x, v16si y) +{ + return x <= y; +} + +__attribute__((noipa)) v16usi +f12 (v16usi x, v16usi y) +{ + return x <= y; +} + +__attribute__((noipa)) v8di +f13 (v8di x, v8di y) +{ + return x >= y; +} + +__attribute__((noipa)) v8udi +f14 (v8udi x, v8udi y) +{ + return x >= y; +} + +__attribute__((noipa)) v8di +f15 (v8di x, v8di y) +{ + return x <= y; +} + +__attribute__((noipa)) v8udi +f16 (v8udi x, v8udi y) +{ + return x <= y; +} diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-pr88547-1.c b/gcc/testsuite/gcc.target/i386/avx512vl-pr88547-1.c index aa64dc2f06e..a3ffeca4354 100644 --- a/gcc/testsuite/gcc.target/i386/avx512vl-pr88547-1.c +++ b/gcc/testsuite/gcc.target/i386/avx512vl-pr88547-1.c @@ -6,9 +6,7 @@ /* { dg-final { scan-assembler-times "vpminsb\[\t ]" 2 } } */ /* { dg-final { scan-assembler-times "vpminuw\[\t ]" 2 } } */ /* { dg-final { scan-assembler-times "vpminsw\[\t ]" 2 } } */ -/* { dg-final { scan-assembler-times "vpminud\[\t ]" 2 } } */ -/* { dg-final { scan-assembler-times "vpminsd\[\t ]" 2 } } */ -/* { dg-final { scan-assembler-times "vpminuq\[\t ]" 2 } } */ -/* { dg-final { scan-assembler-times "vpminsq\[\t ]" 2 } } */ - +/* { dg-final { scan-assembler-times "vpcmp\[dq\]\[\t ]" 4 } } */ +/* { dg-final { scan-assembler-times 
"vpcmpu\[dq\]\[\t ]" 4 } } */ +/* { dg-final { scan-assembler-times "vpternlog\[qd\]\[\t ]" 8 } } */ #include "avx2-pr88547-1.c" diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-pr92686-movcc-1.c b/gcc/testsuite/gcc.target/i386/avx512vl-pr92686-movcc-1.c new file mode 100644 index 00000000000..1b9644a3790 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512vl-pr92686-movcc-1.c @@ -0,0 +1,133 @@ +/* PR target/92686 */ +/* { dg-do compile } */ +/* { dg-options "-Ofast -mavx512bw -mavx512vl -mno-xop -mprefer-vector-width=256" } */ +/* { dg-final { scan-assembler-times "vpcmp\[bwdq\]\[\t ]" 8 } } */ +/* { dg-final { scan-assembler-times "vpcmpu\[bwdq\]\[\t ]" 8 } } */ +/* { dg-final { scan-assembler-times "vmovdq\[au\]8\[^\{\n\]*%ymm0+\[^\n\]*\{%k\[1-7\]\}" 4 } } */ +/* { dg-final { scan-assembler-times "vmovdq\[au\]16\[^\{\n\]*%ymm0+\[^\n\]*\{%k\[1-7\]\}" 4 } } */ +/* { dg-final { scan-assembler-times "vmovdq\[au\]32\[^\{\n\]*%ymm0+\[^\n\]*\{%k\[1-7\]\}" 4 } } */ +/* { dg-final { scan-assembler-times "vmovdq\[au\]64\[^\{\n\]*%ymm0+\[^\n\]*\{%k\[1-7\]\}" 4 } } */ + +__attribute__((noipa)) void +f1 (char *__restrict dst, char *__restrict src1, char *__restrict src2) +{ + for (int i = 0; i != 32; i++) + dst[i] = src1[i] >= src2[i] ? src1[i] : dst[i]; +} + +__attribute__((noipa)) void +f2 (unsigned char *__restrict dst, unsigned char *__restrict src1, + unsigned char *__restrict src2) +{ + for (int i = 0; i != 32; i++) + dst[i] = src1[i] >= src2[i] ? src1[i] : dst[i]; +} + +__attribute__((noipa)) void +f3 (char *__restrict dst, char *__restrict src1, char *__restrict src2) +{ + for (int i = 0; i != 32; i++) + dst[i] = src1[i] <= src2[i] ? src1[i] : dst[i]; +} + +__attribute__((noipa)) void +f4 (unsigned char *__restrict dst, unsigned char *__restrict src1, + unsigned char *__restrict src2) +{ + for (int i = 0; i != 32; i++) + dst[i] = src1[i] <= src2[i] ? 
src1[i] : dst[i]; +} + +__attribute__((noipa)) void +f5 (short *__restrict dst, short *__restrict src1, short *__restrict src2) +{ + for (int i = 0; i != 16; i++) + dst[i] = src1[i] >= src2[i] ? src1[i] : dst[i]; +} + +__attribute__((noipa)) void +f6 (unsigned short *__restrict dst, unsigned short *__restrict src1, + unsigned short *__restrict src2) +{ + for (int i = 0; i != 16; i++) + dst[i] = src1[i] >= src2[i] ? src1[i] : dst[i]; +} + +__attribute__((noipa)) void +f7 (short *__restrict dst, short *__restrict src1, short *__restrict src2) +{ + for (int i = 0; i != 16; i++) + dst[i] = src1[i] <= src2[i] ? src1[i] : dst[i]; +} + +__attribute__((noipa)) void +f8 (unsigned short *__restrict dst, unsigned short *__restrict src1, + unsigned short *__restrict src2) +{ + for (int i = 0; i != 16; i++) + dst[i] = src1[i] <= src2[i] ? src1[i] : dst[i]; +} + +__attribute__((noipa)) void +f9 (int *__restrict dst, int *__restrict src1, int *__restrict src2) +{ + for (int i = 0; i != 8; i++) + dst[i] = src1[i] >= src2[i] ? src1[i] : dst[i]; +} + +__attribute__((noipa)) void +f10 (unsigned int *__restrict dst, unsigned int *__restrict src1, + unsigned int *__restrict src2) +{ + for (int i = 0; i != 8; i++) + dst[i] = src1[i] >= src2[i] ? src1[i] : dst[i]; +} + +__attribute__((noipa)) void +f11 (int *__restrict dst, int *__restrict src1, int *__restrict src2) +{ + for (int i = 0; i != 8; i++) + dst[i] = src1[i] <= src2[i] ? src1[i] : dst[i]; +} + +__attribute__((noipa)) void +f12 (unsigned int *__restrict dst, unsigned int *__restrict src1, + unsigned int *__restrict src2) +{ + for (int i = 0; i != 8; i++) + dst[i] = src1[i] <= src2[i] ? src1[i] : dst[i]; +} + +__attribute__((noipa)) void +f13 (long long int *__restrict dst, long long int *__restrict src1, + long long int *__restrict src2) +{ + for (int i = 0; i != 4; i++) + dst[i] = src1[i] >= src2[i] ? 
src1[i] : dst[i]; +} + +__attribute__((noipa)) void +f14 (unsigned long long int *__restrict dst, + unsigned long long int *__restrict src1, + unsigned long long int *__restrict src2) +{ + for (int i = 0; i != 4; i++) + dst[i] = src1[i] >= src2[i] ? src1[i] : dst[i]; +} + +__attribute__((noipa)) void +f15 (long long int *__restrict dst, long long int *__restrict src1, + long long int *__restrict src2) +{ + for (int i = 0; i != 4; i++) + dst[i] = src1[i] <= src2[i] ? src1[i] : dst[i]; +} + +__attribute__((noipa)) void +f16 (unsigned long long int *__restrict dst, + unsigned long long int *__restrict src1, + unsigned long long int *__restrict src2) +{ + for (int i = 0; i != 4; i++) + dst[i] = src1[i] <= src2[i] ? src1[i] : dst[i]; +} diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-pr92686-movcc-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-pr92686-movcc-2.c new file mode 100644 index 00000000000..5f5562b98f1 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512vl-pr92686-movcc-2.c @@ -0,0 +1,102 @@ +/* { dg-do run } */ +/* { dg-require-effective-target avx512bw } */ +/* { dg-require-effective-target avx512vl } */ +/* { dg-options "-Ofast -mavx512bw -mavx512vl -mprefer-vector-width=256" } */ + +#ifndef CHECK +#define CHECK "avx512f-helper.h" +#endif + +#include CHECK + +#ifndef TEST +#define TEST avx512vl_test +#endif + +#include "avx512vl-pr92686-movcc-1.c" +#include "pr92686.inc" + +#define NUM 256 + + +#define TEST_SIGNED(vtype, type, N, fn, fn2, op) \ +do \ + { \ + type dst[NUM], src1[NUM], src2[NUM]; \ + int i, j, sign = 1; \ + type res[N]; \ + for (i = 0; i < NUM; i++) \ + { \ + src1[i] = i * i * sign; \ + src2[i] = (i + 20) * sign; \ + dst[i] = i * i * i + 100; \ + sign = -sign; \ + } \ + for (i = 0; i < NUM; i += N) \ + { \ + for (j = 0; j < N; j++) \ + res[j] = dst[i + j]; \ + fn (&dst[i], &src1[i], &src2[i]); \ + for (j = 0; j < N; j++) \ + { \ + res[j] = fn2 (res[j], src1[i + j], \ + src2[i+ j], op); \ + if (res[j] != dst[i+ j]) \ + abort(); \ + } \ + 
} \ + } \ +while (0) + +#define TEST_UNSIGNED(vtype, type, N, fn, fn2, op) \ +do \ + { \ + type dst[NUM], src1[NUM], src2[NUM]; \ + int i,j; \ + type res[N]; \ + \ + for (i = 0; i < NUM; i++) \ + { \ + src1[i] = i * i; \ + src2[i] = i + 20; \ + dst[i] = i * i * i + 100; \ + if ((i % 4)) \ + src2[i] |= (1ULL << (sizeof (type) \ + * __CHAR_BIT__ - 1)); \ + } \ + for (i = 0; i < NUM; i += N) \ + { \ + for (j = 0; j < N; j++) \ + res[j] = dst[i + j]; \ + fn (&dst[i], &src1[i], &src2[i]); \ + for (j = 0; j < N; j++) \ + { \ + res[j] = fn2 (res[j], src1[i + j], \ + src2[i + j], op); \ + if (res[j] != dst[i + j]) \ + abort(); \ + } \ + } \ + } \ +while (0) + +static void +TEST (void) +{ + TEST_SIGNED (v32qi, signed char, 32, f1, cmpb, 5); + TEST_UNSIGNED (v32uqi, unsigned char, 32, f2, cmpub, 5); + TEST_SIGNED (v32qi, signed char, 32, f3, cmpb, 2); + TEST_UNSIGNED (v32uqi, unsigned char, 32, f4, cmpub, 2); + TEST_SIGNED (v16hi, short int, 16, f5, cmpw, 5); + TEST_UNSIGNED (v16uhi, unsigned short int, 16, f6, cmpuw, 5); + TEST_SIGNED (v16hi, short int, 16, f7, cmpw, 2); + TEST_UNSIGNED (v16uhi, unsigned short int, 16, f8, cmpuw, 2); + TEST_SIGNED (v8si, int, 8, f9, cmpd, 5); + TEST_UNSIGNED (v8usi, unsigned int, 8, f10, cmpud, 5); + TEST_SIGNED (v8si, int, 8, f11, cmpd, 2); + TEST_UNSIGNED (v8usi, unsigned int, 8, f12, cmpud, 2); + TEST_SIGNED (v4di, long long int, 4, f13, cmpq, 5); + TEST_UNSIGNED (v4udi, unsigned long long int, 4, f14, cmpuq, 5); + TEST_SIGNED (v4di, long long int, 4, f15, cmpq, 2); + TEST_UNSIGNED (v4udi, unsigned long long int, 4, f16, cmpuq, 2); +} diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-pr92686-vpcmp-1.c b/gcc/testsuite/gcc.target/i386/avx512vl-pr92686-vpcmp-1.c new file mode 100644 index 00000000000..5b79d4d36f9 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512vl-pr92686-vpcmp-1.c @@ -0,0 +1,112 @@ +/* PR target/92686 */ +/* { dg-do compile } */ +/* { dg-options "-O2 -mavx512bw -mavx512vl -mno-avx512dq -mno-xop" } */ +/* { 
dg-final { scan-assembler-times "vpcmp\[bwdq\]\[\t ]" 8 } } */ +/* { dg-final { scan-assembler-times "vpcmpu\[bwdq\]\[\t ]" 8 } } */ +/* { dg-final { scan-assembler-times "vpmovm2\[bw\]\[\t ]" 8 } } */ +/* { dg-final { scan-assembler-times "vpternlog\[dq\]\[\t ]" 8 } } */ + +typedef signed char v32qi __attribute__((vector_size(32))); +typedef unsigned char v32uqi __attribute__((vector_size(32))); +typedef short v16hi __attribute__((vector_size(32))); +typedef unsigned short v16uhi __attribute__((vector_size(32))); +typedef int v8si __attribute__((vector_size(32))); +typedef unsigned v8usi __attribute__((vector_size(32))); +typedef long long v4di __attribute__((vector_size(32))); +typedef unsigned long long v4udi __attribute__((vector_size(32))); + +__attribute__((noipa)) v32qi +f1 (v32qi x, v32qi y) +{ + return x >= y; +} + +__attribute__((noipa)) v32uqi +f2 (v32uqi x, v32uqi y) +{ + return x >= y; +} + +__attribute__((noipa)) v32qi +f3 (v32qi x, v32qi y) +{ + return x <= y; +} + +__attribute__((noipa)) v32uqi +f4 (v32uqi x, v32uqi y) +{ + return x <= y; +} + +__attribute__((noipa)) v16hi +f5 (v16hi x, v16hi y) +{ + return x >= y; +} + +__attribute__((noipa)) v16uhi +f6 (v16uhi x, v16uhi y) +{ + return x >= y; +} + +__attribute__((noipa)) v16hi +f7 (v16hi x, v16hi y) +{ + return x <= y; +} + +__attribute__((noipa)) v16uhi +f8 (v16uhi x, v16uhi y) +{ + return x <= y; +} + +__attribute__((noipa)) v8si +f9 (v8si x, v8si y) +{ + return x >= y; +} + +__attribute__((noipa)) v8usi +f10 (v8usi x, v8usi y) +{ + return x >= y; +} + +__attribute__((noipa)) v8si +f11 (v8si x, v8si y) +{ + return x <= y; +} + +__attribute__((noipa)) v8usi +f12 (v8usi x, v8usi y) +{ + return x <= y; +} + +__attribute__((noipa)) v4di +f13 (v4di x, v4di y) +{ + return x >= y; +} + +__attribute__((noipa)) v4udi +f14 (v4udi x, v4udi y) +{ + return x >= y; +} + +__attribute__((noipa)) v4di +f15 (v4di x, v4di y) +{ + return x <= y; +} + +__attribute__((noipa)) v4udi +f16 (v4udi x, v4udi y) +{ + return 
x <= y; +} diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-pr92686-vpcmp-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-pr92686-vpcmp-2.c new file mode 100644 index 00000000000..6be24ff30f4 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512vl-pr92686-vpcmp-2.c @@ -0,0 +1,91 @@ +/* { dg-do run } */ +/* { dg-require-effective-target avx512bw } */ +/* { dg-require-effective-target avx512vl } */ +/* { dg-options "-O2 -mavx512bw -mavx512vl" } */ + +#ifndef CHECK +#define CHECK "avx512f-helper.h" +#endif + +#include CHECK + +#ifndef TEST +#define TEST avx512vl_test +#endif + +#include "avx512vl-pr92686-vpcmp-1.c" + +#define NUM 256 + +#define TEST_SIGNED(vtype, type, N, fn, op) \ +do \ + { \ + union { vtype x[NUM / N]; type i[NUM]; } dst, src1, src2; \ + int i, sign = 1; \ + type res; \ + for (i = 0; i < NUM; i++) \ + { \ + src1.i[i] = i * i * sign; \ + src2.i[i] = (i + 20) * sign; \ + sign = -sign; \ + } \ + for (i = 0; i < NUM; i += N) \ + dst.x[i / N] = fn (src1.x[i / N], src2.x[i / N]); \ + \ + for (i = 0; i < NUM; i++) \ + { \ + res = src1.i[i] op src2.i[i] ? -1 : 0; \ + if (res != dst.i[i]) \ + abort (); \ + } \ + } \ +while (0) + +#define TEST_UNSIGNED(vtype, type, N, fn, op) \ +do \ + { \ + union { vtype x[NUM / N]; type i[NUM]; } dst, src1, src2; \ + int i; \ + type res; \ + \ + for (i = 0; i < NUM; i++) \ + { \ + src1.i[i] = i * i; \ + src2.i[i] = i + 20; \ + if ((i % 4)) \ + src2.i[i] |= (1ULL << (sizeof (type) \ + * __CHAR_BIT__ - 1)); \ + } \ + \ + for (i = 0; i < NUM; i += N) \ + dst.x[i / N] = fn (src1.x[i / N], src2.x[i / N]); \ + \ + for (i = 0; i < NUM; i++) \ + { \ + res = src1.i[i] op src2.i[i] ? 
-1 : 0; \ + if (res != dst.i[i]) \ + abort (); \ + } \ + } \ +while (0) + +static void +TEST (void) +{ + TEST_SIGNED (v32qi, signed char, 32, f1, >=); + TEST_UNSIGNED (v32uqi, unsigned char, 32, f2, >=); + TEST_SIGNED (v32qi, signed char, 32, f3, <=); + TEST_UNSIGNED (v32uqi, unsigned char, 32, f4, <=); + TEST_SIGNED (v16hi, short int, 16, f5, >=); + TEST_UNSIGNED (v16uhi, unsigned short int, 16, f6, >=); + TEST_SIGNED (v16hi, short int, 16, f7, <=); + TEST_UNSIGNED (v16uhi, unsigned short int, 16, f8, <=); + TEST_SIGNED (v8si, int, 8, f9, >=); + TEST_UNSIGNED (v8usi, unsigned int, 8, f10, >=); + TEST_SIGNED (v8si, int, 8, f11, <=); + TEST_UNSIGNED (v8usi, unsigned int, 8, f12, <=); + TEST_SIGNED (v4di, long long int, 4, f13, >=); + TEST_UNSIGNED (v4udi, unsigned long long int, 4, f14, >=); + TEST_SIGNED (v4di, long long int, 4, f15, <=); + TEST_UNSIGNED (v4udi, unsigned long long int, 4, f16, <=); +} diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-pr92686-vpcmp-intelasm-1.c b/gcc/testsuite/gcc.target/i386/avx512vl-pr92686-vpcmp-intelasm-1.c new file mode 100644 index 00000000000..c9a1b690035 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512vl-pr92686-vpcmp-intelasm-1.c @@ -0,0 +1,110 @@ +/* PR target/92686 */ +/* { dg-do assemble } */ +/* { dg-options "-O2 -mavx512bw -mavx512vl -mno-avx512dq -mno-xop -masm=intel" } */ +/* { dg-require-effective-target avx512bw } */ +/* { dg-require-effective-target avx512vl } */ + +typedef signed char v32qi __attribute__((vector_size(32))); +typedef unsigned char v32uqi __attribute__((vector_size(32))); +typedef short v16hi __attribute__((vector_size(32))); +typedef unsigned short v16uhi __attribute__((vector_size(32))); +typedef int v8si __attribute__((vector_size(32))); +typedef unsigned v8usi __attribute__((vector_size(32))); +typedef long long v4di __attribute__((vector_size(32))); +typedef unsigned long long v4udi __attribute__((vector_size(32))); + +__attribute__((noipa)) v32qi +f1 (v32qi x, v32qi y) +{ + return x 
>= y; +} + +__attribute__((noipa)) v32uqi +f2 (v32uqi x, v32uqi y) +{ + return x >= y; +} + +__attribute__((noipa)) v32qi +f3 (v32qi x, v32qi y) +{ + return x <= y; +} + +__attribute__((noipa)) v32uqi +f4 (v32uqi x, v32uqi y) +{ + return x <= y; +} + +__attribute__((noipa)) v16hi +f5 (v16hi x, v16hi y) +{ + return x >= y; +} + +__attribute__((noipa)) v16uhi +f6 (v16uhi x, v16uhi y) +{ + return x >= y; +} + +__attribute__((noipa)) v16hi +f7 (v16hi x, v16hi y) +{ + return x <= y; +} + +__attribute__((noipa)) v16uhi +f8 (v16uhi x, v16uhi y) +{ + return x <= y; +} + +__attribute__((noipa)) v8si +f9 (v8si x, v8si y) +{ + return x >= y; +} + +__attribute__((noipa)) v8usi +f10 (v8usi x, v8usi y) +{ + return x >= y; +} + +__attribute__((noipa)) v8si +f11 (v8si x, v8si y) +{ + return x <= y; +} + +__attribute__((noipa)) v8usi +f12 (v8usi x, v8usi y) +{ + return x <= y; +} + +__attribute__((noipa)) v4di +f13 (v4di x, v4di y) +{ + return x >= y; +} + +__attribute__((noipa)) v4udi +f14 (v4udi x, v4udi y) +{ + return x >= y; +} + +__attribute__((noipa)) v4di +f15 (v4di x, v4di y) +{ + return x <= y; +} + +__attribute__((noipa)) v4udi +f16 (v4udi x, v4udi y) +{ + return x <= y; +} diff --git a/gcc/testsuite/gcc.target/i386/pr88547-1.c b/gcc/testsuite/gcc.target/i386/pr88547-1.c index b6c82157bba..fa6832c7fab 100644 --- a/gcc/testsuite/gcc.target/i386/pr88547-1.c +++ b/gcc/testsuite/gcc.target/i386/pr88547-1.c @@ -1,19 +1,9 @@ /* PR target/88547 */ /* { dg-do compile } */ /* { dg-options "-O2 -mavx512vl -mavx512bw -mavx512dq" } */ -/* { dg-final { scan-assembler-not "vpternlog" } } */ -/* { dg-final { scan-assembler-times "vpmovm2b\[\t ]" 4 } } */ -/* { dg-final { scan-assembler-times "vpmovm2w\[\t ]" 4 } } */ -/* { dg-final { scan-assembler-times "vpmovm2d\[\t ]" 4 } } */ -/* { dg-final { scan-assembler-times "vpmovm2q\[\t ]" 4 } } */ -/* { dg-final { scan-assembler-times "knotb\[\t ]" 2 } } */ -/* { dg-final { scan-assembler-times "knotw\[\t ]" 2 } } */ -/* { dg-final { 
scan-assembler-times "knotd\[\t ]" 2 } } */ -/* { dg-final { scan-assembler-times "knotq\[\t ]" 2 } } */ -/* { dg-final { scan-assembler-times "vpminud\[\t ]" 2 } } */ -/* { dg-final { scan-assembler-times "vpminuq\[\t ]" 2 } } */ -/* { dg-final { scan-assembler-not "vpsubd\[\t ]" } } */ -/* { dg-final { scan-assembler-not "vpsubq\[\t ]" } } */ +/* { dg-final { scan-assembler-times "vpcmp\[bwdq\]\[\t ]" 8 } } */ +/* { dg-final { scan-assembler-times "vpcmpu\[bwdq\]\[\t ]" 8 } } */ +/* { dg-final { scan-assembler-times "vpmovm2\[bwdq\]\[\t ]" 16 } } */ typedef signed char v64qi __attribute__((vector_size(64))); typedef unsigned char v64uqi __attribute__((vector_size(64))); diff --git a/gcc/testsuite/gcc.target/i386/pr92686.inc b/gcc/testsuite/gcc.target/i386/pr92686.inc new file mode 100644 index 00000000000..260581ed63b --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr92686.inc @@ -0,0 +1,189 @@ +/* Included by avx512bw-pr92686-movcc-2.c, avx512vl-pr92686-movcc-2.c */ +__attribute__((noipa)) char +cmpb (char dst, char src1, char src2, int num) +{ + switch(num) + { + case 0: + return src1 == src2 ? src1 : dst; + case 1: + return src1 < src2 ? src1 : dst; + case 2: + return src1 <= src2 ? src1 : dst; + case 4: + return src1 != src2 ? src1 : dst; + case 5: + return src1 >= src2 ? src1 : dst; + case 6: + return src1 > src2 ? src1 : dst; + default: + abort(); + } + abort(); +} + +__attribute__((noipa)) unsigned char +cmpub (unsigned char dst, unsigned char src1, + unsigned char src2, int num) +{ + switch(num) + { + case 0: + return src1 == src2 ? src1 : dst; + case 1: + return src1 < src2 ? src1 : dst; + case 2: + return src1 <= src2 ? src1 : dst; + case 4: + return src1 != src2 ? src1 : dst; + case 5: + return src1 >= src2 ? src1 : dst; + case 6: + return src1 > src2 ? src1 : dst; + default: + abort(); + } + abort(); +} + +__attribute__((noipa)) short +cmpw (short dst, short src1, short src2, int num) +{ + switch(num) + { + case 0: + return src1 == src2 ? 
src1 : dst; + case 1: + return src1 < src2 ? src1 : dst; + case 2: + return src1 <= src2 ? src1 : dst; + case 4: + return src1 != src2 ? src1 : dst; + case 5: + return src1 >= src2 ? src1 : dst; + case 6: + return src1 > src2 ? src1 : dst; + default: + abort(); + } + abort(); +} + +__attribute__((noipa)) unsigned short +cmpuw (unsigned short dst, unsigned short src1, + unsigned short src2, int num) +{ + switch(num) + { + case 0: + return src1 == src2 ? src1 : dst; + case 1: + return src1 < src2 ? src1 : dst; + case 2: + return src1 <= src2 ? src1 : dst; + case 4: + return src1 != src2 ? src1 : dst; + case 5: + return src1 >= src2 ? src1 : dst; + case 6: + return src1 > src2 ? src1 : dst; + default: + abort(); + } + abort(); +} + +__attribute__((noipa)) int +cmpd (int dst, int src1, int src2, int num) +{ + switch(num) + { + case 0: + return src1 == src2 ? src1 : dst; + case 1: + return src1 < src2 ? src1 : dst; + case 2: + return src1 <= src2 ? src1 : dst; + case 4: + return src1 != src2 ? src1 : dst; + case 5: + return src1 >= src2 ? src1 : dst; + case 6: + return src1 > src2 ? src1 : dst; + default: + abort(); + } + abort(); +} + +__attribute__((noipa)) unsigned int +cmpud (unsigned int dst, unsigned int src1, + unsigned int src2, int num) +{ + switch(num) + { + case 0: + return src1 == src2 ? src1 : dst; + case 1: + return src1 < src2 ? src1 : dst; + case 2: + return src1 <= src2 ? src1 : dst; + case 4: + return src1 != src2 ? src1 : dst; + case 5: + return src1 >= src2 ? src1 : dst; + case 6: + return src1 > src2 ? src1 : dst; + default: + abort(); + } + abort(); +} + +__attribute__((noipa)) long long int +cmpq (long long int dst, long long int src1, + long long int src2, int num) +{ + switch(num) + { + case 0: + return src1 == src2 ? src1 : dst; + case 1: + return src1 < src2 ? src1 : dst; + case 2: + return src1 <= src2 ? src1 : dst; + case 4: + return src1 != src2 ? src1 : dst; + case 5: + return src1 >= src2 ? src1 : dst; + case 6: + return src1 > src2 ? 
src1 : dst; + default: + abort(); + } + abort(); +} + +__attribute__((noipa)) unsigned long long int +cmpuq (unsigned long long int dst, unsigned long long int src1, + unsigned long long int src2, int num) +{ + switch(num) + { + case 0: + return src1 == src2 ? src1 : dst; + case 1: + return src1 < src2 ? src1 : dst; + case 2: + return src1 <= src2 ? src1 : dst; + case 4: + return src1 != src2 ? src1 : dst; + case 5: + return src1 >= src2 ? src1 : dst; + case 6: + return src1 > src2 ? src1 : dst; + default: + abort(); + } + abort(); +} -- 2.30.2