From: H.J. Lu Date: Mon, 3 Jun 2019 02:20:33 +0000 (+0000) Subject: re PR target/89750 (Wrong code for _mm_comi_round_ss) X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=467e9f383c34a368d41814d64691faa1d34b9819;p=gcc.git re PR target/89750 (Wrong code for _mm_comi_round_ss) 2019-05-06 H.J. Lu Hongtao Liu PR target/89750 PR target/86444 * config/i386/i386-expand.c (ix86_expand_sse_comi_round): Modified, original implementation isn't correct. 2019-05-06 H.J. Lu Hongtao Liu PR target/89750 PR target/86444 * gcc.target/i386/avx512f-vcomisd-2.c: New. * gcc.target/i386/avx512f-vcomisd-2.c: Likewise. Co-Authored-By: Hongtao Liu From-SVN: r271853 --- diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 0d4ba896ea4..3563d6dd375 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -2576,6 +2576,14 @@ Fix test-suite. * tree-ssa-phiopt.c (two_value_replacement): Fix a typo in parameter detection. +2019-05-06 H.J. Lu + Hongtao Liu + + PR target/89750 + PR target/86444 + * config/i386/i386-expand.c (ix86_expand_sse_comi_round): + Modified, original implementation isn't correct. + 2019-05-06 Segher Boessenkool * config/rs6000/rs6000.md (FIRST_ALTIVEC_REGNO, LAST_ALTIVEC_REGNO) diff --git a/gcc/config/i386/i386-expand.c b/gcc/config/i386/i386-expand.c index 766e2a8ec6c..01ac5ea3f54 100644 --- a/gcc/config/i386/i386-expand.c +++ b/gcc/config/i386/i386-expand.c @@ -10007,17 +10007,23 @@ ix86_expand_sse_comi_round (const struct builtin_description *d, const struct insn_data_d *insn_p = &insn_data[icode]; machine_mode mode0 = insn_p->operand[0].mode; machine_mode mode1 = insn_p->operand[1].mode; - enum rtx_code comparison = UNEQ; - bool need_ucomi = false; /* See avxintrin.h for values. */ - enum rtx_code comi_comparisons[32] = + static const enum rtx_code comparisons[32] = { - UNEQ, GT, GE, UNORDERED, LTGT, UNLE, UNLT, ORDERED, UNEQ, UNLT, - UNLE, LT, LTGT, GE, GT, LT, UNEQ, GT, GE, UNORDERED, LTGT, UNLE, - UNLT, ORDERED, UNEQ, UNLT, UNLE, LT, LTGT, GE, GT, LT + EQ, LT, LE, UNORDERED, NE, UNGE, UNGT, ORDERED, + UNEQ, UNLT, UNLE, UNORDERED, LTGT, GE, GT, ORDERED, + EQ, LT, LE, UNORDERED, NE, UNGE, UNGT, ORDERED, + UNEQ, UNLT, UNLE, UNORDERED, LTGT, GE, GT, ORDERED }; - bool need_ucomi_values[32] = + static const bool ordereds[32] = + { + true, true, true, false, false, false, false, true, + false, false, false, true, true, true, true, false, + true, true, true, false, false, false, false, true, + false, false, false, true, true, true, true, false + }; + static const bool non_signalings[32] = { true, false, false, true, true, false, false, true, true, false, false, true, true, false, false, true, @@ -10042,16 +10048,94 @@ ix86_expand_sse_comi_round (const struct builtin_description *d, return const0_rtx; } - comparison = comi_comparisons[INTVAL (op2)]; - need_ucomi = need_ucomi_values[INTVAL (op2)]; - if (VECTOR_MODE_P (mode0)) op0 = safe_vector_operand (op0, mode0); if (VECTOR_MODE_P (mode1)) op1 = safe_vector_operand (op1, mode1); + enum rtx_code comparison = comparisons[INTVAL (op2)]; + bool ordered = ordereds[INTVAL (op2)]; + bool non_signaling = non_signalings[INTVAL (op2)]; + rtx const_val = const0_rtx; + + bool check_unordered = false; + machine_mode mode = CCFPmode; + switch (comparison) + { + case ORDERED: + if (!ordered) + { + /* NB: Use CCSmode/NE for _CMP_TRUE_UQ/_CMP_TRUE_US. */ + if (!non_signaling) + ordered = true; + mode = CCSmode; + } + else + { + /* NB: Use CCPmode/NE for _CMP_ORD_Q/_CMP_ORD_S. */ + if (non_signaling) + ordered = false; + mode = CCPmode; + } + comparison = NE; + break; + case UNORDERED: + if (ordered) + { + /* NB: Use CCSmode/EQ for _CMP_FALSE_OQ/_CMP_FALSE_OS. */ + if (non_signaling) + ordered = false; + mode = CCSmode; + } + else + { + /* NB: Use CCPmode/NE for _CMP_UNORD_Q/_CMP_UNORD_S. */ + if (!non_signaling) + ordered = true; + mode = CCPmode; + } + comparison = EQ; + break; + + case LE: /* -> GE */ + case LT: /* -> GT */ + case UNGE: /* -> UNLE */ + case UNGT: /* -> UNLT */ + std::swap (op0, op1); + comparison = swap_condition (comparison); + /* FALLTHRU */ + case GT: + case GE: + case UNEQ: + case UNLT: + case UNLE: + case LTGT: + /* These are supported by CCFPmode. NB: Use ordered/signaling + COMI or unordered/non-signaling UCOMI. Both set ZF, PF, CF + with NAN operands. */ + if (ordered == non_signaling) + ordered = !ordered; + break; + case EQ: + /* NB: COMI/UCOMI will set ZF with NAN operands. Use CCZmode for + _CMP_EQ_OQ/_CMP_EQ_OS. */ + check_unordered = true; + mode = CCZmode; + break; + case NE: + /* NB: COMI/UCOMI will set ZF with NAN operands. Use CCZmode for + _CMP_NEQ_UQ/_CMP_NEQ_US. */ + gcc_assert (!ordered); + check_unordered = true; + mode = CCZmode; + const_val = const1_rtx; + break; + default: + gcc_unreachable (); + } + target = gen_reg_rtx (SImode); - emit_move_insn (target, const0_rtx); + emit_move_insn (target, const_val); target = gen_rtx_SUBREG (QImode, target, 0); if ((optimize && !register_operand (op0, mode0)) @@ -10061,10 +10145,14 @@ ix86_expand_sse_comi_round (const struct builtin_description *d, || !insn_p->operand[1].predicate (op1, mode1)) op1 = copy_to_mode_reg (mode1, op1); - if (need_ucomi) - icode = icode == CODE_FOR_sse_comi_round - ? CODE_FOR_sse_ucomi_round - : CODE_FOR_sse2_ucomi_round; + /* + 1. COMI: ordered and signaling. + 2. UCOMI: unordered and non-signaling. + */ + if (non_signaling) + icode = (icode == CODE_FOR_sse_comi_round + ? CODE_FOR_sse_ucomi_round + : CODE_FOR_sse2_ucomi_round); pat = GEN_FCN (icode) (op0, op1, op3); if (! pat) @@ -10086,11 +10174,42 @@ ix86_expand_sse_comi_round (const struct builtin_description *d, } emit_insn (pat); + + rtx_code_label *label = NULL; + + /* NB: For ordered EQ or unordered NE, check ZF alone isn't sufficient + with NAN operands. */ + if (check_unordered) + { + gcc_assert (comparison == EQ || comparison == NE); + + rtx flag = gen_rtx_REG (CCFPmode, FLAGS_REG); + label = gen_label_rtx (); + rtx tmp = gen_rtx_fmt_ee (UNORDERED, VOIDmode, flag, const0_rtx); + tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp, + gen_rtx_LABEL_REF (VOIDmode, label), + pc_rtx); + emit_jump_insn (gen_rtx_SET (pc_rtx, tmp)); + } + + /* NB: Set CCFPmode and check a different CCmode which is in subset + of CCFPmode. */ + if (GET_MODE (set_dst) != mode) + { + gcc_assert (mode == CCAmode || mode == CCCmode + || mode == CCOmode || mode == CCPmode + || mode == CCSmode || mode == CCZmode); + set_dst = gen_rtx_REG (mode, FLAGS_REG); + } + emit_insn (gen_rtx_SET (gen_rtx_STRICT_LOW_PART (VOIDmode, target), gen_rtx_fmt_ee (comparison, QImode, set_dst, const0_rtx))); + if (label) + emit_label (label); + return SUBREG_REG (target); } diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index 6535ca09f32..d3705be7d7c 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1563,6 +1563,14 @@ optimization. * gcc.dg/tree-ssa/pr88676-2.c: New testcase. +2019-05-06 H.J. Lu + Hongtao Liu + + PR target/89750 + PR target/86444 + * gcc.target/i386/avx512f-vcomisd-2.c: New. + * gcc.target/i386/avx512f-vcomisd-2.c: Likewise. + 2019-05-06 Steven G. Kargl PR fortran/90290 diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vcomisd-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vcomisd-2.c new file mode 100644 index 00000000000..a84580e29c5 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512f-vcomisd-2.c @@ -0,0 +1,104 @@ +/* { dg-do run } */ +/* { dg-require-effective-target avx512f } */ +/* { dg-options "-O2 -mavx512f" } */ + +#include "avx512f-check.h" + +static inline void __attribute__ ((__always_inline__)) +check_cmp (double s1, double s2, const int imm, int expected) +{ + __m128d source1 = _mm_load_sd (&s1); + __m128d source2 = _mm_load_sd (&s2); + int res = _mm_comi_round_sd (source1, source2, imm, + _MM_FROUND_NO_EXC); + if (expected != res) + abort(); +} + +static void +do_check (double s1, double s2) +{ + check_cmp (s1, s2, _CMP_EQ_OQ, + !__builtin_isunordered (s1, s2) && s1 == s2); + check_cmp (s1, s2, _CMP_LT_OS, + !__builtin_isunordered (s1, s2) && s1 < s2); + check_cmp (s1, s2, _CMP_LE_OS, + !__builtin_isunordered (s1, s2) && s1 <= s2); + check_cmp (s1, s2, _CMP_UNORD_Q, + __builtin_isunordered (s1, s2)); + check_cmp (s1, s2, _CMP_NEQ_UQ, + __builtin_isunordered (s1, s2) || s1 != s2); + check_cmp (s1, s2, _CMP_NLT_US, + __builtin_isunordered (s1, s2) || s1 >= s2); + check_cmp (s1, s2, _CMP_NLE_US, + __builtin_isunordered (s1, s2) || s1 > s2); + check_cmp (s1, s2, _CMP_ORD_Q, + !__builtin_isunordered (s1, s2)); + check_cmp (s1, s2, _CMP_EQ_UQ, + __builtin_isunordered (s1, s2) || s1 == s2); + check_cmp (s1, s2, _CMP_NGE_US, + __builtin_isunordered (s1, s2) || s1 < s2); + check_cmp (s1, s2, _CMP_NGT_US, + __builtin_isunordered (s1, s2) || s1 <= s2); + check_cmp (s1, s2, _CMP_FALSE_OQ, 0); + check_cmp (s1, s2, _CMP_NEQ_OQ, + !__builtin_isunordered (s1, s2) && s1 != s2); + check_cmp (s1, s2, _CMP_GE_OS, + !__builtin_isunordered (s1, s2) && s1 >= s2); + check_cmp (s1, s2, _CMP_GT_OS, + !__builtin_isunordered (s1, s2) && s1 > s2); + check_cmp (s1, s2, _CMP_TRUE_UQ, 1); + check_cmp (s1, s2, _CMP_EQ_OS, + !__builtin_isunordered (s1, s2) && s1 == s2); + check_cmp (s1, s2, _CMP_LT_OQ, + !__builtin_isunordered (s1, s2) && s1 < s2); + check_cmp (s1, s2, _CMP_LE_OQ, + !__builtin_isunordered (s1, s2) && s1 <= s2); + check_cmp (s1, s2, _CMP_UNORD_S, + __builtin_isunordered (s1, s2)); + check_cmp (s1, s2, _CMP_NEQ_US, + __builtin_isunordered (s1, s2) || s1 != s2); + check_cmp (s1, s2, _CMP_NLT_UQ, + __builtin_isunordered (s1, s2) || s1 >= s2); + check_cmp (s1, s2, _CMP_NLE_UQ, + __builtin_isunordered (s1, s2) || s1 > s2); + check_cmp (s1, s2, _CMP_ORD_S, !__builtin_isunordered (s1, s2)); + check_cmp (s1, s2, _CMP_EQ_US, + __builtin_isunordered (s1, s2) || s1 == s2); + check_cmp (s1, s2, _CMP_NGE_UQ, + __builtin_isunordered (s1, s2) || s1 < s2); + check_cmp (s1, s2, _CMP_NGT_UQ, + __builtin_isunordered (s1, s2) || s1 <= s2); + check_cmp (s1, s2, _CMP_FALSE_OS, 0); + check_cmp (s1, s2, _CMP_NEQ_OS, + !__builtin_isunordered (s1, s2) && s1 != s2); + check_cmp (s1, s2, _CMP_GE_OQ, + !__builtin_isunordered (s1, s2) && s1 >= s2); + check_cmp (s1, s2, _CMP_GT_OQ, + !__builtin_isunordered (s1, s2) && s1 > s2); + check_cmp (s1, s2, _CMP_TRUE_US, 1); +} + +static void +avx512f_test (void) +{ + struct + { + double x1; + double x2; + } + inputs[] = + { + { 4.3, 2.18 }, + { -4.3, 3.18 }, + { __builtin_nan (""), -5.8 }, + { -4.8, __builtin_nans ("") }, + { 3.8, __builtin_nans ("") }, + { 4.2, 4.2 }, + { __builtin_nan (""), __builtin_nans ("") }, + }; + int i; + + for (i = 0; i < sizeof (inputs) / sizeof (inputs[0]); i++) + do_check (inputs[i].x1, inputs[i].x2); +} diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vcomiss-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vcomiss-2.c new file mode 100644 index 00000000000..381a8b4661e --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512f-vcomiss-2.c @@ -0,0 +1,104 @@ +/* { dg-do run } */ +/* { dg-require-effective-target avx512f } */ +/* { dg-options "-O2 -mavx512f" } */ + +#include "avx512f-check.h" + +static inline void __attribute__ ((__always_inline__)) +check_cmp (float s1, float s2, const int imm, int expected) +{ + __m128 source1 = _mm_load_ss (&s1); + __m128 source2 = _mm_load_ss (&s2); + int res = _mm_comi_round_ss (source1, source2, imm, + _MM_FROUND_NO_EXC); + if (expected != res) + abort(); +} + +static void +do_check (float s1, float s2) +{ + check_cmp (s1, s2, _CMP_EQ_OQ, + !__builtin_isunordered (s1, s2) && s1 == s2); + check_cmp (s1, s2, _CMP_LT_OS, + !__builtin_isunordered (s1, s2) && s1 < s2); + check_cmp (s1, s2, _CMP_LE_OS, + !__builtin_isunordered (s1, s2) && s1 <= s2); + check_cmp (s1, s2, _CMP_UNORD_Q, + __builtin_isunordered (s1, s2)); + check_cmp (s1, s2, _CMP_NEQ_UQ, + __builtin_isunordered (s1, s2) || s1 != s2); + check_cmp (s1, s2, _CMP_NLT_US, + __builtin_isunordered (s1, s2) || s1 >= s2); + check_cmp (s1, s2, _CMP_NLE_US, + __builtin_isunordered (s1, s2) || s1 > s2); + check_cmp (s1, s2, _CMP_ORD_Q, + !__builtin_isunordered (s1, s2)); + check_cmp (s1, s2, _CMP_EQ_UQ, + __builtin_isunordered (s1, s2) || s1 == s2); + check_cmp (s1, s2, _CMP_NGE_US, + __builtin_isunordered (s1, s2) || s1 < s2); + check_cmp (s1, s2, _CMP_NGT_US, + __builtin_isunordered (s1, s2) || s1 <= s2); + check_cmp (s1, s2, _CMP_FALSE_OQ, 0); + check_cmp (s1, s2, _CMP_NEQ_OQ, + !__builtin_isunordered (s1, s2) && s1 != s2); + check_cmp (s1, s2, _CMP_GE_OS, + !__builtin_isunordered (s1, s2) && s1 >= s2); + check_cmp (s1, s2, _CMP_GT_OS, + !__builtin_isunordered (s1, s2) && s1 > s2); + check_cmp (s1, s2, _CMP_TRUE_UQ, 1); + check_cmp (s1, s2, _CMP_EQ_OS, + !__builtin_isunordered (s1, s2) && s1 == s2); + check_cmp (s1, s2, _CMP_LT_OQ, + !__builtin_isunordered (s1, s2) && s1 < s2); + check_cmp (s1, s2, _CMP_LE_OQ, + !__builtin_isunordered (s1, s2) && s1 <= s2); + check_cmp (s1, s2, _CMP_UNORD_S, + __builtin_isunordered (s1, s2)); + check_cmp (s1, s2, _CMP_NEQ_US, + __builtin_isunordered (s1, s2) || s1 != s2); + check_cmp (s1, s2, _CMP_NLT_UQ, + __builtin_isunordered (s1, s2) || s1 >= s2); + check_cmp (s1, s2, _CMP_NLE_UQ, + __builtin_isunordered (s1, s2) || s1 > s2); + check_cmp (s1, s2, _CMP_ORD_S, !__builtin_isunordered (s1, s2)); + check_cmp (s1, s2, _CMP_EQ_US, + __builtin_isunordered (s1, s2) || s1 == s2); + check_cmp (s1, s2, _CMP_NGE_UQ, + __builtin_isunordered (s1, s2) || s1 < s2); + check_cmp (s1, s2, _CMP_NGT_UQ, + __builtin_isunordered (s1, s2) || s1 <= s2); + check_cmp (s1, s2, _CMP_FALSE_OS, 0); + check_cmp (s1, s2, _CMP_NEQ_OS, + !__builtin_isunordered (s1, s2) && s1 != s2); + check_cmp (s1, s2, _CMP_GE_OQ, + !__builtin_isunordered (s1, s2) && s1 >= s2); + check_cmp (s1, s2, _CMP_GT_OQ, + !__builtin_isunordered (s1, s2) && s1 > s2); + check_cmp (s1, s2, _CMP_TRUE_US, 1); +} + +static void +avx512f_test (void) +{ + struct + { + float x1; + float x2; + } + inputs[] = + { + { 4.3, 2.18 }, + { -4.3, 3.18 }, + { __builtin_nanf (""), -5.8 }, + { -4.8, __builtin_nansf ("") }, + { 3.8, __builtin_nansf ("") }, + { 4.2, 4.2 }, + { __builtin_nanf (""), __builtin_nansf ("") }, + }; + int i; + + for (i = 0; i < sizeof (inputs) / sizeof (inputs[0]); i++) + do_check (inputs[i].x1, inputs[i].x2); +}