* tree-ssa-phiopt.c (two_value_replacement): Fix a typo in parameter
detection.
+2019-05-06 H.J. Lu <hongjiu.lu@intel.com>
+ Hongtao Liu <hongtao.liu@intel.com>
+
+ PR target/89750
+ PR target/86444
+ * config/i386/i386-expand.c (ix86_expand_sse_comi_round):
+	Rewrite; the original implementation was incorrect.
+
2019-05-06 Segher Boessenkool <segher@kernel.crashing.org>
* config/rs6000/rs6000.md (FIRST_ALTIVEC_REGNO, LAST_ALTIVEC_REGNO)
const struct insn_data_d *insn_p = &insn_data[icode];
machine_mode mode0 = insn_p->operand[0].mode;
machine_mode mode1 = insn_p->operand[1].mode;
- enum rtx_code comparison = UNEQ;
- bool need_ucomi = false;
/* See avxintrin.h for values. */
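+  /* NB: Three lookup tables indexed by the predicate immediate
+     (imm8[4:0]): the rtx comparison code, whether the predicate is
+     ordered, and whether it is non-signaling (quiet).  Entries 16-31
+     repeat the comparison codes and ordered flags of entries 0-15
+     and invert the signaling flags, since imm8 bit 4 only selects
+     the signaling variant.  */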
- enum rtx_code comi_comparisons[32] =
+ static const enum rtx_code comparisons[32] =
{
- UNEQ, GT, GE, UNORDERED, LTGT, UNLE, UNLT, ORDERED, UNEQ, UNLT,
- UNLE, LT, LTGT, GE, GT, LT, UNEQ, GT, GE, UNORDERED, LTGT, UNLE,
- UNLT, ORDERED, UNEQ, UNLT, UNLE, LT, LTGT, GE, GT, LT
+ EQ, LT, LE, UNORDERED, NE, UNGE, UNGT, ORDERED,
+ UNEQ, UNLT, UNLE, UNORDERED, LTGT, GE, GT, ORDERED,
+ EQ, LT, LE, UNORDERED, NE, UNGE, UNGT, ORDERED,
+ UNEQ, UNLT, UNLE, UNORDERED, LTGT, GE, GT, ORDERED
};
- bool need_ucomi_values[32] =
+ static const bool ordereds[32] =
+ {
+ true, true, true, false, false, false, false, true,
+ false, false, false, true, true, true, true, false,
+ true, true, true, false, false, false, false, true,
+ false, false, false, true, true, true, true, false
+ };
+ static const bool non_signalings[32] =
{
true, false, false, true, true, false, false, true,
      true, false, false, true, true, false, false, true,
+      false, true, true, false, false, true, true, false,
+      false, true, true, false, false, true, true, false
+    };
return const0_rtx;
}
- comparison = comi_comparisons[INTVAL (op2)];
- need_ucomi = need_ucomi_values[INTVAL (op2)];
-
if (VECTOR_MODE_P (mode0))
op0 = safe_vector_operand (op0, mode0);
if (VECTOR_MODE_P (mode1))
op1 = safe_vector_operand (op1, mode1);
+ enum rtx_code comparison = comparisons[INTVAL (op2)];
+ bool ordered = ordereds[INTVAL (op2)];
+ bool non_signaling = non_signalings[INTVAL (op2)];
+ rtx const_val = const0_rtx;
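+  /* NB: CONST_VAL is pre-loaded into TARGET below; for the EQ/NE
+     cases it is the result used when the operands compare
+     unordered.  */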
+
+ bool check_unordered = false;
+ machine_mode mode = CCFPmode;
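+  /* NB: Default to the full floating-point CC mode; the cases below
+     narrow it to a single-flag mode (CCZmode for ZF, CCPmode for PF,
+     CCSmode for SF) when checking one flag is enough.  */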
+ switch (comparison)
+ {
+ case ORDERED:
+ if (!ordered)
+ {
+ /* NB: Use CCSmode/NE for _CMP_TRUE_UQ/_CMP_TRUE_US. */
+ if (!non_signaling)
+ ordered = true;
+ mode = CCSmode;
+ }
+ else
+ {
+ /* NB: Use CCPmode/NE for _CMP_ORD_Q/_CMP_ORD_S. */
+ if (non_signaling)
+ ordered = false;
+ mode = CCPmode;
+ }
+ comparison = NE;
+ break;
+ case UNORDERED:
+ if (ordered)
+ {
+ /* NB: Use CCSmode/EQ for _CMP_FALSE_OQ/_CMP_FALSE_OS. */
+ if (non_signaling)
+ ordered = false;
+ mode = CCSmode;
+ }
+ else
+ {
+	  /* NB: Use CCPmode/EQ for _CMP_UNORD_Q/_CMP_UNORD_S.  */
+ if (!non_signaling)
+ ordered = true;
+ mode = CCPmode;
+ }
+ comparison = EQ;
+ break;
+
+ case LE: /* -> GE */
+ case LT: /* -> GT */
+ case UNGE: /* -> UNLE */
+ case UNGT: /* -> UNLT */
+ std::swap (op0, op1);
+ comparison = swap_condition (comparison);
+ /* FALLTHRU */
+ case GT:
+ case GE:
+ case UNEQ:
+ case UNLT:
+ case UNLE:
+ case LTGT:
+ /* These are supported by CCFPmode. NB: Use ordered/signaling
+ COMI or unordered/non-signaling UCOMI. Both set ZF, PF, CF
+ with NAN operands. */
+ if (ordered == non_signaling)
+ ordered = !ordered;
+ break;
+ case EQ:
+ /* NB: COMI/UCOMI will set ZF with NAN operands. Use CCZmode for
+ _CMP_EQ_OQ/_CMP_EQ_OS. */
+ check_unordered = true;
+ mode = CCZmode;
+ break;
+ case NE:
+ /* NB: COMI/UCOMI will set ZF with NAN operands. Use CCZmode for
+ _CMP_NEQ_UQ/_CMP_NEQ_US. */
+ gcc_assert (!ordered);
+ check_unordered = true;
+ mode = CCZmode;
+ const_val = const1_rtx;
+ break;
+ default:
+ gcc_unreachable ();
+ }
+
target = gen_reg_rtx (SImode);
- emit_move_insn (target, const0_rtx);
+ emit_move_insn (target, const_val);
target = gen_rtx_SUBREG (QImode, target, 0);
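+  /* NB: The strict_low_part set at the end writes only the low byte
+     of TARGET; the unordered jump for EQ/NE skips it entirely,
+     leaving the pre-loaded CONST_VAL.  */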
  if ((optimize && !register_operand (op0, mode0))
      || !insn_p->operand[0].predicate (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);
  if ((optimize && !register_operand (op1, mode1))
      || !insn_p->operand[1].predicate (op1, mode1))
    op1 = copy_to_mode_reg (mode1, op1);
- if (need_ucomi)
- icode = icode == CODE_FOR_sse_comi_round
- ? CODE_FOR_sse_ucomi_round
- : CODE_FOR_sse2_ucomi_round;
+  /* 1. COMI: ordered and signaling.
+     2. UCOMI: unordered and non-signaling.  */
+ if (non_signaling)
+ icode = (icode == CODE_FOR_sse_comi_round
+ ? CODE_FOR_sse_ucomi_round
+ : CODE_FOR_sse2_ucomi_round);
pat = GEN_FCN (icode) (op0, op1, op3);
if (! pat)
    return 0;
emit_insn (pat);
+
+ rtx_code_label *label = NULL;
+
+  /* NB: For ordered EQ or unordered NE, checking ZF alone isn't
+     sufficient with NAN operands.  */
+ if (check_unordered)
+ {
+ gcc_assert (comparison == EQ || comparison == NE);
+
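+      /* If PF is set, the operands compared unordered: jump past the
+	 setcc below so TARGET keeps the pre-loaded unordered result.  */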
+ rtx flag = gen_rtx_REG (CCFPmode, FLAGS_REG);
+ label = gen_label_rtx ();
+ rtx tmp = gen_rtx_fmt_ee (UNORDERED, VOIDmode, flag, const0_rtx);
+ tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
+ gen_rtx_LABEL_REF (VOIDmode, label),
+ pc_rtx);
+ emit_jump_insn (gen_rtx_SET (pc_rtx, tmp));
+ }
+
+  /* NB: The comparison above sets CCFPmode; check under a different
+     CC mode whose flags are a subset of CCFPmode's.  */
+ if (GET_MODE (set_dst) != mode)
+ {
+ gcc_assert (mode == CCAmode || mode == CCCmode
+ || mode == CCOmode || mode == CCPmode
+ || mode == CCSmode || mode == CCZmode);
+ set_dst = gen_rtx_REG (mode, FLAGS_REG);
+ }
+
emit_insn (gen_rtx_SET (gen_rtx_STRICT_LOW_PART (VOIDmode, target),
gen_rtx_fmt_ee (comparison, QImode,
set_dst,
const0_rtx)));
+ if (label)
+ emit_label (label);
+
return SUBREG_REG (target);
}
optimization.
* gcc.dg/tree-ssa/pr88676-2.c: New testcase.
+2019-05-06 H.J. Lu <hongjiu.lu@intel.com>
+ Hongtao Liu <hongtao.liu@intel.com>
+
+ PR target/89750
+ PR target/86444
+ * gcc.target/i386/avx512f-vcomisd-2.c: New.
+	* gcc.target/i386/avx512f-vcomiss-2.c: Likewise.
+
2019-05-06 Steven G. Kargl <kargl@gcc.gnu.org>
PR fortran/90290
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vcomisd-2.c
+/* { dg-do run } */
+/* { dg-require-effective-target avx512f } */
+/* { dg-options "-O2 -mavx512f" } */
+
+#include "avx512f-check.h"
+
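+/* Check _mm_comi_round_sd against the C-level semantics of each
+   _CMP_* predicate, including quiet and signaling NaN inputs.
+   check_cmp is always_inline so that IMM remains a compile-time
+   constant, as the intrinsic requires.  */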
+static inline void __attribute__ ((__always_inline__))
+check_cmp (double s1, double s2, const int imm, int expected)
+{
+ __m128d source1 = _mm_load_sd (&s1);
+ __m128d source2 = _mm_load_sd (&s2);
+ int res = _mm_comi_round_sd (source1, source2, imm,
+ _MM_FROUND_NO_EXC);
+ if (expected != res)
+    abort ();
+}
+
+static void
+do_check (double s1, double s2)
+{
+ check_cmp (s1, s2, _CMP_EQ_OQ,
+ !__builtin_isunordered (s1, s2) && s1 == s2);
+ check_cmp (s1, s2, _CMP_LT_OS,
+ !__builtin_isunordered (s1, s2) && s1 < s2);
+ check_cmp (s1, s2, _CMP_LE_OS,
+ !__builtin_isunordered (s1, s2) && s1 <= s2);
+ check_cmp (s1, s2, _CMP_UNORD_Q,
+ __builtin_isunordered (s1, s2));
+ check_cmp (s1, s2, _CMP_NEQ_UQ,
+ __builtin_isunordered (s1, s2) || s1 != s2);
+ check_cmp (s1, s2, _CMP_NLT_US,
+ __builtin_isunordered (s1, s2) || s1 >= s2);
+ check_cmp (s1, s2, _CMP_NLE_US,
+ __builtin_isunordered (s1, s2) || s1 > s2);
+ check_cmp (s1, s2, _CMP_ORD_Q,
+ !__builtin_isunordered (s1, s2));
+ check_cmp (s1, s2, _CMP_EQ_UQ,
+ __builtin_isunordered (s1, s2) || s1 == s2);
+ check_cmp (s1, s2, _CMP_NGE_US,
+ __builtin_isunordered (s1, s2) || s1 < s2);
+ check_cmp (s1, s2, _CMP_NGT_US,
+ __builtin_isunordered (s1, s2) || s1 <= s2);
+ check_cmp (s1, s2, _CMP_FALSE_OQ, 0);
+ check_cmp (s1, s2, _CMP_NEQ_OQ,
+ !__builtin_isunordered (s1, s2) && s1 != s2);
+ check_cmp (s1, s2, _CMP_GE_OS,
+ !__builtin_isunordered (s1, s2) && s1 >= s2);
+ check_cmp (s1, s2, _CMP_GT_OS,
+ !__builtin_isunordered (s1, s2) && s1 > s2);
+ check_cmp (s1, s2, _CMP_TRUE_UQ, 1);
+ check_cmp (s1, s2, _CMP_EQ_OS,
+ !__builtin_isunordered (s1, s2) && s1 == s2);
+ check_cmp (s1, s2, _CMP_LT_OQ,
+ !__builtin_isunordered (s1, s2) && s1 < s2);
+ check_cmp (s1, s2, _CMP_LE_OQ,
+ !__builtin_isunordered (s1, s2) && s1 <= s2);
+ check_cmp (s1, s2, _CMP_UNORD_S,
+ __builtin_isunordered (s1, s2));
+ check_cmp (s1, s2, _CMP_NEQ_US,
+ __builtin_isunordered (s1, s2) || s1 != s2);
+ check_cmp (s1, s2, _CMP_NLT_UQ,
+ __builtin_isunordered (s1, s2) || s1 >= s2);
+ check_cmp (s1, s2, _CMP_NLE_UQ,
+ __builtin_isunordered (s1, s2) || s1 > s2);
+ check_cmp (s1, s2, _CMP_ORD_S, !__builtin_isunordered (s1, s2));
+ check_cmp (s1, s2, _CMP_EQ_US,
+ __builtin_isunordered (s1, s2) || s1 == s2);
+ check_cmp (s1, s2, _CMP_NGE_UQ,
+ __builtin_isunordered (s1, s2) || s1 < s2);
+ check_cmp (s1, s2, _CMP_NGT_UQ,
+ __builtin_isunordered (s1, s2) || s1 <= s2);
+ check_cmp (s1, s2, _CMP_FALSE_OS, 0);
+ check_cmp (s1, s2, _CMP_NEQ_OS,
+ !__builtin_isunordered (s1, s2) && s1 != s2);
+ check_cmp (s1, s2, _CMP_GE_OQ,
+ !__builtin_isunordered (s1, s2) && s1 >= s2);
+ check_cmp (s1, s2, _CMP_GT_OQ,
+ !__builtin_isunordered (s1, s2) && s1 > s2);
+ check_cmp (s1, s2, _CMP_TRUE_US, 1);
+}
+
+static void
+avx512f_test (void)
+{
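+  /* Inputs cover ordered pairs, an equal pair, and quiet and
+     signaling NaNs.  */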
+ struct
+ {
+ double x1;
+ double x2;
+ }
+ inputs[] =
+ {
+ { 4.3, 2.18 },
+ { -4.3, 3.18 },
+ { __builtin_nan (""), -5.8 },
+ { -4.8, __builtin_nans ("") },
+ { 3.8, __builtin_nans ("") },
+ { 4.2, 4.2 },
+ { __builtin_nan (""), __builtin_nans ("") },
+ };
+ int i;
+
+ for (i = 0; i < sizeof (inputs) / sizeof (inputs[0]); i++)
+ do_check (inputs[i].x1, inputs[i].x2);
+}
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vcomiss-2.c
+/* { dg-do run } */
+/* { dg-require-effective-target avx512f } */
+/* { dg-options "-O2 -mavx512f" } */
+
+#include "avx512f-check.h"
+
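+/* Check _mm_comi_round_ss against the C-level semantics of each
+   _CMP_* predicate, including quiet and signaling NaN inputs.
+   check_cmp is always_inline so that IMM remains a compile-time
+   constant, as the intrinsic requires.  */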
+static inline void __attribute__ ((__always_inline__))
+check_cmp (float s1, float s2, const int imm, int expected)
+{
+ __m128 source1 = _mm_load_ss (&s1);
+ __m128 source2 = _mm_load_ss (&s2);
+ int res = _mm_comi_round_ss (source1, source2, imm,
+ _MM_FROUND_NO_EXC);
+ if (expected != res)
+    abort ();
+}
+
+static void
+do_check (float s1, float s2)
+{
+ check_cmp (s1, s2, _CMP_EQ_OQ,
+ !__builtin_isunordered (s1, s2) && s1 == s2);
+ check_cmp (s1, s2, _CMP_LT_OS,
+ !__builtin_isunordered (s1, s2) && s1 < s2);
+ check_cmp (s1, s2, _CMP_LE_OS,
+ !__builtin_isunordered (s1, s2) && s1 <= s2);
+ check_cmp (s1, s2, _CMP_UNORD_Q,
+ __builtin_isunordered (s1, s2));
+ check_cmp (s1, s2, _CMP_NEQ_UQ,
+ __builtin_isunordered (s1, s2) || s1 != s2);
+ check_cmp (s1, s2, _CMP_NLT_US,
+ __builtin_isunordered (s1, s2) || s1 >= s2);
+ check_cmp (s1, s2, _CMP_NLE_US,
+ __builtin_isunordered (s1, s2) || s1 > s2);
+ check_cmp (s1, s2, _CMP_ORD_Q,
+ !__builtin_isunordered (s1, s2));
+ check_cmp (s1, s2, _CMP_EQ_UQ,
+ __builtin_isunordered (s1, s2) || s1 == s2);
+ check_cmp (s1, s2, _CMP_NGE_US,
+ __builtin_isunordered (s1, s2) || s1 < s2);
+ check_cmp (s1, s2, _CMP_NGT_US,
+ __builtin_isunordered (s1, s2) || s1 <= s2);
+ check_cmp (s1, s2, _CMP_FALSE_OQ, 0);
+ check_cmp (s1, s2, _CMP_NEQ_OQ,
+ !__builtin_isunordered (s1, s2) && s1 != s2);
+ check_cmp (s1, s2, _CMP_GE_OS,
+ !__builtin_isunordered (s1, s2) && s1 >= s2);
+ check_cmp (s1, s2, _CMP_GT_OS,
+ !__builtin_isunordered (s1, s2) && s1 > s2);
+ check_cmp (s1, s2, _CMP_TRUE_UQ, 1);
+ check_cmp (s1, s2, _CMP_EQ_OS,
+ !__builtin_isunordered (s1, s2) && s1 == s2);
+ check_cmp (s1, s2, _CMP_LT_OQ,
+ !__builtin_isunordered (s1, s2) && s1 < s2);
+ check_cmp (s1, s2, _CMP_LE_OQ,
+ !__builtin_isunordered (s1, s2) && s1 <= s2);
+ check_cmp (s1, s2, _CMP_UNORD_S,
+ __builtin_isunordered (s1, s2));
+ check_cmp (s1, s2, _CMP_NEQ_US,
+ __builtin_isunordered (s1, s2) || s1 != s2);
+ check_cmp (s1, s2, _CMP_NLT_UQ,
+ __builtin_isunordered (s1, s2) || s1 >= s2);
+ check_cmp (s1, s2, _CMP_NLE_UQ,
+ __builtin_isunordered (s1, s2) || s1 > s2);
+ check_cmp (s1, s2, _CMP_ORD_S, !__builtin_isunordered (s1, s2));
+ check_cmp (s1, s2, _CMP_EQ_US,
+ __builtin_isunordered (s1, s2) || s1 == s2);
+ check_cmp (s1, s2, _CMP_NGE_UQ,
+ __builtin_isunordered (s1, s2) || s1 < s2);
+ check_cmp (s1, s2, _CMP_NGT_UQ,
+ __builtin_isunordered (s1, s2) || s1 <= s2);
+ check_cmp (s1, s2, _CMP_FALSE_OS, 0);
+ check_cmp (s1, s2, _CMP_NEQ_OS,
+ !__builtin_isunordered (s1, s2) && s1 != s2);
+ check_cmp (s1, s2, _CMP_GE_OQ,
+ !__builtin_isunordered (s1, s2) && s1 >= s2);
+ check_cmp (s1, s2, _CMP_GT_OQ,
+ !__builtin_isunordered (s1, s2) && s1 > s2);
+ check_cmp (s1, s2, _CMP_TRUE_US, 1);
+}
+
+static void
+avx512f_test (void)
+{
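+  /* Inputs cover ordered pairs, an equal pair, and quiet and
+     signaling NaNs.  */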
+ struct
+ {
+ float x1;
+ float x2;
+ }
+ inputs[] =
+ {
+ { 4.3, 2.18 },
+ { -4.3, 3.18 },
+ { __builtin_nanf (""), -5.8 },
+ { -4.8, __builtin_nansf ("") },
+ { 3.8, __builtin_nansf ("") },
+ { 4.2, 4.2 },
+ { __builtin_nanf (""), __builtin_nansf ("") },
+ };
+ int i;
+
+ for (i = 0; i < sizeof (inputs) / sizeof (inputs[0]); i++)
+ do_check (inputs[i].x1, inputs[i].x2);
+}