extern bool arm_fusion_enabled_p (tune_params::fuse_ops);
extern bool arm_valid_symbolic_address_p (rtx);
extern bool arm_validize_comparison (rtx *, rtx *, rtx *);
+extern bool arm_expand_vector_compare (rtx, rtx_code, rtx, rtx, bool);
#endif /* RTX_CODE */
extern bool arm_gen_setmem (rtx *);
+extern void arm_expand_vcond (rtx *, machine_mode);
extern void arm_expand_vec_perm (rtx target, rtx op0, rtx op1, rtx sel);
extern bool arm_autoinc_modes_ok_p (machine_mode, enum arm_auto_incmodes);
arm_post_atomic_barrier (model);
}
\f
+/* Expand code to compare vectors OP0 and OP1 using condition CODE.
+ If CAN_INVERT, store either the result or its inverse in TARGET
+ and return true if TARGET contains the inverse. If !CAN_INVERT,
+ always store the result in TARGET, never its inverse.
+
+ Note that the handling of floating-point comparisons is not
+ IEEE compliant. */
+
+bool
+arm_expand_vector_compare (rtx target, rtx_code code, rtx op0, rtx op1,
+ bool can_invert)
+{
+ machine_mode cmp_result_mode = GET_MODE (target);
+ machine_mode cmp_mode = GET_MODE (op0);
+
+ bool inverted;
+ switch (code)
+ {
+ /* For these we need to compute the inverse of the requested
+ comparison. */
+ case UNORDERED:
+ case UNLT:
+ case UNLE:
+ case UNGT:
+ case UNGE:
+ case UNEQ:
+ case NE:
+ code = reverse_condition_maybe_unordered (code);
+ if (!can_invert)
+ {
+ /* Recursively emit the inverted comparison into a temporary
+ and then store its inverse in TARGET. This avoids reusing
+ TARGET (which for integer NE could be one of the inputs). */
+ rtx tmp = gen_reg_rtx (cmp_result_mode);
+ if (arm_expand_vector_compare (tmp, code, op0, op1, true))
+ gcc_unreachable ();
+ emit_insn (gen_rtx_SET (target, gen_rtx_NOT (cmp_result_mode, tmp)));
+ return false;
+ }
+ inverted = true;
+ break;
+
+ default:
+ inverted = false;
+ break;
+ }
+
+ switch (code)
+ {
+ /* These are natively supported for zero comparisons, but otherwise
+ require the operands to be swapped. */
+ case LE:
+ case LT:
+ if (op1 != CONST0_RTX (cmp_mode))
+ {
+ code = swap_condition (code);
+ std::swap (op0, op1);
+ }
+ /* Fall through. */
+
+ /* These are natively supported for both register and zero operands. */
+ case EQ:
+ case GE:
+ case GT:
+ emit_insn (gen_neon_vc (code, cmp_mode, target, op0, op1));
+ return inverted;
+
+ /* These are natively supported for register operands only.
+ Comparisons with zero aren't useful and should be folded
+ or canonicalized by target-independent code. */
+ case GEU:
+ case GTU:
+ emit_insn (gen_neon_vc (code, cmp_mode, target,
+ op0, force_reg (cmp_mode, op1)));
+ return inverted;
+
+ /* These require the operands to be swapped and likewise do not
+ support comparisons with zero. */
+ case LEU:
+ case LTU:
+ emit_insn (gen_neon_vc (swap_condition (code), cmp_mode,
+ target, force_reg (cmp_mode, op1), op0));
+ return inverted;
+
+ /* These need a combination of two comparisons. */
+ case LTGT:
+ case ORDERED:
+ {
+ /* Operands are LTGT iff (a > b || a < b).
+ Operands are ORDERED iff (a > b || a <= b). */
+ rtx gt_res = gen_reg_rtx (cmp_result_mode);
+ rtx alt_res = gen_reg_rtx (cmp_result_mode);
+ rtx_code alt_code = (code == LTGT ? LT : LE);
+ if (arm_expand_vector_compare (gt_res, GT, op0, op1, true)
+ || arm_expand_vector_compare (alt_res, alt_code, op0, op1, true))
+ gcc_unreachable ();
+ emit_insn (gen_rtx_SET (target, gen_rtx_IOR (cmp_result_mode,
+ gt_res, alt_res)));
+ return inverted;
+ }
+
+ default:
+ gcc_unreachable ();
+ }
+}
+
+/* Expand a vcond or vcondu pattern with operands OPERANDS.
+ CMP_RESULT_MODE is the mode of the comparison result.  OPERANDS[0] is
+ the destination, OPERANDS[1] and OPERANDS[2] are the values to select
+ between, OPERANDS[3] is the comparison code and OPERANDS[4] and
+ OPERANDS[5] are its operands. */
+
+void
+arm_expand_vcond (rtx *operands, machine_mode cmp_result_mode)
+{
+ rtx mask = gen_reg_rtx (cmp_result_mode);
+ bool inverted = arm_expand_vector_compare (mask, GET_CODE (operands[3]),
+ operands[4], operands[5], true);
+ if (inverted)
+ std::swap (operands[1], operands[2]);
+ emit_insn (gen_neon_vbsl (GET_MODE (operands[0]), operands[0],
+ mask, operands[1], operands[2]));
+}
+\f
#define MAX_VECT_LEN 16
struct expand_vec_perm_d
[(set_attr "type" "neon_qsub<q>")]
)
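+
+;; Vector comparison patterns implementing the standard vec_cmp/vec_cmpu
+;; optabs.  Each lane of operand 0 is set to all ones if the comparison of
+;; the corresponding lanes of operands 2 and 3 holds, and to all zeros
+;; otherwise.  The floating-point variant is only provided with
+;; -funsafe-math-optimizations, since the NEON comparisons are not IEEE
+;; compliant.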
+(define_expand "vec_cmp<mode><v_cmp_result>"
+ [(set (match_operand:<V_cmp_result> 0 "s_register_operand")
+ (match_operator:<V_cmp_result> 1 "comparison_operator"
+ [(match_operand:VDQW 2 "s_register_operand")
+ (match_operand:VDQW 3 "reg_or_zero_operand")]))]
+ "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
+{
+ arm_expand_vector_compare (operands[0], GET_CODE (operands[1]),
+ operands[2], operands[3], false);
+ DONE;
+})
+
+(define_expand "vec_cmpu<mode><mode>"
+ [(set (match_operand:VDQIW 0 "s_register_operand")
+ (match_operator:VDQIW 1 "comparison_operator"
+ [(match_operand:VDQIW 2 "s_register_operand")
+ (match_operand:VDQIW 3 "reg_or_zero_operand")]))]
+ "TARGET_NEON"
+{
+ arm_expand_vector_compare (operands[0], GET_CODE (operands[1]),
+ operands[2], operands[3], false);
+ DONE;
+})
+
;; Conditional instructions. These are comparisons with conditional moves for
;; vectors. They perform the assignment:
;;
(if_then_else:VDQW
(match_operator 3 "comparison_operator"
[(match_operand:VDQW 4 "s_register_operand")
- (match_operand:VDQW 5 "nonmemory_operand")])
+ (match_operand:VDQW 5 "reg_or_zero_operand")])
(match_operand:VDQW 1 "s_register_operand")
(match_operand:VDQW 2 "s_register_operand")))]
"TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
{
- int inverse = 0;
- int use_zero_form = 0;
- int swap_bsl_operands = 0;
- rtx mask = gen_reg_rtx (<V_cmp_result>mode);
- rtx tmp = gen_reg_rtx (<V_cmp_result>mode);
-
- rtx (*base_comparison) (rtx, rtx, rtx);
- rtx (*complimentary_comparison) (rtx, rtx, rtx);
-
- switch (GET_CODE (operands[3]))
- {
- case GE:
- case GT:
- case LE:
- case LT:
- case EQ:
- if (operands[5] == CONST0_RTX (<MODE>mode))
- {
- use_zero_form = 1;
- break;
- }
- /* Fall through. */
- default:
- if (!REG_P (operands[5]))
- operands[5] = force_reg (<MODE>mode, operands[5]);
- }
-
- switch (GET_CODE (operands[3]))
- {
- case LT:
- case UNLT:
- inverse = 1;
- /* Fall through. */
- case GE:
- case UNGE:
- case ORDERED:
- case UNORDERED:
- base_comparison = gen_neon_vcge<mode>;
- complimentary_comparison = gen_neon_vcgt<mode>;
- break;
- case LE:
- case UNLE:
- inverse = 1;
- /* Fall through. */
- case GT:
- case UNGT:
- base_comparison = gen_neon_vcgt<mode>;
- complimentary_comparison = gen_neon_vcge<mode>;
- break;
- case EQ:
- case NE:
- case UNEQ:
- base_comparison = gen_neon_vceq<mode>;
- complimentary_comparison = gen_neon_vceq<mode>;
- break;
- default:
- gcc_unreachable ();
- }
-
- switch (GET_CODE (operands[3]))
- {
- case LT:
- case LE:
- case GT:
- case GE:
- case EQ:
- /* The easy case. Here we emit one of vcge, vcgt or vceq.
- As a LT b <=> b GE a && a LE b <=> b GT a. Our transformations are:
- a GE b -> a GE b
- a GT b -> a GT b
- a LE b -> b GE a
- a LT b -> b GT a
- a EQ b -> a EQ b
- Note that there also exist direct comparison against 0 forms,
- so catch those as a special case. */
- if (use_zero_form)
- {
- inverse = 0;
- switch (GET_CODE (operands[3]))
- {
- case LT:
- base_comparison = gen_neon_vclt<mode>;
- break;
- case LE:
- base_comparison = gen_neon_vcle<mode>;
- break;
- default:
- /* Do nothing, other zero form cases already have the correct
- base_comparison. */
- break;
- }
- }
-
- if (!inverse)
- emit_insn (base_comparison (mask, operands[4], operands[5]));
- else
- emit_insn (complimentary_comparison (mask, operands[5], operands[4]));
- break;
- case UNLT:
- case UNLE:
- case UNGT:
- case UNGE:
- case NE:
- /* Vector compare returns false for lanes which are unordered, so if we use
- the inverse of the comparison we actually want to emit, then
- swap the operands to BSL, we will end up with the correct result.
- Note that a NE NaN and NaN NE b are true for all a, b.
-
- Our transformations are:
- a GE b -> !(b GT a)
- a GT b -> !(b GE a)
- a LE b -> !(a GT b)
- a LT b -> !(a GE b)
- a NE b -> !(a EQ b) */
-
- if (inverse)
- emit_insn (base_comparison (mask, operands[4], operands[5]));
- else
- emit_insn (complimentary_comparison (mask, operands[5], operands[4]));
-
- swap_bsl_operands = 1;
- break;
- case UNEQ:
- /* We check (a > b || b > a). combining these comparisons give us
- true iff !(a != b && a ORDERED b), swapping the operands to BSL
- will then give us (a == b || a UNORDERED b) as intended. */
-
- emit_insn (gen_neon_vcgt<mode> (mask, operands[4], operands[5]));
- emit_insn (gen_neon_vcgt<mode> (tmp, operands[5], operands[4]));
- emit_insn (gen_ior<v_cmp_result>3 (mask, mask, tmp));
- swap_bsl_operands = 1;
- break;
- case UNORDERED:
- /* Operands are ORDERED iff (a > b || b >= a).
- Swapping the operands to BSL will give the UNORDERED case. */
- swap_bsl_operands = 1;
- /* Fall through. */
- case ORDERED:
- emit_insn (gen_neon_vcgt<mode> (tmp, operands[4], operands[5]));
- emit_insn (gen_neon_vcge<mode> (mask, operands[5], operands[4]));
- emit_insn (gen_ior<v_cmp_result>3 (mask, mask, tmp));
- break;
- default:
- gcc_unreachable ();
- }
+ arm_expand_vcond (operands, <V_cmp_result>mode);
+ DONE;
+})
- if (swap_bsl_operands)
- emit_insn (gen_neon_vbsl<mode> (operands[0], mask, operands[2],
- operands[1]));
- else
- emit_insn (gen_neon_vbsl<mode> (operands[0], mask, operands[1],
- operands[2]));
+(define_expand "vcond<V_cvtto><mode>"
+ [(set (match_operand:<V_CVTTO> 0 "s_register_operand")
+ (if_then_else:<V_CVTTO>
+ (match_operator 3 "comparison_operator"
+ [(match_operand:V32 4 "s_register_operand")
+ (match_operand:V32 5 "reg_or_zero_operand")])
+ (match_operand:<V_CVTTO> 1 "s_register_operand")
+ (match_operand:<V_CVTTO> 2 "s_register_operand")))]
+ "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
+{
+ arm_expand_vcond (operands, <V_cmp_result>mode);
DONE;
})
-(define_expand "vcondu<mode><mode>"
- [(set (match_operand:VDQIW 0 "s_register_operand")
- (if_then_else:VDQIW
+(define_expand "vcondu<mode><v_cmp_result>"
+ [(set (match_operand:VDQW 0 "s_register_operand")
+ (if_then_else:VDQW
(match_operator 3 "arm_comparison_operator"
- [(match_operand:VDQIW 4 "s_register_operand")
- (match_operand:VDQIW 5 "s_register_operand")])
- (match_operand:VDQIW 1 "s_register_operand")
- (match_operand:VDQIW 2 "s_register_operand")))]
+ [(match_operand:<V_cmp_result> 4 "s_register_operand")
+ (match_operand:<V_cmp_result> 5 "reg_or_zero_operand")])
+ (match_operand:VDQW 1 "s_register_operand")
+ (match_operand:VDQW 2 "s_register_operand")))]
"TARGET_NEON"
{
- rtx mask;
- int inverse = 0, immediate_zero = 0;
-
- mask = gen_reg_rtx (<V_cmp_result>mode);
-
- if (operands[5] == CONST0_RTX (<MODE>mode))
- immediate_zero = 1;
- else if (!REG_P (operands[5]))
- operands[5] = force_reg (<MODE>mode, operands[5]);
-
- switch (GET_CODE (operands[3]))
- {
- case GEU:
- emit_insn (gen_neon_vcgeu<mode> (mask, operands[4], operands[5]));
- break;
-
- case GTU:
- emit_insn (gen_neon_vcgtu<mode> (mask, operands[4], operands[5]));
- break;
-
- case EQ:
- emit_insn (gen_neon_vceq<mode> (mask, operands[4], operands[5]));
- break;
-
- case LEU:
- if (immediate_zero)
- emit_insn (gen_neon_vcle<mode> (mask, operands[4], operands[5]));
- else
- emit_insn (gen_neon_vcgeu<mode> (mask, operands[5], operands[4]));
- break;
-
- case LTU:
- if (immediate_zero)
- emit_insn (gen_neon_vclt<mode> (mask, operands[4], operands[5]));
- else
- emit_insn (gen_neon_vcgtu<mode> (mask, operands[5], operands[4]));
- break;
-
- case NE:
- emit_insn (gen_neon_vceq<mode> (mask, operands[4], operands[5]));
- inverse = 1;
- break;
-
- default:
- gcc_unreachable ();
- }
-
- if (inverse)
- emit_insn (gen_neon_vbsl<mode> (operands[0], mask, operands[2],
- operands[1]));
- else
- emit_insn (gen_neon_vbsl<mode> (operands[0], mask, operands[1],
- operands[2]));
+ arm_expand_vcond (operands, <V_cmp_result>mode);
+ DONE;
+})
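+
+;; Implement the standard vcond_mask optab: operand 3 is a comparison
+;; result that has already been computed (for example by vec_cmp above),
+;; so the select maps directly onto a single VBSL.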
+(define_expand "vcond_mask_<mode><v_cmp_result>"
+ [(set (match_operand:VDQW 0 "s_register_operand")
+ (if_then_else:VDQW
+ (match_operand:<V_cmp_result> 3 "s_register_operand")
+ (match_operand:VDQW 1 "s_register_operand")
+ (match_operand:VDQW 2 "s_register_operand")))]
+ "TARGET_NEON"
+{
+ emit_insn (gen_neon_vbsl<mode> (operands[0], operands[3], operands[1],
+ operands[2]));
DONE;
})
;; These may expand to an UNSPEC pattern when a floating point mode is used
;; without unsafe math optimizations.
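+;; The '@' prefix below makes genemit provide an overloaded
+;; gen_neon_vc (code, mode, ...) helper, which arm_expand_vector_compare
+;; uses to emit these comparisons directly from C code.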
-(define_expand "neon_vc<cmp_op><mode>"
+(define_expand "@neon_vc<cmp_op><mode>"
[(match_operand:<V_cmp_result> 0 "s_register_operand")
(neg:<V_cmp_result>
(COMPARISONS:VDQW (match_operand:VDQW 1 "s_register_operand")
}
)
-(define_insn "neon_vc<cmp_op><mode>_insn"
+(define_insn "@neon_vc<cmp_op><mode>_insn"
[(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w,w")
(neg:<V_cmp_result>
(COMPARISONS:<V_cmp_result>
[(set_attr "type" "neon_fp_compare_s<q>")]
)
-(define_expand "neon_vc<cmp_op><mode>"
+(define_expand "@neon_vc<cmp_op><mode>"
[(match_operand:<V_cmp_result> 0 "s_register_operand")
(neg:<V_cmp_result>
(COMPARISONS:VH
}
[(set_attr "type" "neon_fp_compare_s<q>")])
-(define_insn "neon_vc<cmp_op>u<mode>"
+(define_insn "@neon_vc<code><mode>"
[(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w")
(neg:<V_cmp_result>
(GTUGEU:<V_cmp_result>
[(set_attr "type" "neon_bsl<q>")]
)
-(define_expand "neon_vbsl<mode>"
+(define_expand "@neon_vbsl<mode>"
[(set (match_operand:VDQX 0 "s_register_operand")
(unspec:VDQX [(match_operand:<V_cmp_result> 1 "s_register_operand")
(match_operand:VDQX 2 "s_register_operand")
--- /dev/null
+/* { dg-do compile } */
+/* { dg-require-effective-target arm_neon_ok } */
+/* { dg-options "-O1" } */
+/* { dg-add-options arm_neon } */
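+
+/* Check which NEON comparison instructions are used for integer vector
+   comparisons, including the #0 forms for comparisons against zero.  */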
+
+#define COMPARE_REG(NAME, OP, TYPE) \
+ TYPE \
+ cmp_##NAME##_##TYPE##_reg (TYPE a, TYPE b) \
+ { \
+ return a OP b; \
+ }
+
+#define COMPARE_REG_AND_ZERO(NAME, OP, TYPE) \
+ COMPARE_REG (NAME, OP, TYPE) \
+ \
+ TYPE \
+ cmp_##NAME##_##TYPE##_zero (TYPE a) \
+ { \
+ return a OP (TYPE) {}; \
+ }
+
+#define COMPARE_TYPE(TYPE, COMPARE_ORDERED) \
+ COMPARE_REG_AND_ZERO (eq, ==, TYPE) \
+ COMPARE_REG_AND_ZERO (ne, !=, TYPE) \
+ COMPARE_ORDERED (lt, <, TYPE) \
+ COMPARE_ORDERED (le, <=, TYPE) \
+ COMPARE_ORDERED (gt, >, TYPE) \
+ COMPARE_ORDERED (ge, >=, TYPE)
+
+#define TEST_TYPE(NAME, ELEM, COMPARE_ORDERED) \
+ typedef ELEM NAME __attribute__((vector_size(16))); \
+ COMPARE_TYPE (NAME, COMPARE_ORDERED)
+
+TEST_TYPE (vs8, __INT8_TYPE__, COMPARE_REG_AND_ZERO)
+TEST_TYPE (vu8, __UINT8_TYPE__, COMPARE_REG)
+TEST_TYPE (vs16, __INT16_TYPE__, COMPARE_REG_AND_ZERO)
+TEST_TYPE (vu16, __UINT16_TYPE__, COMPARE_REG)
+TEST_TYPE (vs32, __INT32_TYPE__, COMPARE_REG_AND_ZERO)
+TEST_TYPE (vu32, __UINT32_TYPE__, COMPARE_REG)
+
+/* { s8, u8 } x { eq, ne }.  */
+/* { dg-final { scan-assembler-times {\tvceq.i8\tq[0-9]+, q[0-9]+, q[0-9]+\n} 4 } } */
+/* { dg-final { scan-assembler-times {\tvceq.i8\tq[0-9]+, q[0-9]+, #0\n} 4 } } */
+
+/* { dg-final { scan-assembler-times {\tvcgt.s8\tq[0-9]+, q[0-9]+, q[0-9]+\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tvcgt.s8\tq[0-9]+, q[0-9]+, #0\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tvclt.s8\tq[0-9]+, q[0-9]+, #0\n} 1 } } */
+
+/* { dg-final { scan-assembler-times {\tvcge.s8\tq[0-9]+, q[0-9]+, q[0-9]+\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tvcge.s8\tq[0-9]+, q[0-9]+, #0\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tvcle.s8\tq[0-9]+, q[0-9]+, #0\n} 1 } } */
+
+/* { dg-final { scan-assembler-times {\tvcgt.u8\tq[0-9]+, q[0-9]+, q[0-9]+\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tvcge.u8\tq[0-9]+, q[0-9]+, q[0-9]+\n} 2 } } */
+
+/* { s16, u16 } x { eq, ne }.  */
+/* { dg-final { scan-assembler-times {\tvceq.i16\tq[0-9]+, q[0-9]+, q[0-9]+\n} 4 } } */
+/* { dg-final { scan-assembler-times {\tvceq.i16\tq[0-9]+, q[0-9]+, #0\n} 4 } } */
+
+/* { dg-final { scan-assembler-times {\tvcgt.s16\tq[0-9]+, q[0-9]+, q[0-9]+\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tvcgt.s16\tq[0-9]+, q[0-9]+, #0\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tvclt.s16\tq[0-9]+, q[0-9]+, #0\n} 1 } } */
+
+/* { dg-final { scan-assembler-times {\tvcge.s16\tq[0-9]+, q[0-9]+, q[0-9]+\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tvcge.s16\tq[0-9]+, q[0-9]+, #0\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tvcle.s16\tq[0-9]+, q[0-9]+, #0\n} 1 } } */
+
+/* { dg-final { scan-assembler-times {\tvcgt.u16\tq[0-9]+, q[0-9]+, q[0-9]+\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tvcge.u16\tq[0-9]+, q[0-9]+, q[0-9]+\n} 2 } } */
+
+/* { s32, u32 } x { eq, ne }.  */
+/* { dg-final { scan-assembler-times {\tvceq.i32\tq[0-9]+, q[0-9]+, q[0-9]+\n} 4 } } */
+/* { dg-final { scan-assembler-times {\tvceq.i32\tq[0-9]+, q[0-9]+, #0\n} 4 } } */
+
+/* { dg-final { scan-assembler-times {\tvcgt.s32\tq[0-9]+, q[0-9]+, q[0-9]+\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tvcgt.s32\tq[0-9]+, q[0-9]+, #0\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tvclt.s32\tq[0-9]+, q[0-9]+, #0\n} 1 } } */
+
+/* { dg-final { scan-assembler-times {\tvcge.s32\tq[0-9]+, q[0-9]+, q[0-9]+\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tvcge.s32\tq[0-9]+, q[0-9]+, #0\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tvcle.s32\tq[0-9]+, q[0-9]+, #0\n} 1 } } */
+
+/* { dg-final { scan-assembler-times {\tvcgt.u32\tq[0-9]+, q[0-9]+, q[0-9]+\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tvcge.u32\tq[0-9]+, q[0-9]+, q[0-9]+\n} 2 } } */
--- /dev/null
+/* { dg-do compile } */
+/* { dg-require-effective-target arm_neon_ok } */
+/* { dg-options "-O1 -funsafe-math-optimizations" } */
+/* { dg-add-options arm_neon } */
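+
+/* Single-precision vector float comparisons.  -funsafe-math-optimizations
+   is required because the NEON comparisons are not IEEE compliant.  */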
+
+#ifndef ELEM_TYPE
+#define ELEM_TYPE float
+#endif
+#ifndef INT_ELEM_TYPE
+#define INT_ELEM_TYPE __INT32_TYPE__
+#endif
+
+#define COMPARE(NAME, OP) \
+ int_vec \
+ cmp_##NAME##_reg (vec a, vec b) \
+ { \
+ return a OP b; \
+ } \
+ \
+ int_vec \
+ cmp_##NAME##_zero (vec a) \
+ { \
+ return a OP (vec) {}; \
+ }
+
+typedef INT_ELEM_TYPE int_vec __attribute__((vector_size(16)));
+typedef ELEM_TYPE vec __attribute__((vector_size(16)));
+
+COMPARE (eq, ==)
+COMPARE (ne, !=)
+COMPARE (lt, <)
+COMPARE (le, <=)
+COMPARE (gt, >)
+COMPARE (ge, >=)
+
+/* { dg-final { scan-assembler-times {\tvceq.f32\tq[0-9]+, q[0-9]+, q[0-9]+\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tvceq.f32\tq[0-9]+, q[0-9]+, #0\n} 2 } } */
+
+/* { dg-final { scan-assembler-times {\tvcgt.f32\tq[0-9]+, q[0-9]+, q[0-9]+\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tvcgt.f32\tq[0-9]+, q[0-9]+, #0\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tvclt.f32\tq[0-9]+, q[0-9]+, #0\n} 1 } } */
+
+/* { dg-final { scan-assembler-times {\tvcge.f32\tq[0-9]+, q[0-9]+, q[0-9]+\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tvcge.f32\tq[0-9]+, q[0-9]+, #0\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tvcle.f32\tq[0-9]+, q[0-9]+, #0\n} 1 } } */
--- /dev/null
+/* { dg-do compile } */
+/* { dg-require-effective-target arm_neon_ok } */
+/* { dg-options "-O1 -ftree-vectorize -funsafe-math-optimizations" } */
+/* { dg-add-options arm_neon } */
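+
+/* Each unordered comparison below is the negation of an ordered one, so
+   it should be implemented as a single ordered VCGT/VCGE/VCLT/VCLE with
+   the select operands swapped.  */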
+
+#define ult(a, b) (!__builtin_isgreaterequal (a, b))
+#define ule(a, b) (!__builtin_isgreater (a, b))
+#define ugt(a, b) (!__builtin_islessequal (a, b))
+#define uge(a, b) (!__builtin_isless (a, b))
+
+int x[16];
+float a[16];
+float b[16];
+
+#define COMPARE(NAME) \
+ void \
+ cmp_##NAME##_reg (void) \
+ { \
+ for (int i = 0; i < 16; ++i) \
+ x[i] = NAME (a[i], b[i]) ? 2 : 0; \
+ } \
+ \
+ void \
+ cmp_##NAME##_zero (void) \
+ { \
+ for (int i = 0; i < 16; ++i) \
+ x[i] = NAME (a[i], 0) ? 2 : 0; \
+ }
+
+typedef int int_vec __attribute__((vector_size(16)));
+typedef float vec __attribute__((vector_size(16)));
+
+COMPARE (ult)
+COMPARE (ule)
+COMPARE (ugt)
+COMPARE (uge)
+
+/* { dg-final { scan-assembler-times {\tvcgt.f32\tq[0-9]+, q[0-9]+, q[0-9]+\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tvcgt.f32\tq[0-9]+, q[0-9]+, #0\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tvclt.f32\tq[0-9]+, q[0-9]+, #0\n} 1 } } */
+
+/* { dg-final { scan-assembler-times {\tvcge.f32\tq[0-9]+, q[0-9]+, q[0-9]+\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tvcge.f32\tq[0-9]+, q[0-9]+, #0\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tvcle.f32\tq[0-9]+, q[0-9]+, #0\n} 1 } } */
--- /dev/null
+/* { dg-do compile } */
+/* { dg-require-effective-target arm_neon_ok } */
+/* { dg-options "-O1 -ftree-vectorize -funsafe-math-optimizations" } */
+/* { dg-add-options arm_neon } */
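+
+/* ORDERED is expanded as (a > b) || (a <= b); UNORDERED reuses the same
+   pair of comparisons and swaps the select operands.  */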
+
+#define ordered(a, b) (!__builtin_isunordered (a, b))
+#define unordered(a, b) (__builtin_isunordered (a, b))
+
+int x[16];
+float a[16];
+float b[16];
+
+#define COMPARE(NAME) \
+ void \
+ cmp_##NAME##_reg (void) \
+ { \
+ for (int i = 0; i < 16; ++i) \
+ x[i] = NAME (a[i], b[i]) ? 2 : 0; \
+ } \
+ \
+ void \
+ cmp_##NAME##_zero (void) \
+ { \
+ for (int i = 0; i < 16; ++i) \
+ x[i] = NAME (a[i], 0) ? 2 : 0; \
+ }
+
+typedef int int_vec __attribute__((vector_size(16)));
+typedef float vec __attribute__((vector_size(16)));
+
+COMPARE (ordered)
+COMPARE (unordered)
+
+/* { dg-final { scan-assembler-times {\tvcgt.f32\tq[0-9]+, q[0-9]+, q[0-9]+\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tvcgt.f32\tq[0-9]+, q[0-9]+, #0\n} 2 } } */
+
+/* { dg-final { scan-assembler-times {\tvcge.f32\tq[0-9]+, q[0-9]+, q[0-9]+\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tvcle.f32\tq[0-9]+, q[0-9]+, #0\n} 2 } } */
--- /dev/null
+/* { dg-do compile } */
+/* { dg-require-effective-target arm_neon_ok } */
+/* { dg-options "-O1 -ftree-vectorize -funsafe-math-optimizations" } */
+/* { dg-add-options arm_neon } */
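+
+/* Both UNEQ and LTGT are expanded as (a > b) || (a < b); UNEQ then swaps
+   the select operands to get the complement.  */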
+
+#define uneq(a, b) (!__builtin_islessgreater (a, b))
+/* RTL's LTGT is a signaling comparison. */
+#define ltgt(a, b) (a < b || b < a)
+
+int x[16];
+float a[16];
+float b[16];
+
+#define COMPARE(NAME) \
+ void \
+ cmp_##NAME##_reg (void) \
+ { \
+ for (int i = 0; i < 16; ++i) \
+ x[i] = NAME (a[i], b[i]) ? 2 : 0; \
+ } \
+ \
+ void \
+ cmp_##NAME##_zero (void) \
+ { \
+ for (int i = 0; i < 16; ++i) \
+ x[i] = NAME (a[i], 0) ? 2 : 0; \
+ }
+
+typedef int int_vec __attribute__((vector_size(16)));
+typedef float vec __attribute__((vector_size(16)));
+
+COMPARE (uneq)
+COMPARE (ltgt)
+
+/* { dg-final { scan-assembler-times {\tvcgt.f32\tq[0-9]+, q[0-9]+, q[0-9]+\n} 4 } } */
+/* { dg-final { scan-assembler-times {\tvcgt.f32\tq[0-9]+, q[0-9]+, #0\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tvclt.f32\tq[0-9]+, q[0-9]+, #0\n} 2 } } */
}
}
-/* { dg-final { scan-assembler "vcgt\\.f32\[\\t \]*q\[0-9\]+,\[\\t \]*q\[0-9\]+,\[\\t \]*q\[0-9\]+" } } */
+/* { dg-final { scan-assembler "vclt\\.f32\[\\t \]*q\[0-9\]+,\[\\t \]*q\[0-9\]+,\[\\t \]*#0" } } */
/* { dg-final { scan-assembler "vbsl|vbit|vbif\[\\t \]*q\[0-9\]+,\[\\t \]*q\[0-9\]+,\[\\t \]*q\[0-9\]+" } } */
}
}
-/* { dg-final { scan-assembler-times "vcgt\\.f32\[\\t \]*q\[0-9\]+,\[\\t \]*q\[0-9\]+,\[\\t \]*q\[0-9\]+" 2 } } */
+/* { dg-final { scan-assembler "vclt\\.f32\[\\t \]*q\[0-9\]+,\[\\t \]*q\[0-9\]+,\[\\t \]*#0" } } */
+/* { dg-final { scan-assembler "vcgt\\.f32\[\\t \]*q\[0-9\]+,\[\\t \]*q\[0-9\]+,\[\\t \]*#0" } } */
/* { dg-final { scan-assembler "vorr\[\\t \]*q\[0-9\]+,\[\\t \]*q\[0-9\]+,\[\\t \]*q\[0-9\]+" } } */
/* { dg-final { scan-assembler "vbsl|vbit|vbif\[\\t \]*q\[0-9\]+,\[\\t \]*q\[0-9\]+,\[\\t \]*q\[0-9\]+" } } */
}
}
-/* { dg-final { scan-assembler "vcgt\\.f32\[\\t \]*q\[0-9\]+,\[\\t \]*q\[0-9\]+,\[\\t \]*q\[0-9\]+" } } */
-/* { dg-final { scan-assembler "vcge\\.f32\[\\t \]*q\[0-9\]+,\[\\t \]*q\[0-9\]+,\[\\t \]*q\[0-9\]+" } } */
+/* { dg-final { scan-assembler "vcgt\\.f32\[\\t \]*q\[0-9\]+,\[\\t \]*q\[0-9\]+,\[\\t \]*#0" } } */
+/* { dg-final { scan-assembler "vcle\\.f32\[\\t \]*q\[0-9\]+,\[\\t \]*q\[0-9\]+,\[\\t \]*#0" } } */
/* { dg-final { scan-assembler "vorr\[\\t \]*q\[0-9\]+,\[\\t \]*q\[0-9\]+,\[\\t \]*q\[0-9\]+" } } */
/* { dg-final { scan-assembler "vbsl|vbit|vbif\[\\t \]*q\[0-9\]+,\[\\t \]*q\[0-9\]+,\[\\t \]*q\[0-9\]+" } } */
expr { [istarget i?86-*-*] || [istarget x86_64-*-*]
|| [istarget aarch64*-*-*]
|| [istarget powerpc*-*-*]
+ || ([istarget arm*-*-*]
+ && [check_effective_target_arm_neon_ok])
|| ([istarget mips*-*-*]
&& [et-is-effective-target mips_msa])
|| ([istarget s390*-*-*]