From: Richard Sandiford Date: Thu, 1 Oct 2020 16:41:15 +0000 (+0100) Subject: arm: Add missing vec_cmp and vcond patterns X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=c2978b3405884e38429c1937f416753ca88d3cd6;p=gcc.git arm: Add missing vec_cmp and vcond patterns This patch does several things at once: (1) Add vector compare patterns (vec_cmp and vec_cmpu). (2) Add vector selects between floating-point modes when the values being compared are integers (affects vcond and vcondu). (3) Add vector selects between integer modes when the values being compared are floating-point (affects vcond). (4) Add standalone vector select patterns (vcond_mask). (5) Tweak the handling of compound comparisons with zeros. Unfortunately it proved too difficult (for me) to separate this out into a series of smaller patches, since everything is so inter-related. Defining only some of the new patterns does not leave things in a happy state. The handling of comparisons is mostly taken from the vcond patterns. This means that it remains non-compliant with IEEE: “quiet” comparisons use signalling instructions. But that shouldn't matter for floats, since we require -funsafe-math-optimizations to vectorize for them anyway. It remains the case that comparisons and selects aren't implemented at all for HF vectors. Implementing those feels like separate work. gcc/ PR target/96528 PR target/97288 * config/arm/arm-protos.h (arm_expand_vector_compare): Declare. (arm_expand_vcond): Likewise. * config/arm/arm.c (arm_expand_vector_compare): New function. (arm_expand_vcond): Likewise. * config/arm/neon.md (vec_cmp): New pattern. (vec_cmpu): Likewise. (vcond): Require operand 5 to be a register or zero. Use arm_expand_vcond. (vcond): New pattern. (vcondu): Generalize to... (vcondu): New pattern. (neon_vc, neon_vc_insn): Add "@" marker. (neon_vbsl): Likewise. (neon_vcu): Reexpress as... (@neon_vc): ...this. 
gcc/testsuite/ * lib/target-supports.exp (check_effective_target_vect_cond_mixed): Add arm neon targets. * gcc.target/arm/neon-compare-1.c: New test. * gcc.target/arm/neon-compare-2.c: Likewise. * gcc.target/arm/neon-compare-3.c: Likewise. * gcc.target/arm/neon-compare-4.c: Likewise. * gcc.target/arm/neon-compare-5.c: Likewise. * gcc.target/arm/neon-vcond-gt.c: Expect comparisons with zero. * gcc.target/arm/neon-vcond-ltgt.c: Likewise. * gcc.target/arm/neon-vcond-unordered.c: Likewise. --- diff --git a/gcc/config/arm/arm-protos.h b/gcc/config/arm/arm-protos.h index 9bb9c61967b..703d6160c24 100644 --- a/gcc/config/arm/arm-protos.h +++ b/gcc/config/arm/arm-protos.h @@ -372,9 +372,11 @@ extern void arm_emit_coreregs_64bit_shift (enum rtx_code, rtx, rtx, rtx, rtx, extern bool arm_fusion_enabled_p (tune_params::fuse_ops); extern bool arm_valid_symbolic_address_p (rtx); extern bool arm_validize_comparison (rtx *, rtx *, rtx *); +extern bool arm_expand_vector_compare (rtx, rtx_code, rtx, rtx, bool); #endif /* RTX_CODE */ extern bool arm_gen_setmem (rtx *); +extern void arm_expand_vcond (rtx *, machine_mode); extern void arm_expand_vec_perm (rtx target, rtx op0, rtx op1, rtx sel); extern bool arm_autoinc_modes_ok_p (machine_mode, enum arm_auto_incmodes); diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c index 8105b39e7a4..0e23246c27b 100644 --- a/gcc/config/arm/arm.c +++ b/gcc/config/arm/arm.c @@ -30634,6 +30634,127 @@ arm_split_atomic_op (enum rtx_code code, rtx old_out, rtx new_out, rtx mem, arm_post_atomic_barrier (model); } +/* Expand code to compare vectors OP0 and OP1 using condition CODE. + If CAN_INVERT, store either the result or its inverse in TARGET + and return true if TARGET contains the inverse. If !CAN_INVERT, + always store the result in TARGET, never its inverse. + + Note that the handling of floating-point comparisons is not + IEEE compliant. 
*/ + +bool +arm_expand_vector_compare (rtx target, rtx_code code, rtx op0, rtx op1, + bool can_invert) +{ + machine_mode cmp_result_mode = GET_MODE (target); + machine_mode cmp_mode = GET_MODE (op0); + + bool inverted; + switch (code) + { + /* For these we need to compute the inverse of the requested + comparison. */ + case UNORDERED: + case UNLT: + case UNLE: + case UNGT: + case UNGE: + case UNEQ: + case NE: + code = reverse_condition_maybe_unordered (code); + if (!can_invert) + { + /* Recursively emit the inverted comparison into a temporary + and then store its inverse in TARGET. This avoids reusing + TARGET (which for integer NE could be one of the inputs). */ + rtx tmp = gen_reg_rtx (cmp_result_mode); + if (arm_expand_vector_compare (tmp, code, op0, op1, true)) + gcc_unreachable (); + emit_insn (gen_rtx_SET (target, gen_rtx_NOT (cmp_result_mode, tmp))); + return false; + } + inverted = true; + break; + + default: + inverted = false; + break; + } + + switch (code) + { + /* These are natively supported for zero comparisons, but otherwise + require the operands to be swapped. */ + case LE: + case LT: + if (op1 != CONST0_RTX (cmp_mode)) + { + code = swap_condition (code); + std::swap (op0, op1); + } + /* Fall through. */ + + /* These are natively supported for both register and zero operands. */ + case EQ: + case GE: + case GT: + emit_insn (gen_neon_vc (code, cmp_mode, target, op0, op1)); + return inverted; + + /* These are natively supported for register operands only. + Comparisons with zero aren't useful and should be folded + or canonicalized by target-independent code. */ + case GEU: + case GTU: + emit_insn (gen_neon_vc (code, cmp_mode, target, + op0, force_reg (cmp_mode, op1))); + return inverted; + + /* These require the operands to be swapped and likewise do not + support comparisons with zero. 
*/ + case LEU: + case LTU: + emit_insn (gen_neon_vc (swap_condition (code), cmp_mode, + target, force_reg (cmp_mode, op1), op0)); + return inverted; + + /* These need a combination of two comparisons. */ + case LTGT: + case ORDERED: + { + /* Operands are LTGT iff (a > b || a < b). + Operands are ORDERED iff (a > b || a <= b). */ + rtx gt_res = gen_reg_rtx (cmp_result_mode); + rtx alt_res = gen_reg_rtx (cmp_result_mode); + rtx_code alt_code = (code == LTGT ? LT : LE); + if (arm_expand_vector_compare (gt_res, GT, op0, op1, true) + || arm_expand_vector_compare (alt_res, alt_code, op0, op1, true)) + gcc_unreachable (); + emit_insn (gen_rtx_SET (target, gen_rtx_IOR (cmp_result_mode, + gt_res, alt_res))); + return inverted; + } + + default: + gcc_unreachable (); + } +} + +/* Expand a vcond or vcondu pattern with operands OPERANDS. + CMP_RESULT_MODE is the mode of the comparison result. */ + +void +arm_expand_vcond (rtx *operands, machine_mode cmp_result_mode) +{ + rtx mask = gen_reg_rtx (cmp_result_mode); + bool inverted = arm_expand_vector_compare (mask, GET_CODE (operands[3]), + operands[4], operands[5], true); + if (inverted) + std::swap (operands[1], operands[2]); + emit_insn (gen_neon_vbsl (GET_MODE (operands[0]), operands[0], + mask, operands[1], operands[2])); +} + #define MAX_VECT_LEN 16 struct expand_vec_perm_d diff --git a/gcc/config/arm/neon.md b/gcc/config/arm/neon.md index 96bf277f501..58832cbf484 100644 --- a/gcc/config/arm/neon.md +++ b/gcc/config/arm/neon.md @@ -1530,6 +1530,30 @@ [(set_attr "type" "neon_qsub")] ) +(define_expand "vec_cmp<mode><v_cmp_result>" + [(set (match_operand:<V_cmp_result> 0 "s_register_operand") + (match_operator:<V_cmp_result> 1 "comparison_operator" + [(match_operand:VDQW 2 "s_register_operand") + (match_operand:VDQW 3 "reg_or_zero_operand")]))] + "TARGET_NEON && (!<Is_float_mode>
|| flag_unsafe_math_optimizations)" +{ + arm_expand_vector_compare (operands[0], GET_CODE (operands[1]), + operands[2], operands[3], false); + DONE; +}) + +(define_expand "vec_cmpu" + [(set (match_operand:VDQIW 0 "s_register_operand") + (match_operator:VDQIW 1 "comparison_operator" + [(match_operand:VDQIW 2 "s_register_operand") + (match_operand:VDQIW 3 "reg_or_zero_operand")]))] + "TARGET_NEON" +{ + arm_expand_vector_compare (operands[0], GET_CODE (operands[1]), + operands[2], operands[3], false); + DONE; +}) + ;; Conditional instructions. These are comparisons with conditional moves for ;; vectors. They perform the assignment: ;; @@ -1543,230 +1567,53 @@ (if_then_else:VDQW (match_operator 3 "comparison_operator" [(match_operand:VDQW 4 "s_register_operand") - (match_operand:VDQW 5 "nonmemory_operand")]) + (match_operand:VDQW 5 "reg_or_zero_operand")]) (match_operand:VDQW 1 "s_register_operand") (match_operand:VDQW 2 "s_register_operand")))] "TARGET_NEON && (! || flag_unsafe_math_optimizations)" { - int inverse = 0; - int use_zero_form = 0; - int swap_bsl_operands = 0; - rtx mask = gen_reg_rtx (mode); - rtx tmp = gen_reg_rtx (mode); - - rtx (*base_comparison) (rtx, rtx, rtx); - rtx (*complimentary_comparison) (rtx, rtx, rtx); - - switch (GET_CODE (operands[3])) - { - case GE: - case GT: - case LE: - case LT: - case EQ: - if (operands[5] == CONST0_RTX (mode)) - { - use_zero_form = 1; - break; - } - /* Fall through. */ - default: - if (!REG_P (operands[5])) - operands[5] = force_reg (mode, operands[5]); - } - - switch (GET_CODE (operands[3])) - { - case LT: - case UNLT: - inverse = 1; - /* Fall through. */ - case GE: - case UNGE: - case ORDERED: - case UNORDERED: - base_comparison = gen_neon_vcge; - complimentary_comparison = gen_neon_vcgt; - break; - case LE: - case UNLE: - inverse = 1; - /* Fall through. 
*/ - case GT: - case UNGT: - base_comparison = gen_neon_vcgt; - complimentary_comparison = gen_neon_vcge; - break; - case EQ: - case NE: - case UNEQ: - base_comparison = gen_neon_vceq; - complimentary_comparison = gen_neon_vceq; - break; - default: - gcc_unreachable (); - } - - switch (GET_CODE (operands[3])) - { - case LT: - case LE: - case GT: - case GE: - case EQ: - /* The easy case. Here we emit one of vcge, vcgt or vceq. - As a LT b <=> b GE a && a LE b <=> b GT a. Our transformations are: - a GE b -> a GE b - a GT b -> a GT b - a LE b -> b GE a - a LT b -> b GT a - a EQ b -> a EQ b - Note that there also exist direct comparison against 0 forms, - so catch those as a special case. */ - if (use_zero_form) - { - inverse = 0; - switch (GET_CODE (operands[3])) - { - case LT: - base_comparison = gen_neon_vclt; - break; - case LE: - base_comparison = gen_neon_vcle; - break; - default: - /* Do nothing, other zero form cases already have the correct - base_comparison. */ - break; - } - } - - if (!inverse) - emit_insn (base_comparison (mask, operands[4], operands[5])); - else - emit_insn (complimentary_comparison (mask, operands[5], operands[4])); - break; - case UNLT: - case UNLE: - case UNGT: - case UNGE: - case NE: - /* Vector compare returns false for lanes which are unordered, so if we use - the inverse of the comparison we actually want to emit, then - swap the operands to BSL, we will end up with the correct result. - Note that a NE NaN and NaN NE b are true for all a, b. - - Our transformations are: - a GE b -> !(b GT a) - a GT b -> !(b GE a) - a LE b -> !(a GT b) - a LT b -> !(a GE b) - a NE b -> !(a EQ b) */ - - if (inverse) - emit_insn (base_comparison (mask, operands[4], operands[5])); - else - emit_insn (complimentary_comparison (mask, operands[5], operands[4])); - - swap_bsl_operands = 1; - break; - case UNEQ: - /* We check (a > b || b > a). 
combining these comparisons give us - true iff !(a != b && a ORDERED b), swapping the operands to BSL - will then give us (a == b || a UNORDERED b) as intended. */ - - emit_insn (gen_neon_vcgt (mask, operands[4], operands[5])); - emit_insn (gen_neon_vcgt (tmp, operands[5], operands[4])); - emit_insn (gen_ior3 (mask, mask, tmp)); - swap_bsl_operands = 1; - break; - case UNORDERED: - /* Operands are ORDERED iff (a > b || b >= a). - Swapping the operands to BSL will give the UNORDERED case. */ - swap_bsl_operands = 1; - /* Fall through. */ - case ORDERED: - emit_insn (gen_neon_vcgt (tmp, operands[4], operands[5])); - emit_insn (gen_neon_vcge (mask, operands[5], operands[4])); - emit_insn (gen_ior3 (mask, mask, tmp)); - break; - default: - gcc_unreachable (); - } + arm_expand_vcond (operands, mode); + DONE; +}) - if (swap_bsl_operands) - emit_insn (gen_neon_vbsl (operands[0], mask, operands[2], - operands[1])); - else - emit_insn (gen_neon_vbsl (operands[0], mask, operands[1], - operands[2])); +(define_expand "vcond" + [(set (match_operand: 0 "s_register_operand") + (if_then_else: + (match_operator 3 "comparison_operator" + [(match_operand:V32 4 "s_register_operand") + (match_operand:V32 5 "reg_or_zero_operand")]) + (match_operand: 1 "s_register_operand") + (match_operand: 2 "s_register_operand")))] + "TARGET_NEON && (! 
|| flag_unsafe_math_optimizations)" +{ + arm_expand_vcond (operands, mode); DONE; }) -(define_expand "vcondu" - [(set (match_operand:VDQIW 0 "s_register_operand") - (if_then_else:VDQIW +(define_expand "vcondu" + [(set (match_operand:VDQW 0 "s_register_operand") + (if_then_else:VDQW (match_operator 3 "arm_comparison_operator" - [(match_operand:VDQIW 4 "s_register_operand") - (match_operand:VDQIW 5 "s_register_operand")]) - (match_operand:VDQIW 1 "s_register_operand") - (match_operand:VDQIW 2 "s_register_operand")))] + [(match_operand: 4 "s_register_operand") + (match_operand: 5 "reg_or_zero_operand")]) + (match_operand:VDQW 1 "s_register_operand") + (match_operand:VDQW 2 "s_register_operand")))] "TARGET_NEON" { - rtx mask; - int inverse = 0, immediate_zero = 0; - - mask = gen_reg_rtx (mode); - - if (operands[5] == CONST0_RTX (mode)) - immediate_zero = 1; - else if (!REG_P (operands[5])) - operands[5] = force_reg (mode, operands[5]); - - switch (GET_CODE (operands[3])) - { - case GEU: - emit_insn (gen_neon_vcgeu (mask, operands[4], operands[5])); - break; - - case GTU: - emit_insn (gen_neon_vcgtu (mask, operands[4], operands[5])); - break; - - case EQ: - emit_insn (gen_neon_vceq (mask, operands[4], operands[5])); - break; - - case LEU: - if (immediate_zero) - emit_insn (gen_neon_vcle (mask, operands[4], operands[5])); - else - emit_insn (gen_neon_vcgeu (mask, operands[5], operands[4])); - break; - - case LTU: - if (immediate_zero) - emit_insn (gen_neon_vclt (mask, operands[4], operands[5])); - else - emit_insn (gen_neon_vcgtu (mask, operands[5], operands[4])); - break; - - case NE: - emit_insn (gen_neon_vceq (mask, operands[4], operands[5])); - inverse = 1; - break; - - default: - gcc_unreachable (); - } - - if (inverse) - emit_insn (gen_neon_vbsl (operands[0], mask, operands[2], - operands[1])); - else - emit_insn (gen_neon_vbsl (operands[0], mask, operands[1], - operands[2])); + arm_expand_vcond (operands, mode); + DONE; +}) +(define_expand "vcond_mask_" + [(set 
(match_operand:VDQW 0 "s_register_operand") + (if_then_else:VDQW + (match_operand: 3 "s_register_operand") + (match_operand:VDQW 1 "s_register_operand") + (match_operand:VDQW 2 "s_register_operand")))] + "TARGET_NEON" +{ + emit_insn (gen_neon_vbsl (operands[0], operands[3], operands[1], + operands[2])); DONE; }) @@ -2601,7 +2448,7 @@ ;; These may expand to an UNSPEC pattern when a floating point mode is used ;; without unsafe math optimizations. -(define_expand "neon_vc" +(define_expand "@neon_vc" [(match_operand: 0 "s_register_operand") (neg: (COMPARISONS:VDQW (match_operand:VDQW 1 "s_register_operand") @@ -2641,7 +2488,7 @@ } ) -(define_insn "neon_vc_insn" +(define_insn "@neon_vc_insn" [(set (match_operand: 0 "s_register_operand" "=w,w") (neg: (COMPARISONS: @@ -2685,7 +2532,7 @@ [(set_attr "type" "neon_fp_compare_s")] ) -(define_expand "neon_vc" +(define_expand "@neon_vc" [(match_operand: 0 "s_register_operand") (neg: (COMPARISONS:VH @@ -2751,7 +2598,7 @@ } [(set_attr "type" "neon_fp_compare_s")]) -(define_insn "neon_vcu" +(define_insn "@neon_vc" [(set (match_operand: 0 "s_register_operand" "=w") (neg: (GTUGEU: @@ -4708,7 +4555,7 @@ if (BYTES_BIG_ENDIAN) [(set_attr "type" "neon_bsl")] ) -(define_expand "neon_vbsl" +(define_expand "@neon_vbsl" [(set (match_operand:VDQX 0 "s_register_operand") (unspec:VDQX [(match_operand: 1 "s_register_operand") (match_operand:VDQX 2 "s_register_operand") diff --git a/gcc/testsuite/gcc.target/arm/neon-compare-1.c b/gcc/testsuite/gcc.target/arm/neon-compare-1.c new file mode 100644 index 00000000000..c915eca6dbe --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/neon-compare-1.c @@ -0,0 +1,84 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target arm_neon_ok } */ +/* { dg-options "-O1" } */ +/* { dg-add-options arm_neon } */ + +#define COMPARE_REG(NAME, OP, TYPE) \ + TYPE \ + cmp_##NAME##_##TYPE##_reg (TYPE a, TYPE b) \ + { \ + return a OP b; \ + } + +#define COMPARE_REG_AND_ZERO(NAME, OP, TYPE) \ + COMPARE_REG (NAME, OP, 
TYPE) \ + \ + TYPE \ + cmp_##NAME##_##TYPE##_zero (TYPE a) \ + { \ + return a OP (TYPE) {}; \ + } + +#define COMPARE_TYPE(TYPE, COMPARE_ORDERED) \ + COMPARE_REG_AND_ZERO (eq, ==, TYPE) \ + COMPARE_REG_AND_ZERO (ne, !=, TYPE) \ + COMPARE_ORDERED (lt, <, TYPE) \ + COMPARE_ORDERED (le, <=, TYPE) \ + COMPARE_ORDERED (gt, >, TYPE) \ + COMPARE_ORDERED (ge, >=, TYPE) + +#define TEST_TYPE(NAME, ELEM, COMPARE_ORDERED) \ + typedef ELEM NAME __attribute__((vector_size(16))); \ + COMPARE_TYPE (NAME, COMPARE_ORDERED) + +TEST_TYPE (vs8, __INT8_TYPE__, COMPARE_REG_AND_ZERO) +TEST_TYPE (vu8, __UINT8_TYPE__, COMPARE_REG) +TEST_TYPE (vs16, __INT16_TYPE__, COMPARE_REG_AND_ZERO) +TEST_TYPE (vu16, __UINT16_TYPE__, COMPARE_REG) +TEST_TYPE (vs32, __INT32_TYPE__, COMPARE_REG_AND_ZERO) +TEST_TYPE (vu32, __UINT32_TYPE__, COMPARE_REG) + +/* { s8, u8 } x { eq, ne }.  */ +/* { dg-final { scan-assembler-times {\tvceq.i8\tq[0-9]+, q[0-9]+, q[0-9]+\n} 4 } } */ +/* { dg-final { scan-assembler-times {\tvceq.i8\tq[0-9]+, q[0-9]+, #0\n} 4 } } */ + +/* { dg-final { scan-assembler-times {\tvcgt.s8\tq[0-9]+, q[0-9]+, q[0-9]+\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tvcgt.s8\tq[0-9]+, q[0-9]+, #0\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tvclt.s8\tq[0-9]+, q[0-9]+, #0\n} 1 } } */ + +/* { dg-final { scan-assembler-times {\tvcge.s8\tq[0-9]+, q[0-9]+, q[0-9]+\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tvcge.s8\tq[0-9]+, q[0-9]+, #0\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tvcle.s8\tq[0-9]+, q[0-9]+, #0\n} 1 } } */ + +/* { dg-final { scan-assembler-times {\tvcgt.u8\tq[0-9]+, q[0-9]+, q[0-9]+\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tvcge.u8\tq[0-9]+, q[0-9]+, q[0-9]+\n} 2 } } */ + +/* { s16, u16 } x { eq, ne }.  */
+/* { dg-final { scan-assembler-times {\tvceq.i16\tq[0-9]+, q[0-9]+, q[0-9]+\n} 4 } } */ +/* { dg-final { scan-assembler-times {\tvceq.i16\tq[0-9]+, q[0-9]+, #0\n} 4 } } */ + +/* { dg-final { scan-assembler-times {\tvcgt.s16\tq[0-9]+, q[0-9]+, q[0-9]+\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tvcgt.s16\tq[0-9]+, q[0-9]+, #0\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tvclt.s16\tq[0-9]+, q[0-9]+, #0\n} 1 } } */ + +/* { dg-final { scan-assembler-times {\tvcge.s16\tq[0-9]+, q[0-9]+, q[0-9]+\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tvcge.s16\tq[0-9]+, q[0-9]+, #0\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tvcle.s16\tq[0-9]+, q[0-9]+, #0\n} 1 } } */ + +/* { dg-final { scan-assembler-times {\tvcgt.u16\tq[0-9]+, q[0-9]+, q[0-9]+\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tvcge.u16\tq[0-9]+, q[0-9]+, q[0-9]+\n} 2 } } */ + +/* { s32, u32 } x { eq, ne }.  */ +/* { dg-final { scan-assembler-times {\tvceq.i32\tq[0-9]+, q[0-9]+, q[0-9]+\n} 4 } } */ +/* { dg-final { scan-assembler-times {\tvceq.i32\tq[0-9]+, q[0-9]+, #0\n} 4 } } */ + +/* { dg-final { scan-assembler-times {\tvcgt.s32\tq[0-9]+, q[0-9]+, q[0-9]+\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tvcgt.s32\tq[0-9]+, q[0-9]+, #0\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tvclt.s32\tq[0-9]+, q[0-9]+, #0\n} 1 } } */ + +/* { dg-final { scan-assembler-times {\tvcge.s32\tq[0-9]+, q[0-9]+, q[0-9]+\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tvcge.s32\tq[0-9]+, q[0-9]+, #0\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tvcle.s32\tq[0-9]+, q[0-9]+, #0\n} 1 } } */ + +/* { dg-final { scan-assembler-times {\tvcgt.u32\tq[0-9]+, q[0-9]+, q[0-9]+\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tvcge.u32\tq[0-9]+, q[0-9]+, q[0-9]+\n} 2 } } */ diff --git a/gcc/testsuite/gcc.target/arm/neon-compare-2.c b/gcc/testsuite/gcc.target/arm/neon-compare-2.c new file mode 100644 index 00000000000..559c5e5d8fc --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/neon-compare-2.c 
@@ -0,0 +1,45 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target arm_neon_ok } */ +/* { dg-options "-O1 -funsafe-math-optimizations" } */ +/* { dg-add-options arm_neon } */ + +#ifndef ELEM_TYPE +#define ELEM_TYPE float +#endif +#ifndef INT_ELEM_TYPE +#define INT_ELEM_TYPE __INT32_TYPE__ +#endif + +#define COMPARE(NAME, OP) \ + int_vec \ + cmp_##NAME##_reg (vec a, vec b) \ + { \ + return a OP b; \ + } \ + \ + int_vec \ + cmp_##NAME##_zero (vec a) \ + { \ + return a OP (vec) {}; \ + } + +typedef INT_ELEM_TYPE int_vec __attribute__((vector_size(16))); +typedef ELEM_TYPE vec __attribute__((vector_size(16))); + +COMPARE (eq, ==) +COMPARE (ne, !=) +COMPARE (lt, <) +COMPARE (le, <=) +COMPARE (gt, >) +COMPARE (ge, >=) + +/* { dg-final { scan-assembler-times {\tvceq.f32\tq[0-9]+, q[0-9]+, q[0-9]+\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tvceq.f32\tq[0-9]+, q[0-9]+, #0\n} 2 } } */ + +/* { dg-final { scan-assembler-times {\tvcgt.f32\tq[0-9]+, q[0-9]+, q[0-9]+\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tvcgt.f32\tq[0-9]+, q[0-9]+, #0\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tvclt.f32\tq[0-9]+, q[0-9]+, #0\n} 1 } } */ + +/* { dg-final { scan-assembler-times {\tvcge.f32\tq[0-9]+, q[0-9]+, q[0-9]+\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tvcge.f32\tq[0-9]+, q[0-9]+, #0\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tvcle.f32\tq[0-9]+, q[0-9]+, #0\n} 1 } } */ diff --git a/gcc/testsuite/gcc.target/arm/neon-compare-3.c b/gcc/testsuite/gcc.target/arm/neon-compare-3.c new file mode 100644 index 00000000000..efbe79728a9 --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/neon-compare-3.c @@ -0,0 +1,44 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target arm_neon_ok } */ +/* { dg-options "-O1 -ftree-vectorize -funsafe-math-optimizations" } */ +/* { dg-add-options arm_neon } */ + +#define ult(a, b) (!__builtin_isgreaterequal (a, b)) +#define ule(a, b) (!__builtin_isgreater (a, b)) +#define ugt(a, b) (!__builtin_islessequal 
(a, b)) +#define uge(a, b) (!__builtin_isless (a, b)) + +int x[16]; +float a[16]; +float b[16]; + +#define COMPARE(NAME) \ + void \ + cmp_##NAME##_reg (void) \ + { \ + for (int i = 0; i < 16; ++i) \ + x[i] = NAME (a[i], b[i]) ? 2 : 0; \ + } \ + \ + void \ + cmp_##NAME##_zero (void) \ + { \ + for (int i = 0; i < 16; ++i) \ + x[i] = NAME (a[i], 0) ? 2 : 0; \ + } + +typedef int int_vec __attribute__((vector_size(16))); +typedef float vec __attribute__((vector_size(16))); + +COMPARE (ult) +COMPARE (ule) +COMPARE (ugt) +COMPARE (uge) + +/* { dg-final { scan-assembler-times {\tvcgt.f32\tq[0-9]+, q[0-9]+, q[0-9]+\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tvcgt.f32\tq[0-9]+, q[0-9]+, #0\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tvclt.f32\tq[0-9]+, q[0-9]+, #0\n} 1 } } */ + +/* { dg-final { scan-assembler-times {\tvcge.f32\tq[0-9]+, q[0-9]+, q[0-9]+\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tvcge.f32\tq[0-9]+, q[0-9]+, #0\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tvcle.f32\tq[0-9]+, q[0-9]+, #0\n} 1 } } */ diff --git a/gcc/testsuite/gcc.target/arm/neon-compare-4.c b/gcc/testsuite/gcc.target/arm/neon-compare-4.c new file mode 100644 index 00000000000..3f8cc906c66 --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/neon-compare-4.c @@ -0,0 +1,38 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target arm_neon_ok } */ +/* { dg-options "-O1 -ftree-vectorize -funsafe-math-optimizations" } */ +/* { dg-add-options arm_neon } */ + +#define ordered(a, b) (!__builtin_isunordered (a, b)) +#define unordered(a, b) (__builtin_isunordered (a, b)) + +int x[16]; +float a[16]; +float b[16]; + +#define COMPARE(NAME) \ + void \ + cmp_##NAME##_reg (void) \ + { \ + for (int i = 0; i < 16; ++i) \ + x[i] = NAME (a[i], b[i]) ? 2 : 0; \ + } \ + \ + void \ + cmp_##NAME##_zero (void) \ + { \ + for (int i = 0; i < 16; ++i) \ + x[i] = NAME (a[i], 0) ? 
2 : 0; \ + } + +typedef int int_vec __attribute__((vector_size(16))); +typedef float vec __attribute__((vector_size(16))); + +COMPARE (ordered) +COMPARE (unordered) + +/* { dg-final { scan-assembler-times {\tvcgt.f32\tq[0-9]+, q[0-9]+, q[0-9]+\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tvcgt.f32\tq[0-9]+, q[0-9]+, #0\n} 2 } } */ + +/* { dg-final { scan-assembler-times {\tvcge.f32\tq[0-9]+, q[0-9]+, q[0-9]+\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tvcle.f32\tq[0-9]+, q[0-9]+, #0\n} 2 } } */ diff --git a/gcc/testsuite/gcc.target/arm/neon-compare-5.c b/gcc/testsuite/gcc.target/arm/neon-compare-5.c new file mode 100644 index 00000000000..cb6428d3f08 --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/neon-compare-5.c @@ -0,0 +1,37 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target arm_neon_ok } */ +/* { dg-options "-O1 -ftree-vectorize -funsafe-math-optimizations" } */ +/* { dg-add-options arm_neon } */ + +#define uneq(a, b) (!__builtin_islessgreater (a, b)) +/* RTL's LTGT is a signaling comparison. */ +#define ltgt(a, b) (a < b || b < a) + +int x[16]; +float a[16]; +float b[16]; + +#define COMPARE(NAME) \ + void \ + cmp_##NAME##_reg (void) \ + { \ + for (int i = 0; i < 16; ++i) \ + x[i] = NAME (a[i], b[i]) ? 2 : 0; \ + } \ + \ + void \ + cmp_##NAME##_zero (void) \ + { \ + for (int i = 0; i < 16; ++i) \ + x[i] = NAME (a[i], 0) ? 
2 : 0; \ + } + +typedef int int_vec __attribute__((vector_size(16))); +typedef float vec __attribute__((vector_size(16))); + +COMPARE (uneq) +COMPARE (ltgt) + +/* { dg-final { scan-assembler-times {\tvcgt.f32\tq[0-9]+, q[0-9]+, q[0-9]+\n} 4 } } */ +/* { dg-final { scan-assembler-times {\tvcgt.f32\tq[0-9]+, q[0-9]+, #0\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tvclt.f32\tq[0-9]+, q[0-9]+, #0\n} 2 } } */ diff --git a/gcc/testsuite/gcc.target/arm/neon-vcond-gt.c b/gcc/testsuite/gcc.target/arm/neon-vcond-gt.c index 8e9f3785169..9f601a169d1 100644 --- a/gcc/testsuite/gcc.target/arm/neon-vcond-gt.c +++ b/gcc/testsuite/gcc.target/arm/neon-vcond-gt.c @@ -13,5 +13,5 @@ void foo (int ilast,float* w, float* w2) } } -/* { dg-final { scan-assembler "vcgt\\.f32\[\\t \]*q\[0-9\]+,\[\\t \]*q\[0-9\]+,\[\\t \]*q\[0-9\]+" } } */ +/* { dg-final { scan-assembler "vclt\\.f32\[\\t \]*q\[0-9\]+,\[\\t \]*q\[0-9\]+,\[\\t \]*#0" } } */ /* { dg-final { scan-assembler "vbsl|vbit|vbif\[\\t \]*q\[0-9\]+,\[\\t \]*q\[0-9\]+,\[\\t \]*q\[0-9\]+" } } */ diff --git a/gcc/testsuite/gcc.target/arm/neon-vcond-ltgt.c b/gcc/testsuite/gcc.target/arm/neon-vcond-ltgt.c index c8306e364a3..74bc22046ad 100644 --- a/gcc/testsuite/gcc.target/arm/neon-vcond-ltgt.c +++ b/gcc/testsuite/gcc.target/arm/neon-vcond-ltgt.c @@ -13,6 +13,7 @@ void foo (int ilast,float* w, float* w2) } } -/* { dg-final { scan-assembler-times "vcgt\\.f32\[\\t \]*q\[0-9\]+,\[\\t \]*q\[0-9\]+,\[\\t \]*q\[0-9\]+" 2 } } */ +/* { dg-final { scan-assembler "vclt\\.f32\[\\t \]*q\[0-9\]+,\[\\t \]*q\[0-9\]+,\[\\t \]*#0" } } */ +/* { dg-final { scan-assembler "vcgt\\.f32\[\\t \]*q\[0-9\]+,\[\\t \]*q\[0-9\]+,\[\\t \]*#0" } } */ /* { dg-final { scan-assembler "vorr\[\\t \]*q\[0-9\]+,\[\\t \]*q\[0-9\]+,\[\\t \]*q\[0-9\]+" } } */ /* { dg-final { scan-assembler "vbsl|vbit|vbif\[\\t \]*q\[0-9\]+,\[\\t \]*q\[0-9\]+,\[\\t \]*q\[0-9\]+" } } */ diff --git a/gcc/testsuite/gcc.target/arm/neon-vcond-unordered.c 
b/gcc/testsuite/gcc.target/arm/neon-vcond-unordered.c index 3bb67d3afe3..8d3187541e2 100644 --- a/gcc/testsuite/gcc.target/arm/neon-vcond-unordered.c +++ b/gcc/testsuite/gcc.target/arm/neon-vcond-unordered.c @@ -13,7 +13,7 @@ void foo (int ilast,float* w, float* w2) } } -/* { dg-final { scan-assembler "vcgt\\.f32\[\\t \]*q\[0-9\]+,\[\\t \]*q\[0-9\]+,\[\\t \]*q\[0-9\]+" } } */ -/* { dg-final { scan-assembler "vcge\\.f32\[\\t \]*q\[0-9\]+,\[\\t \]*q\[0-9\]+,\[\\t \]*q\[0-9\]+" } } */ +/* { dg-final { scan-assembler "vcgt\\.f32\[\\t \]*q\[0-9\]+,\[\\t \]*q\[0-9\]+,\[\\t \]*#0" } } */ +/* { dg-final { scan-assembler "vcle\\.f32\[\\t \]*q\[0-9\]+,\[\\t \]*q\[0-9\]+,\[\\t \]*#0" } } */ /* { dg-final { scan-assembler "vorr\[\\t \]*q\[0-9\]+,\[\\t \]*q\[0-9\]+,\[\\t \]*q\[0-9\]+" } } */ /* { dg-final { scan-assembler "vbsl|vbit|vbif\[\\t \]*q\[0-9\]+,\[\\t \]*q\[0-9\]+,\[\\t \]*q\[0-9\]+" } } */ diff --git a/gcc/testsuite/lib/target-supports.exp b/gcc/testsuite/lib/target-supports.exp index 38af6784de8..15f0649f8ae 100644 --- a/gcc/testsuite/lib/target-supports.exp +++ b/gcc/testsuite/lib/target-supports.exp @@ -7233,6 +7233,8 @@ proc check_effective_target_vect_cond_mixed { } { expr { [istarget i?86-*-*] || [istarget x86_64-*-*] || [istarget aarch64*-*-*] || [istarget powerpc*-*-*] + || ([istarget arm*-*-*] + && [check_effective_target_arm_neon_ok]) || ([istarget mips*-*-*] && [et-is-effective-target mips_msa]) || ([istarget s390*-*-*]