arm: Add missing vec_cmp and vcond patterns
author Richard Sandiford <richard.sandiford@arm.com>
Thu, 1 Oct 2020 16:41:15 +0000 (17:41 +0100)
committer Richard Sandiford <richard.sandiford@arm.com>
Thu, 1 Oct 2020 16:41:15 +0000 (17:41 +0100)
This patch does several things at once:

(1) Add vector compare patterns (vec_cmp and vec_cmpu).

(2) Add vector selects between floating-point modes when the
    values being compared are integers (affects vcond and vcondu).

(3) Add vector selects between integer modes when the values being
    compared are floating-point (affects vcond).

(4) Add standalone vector select patterns (vcond_mask).

(5) Tweak the handling of compound comparisons with zeros.
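
As a rough illustration of (1) and (3), the kind of source that now
maps onto the new patterns looks like this (the snippet is only a
sketch, not taken from the patch or its tests):

  typedef int v4si __attribute__((vector_size(16)));

  /* (1) Generic vector comparison: each lane of the result is -1 if
     the comparison holds and 0 otherwise, which exercises the new
     vec_cmp (and vcond_mask) patterns.  */
  v4si eq (v4si a, v4si b) { return a == b; }

  /* (3) Integer select controlled by a floating-point comparison;
     when vectorized (-ftree-vectorize, plus -funsafe-math-optimizations
     for the float comparison) this wants the new mixed-mode vcond.  */
  void
  sel (int *x, float *a, float *b)
  {
    for (int i = 0; i < 16; ++i)
      x[i] = a[i] > b[i] ? 2 : 0;
  }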

Unfortunately it proved too difficult (for me) to separate this
out into a series of smaller patches, since everything is so
inter-related.  Defining only some of the new patterns does
not leave things in a happy state.

The handling of comparisons is mostly taken from the vcond patterns.
This means that it remains non-compliant with IEEE: “quiet” comparisons
use signalling instructions.  But that shouldn't matter in practice,
since we already require -funsafe-math-optimizations in order to
vectorize float comparisons.
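
For example (again only an illustrative sketch, not part of the patch),
a quiet test such as the one below, essentially what the new
gcc.target/arm/neon-compare-3.c test exercises, is implemented via a
plain vcge.f32, the same instruction that the signalling a[i] >= b[i]
would use, with the result then inverted:

  void
  cmp_ult (int *x, float *a, float *b)
  {
    /* UNLT, i.e. quiet !(a >= b): true if a < b or if either input
       is a NaN.  */
    for (int i = 0; i < 16; ++i)
      x[i] = !__builtin_isgreaterequal (a[i], b[i]) ? 2 : 0;
  }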

It remains the case that comparisons and selects aren't implemented
at all for HF vectors.  Implementing those feels like separate work.

gcc/
PR target/96528
PR target/97288
* config/arm/arm-protos.h (arm_expand_vector_compare): Declare.
(arm_expand_vcond): Likewise.
* config/arm/arm.c (arm_expand_vector_compare): New function.
(arm_expand_vcond): Likewise.
* config/arm/neon.md (vec_cmp<VDQW:mode><v_cmp_result>): New pattern.
(vec_cmpu<VDQIW:mode><VDQIW:mode>): Likewise.
(vcond<VDQW:mode><VDQW:mode>): Require operand 5 to be a register
or zero.  Use arm_expand_vcond.
(vcond<V_cvtto><V32:mode>): New pattern.
(vcondu<VDQIW:mode><VDQIW:mode>): Generalize to...
(vcondu<VDQW:mode><v_cmp_result>): ...this.  Require operand 5
to be a register or zero.  Use arm_expand_vcond.
(vcond_mask_<VDQW:mode><v_cmp_result>): New pattern.
(neon_vc<cmp_op><mode>, neon_vc<cmp_op><mode>_insn): Add "@" marker.
(neon_vbsl<mode>): Likewise.
(neon_vc<cmp_op>u<mode>): Reexpress as...
(@neon_vc<code><mode>): ...this.

gcc/testsuite/
* lib/target-supports.exp (check_effective_target_vect_cond_mixed): Add
arm neon targets.
* gcc.target/arm/neon-compare-1.c: New test.
* gcc.target/arm/neon-compare-2.c: Likewise.
* gcc.target/arm/neon-compare-3.c: Likewise.
* gcc.target/arm/neon-compare-4.c: Likewise.
* gcc.target/arm/neon-compare-5.c: Likewise.
* gcc.target/arm/neon-vcond-gt.c: Expect comparisons with zero.
* gcc.target/arm/neon-vcond-ltgt.c: Likewise.
* gcc.target/arm/neon-vcond-unordered.c: Likewise.

12 files changed:
gcc/config/arm/arm-protos.h
gcc/config/arm/arm.c
gcc/config/arm/neon.md
gcc/testsuite/gcc.target/arm/neon-compare-1.c [new file with mode: 0644]
gcc/testsuite/gcc.target/arm/neon-compare-2.c [new file with mode: 0644]
gcc/testsuite/gcc.target/arm/neon-compare-3.c [new file with mode: 0644]
gcc/testsuite/gcc.target/arm/neon-compare-4.c [new file with mode: 0644]
gcc/testsuite/gcc.target/arm/neon-compare-5.c [new file with mode: 0644]
gcc/testsuite/gcc.target/arm/neon-vcond-gt.c
gcc/testsuite/gcc.target/arm/neon-vcond-ltgt.c
gcc/testsuite/gcc.target/arm/neon-vcond-unordered.c
gcc/testsuite/lib/target-supports.exp

index 9bb9c61967b59d32c2f12417dd700708568ea482..703d6160c242003ed67d6ccab718c39f1fbf7c14 100644 (file)
@@ -372,9 +372,11 @@ extern void arm_emit_coreregs_64bit_shift (enum rtx_code, rtx, rtx, rtx, rtx,
 extern bool arm_fusion_enabled_p (tune_params::fuse_ops);
 extern bool arm_valid_symbolic_address_p (rtx);
 extern bool arm_validize_comparison (rtx *, rtx *, rtx *);
+extern bool arm_expand_vector_compare (rtx, rtx_code, rtx, rtx, bool);
 #endif /* RTX_CODE */
 
 extern bool arm_gen_setmem (rtx *);
+extern void arm_expand_vcond (rtx *, machine_mode);
 extern void arm_expand_vec_perm (rtx target, rtx op0, rtx op1, rtx sel);
 
 extern bool arm_autoinc_modes_ok_p (machine_mode, enum arm_auto_incmodes);
index 8105b39e7a41b47ccf8ff2173c914c670eb1153c..0e23246c27b2a0a53da4ab0fdad8c2eb904fc5ca 100644 (file)
@@ -30634,6 +30634,127 @@ arm_split_atomic_op (enum rtx_code code, rtx old_out, rtx new_out, rtx mem,
     arm_post_atomic_barrier (model);
 }
 \f
+/* Expand code to compare vectors OP0 and OP1 using condition CODE.
+   If CAN_INVERT, store either the result or its inverse in TARGET
+   and return true if TARGET contains the inverse.  If !CAN_INVERT,
+   always store the result in TARGET, never its inverse.
+
+   Note that the handling of floating-point comparisons is not
+   IEEE compliant.  */
+
+bool
+arm_expand_vector_compare (rtx target, rtx_code code, rtx op0, rtx op1,
+                          bool can_invert)
+{
+  machine_mode cmp_result_mode = GET_MODE (target);
+  machine_mode cmp_mode = GET_MODE (op0);
+
+  bool inverted;
+  switch (code)
+    {
+    /* For these we need to compute the inverse of the requested
+       comparison.  */
+    case UNORDERED:
+    case UNLT:
+    case UNLE:
+    case UNGT:
+    case UNGE:
+    case UNEQ:
+    case NE:
+      code = reverse_condition_maybe_unordered (code);
+      if (!can_invert)
+       {
+         /* Recursively emit the inverted comparison into a temporary
+            and then store its inverse in TARGET.  This avoids reusing
+            TARGET (which for integer NE could be one of the inputs).  */
+         rtx tmp = gen_reg_rtx (cmp_result_mode);
+         if (arm_expand_vector_compare (tmp, code, op0, op1, true))
+           gcc_unreachable ();
+         emit_insn (gen_rtx_SET (target, gen_rtx_NOT (cmp_result_mode, tmp)));
+         return false;
+       }
+      inverted = true;
+      break;
+
+    default:
+      inverted = false;
+      break;
+    }
+
+  switch (code)
+    {
+    /* These are natively supported for zero comparisons, but otherwise
+       require the operands to be swapped.  */
+    case LE:
+    case LT:
+      if (op1 != CONST0_RTX (cmp_mode))
+       {
+         code = swap_condition (code);
+         std::swap (op0, op1);
+       }
+      /* Fall through.  */
+
+    /* These are natively supported for both register and zero operands.  */
+    case EQ:
+    case GE:
+    case GT:
+      emit_insn (gen_neon_vc (code, cmp_mode, target, op0, op1));
+      return inverted;
+
+    /* These are natively supported for register operands only.
+       Comparisons with zero aren't useful and should be folded
+       or canonicalized by target-independent code.  */
+    case GEU:
+    case GTU:
+      emit_insn (gen_neon_vc (code, cmp_mode, target,
+                             op0, force_reg (cmp_mode, op1)));
+      return inverted;
+
+    /* These require the operands to be swapped and likewise do not
+       support comparisons with zero.  */
+    case LEU:
+    case LTU:
+      emit_insn (gen_neon_vc (swap_condition (code), cmp_mode,
+                             target, force_reg (cmp_mode, op1), op0));
+      return inverted;
+
+    /* These need a combination of two comparisons.  */
+    case LTGT:
+    case ORDERED:
+      {
+       /* Operands are LTGT iff (a > b || a < b).
+          Operands are ORDERED iff (a > b || a <= b).  */
+       rtx gt_res = gen_reg_rtx (cmp_result_mode);
+       rtx alt_res = gen_reg_rtx (cmp_result_mode);
+       rtx_code alt_code = (code == LTGT ? LT : LE);
+       if (arm_expand_vector_compare (gt_res, GT, op0, op1, true)
+           || arm_expand_vector_compare (alt_res, alt_code, op0, op1, true))
+         gcc_unreachable ();
+       emit_insn (gen_rtx_SET (target, gen_rtx_IOR (cmp_result_mode,
+                                                    gt_res, alt_res)));
+       return inverted;
+      }
+
+    default:
+      gcc_unreachable ();
+    }
+}
+
+/* Expand a vcond or vcondu pattern with operands OPERANDS.
+   CMP_RESULT_MODE is the mode of the comparison result.  */
+
+void
+arm_expand_vcond (rtx *operands, machine_mode cmp_result_mode)
+{
+  rtx mask = gen_reg_rtx (cmp_result_mode);
+  bool inverted = arm_expand_vector_compare (mask, GET_CODE (operands[3]),
+                                            operands[4], operands[5], true);
+  if (inverted)
+    std::swap (operands[1], operands[2]);
+  emit_insn (gen_neon_vbsl (GET_MODE (operands[0]), operands[0],
+                           mask, operands[1], operands[2]));
+}
+\f
 #define MAX_VECT_LEN 16
 
 struct expand_vec_perm_d
index 96bf277f5016c1c8b3c42ca396157c8fd07d7b20..58832cbf484a706bce9d98d13f7fe1e33fd47a23 100644 (file)
   [(set_attr "type" "neon_qsub<q>")]
 )
 
+(define_expand "vec_cmp<mode><v_cmp_result>"
+  [(set (match_operand:<V_cmp_result> 0 "s_register_operand")
+       (match_operator:<V_cmp_result> 1 "comparison_operator"
+         [(match_operand:VDQW 2 "s_register_operand")
+          (match_operand:VDQW 3 "reg_or_zero_operand")]))]
+  "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
+{
+  arm_expand_vector_compare (operands[0], GET_CODE (operands[1]),
+                            operands[2], operands[3], false);
+  DONE;
+})
+
+(define_expand "vec_cmpu<mode><mode>"
+  [(set (match_operand:VDQIW 0 "s_register_operand")
+       (match_operator:VDQIW 1 "comparison_operator"
+         [(match_operand:VDQIW 2 "s_register_operand")
+          (match_operand:VDQIW 3 "reg_or_zero_operand")]))]
+  "TARGET_NEON"
+{
+  arm_expand_vector_compare (operands[0], GET_CODE (operands[1]),
+                            operands[2], operands[3], false);
+  DONE;
+})
+
 ;; Conditional instructions.  These are comparisons with conditional moves for
 ;; vectors.  They perform the assignment:
 ;;   
        (if_then_else:VDQW
          (match_operator 3 "comparison_operator"
            [(match_operand:VDQW 4 "s_register_operand")
-            (match_operand:VDQW 5 "nonmemory_operand")])
+            (match_operand:VDQW 5 "reg_or_zero_operand")])
          (match_operand:VDQW 1 "s_register_operand")
          (match_operand:VDQW 2 "s_register_operand")))]
   "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
 {
-  int inverse = 0;
-  int use_zero_form = 0;
-  int swap_bsl_operands = 0;
-  rtx mask = gen_reg_rtx (<V_cmp_result>mode);
-  rtx tmp = gen_reg_rtx (<V_cmp_result>mode);
-
-  rtx (*base_comparison) (rtx, rtx, rtx);
-  rtx (*complimentary_comparison) (rtx, rtx, rtx);
-
-  switch (GET_CODE (operands[3]))
-    {
-    case GE:
-    case GT:
-    case LE:
-    case LT:
-    case EQ:
-      if (operands[5] == CONST0_RTX (<MODE>mode))
-       {
-         use_zero_form = 1;
-         break;
-       }
-      /* Fall through.  */
-    default:
-      if (!REG_P (operands[5]))
-       operands[5] = force_reg (<MODE>mode, operands[5]);
-    }
-
-  switch (GET_CODE (operands[3]))
-    {
-    case LT:
-    case UNLT:
-      inverse = 1;
-      /* Fall through.  */
-    case GE:
-    case UNGE:
-    case ORDERED:
-    case UNORDERED:
-      base_comparison = gen_neon_vcge<mode>;
-      complimentary_comparison = gen_neon_vcgt<mode>;
-      break;
-    case LE:
-    case UNLE:
-      inverse = 1;
-      /* Fall through.  */
-    case GT:
-    case UNGT:
-      base_comparison = gen_neon_vcgt<mode>;
-      complimentary_comparison = gen_neon_vcge<mode>;
-      break;
-    case EQ:
-    case NE:
-    case UNEQ:
-      base_comparison = gen_neon_vceq<mode>;
-      complimentary_comparison = gen_neon_vceq<mode>;
-      break;
-    default:
-      gcc_unreachable ();
-    }
-
-  switch (GET_CODE (operands[3]))
-    {
-    case LT:
-    case LE:
-    case GT:
-    case GE:
-    case EQ:
-      /* The easy case.  Here we emit one of vcge, vcgt or vceq.
-        As a LT b <=> b GE a && a LE b <=> b GT a.  Our transformations are:
-        a GE b -> a GE b
-        a GT b -> a GT b
-        a LE b -> b GE a
-        a LT b -> b GT a
-        a EQ b -> a EQ b
-        Note that there also exist direct comparison against 0 forms,
-        so catch those as a special case.  */
-      if (use_zero_form)
-       {
-         inverse = 0;
-         switch (GET_CODE (operands[3]))
-           {
-           case LT:
-             base_comparison = gen_neon_vclt<mode>;
-             break;
-           case LE:
-             base_comparison = gen_neon_vcle<mode>;
-             break;
-           default:
-             /* Do nothing, other zero form cases already have the correct
-                base_comparison.  */
-             break;
-           }
-       }
-
-      if (!inverse)
-       emit_insn (base_comparison (mask, operands[4], operands[5]));
-      else
-       emit_insn (complimentary_comparison (mask, operands[5], operands[4]));
-      break;
-    case UNLT:
-    case UNLE:
-    case UNGT:
-    case UNGE:
-    case NE:
-      /* Vector compare returns false for lanes which are unordered, so if we use
-        the inverse of the comparison we actually want to emit, then
-        swap the operands to BSL, we will end up with the correct result.
-        Note that a NE NaN and NaN NE b are true for all a, b.
-
-        Our transformations are:
-        a GE b -> !(b GT a)
-        a GT b -> !(b GE a)
-        a LE b -> !(a GT b)
-        a LT b -> !(a GE b)
-        a NE b -> !(a EQ b)  */
-
-      if (inverse)
-       emit_insn (base_comparison (mask, operands[4], operands[5]));
-      else
-       emit_insn (complimentary_comparison (mask, operands[5], operands[4]));
-
-      swap_bsl_operands = 1;
-      break;
-    case UNEQ:
-      /* We check (a > b ||  b > a).  combining these comparisons give us
-        true iff !(a != b && a ORDERED b), swapping the operands to BSL
-        will then give us (a == b ||  a UNORDERED b) as intended.  */
-
-      emit_insn (gen_neon_vcgt<mode> (mask, operands[4], operands[5]));
-      emit_insn (gen_neon_vcgt<mode> (tmp, operands[5], operands[4]));
-      emit_insn (gen_ior<v_cmp_result>3 (mask, mask, tmp));
-      swap_bsl_operands = 1;
-      break;
-    case UNORDERED:
-       /* Operands are ORDERED iff (a > b || b >= a).
-        Swapping the operands to BSL will give the UNORDERED case.  */
-     swap_bsl_operands = 1;
-     /* Fall through.  */
-    case ORDERED:
-      emit_insn (gen_neon_vcgt<mode> (tmp, operands[4], operands[5]));
-      emit_insn (gen_neon_vcge<mode> (mask, operands[5], operands[4]));
-      emit_insn (gen_ior<v_cmp_result>3 (mask, mask, tmp));
-      break;
-    default:
-      gcc_unreachable ();
-    }
+  arm_expand_vcond (operands, <V_cmp_result>mode);
+  DONE;
+})
 
-  if (swap_bsl_operands)
-    emit_insn (gen_neon_vbsl<mode> (operands[0], mask, operands[2],
-                                   operands[1]));
-  else
-    emit_insn (gen_neon_vbsl<mode> (operands[0], mask, operands[1],
-                                   operands[2]));
+(define_expand "vcond<V_cvtto><mode>"
+  [(set (match_operand:<V_CVTTO> 0 "s_register_operand")
+       (if_then_else:<V_CVTTO>
+         (match_operator 3 "comparison_operator"
+           [(match_operand:V32 4 "s_register_operand")
+            (match_operand:V32 5 "reg_or_zero_operand")])
+         (match_operand:<V_CVTTO> 1 "s_register_operand")
+         (match_operand:<V_CVTTO> 2 "s_register_operand")))]
+  "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
+{
+  arm_expand_vcond (operands, <V_cmp_result>mode);
   DONE;
 })
 
-(define_expand "vcondu<mode><mode>"
-  [(set (match_operand:VDQIW 0 "s_register_operand")
-       (if_then_else:VDQIW
+(define_expand "vcondu<mode><v_cmp_result>"
+  [(set (match_operand:VDQW 0 "s_register_operand")
+       (if_then_else:VDQW
          (match_operator 3 "arm_comparison_operator"
-           [(match_operand:VDQIW 4 "s_register_operand")
-            (match_operand:VDQIW 5 "s_register_operand")])
-         (match_operand:VDQIW 1 "s_register_operand")
-         (match_operand:VDQIW 2 "s_register_operand")))]
+           [(match_operand:<V_cmp_result> 4 "s_register_operand")
+            (match_operand:<V_cmp_result> 5 "reg_or_zero_operand")])
+         (match_operand:VDQW 1 "s_register_operand")
+         (match_operand:VDQW 2 "s_register_operand")))]
   "TARGET_NEON"
 {
-  rtx mask;
-  int inverse = 0, immediate_zero = 0;
-  
-  mask = gen_reg_rtx (<V_cmp_result>mode);
-  
-  if (operands[5] == CONST0_RTX (<MODE>mode))
-    immediate_zero = 1;
-  else if (!REG_P (operands[5]))
-    operands[5] = force_reg (<MODE>mode, operands[5]);
-  
-  switch (GET_CODE (operands[3]))
-    {
-    case GEU:
-      emit_insn (gen_neon_vcgeu<mode> (mask, operands[4], operands[5]));
-      break;
-    
-    case GTU:
-      emit_insn (gen_neon_vcgtu<mode> (mask, operands[4], operands[5]));
-      break;
-    
-    case EQ:
-      emit_insn (gen_neon_vceq<mode> (mask, operands[4], operands[5]));
-      break;
-    
-    case LEU:
-      if (immediate_zero)
-       emit_insn (gen_neon_vcle<mode> (mask, operands[4], operands[5]));
-      else
-       emit_insn (gen_neon_vcgeu<mode> (mask, operands[5], operands[4]));
-      break;
-    
-    case LTU:
-      if (immediate_zero)
-        emit_insn (gen_neon_vclt<mode> (mask, operands[4], operands[5]));
-      else
-       emit_insn (gen_neon_vcgtu<mode> (mask, operands[5], operands[4]));
-      break;
-    
-    case NE:
-      emit_insn (gen_neon_vceq<mode> (mask, operands[4], operands[5]));
-      inverse = 1;
-      break;
-    
-    default:
-      gcc_unreachable ();
-    }
-  
-  if (inverse)
-    emit_insn (gen_neon_vbsl<mode> (operands[0], mask, operands[2],
-                                   operands[1]));
-  else
-    emit_insn (gen_neon_vbsl<mode> (operands[0], mask, operands[1],
-                                   operands[2]));
+  arm_expand_vcond (operands, <V_cmp_result>mode);
+  DONE;
+})
 
+(define_expand "vcond_mask_<mode><v_cmp_result>"
+  [(set (match_operand:VDQW 0 "s_register_operand")
+       (if_then_else:VDQW
+         (match_operand:<V_cmp_result> 3 "s_register_operand")
+         (match_operand:VDQW 1 "s_register_operand")
+         (match_operand:VDQW 2 "s_register_operand")))]
+  "TARGET_NEON"
+{
+  emit_insn (gen_neon_vbsl<mode> (operands[0], operands[3], operands[1],
+                                 operands[2]));
   DONE;
 })
 
 
 ;; These may expand to an UNSPEC pattern when a floating point mode is used
 ;; without unsafe math optimizations.
-(define_expand "neon_vc<cmp_op><mode>"
+(define_expand "@neon_vc<cmp_op><mode>"
   [(match_operand:<V_cmp_result> 0 "s_register_operand")
      (neg:<V_cmp_result>
        (COMPARISONS:VDQW (match_operand:VDQW 1 "s_register_operand")
   }
 )
 
-(define_insn "neon_vc<cmp_op><mode>_insn"
+(define_insn "@neon_vc<cmp_op><mode>_insn"
   [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w,w")
         (neg:<V_cmp_result>
           (COMPARISONS:<V_cmp_result>
   [(set_attr "type" "neon_fp_compare_s<q>")]
 )
 
-(define_expand "neon_vc<cmp_op><mode>"
+(define_expand "@neon_vc<cmp_op><mode>"
  [(match_operand:<V_cmp_result> 0 "s_register_operand")
   (neg:<V_cmp_result>
    (COMPARISONS:VH
 }
  [(set_attr "type" "neon_fp_compare_s<q>")])
 
-(define_insn "neon_vc<cmp_op>u<mode>"
+(define_insn "@neon_vc<code><mode>"
   [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w")
         (neg:<V_cmp_result>
           (GTUGEU:<V_cmp_result>
@@ -4708,7 +4555,7 @@ if (BYTES_BIG_ENDIAN)
   [(set_attr "type" "neon_bsl<q>")]
 )
 
-(define_expand "neon_vbsl<mode>"
+(define_expand "@neon_vbsl<mode>"
   [(set (match_operand:VDQX 0 "s_register_operand")
         (unspec:VDQX [(match_operand:<V_cmp_result> 1 "s_register_operand")
                       (match_operand:VDQX 2 "s_register_operand")
diff --git a/gcc/testsuite/gcc.target/arm/neon-compare-1.c b/gcc/testsuite/gcc.target/arm/neon-compare-1.c
new file mode 100644 (file)
index 0000000..c915eca
--- /dev/null
@@ -0,0 +1,84 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target arm_neon_ok } */
+/* { dg-options "-O1" }  */
+/* { dg-add-options arm_neon } */
+
+#define COMPARE_REG(NAME, OP, TYPE) \
+  TYPE \
+  cmp_##NAME##_##TYPE##_reg (TYPE a, TYPE b) \
+  { \
+    return a OP b; \
+  }
+
+#define COMPARE_REG_AND_ZERO(NAME, OP, TYPE) \
+  COMPARE_REG (NAME, OP, TYPE) \
+  \
+  TYPE \
+  cmp_##NAME##_##TYPE##_zero (TYPE a) \
+  { \
+    return a OP (TYPE) {}; \
+  }
+
+#define COMPARE_TYPE(TYPE, COMPARE_ORDERED) \
+  COMPARE_REG_AND_ZERO (eq, ==, TYPE) \
+  COMPARE_REG_AND_ZERO (ne, !=, TYPE) \
+  COMPARE_ORDERED (lt, <, TYPE) \
+  COMPARE_ORDERED (le, <=, TYPE) \
+  COMPARE_ORDERED (gt, >, TYPE) \
+  COMPARE_ORDERED (ge, >=, TYPE)
+
+#define TEST_TYPE(NAME, ELEM, COMPARE_ORDERED) \
+  typedef ELEM NAME __attribute__((vector_size(16))); \
+  COMPARE_TYPE (NAME, COMPARE_ORDERED)
+
+TEST_TYPE (vs8, __INT8_TYPE__, COMPARE_REG_AND_ZERO)
+TEST_TYPE (vu8, __UINT8_TYPE__, COMPARE_REG)
+TEST_TYPE (vs16, __INT16_TYPE__, COMPARE_REG_AND_ZERO)
+TEST_TYPE (vu16, __UINT16_TYPE__, COMPARE_REG)
+TEST_TYPE (vs32, __INT32_TYPE__, COMPARE_REG_AND_ZERO)
+TEST_TYPE (vu32, __UINT32_TYPE__, COMPARE_REG)
+
+/* { s8, u8 } x { eq, ne }.  */
+/* { dg-final { scan-assembler-times {\tvceq.i8\tq[0-9]+, q[0-9]+, q[0-9]+\n} 4 } } */
+/* { dg-final { scan-assembler-times {\tvceq.i8\tq[0-9]+, q[0-9]+, #0\n} 4 } } */
+
+/* { dg-final { scan-assembler-times {\tvcgt.s8\tq[0-9]+, q[0-9]+, q[0-9]+\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tvcgt.s8\tq[0-9]+, q[0-9]+, #0\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tvclt.s8\tq[0-9]+, q[0-9]+, #0\n} 1 } } */
+
+/* { dg-final { scan-assembler-times {\tvcge.s8\tq[0-9]+, q[0-9]+, q[0-9]+\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tvcge.s8\tq[0-9]+, q[0-9]+, #0\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tvcle.s8\tq[0-9]+, q[0-9]+, #0\n} 1 } } */
+
+/* { dg-final { scan-assembler-times {\tvcgt.u8\tq[0-9]+, q[0-9]+, q[0-9]+\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tvcge.u8\tq[0-9]+, q[0-9]+, q[0-9]+\n} 2 } } */
+
+/* { s16, u16 } x { eq, ne }.  */
+/* { dg-final { scan-assembler-times {\tvceq.i16\tq[0-9]+, q[0-9]+, q[0-9]+\n} 4 } } */
+/* { dg-final { scan-assembler-times {\tvceq.i16\tq[0-9]+, q[0-9]+, #0\n} 4 } } */
+
+/* { dg-final { scan-assembler-times {\tvcgt.s16\tq[0-9]+, q[0-9]+, q[0-9]+\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tvcgt.s16\tq[0-9]+, q[0-9]+, #0\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tvclt.s16\tq[0-9]+, q[0-9]+, #0\n} 1 } } */
+
+/* { dg-final { scan-assembler-times {\tvcge.s16\tq[0-9]+, q[0-9]+, q[0-9]+\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tvcge.s16\tq[0-9]+, q[0-9]+, #0\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tvcle.s16\tq[0-9]+, q[0-9]+, #0\n} 1 } } */
+
+/* { dg-final { scan-assembler-times {\tvcgt.u16\tq[0-9]+, q[0-9]+, q[0-9]+\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tvcge.u16\tq[0-9]+, q[0-9]+, q[0-9]+\n} 2 } } */
+
+/* { s32, u32 } x { eq, ne }.  */
+/* { dg-final { scan-assembler-times {\tvceq.i32\tq[0-9]+, q[0-9]+, q[0-9]+\n} 4 } } */
+/* { dg-final { scan-assembler-times {\tvceq.i32\tq[0-9]+, q[0-9]+, #0\n} 4 } } */
+
+/* { dg-final { scan-assembler-times {\tvcgt.s32\tq[0-9]+, q[0-9]+, q[0-9]+\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tvcgt.s32\tq[0-9]+, q[0-9]+, #0\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tvclt.s32\tq[0-9]+, q[0-9]+, #0\n} 1 } } */
+
+/* { dg-final { scan-assembler-times {\tvcge.s32\tq[0-9]+, q[0-9]+, q[0-9]+\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tvcge.s32\tq[0-9]+, q[0-9]+, #0\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tvcle.s32\tq[0-9]+, q[0-9]+, #0\n} 1 } } */
+
+/* { dg-final { scan-assembler-times {\tvcgt.u32\tq[0-9]+, q[0-9]+, q[0-9]+\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tvcge.u32\tq[0-9]+, q[0-9]+, q[0-9]+\n} 2 } } */
diff --git a/gcc/testsuite/gcc.target/arm/neon-compare-2.c b/gcc/testsuite/gcc.target/arm/neon-compare-2.c
new file mode 100644 (file)
index 0000000..559c5e5
--- /dev/null
@@ -0,0 +1,45 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target arm_neon_ok } */
+/* { dg-options "-O1 -funsafe-math-optimizations" }  */
+/* { dg-add-options arm_neon } */
+
+#ifndef ELEM_TYPE
+#define ELEM_TYPE float
+#endif
+#ifndef INT_ELEM_TYPE
+#define INT_ELEM_TYPE __INT32_TYPE__
+#endif
+
+#define COMPARE(NAME, OP) \
+  int_vec \
+  cmp_##NAME##_reg (vec a, vec b) \
+  { \
+    return a OP b; \
+  } \
+  \
+  int_vec \
+  cmp_##NAME##_zero (vec a) \
+  { \
+    return a OP (vec) {}; \
+  }
+
+typedef INT_ELEM_TYPE int_vec __attribute__((vector_size(16)));
+typedef ELEM_TYPE vec __attribute__((vector_size(16)));
+
+COMPARE (eq, ==)
+COMPARE (ne, !=)
+COMPARE (lt, <)
+COMPARE (le, <=)
+COMPARE (gt, >)
+COMPARE (ge, >=)
+
+/* { dg-final { scan-assembler-times {\tvceq.f32\tq[0-9]+, q[0-9]+, q[0-9]+\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tvceq.f32\tq[0-9]+, q[0-9]+, #0\n} 2 } } */
+
+/* { dg-final { scan-assembler-times {\tvcgt.f32\tq[0-9]+, q[0-9]+, q[0-9]+\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tvcgt.f32\tq[0-9]+, q[0-9]+, #0\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tvclt.f32\tq[0-9]+, q[0-9]+, #0\n} 1 } } */
+
+/* { dg-final { scan-assembler-times {\tvcge.f32\tq[0-9]+, q[0-9]+, q[0-9]+\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tvcge.f32\tq[0-9]+, q[0-9]+, #0\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tvcle.f32\tq[0-9]+, q[0-9]+, #0\n} 1 } } */
diff --git a/gcc/testsuite/gcc.target/arm/neon-compare-3.c b/gcc/testsuite/gcc.target/arm/neon-compare-3.c
new file mode 100644 (file)
index 0000000..efbe797
--- /dev/null
@@ -0,0 +1,44 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target arm_neon_ok } */
+/* { dg-options "-O1 -ftree-vectorize -funsafe-math-optimizations" }  */
+/* { dg-add-options arm_neon } */
+
+#define ult(a, b) (!__builtin_isgreaterequal (a, b))
+#define ule(a, b) (!__builtin_isgreater (a, b))
+#define ugt(a, b) (!__builtin_islessequal (a, b))
+#define uge(a, b) (!__builtin_isless (a, b))
+
+int x[16];
+float a[16];
+float b[16];
+
+#define COMPARE(NAME) \
+  void \
+  cmp_##NAME##_reg (void) \
+  { \
+    for (int i = 0; i < 16; ++i) \
+      x[i] = NAME (a[i], b[i]) ? 2 : 0; \
+  } \
+  \
+  void \
+  cmp_##NAME##_zero (void) \
+  { \
+    for (int i = 0; i < 16; ++i) \
+      x[i] = NAME (a[i], 0) ? 2 : 0; \
+  }
+
+typedef int int_vec __attribute__((vector_size(16)));
+typedef float vec __attribute__((vector_size(16)));
+
+COMPARE (ult)
+COMPARE (ule)
+COMPARE (ugt)
+COMPARE (uge)
+
+/* { dg-final { scan-assembler-times {\tvcgt.f32\tq[0-9]+, q[0-9]+, q[0-9]+\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tvcgt.f32\tq[0-9]+, q[0-9]+, #0\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tvclt.f32\tq[0-9]+, q[0-9]+, #0\n} 1 } } */
+
+/* { dg-final { scan-assembler-times {\tvcge.f32\tq[0-9]+, q[0-9]+, q[0-9]+\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tvcge.f32\tq[0-9]+, q[0-9]+, #0\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tvcle.f32\tq[0-9]+, q[0-9]+, #0\n} 1 } } */
diff --git a/gcc/testsuite/gcc.target/arm/neon-compare-4.c b/gcc/testsuite/gcc.target/arm/neon-compare-4.c
new file mode 100644 (file)
index 0000000..3f8cc90
--- /dev/null
@@ -0,0 +1,38 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target arm_neon_ok } */
+/* { dg-options "-O1 -ftree-vectorize -funsafe-math-optimizations" }  */
+/* { dg-add-options arm_neon } */
+
+#define ordered(a, b) (!__builtin_isunordered (a, b))
+#define unordered(a, b) (__builtin_isunordered (a, b))
+
+int x[16];
+float a[16];
+float b[16];
+
+#define COMPARE(NAME) \
+  void \
+  cmp_##NAME##_reg (void) \
+  { \
+    for (int i = 0; i < 16; ++i) \
+      x[i] = NAME (a[i], b[i]) ? 2 : 0; \
+  } \
+  \
+  void \
+  cmp_##NAME##_zero (void) \
+  { \
+    for (int i = 0; i < 16; ++i) \
+      x[i] = NAME (a[i], 0) ? 2 : 0; \
+  }
+
+typedef int int_vec __attribute__((vector_size(16)));
+typedef float vec __attribute__((vector_size(16)));
+
+COMPARE (ordered)
+COMPARE (unordered)
+
+/* { dg-final { scan-assembler-times {\tvcgt.f32\tq[0-9]+, q[0-9]+, q[0-9]+\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tvcgt.f32\tq[0-9]+, q[0-9]+, #0\n} 2 } } */
+
+/* { dg-final { scan-assembler-times {\tvcge.f32\tq[0-9]+, q[0-9]+, q[0-9]+\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tvcle.f32\tq[0-9]+, q[0-9]+, #0\n} 2 } } */
diff --git a/gcc/testsuite/gcc.target/arm/neon-compare-5.c b/gcc/testsuite/gcc.target/arm/neon-compare-5.c
new file mode 100644 (file)
index 0000000..cb6428d
--- /dev/null
@@ -0,0 +1,37 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target arm_neon_ok } */
+/* { dg-options "-O1 -ftree-vectorize -funsafe-math-optimizations" }  */
+/* { dg-add-options arm_neon } */
+
+#define uneq(a, b) (!__builtin_islessgreater (a, b))
+/* RTL's LTGT is a signaling comparison.  */
+#define ltgt(a, b) (a < b || b < a)
+
+int x[16];
+float a[16];
+float b[16];
+
+#define COMPARE(NAME) \
+  void \
+  cmp_##NAME##_reg (void) \
+  { \
+    for (int i = 0; i < 16; ++i) \
+      x[i] = NAME (a[i], b[i]) ? 2 : 0; \
+  } \
+  \
+  void \
+  cmp_##NAME##_zero (void) \
+  { \
+    for (int i = 0; i < 16; ++i) \
+      x[i] = NAME (a[i], 0) ? 2 : 0; \
+  }
+
+typedef int int_vec __attribute__((vector_size(16)));
+typedef float vec __attribute__((vector_size(16)));
+
+COMPARE (uneq)
+COMPARE (ltgt)
+
+/* { dg-final { scan-assembler-times {\tvcgt.f32\tq[0-9]+, q[0-9]+, q[0-9]+\n} 4 } } */
+/* { dg-final { scan-assembler-times {\tvcgt.f32\tq[0-9]+, q[0-9]+, #0\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tvclt.f32\tq[0-9]+, q[0-9]+, #0\n} 2 } } */
index 8e9f37851698b0d85bd053ebad7800e0c1b4aca3..9f601a169d1022a1acc8ae0628a1c78e790201ae 100644 (file)
@@ -13,5 +13,5 @@ void foo (int ilast,float* w, float* w2)
   }
 }
 
-/* { dg-final { scan-assembler "vcgt\\.f32\[\\t \]*q\[0-9\]+,\[\\t \]*q\[0-9\]+,\[\\t \]*q\[0-9\]+" } } */
+/* { dg-final { scan-assembler "vclt\\.f32\[\\t \]*q\[0-9\]+,\[\\t \]*q\[0-9\]+,\[\\t \]*#0" } } */
 /* { dg-final { scan-assembler "vbsl|vbit|vbif\[\\t \]*q\[0-9\]+,\[\\t \]*q\[0-9\]+,\[\\t \]*q\[0-9\]+" } } */
index c8306e364a3fc3f6c41e9fb0a71c100e05872830..74bc22046ad76f8a165ee91bf5452a883cc23245 100644 (file)
@@ -13,6 +13,7 @@ void foo (int ilast,float* w, float* w2)
   }
 }
 
-/* { dg-final { scan-assembler-times "vcgt\\.f32\[\\t \]*q\[0-9\]+,\[\\t \]*q\[0-9\]+,\[\\t \]*q\[0-9\]+" 2 } } */
+/* { dg-final { scan-assembler "vclt\\.f32\[\\t \]*q\[0-9\]+,\[\\t \]*q\[0-9\]+,\[\\t \]*#0" } } */
+/* { dg-final { scan-assembler "vcgt\\.f32\[\\t \]*q\[0-9\]+,\[\\t \]*q\[0-9\]+,\[\\t \]*#0" } } */
 /* { dg-final { scan-assembler "vorr\[\\t \]*q\[0-9\]+,\[\\t \]*q\[0-9\]+,\[\\t \]*q\[0-9\]+" } } */
 /* { dg-final { scan-assembler "vbsl|vbit|vbif\[\\t \]*q\[0-9\]+,\[\\t \]*q\[0-9\]+,\[\\t \]*q\[0-9\]+" } } */
index 3bb67d3afe3e05d8104fedd34d58882fb3ed64e0..8d3187541e221b6a1edd756f4d54c16ebf18f2c1 100644 (file)
@@ -13,7 +13,7 @@ void foo (int ilast,float* w, float* w2)
   }
 }
 
-/* { dg-final { scan-assembler "vcgt\\.f32\[\\t \]*q\[0-9\]+,\[\\t \]*q\[0-9\]+,\[\\t \]*q\[0-9\]+" } } */
-/* { dg-final { scan-assembler "vcge\\.f32\[\\t \]*q\[0-9\]+,\[\\t \]*q\[0-9\]+,\[\\t \]*q\[0-9\]+" } } */
+/* { dg-final { scan-assembler "vcgt\\.f32\[\\t \]*q\[0-9\]+,\[\\t \]*q\[0-9\]+,\[\\t \]*#0" } } */
+/* { dg-final { scan-assembler "vcle\\.f32\[\\t \]*q\[0-9\]+,\[\\t \]*q\[0-9\]+,\[\\t \]*#0" } } */
 /* { dg-final { scan-assembler "vorr\[\\t \]*q\[0-9\]+,\[\\t \]*q\[0-9\]+,\[\\t \]*q\[0-9\]+" } } */
 /* { dg-final { scan-assembler "vbsl|vbit|vbif\[\\t \]*q\[0-9\]+,\[\\t \]*q\[0-9\]+,\[\\t \]*q\[0-9\]+" } } */
index 38af6784de84d7da97f607e8bda197481a6d4e3d..15f0649f8aebdbce28016256df4bbd05d3a81d1a 100644 (file)
@@ -7233,6 +7233,8 @@ proc check_effective_target_vect_cond_mixed { } {
       expr { [istarget i?86-*-*] || [istarget x86_64-*-*]
             || [istarget aarch64*-*-*]
             || [istarget powerpc*-*-*] 
+            || ([istarget arm*-*-*]
+                && [check_effective_target_arm_neon_ok])
             || ([istarget mips*-*-*]
                 && [et-is-effective-target mips_msa])
             || ([istarget s390*-*-*]