+2019-10-31 Richard Sandiford <richard.sandiford@arm.com>
+
+ * gcc.target/aarch64/sve/vcond_4.c: Split parts out into...
+ * gcc.target/aarch64/sve/vcond_4_costly.c,
+ * gcc.target/aarch64/sve/vcond_4_sel.c,
+ * gcc.target/aarch64/sve/vcond_4_zero.c: ...these new tests.
+ * gcc.target/aarch64/sve/vcond_4_run.c: Adjust accordingly.
+ * gcc.target/aarch64/sve/vcond_5.c: Likewise.
+
2019-10-31 Richard Sandiford <richard.sandiford@arm.com>
* gcc.target/aarch64/sve/reduc_strict_3.c: Split all but the
{ \
for (int i = 0; i < count; ++i) \
dest[i] = CMP (a[i], b[i]) ? src[i] : fallback; \
- } \
- \
- void __attribute__ ((noinline, noclone)) \
- test_##TYPE1##_##TYPE2##_##CMP##_zero (TYPE1 *restrict dest, \
- TYPE1 *restrict src, \
- TYPE1 fallback, \
- TYPE2 *restrict a, \
- int count) \
- { \
- for (int i = 0; i < count; ++i) \
- dest[i] = CMP (a[i], 0) ? src[i] : fallback; \
- } \
- \
- void __attribute__ ((noinline, noclone)) \
- test_##TYPE1##_##TYPE2##_##CMP##_sel (TYPE1 *restrict dest, \
- TYPE1 if_true, \
- TYPE1 if_false, \
- TYPE2 *restrict a, \
- TYPE2 b, int count) \
- { \
- for (int i = 0; i < count; ++i) \
- dest[i] = CMP (a[i], b) ? if_true : if_false; \
}
#define TEST_CMP(CMP) \
TEST_LOOP (int32_t, float, CMP) \
TEST_LOOP (uint32_t, float, CMP) \
- TEST_LOOP (int64_t, float, CMP) \
- TEST_LOOP (uint64_t, float, CMP) \
TEST_LOOP (float, float, CMP) \
- TEST_LOOP (int32_t, double, CMP) \
- TEST_LOOP (uint32_t, double, CMP) \
TEST_LOOP (int64_t, double, CMP) \
TEST_LOOP (uint64_t, double, CMP) \
TEST_LOOP (double, double, CMP)
TEST_CMP (nuge)
TEST_CMP (nugt)
-/* See PR 86753 for the reason behind the XFAILs. */
-
-/* { dg-final { scan-assembler-times {\tfcmeq\tp[0-9]+\.s, p[0-7]/z, z[0-9]+\.s, #0\.0\n} 5 { xfail *-*-* } } } */
-/* { dg-final { scan-assembler-times {\tfcmeq\tp[0-9]+\.s, p[0-7]/z, z[0-9]+\.s, z[0-9]+\.s\n} 10 { xfail *-*-* } } } */
-
-/* 5 for ne, 5 for ueq and 5 for nueq. */
-/* { dg-final { scan-assembler-times {\tfcmne\tp[0-9]+\.s, p[0-7]/z, z[0-9]+\.s, #0\.0\n} 15 { xfail *-*-* } } } */
-/* { dg-final { scan-assembler-times {\tfcmne\tp[0-9]+\.s, p[0-7]/z, z[0-9]+\.s, z[0-9]+\.s\n} 30 { xfail *-*-* } } } */
-
-/* 5 for lt, 5 for ult and 5 for nult. */
-/* { dg-final { scan-assembler-times {\tfcmlt\tp[0-9]+\.s, p[0-7]/z, z[0-9]+\.s, #0\.0\n} 15 } } */
-/* { dg-final { scan-assembler-times {\tfcmlt\tp[0-9]+\.s, p[0-7]/z, z[0-9]+\.s, z[0-9]+\.s\n} 30 } } */
-
-/* 5 for le, 5 for ule and 5 for nule. */
-/* { dg-final { scan-assembler-times {\tfcmle\tp[0-9]+\.s, p[0-7]/z, z[0-9]+\.s, #0\.0\n} 15 } } */
-/* { dg-final { scan-assembler-times {\tfcmle\tp[0-9]+\.s, p[0-7]/z, z[0-9]+\.s, z[0-9]+\.s\n} 30 } } */
-
-/* 5 for gt, 5 for ugt and 5 for nugt. */
-/* { dg-final { scan-assembler-times {\tfcmgt\tp[0-9]+\.s, p[0-7]/z, z[0-9]+\.s, #0\.0\n} 15 } } */
-/* { dg-final { scan-assembler-times {\tfcmgt\tp[0-9]+\.s, p[0-7]/z, z[0-9]+\.s, z[0-9]+\.s\n} 30 } } */
-
-/* 5 for ge, 5 for uge and 5 for nuge. */
-/* { dg-final { scan-assembler-times {\tfcmge\tp[0-9]+\.s, p[0-7]/z, z[0-9]+\.s, #0\.0\n} 15 } } */
-/* { dg-final { scan-assembler-times {\tfcmge\tp[0-9]+\.s, p[0-7]/z, z[0-9]+\.s, z[0-9]+\.s\n} 30 } } */
-
-/* { dg-final { scan-assembler-not {\tfcmuo\tp[0-9]+\.s, p[0-7]/z, z[0-9]+\.s, #0\.0\n} } } */
-/* 3 loops * 5 invocations for all 12 unordered comparisons. */
-/* { dg-final { scan-assembler-times {\tfcmuo\tp[0-9]+\.s, p[0-7]/z, z[0-9]+\.s, z[0-9]+\.s\n} 180 } } */
-
-/* { dg-final { scan-assembler-times {\tfcmeq\tp[0-9]+\.d, p[0-7]/z, z[0-9]+\.d, #0\.0\n} 7 { xfail *-*-* } } } */
-/* { dg-final { scan-assembler-times {\tfcmeq\tp[0-9]+\.d, p[0-7]/z, z[0-9]+\.d, z[0-9]+\.d\n} 14 { xfail *-*-* } } } */
+/* { dg-final { scan-assembler-times {\tfcmeq\tp[0-9]+\.s, p[0-7]/z, z[0-9]+\.s, z[0-9]+\.s\n} 3 } } */
+/* { dg-final { scan-assembler-times {\tfcmeq\tp[0-9]+\.d, p[0-7]/z, z[0-9]+\.d, z[0-9]+\.d\n} 3 } } */
-/* { dg-final { scan-assembler-times {\tfcmne\tp[0-9]+\.d, p[0-7]/z, z[0-9]+\.d, #0\.0\n} 21 { xfail *-*-* } } } */
-/* { dg-final { scan-assembler-times {\tfcmne\tp[0-9]+\.d, p[0-7]/z, z[0-9]+\.d, z[0-9]+\.d\n} 42 { xfail *-*-* } } } */
+/* 3 for ne, 3 for ueq and 3 for nueq. */
+/* { dg-final { scan-assembler-times {\tfcmne\tp[0-9]+\.s, p[0-7]/z, z[0-9]+\.s, z[0-9]+\.s\n} 9 } } */
+/* { dg-final { scan-assembler-times {\tfcmne\tp[0-9]+\.d, p[0-7]/z, z[0-9]+\.d, z[0-9]+\.d\n} 9 } } */
-/* { dg-final { scan-assembler-times {\tfcmlt\tp[0-9]+\.d, p[0-7]/z, z[0-9]+\.d, #0\.0\n} 21 } } */
-/* { dg-final { scan-assembler-times {\tfcmlt\tp[0-9]+\.d, p[0-7]/z, z[0-9]+\.d, z[0-9]+\.d\n} 42 } } */
+/* 3 for olt, 3 for ult and 3 for nult. */
+/* { dg-final { scan-assembler-times {\tfcmlt\tp[0-9]+\.s, p[0-7]/z, z[0-9]+\.s, z[0-9]+\.s\n} 9 } } */
+/* { dg-final { scan-assembler-times {\tfcmlt\tp[0-9]+\.d, p[0-7]/z, z[0-9]+\.d, z[0-9]+\.d\n} 9 } } */
-/* { dg-final { scan-assembler-times {\tfcmle\tp[0-9]+\.d, p[0-7]/z, z[0-9]+\.d, #0\.0\n} 21 } } */
-/* { dg-final { scan-assembler-times {\tfcmle\tp[0-9]+\.d, p[0-7]/z, z[0-9]+\.d, z[0-9]+\.d\n} 42 } } */
+/* 3 for ole, 3 for ule and 3 for nule. */
+/* { dg-final { scan-assembler-times {\tfcmle\tp[0-9]+\.s, p[0-7]/z, z[0-9]+\.s, z[0-9]+\.s\n} 9 } } */
+/* { dg-final { scan-assembler-times {\tfcmle\tp[0-9]+\.d, p[0-7]/z, z[0-9]+\.d, z[0-9]+\.d\n} 9 } } */
-/* { dg-final { scan-assembler-times {\tfcmgt\tp[0-9]+\.d, p[0-7]/z, z[0-9]+\.d, #0\.0\n} 21 } } */
-/* { dg-final { scan-assembler-times {\tfcmgt\tp[0-9]+\.d, p[0-7]/z, z[0-9]+\.d, z[0-9]+\.d\n} 42 } } */
+/* 3 for ogt, 3 for ugt and 3 for nugt. */
+/* { dg-final { scan-assembler-times {\tfcmgt\tp[0-9]+\.s, p[0-7]/z, z[0-9]+\.s, z[0-9]+\.s\n} 9 } } */
+/* { dg-final { scan-assembler-times {\tfcmgt\tp[0-9]+\.d, p[0-7]/z, z[0-9]+\.d, z[0-9]+\.d\n} 9 } } */
-/* { dg-final { scan-assembler-times {\tfcmge\tp[0-9]+\.d, p[0-7]/z, z[0-9]+\.d, #0\.0\n} 21 } } */
-/* { dg-final { scan-assembler-times {\tfcmge\tp[0-9]+\.d, p[0-7]/z, z[0-9]+\.d, z[0-9]+\.d\n} 42 } } */
+/* 3 for oge, 3 for uge and 3 for nuge. */
+/* { dg-final { scan-assembler-times {\tfcmge\tp[0-9]+\.s, p[0-7]/z, z[0-9]+\.s, z[0-9]+\.s\n} 9 } } */
+/* { dg-final { scan-assembler-times {\tfcmge\tp[0-9]+\.d, p[0-7]/z, z[0-9]+\.d, z[0-9]+\.d\n} 9 } } */
-/* { dg-final { scan-assembler-not {\tfcmuo\tp[0-9]+\.d, p[0-7]/z, z[0-9]+\.d, #0\.0\n} } } */
-/* 3 loops * 5 invocations, with 2 invocations having ncopies == 2,
- for all 12 unordered comparisons. */
-/* { dg-final { scan-assembler-times {\tfcmuo\tp[0-9]+\.d, p[0-7]/z, z[0-9]+\.d, z[0-9]+\.d\n} 252 } } */
+/* 3 invocations for all 12 unordered comparisons. */
+/* { dg-final { scan-assembler-times {\tfcmuo\tp[0-9]+\.s, p[0-7]/z, z[0-9]+\.s, z[0-9]+\.s\n} 36 } } */
+/* { dg-final { scan-assembler-times {\tfcmuo\tp[0-9]+\.d, p[0-7]/z, z[0-9]+\.d, z[0-9]+\.d\n} 36 } } */
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-O2 -ftree-vectorize -fno-vect-cost-model" } */
+
+#include <stdint.h>
+
+#define eq(A, B) ((A) == (B))
+#define ne(A, B) ((A) != (B))
+#define olt(A, B) ((A) < (B))
+#define ole(A, B) ((A) <= (B))
+#define oge(A, B) ((A) >= (B))
+#define ogt(A, B) ((A) > (B))
+#define ordered(A, B) (!__builtin_isunordered (A, B))
+#define unordered(A, B) (__builtin_isunordered (A, B))
+#define ueq(A, B) (!__builtin_islessgreater (A, B))
+#define ult(A, B) (__builtin_isless (A, B))
+#define ule(A, B) (__builtin_islessequal (A, B))
+#define uge(A, B) (__builtin_isgreaterequal (A, B))
+#define ugt(A, B) (__builtin_isgreater (A, B))
+#define nueq(A, B) (__builtin_islessgreater (A, B))
+#define nult(A, B) (!__builtin_isless (A, B))
+#define nule(A, B) (!__builtin_islessequal (A, B))
+#define nuge(A, B) (!__builtin_isgreaterequal (A, B))
+#define nugt(A, B) (!__builtin_isgreater (A, B))
+
+#define TEST_LOOP(TYPE1, TYPE2, CMP) \
+ void __attribute__ ((noinline, noclone)) \
+ test_##TYPE1##_##TYPE2##_##CMP##_var (TYPE1 *restrict dest, \
+ TYPE1 *restrict src, \
+ TYPE1 fallback, \
+ TYPE2 *restrict a, \
+ TYPE2 *restrict b, \
+ int count) \
+ { \
+ for (int i = 0; i < count; ++i) \
+ dest[i] = CMP (a[i], b[i]) ? src[i] : fallback; \
+ }
+
+#define TEST_CMP(CMP) \
+ TEST_LOOP (int64_t, float, CMP) \
+ TEST_LOOP (uint64_t, float, CMP) \
+ TEST_LOOP (int32_t, double, CMP) \
+ TEST_LOOP (uint32_t, double, CMP)
+
+TEST_CMP (eq)
+TEST_CMP (ne)
+TEST_CMP (olt)
+TEST_CMP (ole)
+TEST_CMP (oge)
+TEST_CMP (ogt)
+TEST_CMP (ordered)
+TEST_CMP (unordered)
+TEST_CMP (ueq)
+TEST_CMP (ult)
+TEST_CMP (ule)
+TEST_CMP (uge)
+TEST_CMP (ugt)
+TEST_CMP (nueq)
+TEST_CMP (nult)
+TEST_CMP (nule)
+TEST_CMP (nuge)
+TEST_CMP (nugt)
+
+/* 2 each for: eq, ne, ueq, nueq. */
+/* { dg-final { scan-assembler-times {\tfcm(?:eq|ne)\tp[0-9]+\.s, p[0-7]/z, z[0-9]+\.s, z[0-9]+\.s\n} 8 { xfail *-*-* } } } */
+/* { dg-final { scan-assembler-times {\tfcm(?:eq|ne)\tp[0-9]+\.d, p[0-7]/z, z[0-9]+\.d, z[0-9]+\.d\n} 16 { xfail *-*-* } } } */
+
+/* 2 each for: olt, ult, nult, ogt, ugt, nugt. */
+/* { dg-final { scan-assembler-times {\tfcm[lg]t\tp[0-9]+\.s, p[0-7]/z, z[0-9]+\.s, z[0-9]+\.s\n} 12 } } */
+/* { dg-final { scan-assembler-times {\tfcm[lg]t\tp[0-9]+\.d, p[0-7]/z, z[0-9]+\.d, z[0-9]+\.d\n} 24 } } */
+
+/* 2 each for: ole, ule, nule, oge, uge, nuge. */
+/* { dg-final { scan-assembler-times {\tfcm[lg]e\tp[0-9]+\.s, p[0-7]/z, z[0-9]+\.s, z[0-9]+\.s\n} 12 } } */
+/* { dg-final { scan-assembler-times {\tfcm[lg]e\tp[0-9]+\.d, p[0-7]/z, z[0-9]+\.d, z[0-9]+\.d\n} 24 } } */
+
+/* 2 invocations for all 12 unordered comparisons. */
+/* { dg-final { scan-assembler-times {\tfcmuo\tp[0-9]+\.s, p[0-7]/z, z[0-9]+\.s, z[0-9]+\.s\n} 24 } } */
+/* { dg-final { scan-assembler-times {\tfcmuo\tp[0-9]+\.d, p[0-7]/z, z[0-9]+\.d, z[0-9]+\.d\n} 48 } } */
#define RUN_LOOP(TYPE1, TYPE2, CMP, EXPECT_INVALID) \
{ \
- TYPE1 dest1[N], dest2[N], dest3[N], src[N]; \
+ TYPE1 dest[N], src[N]; \
TYPE2 a[N], b[N]; \
for (int i = 0; i < N; ++i) \
{ \
asm volatile ("" ::: "memory"); \
} \
feclearexcept (FE_ALL_EXCEPT); \
- test_##TYPE1##_##TYPE2##_##CMP##_var (dest1, src, 11, a, b, N); \
- test_##TYPE1##_##TYPE2##_##CMP##_zero (dest2, src, 22, a, N); \
- test_##TYPE1##_##TYPE2##_##CMP##_sel (dest3, 33, 44, a, 9, N); \
+ test_##TYPE1##_##TYPE2##_##CMP##_var (dest, src, 11, a, b, N); \
if (TEST_EXCEPTIONS \
&& !fetestexcept (FE_INVALID) != !(EXPECT_INVALID)) \
__builtin_abort (); \
for (int i = 0; i < N; ++i) \
- { \
- if (dest1[i] != (CMP (a[i], b[i]) ? src[i] : 11)) \
- __builtin_abort (); \
- if (dest2[i] != (CMP (a[i], 0) ? src[i] : 22)) \
- __builtin_abort (); \
- if (dest3[i] != (CMP (a[i], 9) ? 33 : 44)) \
- __builtin_abort (); \
- } \
+ if (dest[i] != (CMP (a[i], b[i]) ? src[i] : 11)) \
+ __builtin_abort (); \
}
#define RUN_CMP(CMP, EXPECT_INVALID) \
RUN_LOOP (int32_t, float, CMP, EXPECT_INVALID) \
RUN_LOOP (uint32_t, float, CMP, EXPECT_INVALID) \
- RUN_LOOP (int64_t, float, CMP, EXPECT_INVALID) \
- RUN_LOOP (uint64_t, float, CMP, EXPECT_INVALID) \
RUN_LOOP (float, float, CMP, EXPECT_INVALID) \
- RUN_LOOP (int32_t, double, CMP, EXPECT_INVALID) \
- RUN_LOOP (uint32_t, double, CMP, EXPECT_INVALID) \
RUN_LOOP (int64_t, double, CMP, EXPECT_INVALID) \
RUN_LOOP (uint64_t, double, CMP, EXPECT_INVALID) \
RUN_LOOP (double, double, CMP, EXPECT_INVALID)
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-O2 -ftree-vectorize" } */
+
+#include <stdint.h>
+
+#define eq(A, B) ((A) == (B))
+#define ne(A, B) ((A) != (B))
+#define olt(A, B) ((A) < (B))
+#define ole(A, B) ((A) <= (B))
+#define oge(A, B) ((A) >= (B))
+#define ogt(A, B) ((A) > (B))
+#define ordered(A, B) (!__builtin_isunordered (A, B))
+#define unordered(A, B) (__builtin_isunordered (A, B))
+#define ueq(A, B) (!__builtin_islessgreater (A, B))
+#define ult(A, B) (__builtin_isless (A, B))
+#define ule(A, B) (__builtin_islessequal (A, B))
+#define uge(A, B) (__builtin_isgreaterequal (A, B))
+#define ugt(A, B) (__builtin_isgreater (A, B))
+#define nueq(A, B) (__builtin_islessgreater (A, B))
+#define nult(A, B) (!__builtin_isless (A, B))
+#define nule(A, B) (!__builtin_islessequal (A, B))
+#define nuge(A, B) (!__builtin_isgreaterequal (A, B))
+#define nugt(A, B) (!__builtin_isgreater (A, B))
+
+#define TEST_LOOP(TYPE1, TYPE2, CMP) \
+ void __attribute__ ((noinline, noclone)) \
+ test_##TYPE1##_##TYPE2##_##CMP##_sel (TYPE1 *restrict dest, \
+ TYPE1 if_true, \
+ TYPE1 if_false, \
+ TYPE2 *restrict a, \
+ TYPE2 b, int count) \
+ { \
+ for (int i = 0; i < count; ++i) \
+ dest[i] = CMP (a[i], b) ? if_true : if_false; \
+ }
+
+#define TEST_CMP(CMP) \
+ TEST_LOOP (int32_t, float, CMP) \
+ TEST_LOOP (uint32_t, float, CMP) \
+ TEST_LOOP (float, float, CMP) \
+ TEST_LOOP (int64_t, double, CMP) \
+ TEST_LOOP (uint64_t, double, CMP) \
+ TEST_LOOP (double, double, CMP)
+
+TEST_CMP (eq)
+TEST_CMP (ne)
+TEST_CMP (olt)
+TEST_CMP (ole)
+TEST_CMP (oge)
+TEST_CMP (ogt)
+TEST_CMP (ordered)
+TEST_CMP (unordered)
+TEST_CMP (ueq)
+TEST_CMP (ult)
+TEST_CMP (ule)
+TEST_CMP (uge)
+TEST_CMP (ugt)
+TEST_CMP (nueq)
+TEST_CMP (nult)
+TEST_CMP (nule)
+TEST_CMP (nuge)
+TEST_CMP (nugt)
+
+/* 3 each for: eq, ne, ueq, nueq. */
+/* { dg-final { scan-assembler-times {\tfcm(?:eq|ne)\tp[0-9]+\.s, p[0-7]/z, z[0-9]+\.s, z[0-9]+\.s\n} 12 } } */
+/* { dg-final { scan-assembler-times {\tfcm(?:eq|ne)\tp[0-9]+\.d, p[0-7]/z, z[0-9]+\.d, z[0-9]+\.d\n} 12 } } */
+
+/* 3 each for: olt, ult, nult, ogt, ugt, nugt. */
+/* { dg-final { scan-assembler-times {\tfcm[lg]t\tp[0-9]+\.s, p[0-7]/z, z[0-9]+\.s, z[0-9]+\.s\n} 18 } } */
+/* { dg-final { scan-assembler-times {\tfcm[lg]t\tp[0-9]+\.d, p[0-7]/z, z[0-9]+\.d, z[0-9]+\.d\n} 18 } } */
+
+/* 3 each for: ole, ule, nule, oge, uge, nuge. */
+/* { dg-final { scan-assembler-times {\tfcm[lg]e\tp[0-9]+\.s, p[0-7]/z, z[0-9]+\.s, z[0-9]+\.s\n} 18 } } */
+/* { dg-final { scan-assembler-times {\tfcm[lg]e\tp[0-9]+\.d, p[0-7]/z, z[0-9]+\.d, z[0-9]+\.d\n} 18 } } */
+
+/* 3 invocations for all 12 unordered comparisons. */
+/* { dg-final { scan-assembler-times {\tfcmuo\tp[0-9]+\.s, p[0-7]/z, z[0-9]+\.s, z[0-9]+\.s\n} 36 } } */
+/* { dg-final { scan-assembler-times {\tfcmuo\tp[0-9]+\.d, p[0-7]/z, z[0-9]+\.d, z[0-9]+\.d\n} 36 } } */
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-O2 -ftree-vectorize" } */
+
+#include <stdint.h>
+
+#define eq(A, B) ((A) == (B))
+#define ne(A, B) ((A) != (B))
+#define olt(A, B) ((A) < (B))
+#define ole(A, B) ((A) <= (B))
+#define oge(A, B) ((A) >= (B))
+#define ogt(A, B) ((A) > (B))
+#define ordered(A, B) (!__builtin_isunordered (A, B))
+#define unordered(A, B) (__builtin_isunordered (A, B))
+#define ueq(A, B) (!__builtin_islessgreater (A, B))
+#define ult(A, B) (__builtin_isless (A, B))
+#define ule(A, B) (__builtin_islessequal (A, B))
+#define uge(A, B) (__builtin_isgreaterequal (A, B))
+#define ugt(A, B) (__builtin_isgreater (A, B))
+#define nueq(A, B) (__builtin_islessgreater (A, B))
+#define nult(A, B) (!__builtin_isless (A, B))
+#define nule(A, B) (!__builtin_islessequal (A, B))
+#define nuge(A, B) (!__builtin_isgreaterequal (A, B))
+#define nugt(A, B) (!__builtin_isgreater (A, B))
+
+#define TEST_LOOP(TYPE1, TYPE2, CMP) \
+ void __attribute__ ((noinline, noclone)) \
+ test_##TYPE1##_##TYPE2##_##CMP##_zero (TYPE1 *restrict dest, \
+ TYPE1 *restrict src, \
+ TYPE1 fallback, \
+ TYPE2 *restrict a, \
+ int count) \
+ { \
+ for (int i = 0; i < count; ++i) \
+ dest[i] = CMP (a[i], 0) ? src[i] : fallback; \
+ }
+
+#define TEST_CMP(CMP) \
+ TEST_LOOP (int32_t, float, CMP) \
+ TEST_LOOP (uint32_t, float, CMP) \
+ TEST_LOOP (float, float, CMP) \
+ TEST_LOOP (int64_t, double, CMP) \
+ TEST_LOOP (uint64_t, double, CMP) \
+ TEST_LOOP (double, double, CMP)
+
+TEST_CMP (eq)
+TEST_CMP (ne)
+TEST_CMP (olt)
+TEST_CMP (ole)
+TEST_CMP (oge)
+TEST_CMP (ogt)
+TEST_CMP (ordered)
+TEST_CMP (unordered)
+TEST_CMP (ueq)
+TEST_CMP (ult)
+TEST_CMP (ule)
+TEST_CMP (uge)
+TEST_CMP (ugt)
+TEST_CMP (nueq)
+TEST_CMP (nult)
+TEST_CMP (nule)
+TEST_CMP (nuge)
+TEST_CMP (nugt)
+
+/* { dg-final { scan-assembler-times {\tfcmeq\tp[0-9]+\.s, p[0-7]/z, z[0-9]+\.s, #0\.0\n} 3 } } */
+/* { dg-final { scan-assembler-times {\tfcmeq\tp[0-9]+\.d, p[0-7]/z, z[0-9]+\.d, #0\.0\n} 3 } } */
+
+/* 3 for ne, 3 for ueq and 3 for nueq. */
+/* { dg-final { scan-assembler-times {\tfcmne\tp[0-9]+\.s, p[0-7]/z, z[0-9]+\.s, #0\.0\n} 9 } } */
+/* { dg-final { scan-assembler-times {\tfcmne\tp[0-9]+\.d, p[0-7]/z, z[0-9]+\.d, #0\.0\n} 9 } } */
+
+/* 3 for olt, 3 for ult and 3 for nult. */
+/* { dg-final { scan-assembler-times {\tfcmlt\tp[0-9]+\.s, p[0-7]/z, z[0-9]+\.s, #0\.0\n} 9 } } */
+/* { dg-final { scan-assembler-times {\tfcmlt\tp[0-9]+\.d, p[0-7]/z, z[0-9]+\.d, #0\.0\n} 9 } } */
+
+/* 3 for ole, 3 for ule and 3 for nule. */
+/* { dg-final { scan-assembler-times {\tfcmle\tp[0-9]+\.s, p[0-7]/z, z[0-9]+\.s, #0\.0\n} 9 } } */
+/* { dg-final { scan-assembler-times {\tfcmle\tp[0-9]+\.d, p[0-7]/z, z[0-9]+\.d, #0\.0\n} 9 } } */
+
+/* 3 for ogt, 3 for ugt and 3 for nugt. */
+/* { dg-final { scan-assembler-times {\tfcmgt\tp[0-9]+\.s, p[0-7]/z, z[0-9]+\.s, #0\.0\n} 9 } } */
+/* { dg-final { scan-assembler-times {\tfcmgt\tp[0-9]+\.d, p[0-7]/z, z[0-9]+\.d, #0\.0\n} 9 } } */
+
+/* 3 for oge, 3 for uge and 3 for nuge. */
+/* { dg-final { scan-assembler-times {\tfcmge\tp[0-9]+\.s, p[0-7]/z, z[0-9]+\.s, #0\.0\n} 9 } } */
+/* { dg-final { scan-assembler-times {\tfcmge\tp[0-9]+\.d, p[0-7]/z, z[0-9]+\.d, #0\.0\n} 9 } } */
+
+/* { dg-final { scan-assembler-not {\tfcmuo\tp[0-9]+\.s, p[0-7]/z, z[0-9]+\.s, #0\.0\n} } } */
+/* { dg-final { scan-assembler-not {\tfcmuo\tp[0-9]+\.d, p[0-7]/z, z[0-9]+\.d, #0\.0\n} } } */
+/* 3 invocations for all 12 unordered comparisons. */
+/* { dg-final { scan-assembler-times {\tfcmuo\tp[0-9]+\.s, p[0-7]/z, z[0-9]+\.s, z[0-9]+\.s\n} 36 } } */
+/* { dg-final { scan-assembler-times {\tfcmuo\tp[0-9]+\.d, p[0-7]/z, z[0-9]+\.d, z[0-9]+\.d\n} 36 } } */
#include "vcond_4.c"
-/* See PR 86753 for the reason behind the XFAILs. */
+/* 3 for eq, 3 for ueq. */
+/* { dg-final { scan-assembler-times {\tfcmeq\tp[0-9]+\.s, p[0-7]/z, z[0-9]+\.s, z[0-9]+\.s\n} 6 } } */
+/* { dg-final { scan-assembler-times {\tfcmeq\tp[0-9]+\.d, p[0-7]/z, z[0-9]+\.d, z[0-9]+\.d\n} 6 } } */
-/* 5 for eqand 5 for ueq. */
-/* { dg-final { scan-assembler-times {\tfcmeq\tp[0-9]+\.s, p[0-7]/z, z[0-9]+\.s, #0\.0} 10 { xfail *-*-* } } } */
-/* { dg-final { scan-assembler-times {\tfcmeq\tp[0-9]+\.s, p[0-7]/z, z[0-9]+\.s, z[0-9]+\.s} 20 { xfail *-*-* } } } */
+/* { dg-final { scan-assembler-times {\tfcmne\tp[0-9]+\.s, p[0-7]/z, z[0-9]+\.s, z[0-9]+\.s\n} 3 } } */
+/* { dg-final { scan-assembler-times {\tfcmne\tp[0-9]+\.d, p[0-7]/z, z[0-9]+\.d, z[0-9]+\.d\n} 3 } } */
-/* { dg-final { scan-assembler-times {\tfcmne\tp[0-9]+\.s, p[0-7]/z, z[0-9]+\.s, #0\.0} 5 { xfail *-*-* } } } */
-/* { dg-final { scan-assembler-times {\tfcmne\tp[0-9]+\.s, p[0-7]/z, z[0-9]+\.s, z[0-9]+\.s} 10 { xfail *-*-* } } } */
+/* 3 each for: olt, ult, nult and nueq. */
+/* { dg-final { scan-assembler-times {\tfcmlt\tp[0-9]+\.s, p[0-7]/z, z[0-9]+\.s, z[0-9]+\.s\n} 12 } } */
+/* { dg-final { scan-assembler-times {\tfcmlt\tp[0-9]+\.d, p[0-7]/z, z[0-9]+\.d, z[0-9]+\.d\n} 12 } } */
-/* 5 for lt, 5 for ult, 5 for nueq and 5 for nult. */
-/* { dg-final { scan-assembler-times {\tfcmlt\tp[0-9]+\.s, p[0-7]/z, z[0-9]+\.s, #0\.0} 20 { xfail *-*-* } } } */
-/* { dg-final { scan-assembler-times {\tfcmlt\tp[0-9]+\.s, p[0-7]/z, z[0-9]+\.s, z[0-9]+\.s} 40 { xfail *-*-* } } } */
+/* 3 for ole, 3 for ule and 3 for nule. */
+/* { dg-final { scan-assembler-times {\tfcmle\tp[0-9]+\.s, p[0-7]/z, z[0-9]+\.s, z[0-9]+\.s\n} 9 } } */
+/* { dg-final { scan-assembler-times {\tfcmle\tp[0-9]+\.d, p[0-7]/z, z[0-9]+\.d, z[0-9]+\.d\n} 9 } } */
-/* 5 for le, 5 for ule and 5 for nule. */
-/* { dg-final { scan-assembler-times {\tfcmle\tp[0-9]+\.s, p[0-7]/z, z[0-9]+\.s, #0\.0} 15 } } */
-/* { dg-final { scan-assembler-times {\tfcmle\tp[0-9]+\.s, p[0-7]/z, z[0-9]+\.s, z[0-9]+\.s} 30 } } */
+/* 3 each for: ogt, ugt, nugt and nueq. */
+/* { dg-final { scan-assembler-times {\tfcmgt\tp[0-9]+\.s, p[0-7]/z, z[0-9]+\.s, z[0-9]+\.s\n} 12 } } */
+/* { dg-final { scan-assembler-times {\tfcmgt\tp[0-9]+\.d, p[0-7]/z, z[0-9]+\.d, z[0-9]+\.d\n} 12 } } */
-/* 5 for gt, 5 for ugt, 5 for nueq and 5 for nugt. */
-/* { dg-final { scan-assembler-times {\tfcmgt\tp[0-9]+\.s, p[0-7]/z, z[0-9]+\.s, #0\.0} 20 { xfail *-*-* } } } */
-/* { dg-final { scan-assembler-times {\tfcmgt\tp[0-9]+\.s, p[0-7]/z, z[0-9]+\.s, z[0-9]+\.s} 40 { xfail *-*-* } } } */
+/* 3 for oge, 3 for uge and 3 for nuge. */
+/* { dg-final { scan-assembler-times {\tfcmge\tp[0-9]+\.s, p[0-7]/z, z[0-9]+\.s, z[0-9]+\.s\n} 9 } } */
+/* { dg-final { scan-assembler-times {\tfcmge\tp[0-9]+\.d, p[0-7]/z, z[0-9]+\.d, z[0-9]+\.d\n} 9 } } */
-/* 5 for ge, 5 for uge and 5 for nuge. */
-/* { dg-final { scan-assembler-times {\tfcmge\tp[0-9]+\.s, p[0-7]/z, z[0-9]+\.s, #0\.0} 15 } } */
-/* { dg-final { scan-assembler-times {\tfcmge\tp[0-9]+\.s, p[0-7]/z, z[0-9]+\.s, z[0-9]+\.s} 30 } } */
-
-/* { dg-final { scan-assembler-not {\tfcmuo\tp[0-9]+\.s, p[0-7]/z, z[0-9]+\.s, #0\.0} } } */
-/* 3 loops * 5 invocations for ordered, unordered amd ueq. */
-/* { dg-final { scan-assembler-times {\tfcmuo\tp[0-9]+\.s, p[0-7]/z, z[0-9]+\.s, z[0-9]+\.s} 45 { xfail *-*-* } } } */
-
-/* { dg-final { scan-assembler-times {\tfcmeq\tp[0-9]+\.d, p[0-7]/z, z[0-9]+\.d, #0\.0} 14 { xfail *-*-* } } } */
-/* { dg-final { scan-assembler-times {\tfcmeq\tp[0-9]+\.d, p[0-7]/z, z[0-9]+\.d, z[0-9]+\.d} 28 { xfail *-*-* } } } */
-
-/* { dg-final { scan-assembler-times {\tfcmne\tp[0-9]+\.d, p[0-7]/z, z[0-9]+\.d, #0\.0} 7 { xfail *-*-* } } } */
-/* { dg-final { scan-assembler-times {\tfcmne\tp[0-9]+\.d, p[0-7]/z, z[0-9]+\.d, z[0-9]+\.d} 14 { xfail *-*-* } } } */
-
-/* { dg-final { scan-assembler-times {\tfcmlt\tp[0-9]+\.d, p[0-7]/z, z[0-9]+\.d, #0\.0} 28 { xfail *-*-* } } } */
-/* { dg-final { scan-assembler-times {\tfcmlt\tp[0-9]+\.d, p[0-7]/z, z[0-9]+\.d, z[0-9]+\.d} 56 { xfail *-*-* } } } */
-
-/* { dg-final { scan-assembler-times {\tfcmle\tp[0-9]+\.d, p[0-7]/z, z[0-9]+\.d, #0\.0} 21 } } */
-/* { dg-final { scan-assembler-times {\tfcmle\tp[0-9]+\.d, p[0-7]/z, z[0-9]+\.d, z[0-9]+\.d} 42 } } */
-
-/* { dg-final { scan-assembler-times {\tfcmgt\tp[0-9]+\.d, p[0-7]/z, z[0-9]+\.d, #0\.0} 28 { xfail *-*-* } } } */
-/* { dg-final { scan-assembler-times {\tfcmgt\tp[0-9]+\.d, p[0-7]/z, z[0-9]+\.d, z[0-9]+\.d} 56 { xfail *-*-* } } } */
-
-/* { dg-final { scan-assembler-times {\tfcmge\tp[0-9]+\.d, p[0-7]/z, z[0-9]+\.d, #0\.0} 21 } } */
-/* { dg-final { scan-assembler-times {\tfcmge\tp[0-9]+\.d, p[0-7]/z, z[0-9]+\.d, z[0-9]+\.d} 42 } } */
-
-/* { dg-final { scan-assembler-not {\tfcmuo\tp[0-9]+\.d, p[0-7]/z, z[0-9]+\.d, #0\.0} } } */
-/* 3 loops * 5 invocations, with 2 invocations having ncopies == 2,
- for ordered, unordered and ueq. */
-/* { dg-final { scan-assembler-times {\tfcmuo\tp[0-9]+\.d, p[0-7]/z, z[0-9]+\.d, z[0-9]+\.d} 63 { xfail *-*-* } } } */
+/* 3 for ordered, 3 for unordered and 3 for ueq. */
+/* { dg-final { scan-assembler-times {\tfcmuo\tp[0-9]+\.s, p[0-7]/z, z[0-9]+\.s, z[0-9]+\.s\n} 9 } } */
+/* { dg-final { scan-assembler-times {\tfcmuo\tp[0-9]+\.d, p[0-7]/z, z[0-9]+\.d, z[0-9]+\.d\n} 9 } } */