+2018-05-25 Richard Sandiford <richard.sandiford@linaro.org>
+
+ * doc/sourcebuild.texi (vect_double_cond_arith): Document.
+ * gimple-match.h (gimple_match_op::MAX_NUM_OPS): Bump to 4.
+ (gimple_match_op::gimple_match_op): Add an overload for 4 operands.
+ (gimple_match_op::set_op): Likewise.
+ (gimple_resimplify4): Declare.
+ * genmatch.c (get_operand_type): Handle CFN_COND_* functions.
+ (expr::gen_transform): Likewise.
+ (decision_tree::gen): Generate a simplification routine for 4 operands.
+ * gimple-match-head.c (gimple_simplify): Add an overload for
+ 4 operands. In the top-level function, handle up to 4 call
+ arguments and call gimple_resimplify4.
+ (gimple_resimplify4): New function.
+ (build_call_internal): Pass a fourth operand.
+ (maybe_push_to_seq): Likewise.
+ * match.pd (UNCOND_BINARY, COND_BINARY): New operator lists.
+ Fold VEC_COND_EXPRs of an operation and a default value into
+ an IFN_COND_* function if possible.
+ * config/aarch64/iterators.md (UNSPEC_COND_MAX, UNSPEC_COND_MIN):
+ New unspecs.
+ (SVE_COND_FP_BINARY): Include them.
+ (optab, sve_fp_op): Handle them.
+ (SVE_INT_BINARY_REV): New code iterator.
+ (SVE_COND_FP_BINARY_REV): New int iterator.
+ (commutative): New int attribute.
+ * config/aarch64/aarch64-protos.h (aarch64_sve_prepare_conditional_op):
+ Declare.
+ * config/aarch64/aarch64.c (aarch64_sve_prepare_conditional_op): New
+ function.
+ * config/aarch64/aarch64-sve.md (cond_<optab><mode>): Use it.
+ (*cond_<optab><mode>): New patterns for reversed operands.
+
2018-05-25 Richard Biener <rguenther@suse.de>
* tree-vectorizer.h (STMT_VINFO_GROUP_*, GROUP_*): Remove.
void aarch64_expand_sve_vec_cmp_int (rtx, rtx_code, rtx, rtx);
bool aarch64_expand_sve_vec_cmp_float (rtx, rtx_code, rtx, rtx, bool);
void aarch64_expand_sve_vcond (machine_mode, machine_mode, rtx *);
+void aarch64_sve_prepare_conditional_op (rtx *, unsigned int, bool);
#endif /* RTX_CODE */
void aarch64_init_builtins (void);
UNSPEC_SEL))]
"TARGET_SVE"
{
- gcc_assert (rtx_equal_p (operands[2], operands[4]));
+ bool commutative_p = (GET_RTX_CLASS (<CODE>) == RTX_COMM_ARITH);
+ aarch64_sve_prepare_conditional_op (operands, 5, commutative_p);
})
;; Predicated integer operations.
"<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
)
+;; Predicated integer operations with the operands reversed.  Here the
+;; value used for inactive lanes is the second input of the operation
+;; (operand 3) rather than the first, so operand 3 is tied to the
+;; destination and the instruction is emitted in its reversed ("r") form
+;; with operand 2 as the other source.  Only the codes listed in
+;; SVE_INT_BINARY_REV are matched.
+(define_insn "*cond_<optab><mode>"
+ [(set (match_operand:SVE_I 0 "register_operand" "=w")
+ (unspec:SVE_I
+ [(match_operand:<VPRED> 1 "register_operand" "Upl")
+ (SVE_INT_BINARY_REV:SVE_I
+ (match_operand:SVE_I 2 "register_operand" "w")
+ (match_operand:SVE_I 3 "register_operand" "0"))
+ (match_dup 3)]
+ UNSPEC_SEL))]
+ "TARGET_SVE"
+ "<sve_int_op>r\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>"
+)
+
;; Set operand 0 to the last active element in operand 3, or to tied
;; operand 1 if no elements are active.
(define_insn "fold_extract_last_<mode>"
UNSPEC_SEL))]
"TARGET_SVE"
{
- gcc_assert (rtx_equal_p (operands[2], operands[4]));
+ aarch64_sve_prepare_conditional_op (operands, 5, <commutative>);
})
;; Predicated floating-point operations.
"<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
)
+;; Predicated floating-point operations with the operands reversed.
+;; Here the value used for inactive lanes is the second input of the
+;; operation (operand 3), which is tied to the destination, and the
+;; instruction is emitted in its reversed ("r") form with operand 2 as
+;; the other source.  The pattern must iterate over
+;; SVE_COND_FP_BINARY_REV rather than SVE_COND_FP_BINARY: appending "r"
+;; to <sve_fp_op> only gives a valid mnemonic for the operations in the
+;; _REV iterator (FSUBR), whereas "faddr", "fmaxnmr" and "fminnmr" do
+;; not exist.  This mirrors the use of SVE_INT_BINARY_REV in the
+;; integer pattern.
+(define_insn "*cond_<optab><mode>"
+ [(set (match_operand:SVE_F 0 "register_operand" "=w")
+ (unspec:SVE_F
+ [(match_operand:<VPRED> 1 "register_operand" "Upl")
+ (unspec:SVE_F
+ [(match_dup 1)
+ (match_operand:SVE_F 2 "register_operand" "w")
+ (match_operand:SVE_F 3 "register_operand" "0")]
+ SVE_COND_FP_BINARY_REV)
+ (match_dup 3)]
+ UNSPEC_SEL))]
+ "TARGET_SVE"
+ "<sve_fp_op>r\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>"
+)
+
;; Shift an SVE vector left and insert a scalar into element 0.
(define_insn "vec_shl_insert_<mode>"
[(set (match_operand:SVE_ALL 0 "register_operand" "=w, w")
emit_set_insn (ops[0], gen_rtx_UNSPEC (data_mode, vec, UNSPEC_SEL));
}
+/* Massage the operands of a cond_<optab><mode> expansion so that the
+   "else" value is tied to one of the inputs.  OPERANDS is laid out as:
+
+   - operand 0: the destination
+   - operand 1: the governing predicate
+   - operands 2 to NOPS - 2: the inputs to the operation that is
+     performed for active lanes
+   - operand NOPS - 1: the values to use for inactive lanes.
+
+   COMMUTATIVE_P says whether operands 2 and 3 are commutative.  When
+   they are, no pattern ties operand 3 to operand NOPS - 1, so such a
+   tie is converted into a tie with operand 2 by swapping the inputs.  */
+
+void
+aarch64_sve_prepare_conditional_op (rtx *operands, unsigned int nops,
+                                    bool commutative_p)
+{
+  const unsigned int else_idx = nops - 1;
+
+  /* If the "else" value already matches one of the inputs, the
+     operation can be done directly and nothing needs to be emitted.  */
+  for (unsigned int opno = 2; opno < else_idx; ++opno)
+    {
+      if (!rtx_equal_p (operands[opno], operands[else_idx]))
+        continue;
+
+      if (commutative_p && opno == 3)
+        std::swap (operands[2], operands[3]);
+      return;
+    }
+
+  /* The "else" value differs from every input, so a SEL is needed
+     either on the output or on an input.  Neither is better in all
+     cases, but selecting the input can avoid a move when the output
+     needs to be distinct from the inputs.  E.g. if operand N maps to
+     register N, selecting the output would give:
+
+       MOVPRFX Z0.S, Z2.S
+       ADD Z0.S, P1/M, Z0.S, Z3.S
+       SEL Z0.S, P1, Z0.S, Z4.S
+
+     whereas selecting the input avoids the MOVPRFX:
+
+       SEL Z0.S, P1, Z2.S, Z4.S
+       ADD Z0.S, P1/M, Z0.S, Z3.S.  */
+  machine_mode mode = GET_MODE (operands[0]);
+  rtx merged = gen_reg_rtx (mode);
+  rtvec sel_ops = gen_rtvec (3, operands[1], operands[2],
+                             operands[else_idx]);
+  rtx sel = gen_rtx_UNSPEC (mode, sel_ops, UNSPEC_SEL);
+  emit_set_insn (merged, sel);
+
+  /* Use the merged value both as the first input and as the "else"
+     value, so that the two are tied.  */
+  operands[else_idx] = merged;
+  operands[2] = merged;
+}
+
/* Implement TARGET_MODES_TIEABLE_P. In principle we should always return
true. However due to issues with register allocation it is preferable
to avoid tieing integer scalar and FP scalar modes. Executing integer
UNSPEC_UMUL_HIGHPART ; Used in aarch64-sve.md.
UNSPEC_COND_ADD ; Used in aarch64-sve.md.
UNSPEC_COND_SUB ; Used in aarch64-sve.md.
+ UNSPEC_COND_MAX ; Used in aarch64-sve.md.
+ UNSPEC_COND_MIN ; Used in aarch64-sve.md.
UNSPEC_COND_LT ; Used in aarch64-sve.md.
UNSPEC_COND_LE ; Used in aarch64-sve.md.
UNSPEC_COND_EQ ; Used in aarch64-sve.md.
(define_code_iterator SVE_INT_BINARY [plus minus smax umax smin umin
and ior xor])
+;; SVE integer binary operations that are matched by the
+;; reversed-operand predicated pattern in aarch64-sve.md.
+(define_code_iterator SVE_INT_BINARY_REV [minus])
+
;; SVE integer comparisons.
(define_code_iterator SVE_INT_CMP [lt le eq ne ge gt ltu leu geu gtu])
(define_int_iterator MUL_HIGHPART [UNSPEC_SMUL_HIGHPART UNSPEC_UMUL_HIGHPART])
-(define_int_iterator SVE_COND_FP_BINARY [UNSPEC_COND_ADD UNSPEC_COND_SUB])
+(define_int_iterator SVE_COND_FP_BINARY [UNSPEC_COND_ADD UNSPEC_COND_SUB
+ UNSPEC_COND_MAX UNSPEC_COND_MIN])
+
+;; Conditional floating-point binary operations that have a
+;; reversed-operand form; presumably the FP counterpart of
+;; SVE_INT_BINARY_REV.
+(define_int_iterator SVE_COND_FP_BINARY_REV [UNSPEC_COND_SUB])
(define_int_iterator SVE_COND_FP_CMP [UNSPEC_COND_LT UNSPEC_COND_LE
UNSPEC_COND_EQ UNSPEC_COND_NE
(UNSPEC_IORV "ior")
(UNSPEC_XORV "xor")
(UNSPEC_COND_ADD "add")
- (UNSPEC_COND_SUB "sub")])
+ (UNSPEC_COND_SUB "sub")
+ (UNSPEC_COND_MAX "smax")
+ (UNSPEC_COND_MIN "smin")])
(define_int_attr maxmin_uns [(UNSPEC_UMAXV "umax")
(UNSPEC_UMINV "umin")
(UNSPEC_COND_GT "gt")])
(define_int_attr sve_fp_op [(UNSPEC_COND_ADD "fadd")
- (UNSPEC_COND_SUB "fsub")])
+ (UNSPEC_COND_SUB "fsub")
+ (UNSPEC_COND_MAX "fmaxnm")
+ (UNSPEC_COND_MIN "fminnm")])
+
+;; Whether the two inputs of each conditional floating-point operation
+;; are commutative.  The value is passed as the COMMUTATIVE_P argument
+;; of aarch64_sve_prepare_conditional_op.
+(define_int_attr commutative [(UNSPEC_COND_ADD "true")
+ (UNSPEC_COND_SUB "false")
+ (UNSPEC_COND_MIN "true")
+ (UNSPEC_COND_MAX "true")])
@item vect_double
Target supports hardware vectors of @code{double}.
+@item vect_double_cond_arith
+Target supports conditional addition, subtraction, minimum and maximum
+on vectors of @code{double}, via the @code{cond_} optabs.
+
@item vect_element_align_preferred
The target's preferred vector alignment is the same as the element
alignment.
else if (*op == COND_EXPR
&& pos == 0)
return "boolean_type_node";
+ else if (strncmp (op->id, "CFN_COND_", 9) == 0)
+ {
+ /* IFN_COND_* operands 1 and later by default have the same type
+ as the result. The type of operand 0 needs to be specified
+ explicitly. */
+ if (pos > 0 && expr_type)
+ return expr_type;
+ else if (pos > 0 && in_type)
+ return in_type;
+ else
+ return NULL;
+ }
else
{
/* Otherwise all types should match - choose one in order of
in_type = NULL;
}
else if (*opr == COND_EXPR
- || *opr == VEC_COND_EXPR)
+ || *opr == VEC_COND_EXPR
+ || strncmp (opr->id, "CFN_COND_", 9) == 0)
{
/* Conditions are of the same type as their first alternative. */
sprintf (optype, "TREE_TYPE (ops%d[1])", depth);
}
fprintf (stderr, "removed %u duplicate tails\n", rcnt);
- for (unsigned n = 1; n <= 3; ++n)
+ for (unsigned n = 1; n <= 4; ++n)
{
/* First generate split-out functions. */
for (unsigned i = 0; i < root->kids.length (); i++)
code_helper, tree, tree, tree);
static bool gimple_simplify (gimple_match_op *, gimple_seq *, tree (*)(tree),
code_helper, tree, tree, tree, tree);
+static bool gimple_simplify (gimple_match_op *, gimple_seq *, tree (*)(tree),
+ code_helper, tree, tree, tree, tree, tree);
const unsigned int gimple_match_op::MAX_NUM_OPS;
return canonicalized;
}
+/* Helper that matches and simplifies the toplevel result of a
+   gimple_simplify run for a four-operand operation (where we don't
+   want to build a stmt in case it's used in in-place folding).
+   On success RES_OP is replaced with the simplified result.
+   Returns whether any change was made.  */
+
+bool
+gimple_resimplify4 (gimple_seq *seq, gimple_match_op *res_op,
+                    tree (*valueize)(tree))
+{
+  /* No constant folding is defined for four-operand functions.  */
+
+  /* Simplify into a copy so that RES_OP is only updated on success.  */
+  gimple_match_op simplified (*res_op);
+  bool changed = gimple_simplify (&simplified, seq, valueize,
+                                  res_op->code, res_op->type,
+                                  res_op->ops[0], res_op->ops[1],
+                                  res_op->ops[2], res_op->ops[3]);
+  if (changed)
+    *res_op = simplified;
+
+  return changed;
+}
/* If in GIMPLE the operation described by RES_OP should be single-rhs,
build a GENERIC tree for that expression and update RES_OP accordingly. */
return gimple_build_call_internal (fn, res_op->num_ops,
res_op->op_or_null (0),
res_op->op_or_null (1),
- res_op->op_or_null (2));
+ res_op->op_or_null (2),
+ res_op->op_or_null (3));
}
/* Push the exploded expression described by RES_OP as a statement to
new_stmt = gimple_build_call (decl, num_ops,
res_op->op_or_null (0),
res_op->op_or_null (1),
- res_op->op_or_null (2));
+ res_op->op_or_null (2),
+ res_op->op_or_null (3));
}
if (!res)
{
/* ??? This way we can't simplify calls with side-effects. */
if (gimple_call_lhs (stmt) != NULL_TREE
&& gimple_call_num_args (stmt) >= 1
- && gimple_call_num_args (stmt) <= 3)
+ && gimple_call_num_args (stmt) <= 4)
{
bool valueized = false;
combined_fn cfn;
case 3:
return (gimple_resimplify3 (seq, res_op, valueize)
|| valueized);
+ case 4:
+ return (gimple_resimplify4 (seq, res_op, valueize)
+ || valueized);
default:
gcc_unreachable ();
}
gimple_match_op (code_helper, tree, tree);
gimple_match_op (code_helper, tree, tree, tree);
gimple_match_op (code_helper, tree, tree, tree, tree);
+ gimple_match_op (code_helper, tree, tree, tree, tree, tree);
void set_op (code_helper, tree, unsigned int);
void set_op (code_helper, tree, tree);
void set_op (code_helper, tree, tree, tree);
void set_op (code_helper, tree, tree, tree, tree);
+ void set_op (code_helper, tree, tree, tree, tree, tree);
void set_value (tree);
tree op_or_null (unsigned int) const;
/* The maximum value of NUM_OPS. */
- static const unsigned int MAX_NUM_OPS = 3;
+ static const unsigned int MAX_NUM_OPS = 4;
/* The operation being performed. */
code_helper code;
ops[2] = op2;
}
+/* Construct a four-operand operation: CODE_IN applied to OP0, OP1, OP2
+   and OP3, with a result of type TYPE_IN.  */
+
+inline
+gimple_match_op::gimple_match_op (code_helper code_in, tree type_in,
+ tree op0, tree op1, tree op2, tree op3)
+ : code (code_in), type (type_in), num_ops (4)
+{
+ ops[0] = op0;
+ ops[1] = op1;
+ ops[2] = op2;
+ ops[3] = op3;
+}
+
/* Change the operation performed to CODE_IN, the type of the result to
TYPE_IN, and the number of operands to NUM_OPS_IN. The caller needs
to set the operands itself. */
ops[2] = op2;
}
+/* Change the operation performed to CODE_IN and the type of the result
+   to TYPE_IN, and set the four operands to OP0, OP1, OP2 and OP3.  */
+
+inline void
+gimple_match_op::set_op (code_helper code_in, tree type_in,
+ tree op0, tree op1, tree op2, tree op3)
+{
+ code = code_in;
+ type = type_in;
+ num_ops = 4;
+ ops[0] = op0;
+ ops[1] = op1;
+ ops[2] = op2;
+ ops[3] = op3;
+}
+
/* Set the "operation" to be the single value VALUE, such as a constant
or SSA_NAME. */
bool gimple_resimplify1 (gimple_seq *, gimple_match_op *, tree (*)(tree));
bool gimple_resimplify2 (gimple_seq *, gimple_match_op *, tree (*)(tree));
bool gimple_resimplify3 (gimple_seq *, gimple_match_op *, tree (*)(tree));
+bool gimple_resimplify4 (gimple_seq *, gimple_match_op *, tree (*)(tree));
tree maybe_push_res_to_seq (gimple_match_op *, gimple_seq *,
tree res = NULL_TREE);
void maybe_build_generic_op (gimple_match_op *);
DEFINE_INT_AND_FLOAT_ROUND_FN (CEIL)
DEFINE_INT_AND_FLOAT_ROUND_FN (ROUND)
DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
+
+/* Binary operations and their associated IFN_COND_* function.  The two
+   lists are iterated together, so entry N of COND_BINARY must be the
+   conditional form of entry N of UNCOND_BINARY; keep them in the same
+   order.  */
+(define_operator_list UNCOND_BINARY
+ plus minus
+ min max
+ bit_and bit_ior bit_xor)
+(define_operator_list COND_BINARY
+ IFN_COND_ADD IFN_COND_SUB
+ IFN_COND_MIN IFN_COND_MAX
+ IFN_COND_AND IFN_COND_IOR IFN_COND_XOR)
/* As opposed to convert?, this still creates a single pattern, so
it is not a suitable replacement for convert? in all cases. */
(simplify
(cmp (popcount @0) integer_zerop)
(rep @0 { build_zero_cst (TREE_TYPE (@0)); }))))
+
+/* Simplify:
+
+ a = a1 op a2
+ r = c ? a : b;
+
+ to:
+
+ r = c ? a1 op a2 : b;
+
+ if the target can do it in one go. This makes the operation conditional
+ on c, so could drop potentially-trapping arithmetic, but that's a valid
+ simplification if the result of the operation isn't needed.
+
+ The operation may be wrapped in a VIEW_CONVERT_EXPR, provided that the
+ element precision of the result matches that of the operation; the
+ "else" value is then converted to the type of the operation and the
+ result of the conditional function is converted back. */
+(for uncond_op (UNCOND_BINARY)
+ cond_op (COND_BINARY)
+ (simplify
+ (vec_cond @0 (view_convert? (uncond_op@4 @1 @2)) @3)
+ (with { tree op_type = TREE_TYPE (@4); }
+ (if (element_precision (type) == element_precision (op_type))
+ (view_convert (cond_op @0 @1 @2 (view_convert:op_type @3))))))
+ /* Likewise when the operation is the "else" arm of the VEC_COND_EXPR,
+ in which case the condition needs to be inverted. */
+ (simplify
+ (vec_cond @0 @1 (view_convert? (uncond_op@4 @2 @3)))
+ (with { tree op_type = TREE_TYPE (@4); }
+ (if (element_precision (type) == element_precision (op_type))
+ (view_convert (cond_op (bit_not @0) @2 @3 (view_convert:op_type @1)))))))
+2018-05-25 Richard Sandiford <richard.sandiford@linaro.org>
+
+ * lib/target-supports.exp
+ (check_effective_target_vect_double_cond_arith): New proc.
+ * gcc.dg/vect/vect-cond-arith-1.c: New test.
+ * gcc.target/aarch64/sve/vcond_8.c: Likewise.
+ * gcc.target/aarch64/sve/vcond_8_run.c: Likewise.
+ * gcc.target/aarch64/sve/vcond_9.c: Likewise.
+ * gcc.target/aarch64/sve/vcond_9_run.c: Likewise.
+ * gcc.target/aarch64/sve/vcond_12.c: Likewise.
+ * gcc.target/aarch64/sve/vcond_12_run.c: Likewise.
+
2018-05-25 Janus Weil <janus@gcc.gnu.org>
PR fortran/85839
--- /dev/null
+/* { dg-additional-options "-fdump-tree-optimized -fno-trapping-math -ffinite-math-only" } */
+
+#include "tree-vect.h"
+
+#define N (VECTOR_BITS * 11 / 64 + 3)
+
+#define add(A, B) ((A) + (B))
+#define sub(A, B) ((A) - (B))
+
+/* Define f_OP, which stores OP (b[i], x) to a[i] for elements in which
+   b[i] < 100 and stores b[i] itself for the other elements.  The "else"
+   value is therefore the first input of the operation.  */
+#define DEF(OP) \
+ void __attribute__ ((noipa)) \
+ f_##OP (double *restrict a, double *restrict b, double x) \
+ { \
+ for (int i = 0; i < N; ++i) \
+ { \
+ double truev = OP (b[i], x); \
+ a[i] = b[i] < 100 ? truev : b[i]; \
+ } \
+ }
+
+/* Call f_OP with X equal to 10 and check each element of A against a
+   value recomputed from the same (i % 17) * 10 formula that main uses
+   to fill B.  Abort on the first mismatch.  */
+#define TEST(OP) \
+ { \
+ f_##OP (a, b, 10); \
+ for (int i = 0; i < N; ++i) \
+ { \
+ int bval = (i % 17) * 10; \
+ int truev = OP (bval, 10); \
+ if (a[i] != (bval < 100 ? truev : bval)) \
+ __builtin_abort (); \
+ asm volatile ("" ::: "memory"); \
+ } \
+ }
+
+#define FOR_EACH_OP(T) \
+ T (add) \
+ T (sub) \
+ T (__builtin_fmax) \
+ T (__builtin_fmin)
+
+FOR_EACH_OP (DEF)
+
+int
+main (void)
+{
+ double a[N], b[N];
+ for (int i = 0; i < N; ++i)
+ {
+ b[i] = (i % 17) * 10;
+ asm volatile ("" ::: "memory");
+ }
+ FOR_EACH_OP (TEST)
+ return 0;
+}
+
+/* { dg-final { scan-tree-dump { = \.COND_ADD} "optimized" { target vect_double_cond_arith } } } */
+/* { dg-final { scan-tree-dump { = \.COND_SUB} "optimized" { target vect_double_cond_arith } } } */
+/* { dg-final { scan-tree-dump { = \.COND_MAX} "optimized" { target vect_double_cond_arith } } } */
+/* { dg-final { scan-tree-dump { = \.COND_MIN} "optimized" { target vect_double_cond_arith } } } */
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-O2 -ftree-vectorize -ffast-math" } */
+
+#include <stdint.h>
+
+#define add(A, B) ((A) + (B))
+#define sub(A, B) ((A) - (B))
+#define max(A, B) ((A) > (B) ? (A) : (B))
+#define min(A, B) ((A) < (B) ? (A) : (B))
+#define and(A, B) ((A) & (B))
+#define ior(A, B) ((A) | (B))
+#define xor(A, B) ((A) ^ (B))
+
+#define N 121
+
+/* Define f_OP_TYPE, which sets dest[i] to OP (induc, src2v) when
+   cond[i] < limit and to ELSEV otherwise, where INDUC starts at 0 and
+   is incremented on each iteration.  The "else" value is distinct from
+   both inputs of the operation.  */
+#define DEF_LOOP(TYPE, CMPTYPE, OP) \
+ void __attribute__((noipa)) \
+ f_##OP##_##TYPE (TYPE *restrict dest, CMPTYPE *restrict cond, \
+ CMPTYPE limit, TYPE src2v, TYPE elsev) \
+ { \
+ TYPE induc = 0; \
+ for (unsigned int i = 0; i < N; ++i, induc += 1) \
+ { \
+ TYPE truev = OP (induc, src2v); \
+ dest[i] = cond[i] < limit ? truev : elsev; \
+ } \
+ }
+
+#define FOR_EACH_INT_TYPE(T, TYPE) \
+ T (TYPE, TYPE, add) \
+ T (TYPE, TYPE, sub) \
+ T (TYPE, TYPE, max) \
+ T (TYPE, TYPE, min) \
+ T (TYPE, TYPE, and) \
+ T (TYPE, TYPE, ior) \
+ T (TYPE, TYPE, xor)
+
+#define FOR_EACH_FP_TYPE(T, TYPE, CMPTYPE, SUFFIX) \
+ T (TYPE, CMPTYPE, add) \
+ T (TYPE, CMPTYPE, sub) \
+ T (TYPE, CMPTYPE, __builtin_fmax##SUFFIX) \
+ T (TYPE, CMPTYPE, __builtin_fmin##SUFFIX)
+
+#define FOR_EACH_LOOP(T) \
+ FOR_EACH_INT_TYPE (T, int8_t) \
+ FOR_EACH_INT_TYPE (T, int16_t) \
+ FOR_EACH_INT_TYPE (T, int32_t) \
+ FOR_EACH_INT_TYPE (T, int64_t) \
+ FOR_EACH_INT_TYPE (T, uint8_t) \
+ FOR_EACH_INT_TYPE (T, uint16_t) \
+ FOR_EACH_INT_TYPE (T, uint32_t) \
+ FOR_EACH_INT_TYPE (T, uint64_t) \
+ FOR_EACH_FP_TYPE (T, _Float16, uint16_t, f16) \
+ FOR_EACH_FP_TYPE (T, float, float, f32) \
+ FOR_EACH_FP_TYPE (T, double, double, f64)
+
+FOR_EACH_LOOP (DEF_LOOP)
+
+/* { dg-final { scan-assembler-not {\tmov\tz[0-9]+\.., z[0-9]+} } } */
+
+/* { dg-final { scan-assembler-times {\tsel\tz[0-9]+\.b,} 14 } } */
+/* { dg-final { scan-assembler-times {\tsel\tz[0-9]+\.h,} 18 } } */
+/* { dg-final { scan-assembler-times {\tsel\tz[0-9]+\.s,} 18 } } */
+/* { dg-final { scan-assembler-times {\tsel\tz[0-9]+\.d,} 18 } } */
+
+/* { dg-final { scan-assembler-times {\tadd\tz[0-9]+\.b, p[0-7]/m,} 2 } } */
+/* { dg-final { scan-assembler-times {\tadd\tz[0-9]+\.h, p[0-7]/m,} 2 } } */
+/* { dg-final { scan-assembler-times {\tadd\tz[0-9]+\.s, p[0-7]/m,} 2 } } */
+/* { dg-final { scan-assembler-times {\tadd\tz[0-9]+\.d, p[0-7]/m,} 2 } } */
+
+/* { dg-final { scan-assembler-times {\tsub\tz[0-9]+\.b, p[0-7]/m,} 2 } } */
+/* { dg-final { scan-assembler-times {\tsub\tz[0-9]+\.h, p[0-7]/m,} 2 } } */
+/* { dg-final { scan-assembler-times {\tsub\tz[0-9]+\.s, p[0-7]/m,} 2 } } */
+/* { dg-final { scan-assembler-times {\tsub\tz[0-9]+\.d, p[0-7]/m,} 2 } } */
+
+/* { dg-final { scan-assembler-times {\tsmax\tz[0-9]+\.b, p[0-7]/m,} 1 } } */
+/* { dg-final { scan-assembler-times {\tsmax\tz[0-9]+\.h, p[0-7]/m,} 1 } } */
+/* { dg-final { scan-assembler-times {\tsmax\tz[0-9]+\.s, p[0-7]/m,} 1 } } */
+/* { dg-final { scan-assembler-times {\tsmax\tz[0-9]+\.d, p[0-7]/m,} 1 } } */
+
+/* { dg-final { scan-assembler-times {\tsmin\tz[0-9]+\.b, p[0-7]/m,} 1 } } */
+/* { dg-final { scan-assembler-times {\tsmin\tz[0-9]+\.h, p[0-7]/m,} 1 } } */
+/* { dg-final { scan-assembler-times {\tsmin\tz[0-9]+\.s, p[0-7]/m,} 1 } } */
+/* { dg-final { scan-assembler-times {\tsmin\tz[0-9]+\.d, p[0-7]/m,} 1 } } */
+
+/* { dg-final { scan-assembler-times {\tumax\tz[0-9]+\.b, p[0-7]/m,} 1 } } */
+/* { dg-final { scan-assembler-times {\tumax\tz[0-9]+\.h, p[0-7]/m,} 1 } } */
+/* { dg-final { scan-assembler-times {\tumax\tz[0-9]+\.s, p[0-7]/m,} 1 } } */
+/* { dg-final { scan-assembler-times {\tumax\tz[0-9]+\.d, p[0-7]/m,} 1 } } */
+
+/* { dg-final { scan-assembler-times {\tumin\tz[0-9]+\.b, p[0-7]/m,} 1 } } */
+/* { dg-final { scan-assembler-times {\tumin\tz[0-9]+\.h, p[0-7]/m,} 1 } } */
+/* { dg-final { scan-assembler-times {\tumin\tz[0-9]+\.s, p[0-7]/m,} 1 } } */
+/* { dg-final { scan-assembler-times {\tumin\tz[0-9]+\.d, p[0-7]/m,} 1 } } */
+
+/* { dg-final { scan-assembler-times {\tand\tz[0-9]+\.b, p[0-7]/m,} 2 } } */
+/* { dg-final { scan-assembler-times {\tand\tz[0-9]+\.h, p[0-7]/m,} 2 } } */
+/* { dg-final { scan-assembler-times {\tand\tz[0-9]+\.s, p[0-7]/m,} 2 } } */
+/* { dg-final { scan-assembler-times {\tand\tz[0-9]+\.d, p[0-7]/m,} 2 } } */
+
+/* { dg-final { scan-assembler-times {\torr\tz[0-9]+\.b, p[0-7]/m,} 2 } } */
+/* { dg-final { scan-assembler-times {\torr\tz[0-9]+\.h, p[0-7]/m,} 2 } } */
+/* { dg-final { scan-assembler-times {\torr\tz[0-9]+\.s, p[0-7]/m,} 2 } } */
+/* { dg-final { scan-assembler-times {\torr\tz[0-9]+\.d, p[0-7]/m,} 2 } } */
+
+/* { dg-final { scan-assembler-times {\teor\tz[0-9]+\.b, p[0-7]/m,} 2 } } */
+/* { dg-final { scan-assembler-times {\teor\tz[0-9]+\.h, p[0-7]/m,} 2 } } */
+/* { dg-final { scan-assembler-times {\teor\tz[0-9]+\.s, p[0-7]/m,} 2 } } */
+/* { dg-final { scan-assembler-times {\teor\tz[0-9]+\.d, p[0-7]/m,} 2 } } */
+
+/* { dg-final { scan-assembler-times {\tfadd\tz[0-9]+\.h, p[0-7]/m,} 1 } } */
+/* { dg-final { scan-assembler-times {\tfadd\tz[0-9]+\.s, p[0-7]/m,} 1 } } */
+/* { dg-final { scan-assembler-times {\tfadd\tz[0-9]+\.d, p[0-7]/m,} 1 } } */
+
+/* { dg-final { scan-assembler-times {\tfsub\tz[0-9]+\.h, p[0-7]/m,} 1 } } */
+/* { dg-final { scan-assembler-times {\tfsub\tz[0-9]+\.s, p[0-7]/m,} 1 } } */
+/* { dg-final { scan-assembler-times {\tfsub\tz[0-9]+\.d, p[0-7]/m,} 1 } } */
+
+/* { dg-final { scan-assembler-times {\tfmaxnm\tz[0-9]+\.h, p[0-7]/m,} 1 } } */
+/* { dg-final { scan-assembler-times {\tfmaxnm\tz[0-9]+\.s, p[0-7]/m,} 1 } } */
+/* { dg-final { scan-assembler-times {\tfmaxnm\tz[0-9]+\.d, p[0-7]/m,} 1 } } */
+
+/* { dg-final { scan-assembler-times {\tfminnm\tz[0-9]+\.h, p[0-7]/m,} 1 } } */
+/* { dg-final { scan-assembler-times {\tfminnm\tz[0-9]+\.s, p[0-7]/m,} 1 } } */
+/* { dg-final { scan-assembler-times {\tfminnm\tz[0-9]+\.d, p[0-7]/m,} 1 } } */
--- /dev/null
+/* { dg-do run { target aarch64_sve_hw } } */
+/* { dg-options "-O2 -ftree-vectorize -ffast-math" } */
+
+#include "vcond_12.c"
+
+/* Run f_OP_TYPE with cond[i] == i % 5, a limit of 3, SRC2V == 14 and
+   ELSEV == 17, then check every element of DEST against a scalar
+   recomputation.  Abort on the first mismatch.  */
+#define TEST_LOOP(TYPE, CMPTYPE, OP) \
+ { \
+ TYPE dest[N]; \
+ CMPTYPE cond[N]; \
+ for (unsigned int i = 0; i < N; ++i) \
+ cond[i] = i % 5; \
+ TYPE src2v = 14; \
+ TYPE elsev = 17; \
+ f_##OP##_##TYPE (dest, cond, 3, src2v, elsev); \
+ TYPE induc = 0; \
+ for (unsigned int i = 0; i < N; ++i) \
+ { \
+ TYPE if_true = OP (induc, src2v); \
+ if (dest[i] != (i % 5 < 3 ? if_true : elsev)) \
+ __builtin_abort (); \
+ induc += 1; \
+ } \
+ }
+
+int __attribute__ ((optimize (1)))
+main (void)
+{
+ FOR_EACH_LOOP (TEST_LOOP);
+ return 0;
+}
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-O2 -ftree-vectorize -fno-trapping-math -ffinite-math-only" } */
+
+#include <stdint.h>
+
+#define add(A, B) ((A) + (B))
+#define sub(A, B) ((A) - (B))
+#define max(A, B) ((A) > (B) ? (A) : (B))
+#define min(A, B) ((A) < (B) ? (A) : (B))
+#define and(A, B) ((A) & (B))
+#define ior(A, B) ((A) | (B))
+#define xor(A, B) ((A) ^ (B))
+
+/* Define f_OP_TYPE, which sets dest[i] to OP (src[i], val) when
+   cond[i] < limit and to src[i] otherwise.  The "else" value is
+   therefore the first input of the operation.  */
+#define DEF_LOOP(TYPE, CMPTYPE, OP) \
+ void __attribute__((noipa)) \
+ f_##OP##_##TYPE (TYPE *restrict dest, CMPTYPE *restrict cond, \
+ CMPTYPE limit, TYPE *restrict src, \
+ TYPE val, unsigned int n) \
+ { \
+ for (unsigned int i = 0; i < n; ++i) \
+ { \
+ TYPE truev = OP (src[i], val); \
+ dest[i] = cond[i] < limit ? truev : src[i]; \
+ } \
+ }
+
+#define FOR_EACH_INT_TYPE(T, TYPE) \
+ T (TYPE, TYPE, add) \
+ T (TYPE, TYPE, sub) \
+ T (TYPE, TYPE, max) \
+ T (TYPE, TYPE, min) \
+ T (TYPE, TYPE, and) \
+ T (TYPE, TYPE, ior) \
+ T (TYPE, TYPE, xor)
+
+#define FOR_EACH_FP_TYPE(T, TYPE, CMPTYPE, SUFFIX) \
+ T (TYPE, CMPTYPE, add) \
+ T (TYPE, CMPTYPE, sub) \
+ T (TYPE, CMPTYPE, __builtin_fmax##SUFFIX) \
+ T (TYPE, CMPTYPE, __builtin_fmin##SUFFIX)
+
+#define FOR_EACH_LOOP(T) \
+ FOR_EACH_INT_TYPE (T, int8_t) \
+ FOR_EACH_INT_TYPE (T, int16_t) \
+ FOR_EACH_INT_TYPE (T, int32_t) \
+ FOR_EACH_INT_TYPE (T, int64_t) \
+ FOR_EACH_INT_TYPE (T, uint8_t) \
+ FOR_EACH_INT_TYPE (T, uint16_t) \
+ FOR_EACH_INT_TYPE (T, uint32_t) \
+ FOR_EACH_INT_TYPE (T, uint64_t) \
+ FOR_EACH_FP_TYPE (T, _Float16, uint16_t, f16) \
+ FOR_EACH_FP_TYPE (T, float, float, f32) \
+ FOR_EACH_FP_TYPE (T, double, double, f64)
+
+FOR_EACH_LOOP (DEF_LOOP)
+
+/* { dg-final { scan-assembler-not {\tsel\t} } } */
+/* { dg-final { scan-assembler-not {\tmov\tz[0-9]+\.., z[0-9]+} } } */
+
+/* { dg-final { scan-assembler-times {\tadd\tz[0-9]+\.b, p[0-7]/m,} 2 } } */
+/* { dg-final { scan-assembler-times {\tadd\tz[0-9]+\.h, p[0-7]/m,} 2 } } */
+/* { dg-final { scan-assembler-times {\tadd\tz[0-9]+\.s, p[0-7]/m,} 2 } } */
+/* { dg-final { scan-assembler-times {\tadd\tz[0-9]+\.d, p[0-7]/m,} 2 } } */
+
+/* { dg-final { scan-assembler-times {\tsub\tz[0-9]+\.b, p[0-7]/m,} 2 } } */
+/* { dg-final { scan-assembler-times {\tsub\tz[0-9]+\.h, p[0-7]/m,} 2 } } */
+/* { dg-final { scan-assembler-times {\tsub\tz[0-9]+\.s, p[0-7]/m,} 2 } } */
+/* { dg-final { scan-assembler-times {\tsub\tz[0-9]+\.d, p[0-7]/m,} 2 } } */
+
+/* { dg-final { scan-assembler-times {\tsmax\tz[0-9]+\.b, p[0-7]/m,} 1 } } */
+/* { dg-final { scan-assembler-times {\tsmax\tz[0-9]+\.h, p[0-7]/m,} 1 } } */
+/* { dg-final { scan-assembler-times {\tsmax\tz[0-9]+\.s, p[0-7]/m,} 1 } } */
+/* { dg-final { scan-assembler-times {\tsmax\tz[0-9]+\.d, p[0-7]/m,} 1 } } */
+
+/* { dg-final { scan-assembler-times {\tsmin\tz[0-9]+\.b, p[0-7]/m,} 1 } } */
+/* { dg-final { scan-assembler-times {\tsmin\tz[0-9]+\.h, p[0-7]/m,} 1 } } */
+/* { dg-final { scan-assembler-times {\tsmin\tz[0-9]+\.s, p[0-7]/m,} 1 } } */
+/* { dg-final { scan-assembler-times {\tsmin\tz[0-9]+\.d, p[0-7]/m,} 1 } } */
+
+/* { dg-final { scan-assembler-times {\tumax\tz[0-9]+\.b, p[0-7]/m,} 1 } } */
+/* { dg-final { scan-assembler-times {\tumax\tz[0-9]+\.h, p[0-7]/m,} 1 } } */
+/* { dg-final { scan-assembler-times {\tumax\tz[0-9]+\.s, p[0-7]/m,} 1 } } */
+/* { dg-final { scan-assembler-times {\tumax\tz[0-9]+\.d, p[0-7]/m,} 1 } } */
+
+/* { dg-final { scan-assembler-times {\tumin\tz[0-9]+\.b, p[0-7]/m,} 1 } } */
+/* { dg-final { scan-assembler-times {\tumin\tz[0-9]+\.h, p[0-7]/m,} 1 } } */
+/* { dg-final { scan-assembler-times {\tumin\tz[0-9]+\.s, p[0-7]/m,} 1 } } */
+/* { dg-final { scan-assembler-times {\tumin\tz[0-9]+\.d, p[0-7]/m,} 1 } } */
+
+/* { dg-final { scan-assembler-times {\tand\tz[0-9]+\.b, p[0-7]/m,} 2 } } */
+/* { dg-final { scan-assembler-times {\tand\tz[0-9]+\.h, p[0-7]/m,} 2 } } */
+/* { dg-final { scan-assembler-times {\tand\tz[0-9]+\.s, p[0-7]/m,} 2 } } */
+/* { dg-final { scan-assembler-times {\tand\tz[0-9]+\.d, p[0-7]/m,} 2 } } */
+
+/* { dg-final { scan-assembler-times {\torr\tz[0-9]+\.b, p[0-7]/m,} 2 } } */
+/* { dg-final { scan-assembler-times {\torr\tz[0-9]+\.h, p[0-7]/m,} 2 } } */
+/* { dg-final { scan-assembler-times {\torr\tz[0-9]+\.s, p[0-7]/m,} 2 } } */
+/* { dg-final { scan-assembler-times {\torr\tz[0-9]+\.d, p[0-7]/m,} 2 } } */
+
+/* { dg-final { scan-assembler-times {\teor\tz[0-9]+\.b, p[0-7]/m,} 2 } } */
+/* { dg-final { scan-assembler-times {\teor\tz[0-9]+\.h, p[0-7]/m,} 2 } } */
+/* { dg-final { scan-assembler-times {\teor\tz[0-9]+\.s, p[0-7]/m,} 2 } } */
+/* { dg-final { scan-assembler-times {\teor\tz[0-9]+\.d, p[0-7]/m,} 2 } } */
+
+/* { dg-final { scan-assembler-times {\tfadd\tz[0-9]+\.h, p[0-7]/m,} 1 } } */
+/* { dg-final { scan-assembler-times {\tfadd\tz[0-9]+\.s, p[0-7]/m,} 1 } } */
+/* { dg-final { scan-assembler-times {\tfadd\tz[0-9]+\.d, p[0-7]/m,} 1 } } */
+
+/* { dg-final { scan-assembler-times {\tfsub\tz[0-9]+\.h, p[0-7]/m,} 1 } } */
+/* { dg-final { scan-assembler-times {\tfsub\tz[0-9]+\.s, p[0-7]/m,} 1 } } */
+/* { dg-final { scan-assembler-times {\tfsub\tz[0-9]+\.d, p[0-7]/m,} 1 } } */
+
+/* { dg-final { scan-assembler-times {\tfmaxnm\tz[0-9]+\.h, p[0-7]/m,} 1 } } */
+/* { dg-final { scan-assembler-times {\tfmaxnm\tz[0-9]+\.s, p[0-7]/m,} 1 } } */
+/* { dg-final { scan-assembler-times {\tfmaxnm\tz[0-9]+\.d, p[0-7]/m,} 1 } } */
+
+/* { dg-final { scan-assembler-times {\tfminnm\tz[0-9]+\.h, p[0-7]/m,} 1 } } */
+/* { dg-final { scan-assembler-times {\tfminnm\tz[0-9]+\.s, p[0-7]/m,} 1 } } */
+/* { dg-final { scan-assembler-times {\tfminnm\tz[0-9]+\.d, p[0-7]/m,} 1 } } */
--- /dev/null
+/* { dg-do run { target aarch64_sve_hw } } */
+/* { dg-options "-O2 -ftree-vectorize -fno-trapping-math -ffinite-math-only" } */
+
+#include "vcond_8.c"
+
+#define N 187
+
+/* Run f_OP_TYPE with src[i] == i * 3, cond[i] == i % 5, a limit of 3
+   and VAL == 77, then check every element of DEST against a scalar
+   recomputation.  Abort on the first mismatch.  */
+#define TEST_LOOP(TYPE, CMPTYPE, OP) \
+ { \
+ TYPE dest[N], src[N]; \
+ CMPTYPE cond[N]; \
+ for (unsigned int i = 0; i < N; ++i) \
+ { \
+ src[i] = i * 3; \
+ cond[i] = i % 5; \
+ } \
+ f_##OP##_##TYPE (dest, cond, 3, src, 77, N); \
+ for (unsigned int i = 0; i < N; ++i) \
+ { \
+ TYPE if_false = i * 3; \
+ TYPE if_true = OP (if_false, (TYPE) 77); \
+ if (dest[i] != (i % 5 < 3 ? if_true : if_false)) \
+ __builtin_abort (); \
+ } \
+ }
+
+int __attribute__ ((optimize (1)))
+main (void)
+{
+ FOR_EACH_LOOP (TEST_LOOP);
+ return 0;
+}
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-O2 -ftree-vectorize -fno-trapping-math -ffinite-math-only" } */
+
+#include <stdint.h>
+
+#define add(A, B) ((A) + (B))
+#define sub(A, B) ((A) - (B))
+#define max(A, B) ((A) > (B) ? (A) : (B))
+#define min(A, B) ((A) < (B) ? (A) : (B))
+#define and(A, B) ((A) & (B))
+#define ior(A, B) ((A) | (B))
+#define xor(A, B) ((A) ^ (B))
+
+/* Define f_OP_TYPE, which sets dest[i] to OP (src1[i], src2[i]) when
+   cond[i] < limit and to src2[i] otherwise.  The "else" value is
+   therefore the second input of the operation.  */
+#define DEF_LOOP(TYPE, CMPTYPE, OP) \
+ void __attribute__((noipa)) \
+ f_##OP##_##TYPE (TYPE *restrict dest, CMPTYPE *restrict cond, \
+ CMPTYPE limit, TYPE *restrict src1, \
+ TYPE *restrict src2, unsigned int n) \
+ { \
+ for (unsigned int i = 0; i < n; ++i) \
+ { \
+ TYPE truev = OP (src1[i], src2[i]); \
+ dest[i] = cond[i] < limit ? truev : src2[i]; \
+ } \
+ }
+
+#define FOR_EACH_INT_TYPE(T, TYPE) \
+ T (TYPE, TYPE, add) \
+ T (TYPE, TYPE, sub) \
+ T (TYPE, TYPE, max) \
+ T (TYPE, TYPE, min) \
+ T (TYPE, TYPE, and) \
+ T (TYPE, TYPE, ior) \
+ T (TYPE, TYPE, xor)
+
+#define FOR_EACH_FP_TYPE(T, TYPE, CMPTYPE, SUFFIX) \
+ T (TYPE, CMPTYPE, add) \
+ T (TYPE, CMPTYPE, sub) \
+ T (TYPE, CMPTYPE, __builtin_fmax##SUFFIX) \
+ T (TYPE, CMPTYPE, __builtin_fmin##SUFFIX)
+
+#define FOR_EACH_LOOP(T) \
+ FOR_EACH_INT_TYPE (T, int8_t) \
+ FOR_EACH_INT_TYPE (T, int16_t) \
+ FOR_EACH_INT_TYPE (T, int32_t) \
+ FOR_EACH_INT_TYPE (T, int64_t) \
+ FOR_EACH_INT_TYPE (T, uint8_t) \
+ FOR_EACH_INT_TYPE (T, uint16_t) \
+ FOR_EACH_INT_TYPE (T, uint32_t) \
+ FOR_EACH_INT_TYPE (T, uint64_t) \
+ FOR_EACH_FP_TYPE (T, _Float16, uint16_t, f16) \
+ FOR_EACH_FP_TYPE (T, float, float, f32) \
+ FOR_EACH_FP_TYPE (T, double, double, f64)
+
+FOR_EACH_LOOP (DEF_LOOP)
+
+/* { dg-final { scan-assembler-not {\tsel\t} } } */
+/* { dg-final { scan-assembler-not {\tmov\tz[0-9]+\.., z[0-9]+} } } */
+
+/* { dg-final { scan-assembler-times {\tadd\tz[0-9]+\.b, p[0-7]/m,} 2 } } */
+/* { dg-final { scan-assembler-times {\tadd\tz[0-9]+\.h, p[0-7]/m,} 2 } } */
+/* { dg-final { scan-assembler-times {\tadd\tz[0-9]+\.s, p[0-7]/m,} 2 } } */
+/* { dg-final { scan-assembler-times {\tadd\tz[0-9]+\.d, p[0-7]/m,} 2 } } */
+
+/* { dg-final { scan-assembler-times {\tsubr\tz[0-9]+\.b, p[0-7]/m,} 2 } } */
+/* { dg-final { scan-assembler-times {\tsubr\tz[0-9]+\.h, p[0-7]/m,} 2 } } */
+/* { dg-final { scan-assembler-times {\tsubr\tz[0-9]+\.s, p[0-7]/m,} 2 } } */
+/* { dg-final { scan-assembler-times {\tsubr\tz[0-9]+\.d, p[0-7]/m,} 2 } } */
+
+/* { dg-final { scan-assembler-times {\tsmax\tz[0-9]+\.b, p[0-7]/m,} 1 } } */
+/* { dg-final { scan-assembler-times {\tsmax\tz[0-9]+\.h, p[0-7]/m,} 1 } } */
+/* { dg-final { scan-assembler-times {\tsmax\tz[0-9]+\.s, p[0-7]/m,} 1 } } */
+/* { dg-final { scan-assembler-times {\tsmax\tz[0-9]+\.d, p[0-7]/m,} 1 } } */
+
+/* { dg-final { scan-assembler-times {\tsmin\tz[0-9]+\.b, p[0-7]/m,} 1 } } */
+/* { dg-final { scan-assembler-times {\tsmin\tz[0-9]+\.h, p[0-7]/m,} 1 } } */
+/* { dg-final { scan-assembler-times {\tsmin\tz[0-9]+\.s, p[0-7]/m,} 1 } } */
+/* { dg-final { scan-assembler-times {\tsmin\tz[0-9]+\.d, p[0-7]/m,} 1 } } */
+
+/* { dg-final { scan-assembler-times {\tumax\tz[0-9]+\.b, p[0-7]/m,} 1 } } */
+/* { dg-final { scan-assembler-times {\tumax\tz[0-9]+\.h, p[0-7]/m,} 1 } } */
+/* { dg-final { scan-assembler-times {\tumax\tz[0-9]+\.s, p[0-7]/m,} 1 } } */
+/* { dg-final { scan-assembler-times {\tumax\tz[0-9]+\.d, p[0-7]/m,} 1 } } */
+
+/* { dg-final { scan-assembler-times {\tumin\tz[0-9]+\.b, p[0-7]/m,} 1 } } */
+/* { dg-final { scan-assembler-times {\tumin\tz[0-9]+\.h, p[0-7]/m,} 1 } } */
+/* { dg-final { scan-assembler-times {\tumin\tz[0-9]+\.s, p[0-7]/m,} 1 } } */
+/* { dg-final { scan-assembler-times {\tumin\tz[0-9]+\.d, p[0-7]/m,} 1 } } */
+
+/* { dg-final { scan-assembler-times {\tand\tz[0-9]+\.b, p[0-7]/m,} 2 } } */
+/* { dg-final { scan-assembler-times {\tand\tz[0-9]+\.h, p[0-7]/m,} 2 } } */
+/* { dg-final { scan-assembler-times {\tand\tz[0-9]+\.s, p[0-7]/m,} 2 } } */
+/* { dg-final { scan-assembler-times {\tand\tz[0-9]+\.d, p[0-7]/m,} 2 } } */
+
+/* { dg-final { scan-assembler-times {\torr\tz[0-9]+\.b, p[0-7]/m,} 2 } } */
+/* { dg-final { scan-assembler-times {\torr\tz[0-9]+\.h, p[0-7]/m,} 2 } } */
+/* { dg-final { scan-assembler-times {\torr\tz[0-9]+\.s, p[0-7]/m,} 2 } } */
+/* { dg-final { scan-assembler-times {\torr\tz[0-9]+\.d, p[0-7]/m,} 2 } } */
+
+/* { dg-final { scan-assembler-times {\teor\tz[0-9]+\.b, p[0-7]/m,} 2 } } */
+/* { dg-final { scan-assembler-times {\teor\tz[0-9]+\.h, p[0-7]/m,} 2 } } */
+/* { dg-final { scan-assembler-times {\teor\tz[0-9]+\.s, p[0-7]/m,} 2 } } */
+/* { dg-final { scan-assembler-times {\teor\tz[0-9]+\.d, p[0-7]/m,} 2 } } */
+
+/* { dg-final { scan-assembler-times {\tfadd\tz[0-9]+\.h, p[0-7]/m,} 1 } } */
+/* { dg-final { scan-assembler-times {\tfadd\tz[0-9]+\.s, p[0-7]/m,} 1 } } */
+/* { dg-final { scan-assembler-times {\tfadd\tz[0-9]+\.d, p[0-7]/m,} 1 } } */
+
+/* { dg-final { scan-assembler-times {\tfsubr\tz[0-9]+\.h, p[0-7]/m,} 1 } } */
+/* { dg-final { scan-assembler-times {\tfsubr\tz[0-9]+\.s, p[0-7]/m,} 1 } } */
+/* { dg-final { scan-assembler-times {\tfsubr\tz[0-9]+\.d, p[0-7]/m,} 1 } } */
+
+/* { dg-final { scan-assembler-times {\tfmaxnm\tz[0-9]+\.h, p[0-7]/m,} 1 } } */
+/* { dg-final { scan-assembler-times {\tfmaxnm\tz[0-9]+\.s, p[0-7]/m,} 1 } } */
+/* { dg-final { scan-assembler-times {\tfmaxnm\tz[0-9]+\.d, p[0-7]/m,} 1 } } */
+
+/* { dg-final { scan-assembler-times {\tfminnm\tz[0-9]+\.h, p[0-7]/m,} 1 } } */
+/* { dg-final { scan-assembler-times {\tfminnm\tz[0-9]+\.s, p[0-7]/m,} 1 } } */
+/* { dg-final { scan-assembler-times {\tfminnm\tz[0-9]+\.d, p[0-7]/m,} 1 } } */
--- /dev/null
+/* { dg-do run { target aarch64_sve_hw } } */
+/* { dg-options "-O2 -ftree-vectorize -fno-trapping-math -ffinite-math-only" } */
+
+#include "vcond_9.c"
+
+#define N 187
+
+/* Run f_OP_TYPE with src1[i] == i * 4 - i % 7, src2[i] == i * 3 + 1,
+   cond[i] == i % 5 and a limit of 3, then check every element of DEST
+   against a scalar recomputation.  Abort on the first mismatch.  */
+#define TEST_LOOP(TYPE, CMPTYPE, OP) \
+ { \
+ TYPE dest[N], src1[N], src2[N]; \
+ CMPTYPE cond[N]; \
+ for (unsigned int i = 0; i < N; ++i) \
+ { \
+ src1[i] = i * 4 - i % 7; \
+ src2[i] = i * 3 + 1; \
+ cond[i] = i % 5; \
+ } \
+ f_##OP##_##TYPE (dest, cond, 3, src1, src2, N); \
+ for (unsigned int i = 0; i < N; ++i) \
+ { \
+ TYPE src1v = i * 4 - i % 7; \
+ TYPE src2v = i * 3 + 1; \
+ TYPE if_true = OP (src1v, src2v); \
+ if (dest[i] != (i % 5 < 3 ? if_true : src2v)) \
+ __builtin_abort (); \
+ } \
+ }
+
+int __attribute__ ((optimize (1)))
+main (void)
+{
+ FOR_EACH_LOOP (TEST_LOOP);
+ return 0;
+}
return $et_vect_double_saved($et_index)
}
+# Return 1 if the target supports conditional addition, subtraction, minimum
+# and maximum on vectors of double, via the cond_ optabs. Return 0 otherwise.
+#
+# The result is simply that of check_effective_target_aarch64_sve, so no
+# other target is reported as supporting these operations.
+
+proc check_effective_target_vect_double_cond_arith { } {
+ return [check_effective_target_aarch64_sve]
+}
+
# Return 1 if the target supports hardware vectors of long long, 0 otherwise.
#
# This won't change for different subtargets so cache the result.