+2019-08-14 Richard Sandiford <richard.sandiford@arm.com>
+ Kugan Vivekanandarajah <kugan.vivekanandarajah@linaro.org>
+
+ * config/aarch64/aarch64.c (aarch64_bit_representation): New function.
+ (aarch64_print_vector_float_operand): Also handle 8-bit floats.
+ (aarch64_print_operand): Add support for %I.
+ (aarch64_sve_dup_immediate_p): Handle scalars as well as vectors.
+ Bitcast floating-point constants to the corresponding integer constant.
+ (aarch64_float_const_representable_p): Handle vectors as well
+ as scalars.
+ (aarch64_expand_sve_vcond): Make sure that the operands are valid
+ for the new vcond_mask_<mode><vpred> expander.
+ * config/aarch64/predicates.md (aarch64_sve_dup_immediate): Also
+ test aarch64_float_const_representable_p.
+ (aarch64_sve_reg_or_dup_imm): New predicate.
+ * config/aarch64/aarch64-sve.md (vec_extract<vpred><Vel>): Use
+ gen_vcond_mask_<mode><vpred> instead of
+ gen_aarch64_sve_dup<mode>_const.
+ (vcond_mask_<mode><vpred>): Turn into a define_expand that
+ accepts aarch64_sve_reg_or_dup_imm and aarch64_simd_reg_or_zero
+ for operands 1 and 2 respectively. Force operand 2 into a
+ register if operand 1 is a register. Fold old define_insn...
+ (aarch64_sve_dup<mode>_const): ...and this define_insn...
+ (*vcond_mask_<mode><vpred>): ...into this new pattern. Handle
+ floating-point constants that can be moved as integers. Add
+ alternatives for MOV /M and FMOV /M.
+ (vcond<mode><v_int_equiv>, vcondu<mode><v_int_equiv>)
+ (vcond<mode><v_fp_equiv>): Accept nonmemory_operand for both
+ operand 1 and operand 2.
+ * config/aarch64/constraints.md (Ufc): Handle vectors as well
+ as scalars.
+ (vss): New constraint.
+
2019-08-14 Richard Sandiford <richard.sandiford@arm.com>
* config/aarch64/predicates.md (aarch64_sve_float_maxmin_immediate)
"TARGET_SVE"
{
rtx tmp = gen_reg_rtx (<MODE>mode);
- emit_insn (gen_aarch64_sve_dup<mode>_const (tmp, operands[1],
- CONST1_RTX (<MODE>mode),
- CONST0_RTX (<MODE>mode)));
+ emit_insn (gen_vcond_mask_<mode><vpred> (tmp, operands[1],
+ CONST1_RTX (<MODE>mode),
+ CONST0_RTX (<MODE>mode)));
emit_insn (gen_vec_extract<mode><Vel> (operands[0], tmp, operands[2]));
DONE;
}
;; ---- [INT,FP] Select based on predicates
;; -------------------------------------------------------------------------
;; Includes merging patterns for:
+;; - FMOV
;; - MOV
;; - SEL
;; -------------------------------------------------------------------------
;; vcond_mask operand order: true, false, mask
;; UNSPEC_SEL operand order: mask, true, false (as for VEC_COND_EXPR)
;; SEL operand order: mask, true, false
-(define_insn "vcond_mask_<mode><vpred>"
- [(set (match_operand:SVE_ALL 0 "register_operand" "=w")
+(define_expand "vcond_mask_<mode><vpred>"
+ [(set (match_operand:SVE_ALL 0 "register_operand")
(unspec:SVE_ALL
- [(match_operand:<VPRED> 3 "register_operand" "Upa")
- (match_operand:SVE_ALL 1 "register_operand" "w")
- (match_operand:SVE_ALL 2 "register_operand" "w")]
+ [(match_operand:<VPRED> 3 "register_operand")
+ (match_operand:SVE_ALL 1 "aarch64_sve_reg_or_dup_imm")
+ (match_operand:SVE_ALL 2 "aarch64_simd_reg_or_zero")]
UNSPEC_SEL))]
"TARGET_SVE"
- "sel\t%0.<Vetype>, %3, %1.<Vetype>, %2.<Vetype>"
+ {
+ if (register_operand (operands[1], <MODE>mode))
+ operands[2] = force_reg (<MODE>mode, operands[2]);
+ }
)
-;; Selects between a duplicated immediate and zero.
-(define_insn "aarch64_sve_dup<mode>_const"
- [(set (match_operand:SVE_I 0 "register_operand" "=w")
- (unspec:SVE_I
- [(match_operand:<VPRED> 1 "register_operand" "Upl")
- (match_operand:SVE_I 2 "aarch64_sve_dup_immediate")
- (match_operand:SVE_I 3 "aarch64_simd_imm_zero")]
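+;; Selects between:
+;; - two registers (SEL)
+;; - a duplicated immediate and a register (MOV /M or FMOV /M, preceded
+;;   by MOVPRFX when the register is not already the destination)
+;; - a duplicated immediate and zero (MOV /Z, or MOVPRFX /Z followed
+;;   by FMOV /M)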
+(define_insn "*vcond_mask_<mode><vpred>"
+ [(set (match_operand:SVE_ALL 0 "register_operand" "=w, w, w, w, ?w, ?&w, ?&w")
+ (unspec:SVE_ALL
+ [(match_operand:<VPRED> 3 "register_operand" "Upa, Upa, Upa, Upa, Upl, Upl, Upl")
+ (match_operand:SVE_ALL 1 "aarch64_sve_reg_or_dup_imm" "w, vss, vss, Ufc, Ufc, vss, Ufc")
+ (match_operand:SVE_ALL 2 "aarch64_simd_reg_or_zero" "w, 0, Dz, 0, Dz, w, w")]
UNSPEC_SEL))]
- "TARGET_SVE"
- "mov\t%0.<Vetype>, %1/z, #%2"
+ "TARGET_SVE
+ && (!register_operand (operands[1], <MODE>mode)
+ || register_operand (operands[2], <MODE>mode))"
+ "@
+ sel\t%0.<Vetype>, %3, %1.<Vetype>, %2.<Vetype>
+ mov\t%0.<Vetype>, %3/m, #%I1
+ mov\t%0.<Vetype>, %3/z, #%I1
+ fmov\t%0.<Vetype>, %3/m, #%1
+ movprfx\t%0.<Vetype>, %3/z, %0.<Vetype>\;fmov\t%0.<Vetype>, %3/m, #%1
+ movprfx\t%0, %2\;mov\t%0.<Vetype>, %3/m, #%I1
+ movprfx\t%0, %2\;fmov\t%0.<Vetype>, %3/m, #%1"
+ [(set_attr "movprfx" "*,*,*,*,yes,yes,yes")]
)
;; -------------------------------------------------------------------------
(match_operator 3 "comparison_operator"
[(match_operand:<V_INT_EQUIV> 4 "register_operand")
(match_operand:<V_INT_EQUIV> 5 "nonmemory_operand")])
- (match_operand:SVE_ALL 1 "register_operand")
- (match_operand:SVE_ALL 2 "register_operand")))]
+ (match_operand:SVE_ALL 1 "nonmemory_operand")
+ (match_operand:SVE_ALL 2 "nonmemory_operand")))]
"TARGET_SVE"
{
aarch64_expand_sve_vcond (<MODE>mode, <V_INT_EQUIV>mode, operands);
(match_operator 3 "comparison_operator"
[(match_operand:<V_INT_EQUIV> 4 "register_operand")
(match_operand:<V_INT_EQUIV> 5 "nonmemory_operand")])
- (match_operand:SVE_ALL 1 "register_operand")
- (match_operand:SVE_ALL 2 "register_operand")))]
+ (match_operand:SVE_ALL 1 "nonmemory_operand")
+ (match_operand:SVE_ALL 2 "nonmemory_operand")))]
"TARGET_SVE"
{
aarch64_expand_sve_vcond (<MODE>mode, <V_INT_EQUIV>mode, operands);
(match_operator 3 "comparison_operator"
[(match_operand:<V_FP_EQUIV> 4 "register_operand")
(match_operand:<V_FP_EQUIV> 5 "aarch64_simd_reg_or_zero")])
- (match_operand:SVE_HSD 1 "register_operand")
- (match_operand:SVE_HSD 2 "register_operand")))]
+ (match_operand:SVE_HSD 1 "nonmemory_operand")
+ (match_operand:SVE_HSD 2 "nonmemory_operand")))]
"TARGET_SVE"
{
aarch64_expand_sve_vcond (<MODE>mode, <V_FP_EQUIV>mode, operands);
return DWARF_FRAME_REGISTERS;
}
+/* If X is a CONST_DOUBLE, return its bit representation as a constant
+ integer, otherwise return X unmodified.
+
+ The integer form is obtained with gen_lowpart, using the integer mode
+ that int_mode_for_mode returns for X's own mode.  Anything that is not
+ a CONST_DOUBLE (including a CONST_INT) is passed straight through, so
+ callers can apply this unconditionally and then test CONST_INT_P.
+ NOTE(review): .require () presumably asserts that a same-sized integer
+ mode exists -- confirm for every FP mode that can reach here. */
+static rtx
+aarch64_bit_representation (rtx x)
+{
+ if (CONST_DOUBLE_P (x))
+ x = gen_lowpart (int_mode_for_mode (GET_MODE (x)).require (), x);
+ return x;
+}
+
/* Return true if MODE is any of the Advanced SIMD structure modes. */
static bool
aarch64_advsimd_struct_mode_p (machine_mode mode)
if (negate)
r = real_value_negate (&r);
- /* We only handle the SVE single-bit immediates here. */
+ /* Handle the SVE single-bit immediates specially, since they have a
+ fixed form in the assembly syntax. */
if (real_equal (&r, &dconst0))
asm_fprintf (f, "0.0");
else if (real_equal (&r, &dconst1))
else if (real_equal (&r, &dconsthalf))
asm_fprintf (f, "0.5");
else
- return false;
+ {
+ const int buf_size = 20;
+ char float_buf[buf_size] = {'\0'};
+ real_to_decimal_for_mode (float_buf, &r, buf_size, buf_size,
+ 1, GET_MODE (elt));
+ asm_fprintf (f, "%s", float_buf);
+ }
return true;
}
and print it as an unsigned integer, in decimal.
'e': Print the sign/zero-extend size as a character 8->b,
16->h, 32->w.
+ 'I': If the operand is a duplicated vector constant,
+ replace it with the duplicated scalar. If the
+ operand is then a floating-point constant, replace
+ it with the integer bit representation. Print the
+ transformed constant as a signed decimal number.
'p': Prints N such that 2^N == X (X must be power of 2 and
const int).
'P': Print the number of non-zero bits in X (a const_int).
asm_fprintf (f, "%s", reg_names [REGNO (x) + 1]);
break;
+ case 'I':
+ {
+ x = aarch64_bit_representation (unwrap_const_vec_duplicate (x));
+ if (CONST_INT_P (x))
+ asm_fprintf (f, "%wd", INTVAL (x));
+ else
+ {
+ output_operand_lossage ("invalid operand for '%%%c'", code);
+ return;
+ }
+ break;
+ }
+
case 'M':
case 'm':
{
bool
aarch64_sve_dup_immediate_p (rtx x)
{
- rtx elt;
-
- if (!const_vec_duplicate_p (x, &elt)
- || !CONST_INT_P (elt))
+ x = aarch64_bit_representation (unwrap_const_vec_duplicate (x));
+ if (!CONST_INT_P (x))
return false;
- HOST_WIDE_INT val = INTVAL (elt);
+ HOST_WIDE_INT val = INTVAL (x);
if (val & 0xff)
return IN_RANGE (val, -0x80, 0x7f);
return IN_RANGE (val, -0x8000, 0x7f00);
REAL_VALUE_TYPE r, m;
bool fail;
+ x = unwrap_const_vec_duplicate (x);
if (!CONST_DOUBLE_P (x))
return false;
else
aarch64_expand_sve_vec_cmp_int (pred, GET_CODE (ops[3]), ops[4], ops[5]);
+ if (!aarch64_sve_reg_or_dup_imm (ops[1], data_mode))
+ ops[1] = force_reg (data_mode, ops[1]);
+ /* The "false" value can only be zero if the "true" value is a constant. */
+ if (register_operand (ops[1], data_mode)
+ || !aarch64_simd_reg_or_zero (ops[2], data_mode))
+ ops[2] = force_reg (data_mode, ops[2]);
+
rtvec vec = gen_rtvec (3, pred, ops[1], ops[2]);
emit_set_insn (ops[0], gen_rtx_UNSPEC (data_mode, vec, UNSPEC_SEL));
}
(define_constraint "Ufc"
"A floating point constant which can be used with an\
FMOV immediate operation."
- (and (match_code "const_double")
+ (and (match_code "const_double,const_vector")
(match_test "aarch64_float_const_representable_p (op)")))
(define_constraint "Uvi"
CMP instructions."
(match_operand 0 "aarch64_sve_cmp_vsc_immediate"))
+(define_constraint "vss"
+ "@internal
+ A constraint that matches a signed immediate operand valid for SVE
+ DUP instructions."
+ (match_test "aarch64_sve_dup_immediate_p (op)"))
+
(define_constraint "vsd"
"@internal
A constraint that matches an unsigned immediate operand valid for SVE
(define_predicate "aarch64_sve_dup_immediate"
(and (match_code "const,const_vector")
- (match_test "aarch64_sve_dup_immediate_p (op)")))
+ (ior (match_test "aarch64_sve_dup_immediate_p (op)")
+ (match_test "aarch64_float_const_representable_p (op)"))))
(define_predicate "aarch64_sve_cmp_vsc_immediate"
(and (match_code "const,const_vector")
(ior (match_operand 0 "register_operand")
(match_operand 0 "aarch64_sve_vsm_immediate")))
+(define_predicate "aarch64_sve_reg_or_dup_imm"
+ (ior (match_operand 0 "register_operand")
+ (match_operand 0 "aarch64_sve_dup_immediate")))
+
(define_predicate "aarch64_sve_cmp_vsc_operand"
(ior (match_operand 0 "register_operand")
(match_operand 0 "aarch64_sve_cmp_vsc_immediate")))
+2019-08-14 Richard Sandiford <richard.sandiford@arm.com>
+ Kugan Vivekanandarajah <kugan.vivekanandarajah@linaro.org>
+
+ * gcc.target/aarch64/sve/vcond_18.c: New test.
+ * gcc.target/aarch64/sve/vcond_18_run.c: Likewise.
+ * gcc.target/aarch64/sve/vcond_19.c: Likewise.
+ * gcc.target/aarch64/sve/vcond_19_run.c: Likewise.
+ * gcc.target/aarch64/sve/vcond_20.c: Likewise.
+ * gcc.target/aarch64/sve/vcond_20_run.c: Likewise.
+
2019-08-14 Richard Sandiford <richard.sandiford@arm.com>
* gcc.target/aarch64/sve/fmaxnm_1.c: New test.
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-O2 -ftree-vectorize" } */
+/* Each test_TYPE_NAME loop stores CONST where pred[i] > 0 and zero
+ elsewhere, for _Float16, float and double.  The scans below expect
+ MOV /Z with an integer immediate for .h, MOVPRFX /Z followed by FMOV /M
+ for .s and .d, and exactly two SELs per element size (presumably the
+ 32.25 and -64.5 cases, which have no immediate scan -- confirm). */
+#define DEF_LOOP(TYPE, NAME, CONST) \
+ void \
+ test_##TYPE##_##NAME (TYPE *restrict x, \
+ TYPE *restrict pred, int n) \
+ { \
+ for (int i = 0; i < n; ++i) \
+ x[i] = pred[i] > 0 ? CONST : 0; \
+ }
+
+#define TEST_TYPE(T, TYPE) \
+ T (TYPE, 2, 2.0) \
+ T (TYPE, 1p25, 1.25) \
+ T (TYPE, 32p25, 32.25) \
+ T (TYPE, m4, -4.0) \
+ T (TYPE, m2p5, -2.5) \
+ T (TYPE, m64p5, -64.5)
+
+#define TEST_ALL(T) \
+ TEST_TYPE (T, _Float16) \
+ TEST_TYPE (T, float) \
+ TEST_TYPE (T, double)
+
+TEST_ALL (DEF_LOOP)
+
+/* { dg-final { scan-assembler-times {\tmov\tz[0-9]+\.h, p[0-7]/z, #16384\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tmov\tz[0-9]+\.h, p[0-7]/z, #15616\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tmov\tz[0-9]+\.h, p[0-7]/z, #-15360\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tmov\tz[0-9]+\.h, p[0-7]/z, #-16128\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tsel\tz[0-9]+\.h, p[0-7], z[0-9]+\.h, z[0-9]+\.h\n} 2 } } */
+
+/* { dg-final { scan-assembler {\tmovprfx\t(z[0-9]+\.s), (p[0-7])/z, \1\n\tfmov\t\1, \2/m, #2\.0(?:e[+]0)?\n} } } */
+/* { dg-final { scan-assembler {\tmovprfx\t(z[0-9]+\.s), (p[0-7])/z, \1\n\tfmov\t\1, \2/m, #1\.25(?:e[+]0)?\n} } } */
+/* { dg-final { scan-assembler {\tmovprfx\t(z[0-9]+\.s), (p[0-7])/z, \1\n\tfmov\t\1, \2/m, #-4\.0(?:e[+]0)?\n} } } */
+/* { dg-final { scan-assembler {\tmovprfx\t(z[0-9]+\.s), (p[0-7])/z, \1\n\tfmov\t\1, \2/m, #-2\.5(?:e[+]0)?\n} } } */
+/* { dg-final { scan-assembler-times {\tsel\tz[0-9]+\.s, p[0-7], z[0-9]+\.s, z[0-9]+\.s\n} 2 } } */
+
+/* { dg-final { scan-assembler {\tmovprfx\t(z[0-9]+\.d), (p[0-7])/z, \1\n\tfmov\t\1, \2/m, #2\.0(?:e[+]0)?\n} } } */
+/* { dg-final { scan-assembler {\tmovprfx\t(z[0-9]+\.d), (p[0-7])/z, \1\n\tfmov\t\1, \2/m, #1\.25(?:e[+]0)?\n} } } */
+/* { dg-final { scan-assembler {\tmovprfx\t(z[0-9]+\.d), (p[0-7])/z, \1\n\tfmov\t\1, \2/m, #-4\.0(?:e[+]0)?\n} } } */
+/* { dg-final { scan-assembler {\tmovprfx\t(z[0-9]+\.d), (p[0-7])/z, \1\n\tfmov\t\1, \2/m, #-2\.5(?:e[+]0)?\n} } } */
+/* { dg-final { scan-assembler-times {\tsel\tz[0-9]+\.d, p[0-7], z[0-9]+\.d, z[0-9]+\.d\n} 2 } } */
--- /dev/null
+/* { dg-do run { target aarch64_sve_hw } } */
+/* { dg-options "-O2 -ftree-vectorize" } */
+/* Execution check for the loops defined in vcond_18.c: pred[i] is set to
+ the 0/1 result of i % 5 <= i % 6, each test function is run over N
+ elements, and the program aborts if any x[i] differs from the scalar
+ expression CONST-or-zero.  NOTE(review): the empty asm volatile
+ statements presumably keep the setup and checking loops from being
+ vectorized -- confirm. */
+#include "vcond_18.c"
+
+#define N 97
+
+#define TEST_LOOP(TYPE, NAME, CONST) \
+ { \
+ TYPE x[N], pred[N]; \
+ for (int i = 0; i < N; ++i) \
+ { \
+ pred[i] = i % 5 <= i % 6; \
+ asm volatile ("" ::: "memory"); \
+ } \
+ test_##TYPE##_##NAME (x, pred, N); \
+ for (int i = 0; i < N; ++i) \
+ { \
+ if (x[i] != (TYPE) (pred[i] > 0 ? CONST : 0)) \
+ __builtin_abort (); \
+ asm volatile ("" ::: "memory"); \
+ } \
+ }
+
+int __attribute__ ((optimize (1)))
+main (int argc, char **argv)
+{
+ TEST_ALL (TEST_LOOP)
+ return 0;
+}
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-O2 -ftree-vectorize" } */
+/* Each test_TYPE_NAME loop stores CONST where pred[i] > 0 and pred[i]
+ itself elsewhere, for _Float16, float and double.  The scans below
+ expect MOV /M with an integer immediate for .h, FMOV /M for .s and .d,
+ exactly two SELs per element size, and no MOVPRFX anywhere in the
+ output. */
+#define DEF_LOOP(TYPE, NAME, CONST) \
+ void \
+ test_##TYPE##_##NAME (TYPE *restrict x, \
+ TYPE *restrict pred, int n) \
+ { \
+ for (int i = 0; i < n; ++i) \
+ x[i] = pred[i] > 0 ? CONST : pred[i]; \
+ }
+
+#define TEST_TYPE(T, TYPE) \
+ T (TYPE, 2, 2.0) \
+ T (TYPE, 1p25, 1.25) \
+ T (TYPE, 32p25, 32.25) \
+ T (TYPE, m4, -4.0) \
+ T (TYPE, m2p5, -2.5) \
+ T (TYPE, m64p5, -64.5)
+
+#define TEST_ALL(T) \
+ TEST_TYPE (T, _Float16) \
+ TEST_TYPE (T, float) \
+ TEST_TYPE (T, double)
+
+TEST_ALL (DEF_LOOP)
+
+/* { dg-final { scan-assembler-times {\tmov\tz[0-9]+\.h, p[0-7]/m, #16384\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tmov\tz[0-9]+\.h, p[0-7]/m, #15616\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tmov\tz[0-9]+\.h, p[0-7]/m, #-15360\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tmov\tz[0-9]+\.h, p[0-7]/m, #-16128\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tsel\tz[0-9]+\.h, p[0-7], z[0-9]+\.h, z[0-9]+\.h\n} 2 } } */
+
+/* { dg-final { scan-assembler {\tfmov\t(z[0-9]+\.s), p[0-7]/m, #2\.0(?:e[+]0)?\n} } } */
+/* { dg-final { scan-assembler {\tfmov\t(z[0-9]+\.s), p[0-7]/m, #1\.25(?:e[+]0)?\n} } } */
+/* { dg-final { scan-assembler {\tfmov\t(z[0-9]+\.s), p[0-7]/m, #-4\.0(?:e[+]0)?\n} } } */
+/* { dg-final { scan-assembler {\tfmov\t(z[0-9]+\.s), p[0-7]/m, #-2\.5(?:e[+]0)?\n} } } */
+/* { dg-final { scan-assembler-times {\tsel\tz[0-9]+\.s, p[0-7], z[0-9]+\.s, z[0-9]+\.s\n} 2 } } */
+
+/* { dg-final { scan-assembler {\tfmov\t(z[0-9]+\.d), p[0-7]/m, #2\.0(?:e[+]0)?\n} } } */
+/* { dg-final { scan-assembler {\tfmov\t(z[0-9]+\.d), p[0-7]/m, #1\.25(?:e[+]0)?\n} } } */
+/* { dg-final { scan-assembler {\tfmov\t(z[0-9]+\.d), p[0-7]/m, #-4\.0(?:e[+]0)?\n} } } */
+/* { dg-final { scan-assembler {\tfmov\t(z[0-9]+\.d), p[0-7]/m, #-2\.5(?:e[+]0)?\n} } } */
+/* { dg-final { scan-assembler-times {\tsel\tz[0-9]+\.d, p[0-7], z[0-9]+\.d, z[0-9]+\.d\n} 2 } } */
+
+/* { dg-final { scan-assembler-not {\tmovprfx\t} } } */
--- /dev/null
+/* { dg-do run { target aarch64_sve_hw } } */
+/* { dg-options "-O2 -ftree-vectorize" } */
+/* Execution check for the loops defined in vcond_19.c: pred[i] is set to
+ i when i % 5 <= i % 6 and to 0 otherwise, each test function is run
+ over N elements, and the program aborts if any x[i] differs from the
+ scalar expression CONST-or-pred[i].  NOTE(review): the empty asm
+ volatile statements presumably keep the setup and checking loops from
+ being vectorized -- confirm. */
+#include "vcond_19.c"
+
+#define N 97
+
+#define TEST_LOOP(TYPE, NAME, CONST) \
+ { \
+ TYPE x[N], pred[N]; \
+ for (int i = 0; i < N; ++i) \
+ { \
+ pred[i] = i % 5 <= i % 6 ? i : 0; \
+ asm volatile ("" ::: "memory"); \
+ } \
+ test_##TYPE##_##NAME (x, pred, N); \
+ for (int i = 0; i < N; ++i) \
+ { \
+ if (x[i] != (TYPE) (pred[i] > 0 ? CONST : pred[i])) \
+ __builtin_abort (); \
+ asm volatile ("" ::: "memory"); \
+ } \
+ }
+
+int __attribute__ ((optimize (1)))
+main (int argc, char **argv)
+{
+ TEST_ALL (TEST_LOOP)
+ return 0;
+}
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-O2 -ftree-vectorize" } */
+/* Each test_TYPE_NAME loop stores CONST where pred[i] > 0 and 12.0
+ elsewhere, for _Float16, float and double.  The scans below expect
+ MOV /M with an integer immediate for .h, FMOV /M for .s and .d, exactly
+ two SELs per element size, and exactly twelve unpredicated
+ register-to-register MOVPRFX instructions in total. */
+#define DEF_LOOP(TYPE, NAME, CONST) \
+ void \
+ test_##TYPE##_##NAME (TYPE *restrict x, \
+ TYPE *restrict pred, int n) \
+ { \
+ for (int i = 0; i < n; ++i) \
+ x[i] = pred[i] > 0 ? CONST : 12.0; \
+ }
+
+#define TEST_TYPE(T, TYPE) \
+ T (TYPE, 2, 2.0) \
+ T (TYPE, 1p25, 1.25) \
+ T (TYPE, 32p25, 32.25) \
+ T (TYPE, m4, -4.0) \
+ T (TYPE, m2p5, -2.5) \
+ T (TYPE, m64p5, -64.5)
+
+#define TEST_ALL(T) \
+ TEST_TYPE (T, _Float16) \
+ TEST_TYPE (T, float) \
+ TEST_TYPE (T, double)
+
+TEST_ALL (DEF_LOOP)
+
+/* { dg-final { scan-assembler-times {\tmov\tz[0-9]+\.h, p[0-7]/m, #16384\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tmov\tz[0-9]+\.h, p[0-7]/m, #15616\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tmov\tz[0-9]+\.h, p[0-7]/m, #-15360\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tmov\tz[0-9]+\.h, p[0-7]/m, #-16128\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tsel\tz[0-9]+\.h, p[0-7], z[0-9]+\.h, z[0-9]+\.h\n} 2 } } */
+
+/* { dg-final { scan-assembler {\tfmov\t(z[0-9]+\.s), p[0-7]/m, #2\.0(?:e[+]0)?\n} } } */
+/* { dg-final { scan-assembler {\tfmov\t(z[0-9]+\.s), p[0-7]/m, #1\.25(?:e[+]0)?\n} } } */
+/* { dg-final { scan-assembler {\tfmov\t(z[0-9]+\.s), p[0-7]/m, #-4\.0(?:e[+]0)?\n} } } */
+/* { dg-final { scan-assembler {\tfmov\t(z[0-9]+\.s), p[0-7]/m, #-2\.5(?:e[+]0)?\n} } } */
+/* { dg-final { scan-assembler-times {\tsel\tz[0-9]+\.s, p[0-7], z[0-9]+\.s, z[0-9]+\.s\n} 2 } } */
+
+/* { dg-final { scan-assembler {\tfmov\t(z[0-9]+\.d), p[0-7]/m, #2\.0(?:e[+]0)?\n} } } */
+/* { dg-final { scan-assembler {\tfmov\t(z[0-9]+\.d), p[0-7]/m, #1\.25(?:e[+]0)?\n} } } */
+/* { dg-final { scan-assembler {\tfmov\t(z[0-9]+\.d), p[0-7]/m, #-4\.0(?:e[+]0)?\n} } } */
+/* { dg-final { scan-assembler {\tfmov\t(z[0-9]+\.d), p[0-7]/m, #-2\.5(?:e[+]0)?\n} } } */
+/* { dg-final { scan-assembler-times {\tsel\tz[0-9]+\.d, p[0-7], z[0-9]+\.d, z[0-9]+\.d\n} 2 } } */
+
+/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+, z[0-9]+\n} 12 } } */
--- /dev/null
+/* { dg-do run { target aarch64_sve_hw } } */
+/* { dg-options "-O2 -ftree-vectorize" } */
+/* Execution check for the loops defined in vcond_20.c: pred[i] is set to
+ the 0/1 result of i % 5 <= i % 6, each test function is run over N
+ elements, and the program aborts if any x[i] differs from the scalar
+ expression CONST-or-12.0.  NOTE(review): the empty asm volatile
+ statements presumably keep the setup and checking loops from being
+ vectorized -- confirm. */
+#include "vcond_20.c"
+
+#define N 97
+
+#define TEST_LOOP(TYPE, NAME, CONST) \
+ { \
+ TYPE x[N], pred[N]; \
+ for (int i = 0; i < N; ++i) \
+ { \
+ pred[i] = i % 5 <= i % 6; \
+ asm volatile ("" ::: "memory"); \
+ } \
+ test_##TYPE##_##NAME (x, pred, N); \
+ for (int i = 0; i < N; ++i) \
+ { \
+ if (x[i] != (TYPE) (pred[i] > 0 ? CONST : 12.0)) \
+ __builtin_abort (); \
+ asm volatile ("" ::: "memory"); \
+ } \
+ }
+
+int __attribute__ ((optimize (1)))
+main (int argc, char **argv)
+{
+ TEST_ALL (TEST_LOOP)
+ return 0;
+}