[AArch64] SVE tests
author Richard Sandiford <richard.sandiford@linaro.org>
Sat, 13 Jan 2018 17:55:24 +0000 (17:55 +0000)
committer Richard Sandiford <rsandifo@gcc.gnu.org>
Sat, 13 Jan 2018 17:55:24 +0000 (17:55 +0000)
This patch adds gcc.target/aarch64 tests for SVE, and forces some
existing Advanced SIMD tests to use -march=armv8-a.

2018-01-13  Richard Sandiford  <richard.sandiford@linaro.org>
    Alan Hayward  <alan.hayward@arm.com>
    David Sherwood  <david.sherwood@arm.com>

gcc/testsuite/
* lib/target-supports.exp (check_effective_target_aarch64_asm_sve_ok):
New proc.
* gcc.target/aarch64/bic_imm_1.c: Use #pragma GCC target "+nosve".
* gcc.target/aarch64/fmaxmin.c: Likewise.
* gcc.target/aarch64/fmul_fcvt_2.c: Likewise.
* gcc.target/aarch64/orr_imm_1.c: Likewise.
* gcc.target/aarch64/pr62178.c: Likewise.
* gcc.target/aarch64/pr71727-2.c: Likewise.
* gcc.target/aarch64/saddw-1.c: Likewise.
* gcc.target/aarch64/saddw-2.c: Likewise.
* gcc.target/aarch64/uaddw-1.c: Likewise.
* gcc.target/aarch64/uaddw-2.c: Likewise.
* gcc.target/aarch64/uaddw-3.c: Likewise.
* gcc.target/aarch64/vect-add-sub-cond.c: Likewise.
* gcc.target/aarch64/vect-compile.c: Likewise.
* gcc.target/aarch64/vect-faddv-compile.c: Likewise.
* gcc.target/aarch64/vect-fcm-eq-d.c: Likewise.
* gcc.target/aarch64/vect-fcm-eq-f.c: Likewise.
* gcc.target/aarch64/vect-fcm-ge-d.c: Likewise.
* gcc.target/aarch64/vect-fcm-ge-f.c: Likewise.
* gcc.target/aarch64/vect-fcm-gt-d.c: Likewise.
* gcc.target/aarch64/vect-fcm-gt-f.c: Likewise.
* gcc.target/aarch64/vect-fmax-fmin-compile.c: Likewise.
* gcc.target/aarch64/vect-fmaxv-fminv-compile.c: Likewise.
* gcc.target/aarch64/vect-fmovd-zero.c: Likewise.
* gcc.target/aarch64/vect-fmovd.c: Likewise.
* gcc.target/aarch64/vect-fmovf-zero.c: Likewise.
* gcc.target/aarch64/vect-fmovf.c: Likewise.
* gcc.target/aarch64/vect-fp-compile.c: Likewise.
* gcc.target/aarch64/vect-ld1r-compile-fp.c: Likewise.
* gcc.target/aarch64/vect-ld1r-compile.c: Likewise.
* gcc.target/aarch64/vect-movi.c: Likewise.
* gcc.target/aarch64/vect-mull-compile.c: Likewise.
* gcc.target/aarch64/vect-reduc-or_1.c: Likewise.
* gcc.target/aarch64/vect-vaddv.c: Likewise.
* gcc.target/aarch64/vect_saddl_1.c: Likewise.
* gcc.target/aarch64/vect_smlal_1.c: Likewise.
* gcc.target/aarch64/vector_initialization_nostack.c: XFAIL for
fixed-length SVE.
* gcc.target/aarch64/sve/aarch64-sve.exp: New file.
* gcc.target/aarch64/sve/arith_1.c: New test.
* gcc.target/aarch64/sve/const_pred_1.C: Likewise.
* gcc.target/aarch64/sve/const_pred_2.C: Likewise.
* gcc.target/aarch64/sve/const_pred_3.C: Likewise.
* gcc.target/aarch64/sve/const_pred_4.C: Likewise.
* gcc.target/aarch64/sve/cvtf_signed_1.c: Likewise.
* gcc.target/aarch64/sve/cvtf_signed_1_run.c: Likewise.
* gcc.target/aarch64/sve/cvtf_unsigned_1.c: Likewise.
* gcc.target/aarch64/sve/cvtf_unsigned_1_run.c: Likewise.
* gcc.target/aarch64/sve/dup_imm_1.c: Likewise.
* gcc.target/aarch64/sve/dup_imm_1_run.c: Likewise.
* gcc.target/aarch64/sve/dup_lane_1.c: Likewise.
* gcc.target/aarch64/sve/ext_1.c: Likewise.
* gcc.target/aarch64/sve/ext_2.c: Likewise.
* gcc.target/aarch64/sve/extract_1.c: Likewise.
* gcc.target/aarch64/sve/extract_2.c: Likewise.
* gcc.target/aarch64/sve/extract_3.c: Likewise.
* gcc.target/aarch64/sve/extract_4.c: Likewise.
* gcc.target/aarch64/sve/fabs_1.c: Likewise.
* gcc.target/aarch64/sve/fcvtz_signed_1.c: Likewise.
* gcc.target/aarch64/sve/fcvtz_signed_1_run.c: Likewise.
* gcc.target/aarch64/sve/fcvtz_unsigned_1.c: Likewise.
* gcc.target/aarch64/sve/fcvtz_unsigned_1_run.c: Likewise.
* gcc.target/aarch64/sve/fdiv_1.c: Likewise.
* gcc.target/aarch64/sve/fdup_1.c: Likewise.
* gcc.target/aarch64/sve/fdup_1_run.c: Likewise.
* gcc.target/aarch64/sve/fmad_1.c: Likewise.
* gcc.target/aarch64/sve/fmla_1.c: Likewise.
* gcc.target/aarch64/sve/fmls_1.c: Likewise.
* gcc.target/aarch64/sve/fmsb_1.c: Likewise.
* gcc.target/aarch64/sve/fmul_1.c: Likewise.
* gcc.target/aarch64/sve/fneg_1.c: Likewise.
* gcc.target/aarch64/sve/fnmad_1.c: Likewise.
* gcc.target/aarch64/sve/fnmla_1.c: Likewise.
* gcc.target/aarch64/sve/fnmls_1.c: Likewise.
* gcc.target/aarch64/sve/fnmsb_1.c: Likewise.
* gcc.target/aarch64/sve/fp_arith_1.c: Likewise.
* gcc.target/aarch64/sve/frinta_1.c: Likewise.
* gcc.target/aarch64/sve/frinti_1.c: Likewise.
* gcc.target/aarch64/sve/frintm_1.c: Likewise.
* gcc.target/aarch64/sve/frintp_1.c: Likewise.
* gcc.target/aarch64/sve/frintx_1.c: Likewise.
* gcc.target/aarch64/sve/frintz_1.c: Likewise.
* gcc.target/aarch64/sve/fsqrt_1.c: Likewise.
* gcc.target/aarch64/sve/fsubr_1.c: Likewise.
* gcc.target/aarch64/sve/index_1.c: Likewise.
* gcc.target/aarch64/sve/index_1_run.c: Likewise.
* gcc.target/aarch64/sve/ld1r_1.c: Likewise.
* gcc.target/aarch64/sve/load_const_offset_1.c: Likewise.
* gcc.target/aarch64/sve/load_const_offset_2.c: Likewise.
* gcc.target/aarch64/sve/load_const_offset_3.c: Likewise.
* gcc.target/aarch64/sve/load_scalar_offset_1.c: Likewise.
* gcc.target/aarch64/sve/logical_1.c: Likewise.
* gcc.target/aarch64/sve/loop_add_1.c: Likewise.
* gcc.target/aarch64/sve/loop_add_1_run.c: Likewise.
* gcc.target/aarch64/sve/mad_1.c: Likewise.
* gcc.target/aarch64/sve/maxmin_1.c: Likewise.
* gcc.target/aarch64/sve/maxmin_1_run.c: Likewise.
* gcc.target/aarch64/sve/maxmin_strict_1.c: Likewise.
* gcc.target/aarch64/sve/maxmin_strict_1_run.c: Likewise.
* gcc.target/aarch64/sve/mla_1.c: Likewise.
* gcc.target/aarch64/sve/mls_1.c: Likewise.
* gcc.target/aarch64/sve/mov_rr_1.c: Likewise.
* gcc.target/aarch64/sve/msb_1.c: Likewise.
* gcc.target/aarch64/sve/mul_1.c: Likewise.
* gcc.target/aarch64/sve/neg_1.c: Likewise.
* gcc.target/aarch64/sve/nlogical_1.c: Likewise.
* gcc.target/aarch64/sve/nlogical_1_run.c: Likewise.
* gcc.target/aarch64/sve/pack_1.c: Likewise.
* gcc.target/aarch64/sve/pack_1_run.c: Likewise.
* gcc.target/aarch64/sve/pack_fcvt_signed_1.c: Likewise.
* gcc.target/aarch64/sve/pack_fcvt_signed_1_run.c: Likewise.
* gcc.target/aarch64/sve/pack_fcvt_unsigned_1.c: Likewise.
* gcc.target/aarch64/sve/pack_fcvt_unsigned_1_run.c: Likewise.
* gcc.target/aarch64/sve/pack_float_1.c: Likewise.
* gcc.target/aarch64/sve/pack_float_1_run.c: Likewise.
* gcc.target/aarch64/sve/popcount_1.c: Likewise.
* gcc.target/aarch64/sve/popcount_1_run.c: Likewise.
* gcc.target/aarch64/sve/reduc_1.c: Likewise.
* gcc.target/aarch64/sve/reduc_1_run.c: Likewise.
* gcc.target/aarch64/sve/reduc_2.c: Likewise.
* gcc.target/aarch64/sve/reduc_2_run.c: Likewise.
* gcc.target/aarch64/sve/reduc_3.c: Likewise.
* gcc.target/aarch64/sve/rev_1.c: Likewise.
* gcc.target/aarch64/sve/revb_1.c: Likewise.
* gcc.target/aarch64/sve/revh_1.c: Likewise.
* gcc.target/aarch64/sve/revw_1.c: Likewise.
* gcc.target/aarch64/sve/shift_1.c: Likewise.
* gcc.target/aarch64/sve/single_1.c: Likewise.
* gcc.target/aarch64/sve/single_2.c: Likewise.
* gcc.target/aarch64/sve/single_3.c: Likewise.
* gcc.target/aarch64/sve/single_4.c: Likewise.
* gcc.target/aarch64/sve/spill_1.c: Likewise.
* gcc.target/aarch64/sve/store_scalar_offset_1.c: Likewise.
* gcc.target/aarch64/sve/subr_1.c: Likewise.
* gcc.target/aarch64/sve/trn1_1.c: Likewise.
* gcc.target/aarch64/sve/trn2_1.c: Likewise.
* gcc.target/aarch64/sve/unpack_fcvt_signed_1.c: Likewise.
* gcc.target/aarch64/sve/unpack_fcvt_signed_1_run.c: Likewise.
* gcc.target/aarch64/sve/unpack_fcvt_unsigned_1.c: Likewise.
* gcc.target/aarch64/sve/unpack_fcvt_unsigned_1_run.c: Likewise.
* gcc.target/aarch64/sve/unpack_float_1.c: Likewise.
* gcc.target/aarch64/sve/unpack_float_1_run.c: Likewise.
* gcc.target/aarch64/sve/unpack_signed_1.c: Likewise.
* gcc.target/aarch64/sve/unpack_signed_1_run.c: Likewise.
* gcc.target/aarch64/sve/unpack_unsigned_1.c: Likewise.
* gcc.target/aarch64/sve/unpack_unsigned_1_run.c: Likewise.
* gcc.target/aarch64/sve/uzp1_1.c: Likewise.
* gcc.target/aarch64/sve/uzp1_1_run.c: Likewise.
* gcc.target/aarch64/sve/uzp2_1.c: Likewise.
* gcc.target/aarch64/sve/uzp2_1_run.c: Likewise.
* gcc.target/aarch64/sve/vcond_1.C: Likewise.
* gcc.target/aarch64/sve/vcond_1_run.C: Likewise.
* gcc.target/aarch64/sve/vcond_2.c: Likewise.
* gcc.target/aarch64/sve/vcond_2_run.c: Likewise.
* gcc.target/aarch64/sve/vcond_3.c: Likewise.
* gcc.target/aarch64/sve/vcond_4.c: Likewise.
* gcc.target/aarch64/sve/vcond_4_run.c: Likewise.
* gcc.target/aarch64/sve/vcond_5.c: Likewise.
* gcc.target/aarch64/sve/vcond_5_run.c: Likewise.
* gcc.target/aarch64/sve/vcond_6.c: Likewise.
* gcc.target/aarch64/sve/vcond_6_run.c: Likewise.
* gcc.target/aarch64/sve/vec_init_1.c: Likewise.
* gcc.target/aarch64/sve/vec_init_1_run.c: Likewise.
* gcc.target/aarch64/sve/vec_init_2.c: Likewise.
* gcc.target/aarch64/sve/vec_perm_1.c: Likewise.
* gcc.target/aarch64/sve/vec_perm_1_run.c: Likewise.
* gcc.target/aarch64/sve/vec_perm_1_overrange_run.c: Likewise.
* gcc.target/aarch64/sve/vec_perm_const_1.c: Likewise.
* gcc.target/aarch64/sve/vec_perm_const_1_overrun.c: Likewise.
* gcc.target/aarch64/sve/vec_perm_const_1_run.c: Likewise.
* gcc.target/aarch64/sve/vec_perm_const_single_1.c: Likewise.
* gcc.target/aarch64/sve/vec_perm_const_single_1_run.c: Likewise.
* gcc.target/aarch64/sve/vec_perm_single_1.c: Likewise.
* gcc.target/aarch64/sve/vec_perm_single_1_run.c: Likewise.
* gcc.target/aarch64/sve/zip1_1.c: Likewise.
* gcc.target/aarch64/sve/zip2_1.c: Likewise.

Reviewed-by: James Greenhalgh <james.greenhalgh@arm.com>
Co-Authored-By: Alan Hayward <alan.hayward@arm.com>
Co-Authored-By: David Sherwood <david.sherwood@arm.com>
From-SVN: r256614

176 files changed:
gcc/testsuite/ChangeLog
gcc/testsuite/gcc.target/aarch64/bic_imm_1.c
gcc/testsuite/gcc.target/aarch64/fmaxmin.c
gcc/testsuite/gcc.target/aarch64/fmul_fcvt_2.c
gcc/testsuite/gcc.target/aarch64/orr_imm_1.c
gcc/testsuite/gcc.target/aarch64/pr62178.c
gcc/testsuite/gcc.target/aarch64/pr71727-2.c
gcc/testsuite/gcc.target/aarch64/saddw-1.c
gcc/testsuite/gcc.target/aarch64/saddw-2.c
gcc/testsuite/gcc.target/aarch64/sve/aarch64-sve.exp [new file with mode: 0644]
gcc/testsuite/gcc.target/aarch64/sve/arith_1.c [new file with mode: 0644]
gcc/testsuite/gcc.target/aarch64/sve/const_pred_1.C [new file with mode: 0644]
gcc/testsuite/gcc.target/aarch64/sve/const_pred_2.C [new file with mode: 0644]
gcc/testsuite/gcc.target/aarch64/sve/const_pred_3.C [new file with mode: 0644]
gcc/testsuite/gcc.target/aarch64/sve/const_pred_4.C [new file with mode: 0644]
gcc/testsuite/gcc.target/aarch64/sve/cvtf_signed_1.c [new file with mode: 0644]
gcc/testsuite/gcc.target/aarch64/sve/cvtf_signed_1_run.c [new file with mode: 0644]
gcc/testsuite/gcc.target/aarch64/sve/cvtf_unsigned_1.c [new file with mode: 0644]
gcc/testsuite/gcc.target/aarch64/sve/cvtf_unsigned_1_run.c [new file with mode: 0644]
gcc/testsuite/gcc.target/aarch64/sve/dup_imm_1.c [new file with mode: 0644]
gcc/testsuite/gcc.target/aarch64/sve/dup_imm_1_run.c [new file with mode: 0644]
gcc/testsuite/gcc.target/aarch64/sve/dup_lane_1.c [new file with mode: 0644]
gcc/testsuite/gcc.target/aarch64/sve/ext_1.c [new file with mode: 0644]
gcc/testsuite/gcc.target/aarch64/sve/ext_2.c [new file with mode: 0644]
gcc/testsuite/gcc.target/aarch64/sve/extract_1.c [new file with mode: 0644]
gcc/testsuite/gcc.target/aarch64/sve/extract_2.c [new file with mode: 0644]
gcc/testsuite/gcc.target/aarch64/sve/extract_3.c [new file with mode: 0644]
gcc/testsuite/gcc.target/aarch64/sve/extract_4.c [new file with mode: 0644]
gcc/testsuite/gcc.target/aarch64/sve/fabs_1.c [new file with mode: 0644]
gcc/testsuite/gcc.target/aarch64/sve/fcvtz_signed_1.c [new file with mode: 0644]
gcc/testsuite/gcc.target/aarch64/sve/fcvtz_signed_1_run.c [new file with mode: 0644]
gcc/testsuite/gcc.target/aarch64/sve/fcvtz_unsigned_1.c [new file with mode: 0644]
gcc/testsuite/gcc.target/aarch64/sve/fcvtz_unsigned_1_run.c [new file with mode: 0644]
gcc/testsuite/gcc.target/aarch64/sve/fdiv_1.c [new file with mode: 0644]
gcc/testsuite/gcc.target/aarch64/sve/fdup_1.c [new file with mode: 0644]
gcc/testsuite/gcc.target/aarch64/sve/fdup_1_run.c [new file with mode: 0644]
gcc/testsuite/gcc.target/aarch64/sve/fmad_1.c [new file with mode: 0644]
gcc/testsuite/gcc.target/aarch64/sve/fmla_1.c [new file with mode: 0644]
gcc/testsuite/gcc.target/aarch64/sve/fmls_1.c [new file with mode: 0644]
gcc/testsuite/gcc.target/aarch64/sve/fmsb_1.c [new file with mode: 0644]
gcc/testsuite/gcc.target/aarch64/sve/fmul_1.c [new file with mode: 0644]
gcc/testsuite/gcc.target/aarch64/sve/fneg_1.c [new file with mode: 0644]
gcc/testsuite/gcc.target/aarch64/sve/fnmad_1.c [new file with mode: 0644]
gcc/testsuite/gcc.target/aarch64/sve/fnmla_1.c [new file with mode: 0644]
gcc/testsuite/gcc.target/aarch64/sve/fnmls_1.c [new file with mode: 0644]
gcc/testsuite/gcc.target/aarch64/sve/fnmsb_1.c [new file with mode: 0644]
gcc/testsuite/gcc.target/aarch64/sve/fp_arith_1.c [new file with mode: 0644]
gcc/testsuite/gcc.target/aarch64/sve/frinta_1.c [new file with mode: 0644]
gcc/testsuite/gcc.target/aarch64/sve/frinti_1.c [new file with mode: 0644]
gcc/testsuite/gcc.target/aarch64/sve/frintm_1.c [new file with mode: 0644]
gcc/testsuite/gcc.target/aarch64/sve/frintp_1.c [new file with mode: 0644]
gcc/testsuite/gcc.target/aarch64/sve/frintx_1.c [new file with mode: 0644]
gcc/testsuite/gcc.target/aarch64/sve/frintz_1.c [new file with mode: 0644]
gcc/testsuite/gcc.target/aarch64/sve/fsqrt_1.c [new file with mode: 0644]
gcc/testsuite/gcc.target/aarch64/sve/fsubr_1.c [new file with mode: 0644]
gcc/testsuite/gcc.target/aarch64/sve/index_1.c [new file with mode: 0644]
gcc/testsuite/gcc.target/aarch64/sve/index_1_run.c [new file with mode: 0644]
gcc/testsuite/gcc.target/aarch64/sve/ld1r_1.c [new file with mode: 0644]
gcc/testsuite/gcc.target/aarch64/sve/load_const_offset_1.c [new file with mode: 0644]
gcc/testsuite/gcc.target/aarch64/sve/load_const_offset_2.c [new file with mode: 0644]
gcc/testsuite/gcc.target/aarch64/sve/load_const_offset_3.c [new file with mode: 0644]
gcc/testsuite/gcc.target/aarch64/sve/load_scalar_offset_1.c [new file with mode: 0644]
gcc/testsuite/gcc.target/aarch64/sve/logical_1.c [new file with mode: 0644]
gcc/testsuite/gcc.target/aarch64/sve/loop_add_1.c [new file with mode: 0644]
gcc/testsuite/gcc.target/aarch64/sve/loop_add_1_run.c [new file with mode: 0644]
gcc/testsuite/gcc.target/aarch64/sve/mad_1.c [new file with mode: 0644]
gcc/testsuite/gcc.target/aarch64/sve/maxmin_1.c [new file with mode: 0644]
gcc/testsuite/gcc.target/aarch64/sve/maxmin_1_run.c [new file with mode: 0644]
gcc/testsuite/gcc.target/aarch64/sve/maxmin_strict_1.c [new file with mode: 0644]
gcc/testsuite/gcc.target/aarch64/sve/maxmin_strict_1_run.c [new file with mode: 0644]
gcc/testsuite/gcc.target/aarch64/sve/mla_1.c [new file with mode: 0644]
gcc/testsuite/gcc.target/aarch64/sve/mls_1.c [new file with mode: 0644]
gcc/testsuite/gcc.target/aarch64/sve/mov_rr_1.c [new file with mode: 0644]
gcc/testsuite/gcc.target/aarch64/sve/msb_1.c [new file with mode: 0644]
gcc/testsuite/gcc.target/aarch64/sve/mul_1.c [new file with mode: 0644]
gcc/testsuite/gcc.target/aarch64/sve/neg_1.c [new file with mode: 0644]
gcc/testsuite/gcc.target/aarch64/sve/nlogical_1.c [new file with mode: 0644]
gcc/testsuite/gcc.target/aarch64/sve/nlogical_1_run.c [new file with mode: 0644]
gcc/testsuite/gcc.target/aarch64/sve/pack_1.c [new file with mode: 0644]
gcc/testsuite/gcc.target/aarch64/sve/pack_1_run.c [new file with mode: 0644]
gcc/testsuite/gcc.target/aarch64/sve/pack_fcvt_signed_1.c [new file with mode: 0644]
gcc/testsuite/gcc.target/aarch64/sve/pack_fcvt_signed_1_run.c [new file with mode: 0644]
gcc/testsuite/gcc.target/aarch64/sve/pack_fcvt_unsigned_1.c [new file with mode: 0644]
gcc/testsuite/gcc.target/aarch64/sve/pack_fcvt_unsigned_1_run.c [new file with mode: 0644]
gcc/testsuite/gcc.target/aarch64/sve/pack_float_1.c [new file with mode: 0644]
gcc/testsuite/gcc.target/aarch64/sve/pack_float_1_run.c [new file with mode: 0644]
gcc/testsuite/gcc.target/aarch64/sve/popcount_1.c [new file with mode: 0644]
gcc/testsuite/gcc.target/aarch64/sve/popcount_1_run.c [new file with mode: 0644]
gcc/testsuite/gcc.target/aarch64/sve/reduc_1.c [new file with mode: 0644]
gcc/testsuite/gcc.target/aarch64/sve/reduc_1_run.c [new file with mode: 0644]
gcc/testsuite/gcc.target/aarch64/sve/reduc_2.c [new file with mode: 0644]
gcc/testsuite/gcc.target/aarch64/sve/reduc_2_run.c [new file with mode: 0644]
gcc/testsuite/gcc.target/aarch64/sve/reduc_3.c [new file with mode: 0644]
gcc/testsuite/gcc.target/aarch64/sve/rev_1.c [new file with mode: 0644]
gcc/testsuite/gcc.target/aarch64/sve/revb_1.c [new file with mode: 0644]
gcc/testsuite/gcc.target/aarch64/sve/revh_1.c [new file with mode: 0644]
gcc/testsuite/gcc.target/aarch64/sve/revw_1.c [new file with mode: 0644]
gcc/testsuite/gcc.target/aarch64/sve/shift_1.c [new file with mode: 0644]
gcc/testsuite/gcc.target/aarch64/sve/single_1.c [new file with mode: 0644]
gcc/testsuite/gcc.target/aarch64/sve/single_2.c [new file with mode: 0644]
gcc/testsuite/gcc.target/aarch64/sve/single_3.c [new file with mode: 0644]
gcc/testsuite/gcc.target/aarch64/sve/single_4.c [new file with mode: 0644]
gcc/testsuite/gcc.target/aarch64/sve/spill_1.c [new file with mode: 0644]
gcc/testsuite/gcc.target/aarch64/sve/store_scalar_offset_1.c [new file with mode: 0644]
gcc/testsuite/gcc.target/aarch64/sve/subr_1.c [new file with mode: 0644]
gcc/testsuite/gcc.target/aarch64/sve/trn1_1.c [new file with mode: 0644]
gcc/testsuite/gcc.target/aarch64/sve/trn2_1.c [new file with mode: 0644]
gcc/testsuite/gcc.target/aarch64/sve/unpack_fcvt_signed_1.c [new file with mode: 0644]
gcc/testsuite/gcc.target/aarch64/sve/unpack_fcvt_signed_1_run.c [new file with mode: 0644]
gcc/testsuite/gcc.target/aarch64/sve/unpack_fcvt_unsigned_1.c [new file with mode: 0644]
gcc/testsuite/gcc.target/aarch64/sve/unpack_fcvt_unsigned_1_run.c [new file with mode: 0644]
gcc/testsuite/gcc.target/aarch64/sve/unpack_float_1.c [new file with mode: 0644]
gcc/testsuite/gcc.target/aarch64/sve/unpack_float_1_run.c [new file with mode: 0644]
gcc/testsuite/gcc.target/aarch64/sve/unpack_signed_1.c [new file with mode: 0644]
gcc/testsuite/gcc.target/aarch64/sve/unpack_signed_1_run.c [new file with mode: 0644]
gcc/testsuite/gcc.target/aarch64/sve/unpack_unsigned_1.c [new file with mode: 0644]
gcc/testsuite/gcc.target/aarch64/sve/unpack_unsigned_1_run.c [new file with mode: 0644]
gcc/testsuite/gcc.target/aarch64/sve/uzp1_1.c [new file with mode: 0644]
gcc/testsuite/gcc.target/aarch64/sve/uzp1_1_run.c [new file with mode: 0644]
gcc/testsuite/gcc.target/aarch64/sve/uzp2_1.c [new file with mode: 0644]
gcc/testsuite/gcc.target/aarch64/sve/uzp2_1_run.c [new file with mode: 0644]
gcc/testsuite/gcc.target/aarch64/sve/vcond_1.C [new file with mode: 0644]
gcc/testsuite/gcc.target/aarch64/sve/vcond_1_run.C [new file with mode: 0644]
gcc/testsuite/gcc.target/aarch64/sve/vcond_2.c [new file with mode: 0644]
gcc/testsuite/gcc.target/aarch64/sve/vcond_2_run.c [new file with mode: 0644]
gcc/testsuite/gcc.target/aarch64/sve/vcond_3.c [new file with mode: 0644]
gcc/testsuite/gcc.target/aarch64/sve/vcond_4.c [new file with mode: 0644]
gcc/testsuite/gcc.target/aarch64/sve/vcond_4_run.c [new file with mode: 0644]
gcc/testsuite/gcc.target/aarch64/sve/vcond_5.c [new file with mode: 0644]
gcc/testsuite/gcc.target/aarch64/sve/vcond_5_run.c [new file with mode: 0644]
gcc/testsuite/gcc.target/aarch64/sve/vcond_6.c [new file with mode: 0644]
gcc/testsuite/gcc.target/aarch64/sve/vcond_6_run.c [new file with mode: 0644]
gcc/testsuite/gcc.target/aarch64/sve/vec_init_1.c [new file with mode: 0644]
gcc/testsuite/gcc.target/aarch64/sve/vec_init_1_run.c [new file with mode: 0644]
gcc/testsuite/gcc.target/aarch64/sve/vec_init_2.c [new file with mode: 0644]
gcc/testsuite/gcc.target/aarch64/sve/vec_perm_1.c [new file with mode: 0644]
gcc/testsuite/gcc.target/aarch64/sve/vec_perm_1_overrange_run.c [new file with mode: 0644]
gcc/testsuite/gcc.target/aarch64/sve/vec_perm_1_run.c [new file with mode: 0644]
gcc/testsuite/gcc.target/aarch64/sve/vec_perm_const_1.c [new file with mode: 0644]
gcc/testsuite/gcc.target/aarch64/sve/vec_perm_const_1_overrun.c [new file with mode: 0644]
gcc/testsuite/gcc.target/aarch64/sve/vec_perm_const_1_run.c [new file with mode: 0644]
gcc/testsuite/gcc.target/aarch64/sve/vec_perm_const_single_1.c [new file with mode: 0644]
gcc/testsuite/gcc.target/aarch64/sve/vec_perm_const_single_1_run.c [new file with mode: 0644]
gcc/testsuite/gcc.target/aarch64/sve/vec_perm_single_1.c [new file with mode: 0644]
gcc/testsuite/gcc.target/aarch64/sve/vec_perm_single_1_run.c [new file with mode: 0644]
gcc/testsuite/gcc.target/aarch64/sve/zip1_1.c [new file with mode: 0644]
gcc/testsuite/gcc.target/aarch64/sve/zip2_1.c [new file with mode: 0644]
gcc/testsuite/gcc.target/aarch64/uaddw-1.c
gcc/testsuite/gcc.target/aarch64/uaddw-2.c
gcc/testsuite/gcc.target/aarch64/uaddw-3.c
gcc/testsuite/gcc.target/aarch64/vect-add-sub-cond.c
gcc/testsuite/gcc.target/aarch64/vect-compile.c
gcc/testsuite/gcc.target/aarch64/vect-faddv-compile.c
gcc/testsuite/gcc.target/aarch64/vect-fcm-eq-d.c
gcc/testsuite/gcc.target/aarch64/vect-fcm-eq-f.c
gcc/testsuite/gcc.target/aarch64/vect-fcm-ge-d.c
gcc/testsuite/gcc.target/aarch64/vect-fcm-ge-f.c
gcc/testsuite/gcc.target/aarch64/vect-fcm-gt-d.c
gcc/testsuite/gcc.target/aarch64/vect-fcm-gt-f.c
gcc/testsuite/gcc.target/aarch64/vect-fmax-fmin-compile.c
gcc/testsuite/gcc.target/aarch64/vect-fmaxv-fminv-compile.c
gcc/testsuite/gcc.target/aarch64/vect-fmovd-zero.c
gcc/testsuite/gcc.target/aarch64/vect-fmovd.c
gcc/testsuite/gcc.target/aarch64/vect-fmovf-zero.c
gcc/testsuite/gcc.target/aarch64/vect-fmovf.c
gcc/testsuite/gcc.target/aarch64/vect-fp-compile.c
gcc/testsuite/gcc.target/aarch64/vect-ld1r-compile-fp.c
gcc/testsuite/gcc.target/aarch64/vect-ld1r-compile.c
gcc/testsuite/gcc.target/aarch64/vect-movi.c
gcc/testsuite/gcc.target/aarch64/vect-mull-compile.c
gcc/testsuite/gcc.target/aarch64/vect-reduc-or_1.c
gcc/testsuite/gcc.target/aarch64/vect-vaddv.c
gcc/testsuite/gcc.target/aarch64/vect_saddl_1.c
gcc/testsuite/gcc.target/aarch64/vect_smlal_1.c
gcc/testsuite/gcc.target/aarch64/vector_initialization_nostack.c
gcc/testsuite/lib/target-supports.exp

index 0ad46ee2e4bc0a1c4cd752a75ed3fc3155e8408d..eb1b229530aab3197a061cc5cb1765d5786a8c97 100644 (file)
@@ -1,3 +1,185 @@
+2018-01-13  Richard Sandiford  <richard.sandiford@linaro.org>
+           Alan Hayward  <alan.hayward@arm.com>
+           David Sherwood  <david.sherwood@arm.com>
+
+       * lib/target-supports.exp (check_effective_target_aarch64_asm_sve_ok):
+       New proc.
+       * gcc.target/aarch64/bic_imm_1.c: Use #pragma GCC target "+nosve".
+       * gcc.target/aarch64/fmaxmin.c: Likewise.
+       * gcc.target/aarch64/fmul_fcvt_2.c: Likewise.
+       * gcc.target/aarch64/orr_imm_1.c: Likewise.
+       * gcc.target/aarch64/pr62178.c: Likewise.
+       * gcc.target/aarch64/pr71727-2.c: Likewise.
+       * gcc.target/aarch64/saddw-1.c: Likewise.
+       * gcc.target/aarch64/saddw-2.c: Likewise.
+       * gcc.target/aarch64/uaddw-1.c: Likewise.
+       * gcc.target/aarch64/uaddw-2.c: Likewise.
+       * gcc.target/aarch64/uaddw-3.c: Likewise.
+       * gcc.target/aarch64/vect-add-sub-cond.c: Likewise.
+       * gcc.target/aarch64/vect-compile.c: Likewise.
+       * gcc.target/aarch64/vect-faddv-compile.c: Likewise.
+       * gcc.target/aarch64/vect-fcm-eq-d.c: Likewise.
+       * gcc.target/aarch64/vect-fcm-eq-f.c: Likewise.
+       * gcc.target/aarch64/vect-fcm-ge-d.c: Likewise.
+       * gcc.target/aarch64/vect-fcm-ge-f.c: Likewise.
+       * gcc.target/aarch64/vect-fcm-gt-d.c: Likewise.
+       * gcc.target/aarch64/vect-fcm-gt-f.c: Likewise.
+       * gcc.target/aarch64/vect-fmax-fmin-compile.c: Likewise.
+       * gcc.target/aarch64/vect-fmaxv-fminv-compile.c: Likewise.
+       * gcc.target/aarch64/vect-fmovd-zero.c: Likewise.
+       * gcc.target/aarch64/vect-fmovd.c: Likewise.
+       * gcc.target/aarch64/vect-fmovf-zero.c: Likewise.
+       * gcc.target/aarch64/vect-fmovf.c: Likewise.
+       * gcc.target/aarch64/vect-fp-compile.c: Likewise.
+       * gcc.target/aarch64/vect-ld1r-compile-fp.c: Likewise.
+       * gcc.target/aarch64/vect-ld1r-compile.c: Likewise.
+       * gcc.target/aarch64/vect-movi.c: Likewise.
+       * gcc.target/aarch64/vect-mull-compile.c: Likewise.
+       * gcc.target/aarch64/vect-reduc-or_1.c: Likewise.
+       * gcc.target/aarch64/vect-vaddv.c: Likewise.
+       * gcc.target/aarch64/vect_saddl_1.c: Likewise.
+       * gcc.target/aarch64/vect_smlal_1.c: Likewise.
+       * gcc.target/aarch64/vector_initialization_nostack.c: XFAIL for
+       fixed-length SVE.
+       * gcc.target/aarch64/sve/aarch64-sve.exp: New file.
+       * gcc.target/aarch64/sve/arith_1.c: New test.
+       * gcc.target/aarch64/sve/const_pred_1.C: Likewise.
+       * gcc.target/aarch64/sve/const_pred_2.C: Likewise.
+       * gcc.target/aarch64/sve/const_pred_3.C: Likewise.
+       * gcc.target/aarch64/sve/const_pred_4.C: Likewise.
+       * gcc.target/aarch64/sve/cvtf_signed_1.c: Likewise.
+       * gcc.target/aarch64/sve/cvtf_signed_1_run.c: Likewise.
+       * gcc.target/aarch64/sve/cvtf_unsigned_1.c: Likewise.
+       * gcc.target/aarch64/sve/cvtf_unsigned_1_run.c: Likewise.
+       * gcc.target/aarch64/sve/dup_imm_1.c: Likewise.
+       * gcc.target/aarch64/sve/dup_imm_1_run.c: Likewise.
+       * gcc.target/aarch64/sve/dup_lane_1.c: Likewise.
+       * gcc.target/aarch64/sve/ext_1.c: Likewise.
+       * gcc.target/aarch64/sve/ext_2.c: Likewise.
+       * gcc.target/aarch64/sve/extract_1.c: Likewise.
+       * gcc.target/aarch64/sve/extract_2.c: Likewise.
+       * gcc.target/aarch64/sve/extract_3.c: Likewise.
+       * gcc.target/aarch64/sve/extract_4.c: Likewise.
+       * gcc.target/aarch64/sve/fabs_1.c: Likewise.
+       * gcc.target/aarch64/sve/fcvtz_signed_1.c: Likewise.
+       * gcc.target/aarch64/sve/fcvtz_signed_1_run.c: Likewise.
+       * gcc.target/aarch64/sve/fcvtz_unsigned_1.c: Likewise.
+       * gcc.target/aarch64/sve/fcvtz_unsigned_1_run.c: Likewise.
+       * gcc.target/aarch64/sve/fdiv_1.c: Likewise.
+       * gcc.target/aarch64/sve/fdup_1.c: Likewise.
+       * gcc.target/aarch64/sve/fdup_1_run.c: Likewise.
+       * gcc.target/aarch64/sve/fmad_1.c: Likewise.
+       * gcc.target/aarch64/sve/fmla_1.c: Likewise.
+       * gcc.target/aarch64/sve/fmls_1.c: Likewise.
+       * gcc.target/aarch64/sve/fmsb_1.c: Likewise.
+       * gcc.target/aarch64/sve/fmul_1.c: Likewise.
+       * gcc.target/aarch64/sve/fneg_1.c: Likewise.
+       * gcc.target/aarch64/sve/fnmad_1.c: Likewise.
+       * gcc.target/aarch64/sve/fnmla_1.c: Likewise.
+       * gcc.target/aarch64/sve/fnmls_1.c: Likewise.
+       * gcc.target/aarch64/sve/fnmsb_1.c: Likewise.
+       * gcc.target/aarch64/sve/fp_arith_1.c: Likewise.
+       * gcc.target/aarch64/sve/frinta_1.c: Likewise.
+       * gcc.target/aarch64/sve/frinti_1.c: Likewise.
+       * gcc.target/aarch64/sve/frintm_1.c: Likewise.
+       * gcc.target/aarch64/sve/frintp_1.c: Likewise.
+       * gcc.target/aarch64/sve/frintx_1.c: Likewise.
+       * gcc.target/aarch64/sve/frintz_1.c: Likewise.
+       * gcc.target/aarch64/sve/fsqrt_1.c: Likewise.
+       * gcc.target/aarch64/sve/fsubr_1.c: Likewise.
+       * gcc.target/aarch64/sve/index_1.c: Likewise.
+       * gcc.target/aarch64/sve/index_1_run.c: Likewise.
+       * gcc.target/aarch64/sve/ld1r_1.c: Likewise.
+       * gcc.target/aarch64/sve/load_const_offset_1.c: Likewise.
+       * gcc.target/aarch64/sve/load_const_offset_2.c: Likewise.
+       * gcc.target/aarch64/sve/load_const_offset_3.c: Likewise.
+       * gcc.target/aarch64/sve/load_scalar_offset_1.c: Likewise.
+       * gcc.target/aarch64/sve/logical_1.c: Likewise.
+       * gcc.target/aarch64/sve/loop_add_1.c: Likewise.
+       * gcc.target/aarch64/sve/loop_add_1_run.c: Likewise.
+       * gcc.target/aarch64/sve/mad_1.c: Likewise.
+       * gcc.target/aarch64/sve/maxmin_1.c: Likewise.
+       * gcc.target/aarch64/sve/maxmin_1_run.c: Likewise.
+       * gcc.target/aarch64/sve/maxmin_strict_1.c: Likewise.
+       * gcc.target/aarch64/sve/maxmin_strict_1_run.c: Likewise.
+       * gcc.target/aarch64/sve/mla_1.c: Likewise.
+       * gcc.target/aarch64/sve/mls_1.c: Likewise.
+       * gcc.target/aarch64/sve/mov_rr_1.c: Likewise.
+       * gcc.target/aarch64/sve/msb_1.c: Likewise.
+       * gcc.target/aarch64/sve/mul_1.c: Likewise.
+       * gcc.target/aarch64/sve/neg_1.c: Likewise.
+       * gcc.target/aarch64/sve/nlogical_1.c: Likewise.
+       * gcc.target/aarch64/sve/nlogical_1_run.c: Likewise.
+       * gcc.target/aarch64/sve/pack_1.c: Likewise.
+       * gcc.target/aarch64/sve/pack_1_run.c: Likewise.
+       * gcc.target/aarch64/sve/pack_fcvt_signed_1.c: Likewise.
+       * gcc.target/aarch64/sve/pack_fcvt_signed_1_run.c: Likewise.
+       * gcc.target/aarch64/sve/pack_fcvt_unsigned_1.c: Likewise.
+       * gcc.target/aarch64/sve/pack_fcvt_unsigned_1_run.c: Likewise.
+       * gcc.target/aarch64/sve/pack_float_1.c: Likewise.
+       * gcc.target/aarch64/sve/pack_float_1_run.c: Likewise.
+       * gcc.target/aarch64/sve/popcount_1.c: Likewise.
+       * gcc.target/aarch64/sve/popcount_1_run.c: Likewise.
+       * gcc.target/aarch64/sve/reduc_1.c: Likewise.
+       * gcc.target/aarch64/sve/reduc_1_run.c: Likewise.
+       * gcc.target/aarch64/sve/reduc_2.c: Likewise.
+       * gcc.target/aarch64/sve/reduc_2_run.c: Likewise.
+       * gcc.target/aarch64/sve/reduc_3.c: Likewise.
+       * gcc.target/aarch64/sve/rev_1.c: Likewise.
+       * gcc.target/aarch64/sve/revb_1.c: Likewise.
+       * gcc.target/aarch64/sve/revh_1.c: Likewise.
+       * gcc.target/aarch64/sve/revw_1.c: Likewise.
+       * gcc.target/aarch64/sve/shift_1.c: Likewise.
+       * gcc.target/aarch64/sve/single_1.c: Likewise.
+       * gcc.target/aarch64/sve/single_2.c: Likewise.
+       * gcc.target/aarch64/sve/single_3.c: Likewise.
+       * gcc.target/aarch64/sve/single_4.c: Likewise.
+       * gcc.target/aarch64/sve/spill_1.c: Likewise.
+       * gcc.target/aarch64/sve/store_scalar_offset_1.c: Likewise.
+       * gcc.target/aarch64/sve/subr_1.c: Likewise.
+       * gcc.target/aarch64/sve/trn1_1.c: Likewise.
+       * gcc.target/aarch64/sve/trn2_1.c: Likewise.
+       * gcc.target/aarch64/sve/unpack_fcvt_signed_1.c: Likewise.
+       * gcc.target/aarch64/sve/unpack_fcvt_signed_1_run.c: Likewise.
+       * gcc.target/aarch64/sve/unpack_fcvt_unsigned_1.c: Likewise.
+       * gcc.target/aarch64/sve/unpack_fcvt_unsigned_1_run.c: Likewise.
+       * gcc.target/aarch64/sve/unpack_float_1.c: Likewise.
+       * gcc.target/aarch64/sve/unpack_float_1_run.c: Likewise.
+       * gcc.target/aarch64/sve/unpack_signed_1.c: Likewise.
+       * gcc.target/aarch64/sve/unpack_signed_1_run.c: Likewise.
+       * gcc.target/aarch64/sve/unpack_unsigned_1.c: Likewise.
+       * gcc.target/aarch64/sve/unpack_unsigned_1_run.c: Likewise.
+       * gcc.target/aarch64/sve/uzp1_1.c: Likewise.
+       * gcc.target/aarch64/sve/uzp1_1_run.c: Likewise.
+       * gcc.target/aarch64/sve/uzp2_1.c: Likewise.
+       * gcc.target/aarch64/sve/uzp2_1_run.c: Likewise.
+       * gcc.target/aarch64/sve/vcond_1.C: Likewise.
+       * gcc.target/aarch64/sve/vcond_1_run.C: Likewise.
+       * gcc.target/aarch64/sve/vcond_2.c: Likewise.
+       * gcc.target/aarch64/sve/vcond_2_run.c: Likewise.
+       * gcc.target/aarch64/sve/vcond_3.c: Likewise.
+       * gcc.target/aarch64/sve/vcond_4.c: Likewise.
+       * gcc.target/aarch64/sve/vcond_4_run.c: Likewise.
+       * gcc.target/aarch64/sve/vcond_5.c: Likewise.
+       * gcc.target/aarch64/sve/vcond_5_run.c: Likewise.
+       * gcc.target/aarch64/sve/vcond_6.c: Likewise.
+       * gcc.target/aarch64/sve/vcond_6_run.c: Likewise.
+       * gcc.target/aarch64/sve/vec_init_1.c: Likewise.
+       * gcc.target/aarch64/sve/vec_init_1_run.c: Likewise.
+       * gcc.target/aarch64/sve/vec_init_2.c: Likewise.
+       * gcc.target/aarch64/sve/vec_perm_1.c: Likewise.
+       * gcc.target/aarch64/sve/vec_perm_1_run.c: Likewise.
+       * gcc.target/aarch64/sve/vec_perm_1_overrange_run.c: Likewise.
+       * gcc.target/aarch64/sve/vec_perm_const_1.c: Likewise.
+       * gcc.target/aarch64/sve/vec_perm_const_1_overrun.c: Likewise.
+       * gcc.target/aarch64/sve/vec_perm_const_1_run.c: Likewise.
+       * gcc.target/aarch64/sve/vec_perm_const_single_1.c: Likewise.
+       * gcc.target/aarch64/sve/vec_perm_const_single_1_run.c: Likewise.
+       * gcc.target/aarch64/sve/vec_perm_single_1.c: Likewise.
+       * gcc.target/aarch64/sve/vec_perm_single_1_run.c: Likewise.
+       * gcc.target/aarch64/sve/zip1_1.c: Likewise.
+       * gcc.target/aarch64/sve/zip2_1.c: Likewise.
+
 2018-01-13  Richard Sandiford  <richard.sandiford@linaro.org>
            Alan Hayward  <alan.hayward@arm.com>
            David Sherwood  <david.sherwood@arm.com>
index b14f0091ff5c78dd56bc02a3913d473f71e192a1..08b2a9c540d40d420738c4ca05c851938b7de171 100644 (file)
@@ -1,6 +1,8 @@
 /* { dg-do assemble } */
 /* { dg-options "-O2 --save-temps -ftree-vectorize" } */
 
+#pragma GCC target "+nosve"
+
 /* Each function uses the correspoding 'CLASS' in
    Marco CHECK (aarch64_simd_valid_immediate).  */
 
index 4447e33f7ee69f73d52014c584d71e60e0264012..8089037b98862cef764cc6c1cc9dd7d5d7f4edcb 100644 (file)
@@ -1,6 +1,7 @@
 /* { dg-do run } */
 /* { dg-options "-O2 -ftree-vectorize -fno-inline -fno-vect-cost-model -save-temps" } */
 
+#pragma GCC target "+nosve"
 
 extern void abort (void);
 double fmax (double, double);
index 4ac3ab734445a820b43dbc473192ab7b27f183d4..8f0240bf5f7585b3098ddb23fef981270b0d7c73 100644 (file)
@@ -1,6 +1,8 @@
 /* { dg-do run } */
 /* { dg-options "-save-temps -O2 -ftree-vectorize -fno-inline -fno-vect-cost-model" } */
 
+#pragma GCC target "+nosve"
+
 #define N 1024
 
 #define FUNC_DEF(__a)          \
index ff6f68350eb5df07cf3a8499eee7af8b37aa64b1..4c8208b5ac226ff93859f0d2633dac96bad3cac7 100644 (file)
@@ -1,6 +1,8 @@
 /* { dg-do assemble } */
 /* { dg-options "-O2 --save-temps -ftree-vectorize" } */
 
+#pragma GCC target "+nosve"
+
 /* Each function uses the correspoding 'CLASS' in
    Marco CHECK (aarch64_simd_valid_immediate).  */
 
index 1bf6d838d3a49ed5d8ecf9ae0157bd2a9159bfb4..ccb400fc9aee7a419287dc006918de3fb9d7da73 100644 (file)
@@ -1,6 +1,8 @@
 /* { dg-do compile } */
 /* { dg-options "-O3" } */
 
+#pragma GCC target "+nosve"
+
 int a[30 +1][30 +1], b[30 +1][30 +1], r[30 +1][30 +1];
 
 void foo (void) {
index 2bc803ab330ddbab9017fb96398cca252cabe6e4..58fe5bcd59c80ef78cca9e10f01b978023420e0f 100644 (file)
@@ -1,6 +1,8 @@
 /* { dg-do compile } */
 /* { dg-options "-mstrict-align -O3" } */
 
+#pragma GCC target "+nosve"
+
 unsigned char foo(const unsigned char *buffer, unsigned int length)
 {
   unsigned char sum;
index 7500fb82444e60bf9c1246bf4e3d28466339d7a4..f8871209b8a114e1e37b7ea1538417eef612c864 100644 (file)
@@ -1,6 +1,8 @@
 /* { dg-do compile } */
 /* { dg-options "-O3" } */
 
+#pragma GCC target "+nosve"
+
 int 
 t6(int len, void * dummy, short * __restrict x)
 {
index 5d9c8d9edc2103263b55ebe450fdeb92688aa693..b9fc442a2f7b6c83e3a75a5f813d04cfa2b6a6f6 100644 (file)
@@ -1,6 +1,8 @@
 /* { dg-do compile } */
 /* { dg-options "-O3" } */
 
+#pragma GCC target "+nosve"
+
 int 
 t6(int len, void * dummy, int * __restrict x)
 {
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/aarch64-sve.exp b/gcc/testsuite/gcc.target/aarch64/sve/aarch64-sve.exp
new file mode 100644 (file)
index 0000000..934a6a8
--- /dev/null
@@ -0,0 +1,52 @@
+#  Specific regression driver for AArch64 SVE.
+#  Copyright (C) 2009-2018 Free Software Foundation, Inc.
+#  Contributed by ARM Ltd.
+#
+#  This file is part of GCC.
+#
+#  GCC is free software; you can redistribute it and/or modify it
+#  under the terms of the GNU General Public License as published by
+#  the Free Software Foundation; either version 3, or (at your option)
+#  any later version.
+#
+#  GCC is distributed in the hope that it will be useful, but
+#  WITHOUT ANY WARRANTY; without even the implied warranty of
+#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+#  General Public License for more details.
+#
+#  You should have received a copy of the GNU General Public License
+#  along with GCC; see the file COPYING3.  If not see
+#  <http://www.gnu.org/licenses/>.
+
+# GCC testsuite that uses the `dg.exp' driver.
+
+# Exit immediately if this isn't an AArch64 target.
+if {![istarget aarch64*-*-*] } then {
+  return
+}
+
+# Load support procs.
+load_lib gcc-dg.exp
+
+# If a testcase doesn't have special options, use these.
+global DEFAULT_CFLAGS
+if ![info exists DEFAULT_CFLAGS] then {
+    set DEFAULT_CFLAGS " -ansi -pedantic-errors"
+}
+
+# Initialize `dg'.
+dg-init
+
+# Force SVE if we're not testing it already.
+if { [check_effective_target_aarch64_sve] } {
+    set sve_flags ""
+} else {
+    set sve_flags "-march=armv8.2-a+sve"
+}
+
+# Main loop.
+dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/*.\[cCS\]]] \
+    $sve_flags $DEFAULT_CFLAGS
+
+# All done.
+dg-finish
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/arith_1.c b/gcc/testsuite/gcc.target/aarch64/sve/arith_1.c
new file mode 100644 (file)
index 0000000..c2e1f6c
--- /dev/null
@@ -0,0 +1,121 @@
+/* { dg-do assemble { target aarch64_asm_sve_ok } } */
+/* { dg-options "-O3 --save-temps" } */
+
+#include <stdint.h>
+
+#define DO_REGREG_OPS(TYPE, OP, NAME)                          \
+void varith_##TYPE##_##NAME (TYPE *dst, TYPE *src, int count)  \
+{                                                              \
+  for (int i = 0; i < count; ++i)                              \
+    dst[i] = dst[i] OP src[i];                                 \
+}
+
+#define DO_IMMEDIATE_OPS(VALUE, TYPE, OP, NAME)                \
+void varithimm_##NAME##_##TYPE (TYPE *dst, int count)  \
+{                                                      \
+  for (int i = 0; i < count; ++i)                      \
+    dst[i] = dst[i] OP VALUE;                          \
+}
+
+#define DO_ARITH_OPS(TYPE, OP, NAME)                   \
+  DO_REGREG_OPS (TYPE, OP, NAME);                      \
+  DO_IMMEDIATE_OPS (0, TYPE, OP, NAME ## 0);           \
+  DO_IMMEDIATE_OPS (5, TYPE, OP, NAME ## 5);           \
+  DO_IMMEDIATE_OPS (255, TYPE, OP, NAME ## 255);       \
+  DO_IMMEDIATE_OPS (256, TYPE, OP, NAME ## 256);       \
+  DO_IMMEDIATE_OPS (257, TYPE, OP, NAME ## 257);       \
+  DO_IMMEDIATE_OPS (65280, TYPE, OP, NAME ## 65280);   \
+  DO_IMMEDIATE_OPS (65281, TYPE, OP, NAME ## 65281);   \
+  DO_IMMEDIATE_OPS (-1, TYPE, OP, NAME ## minus1);
+
+DO_ARITH_OPS (int8_t, +, add)
+DO_ARITH_OPS (int16_t, +, add)
+DO_ARITH_OPS (int32_t, +, add)
+DO_ARITH_OPS (int64_t, +, add)
+DO_ARITH_OPS (int8_t, -, minus)
+DO_ARITH_OPS (int16_t, -, minus)
+DO_ARITH_OPS (int32_t, -, minus)
+DO_ARITH_OPS (int64_t, -, minus)
+
+/* { dg-final { scan-assembler-times {\tadd\tz[0-9]+\.b, z[0-9]+\.b, z[0-9]+\.b\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tsub\tz[0-9]+\.b, z[0-9]+\.b, z[0-9]+\.b\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tadd\tz[0-9]+\.h, z[0-9]+\.h, z[0-9]+\.h\n} 3 } } */
+/* { dg-final { scan-assembler-times {\tsub\tz[0-9]+\.h, z[0-9]+\.h, z[0-9]+\.h\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tadd\tz[0-9]+\.s, z[0-9]+\.s, z[0-9]+\.s\n} 5 } } */
+/* { dg-final { scan-assembler-times {\tsub\tz[0-9]+\.s, z[0-9]+\.s, z[0-9]+\.s\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tadd\tz[0-9]+\.d, z[0-9]+\.d, z[0-9]+\.d\n} 5 } } */
+/* { dg-final { scan-assembler-times {\tsub\tz[0-9]+\.d, z[0-9]+\.d, z[0-9]+\.d\n} 1 } } */
+
+/* { dg-final { scan-assembler-times {\tadd\tz[0-9]+\.b, z[0-9]+\.b, #1\n} 4 } } */
+/* { dg-final { scan-assembler-times {\tadd\tz[0-9]+\.b, z[0-9]+\.b, #5\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tadd\tz[0-9]+\.b, z[0-9]+\.b, #251\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tadd\tz[0-9]+\.b, z[0-9]+\.b, #255\n} 4 } } */
+/* { dg-final { scan-assembler-not {\tadd\tz[0-9]+\.b, z[0-9]+\.b, #256\n} } } */
+/* { dg-final { scan-assembler-not {\tadd\tz[0-9]+\.b, z[0-9]+\.b, #257\n} } } */
+/* { dg-final { scan-assembler-not {\tadd\tz[0-9]+\.b, z[0-9]+\.b, #65280\n} } } */
+/* { dg-final { scan-assembler-not {\tadd\tz[0-9]+\.b, z[0-9]+\.b, #65281\n} } } */
+/* { dg-final { scan-assembler-not {\tadd\tz[0-9]+\.b, z[0-9]+\.b, #-1\n} } } */
+/* { dg-final { scan-assembler-not {\tsub\tz[0-9]+\.b, z[0-9]+\.b, #1\n} } } */
+
+/* { dg-final { scan-assembler-times {\tadd\tz[0-9]+\.h, z[0-9]+\.h, #1\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tadd\tz[0-9]+\.h, z[0-9]+\.h, #5\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tadd\tz[0-9]+\.h, z[0-9]+\.h, #255\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tadd\tz[0-9]+\.h, z[0-9]+\.h, #256\n} 2 } } */
+/* { dg-final { scan-assembler-not {\tadd\tz[0-9]+\.h, z[0-9]+\.h, #257\n} } } */
+/* { dg-final { scan-assembler-times {\tadd\tz[0-9]+\.h, z[0-9]+\.h, #65280\n} 2 } } */
+/* { dg-final { scan-assembler-not {\tadd\tz[0-9]+\.h, z[0-9]+\.h, #65281\n} } } */
+/* { dg-final { scan-assembler-not {\tadd\tz[0-9]+\.h, z[0-9]+\.h, #-1\n} } } */
+/* { dg-final { scan-assembler-times {\tsub\tz[0-9]+\.h, z[0-9]+\.h, #1\n} 1 } } */
+
+/* { dg-final { scan-assembler-times {\tadd\tz[0-9]+\.s, z[0-9]+\.s, #5\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tadd\tz[0-9]+\.s, z[0-9]+\.s, #255\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tadd\tz[0-9]+\.s, z[0-9]+\.s, #256\n} 1 } } */
+/* { dg-final { scan-assembler-not {\tadd\tz[0-9]+\.s, z[0-9]+\.s, #257\n} } } */
+/* { dg-final { scan-assembler-times {\tadd\tz[0-9]+\.s, z[0-9]+\.s, #65280\n} 1 } } */
+/* { dg-final { scan-assembler-not {\tadd\tz[0-9]+\.s, z[0-9]+\.s, #65281\n} } } */
+/* { dg-final { scan-assembler-not {\tadd\tz[0-9]+\.s, z[0-9]+\.s, #-1\n} } } */
+/* { dg-final { scan-assembler-times {\tsub\tz[0-9]+\.s, z[0-9]+\.s, #1\n} 1 } } */
+
+/* { dg-final { scan-assembler-times {\tadd\tz[0-9]+\.d, z[0-9]+\.d, #5\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tadd\tz[0-9]+\.d, z[0-9]+\.d, #255\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tadd\tz[0-9]+\.d, z[0-9]+\.d, #256\n} 1 } } */
+/* { dg-final { scan-assembler-not {\tadd\tz[0-9]+\.d, z[0-9]+\.d, #257\n} } } */
+/* { dg-final { scan-assembler-times {\tadd\tz[0-9]+\.d, z[0-9]+\.d, #65280\n} 1 } } */
+/* { dg-final { scan-assembler-not {\tadd\tz[0-9]+\.d, z[0-9]+\.d, #65281\n} } } */
+/* { dg-final { scan-assembler-not {\tadd\tz[0-9]+\.d, z[0-9]+\.d, #-1\n} } } */
+/* { dg-final { scan-assembler-times {\tsub\tz[0-9]+\.d, z[0-9]+\.d, #1\n} 1 } } */
+
+/* { dg-final { scan-assembler-not {\tsub\tz[0-9]+\.b, z[0-9]+\.b, #1\n} } } */
+/* { dg-final { scan-assembler-not {\tsub\tz[0-9]+\.b, z[0-9]+\.b, #5\n} } } */
+/* { dg-final { scan-assembler-not {\tsub\tz[0-9]+\.b, z[0-9]+\.b, #255\n} } } */
+/* { dg-final { scan-assembler-not {\tsub\tz[0-9]+\.b, z[0-9]+\.b, #256\n} } } */
+/* { dg-final { scan-assembler-not {\tsub\tz[0-9]+\.b, z[0-9]+\.b, #257\n} } } */
+/* { dg-final { scan-assembler-not {\tsub\tz[0-9]+\.b, z[0-9]+\.b, #65280\n} } } */
+/* { dg-final { scan-assembler-not {\tsub\tz[0-9]+\.b, z[0-9]+\.b, #65281\n} } } */
+/* { dg-final { scan-assembler-not {\tsub\tz[0-9]+\.b, z[0-9]+\.b, #-1\n} } } */
+
+/* { dg-final { scan-assembler-times {\tsub\tz[0-9]+\.h, z[0-9]+\.h, #5\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tsub\tz[0-9]+\.h, z[0-9]+\.h, #255\n} 2 } } */
+/* { dg-final { scan-assembler-not {\tsub\tz[0-9]+\.h, z[0-9]+\.h, #256\n} } } */
+/* { dg-final { scan-assembler-not {\tsub\tz[0-9]+\.h, z[0-9]+\.h, #257\n} } } */
+/* { dg-final { scan-assembler-not {\tsub\tz[0-9]+\.h, z[0-9]+\.h, #65280\n} } } */
+/* { dg-final { scan-assembler-not {\tsub\tz[0-9]+\.h, z[0-9]+\.h, #65281\n} } } */
+/* { dg-final { scan-assembler-not {\tsub\tz[0-9]+\.h, z[0-9]+\.h, #-1\n} } } */
+
+/* { dg-final { scan-assembler-times {\tsub\tz[0-9]+\.s, z[0-9]+\.s, #5\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tsub\tz[0-9]+\.s, z[0-9]+\.s, #255\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tsub\tz[0-9]+\.s, z[0-9]+\.s, #256\n} 1 } } */
+/* { dg-final { scan-assembler-not {\tsub\tz[0-9]+\.s, z[0-9]+\.s, #257\n} } } */
+/* { dg-final { scan-assembler-times {\tsub\tz[0-9]+\.s, z[0-9]+\.s, #65280\n} 1 } } */
+/* { dg-final { scan-assembler-not {\tsub\tz[0-9]+\.s, z[0-9]+\.s, #65281\n} } } */
+/* { dg-final { scan-assembler-not {\tsub\tz[0-9]+\.s, z[0-9]+\.s, #-1\n} } } */
+/* { dg-final { scan-assembler-times {\tadd\tz[0-9]+\.s, z[0-9]+\.s, #1\n} 1 } } */
+
+/* { dg-final { scan-assembler-times {\tsub\tz[0-9]+\.d, z[0-9]+\.d, #5\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tsub\tz[0-9]+\.d, z[0-9]+\.d, #255\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tsub\tz[0-9]+\.d, z[0-9]+\.d, #256\n} 1 } } */
+/* { dg-final { scan-assembler-not {\tsub\tz[0-9]+\.d, z[0-9]+\.d, #257\n} } } */
+/* { dg-final { scan-assembler-times {\tsub\tz[0-9]+\.d, z[0-9]+\.d, #65280\n} 1 } } */
+/* { dg-final { scan-assembler-not {\tsub\tz[0-9]+\.d, z[0-9]+\.d, #65281\n} } } */
+/* { dg-final { scan-assembler-not {\tsub\tz[0-9]+\.d, z[0-9]+\.d, #-1\n} } } */
+/* { dg-final { scan-assembler-times {\tadd\tz[0-9]+\.d, z[0-9]+\.d, #1\n} 1 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/const_pred_1.C b/gcc/testsuite/gcc.target/aarch64/sve/const_pred_1.C
new file mode 100644 (file)
index 0000000..58ab159
--- /dev/null
@@ -0,0 +1,16 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -msve-vector-bits=256" } */
+
+typedef signed char vnx16qi __attribute__((vector_size(32)));
+
+vnx16qi
+foo (vnx16qi x, vnx16qi y)
+{
+  return (vnx16qi) { -1, 0, 0, -1, -1, -1, 0, 0,
+                    -1, -1, -1, -1, 0, 0, 0, 0,
+                    -1, -1, -1, -1, -1, -1, -1, -1,
+                    0, 0, 0, 0, 0, 0, 0, 0 } ? x : y;
+}
+
+/* { dg-final { scan-assembler {\tldr\tp[0-9]+,} } } */
+/* { dg-final { scan-assembler {\t\.byte\t57\n\t\.byte\t15\n\t\.byte\t(255|-1)\n\t\.byte\t0\n} } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/const_pred_2.C b/gcc/testsuite/gcc.target/aarch64/sve/const_pred_2.C
new file mode 100644 (file)
index 0000000..55dc874
--- /dev/null
@@ -0,0 +1,14 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -msve-vector-bits=256" } */
+
+typedef short vnx8hi __attribute__((vector_size(32)));
+
+vnx8hi
+foo (vnx8hi x, vnx8hi y)
+{
+  return (vnx8hi) { -1, 0, 0, -1, -1, -1, 0, 0,
+                   -1, -1, -1, -1, 0, 0, 0, 0 } ? x : y;
+}
+
+/* { dg-final { scan-assembler {\tldr\tp[0-9]+,} } } */
+/* { dg-final { scan-assembler {\t\.byte\t65\n\t\.byte\t5\n\t\.byte\t85\n\t\.byte\t0\n} } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/const_pred_3.C b/gcc/testsuite/gcc.target/aarch64/sve/const_pred_3.C
new file mode 100644 (file)
index 0000000..16d0a33
--- /dev/null
@@ -0,0 +1,13 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -msve-vector-bits=256" } */
+
+typedef int vnx4si __attribute__((vector_size(32)));
+
+vnx4si
+foo (vnx4si x, vnx4si y)
+{
+  return (vnx4si) { -1, 0, 0, -1, -1, -1, 0, 0 } ? x : y;
+}
+
+/* { dg-final { scan-assembler {\tldr\tp[0-9]+,} } } */
+/* { dg-final { scan-assembler {\t\.byte\t1\n\t\.byte\t16\n\t\.byte\t17\n\t\.byte\t0\n} } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/const_pred_4.C b/gcc/testsuite/gcc.target/aarch64/sve/const_pred_4.C
new file mode 100644 (file)
index 0000000..02bd096
--- /dev/null
@@ -0,0 +1,13 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -msve-vector-bits=256" } */
+
+typedef long long vnx2di __attribute__((vector_size(32)));
+
+vnx2di
+foo (vnx2di x, vnx2di y)
+{
+  return (vnx2di) { -1, 0, 0, -1 } ? x : y;
+}
+
+/* { dg-final { scan-assembler {\tldr\tp[0-9]+,} } } */
+/* { dg-final { scan-assembler {\t\.byte\t1\n\t\.byte\t0\n\t\.byte\t0\n\t\.byte\t1\n} } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cvtf_signed_1.c b/gcc/testsuite/gcc.target/aarch64/sve/cvtf_signed_1.c
new file mode 100644 (file)
index 0000000..b4a59b8
--- /dev/null
@@ -0,0 +1,29 @@
+/* { dg-do assemble { target aarch64_asm_sve_ok } } */
+/* { dg-options "-O3 --save-temps" } */
+
+#include <stdint.h>
+
+void __attribute__ ((noinline, noclone))
+vcvtf_16 (_Float16 *dst, int16_t *src1, int size)
+{
+  for (int i = 0; i < size; i++)
+    dst[i] = (_Float16) src1[i];
+}
+
+void __attribute__ ((noinline, noclone))
+vcvtf_32 (float *dst, int32_t *src1, int size)
+{
+  for (int i = 0; i < size; i++)
+    dst[i] = (float) src1[i];
+}
+
+void __attribute__ ((noinline, noclone))
+vcvtf_64 (double *dst, int64_t *src1, int size)
+{
+  for (int i = 0; i < size; i++)
+    dst[i] = (double) src1[i];
+}
+
+/* { dg-final { scan-assembler-times {\tscvtf\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tscvtf\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tscvtf\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d\n} 1 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cvtf_signed_1_run.c b/gcc/testsuite/gcc.target/aarch64/sve/cvtf_signed_1_run.c
new file mode 100644 (file)
index 0000000..451c285
--- /dev/null
@@ -0,0 +1,47 @@
+/* { dg-do run { target aarch64_sve_hw } } */
+/* { dg-options "-O2 -ftree-vectorize" } */
+
+#include "cvtf_signed_1.c"
+
+#define ARRAY_SIZE 47
+
+#define VAL1 (i ^ 3)
+#define VAL2 ((i * 3) - (15 * 3))
+#define VAL3 ((i * 0xffdfffef) - (11 * 0xffdfffef))
+
+int __attribute__ ((optimize (1)))
+main (void)
+{
+  static _Float16 array_dest16[ARRAY_SIZE];
+  static float array_dest32[ARRAY_SIZE];
+  static double array_dest64[ARRAY_SIZE];
+
+  int16_t array_source16[ARRAY_SIZE];
+  int32_t array_source32[ARRAY_SIZE];
+  int64_t array_source64[ARRAY_SIZE];
+
+  for (int i = 0; i < ARRAY_SIZE; i++)
+    {
+      array_source16[i] = VAL1;
+      array_source32[i] = VAL2;
+      array_source64[i] = VAL3;
+      asm volatile ("" ::: "memory");
+    }
+
+  vcvtf_16 (array_dest16, array_source16, ARRAY_SIZE);
+  for (int i = 0; i < ARRAY_SIZE; i++)
+    if (array_dest16[i] != (_Float16) VAL1)
+      __builtin_abort ();
+
+  vcvtf_32 (array_dest32, array_source32, ARRAY_SIZE);
+  for (int i = 0; i < ARRAY_SIZE; i++)
+    if (array_dest32[i] != (float) VAL2)
+      __builtin_abort ();
+
+  vcvtf_64 (array_dest64, array_source64, ARRAY_SIZE);
+  for (int i = 0; i < ARRAY_SIZE; i++)
+    if (array_dest64[i] != (double) VAL3)
+      __builtin_abort ();
+
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cvtf_unsigned_1.c b/gcc/testsuite/gcc.target/aarch64/sve/cvtf_unsigned_1.c
new file mode 100644 (file)
index 0000000..40e44cd
--- /dev/null
@@ -0,0 +1,29 @@
+/* { dg-do assemble { target aarch64_asm_sve_ok } } */
+/* { dg-options "-O3 --save-temps" } */
+
+#include <stdint.h>
+
+void __attribute__ ((noinline, noclone))
+vcvtf_16 (_Float16 *dst, uint16_t *src1, int size)
+{
+  for (int i = 0; i < size; i++)
+    dst[i] = (_Float16) src1[i];
+}
+
+void __attribute__ ((noinline, noclone))
+vcvtf_32 (float *dst, uint32_t *src1, int size)
+{
+  for (int i = 0; i < size; i++)
+    dst[i] = (float) src1[i];
+}
+
+void __attribute__ ((noinline, noclone))
+vcvtf_64 (double *dst, uint64_t *src1, int size)
+{
+  for (int i = 0; i < size; i++)
+    dst[i] = (double) src1[i];
+}
+
+/* { dg-final { scan-assembler-times {\tucvtf\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tucvtf\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tucvtf\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d\n} 1 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cvtf_unsigned_1_run.c b/gcc/testsuite/gcc.target/aarch64/sve/cvtf_unsigned_1_run.c
new file mode 100644 (file)
index 0000000..a9126f6
--- /dev/null
@@ -0,0 +1,47 @@
+/* { dg-do run { target aarch64_sve_hw } } */
+/* { dg-options "-O2 -ftree-vectorize" } */
+
+#include "cvtf_unsigned_1.c"
+
+#define ARRAY_SIZE 65
+
+#define VAL1 (i * 109)
+#define VAL2 (i * 9456)
+#define VAL3 (i * 0xfddff13f)
+
+int __attribute__ ((optimize (1)))
+main (void)
+{
+  static _Float16 array_dest16[ARRAY_SIZE];
+  static float array_dest32[ARRAY_SIZE];
+  static double array_dest64[ARRAY_SIZE];
+
+  uint16_t array_source16[ARRAY_SIZE];
+  uint32_t array_source32[ARRAY_SIZE];
+  uint64_t array_source64[ARRAY_SIZE];
+
+  for (int i = 0; i < ARRAY_SIZE; i++)
+    {
+      array_source16[i] = VAL1;
+      array_source32[i] = VAL2;
+      array_source64[i] = VAL3;
+      asm volatile ("" ::: "memory");
+    }
+
+  vcvtf_16 (array_dest16, array_source16, ARRAY_SIZE);
+  for (int i = 0; i < ARRAY_SIZE; i++)
+    if (array_dest16[i] != (_Float16) VAL1)
+      __builtin_abort ();
+
+  vcvtf_32 (array_dest32, array_source32, ARRAY_SIZE);
+  for (int i = 0; i < ARRAY_SIZE; i++)
+    if (array_dest32[i] != (float) VAL2)
+      __builtin_abort ();
+
+  vcvtf_64 (array_dest64, array_source64, ARRAY_SIZE);
+  for (int i = 0; i < ARRAY_SIZE; i++)
+    if (array_dest64[i] != (double) VAL3)
+      __builtin_abort ();
+
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/dup_imm_1.c b/gcc/testsuite/gcc.target/aarch64/sve/dup_imm_1.c
new file mode 100644 (file)
index 0000000..3b8854e
--- /dev/null
@@ -0,0 +1,138 @@
+/* { dg-do compile } */
+/* -fno-tree-loop-distribute-patterns prevents conversion to memset.  */
+/* { dg-options "-O3 -fno-tree-loop-distribute-patterns" } */
+
+#include <stdint.h>
+
+#define NUM_ELEMS(TYPE) (1024 / sizeof (TYPE))
+
+#define DEF_SET_IMM(TYPE, IMM, SUFFIX)         \
+void __attribute__ ((noinline, noclone))       \
+set_##TYPE##_##SUFFIX (TYPE *a)                        \
+{                                              \
+  for (int i = 0; i < NUM_ELEMS (TYPE); i++)   \
+    a[i] = IMM;                                        \
+}
+
+/* --- VALID --- */
+
+DEF_SET_IMM (int8_t, 0, imm_0)
+DEF_SET_IMM (int16_t, 0, imm_0)
+DEF_SET_IMM (int32_t, 0, imm_0)
+DEF_SET_IMM (int64_t, 0, imm_0)
+
+DEF_SET_IMM (int8_t, -1, imm_m1)
+DEF_SET_IMM (int16_t, -1, imm_m1)
+DEF_SET_IMM (int32_t, -1, imm_m1)
+DEF_SET_IMM (int64_t, -1, imm_m1)
+
+DEF_SET_IMM (int8_t, 1, imm_1)
+DEF_SET_IMM (int16_t, 1, imm_1)
+DEF_SET_IMM (int32_t, 1, imm_1)
+DEF_SET_IMM (int64_t, 1, imm_1)
+
+DEF_SET_IMM (int8_t, 127, imm_127)
+DEF_SET_IMM (int16_t, 127, imm_127)
+DEF_SET_IMM (int32_t, 127, imm_127)
+DEF_SET_IMM (int64_t, 127, imm_127)
+
+DEF_SET_IMM (int8_t, -128, imm_m128)
+DEF_SET_IMM (int16_t, -128, imm_m128)
+DEF_SET_IMM (int32_t, -128, imm_m128)
+DEF_SET_IMM (int64_t, -128, imm_m128)
+
+// No int8_t variant - value too large for a byte
+DEF_SET_IMM (int16_t, 256, imm_256)
+DEF_SET_IMM (int32_t, 256, imm_256)
+DEF_SET_IMM (int64_t, 256, imm_256)
+
+// No int8_t variant - value too large for a byte
+DEF_SET_IMM (int16_t, 32512, imm_32512)
+DEF_SET_IMM (int32_t, 32512, imm_32512)
+DEF_SET_IMM (int64_t, 32512, imm_32512)
+
+// No int8_t variant - value too large for a byte
+DEF_SET_IMM (int16_t, -32768, imm_m32768)
+DEF_SET_IMM (int32_t, -32768, imm_m32768)
+DEF_SET_IMM (int64_t, -32768, imm_m32768)
+
+/* gcc will generate:
+     dup z0.b, 0x01
+*/
+DEF_SET_IMM (int16_t, 0x0101, imm_01_pat)
+DEF_SET_IMM (int32_t, 0x01010101, imm_01_pat)
+DEF_SET_IMM (int64_t, 0x0101010101010101LL, imm_01_pat)
+
+/* gcc will generate:
+     dup z0.h, 0x01
+*/
+DEF_SET_IMM (int32_t, 0x00010001, imm_0001_pat)
+DEF_SET_IMM (int64_t, 0x0001000100010001LL, imm_0001_pat)
+
+/* gcc will generate:
+     dup z0.b, 0xFE (-2)
+*/
+DEF_SET_IMM (int16_t, 0xFEFE, imm_FE_pat)
+DEF_SET_IMM (int32_t, 0xFEFEFEFE, imm_FE_pat)
+DEF_SET_IMM (int64_t, 0xFEFEFEFEFEFEFEFE, imm_FE_pat)
+
+/* gcc will generate:
+     dup z0.h, 0xFFFE (-2)
+*/
+DEF_SET_IMM (int32_t, 0xFFFEFFFE, imm_FFFE_pat)
+DEF_SET_IMM (int64_t, 0xFFFEFFFEFFFEFFFELL, imm_FFFE_pat)
+
+/* gcc will generate:
+     dup z0.h, 0xFE00
+*/
+DEF_SET_IMM (int32_t, 0xFE00FE00, imm_FE00_pat)
+DEF_SET_IMM (int64_t, 0xFE00FE00FE00FE00LL, imm_FE00_pat)
+
+
+/* --- INVALID --- */
+
+// This shouldn't generate a dup as it's out of range, but also the compiler
+// shouldn't assert!
+DEF_SET_IMM (int32_t, 129, imm_m129)
+DEF_SET_IMM (int32_t, 32513, imm_32513)
+DEF_SET_IMM (int32_t, -32763, imm_m32763)
+
+/* { dg-final { scan-assembler {\tmov\tz[0-9]+\.b, #-1\n} } } */
+
+/* { dg-final { scan-assembler {\tmov\tz[0-9]+\.b, #0\n} } } */
+
+/* { dg-final { scan-assembler {\tmov\tz[0-9]+\.b, #1\n} } } */
+/* { dg-final { scan-assembler {\tmov\tz[0-9]+\.h, #1\n} } } */
+/* { dg-final { scan-assembler {\tmov\tz[0-9]+\.s, #1\n} } } */
+/* { dg-final { scan-assembler {\tmov\tz[0-9]+\.d, #1\n} } } */
+
+/* { dg-final { scan-assembler {\tmov\tz[0-9]+\.b, #127\n} } } */
+/* { dg-final { scan-assembler {\tmov\tz[0-9]+\.h, #127\n} } } */
+/* { dg-final { scan-assembler {\tmov\tz[0-9]+\.s, #127\n} } } */
+/* { dg-final { scan-assembler {\tmov\tz[0-9]+\.d, #127\n} } } */
+
+/* { dg-final { scan-assembler {\tmov\tz[0-9]+\.b, #-128\n} } } */
+/* { dg-final { scan-assembler {\tmov\tz[0-9]+\.h, #-128\n} } } */
+/* { dg-final { scan-assembler {\tmov\tz[0-9]+\.s, #-128\n} } } */
+/* { dg-final { scan-assembler {\tmov\tz[0-9]+\.d, #-128\n} } } */
+
+/* { dg-final { scan-assembler {\tmov\tz[0-9]+\.h, #256\n} } } */
+/* { dg-final { scan-assembler {\tmov\tz[0-9]+\.s, #256\n} } } */
+/* { dg-final { scan-assembler {\tmov\tz[0-9]+\.d, #256\n} } } */
+
+/* { dg-final { scan-assembler {\tmov\tz[0-9]+\.h, #32512\n} } } */
+/* { dg-final { scan-assembler {\tmov\tz[0-9]+\.s, #32512\n} } } */
+/* { dg-final { scan-assembler {\tmov\tz[0-9]+\.d, #32512\n} } } */
+
+/* { dg-final { scan-assembler {\tmov\tz[0-9]+\.h, #-32768\n} } } */
+/* { dg-final { scan-assembler {\tmov\tz[0-9]+\.s, #-32768\n} } } */
+/* { dg-final { scan-assembler {\tmov\tz[0-9]+\.d, #-32768\n} } } */
+
+/* { dg-final { scan-assembler {\tmov\tz[0-9]+\.b, #-2\n} } } */
+/* { dg-final { scan-assembler-times {\tmov\tz[0-9]+\.h, #-2\n} 2 } } */
+
+/* { dg-final { scan-assembler {\tmov\tz[0-9]+\.h, #-512\n} } } */
+
+/* { dg-final { scan-assembler-not {#129\n} } } */
+/* { dg-final { scan-assembler-not {#32513\n} } } */
+/* { dg-final { scan-assembler-not {#-32763\n} } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/dup_imm_1_run.c b/gcc/testsuite/gcc.target/aarch64/sve/dup_imm_1_run.c
new file mode 100644 (file)
index 0000000..7f21ba7
--- /dev/null
@@ -0,0 +1,70 @@
+/* { dg-do run { target aarch64_sve_hw } } */
+/* { dg-options "-O3 -fno-tree-loop-distribute-patterns" } */
+
+#include "dup_imm_1.c"
+
+#define TEST_SET_IMM(TYPE, IMM, SUFFIX)                \
+  {                                            \
+    TYPE v[NUM_ELEMS (TYPE)];                  \
+    set_##TYPE##_##SUFFIX (v);                 \
+    for (int i = 0; i < NUM_ELEMS (TYPE); i++) \
+      if (v[i] != (TYPE) IMM)                  \
+        __builtin_abort ();                    \
+  }
+
+int __attribute__ ((optimize (1)))
+main (int argc, char **argv)
+{
+  TEST_SET_IMM (int8_t,  0, imm_0)
+  TEST_SET_IMM (int16_t, 0, imm_0)
+  TEST_SET_IMM (int32_t, 0, imm_0)
+  TEST_SET_IMM (int64_t, 0, imm_0)
+
+  TEST_SET_IMM (int8_t,  -1, imm_m1)
+  TEST_SET_IMM (int16_t, -1, imm_m1)
+  TEST_SET_IMM (int32_t, -1, imm_m1)
+  TEST_SET_IMM (int64_t, -1, imm_m1)
+
+  TEST_SET_IMM (int8_t,  1, imm_1)
+  TEST_SET_IMM (int16_t, 1, imm_1)
+  TEST_SET_IMM (int32_t, 1, imm_1)
+  TEST_SET_IMM (int64_t, 1, imm_1)
+
+  TEST_SET_IMM (int8_t,  127, imm_127)
+  TEST_SET_IMM (int16_t, 127, imm_127)
+  TEST_SET_IMM (int32_t, 127, imm_127)
+  TEST_SET_IMM (int64_t, 127, imm_127)
+
+  TEST_SET_IMM (int8_t,  -128, imm_m128)
+  TEST_SET_IMM (int16_t, -128, imm_m128)
+  TEST_SET_IMM (int32_t, -128, imm_m128)
+  TEST_SET_IMM (int64_t, -128, imm_m128)
+
+  TEST_SET_IMM (int16_t, 256, imm_256)
+  TEST_SET_IMM (int32_t, 256, imm_256)
+  TEST_SET_IMM (int64_t, 256, imm_256)
+
+  TEST_SET_IMM (int16_t, 32512, imm_32512)
+  TEST_SET_IMM (int32_t, 32512, imm_32512)
+  TEST_SET_IMM (int64_t, 32512, imm_32512)
+
+  TEST_SET_IMM (int16_t, -32768, imm_m32768)
+  TEST_SET_IMM (int32_t, -32768, imm_m32768)
+  TEST_SET_IMM (int64_t, -32768, imm_m32768)
+
+  TEST_SET_IMM (int16_t, 0x0101, imm_01_pat)
+  TEST_SET_IMM (int32_t, 0x01010101, imm_01_pat)
+  TEST_SET_IMM (int64_t, 0x0101010101010101LL, imm_01_pat)
+
+  TEST_SET_IMM (int32_t, 0x00010001, imm_0001_pat)
+  TEST_SET_IMM (int64_t, 0x0001000100010001LL, imm_0001_pat)
+
+  TEST_SET_IMM (int16_t, 0xFEFE, imm_FE_pat)
+  TEST_SET_IMM (int32_t, 0xFEFEFEFE, imm_FE_pat)
+  TEST_SET_IMM (int64_t, 0xFEFEFEFEFEFEFEFE, imm_FE_pat)
+
+  TEST_SET_IMM (int32_t, 0xFE00FE00, imm_FE00_pat)
+  TEST_SET_IMM (int64_t, 0xFE00FE00FE00FE00, imm_FE00_pat)
+
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/dup_lane_1.c b/gcc/testsuite/gcc.target/aarch64/sve/dup_lane_1.c
new file mode 100644 (file)
index 0000000..532847b
--- /dev/null
@@ -0,0 +1,70 @@
+/* { dg-do compile } */
+/* { dg-options "-O -msve-vector-bits=256" } */
+
+#include <stdint.h>
+
+typedef int64_t vnx2di __attribute__((vector_size (32)));
+typedef int32_t vnx4si __attribute__((vector_size (32)));
+typedef int16_t vnx8hi __attribute__((vector_size (32)));
+typedef int8_t vnx16qi __attribute__((vector_size (32)));
+typedef double vnx2df __attribute__((vector_size (32)));
+typedef float vnx4sf __attribute__((vector_size (32)));
+typedef _Float16 vnx8hf __attribute__((vector_size (32)));
+
+#define MASK_2(X) X, X
+#define MASK_4(X) MASK_2 (X), MASK_2 (X)
+#define MASK_8(X) MASK_4 (X), MASK_4 (X)
+#define MASK_16(X) MASK_8 (X), MASK_8 (X)
+#define MASK_32(X) MASK_16 (X), MASK_16 (X)
+
+#define INDEX_4 vnx2di
+#define INDEX_8 vnx4si
+#define INDEX_16 vnx8hi
+#define INDEX_32 vnx16qi
+
+#define DUP_LANE(TYPE, NUNITS, INDEX)                                       \
+  TYPE dup_##INDEX##_##TYPE (TYPE values1, TYPE values2)                    \
+  {                                                                         \
+    return __builtin_shuffle (values1, values2,                                     \
+                             ((INDEX_##NUNITS) { MASK_##NUNITS (INDEX) })); \
+  }
+
+#define TEST_ALL(T)                            \
+  T (vnx2di, 4, 0)                             \
+  T (vnx2di, 4, 2)                             \
+  T (vnx2di, 4, 3)                             \
+  T (vnx4si, 8, 0)                             \
+  T (vnx4si, 8, 5)                             \
+  T (vnx4si, 8, 7)                             \
+  T (vnx8hi, 16, 0)                            \
+  T (vnx8hi, 16, 6)                            \
+  T (vnx8hi, 16, 15)                           \
+  T (vnx16qi, 32, 0)                           \
+  T (vnx16qi, 32, 19)                          \
+  T (vnx16qi, 32, 31)                          \
+  T (vnx2df, 4, 0)                             \
+  T (vnx2df, 4, 2)                             \
+  T (vnx2df, 4, 3)                             \
+  T (vnx4sf, 8, 0)                             \
+  T (vnx4sf, 8, 5)                             \
+  T (vnx4sf, 8, 7)                             \
+  T (vnx8hf, 16, 0)                            \
+  T (vnx8hf, 16, 6)                            \
+  T (vnx8hf, 16, 15)                           \
+
+TEST_ALL (DUP_LANE)
+
+/* { dg-final { scan-assembler-not {\ttbl\t} } } */
+
+/* { dg-final { scan-assembler-times {\tdup\tz[0-9]+\.d, z[0-9]+\.d\[0\]} 2 } } */
+/* { dg-final { scan-assembler-times {\tdup\tz[0-9]+\.d, z[0-9]+\.d\[2\]} 2 } } */
+/* { dg-final { scan-assembler-times {\tdup\tz[0-9]+\.d, z[0-9]+\.d\[3\]} 2 } } */
+/* { dg-final { scan-assembler-times {\tdup\tz[0-9]+\.s, z[0-9]+\.s\[0\]} 2 } } */
+/* { dg-final { scan-assembler-times {\tdup\tz[0-9]+\.s, z[0-9]+\.s\[5\]} 2 } } */
+/* { dg-final { scan-assembler-times {\tdup\tz[0-9]+\.s, z[0-9]+\.s\[7\]} 2 } } */
+/* { dg-final { scan-assembler-times {\tdup\tz[0-9]+\.h, z[0-9]+\.h\[0\]} 2 } } */
+/* { dg-final { scan-assembler-times {\tdup\tz[0-9]+\.h, z[0-9]+\.h\[6\]} 2 } } */
+/* { dg-final { scan-assembler-times {\tdup\tz[0-9]+\.h, z[0-9]+\.h\[15\]} 2 } } */
+/* { dg-final { scan-assembler-times {\tdup\tz[0-9]+\.b, z[0-9]+\.b\[0\]} 1 } } */
+/* { dg-final { scan-assembler-times {\tdup\tz[0-9]+\.b, z[0-9]+\.b\[19\]} 1 } } */
+/* { dg-final { scan-assembler-times {\tdup\tz[0-9]+\.b, z[0-9]+\.b\[31\]} 1 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/ext_1.c b/gcc/testsuite/gcc.target/aarch64/sve/ext_1.c
new file mode 100644 (file)
index 0000000..ba304e3
--- /dev/null
@@ -0,0 +1,70 @@
+/* { dg-do compile } */
+/* { dg-options "-O -msve-vector-bits=256" } */
+
+#include <stdint.h>
+
+typedef int64_t vnx2di __attribute__((vector_size (32)));  /* Every type here is a 32-byte (256-bit) GNU vector, the width requested by -msve-vector-bits=256.  */
+typedef int32_t vnx4si __attribute__((vector_size (32)));
+typedef int16_t vnx8hi __attribute__((vector_size (32)));
+typedef int8_t vnx16qi __attribute__((vector_size (32)));
+typedef double vnx2df __attribute__((vector_size (32)));
+typedef float vnx4sf __attribute__((vector_size (32)));
+typedef _Float16 vnx8hf __attribute__((vector_size (32)));
+
+#define MASK_2(X) X, X + 1  /* The two consecutive indices X, X + 1.  */
+#define MASK_4(X) MASK_2 (X), MASK_2 (X + 2)  /* Each wider mask concatenates two half-width masks,  */
+#define MASK_8(X) MASK_4 (X), MASK_4 (X + 4)  /* so MASK_N (X) expands to the N comma-separated  */
+#define MASK_16(X) MASK_8 (X), MASK_8 (X + 8)  /* indices X, X + 1, ..., X + N - 1.  */
+#define MASK_32(X) MASK_16 (X), MASK_16 (X + 16)
+
+#define INDEX_4 vnx2di  /* INDEX_N names the integer vector type with N elements; DUP_LANE uses it as the type of the shuffle mask.  */
+#define INDEX_8 vnx4si
+#define INDEX_16 vnx8hi
+#define INDEX_32 vnx16qi
+
+#define DUP_LANE(TYPE, NUNITS, INDEX)  /* Defines dup_<INDEX>_<TYPE> (values1, values2).  */ \
+  TYPE dup_##INDEX##_##TYPE (TYPE values1, TYPE values2)                    \
+  {  /* The mask is INDEX ... INDEX + NUNITS - 1, i.e. NUNITS consecutive elements of values1 followed by values2.  */ \
+    return __builtin_shuffle (values1, values2,                                     \
+                             ((INDEX_##NUNITS) { MASK_##NUNITS (INDEX) })); \
+  }  /* NOTE(review): despite the DUP_LANE/dup_ names, the checks in this file expect EXT and no TBL; the names were presumably carried over from the dup_lane test.  */
+
+#define TEST_ALL(T)  /* Apply T to each (vector type, element count, start index) triple.  */ \
+  T (vnx2di, 4, 1)                             \
+  T (vnx2di, 4, 2)                             \
+  T (vnx2di, 4, 3)                             \
+  T (vnx4si, 8, 1)                             \
+  T (vnx4si, 8, 5)                             \
+  T (vnx4si, 8, 7)                             \
+  T (vnx8hi, 16, 1)                            \
+  T (vnx8hi, 16, 6)                            \
+  T (vnx8hi, 16, 15)                           \
+  T (vnx16qi, 32, 1)                           \
+  T (vnx16qi, 32, 19)                          \
+  T (vnx16qi, 32, 31)                          \
+  T (vnx2df, 4, 1)                             \
+  T (vnx2df, 4, 2)                             \
+  T (vnx2df, 4, 3)                             \
+  T (vnx4sf, 8, 1)                             \
+  T (vnx4sf, 8, 5)                             \
+  T (vnx4sf, 8, 7)                             \
+  T (vnx8hf, 16, 1)                            \
+  T (vnx8hf, 16, 6)                            \
+  T (vnx8hf, 16, 15)                           \
+
+TEST_ALL (DUP_LANE)  /* Define one shuffle function per triple listed in TEST_ALL.  */
+
+/* { dg-final { scan-assembler-not {\ttbl\t} } } */
+
+/* { dg-final { scan-assembler-times {\text\tz[0-9]+\.b, z[0-9]+\.b, z[0-9]+\.b, #1\n} 1 } } */
+/* { dg-final { scan-assembler-times {\text\tz[0-9]+\.b, z[0-9]+\.b, z[0-9]+\.b, #2\n} 2 } } */
+/* { dg-final { scan-assembler-times {\text\tz[0-9]+\.b, z[0-9]+\.b, z[0-9]+\.b, #4\n} 2 } } */
+/* { dg-final { scan-assembler-times {\text\tz[0-9]+\.b, z[0-9]+\.b, z[0-9]+\.b, #8\n} 2 } } */
+/* { dg-final { scan-assembler-times {\text\tz[0-9]+\.b, z[0-9]+\.b, z[0-9]+\.b, #12\n} 2 } } */
+/* { dg-final { scan-assembler-times {\text\tz[0-9]+\.b, z[0-9]+\.b, z[0-9]+\.b, #16\n} 2 } } */
+/* { dg-final { scan-assembler-times {\text\tz[0-9]+\.b, z[0-9]+\.b, z[0-9]+\.b, #19\n} 1 } } */
+/* { dg-final { scan-assembler-times {\text\tz[0-9]+\.b, z[0-9]+\.b, z[0-9]+\.b, #20\n} 2 } } */
+/* { dg-final { scan-assembler-times {\text\tz[0-9]+\.b, z[0-9]+\.b, z[0-9]+\.b, #24\n} 2 } } */
+/* { dg-final { scan-assembler-times {\text\tz[0-9]+\.b, z[0-9]+\.b, z[0-9]+\.b, #28\n} 2 } } */
+/* { dg-final { scan-assembler-times {\text\tz[0-9]+\.b, z[0-9]+\.b, z[0-9]+\.b, #30\n} 2 } } */
+/* { dg-final { scan-assembler-times {\text\tz[0-9]+\.b, z[0-9]+\.b, z[0-9]+\.b, #31\n} 1 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/ext_2.c b/gcc/testsuite/gcc.target/aarch64/sve/ext_2.c
new file mode 100644 (file)
index 0000000..0fe7e4c
--- /dev/null
@@ -0,0 +1,18 @@
+/* { dg-do compile } */
+/* { dg-options "-O -msve-vector-bits=256" } */
+
+typedef int vnx4si __attribute__((vector_size (32)));  /* 32-byte vector of int; foo uses an 8-element mask of this type.  */
+
+void  /* Rotate a vector by one element with source and destination pinned to fixed registers.  */
+foo (void)
+{
+  register vnx4si x asm ("z0");  /* Result is pinned to z0 ...  */
+  register vnx4si y asm ("z1");  /* ... and the input to z1.  */
+
+  asm volatile ("" : "=w" (y));  /* Empty asm that "writes" y, so the compiler knows nothing about its value.  */
+  x = __builtin_shuffle (y, y, (vnx4si) { 1, 2, 3, 4, 5, 6, 7, 8 });  /* Elements 1-7 of y, then index 8 = element 0 of the second operand (also y).  */
+  asm volatile ("" :: "w" (x));  /* Empty asm that "reads" x, keeping the shuffle result live.  */
+}
+
+/* { dg-final { scan-assembler {\tmov\tz0\.d, z1\.d\n} } } */
+/* { dg-final { scan-assembler {\text\tz0\.b, z0\.b, z[01]\.b, #4\n} } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/extract_1.c b/gcc/testsuite/gcc.target/aarch64/sve/extract_1.c
new file mode 100644 (file)
index 0000000..df51ce3
--- /dev/null
@@ -0,0 +1,93 @@
+/* { dg-do assemble { target aarch64_asm_sve_ok } } */
+/* { dg-options "-O -msve-vector-bits=256 --save-temps" } */
+
+#include <stdint.h>
+
+typedef int64_t vnx2di __attribute__((vector_size (32)));  /* Every type here is a 32-byte (256-bit) GNU vector, the width requested by -msve-vector-bits=256.  */
+typedef int32_t vnx4si __attribute__((vector_size (32)));
+typedef int16_t vnx8hi __attribute__((vector_size (32)));
+typedef int8_t vnx16qi __attribute__((vector_size (32)));
+typedef double vnx2df __attribute__((vector_size (32)));
+typedef float vnx4sf __attribute__((vector_size (32)));
+typedef _Float16 vnx8hf __attribute__((vector_size (32)));
+
+#define EXTRACT(ELT_TYPE, TYPE, INDEX)  /* Defines permute_<TYPE>_<INDEX> (), returning one element.  */ \
+  ELT_TYPE permute_##TYPE##_##INDEX (void)     \
+  {                                            \
+    TYPE values;                               \
+    asm ("" : "=w" (values));  /* Empty asm that "writes" values, so its contents are unknown to the compiler.  */ \
+    return values[INDEX];  /* Extract element INDEX as a scalar of type ELT_TYPE.  */ \
+  }
+
+#define TEST_ALL(T)  /* Apply T to each (element type, vector type, element index) triple.  */ \
+  T (int64_t, vnx2di, 0)                       \
+  T (int64_t, vnx2di, 1)                       \
+  T (int64_t, vnx2di, 2)                       \
+  T (int64_t, vnx2di, 3)  /* Last element of the 4-element vector.  */ \
+  T (int32_t, vnx4si, 0)                       \
+  T (int32_t, vnx4si, 1)                       \
+  T (int32_t, vnx4si, 3)                       \
+  T (int32_t, vnx4si, 4)                       \
+  T (int32_t, vnx4si, 7)                       \
+  T (int16_t, vnx8hi, 0)                       \
+  T (int16_t, vnx8hi, 1)                       \
+  T (int16_t, vnx8hi, 7)                       \
+  T (int16_t, vnx8hi, 8)                       \
+  T (int16_t, vnx8hi, 15)                      \
+  T (int8_t, vnx16qi, 0)                       \
+  T (int8_t, vnx16qi, 1)                       \
+  T (int8_t, vnx16qi, 15)                      \
+  T (int8_t, vnx16qi, 16)                      \
+  T (int8_t, vnx16qi, 31)                      \
+  T (double, vnx2df, 0)                                \
+  T (double, vnx2df, 1)                                \
+  T (double, vnx2df, 2)                                \
+  T (double, vnx2df, 3)                                \
+  T (float, vnx4sf, 0)                         \
+  T (float, vnx4sf, 1)                         \
+  T (float, vnx4sf, 3)                         \
+  T (float, vnx4sf, 4)                         \
+  T (float, vnx4sf, 7)                         \
+  T (_Float16, vnx8hf, 0)                      \
+  T (_Float16, vnx8hf, 1)                      \
+  T (_Float16, vnx8hf, 7)                      \
+  T (_Float16, vnx8hf, 8)                      \
+  T (_Float16, vnx8hf, 15)
+
+TEST_ALL (EXTRACT)  /* Define one extraction function per triple listed in TEST_ALL.  */
+
+/* { dg-final { scan-assembler-times {\tumov\tx[0-9]+, v[0-9]+\.d\[0\]\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tumov\tx[0-9]+, v[0-9]+\.d\[1\]\n} 1 } } */
+/* { dg-final { scan-assembler-not {\tdup\td[0-9]+, v[0-9]+\.d\[0\]\n} } } */
+/* { dg-final { scan-assembler-times {\tdup\td[0-9]+, v[0-9]+\.d\[1\]\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tdup\tz[0-9]+\.d, z[0-9]+\.d\[2\]\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tlastb\tx[0-9]+, p[0-7], z[0-9]+\.d\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tlastb\td[0-9]+, p[0-7], z[0-9]+\.d\n} 1 } } */
+
+/* { dg-final { scan-assembler-times {\tumov\tw[0-9]+, v[0-9]+\.s\[0\]\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tumov\tw[0-9]+, v[0-9]+\.s\[1\]\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tumov\tw[0-9]+, v[0-9]+\.s\[3\]\n} 1 } } */
+/* { dg-final { scan-assembler-not {\tdup\ts[0-9]+, v[0-9]+\.s\[0\]\n} } } */
+/* { dg-final { scan-assembler-times {\tdup\ts[0-9]+, v[0-9]+\.s\[1\]\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tdup\ts[0-9]+, v[0-9]+\.s\[3\]\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tdup\tz[0-9]+\.s, z[0-9]+\.s\[4\]\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tlastb\tw[0-9]+, p[0-7], z[0-9]+\.s\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tlastb\ts[0-9]+, p[0-7], z[0-9]+\.s\n} 1 } } */
+
+/* Also used to move the result of a non-Advanced SIMD extract.  */
+/* { dg-final { scan-assembler-times {\tumov\tw[0-9]+, v[0-9]+\.h\[0\]\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tumov\tw[0-9]+, v[0-9]+\.h\[1\]\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tumov\tw[0-9]+, v[0-9]+\.h\[7\]\n} 1 } } */
+/* { dg-final { scan-assembler-not {\tdup\th[0-9]+, v[0-9]+\.h\[0\]\n} } } */
+/* { dg-final { scan-assembler-times {\tdup\th[0-9]+, v[0-9]+\.h\[1\]\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tdup\th[0-9]+, v[0-9]+\.h\[7\]\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tdup\tz[0-9]+\.h, z[0-9]+\.h\[8\]\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tlastb\tw[0-9]+, p[0-7], z[0-9]+\.h\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tlastb\th[0-9]+, p[0-7], z[0-9]+\.h\n} 1 } } */
+
+/* Also used to move the result of a non-Advanced SIMD extract.  */
+/* { dg-final { scan-assembler-times {\tumov\tw[0-9]+, v[0-9]+\.b\[0\]\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tumov\tw[0-9]+, v[0-9]+\.b\[1\]\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tumov\tw[0-9]+, v[0-9]+\.b\[15\]\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tdup\tz[0-9]+\.b, z[0-9]+\.b\[16\]\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tlastb\tw[0-9]+, p[0-7], z[0-9]+\.b\n} 1 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/extract_2.c b/gcc/testsuite/gcc.target/aarch64/sve/extract_2.c
new file mode 100644 (file)
index 0000000..0642604
--- /dev/null
@@ -0,0 +1,93 @@
+/* { dg-do assemble { target aarch64_asm_sve_ok } } */
+/* { dg-options "-O -msve-vector-bits=512 --save-temps" } */
+
+#include <stdint.h>
+
+typedef int64_t vnx4di __attribute__((vector_size (64)));  /* Every type here is a 64-byte (512-bit) GNU vector, the width requested by -msve-vector-bits=512; e.g. vnx4di holds 8 int64_t elements.  */
+typedef int32_t vnx8si __attribute__((vector_size (64)));
+typedef int16_t vnx16hi __attribute__((vector_size (64)));
+typedef int8_t vnx32qi __attribute__((vector_size (64)));
+typedef double vnx4df __attribute__((vector_size (64)));
+typedef float vnx8sf __attribute__((vector_size (64)));
+typedef _Float16 vnx16hf __attribute__((vector_size (64)));
+
+#define EXTRACT(ELT_TYPE, TYPE, INDEX)  /* Defines permute_<TYPE>_<INDEX> (), returning one element.  */ \
+  ELT_TYPE permute_##TYPE##_##INDEX (void)     \
+  {                                            \
+    TYPE values;                               \
+    asm ("" : "=w" (values));  /* Empty asm that "writes" values, so its contents are unknown to the compiler.  */ \
+    return values[INDEX];  /* Extract element INDEX as a scalar of type ELT_TYPE.  */ \
+  }
+
+#define TEST_ALL(T)  /* Apply T to each (element type, vector type, element index) triple.  */ \
+  T (int64_t, vnx4di, 0)                       \
+  T (int64_t, vnx4di, 1)                       \
+  T (int64_t, vnx4di, 2)                       \
+  T (int64_t, vnx4di, 7)  /* Last element of the 8-element vector.  */ \
+  T (int32_t, vnx8si, 0)                       \
+  T (int32_t, vnx8si, 1)                       \
+  T (int32_t, vnx8si, 3)                       \
+  T (int32_t, vnx8si, 4)                       \
+  T (int32_t, vnx8si, 15)                      \
+  T (int16_t, vnx16hi, 0)                      \
+  T (int16_t, vnx16hi, 1)                      \
+  T (int16_t, vnx16hi, 7)                      \
+  T (int16_t, vnx16hi, 8)                      \
+  T (int16_t, vnx16hi, 31)                     \
+  T (int8_t, vnx32qi, 0)                       \
+  T (int8_t, vnx32qi, 1)                       \
+  T (int8_t, vnx32qi, 15)                      \
+  T (int8_t, vnx32qi, 16)                      \
+  T (int8_t, vnx32qi, 63)                      \
+  T (double, vnx4df, 0)                                \
+  T (double, vnx4df, 1)                                \
+  T (double, vnx4df, 2)                                \
+  T (double, vnx4df, 7)                                \
+  T (float, vnx8sf, 0)                         \
+  T (float, vnx8sf, 1)                         \
+  T (float, vnx8sf, 3)                         \
+  T (float, vnx8sf, 4)                         \
+  T (float, vnx8sf, 15)                                \
+  T (_Float16, vnx16hf, 0)                     \
+  T (_Float16, vnx16hf, 1)                     \
+  T (_Float16, vnx16hf, 7)                     \
+  T (_Float16, vnx16hf, 8)                     \
+  T (_Float16, vnx16hf, 31)
+
+TEST_ALL (EXTRACT)  /* Define one extraction function per triple listed in TEST_ALL.  */
+
+/* { dg-final { scan-assembler-times {\tumov\tx[0-9]+, v[0-9]+\.d\[0\]\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tumov\tx[0-9]+, v[0-9]+\.d\[1\]\n} 1 } } */
+/* { dg-final { scan-assembler-not {\tdup\td[0-9]+, v[0-9]+\.d\[0\]\n} } } */
+/* { dg-final { scan-assembler-times {\tdup\td[0-9]+, v[0-9]+\.d\[1\]\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tdup\tz[0-9]+\.d, z[0-9]+\.d\[2\]\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tlastb\tx[0-9]+, p[0-7], z[0-9]+\.d\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tlastb\td[0-9]+, p[0-7], z[0-9]+\.d\n} 1 } } */
+
+/* { dg-final { scan-assembler-times {\tumov\tw[0-9]+, v[0-9]+\.s\[0\]\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tumov\tw[0-9]+, v[0-9]+\.s\[1\]\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tumov\tw[0-9]+, v[0-9]+\.s\[3\]\n} 1 } } */
+/* { dg-final { scan-assembler-not {\tdup\ts[0-9]+, v[0-9]+\.s\[0\]\n} } } */
+/* { dg-final { scan-assembler-times {\tdup\ts[0-9]+, v[0-9]+\.s\[1\]\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tdup\ts[0-9]+, v[0-9]+\.s\[3\]\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tdup\tz[0-9]+\.s, z[0-9]+\.s\[4\]\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tlastb\tw[0-9]+, p[0-7], z[0-9]+\.s\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tlastb\ts[0-9]+, p[0-7], z[0-9]+\.s\n} 1 } } */
+
+/* Also used to move the result of a non-Advanced SIMD extract.  */
+/* { dg-final { scan-assembler-times {\tumov\tw[0-9]+, v[0-9]+\.h\[0\]\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tumov\tw[0-9]+, v[0-9]+\.h\[1\]\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tumov\tw[0-9]+, v[0-9]+\.h\[7\]\n} 1 } } */
+/* { dg-final { scan-assembler-not {\tdup\th[0-9]+, v[0-9]+\.h\[0\]\n} } } */
+/* { dg-final { scan-assembler-times {\tdup\th[0-9]+, v[0-9]+\.h\[1\]\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tdup\th[0-9]+, v[0-9]+\.h\[7\]\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tdup\tz[0-9]+\.h, z[0-9]+\.h\[8\]\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tlastb\tw[0-9]+, p[0-7], z[0-9]+\.h\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tlastb\th[0-9]+, p[0-7], z[0-9]+\.h\n} 1 } } */
+
+/* Also used to move the result of a non-Advanced SIMD extract.  */
+/* { dg-final { scan-assembler-times {\tumov\tw[0-9]+, v[0-9]+\.b\[0\]\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tumov\tw[0-9]+, v[0-9]+\.b\[1\]\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tumov\tw[0-9]+, v[0-9]+\.b\[15\]\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tdup\tz[0-9]+\.b, z[0-9]+\.b\[16\]\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tlastb\tw[0-9]+, p[0-7], z[0-9]+\.b\n} 1 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/extract_3.c b/gcc/testsuite/gcc.target/aarch64/sve/extract_3.c
new file mode 100644 (file)
index 0000000..604f1f6
--- /dev/null
@@ -0,0 +1,124 @@
+/* { dg-do assemble { target aarch64_asm_sve_ok } } */
+/* { dg-options "-O -msve-vector-bits=1024 --save-temps" } */
+
+#include <stdint.h>
+
+typedef int64_t vnx8di __attribute__((vector_size (128)));  /* Every type here is a 128-byte (1024-bit) GNU vector, the width requested by -msve-vector-bits=1024; e.g. vnx8di holds 16 int64_t elements.  */
+typedef int32_t vnx16si __attribute__((vector_size (128)));
+typedef int16_t vnx32hi __attribute__((vector_size (128)));
+typedef int8_t vnx64qi __attribute__((vector_size (128)));
+typedef double vnx8df __attribute__((vector_size (128)));
+typedef float vnx16sf __attribute__((vector_size (128)));
+typedef _Float16 vnx32hf __attribute__((vector_size (128)));
+
+#define EXTRACT(ELT_TYPE, TYPE, INDEX)  /* Defines permute_<TYPE>_<INDEX> (), returning one element.  */ \
+  ELT_TYPE permute_##TYPE##_##INDEX (void)     \
+  {                                            \
+    TYPE values;                               \
+    asm ("" : "=w" (values));  /* Empty asm that "writes" values, so its contents are unknown to the compiler.  */ \
+    return values[INDEX];  /* Extract element INDEX as a scalar of type ELT_TYPE.  */ \
+  }
+
+#define TEST_ALL(T)  /* Apply T to each (element type, vector type, element index) triple.  */ \
+  T (int64_t, vnx8di, 0)                       \
+  T (int64_t, vnx8di, 1)                       \
+  T (int64_t, vnx8di, 2)                       \
+  T (int64_t, vnx8di, 7)                       \
+  T (int64_t, vnx8di, 8)                       \
+  T (int64_t, vnx8di, 9)                       \
+  T (int64_t, vnx8di, 15)  /* Last element of the 16-element vector.  */ \
+  T (int32_t, vnx16si, 0)                      \
+  T (int32_t, vnx16si, 1)                      \
+  T (int32_t, vnx16si, 3)                      \
+  T (int32_t, vnx16si, 4)                      \
+  T (int32_t, vnx16si, 15)                     \
+  T (int32_t, vnx16si, 16)                     \
+  T (int32_t, vnx16si, 21)                     \
+  T (int32_t, vnx16si, 31)                     \
+  T (int16_t, vnx32hi, 0)                      \
+  T (int16_t, vnx32hi, 1)                      \
+  T (int16_t, vnx32hi, 7)                      \
+  T (int16_t, vnx32hi, 8)                      \
+  T (int16_t, vnx32hi, 31)                     \
+  T (int16_t, vnx32hi, 32)                     \
+  T (int16_t, vnx32hi, 47)                     \
+  T (int16_t, vnx32hi, 63)                     \
+  T (int8_t, vnx64qi, 0)                       \
+  T (int8_t, vnx64qi, 1)                       \
+  T (int8_t, vnx64qi, 15)                      \
+  T (int8_t, vnx64qi, 16)                      \
+  T (int8_t, vnx64qi, 63)                      \
+  T (int8_t, vnx64qi, 64)                      \
+  T (int8_t, vnx64qi, 100)                     \
+  T (int8_t, vnx64qi, 127)                     \
+  T (double, vnx8df, 0)                                \
+  T (double, vnx8df, 1)                                \
+  T (double, vnx8df, 2)                                \
+  T (double, vnx8df, 7)                                \
+  T (double, vnx8df, 8)                                \
+  T (double, vnx8df, 9)                                \
+  T (double, vnx8df, 15)                       \
+  T (float, vnx16sf, 0)                                \
+  T (float, vnx16sf, 1)                                \
+  T (float, vnx16sf, 3)                                \
+  T (float, vnx16sf, 4)                                \
+  T (float, vnx16sf, 15)                       \
+  T (float, vnx16sf, 16)                       \
+  T (float, vnx16sf, 21)                       \
+  T (float, vnx16sf, 31)                       \
+  T (_Float16, vnx32hf, 0)                     \
+  T (_Float16, vnx32hf, 1)                     \
+  T (_Float16, vnx32hf, 7)                     \
+  T (_Float16, vnx32hf, 8)                     \
+  T (_Float16, vnx32hf, 31)                    \
+  T (_Float16, vnx32hf, 32)                    \
+  T (_Float16, vnx32hf, 47)                    \
+  T (_Float16, vnx32hf, 63)
+
+TEST_ALL (EXTRACT)  /* Define one extraction function per triple listed in TEST_ALL.  */
+
+/* { dg-final { scan-assembler-times {\tumov\tx[0-9]+, v[0-9]+\.d\[0\]\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tumov\tx[0-9]+, v[0-9]+\.d\[1\]\n} 1 } } */
+/* { dg-final { scan-assembler-not {\tdup\td[0-9]+, v[0-9]+\.d\[0\]\n} } } */
+/* { dg-final { scan-assembler-times {\tdup\td[0-9]+, v[0-9]+\.d\[1\]\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tdup\tz[0-9]+\.d, z[0-9]+\.d\[2\]\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tdup\tz[0-9]+\.d, z[0-9]+\.d\[7\]\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tlastb\tx[0-9]+, p[0-7], z[0-9]+\.d\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tlastb\td[0-9]+, p[0-7], z[0-9]+\.d\n} 1 } } */
+
+/* { dg-final { scan-assembler-times {\tumov\tw[0-9]+, v[0-9]+\.s\[0\]\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tumov\tw[0-9]+, v[0-9]+\.s\[1\]\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tumov\tw[0-9]+, v[0-9]+\.s\[3\]\n} 1 } } */
+/* { dg-final { scan-assembler-not {\tdup\ts[0-9]+, v[0-9]+\.s\[0\]\n} } } */
+/* { dg-final { scan-assembler-times {\tdup\ts[0-9]+, v[0-9]+\.s\[1\]\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tdup\ts[0-9]+, v[0-9]+\.s\[3\]\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tdup\tz[0-9]+\.s, z[0-9]+\.s\[4\]\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tdup\tz[0-9]+\.s, z[0-9]+\.s\[15\]\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tlastb\tw[0-9]+, p[0-7], z[0-9]+\.s\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tlastb\ts[0-9]+, p[0-7], z[0-9]+\.s\n} 1 } } */
+
+/* Also used to move the result of a non-Advanced SIMD extract.  */
+/* { dg-final { scan-assembler-times {\tumov\tw[0-9]+, v[0-9]+\.h\[0\]\n} 5 } } */
+/* { dg-final { scan-assembler-times {\tumov\tw[0-9]+, v[0-9]+\.h\[1\]\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tumov\tw[0-9]+, v[0-9]+\.h\[7\]\n} 1 } } */
+/* { dg-final { scan-assembler-not {\tdup\th[0-9]+, v[0-9]+\.h\[0\]\n} } } */
+/* { dg-final { scan-assembler-times {\tdup\th[0-9]+, v[0-9]+\.h\[1\]\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tdup\th[0-9]+, v[0-9]+\.h\[7\]\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tdup\tz[0-9]+\.h, z[0-9]+\.h\[8\]\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tdup\tz[0-9]+\.h, z[0-9]+\.h\[31\]\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tlastb\tw[0-9]+, p[0-7], z[0-9]+\.h\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tlastb\th[0-9]+, p[0-7], z[0-9]+\.h\n} 1 } } */
+
+/* Also used to move the result of a non-Advanced SIMD extract.  */
+/* { dg-final { scan-assembler-times {\tumov\tw[0-9]+, v[0-9]+\.b\[0\]\n} 5 } } */
+/* { dg-final { scan-assembler-times {\tumov\tw[0-9]+, v[0-9]+\.b\[1\]\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tumov\tw[0-9]+, v[0-9]+\.b\[15\]\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tdup\tz[0-9]+\.b, z[0-9]+\.b\[16\]\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tdup\tz[0-9]+\.b, z[0-9]+\.b\[63\]\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tlastb\tw[0-9]+, p[0-7], z[0-9]+\.b\n} 1 } } */
+
+/* { dg-final { scan-assembler-times {\text\tz[0-9]+\.b, z[0-9]+\.b, z[0-9]+\.b, #64\n} 7 } } */
+/* { dg-final { scan-assembler-times {\text\tz[0-9]+\.b, z[0-9]+\.b, z[0-9]+\.b, #72\n} 2 } } */
+/* { dg-final { scan-assembler-times {\text\tz[0-9]+\.b, z[0-9]+\.b, z[0-9]+\.b, #84\n} 2 } } */
+/* { dg-final { scan-assembler-times {\text\tz[0-9]+\.b, z[0-9]+\.b, z[0-9]+\.b, #94\n} 2 } } */
+/* { dg-final { scan-assembler-times {\text\tz[0-9]+\.b, z[0-9]+\.b, z[0-9]+\.b, #100\n} 1 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/extract_4.c b/gcc/testsuite/gcc.target/aarch64/sve/extract_4.c
new file mode 100644 (file)
index 0000000..8b45e31
--- /dev/null
@@ -0,0 +1,135 @@
+/* { dg-do assemble { target aarch64_asm_sve_ok } } */
+/* { dg-options "-O -msve-vector-bits=2048 --save-temps" } */
+
+#include <stdint.h>
+
+typedef int64_t v32di __attribute__((vector_size (256)));  /* Every type here is a 256-byte (2048-bit) GNU vector, the width requested by -msve-vector-bits=2048; e.g. v32di holds 32 int64_t elements.  */
+typedef int32_t v64si __attribute__((vector_size (256)));
+typedef int16_t v128hi __attribute__((vector_size (256)));
+typedef int8_t v256qi __attribute__((vector_size (256)));
+typedef double v32df __attribute__((vector_size (256)));
+typedef float v64sf __attribute__((vector_size (256)));
+typedef _Float16 v128hf __attribute__((vector_size (256)));
+
+#define EXTRACT(ELT_TYPE, TYPE, INDEX)  /* Defines permute_<TYPE>_<INDEX> (), returning one element.  */ \
+  ELT_TYPE permute_##TYPE##_##INDEX (void)     \
+  {                                            \
+    TYPE values;                               \
+    asm ("" : "=w" (values));  /* Empty asm that "writes" values, so its contents are unknown to the compiler.  */ \
+    return values[INDEX];  /* Extract element INDEX as a scalar of type ELT_TYPE.  */ \
+  }
+
+#define TEST_ALL(T)  /* Apply T to each (element type, vector type, element index) triple.  */ \
+  T (int64_t, v32di, 0)                                \
+  T (int64_t, v32di, 1)                                \
+  T (int64_t, v32di, 2)                                \
+  T (int64_t, v32di, 7)                                \
+  T (int64_t, v32di, 8)                                \
+  T (int64_t, v32di, 9)                                \
+  T (int64_t, v32di, 15)                       \
+  T (int64_t, v32di, 31)  /* Last element of the 32-element vector.  */ \
+  T (int32_t, v64si, 0)                                \
+  T (int32_t, v64si, 1)                                \
+  T (int32_t, v64si, 3)                                \
+  T (int32_t, v64si, 4)                                \
+  T (int32_t, v64si, 15)                       \
+  T (int32_t, v64si, 16)                       \
+  T (int32_t, v64si, 21)                       \
+  T (int32_t, v64si, 31)                       \
+  T (int32_t, v64si, 63)                       \
+  T (int16_t, v128hi, 0)                       \
+  T (int16_t, v128hi, 1)                       \
+  T (int16_t, v128hi, 7)                       \
+  T (int16_t, v128hi, 8)                       \
+  T (int16_t, v128hi, 31)                      \
+  T (int16_t, v128hi, 32)                      \
+  T (int16_t, v128hi, 47)                      \
+  T (int16_t, v128hi, 63)                      \
+  T (int16_t, v128hi, 127)                     \
+  T (int8_t, v256qi, 0)                                \
+  T (int8_t, v256qi, 1)                                \
+  T (int8_t, v256qi, 15)                       \
+  T (int8_t, v256qi, 16)                       \
+  T (int8_t, v256qi, 63)                       \
+  T (int8_t, v256qi, 64)                       \
+  T (int8_t, v256qi, 100)                      \
+  T (int8_t, v256qi, 127)                      \
+  T (int8_t, v256qi, 255)                      \
+  T (double, v32df, 0)                         \
+  T (double, v32df, 1)                         \
+  T (double, v32df, 2)                         \
+  T (double, v32df, 7)                         \
+  T (double, v32df, 8)                         \
+  T (double, v32df, 9)                         \
+  T (double, v32df, 15)                                \
+  T (double, v32df, 31)                                \
+  T (float, v64sf, 0)                          \
+  T (float, v64sf, 1)                          \
+  T (float, v64sf, 3)                          \
+  T (float, v64sf, 4)                          \
+  T (float, v64sf, 15)                         \
+  T (float, v64sf, 16)                         \
+  T (float, v64sf, 21)                         \
+  T (float, v64sf, 31)                         \
+  T (float, v64sf, 63)                         \
+  T (_Float16, v128hf, 0)                      \
+  T (_Float16, v128hf, 1)                      \
+  T (_Float16, v128hf, 7)                      \
+  T (_Float16, v128hf, 8)                      \
+  T (_Float16, v128hf, 31)                     \
+  T (_Float16, v128hf, 32)                     \
+  T (_Float16, v128hf, 47)                     \
+  T (_Float16, v128hf, 63)                     \
+  T (_Float16, v128hf, 127)
+
+TEST_ALL (EXTRACT)  /* Define one extraction function per triple listed in TEST_ALL.  */
+
+/* { dg-final { scan-assembler-times {\tumov\tx[0-9]+, v[0-9]+\.d\[0\]\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tumov\tx[0-9]+, v[0-9]+\.d\[1\]\n} 1 } } */
+/* { dg-final { scan-assembler-not {\tdup\td[0-9]+, v[0-9]+\.d\[0\]\n} } } */
+/* { dg-final { scan-assembler-times {\tdup\td[0-9]+, v[0-9]+\.d\[1\]\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tdup\tz[0-9]+\.d, z[0-9]+\.d\[2\]\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tdup\tz[0-9]+\.d, z[0-9]+\.d\[7\]\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tlastb\tx[0-9]+, p[0-7], z[0-9]+\.d\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tlastb\td[0-9]+, p[0-7], z[0-9]+\.d\n} 1 } } */
+
+/* { dg-final { scan-assembler-times {\tumov\tw[0-9]+, v[0-9]+\.s\[0\]\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tumov\tw[0-9]+, v[0-9]+\.s\[1\]\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tumov\tw[0-9]+, v[0-9]+\.s\[3\]\n} 1 } } */
+/* { dg-final { scan-assembler-not {\tdup\ts[0-9]+, v[0-9]+\.s\[0\]\n} } } */
+/* { dg-final { scan-assembler-times {\tdup\ts[0-9]+, v[0-9]+\.s\[1\]\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tdup\ts[0-9]+, v[0-9]+\.s\[3\]\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tdup\tz[0-9]+\.s, z[0-9]+\.s\[4\]\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tdup\tz[0-9]+\.s, z[0-9]+\.s\[15\]\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tlastb\tw[0-9]+, p[0-7], z[0-9]+\.s\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tlastb\ts[0-9]+, p[0-7], z[0-9]+\.s\n} 1 } } */
+
+/* Also used to move the result of a non-Advanced SIMD extract.  */
+/* { dg-final { scan-assembler-times {\tumov\tw[0-9]+, v[0-9]+\.h\[0\]\n} 6 } } */
+/* { dg-final { scan-assembler-times {\tumov\tw[0-9]+, v[0-9]+\.h\[1\]\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tumov\tw[0-9]+, v[0-9]+\.h\[7\]\n} 1 } } */
+/* { dg-final { scan-assembler-not {\tdup\th[0-9]+, v[0-9]+\.h\[0\]\n} } } */
+/* { dg-final { scan-assembler-times {\tdup\th[0-9]+, v[0-9]+\.h\[1\]\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tdup\th[0-9]+, v[0-9]+\.h\[7\]\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tdup\tz[0-9]+\.h, z[0-9]+\.h\[8\]\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tdup\tz[0-9]+\.h, z[0-9]+\.h\[31\]\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tlastb\tw[0-9]+, p[0-7], z[0-9]+\.h\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tlastb\th[0-9]+, p[0-7], z[0-9]+\.h\n} 1 } } */
+
+/* Also used to move the result of a non-Advanced SIMD extract.  */
+/* { dg-final { scan-assembler-times {\tumov\tw[0-9]+, v[0-9]+\.b\[0\]\n} 6 } } */
+/* { dg-final { scan-assembler-times {\tumov\tw[0-9]+, v[0-9]+\.b\[1\]\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tumov\tw[0-9]+, v[0-9]+\.b\[15\]\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tdup\tz[0-9]+\.b, z[0-9]+\.b\[16\]\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tdup\tz[0-9]+\.b, z[0-9]+\.b\[63\]\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tlastb\tw[0-9]+, p[0-7], z[0-9]+\.b\n} 1 } } */
+
+/* { dg-final { scan-assembler-times {\text\tz[0-9]+\.b, z[0-9]+\.b, z[0-9]+\.b, #64\n} 7 } } */
+/* { dg-final { scan-assembler-times {\text\tz[0-9]+\.b, z[0-9]+\.b, z[0-9]+\.b, #72\n} 2 } } */
+/* { dg-final { scan-assembler-times {\text\tz[0-9]+\.b, z[0-9]+\.b, z[0-9]+\.b, #84\n} 2 } } */
+/* { dg-final { scan-assembler-times {\text\tz[0-9]+\.b, z[0-9]+\.b, z[0-9]+\.b, #94\n} 2 } } */
+/* { dg-final { scan-assembler-times {\text\tz[0-9]+\.b, z[0-9]+\.b, z[0-9]+\.b, #100\n} 1 } } */
+/* { dg-final { scan-assembler-times {\text\tz[0-9]+\.b, z[0-9]+\.b, z[0-9]+\.b, #120\n} 2 } } */
+/* { dg-final { scan-assembler-times {\text\tz[0-9]+\.b, z[0-9]+\.b, z[0-9]+\.b, #124\n} 2 } } */
+/* { dg-final { scan-assembler-times {\text\tz[0-9]+\.b, z[0-9]+\.b, z[0-9]+\.b, #126\n} 2 } } */
+/* { dg-final { scan-assembler-times {\text\tz[0-9]+\.b, z[0-9]+\.b, z[0-9]+\.b, #127\n} 1 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/fabs_1.c b/gcc/testsuite/gcc.target/aarch64/sve/fabs_1.c
new file mode 100644 (file)
index 0000000..5a4d6e0
--- /dev/null
@@ -0,0 +1,18 @@
+/* { dg-do assemble { target aarch64_asm_sve_ok } } */
+/* { dg-options "-O2 -ftree-vectorize --save-temps" } */
+
+#define DO_OPS(TYPE, OP)  /* Defines vsqrt_<TYPE> (): dst[i] = __builtin_<OP> (src[i]) for 0 <= i < count.  */ \
+void                                           \
+vsqrt_##TYPE (TYPE *dst, TYPE *src, int count) \
+{                                              \
+  for (int i = 0; i < count; ++i)              \
+    dst[i] = __builtin_##OP (src[i]);          \
+}  /* NOTE(review): the "vsqrt_" prefix looks inherited from a sqrt test; every OP passed in this file is fabs/fabsf -- confirm before renaming.  */
+
+DO_OPS (_Float16, fabsf)  /* The float builtin is applied to _Float16 data; the checks still expect one .h FABS.  */
+DO_OPS (float, fabsf)
+DO_OPS (double, fabs)
+
+/* { dg-final { scan-assembler-times {\tfabs\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tfabs\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tfabs\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d\n} 1 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/fcvtz_signed_1.c b/gcc/testsuite/gcc.target/aarch64/sve/fcvtz_signed_1.c
new file mode 100644 (file)
index 0000000..d8882c7
--- /dev/null
@@ -0,0 +1,29 @@
+/* { dg-do assemble { target aarch64_asm_sve_ok } } */
+/* { dg-options "-O3 --save-temps" } */
+
+#include <stdint.h>
+
+void __attribute__ ((noinline, noclone)) /* Convert SIZE _Float16 elements of SRC1 to int16_t in DST.  */
+vfcvtz_16 (int16_t *dst, _Float16 *src1, int size)
+{
+  for (int i = 0; i < size; i++)
+    dst[i] = (int16_t) src1[i]; /* A C floating-to-integer cast truncates toward zero.  */
+}
+
+void __attribute__ ((noinline, noclone)) /* Convert SIZE float elements of SRC1 to int32_t in DST.  */
+vfcvtz_32 (int32_t *dst, float *src1, int size)
+{
+  for (int i = 0; i < size; i++)
+    dst[i] = (int32_t) src1[i]; /* A C floating-to-integer cast truncates toward zero.  */
+}
+
+void __attribute__ ((noinline, noclone)) /* Convert SIZE double elements of SRC1 to int64_t in DST.  */
+vfcvtz_64 (int64_t *dst, double *src1, int size)
+{
+  for (int i = 0; i < size; i++)
+    dst[i] = (int64_t) src1[i]; /* A C floating-to-integer cast truncates toward zero.  */
+}
+
+/* { dg-final { scan-assembler-times {\tfcvtzs\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tfcvtzs\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tfcvtzs\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d\n} 1 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/fcvtz_signed_1_run.c b/gcc/testsuite/gcc.target/aarch64/sve/fcvtz_signed_1_run.c
new file mode 100644 (file)
index 0000000..f29cf55
--- /dev/null
@@ -0,0 +1,47 @@
+/* { dg-do run { target aarch64_sve_hw } } */
+/* { dg-options "-O3" } */
+
+#include "fcvtz_signed_1.c"
+
+#define ARRAY_SIZE 81
+
+#define VAL1 ((i * 17) - 180) /* -180 .. 1180 for i in [0, ARRAY_SIZE).  */
+#define VAL2 ((i * 237.86) - (29 * 237.86)) /* Negative for i < 29.  */
+#define VAL3 ((double) ((i * 0xf8dfef2fLL) - (11 * 0xf8dfef2fLL))) /* LL keeps the arithmetic signed 64-bit, so i < 11 gives negative inputs instead of wrapping modulo 2^32.  */
+
+int __attribute__ ((optimize (1))) /* main is built at optimize level 1 rather than the file's -O3; presumably so the reference loops are not vectorized -- confirm.  */
+main (void)
+{
+  static int16_t array_dest16[ARRAY_SIZE];
+  static int32_t array_dest32[ARRAY_SIZE];
+  static int64_t array_dest64[ARRAY_SIZE];
+
+  _Float16 array_source16[ARRAY_SIZE];
+  float array_source32[ARRAY_SIZE];
+  double array_source64[ARRAY_SIZE];
+
+  for (int i = 0; i < ARRAY_SIZE; i++)
+    {
+      array_source16[i] = VAL1;
+      array_source32[i] = VAL2; /* VAL2 is a double expression, rounded to float on this store.  */
+      array_source64[i] = VAL3;
+      asm volatile ("" ::: "memory"); /* Empty asm with a memory clobber after each set of stores.  */
+    }
+
+  vfcvtz_16 (array_dest16, array_source16, ARRAY_SIZE);
+  for (int i = 0; i < ARRAY_SIZE; i++)
+    if (array_dest16[i] != (int16_t) VAL1) /* Expected value is the same expression cast directly.  */
+      __builtin_abort ();
+
+  vfcvtz_32 (array_dest32, array_source32, ARRAY_SIZE);
+  for (int i = 0; i < ARRAY_SIZE; i++)
+    if (array_dest32[i] != (int32_t) VAL2) /* NOTE(review): expected value truncates the double VAL2, while the input was first rounded to float.  */
+      __builtin_abort ();
+
+  vfcvtz_64 (array_dest64, array_source64, ARRAY_SIZE);
+  for (int i = 0; i < ARRAY_SIZE; i++)
+    if (array_dest64[i] != (int64_t) VAL3)
+      __builtin_abort ();
+
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/fcvtz_unsigned_1.c b/gcc/testsuite/gcc.target/aarch64/sve/fcvtz_unsigned_1.c
new file mode 100644 (file)
index 0000000..9d735a8
--- /dev/null
@@ -0,0 +1,29 @@
+/* { dg-do assemble { target aarch64_asm_sve_ok } } */
+/* { dg-options "-O3 --save-temps" } */
+
+#include <stdint.h>
+
+void __attribute__ ((noinline, noclone)) /* Convert SIZE _Float16 elements of SRC1 to uint16_t in DST.  */
+vfcvtz_16 (uint16_t *dst, _Float16 *src1, int size)
+{
+  for (int i = 0; i < size; i++)
+    dst[i] = (uint16_t) src1[i]; /* A C floating-to-integer cast truncates toward zero.  */
+}
+
+void __attribute__ ((noinline, noclone)) /* Convert SIZE float elements of SRC1 to uint32_t in DST.  */
+vfcvtz_32 (uint32_t *dst, float *src1, int size)
+{
+  for (int i = 0; i < size; i++)
+    dst[i] = (uint32_t) src1[i]; /* A C floating-to-integer cast truncates toward zero.  */
+}
+
+void __attribute__ ((noinline, noclone)) /* Convert SIZE double elements of SRC1 to uint64_t in DST.  */
+vfcvtz_64 (uint64_t *dst, double *src1, int size)
+{
+  for (int i = 0; i < size; i++)
+    dst[i] = (uint64_t) src1[i]; /* A C floating-to-integer cast truncates toward zero.  */
+}
+
+/* { dg-final { scan-assembler-times {\tfcvtzu\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tfcvtzu\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tfcvtzu\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d\n} 1 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/fcvtz_unsigned_1_run.c b/gcc/testsuite/gcc.target/aarch64/sve/fcvtz_unsigned_1_run.c
new file mode 100644 (file)
index 0000000..8c193c7
--- /dev/null
@@ -0,0 +1,47 @@
+/* { dg-do run { target aarch64_sve_hw } } */
+/* { dg-options "-O2 -ftree-vectorize" } */
+
+#include "fcvtz_unsigned_1.c"
+
+#define ARRAY_SIZE 75
+
+#define VAL1 (i * 19) /* 0 .. 1406 for i in [0, ARRAY_SIZE).  */
+#define VAL2 (i * 2574.33)
+#define VAL3 ((double) (i * 0xff23efef)) /* NOTE(review): 0xff23efef does not fit in a 32-bit int, so with 32-bit int this product is unsigned and wraps modulo 2^32 -- confirm that is intended for the 64-bit case.  */
+
+int __attribute__ ((optimize (1))) /* main is built at optimize level 1 rather than the file's -O2 -ftree-vectorize; presumably so the reference loops are not vectorized -- confirm.  */
+main (void)
+{
+  static uint16_t array_dest16[ARRAY_SIZE];
+  static uint32_t array_dest32[ARRAY_SIZE];
+  static uint64_t array_dest64[ARRAY_SIZE];
+
+  _Float16 array_source16[ARRAY_SIZE];
+  float array_source32[ARRAY_SIZE];
+  double array_source64[ARRAY_SIZE];
+
+  for (int i = 0; i < ARRAY_SIZE; i++)
+    {
+      array_source16[i] = VAL1;
+      array_source32[i] = VAL2; /* VAL2 is a double expression, rounded to float on this store.  */
+      array_source64[i] = VAL3;
+      asm volatile ("" ::: "memory"); /* Empty asm with a memory clobber after each set of stores.  */
+    }
+
+  vfcvtz_16 (array_dest16, array_source16, ARRAY_SIZE);
+  for (int i = 0; i < ARRAY_SIZE; i++)
+    if (array_dest16[i] != (uint16_t) VAL1) /* Expected value is the same expression cast directly.  */
+      __builtin_abort ();
+
+  vfcvtz_32 (array_dest32, array_source32, ARRAY_SIZE);
+  for (int i = 0; i < ARRAY_SIZE; i++)
+    if (array_dest32[i] != (uint32_t) VAL2) /* NOTE(review): expected value truncates the double VAL2, while the input was first rounded to float.  */
+      __builtin_abort ();
+
+  vfcvtz_64 (array_dest64, array_source64, ARRAY_SIZE);
+  for (int i = 0; i < ARRAY_SIZE; i++)
+    if (array_dest64[i] != (uint64_t) VAL3)
+      __builtin_abort ();
+
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/fdiv_1.c b/gcc/testsuite/gcc.target/aarch64/sve/fdiv_1.c
new file mode 100644 (file)
index 0000000..a930ec5
--- /dev/null
@@ -0,0 +1,43 @@
+/* { dg-do assemble { target aarch64_asm_sve_ok } } */
+/* { dg-options "-O3 -msve-vector-bits=256 --save-temps" } */
+
+typedef _Float16 vnx8hf __attribute__((vector_size(32)));
+typedef float vnx4sf __attribute__((vector_size(32)));
+typedef double vnx2df __attribute__((vector_size(32)));
+
+#define DO_OP(TYPE) /* Define vdiv_<TYPE> (*x = *x / y) and vdivr_<TYPE> (*x = y / *x).  */ \
+void vdiv_##TYPE (TYPE *x, TYPE y)             \
+{                                              \
+  register TYPE dst asm("z0");                 \
+  register TYPE src asm("z2");                 \
+  dst = *x;                                    \
+  src = y;                                     \
+  asm volatile ("" :: "w" (dst), "w" (src));   \
+  dst = dst / src; /* The destination is the dividend.  */ \
+  asm volatile ("" :: "w" (dst));              \
+  *x = dst;                                    \
+}                                              \
+void vdivr_##TYPE (TYPE *x, TYPE y)            \
+{                                              \
+  register TYPE dst asm("z0");                 \
+  register TYPE src asm("z2");                 \
+  dst = *x;                                    \
+  src = y;                                     \
+  asm volatile ("" :: "w" (dst), "w" (src));   \
+  dst = src / dst; /* The destination is the divisor.  */ \
+  asm volatile ("" :: "w" (dst));              \
+  *x = dst;                                    \
+}
+
+DO_OP (vnx8hf)
+DO_OP (vnx4sf)
+DO_OP (vnx2df)
+
+/* { dg-final { scan-assembler-times {\tfdiv\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tfdivr\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 1 } } */
+
+/* { dg-final { scan-assembler-times {\tfdiv\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tfdivr\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 1 } } */
+
+/* { dg-final { scan-assembler-times {\tfdiv\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, z[0-9]+\.d\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tfdivr\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, z[0-9]+\.d\n} 1 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/fdup_1.c b/gcc/testsuite/gcc.target/aarch64/sve/fdup_1.c
new file mode 100644 (file)
index 0000000..c13efd8
--- /dev/null
@@ -0,0 +1,62 @@
+/* { dg-do assemble { target aarch64_asm_sve_ok } } */
+/* -fno-tree-loop-distribute-patterns prevents conversion to memset.  */
+/* { dg-options "-O3 -fno-tree-loop-distribute-patterns --save-temps" } */
+
+#include <stdint.h>
+
+#define NUM_ELEMS(TYPE) (1024 / sizeof (TYPE))
+
+#define DEF_SET_IMM(TYPE, IMM, SUFFIX) /* Define set_<TYPE>_<SUFFIX>: store IMM to NUM_ELEMS (TYPE) elements of A.  */ \
+void __attribute__ ((noinline, noclone))       \
+set_##TYPE##_##SUFFIX (TYPE *a)                        \
+{                                              \
+  for (int i = 0; i < NUM_ELEMS (TYPE); i++)   \
+    a[i] = IMM;                                        \
+}
+
+#define DEF_SET_IMM_FP(IMM, SUFFIX) /* Instantiate DEF_SET_IMM for float and double.  */ \
+  DEF_SET_IMM (float, IMM, SUFFIX)  \
+  DEF_SET_IMM (double, IMM, SUFFIX)
+
+/* Valid FMOV immediates: each is expected to produce one FMOV per element size.  */
+DEF_SET_IMM_FP (1, imm1)
+DEF_SET_IMM_FP (0x1.1p0, imm1p0)
+DEF_SET_IMM_FP (0x1.fp0, immfp0)
+DEF_SET_IMM_FP (0x1.1p4, imm1p4)
+DEF_SET_IMM_FP (0x1.1p-3, imm1pm3)
+DEF_SET_IMM_FP (0x1.fp4, immfp4)
+DEF_SET_IMM_FP (0x1.fp-3, immfpm3)
+
+/* Zero should use MOV #0 instead of FMOV.  */
+DEF_SET_IMM_FP (0, imm0)
+
+/* Not valid FMOV immediates: none of these should produce an FMOV.  */
+DEF_SET_IMM_FP (0x1.1fp0, imm1fp0)
+DEF_SET_IMM_FP (0x1.1p5, imm1p5)
+DEF_SET_IMM_FP (0x1.1p-4, imm1pm4)
+DEF_SET_IMM_FP (0x1.1fp5, imm1fp5)
+DEF_SET_IMM_FP (0x1.1fp-4, imm1fpm4)
+
+/* { dg-final { scan-assembler-times {\tfmov\tz[0-9]+\.s,} 7 } } */
+
+/* { dg-final { scan-assembler-times {\tfmov\tz[0-9]+\.s, #1.0e\+0\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tfmov\tz[0-9]+\.s, #1.0625e\+0\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tfmov\tz[0-9]+\.s, #1.9375e\+0\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tfmov\tz[0-9]+\.s, #1.7e\+1\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tfmov\tz[0-9]+\.s, #1.328125e-1\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tfmov\tz[0-9]+\.s, #3.1e\+1\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tfmov\tz[0-9]+\.s, #2.421875e-1\n} 1 } } */
+
+/* { dg-final { scan-assembler-times {\tmov\tz[0-9]+\.s, #0\n} 1 } } */
+
+/* { dg-final { scan-assembler-times {\tfmov\tz[0-9]+\.d,} 7 } } */
+
+/* { dg-final { scan-assembler-times {\tfmov\tz[0-9]+\.d, #1.0e\+0\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tfmov\tz[0-9]+\.d, #1.0625e\+0\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tfmov\tz[0-9]+\.d, #1.9375e\+0\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tfmov\tz[0-9]+\.d, #1.7e\+1\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tfmov\tz[0-9]+\.d, #1.328125e-1\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tfmov\tz[0-9]+\.d, #3.1e\+1\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tfmov\tz[0-9]+\.d, #2.421875e-1\n} 1 } } */
+
+/* { dg-final { scan-assembler-times {\tmov\tz[0-9]+\.d, #0\n} 1 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/fdup_1_run.c b/gcc/testsuite/gcc.target/aarch64/sve/fdup_1_run.c
new file mode 100644 (file)
index 0000000..0623e93
--- /dev/null
@@ -0,0 +1,38 @@
+/* { dg-do run { target { aarch64_sve_hw } } } */
+/* { dg-options "-O3 -fno-tree-loop-distribute-patterns" } */
+
+#include "fdup_1.c"
+
+#define TEST_SET_IMM(TYPE,IMM,SUFFIX) /* Call set_<TYPE>_<SUFFIX> on a local array; abort unless every element equals IMM.  */ \
+  {                                            \
+    TYPE v[NUM_ELEMS (TYPE)];                  \
+    set_##TYPE##_##SUFFIX (v);                 \
+    for (int i = 0; i < NUM_ELEMS (TYPE); i++ )        \
+      if (v[i] != IMM)                         \
+       __builtin_abort ();                     \
+  }
+
+#define TEST_SET_IMM_FP(IMM, SUFFIX) /* Run TEST_SET_IMM for float and double.  */ \
+  TEST_SET_IMM (float, IMM, SUFFIX)  \
+  TEST_SET_IMM (double, IMM, SUFFIX)
+
+int __attribute__ ((optimize (1))) /* main is built at optimize level 1 rather than the file's -O3.  */
+main (int argc, char **argv)
+{
+  TEST_SET_IMM_FP (1, imm1) /* Same immediates and suffixes, in the same order, as the definitions in fdup_1.c.  */
+  TEST_SET_IMM_FP (0x1.1p0, imm1p0)
+  TEST_SET_IMM_FP (0x1.fp0, immfp0)
+  TEST_SET_IMM_FP (0x1.1p4, imm1p4)
+  TEST_SET_IMM_FP (0x1.1p-3, imm1pm3)
+  TEST_SET_IMM_FP (0x1.fp4, immfp4)
+  TEST_SET_IMM_FP (0x1.fp-3, immfpm3)
+
+  TEST_SET_IMM_FP (0, imm0)
+  TEST_SET_IMM_FP (0x1.1fp0, imm1fp0)
+  TEST_SET_IMM_FP (0x1.1p5, imm1p5)
+  TEST_SET_IMM_FP (0x1.1p-4, imm1pm4)
+  TEST_SET_IMM_FP (0x1.1fp5, imm1fp5)
+  TEST_SET_IMM_FP (0x1.1fp-4, imm1fpm4)
+
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/fmad_1.c b/gcc/testsuite/gcc.target/aarch64/sve/fmad_1.c
new file mode 100644 (file)
index 0000000..d9aa08b
--- /dev/null
@@ -0,0 +1,29 @@
+/* { dg-do assemble { target aarch64_asm_sve_ok } } */
+/* { dg-options "-O3 -msve-vector-bits=256 --save-temps" } */
+
+typedef _Float16 vnx8hf __attribute__((vector_size(32)));
+typedef float vnx4sf __attribute__((vector_size(32)));
+typedef double vnx2df __attribute__((vector_size(32)));
+
+#define DO_OP(TYPE) /* Define vmad<TYPE>: *x = (*x * y) + z on one whole vector.  */ \
+void vmad##TYPE (TYPE *x, TYPE y, TYPE z)                      \
+{                                                              \
+  register TYPE dst  asm("z0");                                        \
+  register TYPE src1 asm("z2");                                        \
+  register TYPE src2 asm("z4");                                        \
+  dst = *x;                                                    \
+  src1 = y;                                                    \
+  src2 = z;                                                    \
+  asm volatile ("" :: "w" (dst), "w" (src1), "w" (src2)); /* Empty asm taking all three variables as inputs.  */ \
+  dst = (dst * src1) + src2; /* z0 is a multiplicand and the result; the scans below expect FMAD z0, p/m, z2, z4.  */ \
+  asm volatile ("" :: "w" (dst));                              \
+  *x = dst;                                                    \
+}
+
+DO_OP (vnx8hf)
+DO_OP (vnx4sf)
+DO_OP (vnx2df)
+
+/* { dg-final { scan-assembler-times {\tfmad\tz0\.h, p[0-7]/m, z2\.h, z4\.h\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tfmad\tz0\.s, p[0-7]/m, z2\.s, z4\.s\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tfmad\tz0\.d, p[0-7]/m, z2\.d, z4\.d\n} 1 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/fmla_1.c b/gcc/testsuite/gcc.target/aarch64/sve/fmla_1.c
new file mode 100644 (file)
index 0000000..4d66672
--- /dev/null
@@ -0,0 +1,29 @@
+/* { dg-do assemble { target aarch64_asm_sve_ok } } */
+/* { dg-options "-O3 -msve-vector-bits=256 --save-temps" } */
+
+typedef _Float16 vnx8hf __attribute__((vector_size(32)));
+typedef float vnx4sf __attribute__((vector_size(32)));
+typedef double vnx2df __attribute__((vector_size(32)));
+
+#define DO_OP(TYPE) /* Define vmad<TYPE>: *x = (y * z) + *x on one whole vector.  NOTE(review): named vmad although this file tests FMLA.  */ \
+void vmad##TYPE (TYPE *x, TYPE y, TYPE z)                      \
+{                                                              \
+  register TYPE dst  asm("z0");                                        \
+  register TYPE src1 asm("z2");                                        \
+  register TYPE src2 asm("z4");                                        \
+  dst = *x;                                                    \
+  src1 = y;                                                    \
+  src2 = z;                                                    \
+  asm volatile ("" :: "w" (dst), "w" (src1), "w" (src2)); /* Empty asm taking all three variables as inputs.  */ \
+  dst = (src1 * src2) + dst; /* z0 is the addend and the result; the scans below expect FMLA z0, p/m, z2, z4.  */ \
+  asm volatile ("" :: "w" (dst));                              \
+  *x = dst;                                                    \
+}
+
+DO_OP (vnx8hf)
+DO_OP (vnx4sf)
+DO_OP (vnx2df)
+
+/* { dg-final { scan-assembler-times {\tfmla\tz0\.h, p[0-7]/m, z2\.h, z4\.h\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tfmla\tz0\.s, p[0-7]/m, z2\.s, z4\.s\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tfmla\tz0\.d, p[0-7]/m, z2\.d, z4\.d\n} 1 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/fmls_1.c b/gcc/testsuite/gcc.target/aarch64/sve/fmls_1.c
new file mode 100644 (file)
index 0000000..f37727c
--- /dev/null
@@ -0,0 +1,29 @@
+/* { dg-do assemble { target aarch64_asm_sve_ok } } */
+/* { dg-options "-O3 -msve-vector-bits=256 --save-temps" } */
+
+typedef _Float16 vnx8hf __attribute__((vector_size(32)));
+typedef float vnx4sf __attribute__((vector_size(32)));
+typedef double vnx2df __attribute__((vector_size(32)));
+
+#define DO_OP(TYPE) /* Define vmad<TYPE>: *x = (-y * z) + *x on one whole vector.  NOTE(review): named vmad although this file tests FMLS.  */ \
+void vmad##TYPE (TYPE *x, TYPE y, TYPE z)                      \
+{                                                              \
+  register TYPE dst  asm("z0");                                        \
+  register TYPE src1 asm("z2");                                        \
+  register TYPE src2 asm("z4");                                        \
+  dst = *x;                                                    \
+  src1 = y;                                                    \
+  src2 = z;                                                    \
+  asm volatile ("" :: "w" (dst), "w" (src1), "w" (src2)); /* Empty asm taking all three variables as inputs.  */ \
+  dst = (-src1 * src2) + dst; /* z0 is the addend and the result; the scans below expect FMLS z0, p/m, z2, z4.  */ \
+  asm volatile ("" :: "w" (dst));                              \
+  *x = dst;                                                    \
+}
+
+DO_OP (vnx8hf)
+DO_OP (vnx4sf)
+DO_OP (vnx2df)
+
+/* { dg-final { scan-assembler-times {\tfmls\tz0\.h, p[0-7]/m, z2\.h, z4\.h\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tfmls\tz0\.s, p[0-7]/m, z2\.s, z4\.s\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tfmls\tz0\.d, p[0-7]/m, z2\.d, z4\.d\n} 1 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/fmsb_1.c b/gcc/testsuite/gcc.target/aarch64/sve/fmsb_1.c
new file mode 100644 (file)
index 0000000..c448b96
--- /dev/null
@@ -0,0 +1,29 @@
+/* { dg-do assemble { target aarch64_asm_sve_ok } } */
+/* { dg-options "-O3 -msve-vector-bits=256 --save-temps" } */
+
+typedef _Float16 vnx8hf __attribute__((vector_size(32)));
+typedef float vnx4sf __attribute__((vector_size(32)));
+typedef double vnx2df __attribute__((vector_size(32)));
+
+#define DO_OP(TYPE) /* Define vmad<TYPE>: *x = (-*x * y) + z on one whole vector.  NOTE(review): named vmad although this file tests FMSB.  */ \
+void vmad##TYPE (TYPE *x, TYPE y, TYPE z)                      \
+{                                                              \
+  register TYPE dst  asm("z0");                                        \
+  register TYPE src1 asm("z2");                                        \
+  register TYPE src2 asm("z4");                                        \
+  dst = *x;                                                    \
+  src1 = y;                                                    \
+  src2 = z;                                                    \
+  asm volatile ("" :: "w" (dst), "w" (src1), "w" (src2)); /* Empty asm taking all three variables as inputs.  */ \
+  dst = (-dst * src1) + src2; /* z0 is a multiplicand and the result; the scans below expect FMSB z0, p/m, z2, z4.  */ \
+  asm volatile ("" :: "w" (dst));                              \
+  *x = dst;                                                    \
+}
+
+DO_OP (vnx8hf)
+DO_OP (vnx4sf)
+DO_OP (vnx2df)
+
+/* { dg-final { scan-assembler-times {\tfmsb\tz0\.h, p[0-7]/m, z2\.h, z4\.h\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tfmsb\tz0\.s, p[0-7]/m, z2\.s, z4\.s\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tfmsb\tz0\.d, p[0-7]/m, z2\.d, z4\.d\n} 1 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/fmul_1.c b/gcc/testsuite/gcc.target/aarch64/sve/fmul_1.c
new file mode 100644 (file)
index 0000000..4a3e7c0
--- /dev/null
@@ -0,0 +1,46 @@
+/* { dg-do assemble { target aarch64_asm_sve_ok } } */
+/* { dg-options "-O3 --save-temps" } */
+
+#define DO_REGREG_OPS(TYPE, OP, NAME) /* Define varith_<TYPE>_<NAME>: dst[i] = dst[i] OP src[i] for COUNT elements.  */ \
+void varith_##TYPE##_##NAME (TYPE *dst, TYPE *src, int count)  \
+{                                                              \
+  for (int i = 0; i < count; ++i)                              \
+    dst[i] = dst[i] OP src[i];                                 \
+}
+
+#define DO_IMMEDIATE_OPS(VALUE, TYPE, OP, NAME) /* Define varithimm_<NAME>_<TYPE>: dst[i] = dst[i] OP VALUE for COUNT elements.  */ \
+void varithimm_##NAME##_##TYPE (TYPE *dst, int count)  \
+{                                                      \
+  for (int i = 0; i < count; ++i)                      \
+    dst[i] = dst[i] OP (TYPE) VALUE; /* VALUE is converted to TYPE before the operation.  */ \
+}
+
+#define DO_ARITH_OPS(TYPE, OP, NAME) /* Instantiate the register-register loop plus constant loops for 0.5, 2, 5, -0.5 and -2.  */ \
+  DO_REGREG_OPS (TYPE, OP, NAME);                              \
+  DO_IMMEDIATE_OPS (0.5, TYPE, OP, NAME ## 0point5);           \
+  DO_IMMEDIATE_OPS (2, TYPE, OP, NAME ## 2);                   \
+  DO_IMMEDIATE_OPS (5, TYPE, OP, NAME ## 5);                   \
+  DO_IMMEDIATE_OPS (-0.5, TYPE, OP, NAME ## minus0point5);     \
+  DO_IMMEDIATE_OPS (-2, TYPE, OP, NAME ## minus2);
+
+DO_ARITH_OPS (_Float16, *, mul)
+DO_ARITH_OPS (float, *, mul)
+DO_ARITH_OPS (double, *, mul)
+
+/* { dg-final { scan-assembler-times {\tfmul\tz[0-9]+\.h, z[0-9]+\.h, z[0-9]+\.h\n} 4 } } */
+/* { dg-final { scan-assembler-times {\tfmul\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, #0.5\n} 1 } } */
+/* { dg-final { scan-assembler-not   {\tfmul\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, #2} } } */
+/* { dg-final { scan-assembler-not   {\tfmul\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, #5} } } */
+/* { dg-final { scan-assembler-not   {\tfmul\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, #-} } } */
+
+/* { dg-final { scan-assembler-times {\tfmul\tz[0-9]+\.s, z[0-9]+\.s, z[0-9]+\.s\n} 4 } } */
+/* { dg-final { scan-assembler-times {\tfmul\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, #0.5\n} 1 } } */
+/* { dg-final { scan-assembler-not   {\tfmul\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, #2} } } */
+/* { dg-final { scan-assembler-not   {\tfmul\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, #5} } } */
+/* { dg-final { scan-assembler-not   {\tfmul\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, #-} } } */
+
+/* { dg-final { scan-assembler-times {\tfmul\tz[0-9]+\.d, z[0-9]+\.d, z[0-9]+\.d\n} 4 } } */
+/* { dg-final { scan-assembler-times {\tfmul\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, #0.5\n} 1 } } */
+/* { dg-final { scan-assembler-not   {\tfmul\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, #2} } } */
+/* { dg-final { scan-assembler-not   {\tfmul\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, #5} } } */
+/* { dg-final { scan-assembler-not   {\tfmul\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, #-} } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/fneg_1.c b/gcc/testsuite/gcc.target/aarch64/sve/fneg_1.c
new file mode 100644 (file)
index 0000000..4357ee9
--- /dev/null
@@ -0,0 +1,17 @@
+/* { dg-do assemble { target aarch64_asm_sve_ok } } */
+/* { dg-options "-O3 --save-temps" } */
+
+#define DO_OPS(TYPE) /* Define vneg_<TYPE>: dst[i] = -src[i] for COUNT elements.  */ \
+void vneg_##TYPE (TYPE *dst, TYPE *src, int count)     \
+{                                                      \
+  for (int i = 0; i < count; ++i)                      \
+    dst[i] = -src[i];                                  \
+}
+
+DO_OPS (_Float16)
+DO_OPS (float)
+DO_OPS (double)
+
+/* { dg-final { scan-assembler-times {\tfneg\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tfneg\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tfneg\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d\n} 1 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/fnmad_1.c b/gcc/testsuite/gcc.target/aarch64/sve/fnmad_1.c
new file mode 100644 (file)
index 0000000..df61f4a
--- /dev/null
@@ -0,0 +1,29 @@
+/* { dg-do assemble { target aarch64_asm_sve_ok } } */
+/* { dg-options "-O3 -msve-vector-bits=256 --save-temps" } */
+
+typedef _Float16 vnx8hf __attribute__((vector_size(32)));
+typedef float vnx4sf __attribute__((vector_size(32)));
+typedef double vnx2df __attribute__((vector_size(32)));
+
+#define DO_OP(TYPE) /* Define vmad<TYPE>: *x = (-*x * y) - z on one whole vector.  NOTE(review): named vmad although this file tests FNMAD.  */ \
+void vmad##TYPE (TYPE *x, TYPE y, TYPE z)                      \
+{                                                              \
+  register TYPE dst  asm("z0");                                        \
+  register TYPE src1 asm("z2");                                        \
+  register TYPE src2 asm("z4");                                        \
+  dst = *x;                                                    \
+  src1 = y;                                                    \
+  src2 = z;                                                    \
+  asm volatile ("" :: "w" (dst), "w" (src1), "w" (src2)); /* Empty asm taking all three variables as inputs.  */ \
+  dst = (-dst * src1) - src2; /* z0 is a multiplicand and the result; the scans below expect FNMAD z0, p/m, z2, z4.  */ \
+  asm volatile ("" :: "w" (dst));                              \
+  *x = dst;                                                    \
+}
+
+DO_OP (vnx8hf)
+DO_OP (vnx4sf)
+DO_OP (vnx2df)
+
+/* { dg-final { scan-assembler-times {\tfnmad\tz0\.h, p[0-7]/m, z2\.h, z4\.h\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tfnmad\tz0\.s, p[0-7]/m, z2\.s, z4\.s\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tfnmad\tz0\.d, p[0-7]/m, z2\.d, z4\.d\n} 1 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/fnmla_1.c b/gcc/testsuite/gcc.target/aarch64/sve/fnmla_1.c
new file mode 100644 (file)
index 0000000..1e70658
--- /dev/null
@@ -0,0 +1,29 @@
+/* { dg-do assemble { target aarch64_asm_sve_ok } } */
+/* { dg-options "-O3 -msve-vector-bits=256 --save-temps" } */
+
+typedef _Float16 vnx8hf __attribute__((vector_size(32)));
+typedef float vnx4sf __attribute__((vector_size(32)));
+typedef double vnx2df __attribute__((vector_size(32)));
+
+#define DO_OP(TYPE) /* Define vmad<TYPE>: *x = (-y * z) - *x on one whole vector.  NOTE(review): named vmad although this file tests FNMLA.  */ \
+void vmad##TYPE (TYPE *x, TYPE y, TYPE z)                      \
+{                                                              \
+  register TYPE dst  asm("z0");                                        \
+  register TYPE src1 asm("z2");                                        \
+  register TYPE src2 asm("z4");                                        \
+  dst = *x;                                                    \
+  src1 = y;                                                    \
+  src2 = z;                                                    \
+  asm volatile ("" :: "w" (dst), "w" (src1), "w" (src2)); /* Empty asm taking all three variables as inputs.  */ \
+  dst = (-src1 * src2) - dst; /* z0 is the subtracted accumulator and the result; the scans below expect FNMLA z0, p/m, z2, z4.  */ \
+  asm volatile ("" :: "w" (dst));                              \
+  *x = dst;                                                    \
+}
+
+DO_OP (vnx8hf)
+DO_OP (vnx4sf)
+DO_OP (vnx2df)
+
+/* { dg-final { scan-assembler-times {\tfnmla\tz0\.h, p[0-7]/m, z2\.h, z4\.h\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tfnmla\tz0\.s, p[0-7]/m, z2\.s, z4\.s\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tfnmla\tz0\.d, p[0-7]/m, z2\.d, z4\.d\n} 1 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/fnmls_1.c b/gcc/testsuite/gcc.target/aarch64/sve/fnmls_1.c
new file mode 100644 (file)
index 0000000..18fa236
--- /dev/null
@@ -0,0 +1,29 @@
+/* { dg-do assemble { target aarch64_asm_sve_ok } } */
+/* { dg-options "-O3 -msve-vector-bits=256 --save-temps" } */
+
+typedef _Float16 vnx8hf __attribute__((vector_size(32)));
+typedef float vnx4sf __attribute__((vector_size(32)));
+typedef double vnx2df __attribute__((vector_size(32)));
+
+#define DO_OP(TYPE) /* Define vmad<TYPE>: *x = (y * z) - *x on one whole vector.  NOTE(review): named vmad although this file tests FNMLS.  */ \
+void vmad##TYPE (TYPE *x, TYPE y, TYPE z)                      \
+{                                                              \
+  register TYPE dst  asm("z0");                                        \
+  register TYPE src1 asm("z2");                                        \
+  register TYPE src2 asm("z4");                                        \
+  dst = *x;                                                    \
+  src1 = y;                                                    \
+  src2 = z;                                                    \
+  asm volatile ("" :: "w" (dst), "w" (src1), "w" (src2)); /* Empty asm taking all three variables as inputs.  */ \
+  dst = (src1 * src2) - dst; /* z0 is the subtracted accumulator and the result; the scans below expect FNMLS z0, p/m, z2, z4.  */ \
+  asm volatile ("" :: "w" (dst));                              \
+  *x = dst;                                                    \
+}
+
+DO_OP (vnx8hf)
+DO_OP (vnx4sf)
+DO_OP (vnx2df)
+
+/* { dg-final { scan-assembler-times {\tfnmls\tz0\.h, p[0-7]/m, z2\.h, z4\.h\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tfnmls\tz0\.s, p[0-7]/m, z2\.s, z4\.s\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tfnmls\tz0\.d, p[0-7]/m, z2\.d, z4\.d\n} 1 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/fnmsb_1.c b/gcc/testsuite/gcc.target/aarch64/sve/fnmsb_1.c
new file mode 100644 (file)
index 0000000..08c5ff3
--- /dev/null
@@ -0,0 +1,29 @@
+/* { dg-do assemble { target aarch64_asm_sve_ok } } */
+/* { dg-options "-O3 -msve-vector-bits=256 --save-temps" } */
+
+typedef _Float16 vnx8hf __attribute__((vector_size(32)));
+typedef float vnx4sf __attribute__((vector_size(32)));
+typedef double vnx2df __attribute__((vector_size(32)));
+
+#define DO_OP(TYPE) /* Define vmad<TYPE>: *x = (*x * y) - z on one whole vector.  NOTE(review): named vmad although this file tests FNMSB.  */ \
+void vmad##TYPE (TYPE *x, TYPE y, TYPE z)                      \
+{                                                              \
+  register TYPE dst  asm("z0");                                        \
+  register TYPE src1 asm("z2");                                        \
+  register TYPE src2 asm("z4");                                        \
+  dst = *x;                                                    \
+  src1 = y;                                                    \
+  src2 = z;                                                    \
+  asm volatile ("" :: "w" (dst), "w" (src1), "w" (src2)); /* Empty asm taking all three variables as inputs.  */ \
+  dst = (dst * src1) - src2; /* z0 is a multiplicand and the result; the scans below expect FNMSB z0, p/m, z2, z4.  */ \
+  asm volatile ("" :: "w" (dst));                              \
+  *x = dst;                                                    \
+}
+
+DO_OP (vnx8hf)
+DO_OP (vnx4sf)
+DO_OP (vnx2df)
+
+/* { dg-final { scan-assembler-times {\tfnmsb\tz0\.h, p[0-7]/m, z2\.h, z4\.h\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tfnmsb\tz0\.s, p[0-7]/m, z2\.s, z4\.s\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tfnmsb\tz0\.d, p[0-7]/m, z2\.d, z4\.d\n} 1 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/fp_arith_1.c b/gcc/testsuite/gcc.target/aarch64/sve/fp_arith_1.c
new file mode 100644 (file)
index 0000000..5aed0dc
--- /dev/null
@@ -0,0 +1,71 @@
+/* { dg-do assemble { target aarch64_asm_sve_ok } } */
+/* { dg-options "-O3 --save-temps" } */
+
+#define DO_REGREG_OPS(TYPE, OP, NAME) /* Define varith_<TYPE>_<NAME>: dst[i] = dst[i] OP src[i] for COUNT elements.  */ \
+void varith_##TYPE##_##NAME (TYPE *dst, TYPE *src, int count)  \
+{                                                              \
+  for (int i = 0; i < count; ++i)                              \
+    dst[i] = dst[i] OP src[i];                                 \
+}
+
+#define DO_IMMEDIATE_OPS(VALUE, TYPE, OP, NAME) /* Define varithimm_<NAME>_<TYPE>: dst[i] = dst[i] OP VALUE for COUNT elements.  */ \
+void varithimm_##NAME##_##TYPE (TYPE *dst, int count)  \
+{                                                      \
+  for (int i = 0; i < count; ++i)                      \
+    dst[i] = dst[i] OP (TYPE) VALUE; /* VALUE is converted to TYPE before the operation.  */ \
+}
+
+#define DO_ARITH_OPS(TYPE, OP, NAME) /* Instantiate the register-register loop plus constant loops for 1, 0.5, 2, 2.5, -0.5 and -1.  */ \
+  DO_REGREG_OPS (TYPE, OP, NAME);                              \
+  DO_IMMEDIATE_OPS (1, TYPE, OP, NAME ## 1);                   \
+  DO_IMMEDIATE_OPS (0.5, TYPE, OP, NAME ## pointfive);         \
+  DO_IMMEDIATE_OPS (2, TYPE, OP, NAME ## 2);                   \
+  DO_IMMEDIATE_OPS (2.5, TYPE, OP, NAME ## twopoint5);         \
+  DO_IMMEDIATE_OPS (-0.5, TYPE, OP, NAME ## minuspointfive);   \
+  DO_IMMEDIATE_OPS (-1, TYPE, OP, NAME ## minus1);
+
+DO_ARITH_OPS (_Float16, +, add) /* Addition variants for each element type.  */
+DO_ARITH_OPS (float, +, add)
+DO_ARITH_OPS (double, +, add)
+
+DO_ARITH_OPS (_Float16, -, minus) /* Subtraction variants for each element type.  */
+DO_ARITH_OPS (float, -, minus)
+DO_ARITH_OPS (double, -, minus)
+
+/* No specific count because it's valid to use fadd or fsub for the
+   out-of-range constants.  */
+/* { dg-final { scan-assembler {\tfadd\tz[0-9]+\.h, z[0-9]+\.h, z[0-9]+\.h\n} } } */
+/* { dg-final { scan-assembler-times {\tfadd\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, #1.0\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tfadd\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, #0.5\n} 2 } } */
+/* { dg-final { scan-assembler-not   {\tfadd\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, #2} } } */
+/* { dg-final { scan-assembler-not   {\tfadd\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, #-} } } */
+
+/* { dg-final { scan-assembler {\tfsub\tz[0-9]+\.h, z[0-9]+\.h, z[0-9]+\.h\n} } } */
+/* { dg-final { scan-assembler-times {\tfsub\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, #1.0\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tfsub\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, #0.5\n} 2 } } */
+/* { dg-final { scan-assembler-not   {\tfsub\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, #2} } } */
+/* { dg-final { scan-assembler-not   {\tfsub\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, #-} } } */
+
+/* { dg-final { scan-assembler {\tfadd\tz[0-9]+\.s, z[0-9]+\.s, z[0-9]+\.s\n} } } */
+/* { dg-final { scan-assembler-times {\tfadd\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, #1.0\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tfadd\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, #0.5\n} 2 } } */
+/* { dg-final { scan-assembler-not   {\tfadd\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, #2} } } */
+/* { dg-final { scan-assembler-not   {\tfadd\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, #-} } } */
+
+/* { dg-final { scan-assembler {\tfsub\tz[0-9]+\.s, z[0-9]+\.s, z[0-9]+\.s\n} } } */
+/* { dg-final { scan-assembler-times {\tfsub\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, #1.0\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tfsub\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, #0.5\n} 2 } } */
+/* { dg-final { scan-assembler-not   {\tfsub\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, #2} } } */
+/* { dg-final { scan-assembler-not   {\tfsub\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, #-} } } */
+
+/* { dg-final { scan-assembler {\tfadd\tz[0-9]+\.d, z[0-9]+\.d, z[0-9]+\.d\n} } } */
+/* { dg-final { scan-assembler-times {\tfadd\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, #1.0\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tfadd\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, #0.5\n} 2 } } */
+/* { dg-final { scan-assembler-not   {\tfadd\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, #2} } } */
+/* { dg-final { scan-assembler-not   {\tfadd\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, #-} } } */
+
+/* { dg-final { scan-assembler {\tfsub\tz[0-9]+\.d, z[0-9]+\.d, z[0-9]+\.d\n} } } */
+/* { dg-final { scan-assembler-times {\tfsub\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, #1.0\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tfsub\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, #0.5\n} 2 } } */
+/* { dg-final { scan-assembler-not   {\tfsub\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, #2} } } */
+/* { dg-final { scan-assembler-not   {\tfsub\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, #-} } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/frinta_1.c b/gcc/testsuite/gcc.target/aarch64/sve/frinta_1.c
new file mode 100644 (file)
index 0000000..f099666
--- /dev/null
@@ -0,0 +1,16 @@
+/* { dg-do assemble { target aarch64_asm_sve_ok } } */
+/* { dg-options "-O2 -ftree-vectorize --save-temps" } */
+/* Define vsqrt_TYPE, which sets dst[i] = __builtin_OP (src[i]) for 0 <= i < count.  */
+#define DO_OPS(TYPE, OP)                       \
+void                                           \
+vsqrt_##TYPE (TYPE *dst, TYPE *src, int count) \
+{                                              \
+  for (int i = 0; i < count; ++i)              \
+    dst[i] = __builtin_##OP (src[i]);          \
+}
+/* NOTE(review): "vsqrt" looks inherited from fsqrt_1.c; dg-final matches only frinta.  */
+DO_OPS (float, roundf)
+DO_OPS (double, round)
+
+/* { dg-final { scan-assembler-times {\tfrinta\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tfrinta\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d\n} 1 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/frinti_1.c b/gcc/testsuite/gcc.target/aarch64/sve/frinti_1.c
new file mode 100644 (file)
index 0000000..7a6b2f8
--- /dev/null
@@ -0,0 +1,16 @@
+/* { dg-do assemble { target aarch64_asm_sve_ok } } */
+/* { dg-options "-O2 -ftree-vectorize --save-temps" } */
+/* Define vsqrt_TYPE, which sets dst[i] = __builtin_OP (src[i]) for 0 <= i < count.  */
+#define DO_OPS(TYPE, OP)                       \
+void                                           \
+vsqrt_##TYPE (TYPE *dst, TYPE *src, int count) \
+{                                              \
+  for (int i = 0; i < count; ++i)              \
+    dst[i] = __builtin_##OP (src[i]);          \
+}
+/* NOTE(review): "vsqrt" looks inherited from fsqrt_1.c; dg-final matches only frinti.  */
+DO_OPS (float, nearbyintf)
+DO_OPS (double, nearbyint)
+
+/* { dg-final { scan-assembler-times {\tfrinti\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tfrinti\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d\n} 1 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/frintm_1.c b/gcc/testsuite/gcc.target/aarch64/sve/frintm_1.c
new file mode 100644 (file)
index 0000000..f3c0956
--- /dev/null
@@ -0,0 +1,16 @@
+/* { dg-do assemble { target aarch64_asm_sve_ok } } */
+/* { dg-options "-O2 -ftree-vectorize --save-temps" } */
+/* Define vsqrt_TYPE, which sets dst[i] = __builtin_OP (src[i]) for 0 <= i < count.  */
+#define DO_OPS(TYPE, OP)                       \
+void                                           \
+vsqrt_##TYPE (TYPE *dst, TYPE *src, int count) \
+{                                              \
+  for (int i = 0; i < count; ++i)              \
+    dst[i] = __builtin_##OP (src[i]);          \
+}
+/* NOTE(review): "vsqrt" looks inherited from fsqrt_1.c; dg-final matches only frintm.  */
+DO_OPS (float, floorf)
+DO_OPS (double, floor)
+
+/* { dg-final { scan-assembler-times {\tfrintm\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tfrintm\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d\n} 1 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/frintp_1.c b/gcc/testsuite/gcc.target/aarch64/sve/frintp_1.c
new file mode 100644 (file)
index 0000000..5fb1eb3
--- /dev/null
@@ -0,0 +1,16 @@
+/* { dg-do assemble { target aarch64_asm_sve_ok } } */
+/* { dg-options "-O2 -ftree-vectorize --save-temps" } */
+/* Define vsqrt_TYPE, which sets dst[i] = __builtin_OP (src[i]) for 0 <= i < count.  */
+#define DO_OPS(TYPE, OP)                       \
+void                                           \
+vsqrt_##TYPE (TYPE *dst, TYPE *src, int count) \
+{                                              \
+  for (int i = 0; i < count; ++i)              \
+    dst[i] = __builtin_##OP (src[i]);          \
+}
+/* NOTE(review): "vsqrt" looks inherited from fsqrt_1.c; dg-final matches only frintp.  */
+DO_OPS (float, ceilf)
+DO_OPS (double, ceil)
+
+/* { dg-final { scan-assembler-times {\tfrintp\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tfrintp\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d\n} 1 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/frintx_1.c b/gcc/testsuite/gcc.target/aarch64/sve/frintx_1.c
new file mode 100644 (file)
index 0000000..3437533
--- /dev/null
@@ -0,0 +1,16 @@
+/* { dg-do assemble { target aarch64_asm_sve_ok } } */
+/* { dg-options "-O2 -ftree-vectorize --save-temps" } */
+/* Define vsqrt_TYPE, which sets dst[i] = __builtin_OP (src[i]) for 0 <= i < count.  */
+#define DO_OPS(TYPE, OP)                       \
+void                                           \
+vsqrt_##TYPE (TYPE *dst, TYPE *src, int count) \
+{                                              \
+  for (int i = 0; i < count; ++i)              \
+    dst[i] = __builtin_##OP (src[i]);          \
+}
+/* NOTE(review): "vsqrt" looks inherited from fsqrt_1.c; dg-final matches only frintx.  */
+DO_OPS (float, rintf)
+DO_OPS (double, rint)
+
+/* { dg-final { scan-assembler-times {\tfrintx\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tfrintx\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d\n} 1 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/frintz_1.c b/gcc/testsuite/gcc.target/aarch64/sve/frintz_1.c
new file mode 100644 (file)
index 0000000..08a837a
--- /dev/null
@@ -0,0 +1,16 @@
+/* { dg-do assemble { target aarch64_asm_sve_ok } } */
+/* { dg-options "-O2 -ftree-vectorize --save-temps" } */
+/* Define vsqrt_TYPE, which sets dst[i] = __builtin_OP (src[i]) for 0 <= i < count.  */
+#define DO_OPS(TYPE, OP)                       \
+void                                           \
+vsqrt_##TYPE (TYPE *dst, TYPE *src, int count) \
+{                                              \
+  for (int i = 0; i < count; ++i)              \
+    dst[i] = __builtin_##OP (src[i]);          \
+}
+/* NOTE(review): "vsqrt" looks inherited from fsqrt_1.c; dg-final matches only frintz.  */
+DO_OPS (float, truncf)
+DO_OPS (double, trunc)
+
+/* { dg-final { scan-assembler-times {\tfrintz\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tfrintz\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d\n} 1 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/fsqrt_1.c b/gcc/testsuite/gcc.target/aarch64/sve/fsqrt_1.c
new file mode 100644 (file)
index 0000000..55f1e26
--- /dev/null
@@ -0,0 +1,16 @@
+/* { dg-do assemble { target aarch64_asm_sve_ok } } */
+/* { dg-options "-O2 -ftree-vectorize -fno-math-errno --save-temps" } */
+/* Define vsqrt_TYPE, which sets dst[i] = __builtin_OP (src[i]) for 0 <= i < count.  */
+#define DO_OPS(TYPE, OP)                       \
+void                                           \
+vsqrt_##TYPE (TYPE *dst, TYPE *src, int count) \
+{                                              \
+  for (int i = 0; i < count; ++i)              \
+    dst[i] = __builtin_##OP (src[i]);          \
+}
+/* One float and one double loop; dg-final expects one fsqrt per element size.  */
+DO_OPS (float, sqrtf)
+DO_OPS (double, sqrt)
+
+/* { dg-final { scan-assembler-times {\tfsqrt\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tfsqrt\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d\n} 1 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/fsubr_1.c b/gcc/testsuite/gcc.target/aarch64/sve/fsubr_1.c
new file mode 100644 (file)
index 0000000..f47a360
--- /dev/null
@@ -0,0 +1,38 @@
+/* { dg-do assemble { target aarch64_asm_sve_ok } } */
+/* { dg-options "-O3 --save-temps" } */
+/* Define vsubrarithimm_NAME_TYPE: dst[i] = (TYPE) VALUE - dst[i] for 0 <= i < count.  */
+#define DO_IMMEDIATE_OPS(VALUE, TYPE, NAME)                    \
+void vsubrarithimm_##NAME##_##TYPE (TYPE *dst, int count)      \
+{                                                              \
+  for (int i = 0; i < count; ++i)                              \
+    dst[i] = (TYPE) VALUE - dst[i];                            \
+}
+
+#define DO_ARITH_OPS(TYPE)                     \
+  DO_IMMEDIATE_OPS (0, TYPE, 0);               \
+  DO_IMMEDIATE_OPS (1, TYPE, 1);               \
+  DO_IMMEDIATE_OPS (0.5, TYPE, 0point5);       \
+  DO_IMMEDIATE_OPS (2, TYPE, 2);               \
+  DO_IMMEDIATE_OPS (3.5, TYPE, 3point5);
+
+DO_ARITH_OPS (_Float16)
+DO_ARITH_OPS (float)
+DO_ARITH_OPS (double)
+
+/* { dg-final { scan-assembler-times {\tfsub\tz[0-9]+\.h, z[0-9]+\.h, z[0-9]+\.h\n} 3 } } */
+/* { dg-final { scan-assembler-times {\tfsubr\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, #1.0\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tfsubr\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, #0.5\n} 1 } } */
+/* { dg-final { scan-assembler-not   {\tfsubr\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, #2} } } */
+/* { dg-final { scan-assembler-not   {\tfsubr\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, #3} } } */
+
+/* { dg-final { scan-assembler-times {\tfsub\tz[0-9]+\.s, z[0-9]+\.s, z[0-9]+\.s\n} 3 } } */
+/* { dg-final { scan-assembler-times {\tfsubr\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, #1.0\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tfsubr\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, #0.5\n} 1 } } */
+/* { dg-final { scan-assembler-not   {\tfsubr\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, #2} } } */
+/* { dg-final { scan-assembler-not   {\tfsubr\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, #3} } } */
+
+/* { dg-final { scan-assembler-times {\tfsub\tz[0-9]+\.d, z[0-9]+\.d, z[0-9]+\.d\n} 3 } } */
+/* { dg-final { scan-assembler-times {\tfsubr\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, #1.0\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tfsubr\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, #0.5\n} 1 } } */
+/* { dg-final { scan-assembler-not   {\tfsubr\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, #2} } } */
+/* { dg-final { scan-assembler-not   {\tfsubr\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, #3} } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/index_1.c b/gcc/testsuite/gcc.target/aarch64/sve/index_1.c
new file mode 100644 (file)
index 0000000..5ab254d
--- /dev/null
@@ -0,0 +1,94 @@
+/* { dg-do assemble { target aarch64_asm_sve_ok } } */
+/* { dg-options "-O2 -ftree-vectorize -msve-vector-bits=256 --save-temps" } */
+
+#include <stdint.h>
+/* Number of TYPE elements in 32 bytes, i.e. 256 bits (cf. -msve-vector-bits=256).  */
+#define NUM_ELEMS(TYPE) (32 / sizeof (TYPE))
+/* Define loop_TYPE_SUFFIX (a): a[i] = BASE + i * STEP for i < NUM_ELEMS (TYPE).  */
+#define DEF_LOOP(TYPE, BASE, STEP, SUFFIX)     \
+void __attribute__ ((noinline, noclone))       \
+loop_##TYPE##_##SUFFIX (TYPE *a)               \
+{                                              \
+  for (int i = 0; i < NUM_ELEMS (TYPE); ++i)   \
+    a[i] = (BASE) + i * (STEP);                        \
+}
+/* Invoke T (TYPE, BASE, STEP, SUFFIX) for uint8_t, uint16_t, uint32_t and uint64_t.  */
+#define TEST_ALL_UNSIGNED_TYPES(T, BASE, STEP, SUFFIX) \
+  T (uint8_t,  BASE, STEP, SUFFIX)                     \
+  T (uint16_t, BASE, STEP, SUFFIX)                     \
+  T (uint32_t, BASE, STEP, SUFFIX)                     \
+  T (uint64_t, BASE, STEP, SUFFIX)
+/* Invoke T (TYPE, BASE, STEP, SUFFIX) for int8_t, int16_t, int32_t and int64_t.  */
+#define TEST_ALL_SIGNED_TYPES(T, BASE, STEP, SUFFIX)   \
+  T (int8_t,  BASE, STEP, SUFFIX)                      \
+  T (int16_t, BASE, STEP, SUFFIX)                      \
+  T (int32_t, BASE, STEP, SUFFIX)                      \
+  T (int64_t, BASE, STEP, SUFFIX)
+
+/* Immediate loops: BASE and STEP lie in [-16, 15]; dg-final expects "#imm" operands.  */
+#define TEST_IMMEDIATE(T)                      \
+  TEST_ALL_UNSIGNED_TYPES (T, 0, 1, b0s1)      \
+  TEST_ALL_SIGNED_TYPES (T, 0, 1, b0s1)                \
+  TEST_ALL_UNSIGNED_TYPES (T, 0, 15, b0s15)    \
+  TEST_ALL_SIGNED_TYPES (T, 0, 15, b0s15)      \
+  TEST_ALL_SIGNED_TYPES (T, 0, -1, b0sm1)      \
+  TEST_ALL_SIGNED_TYPES (T, 0, -16, b0sm16)    \
+  TEST_ALL_SIGNED_TYPES (T, -16, 1, bm16s1)    \
+  TEST_ALL_UNSIGNED_TYPES (T, 15, 1, b15s1)    \
+  TEST_ALL_SIGNED_TYPES (T, 15, 1, b15s1)
+
+/* Non-immediate loops: BASE and/or STEP is 16 or -17; dg-final expects registers.  */
+#define TEST_NONIMMEDIATE(T)                   \
+  TEST_ALL_UNSIGNED_TYPES (T, 0, 16, b0s16)    \
+  TEST_ALL_SIGNED_TYPES (T, 0, 16, b0s16)      \
+  TEST_ALL_SIGNED_TYPES (T, 0, -17, b0sm17)    \
+  TEST_ALL_SIGNED_TYPES (T, -17, 1, bm17s1)    \
+  TEST_ALL_UNSIGNED_TYPES (T, 16, 1, b16s1)    \
+  TEST_ALL_SIGNED_TYPES (T, 16, 1, b16s1)      \
+  TEST_ALL_UNSIGNED_TYPES (T, 16, 16, b16s16)  \
+  TEST_ALL_SIGNED_TYPES (T, 16, 16, b16s16)    \
+  TEST_ALL_SIGNED_TYPES (T, -17, -17, bm17sm17)
+/* Apply T to every immediate and non-immediate (TYPE, BASE, STEP) combination.  */
+#define TEST_ALL(T) TEST_IMMEDIATE (T) TEST_NONIMMEDIATE (T)
+
+TEST_ALL (DEF_LOOP)
+
+/* { dg-final { scan-assembler-times {\tindex\tz[0-9]+\.b, #0, #1\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tindex\tz[0-9]+\.b, #0, #15\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tindex\tz[0-9]+\.b, #0, #-1\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tindex\tz[0-9]+\.b, #0, #-16\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tindex\tz[0-9]+\.b, #-16, #1\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tindex\tz[0-9]+\.b, #15, #1\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tindex\tz[0-9]+\.b, #0, w[0-9]+\n} 3 } } */
+/* { dg-final { scan-assembler-times {\tindex\tz[0-9]+\.b, w[0-9]+, #1\n} 3 } } */
+/* { dg-final { scan-assembler-times {\tindex\tz[0-9]+\.b, w[0-9]+, w[0-9]+\n} 3 } } */
+
+/* { dg-final { scan-assembler-times {\tindex\tz[0-9]+\.h, #0, #1\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tindex\tz[0-9]+\.h, #0, #15\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tindex\tz[0-9]+\.h, #0, #-1\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tindex\tz[0-9]+\.h, #0, #-16\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tindex\tz[0-9]+\.h, #-16, #1\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tindex\tz[0-9]+\.h, #15, #1\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tindex\tz[0-9]+\.h, #0, w[0-9]+\n} 3 } } */
+/* { dg-final { scan-assembler-times {\tindex\tz[0-9]+\.h, w[0-9]+, #1\n} 3 } } */
+/* { dg-final { scan-assembler-times {\tindex\tz[0-9]+\.h, w[0-9]+, w[0-9]+\n} 3 } } */
+
+/* { dg-final { scan-assembler-times {\tindex\tz[0-9]+\.s, #0, #1\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tindex\tz[0-9]+\.s, #0, #15\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tindex\tz[0-9]+\.s, #0, #-1\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tindex\tz[0-9]+\.s, #0, #-16\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tindex\tz[0-9]+\.s, #-16, #1\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tindex\tz[0-9]+\.s, #15, #1\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tindex\tz[0-9]+\.s, #0, w[0-9]+\n} 3 } } */
+/* { dg-final { scan-assembler-times {\tindex\tz[0-9]+\.s, w[0-9]+, #1\n} 3 } } */
+/* { dg-final { scan-assembler-times {\tindex\tz[0-9]+\.s, w[0-9]+, w[0-9]+\n} 3 } } */
+
+/* { dg-final { scan-assembler-times {\tindex\tz[0-9]+\.d, #0, #1\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tindex\tz[0-9]+\.d, #0, #15\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tindex\tz[0-9]+\.d, #0, #-1\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tindex\tz[0-9]+\.d, #0, #-16\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tindex\tz[0-9]+\.d, #-16, #1\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tindex\tz[0-9]+\.d, #15, #1\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tindex\tz[0-9]+\.d, #0, x[0-9]+\n} 3 } } */
+/* { dg-final { scan-assembler-times {\tindex\tz[0-9]+\.d, x[0-9]+, #1\n} 3 } } */
+/* { dg-final { scan-assembler-times {\tindex\tz[0-9]+\.d, x[0-9]+, x[0-9]+\n} 3 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/index_1_run.c b/gcc/testsuite/gcc.target/aarch64/sve/index_1_run.c
new file mode 100644 (file)
index 0000000..abc6918
--- /dev/null
@@ -0,0 +1,20 @@
+/* { dg-do run { target aarch64_sve_hw } } */
+/* { dg-options "-O2 -ftree-vectorize -msve-vector-bits=256" } */
+
+#include "index_1.c"
+
+#define TEST_LOOP(TYPE, BASE, STEP, SUFFIX)    \
+  {                                            \
+    TYPE array[NUM_ELEMS (TYPE)] = {};         \
+    loop_##TYPE##_##SUFFIX (array);            \
+    for (int i = 0; i < NUM_ELEMS (TYPE); i++) \
+      if (array[i] != (TYPE) (BASE + i * STEP))        \
+       __builtin_abort ();                     \
+  }
+/* Check every loop from index_1.c; main itself is built with optimize (1).  */
+int __attribute__ ((optimize (1)))
+main ()
+{
+  TEST_ALL (TEST_LOOP)
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/ld1r_1.c b/gcc/testsuite/gcc.target/aarch64/sve/ld1r_1.c
new file mode 100644 (file)
index 0000000..415de24
--- /dev/null
@@ -0,0 +1,53 @@
+/* { dg-do assemble { target aarch64_asm_sve_ok } } */
+/* { dg-options "-O3 -msve-vector-bits=256 --save-temps" } */
+
+#include <stdint.h>
+/* DUPn (X) expands to X repeated n times, separated by commas.  */
+#define DUP4(X) X, X, X, X
+#define DUP8(X) DUP4 (X), DUP4 (X)
+#define DUP16(X) DUP8 (X), DUP8 (X)
+#define DUP32(X) DUP16 (X), DUP16 (X)
+/* vector_size (32) types with unsigned 8-, 16-, 32- and 64-bit elements.  */
+typedef uint8_t vuint8_t __attribute__ ((vector_size (32)));
+typedef uint16_t vuint16_t __attribute__ ((vector_size (32)));
+typedef uint32_t vuint32_t __attribute__ ((vector_size (32)));
+typedef uint64_t vuint64_t __attribute__ ((vector_size (32)));
+/* Define NAME_TYPE (dest, ptr): store { INIT } to *dest; ptr points to TYPE's element type.  */
+#define TEST(TYPE, NAME, INIT)                                 \
+  void                                                         \
+  NAME##_##TYPE (TYPE *dest, __typeof__(dest[0][0]) *ptr)      \
+  {                                                            \
+    TYPE x = { INIT };                                         \
+    *dest = x;                                                 \
+  }
+/* For TYPE, define NAME_{m1,0,63,64}_TYPE, duplicating ptr[-1], ptr[0], ptr[63], ptr[64].  */
+#define TEST_GROUP(TYPE, NAME, DUP)            \
+  TEST (TYPE, NAME##_m1, DUP (ptr[-1]))                \
+  TEST (TYPE, NAME##_0, DUP (ptr[0]))          \
+  TEST (TYPE, NAME##_63, DUP (ptr[63]))                \
+  TEST (TYPE, NAME##_64, DUP (ptr[64]))
+
+TEST_GROUP (vuint8_t, t8, DUP32)
+TEST_GROUP (vuint16_t, t16, DUP16)
+TEST_GROUP (vuint32_t, t32, DUP8)
+TEST_GROUP (vuint64_t, t64, DUP4)
+
+/* { dg-final { scan-assembler-not {\tld1rb\tz[0-9]+\.b, p[0-7]/z, \[x1, -1\]\n} } } */
+/* { dg-final { scan-assembler {\tld1rb\tz[0-9]+\.b, p[0-7]/z, \[x1\]\n} } } */
+/* { dg-final { scan-assembler {\tld1rb\tz[0-9]+\.b, p[0-7]/z, \[x1, 63\]\n} } } */
+/* { dg-final { scan-assembler-not {\tld1rb\tz[0-9]+\.b, p[0-7]/z, \[x1, 64\]\n} } } */
+
+/* { dg-final { scan-assembler-not {\tld1rh\tz[0-9]+\.h, p[0-7]/z, \[x1, -1\]\n} } } */
+/* { dg-final { scan-assembler {\tld1rh\tz[0-9]+\.h, p[0-7]/z, \[x1\]\n} } } */
+/* { dg-final { scan-assembler {\tld1rh\tz[0-9]+\.h, p[0-7]/z, \[x1, 126\]\n} } } */
+/* { dg-final { scan-assembler-not {\tld1rh\tz[0-9]+\.h, p[0-7]/z, \[x1, 128\]\n} } } */
+
+/* { dg-final { scan-assembler-not {\tld1rw\tz[0-9]+\.s, p[0-7]/z, \[x1, -1\]\n} } } */
+/* { dg-final { scan-assembler {\tld1rw\tz[0-9]+\.s, p[0-7]/z, \[x1\]\n} } } */
+/* { dg-final { scan-assembler {\tld1rw\tz[0-9]+\.s, p[0-7]/z, \[x1, 252\]\n} } } */
+/* { dg-final { scan-assembler-not {\tld1rw\tz[0-9]+\.s, p[0-7]/z, \[x1, 256\]\n} } } */
+
+/* { dg-final { scan-assembler-not {\tld1rd\tz[0-9]+\.d, p[0-7]/z, \[x1, -1\]\n} } } */
+/* { dg-final { scan-assembler {\tld1rd\tz[0-9]+\.d, p[0-7]/z, \[x1\]\n} } } */
+/* { dg-final { scan-assembler {\tld1rd\tz[0-9]+\.d, p[0-7]/z, \[x1, 504\]\n} } } */
+/* { dg-final { scan-assembler-not {\tld1rd\tz[0-9]+\.d, p[0-7]/z, \[x1, 512\]\n} } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/load_const_offset_1.c b/gcc/testsuite/gcc.target/aarch64/sve/load_const_offset_1.c
new file mode 100644 (file)
index 0000000..501ef5d
--- /dev/null
@@ -0,0 +1,79 @@
+/* { dg-do assemble { target aarch64_asm_sve_ok } } */
+/* { dg-options "-O -msve-vector-bits=256 --save-temps" } */
+
+#include <stdint.h>
+/* vector_size (32) types with signed 64-, 32-, 16- and 8-bit elements.  */
+typedef int64_t vnx2di __attribute__ ((vector_size (32)));
+typedef int32_t vnx4si __attribute__ ((vector_size (32)));
+typedef int16_t vnx8hi __attribute__ ((vector_size (32)));
+typedef int8_t vnx16qi __attribute__ ((vector_size (32)));
+/* Per TYPE, load a[-9], a[-8], a[0], (char *) a + 16, a[7] and a[8] into z0.  */
+#define TEST_TYPE(TYPE)                                                \
+  void sve_load_##TYPE##_neg9 (TYPE *a)                                \
+  {                                                            \
+    register TYPE x asm ("z0") = a[-9];                                \
+    asm volatile ("" :: "w" (x));                              \
+  }                                                            \
+                                                               \
+  void sve_load_##TYPE##_neg8 (TYPE *a)                                \
+  {                                                            \
+    register TYPE x asm ("z0") = a[-8];                                \
+    asm volatile ("" :: "w" (x));                              \
+  }                                                            \
+                                                               \
+  void sve_load_##TYPE##_0 (TYPE *a)                           \
+  {                                                            \
+    register TYPE x asm ("z0") = a[0];                         \
+    asm volatile ("" :: "w" (x));                              \
+  }                                                            \
+                                                               \
+  void sve_load_##TYPE##_unaligned (TYPE *a)                   \
+  {                                                            \
+    register TYPE x asm ("z0") = *(TYPE *) ((char *) a + 16);  \
+    asm volatile ("" :: "w" (x));                              \
+  }                                                            \
+                                                               \
+  void sve_load_##TYPE##_7 (TYPE *a)                           \
+  {                                                            \
+    register TYPE x asm ("z0") = a[7];                         \
+    asm volatile ("" :: "w" (x));                              \
+  }                                                            \
+                                                               \
+  void sve_load_##TYPE##_8 (TYPE *a)                           \
+  {                                                            \
+    register TYPE x asm ("z0") = a[8];                         \
+    asm volatile ("" :: "w" (x));                              \
+  }
+/* dg-final expects a[-9], a + 16 and a[8] to need sub #288, add 16 and add 256.  */
+TEST_TYPE (vnx2di)
+TEST_TYPE (vnx4si)
+TEST_TYPE (vnx8hi)
+TEST_TYPE (vnx16qi)
+
+/* { dg-final { scan-assembler-times {\tsub\tx[0-9]+, x0, #288\n} 4 } } */
+/* { dg-final { scan-assembler-times {\tadd\tx[0-9]+, x0, 16\n} 4 } } */
+/* { dg-final { scan-assembler-times {\tadd\tx[0-9]+, x0, 256\n} 4 } } */
+
+/* { dg-final { scan-assembler-not {\tld1d\tz0\.d, p[0-7]/z, \[x0, #-9, mul vl\]\n} } } */
+/* { dg-final { scan-assembler-times {\tld1d\tz0\.d, p[0-7]/z, \[x0, #-8, mul vl\]\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tld1d\tz0\.d, p[0-7]/z, \[x0\]\n} 4 } } */
+/* { dg-final { scan-assembler-times {\tld1d\tz0\.d, p[0-7]/z, \[x0, #7, mul vl\]\n} 1 } } */
+/* { dg-final { scan-assembler-not {\tld1d\tz0\.d, p[0-7]/z, \[x0, #8, mul vl\]\n} } } */
+
+/* { dg-final { scan-assembler-not {\tld1w\tz0\.s, p[0-7]/z, \[x0, #-9, mul vl\]\n} } } */
+/* { dg-final { scan-assembler-times {\tld1w\tz0\.s, p[0-7]/z, \[x0, #-8, mul vl\]\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tld1w\tz0\.s, p[0-7]/z, \[x0\]\n} 4 } } */
+/* { dg-final { scan-assembler-times {\tld1w\tz0\.s, p[0-7]/z, \[x0, #7, mul vl\]\n} 1 } } */
+/* { dg-final { scan-assembler-not {\tld1w\tz0\.s, p[0-7]/z, \[x0, #8, mul vl\]\n} } } */
+
+/* { dg-final { scan-assembler-not {\tld1h\tz0\.h, p[0-7]/z, \[x0, #-9, mul vl\]\n} } } */
+/* { dg-final { scan-assembler-times {\tld1h\tz0\.h, p[0-7]/z, \[x0, #-8, mul vl\]\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tld1h\tz0\.h, p[0-7]/z, \[x0\]\n} 4 } } */
+/* { dg-final { scan-assembler-times {\tld1h\tz0\.h, p[0-7]/z, \[x0, #7, mul vl\]\n} 1 } } */
+/* { dg-final { scan-assembler-not {\tld1h\tz0\.h, p[0-7]/z, \[x0, #8, mul vl\]\n} } } */
+
+/* { dg-final { scan-assembler-not {\tld1b\tz0\.b, p[0-7]/z, \[x0, #-9, mul vl\]\n} } } */
+/* { dg-final { scan-assembler-times {\tld1b\tz0\.b, p[0-7]/z, \[x0, #-8, mul vl\]\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tld1b\tz0\.b, p[0-7]/z, \[x0\]\n} 4 } } */
+/* { dg-final { scan-assembler-times {\tld1b\tz0\.b, p[0-7]/z, \[x0, #7, mul vl\]\n} 1 } } */
+/* { dg-final { scan-assembler-not {\tld1b\tz0\.b, p[0-7]/z, \[x0, #8, mul vl\]\n} } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/load_const_offset_2.c b/gcc/testsuite/gcc.target/aarch64/sve/load_const_offset_2.c
new file mode 100644 (file)
index 0000000..25e84e3
--- /dev/null
@@ -0,0 +1,14 @@
+/* { dg-do assemble { target aarch64_asm_sve_ok } } */
+/* { dg-options "-O2 -ftree-vectorize -save-temps" } */
+/* Add (signed char) (b[i] | mask) to the unsigned int a[i] for 0 <= i < n.  */
+void
+f (unsigned int *restrict a, signed char *restrict b, signed char mask, int n)
+{
+  for (int i = 0; i < n; ++i)
+    a[i] += (signed char) (b[i] | mask);
+}
+
+/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.s, p[0-7]/z, \[x[0-9]+\]\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.s, p[0-7]/z, \[x[0-9]+, #1, mul vl\]\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.s, p[0-7]/z, \[x[0-9]+, #2, mul vl\]\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.s, p[0-7]/z, \[x[0-9]+, #3, mul vl\]\n} 1 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/load_const_offset_3.c b/gcc/testsuite/gcc.target/aarch64/sve/load_const_offset_3.c
new file mode 100644 (file)
index 0000000..1c8bd88
--- /dev/null
@@ -0,0 +1,9 @@
+/* { dg-do assemble { target aarch64_asm_sve_ok } } */
+/* { dg-options "-O2 -ftree-vectorize -save-temps -msve-vector-bits=256" } */
+
+#include "load_const_offset_2.c"
+
+/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.s, p[0-7]/z, \[x[0-9]+\]\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.s, p[0-7]/z, \[x[0-9]+, #1, mul vl\]\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.s, p[0-7]/z, \[x[0-9]+, #2, mul vl\]\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.s, p[0-7]/z, \[x[0-9]+, #3, mul vl\]\n} 1 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/load_scalar_offset_1.c b/gcc/testsuite/gcc.target/aarch64/sve/load_scalar_offset_1.c
new file mode 100644 (file)
index 0000000..3290535
--- /dev/null
@@ -0,0 +1,70 @@
+/* { dg-do assemble { target aarch64_asm_sve_ok } } */
+/* { dg-options "-O3 -msve-vector-bits=256 --save-temps" } */
+
+#include <stdint.h>
+/* vector_size (32) types with signed 64-, 32-, 16- and 8-bit elements.  */
+typedef int64_t vnx2di __attribute__ ((vector_size (32)));
+typedef int32_t vnx4si __attribute__ ((vector_size (32)));
+typedef int16_t vnx8hi __attribute__ ((vector_size (32)));
+typedef int8_t vnx16qi __attribute__ ((vector_size (32)));
+/* Load a vnx2di from &a[i], unsigned i pinned to x1; dg-final expects "x1, lsl 3".  */
+void sve_load_64_u_lsl (uint64_t *a)
+{
+  register unsigned long i asm("x1");
+  asm volatile ("" : "=r" (i));
+  asm volatile ("" :: "w" (*(vnx2di *)&a[i]));
+}
+/* Load a vnx2di from &a[i], signed i pinned to x1; dg-final expects "x1, lsl 3".  */
+void sve_load_64_s_lsl (int64_t *a)
+{
+  register long i asm("x1");
+  asm volatile ("" : "=r" (i));
+  asm volatile ("" :: "w" (*(vnx2di *)&a[i]));
+}
+/* Load a vnx4si from &a[i], unsigned i pinned to x1; dg-final expects "x1, lsl 2".  */
+void sve_load_32_u_lsl (uint32_t *a)
+{
+  register unsigned long i asm("x1");
+  asm volatile ("" : "=r" (i));
+  asm volatile ("" :: "w" (*(vnx4si *)&a[i]));
+}
+/* Load a vnx4si from &a[i], signed i pinned to x1; dg-final expects "x1, lsl 2".  */
+void sve_load_32_s_lsl (int32_t *a)
+{
+  register long i asm("x1");
+  asm volatile ("" : "=r" (i));
+  asm volatile ("" :: "w" (*(vnx4si *)&a[i]));
+}
+/* Load a vnx8hi from &a[i], unsigned i pinned to x1; dg-final expects "x1, lsl 1".  */
+void sve_load_16_z_lsl (uint16_t *a)
+{
+  register unsigned long i asm("x1");
+  asm volatile ("" : "=r" (i));
+  asm volatile ("" :: "w" (*(vnx8hi *)&a[i]));
+}
+/* Load a vnx8hi from &a[i], signed i pinned to x1; dg-final expects "x1, lsl 1".  */
+void sve_load_16_s_lsl (int16_t *a)
+{
+  register long i asm("x1");
+  asm volatile ("" : "=r" (i));
+  asm volatile ("" :: "w" (*(vnx8hi *)&a[i]));
+}
+/* Load a vnx16qi from &a[i], unsigned i pinned to x1; dg-final expects "[x0, x1]".  */
+void sve_load_8_z (uint8_t *a)
+{
+  register unsigned long i asm("x1");
+  asm volatile ("" : "=r" (i));
+  asm volatile ("" :: "w" (*(vnx16qi *)&a[i]));
+}
+/* Load a vnx16qi from &a[i], signed i pinned to x1; dg-final expects "[x0, x1]".  */
+void sve_load_8_s (int8_t *a)
+{
+  register long i asm("x1");
+  asm volatile ("" : "=r" (i));
+  asm volatile ("" :: "w" (*(vnx16qi *)&a[i]));
+}
+
+/* { dg-final { scan-assembler-times {\tld1d\tz0\.d, p[0-7]/z, \[x0, x1, lsl 3\]\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tld1w\tz0\.s, p[0-7]/z, \[x0, x1, lsl 2\]\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tld1h\tz0\.h, p[0-7]/z, \[x0, x1, lsl 1\]\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tld1b\tz0\.b, p[0-7]/z, \[x0, x1\]\n} 2 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/logical_1.c b/gcc/testsuite/gcc.target/aarch64/sve/logical_1.c
new file mode 100644 (file)
index 0000000..25cd908
--- /dev/null
@@ -0,0 +1,277 @@
+/* { dg-do assemble { target aarch64_asm_sve_ok } } */
+/* { dg-options "-O3 --save-temps" } */
+
+#define DO_CONSTANT(VALUE, TYPE, OP, NAME)  /* Define vlogical_imm_<NAME>_<TYPE>.  */ \
+void vlogical_imm_##NAME##_##TYPE (TYPE *dst, int count)       \
+{                                                              \
+  for (int i = 0; i < count; i++)  /* In-place loop over count elements.  */ \
+    dst[i] = dst[i] OP VALUE;  /* Combine each element with the constant VALUE using OP.  */ \
+}
+
+#define DO_LOGICAL_OPS_BRIEF(TYPE, OP, NAME)  /* DO_CONSTANT for 1, 2, 5, 6, 8, 9, -1, -2, -5 and -6 only.  */ \
+  DO_CONSTANT (1, TYPE, OP, NAME ## 1)         \
+  DO_CONSTANT (2, TYPE, OP, NAME ## 2)         \
+  DO_CONSTANT (5, TYPE, OP, NAME ## 5)         \
+  DO_CONSTANT (6, TYPE, OP, NAME ## 6)         \
+  DO_CONSTANT (8, TYPE, OP, NAME ## 8)         \
+  DO_CONSTANT (9, TYPE, OP, NAME ## 9)         \
+  DO_CONSTANT (-1, TYPE, OP, NAME ## minus1)   \
+  DO_CONSTANT (-2, TYPE, OP, NAME ## minus2)   \
+  DO_CONSTANT (-5, TYPE, OP, NAME ## minus5)   \
+  DO_CONSTANT (-6, TYPE, OP, NAME ## minus6)
+
+#define DO_LOGICAL_OPS(TYPE, OP, NAME)  /* DO_CONSTANT for 1..35, values around 2^8, 2^16 and 2^31, and -1..-9.  */ \
+  DO_CONSTANT (1, TYPE, OP, NAME ## 1)                         \
+  DO_CONSTANT (2, TYPE, OP, NAME ## 2)                         \
+  DO_CONSTANT (3, TYPE, OP, NAME ## 3)                         \
+  DO_CONSTANT (4, TYPE, OP, NAME ## 4)                         \
+  DO_CONSTANT (5, TYPE, OP, NAME ## 5)                         \
+  DO_CONSTANT (6, TYPE, OP, NAME ## 6)                         \
+  DO_CONSTANT (7, TYPE, OP, NAME ## 7)                         \
+  DO_CONSTANT (8, TYPE, OP, NAME ## 8)                         \
+  DO_CONSTANT (9, TYPE, OP, NAME ## 9)                         \
+  DO_CONSTANT (10, TYPE, OP, NAME ## 10)                       \
+  DO_CONSTANT (11, TYPE, OP, NAME ## 11)                       \
+  DO_CONSTANT (12, TYPE, OP, NAME ## 12)                       \
+  DO_CONSTANT (13, TYPE, OP, NAME ## 13)                       \
+  DO_CONSTANT (14, TYPE, OP, NAME ## 14)                       \
+  DO_CONSTANT (15, TYPE, OP, NAME ## 15)                       \
+  DO_CONSTANT (16, TYPE, OP, NAME ## 16)                       \
+  DO_CONSTANT (17, TYPE, OP, NAME ## 17)                       \
+  DO_CONSTANT (18, TYPE, OP, NAME ## 18)                       \
+  DO_CONSTANT (19, TYPE, OP, NAME ## 19)                       \
+  DO_CONSTANT (20, TYPE, OP, NAME ## 20)                       \
+  DO_CONSTANT (21, TYPE, OP, NAME ## 21)                       \
+  DO_CONSTANT (22, TYPE, OP, NAME ## 22)                       \
+  DO_CONSTANT (23, TYPE, OP, NAME ## 23)                       \
+  DO_CONSTANT (24, TYPE, OP, NAME ## 24)                       \
+  DO_CONSTANT (25, TYPE, OP, NAME ## 25)                       \
+  DO_CONSTANT (26, TYPE, OP, NAME ## 26)                       \
+  DO_CONSTANT (27, TYPE, OP, NAME ## 27)                       \
+  DO_CONSTANT (28, TYPE, OP, NAME ## 28)                       \
+  DO_CONSTANT (29, TYPE, OP, NAME ## 29)                       \
+  DO_CONSTANT (30, TYPE, OP, NAME ## 30)                       \
+  DO_CONSTANT (31, TYPE, OP, NAME ## 31)                       \
+  DO_CONSTANT (32, TYPE, OP, NAME ## 32)                       \
+  DO_CONSTANT (33, TYPE, OP, NAME ## 33)                       \
+  DO_CONSTANT (34, TYPE, OP, NAME ## 34)                       \
+  DO_CONSTANT (35, TYPE, OP, NAME ## 35)                       \
+  DO_CONSTANT (252, TYPE, OP, NAME ## 252)                     \
+  DO_CONSTANT (253, TYPE, OP, NAME ## 253)                     \
+  DO_CONSTANT (254, TYPE, OP, NAME ## 254)                     \
+  DO_CONSTANT (255, TYPE, OP, NAME ## 255)                     \
+  DO_CONSTANT (256, TYPE, OP, NAME ## 256)                     \
+  DO_CONSTANT (257, TYPE, OP, NAME ## 257)                     \
+  DO_CONSTANT (65535, TYPE, OP, NAME ## 65535)                 \
+  DO_CONSTANT (65536, TYPE, OP, NAME ## 65536)                 \
+  DO_CONSTANT (65537, TYPE, OP, NAME ## 65537)                 \
+  DO_CONSTANT (2147483646, TYPE, OP, NAME ## 2147483646)       \
+  DO_CONSTANT (2147483647, TYPE, OP, NAME ## 2147483647)       \
+  DO_CONSTANT (2147483648, TYPE, OP, NAME ## 2147483648)       \
+  DO_CONSTANT (-1, TYPE, OP, NAME ## minus1)                   \
+  DO_CONSTANT (-2, TYPE, OP, NAME ## minus2)                   \
+  DO_CONSTANT (-3, TYPE, OP, NAME ## minus3)                   \
+  DO_CONSTANT (-4, TYPE, OP, NAME ## minus4)                   \
+  DO_CONSTANT (-5, TYPE, OP, NAME ## minus5)                   \
+  DO_CONSTANT (-6, TYPE, OP, NAME ## minus6)                   \
+  DO_CONSTANT (-7, TYPE, OP, NAME ## minus7)                   \
+  DO_CONSTANT (-8, TYPE, OP, NAME ## minus8)                   \
+  DO_CONSTANT (-9, TYPE, OP, NAME ## minus9)
+
+DO_LOGICAL_OPS_BRIEF (char, &, and)  /* Short constant list: AND on char elements.  */
+DO_LOGICAL_OPS_BRIEF (long, &, and)  /* Short constant list: AND on long elements.  */
+
+DO_LOGICAL_OPS (int, &, and)  /* Full constant list: AND on int elements.  */
+DO_LOGICAL_OPS (int, |, or)  /* Full constant list: OR on int elements.  */
+DO_LOGICAL_OPS (int, ^, xor)  /* Full constant list: XOR on int elements.  */
+
+/* { dg-final { scan-assembler-times {\tand\tz[0-9]+\.d, z[0-9]+\.d, #0x1\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tand\tz[0-9]+\.s, z[0-9]+\.s, #0x1\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tand\tz[0-9]+\.b, z[0-9]+\.b, #0x1\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tand\tz[0-9]+\.d, z[0-9]+\.d, #0x2\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tand\tz[0-9]+\.s, z[0-9]+\.s, #0x2\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tand\tz[0-9]+\.b, z[0-9]+\.b, #0x2\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tand\tz[0-9]+\.s, z[0-9]+\.s, #0x3\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tand\tz[0-9]+\.s, z[0-9]+\.s, #0x4\n} 1 } } */
+/* { dg-final { scan-assembler-not {\tand\tz[0-9]+\.s, z[0-9]+\.s, #0x5\n} } } */
+/* { dg-final { scan-assembler-times {\tand\tz[0-9]+\.d, z[0-9]+\.d, #0x6\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tand\tz[0-9]+\.s, z[0-9]+\.s, #0x6\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tand\tz[0-9]+\.b, z[0-9]+\.b, #0x6\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tand\tz[0-9]+\.s, z[0-9]+\.s, #0x7\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tand\tz[0-9]+\.d, z[0-9]+\.d, #0x8\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tand\tz[0-9]+\.s, z[0-9]+\.s, #0x8\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tand\tz[0-9]+\.b, z[0-9]+\.b, #0x8\n} 1 } } */
+/* { dg-final { scan-assembler-not {\tand\tz[0-9]+\.s, z[0-9]+\.s, #0x9\n} } } */
+/* { dg-final { scan-assembler-not {\tand\tz[0-9]+\.s, z[0-9]+\.s, #0xa\n} } } */
+/* { dg-final { scan-assembler-not {\tand\tz[0-9]+\.s, z[0-9]+\.s, #0xb\n} } } */
+/* { dg-final { scan-assembler-times {\tand\tz[0-9]+\.s, z[0-9]+\.s, #0xc\n} 1 } } */
+/* { dg-final { scan-assembler-not {\tand\tz[0-9]+\.s, z[0-9]+\.s, #0xd\n} } } */
+/* { dg-final { scan-assembler-times {\tand\tz[0-9]+\.s, z[0-9]+\.s, #0xe\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tand\tz[0-9]+\.s, z[0-9]+\.s, #0xf\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tand\tz[0-9]+\.s, z[0-9]+\.s, #0x10\n} 1 } } */
+/* { dg-final { scan-assembler-not {\tand\tz[0-9]+\.s, z[0-9]+\.s, #0x11\n} } } */
+/* { dg-final { scan-assembler-not {\tand\tz[0-9]+\.s, z[0-9]+\.s, #0x12\n} } } */
+/* { dg-final { scan-assembler-not {\tand\tz[0-9]+\.s, z[0-9]+\.s, #0x13\n} } } */
+/* { dg-final { scan-assembler-not {\tand\tz[0-9]+\.s, z[0-9]+\.s, #0x14\n} } } */
+/* { dg-final { scan-assembler-not {\tand\tz[0-9]+\.s, z[0-9]+\.s, #0x15\n} } } */
+/* { dg-final { scan-assembler-not {\tand\tz[0-9]+\.s, z[0-9]+\.s, #0x16\n} } } */
+/* { dg-final { scan-assembler-not {\tand\tz[0-9]+\.s, z[0-9]+\.s, #0x17\n} } } */
+/* { dg-final { scan-assembler-times {\tand\tz[0-9]+\.s, z[0-9]+\.s, #0x18\n} 1 } } */
+/* { dg-final { scan-assembler-not {\tand\tz[0-9]+\.s, z[0-9]+\.s, #0x19\n} } } */
+/* { dg-final { scan-assembler-not {\tand\tz[0-9]+\.s, z[0-9]+\.s, #0x1a\n} } } */
+/* { dg-final { scan-assembler-not {\tand\tz[0-9]+\.s, z[0-9]+\.s, #0x1b\n} } } */
+/* { dg-final { scan-assembler-times {\tand\tz[0-9]+\.s, z[0-9]+\.s, #0x1c\n} 1 } } */
+/* { dg-final { scan-assembler-not {\tand\tz[0-9]+\.s, z[0-9]+\.s, #0x1d\n} } } */
+/* { dg-final { scan-assembler-times {\tand\tz[0-9]+\.s, z[0-9]+\.s, #0x1e\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tand\tz[0-9]+\.s, z[0-9]+\.s, #0x1f\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tand\tz[0-9]+\.s, z[0-9]+\.s, #0x20\n} 1 } } */
+/* { dg-final { scan-assembler-not {\tand\tz[0-9]+\.s, z[0-9]+\.s, #0x21\n} } } */
+/* { dg-final { scan-assembler-not {\tand\tz[0-9]+\.s, z[0-9]+\.s, #0x22\n} } } */
+/* { dg-final { scan-assembler-not {\tand\tz[0-9]+\.s, z[0-9]+\.s, #0x23\n} } } */
+/* { dg-final { scan-assembler-times {\tand\tz[0-9]+\.s, z[0-9]+\.s, #0xfc\n} 1 } } */
+/* { dg-final { scan-assembler-not {\tand\tz[0-9]+\.s, z[0-9]+\.s, #0xfd\n} } } */
+/* { dg-final { scan-assembler-times {\tand\tz[0-9]+\.s, z[0-9]+\.s, #0xfe\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tand\tz[0-9]+\.s, z[0-9]+\.s, #0xff\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tand\tz[0-9]+\.s, z[0-9]+\.s, #0x100\n} 1 } } */
+/* { dg-final { scan-assembler-not {\tand\tz[0-9]+\.s, z[0-9]+\.s, #0x101\n} } } */
+/* { dg-final { scan-assembler-times {\tand\tz[0-9]+\.s, z[0-9]+\.s, #0xffff\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tand\tz[0-9]+\.s, z[0-9]+\.s, #0x10000\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tand\tz[0-9]+\.s, z[0-9]+\.s, #0x10001\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tand\tz[0-9]+\.s, z[0-9]+\.s, #0x7ffffffe\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tand\tz[0-9]+\.s, z[0-9]+\.s, #0x7fffffff\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tand\tz[0-9]+\.s, z[0-9]+\.s, #0x80000000\n} 1 } } */
+/* { dg-final { scan-assembler-not {\tand\tz[0-9]+\.s, z[0-9]+\.s, #0xffffffff\n} } } */
+/* { dg-final { scan-assembler-times {\tand\tz[0-9]+\.d, z[0-9]+\.d, #0xfffffffffffffffe\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tand\tz[0-9]+\.s, z[0-9]+\.s, #0xfffffffe\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tand\tz[0-9]+\.b, z[0-9]+\.b, #0xfe\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tand\tz[0-9]+\.s, z[0-9]+\.s, #0xfffffffd\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tand\tz[0-9]+\.s, z[0-9]+\.s, #0xfffffffc\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tand\tz[0-9]+\.d, z[0-9]+\.d, #0xfffffffffffffffb\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tand\tz[0-9]+\.s, z[0-9]+\.s, #0xfffffffb\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tand\tz[0-9]+\.b, z[0-9]+\.b, #0xfb\n} 1 } } */
+/* { dg-final { scan-assembler-not {\tand\tz[0-9]+\.d, z[0-9]+\.d, #0xfffffffffffffffa\n} } } */
+/* { dg-final { scan-assembler-not {\tand\tz[0-9]+\.s, z[0-9]+\.s, #0xfffffffa\n} } } */
+/* { dg-final { scan-assembler-not {\tand\tz[0-9]+\.b, z[0-9]+\.b, #0xfa\n} } } */
+/* { dg-final { scan-assembler-times {\tand\tz[0-9]+\.s, z[0-9]+\.s, #0xfffffff9\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tand\tz[0-9]+\.s, z[0-9]+\.s, #0xfffffff8\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tand\tz[0-9]+\.s, z[0-9]+\.s, #0xfffffff7\n} 1 } } */
+
+/* { dg-final { scan-assembler-times {\tand\tz[0-9]+\.d, z[0-9]+\.d, z[0-9]+\.d\n} 28 } } */
+
+/* { dg-final { scan-assembler-times {\torr\tz[0-9]+\.s, z[0-9]+\.s, #0x1\n} 1 } } */
+/* { dg-final { scan-assembler-times {\torr\tz[0-9]+\.s, z[0-9]+\.s, #0x2\n} 1 } } */
+/* { dg-final { scan-assembler-times {\torr\tz[0-9]+\.s, z[0-9]+\.s, #0x3\n} 1 } } */
+/* { dg-final { scan-assembler-times {\torr\tz[0-9]+\.s, z[0-9]+\.s, #0x4\n} 1 } } */
+/* { dg-final { scan-assembler-not {\torr\tz[0-9]+\.s, z[0-9]+\.s, #0x5\n} } } */
+/* { dg-final { scan-assembler-times {\torr\tz[0-9]+\.s, z[0-9]+\.s, #0x6\n} 1 } } */
+/* { dg-final { scan-assembler-times {\torr\tz[0-9]+\.s, z[0-9]+\.s, #0x7\n} 1 } } */
+/* { dg-final { scan-assembler-times {\torr\tz[0-9]+\.s, z[0-9]+\.s, #0x8\n} 1 } } */
+/* { dg-final { scan-assembler-not {\torr\tz[0-9]+\.s, z[0-9]+\.s, #0x9\n} } } */
+/* { dg-final { scan-assembler-not {\torr\tz[0-9]+\.s, z[0-9]+\.s, #0xa\n} } } */
+/* { dg-final { scan-assembler-not {\torr\tz[0-9]+\.s, z[0-9]+\.s, #0xb\n} } } */
+/* { dg-final { scan-assembler-times {\torr\tz[0-9]+\.s, z[0-9]+\.s, #0xc\n} 1 } } */
+/* { dg-final { scan-assembler-not {\torr\tz[0-9]+\.s, z[0-9]+\.s, #0xd\n} } } */
+/* { dg-final { scan-assembler-times {\torr\tz[0-9]+\.s, z[0-9]+\.s, #0xe\n} 1 } } */
+/* { dg-final { scan-assembler-times {\torr\tz[0-9]+\.s, z[0-9]+\.s, #0xf\n} 1 } } */
+/* { dg-final { scan-assembler-times {\torr\tz[0-9]+\.s, z[0-9]+\.s, #0x10\n} 1 } } */
+/* { dg-final { scan-assembler-not {\torr\tz[0-9]+\.s, z[0-9]+\.s, #0x11\n} } } */
+/* { dg-final { scan-assembler-not {\torr\tz[0-9]+\.s, z[0-9]+\.s, #0x12\n} } } */
+/* { dg-final { scan-assembler-not {\torr\tz[0-9]+\.s, z[0-9]+\.s, #0x13\n} } } */
+/* { dg-final { scan-assembler-not {\torr\tz[0-9]+\.s, z[0-9]+\.s, #0x14\n} } } */
+/* { dg-final { scan-assembler-not {\torr\tz[0-9]+\.s, z[0-9]+\.s, #0x15\n} } } */
+/* { dg-final { scan-assembler-not {\torr\tz[0-9]+\.s, z[0-9]+\.s, #0x16\n} } } */
+/* { dg-final { scan-assembler-not {\torr\tz[0-9]+\.s, z[0-9]+\.s, #0x17\n} } } */
+/* { dg-final { scan-assembler-times {\torr\tz[0-9]+\.s, z[0-9]+\.s, #0x18\n} 1 } } */
+/* { dg-final { scan-assembler-not {\torr\tz[0-9]+\.s, z[0-9]+\.s, #0x19\n} } } */
+/* { dg-final { scan-assembler-not {\torr\tz[0-9]+\.s, z[0-9]+\.s, #0x1a\n} } } */
+/* { dg-final { scan-assembler-not {\torr\tz[0-9]+\.s, z[0-9]+\.s, #0x1b\n} } } */
+/* { dg-final { scan-assembler-times {\torr\tz[0-9]+\.s, z[0-9]+\.s, #0x1c\n} 1 } } */
+/* { dg-final { scan-assembler-not {\torr\tz[0-9]+\.s, z[0-9]+\.s, #0x1d\n} } } */
+/* { dg-final { scan-assembler-times {\torr\tz[0-9]+\.s, z[0-9]+\.s, #0x1e\n} 1 } } */
+/* { dg-final { scan-assembler-times {\torr\tz[0-9]+\.s, z[0-9]+\.s, #0x1f\n} 1 } } */
+/* { dg-final { scan-assembler-times {\torr\tz[0-9]+\.s, z[0-9]+\.s, #0x20\n} 1 } } */
+/* { dg-final { scan-assembler-not {\torr\tz[0-9]+\.s, z[0-9]+\.s, #0x21\n} } } */
+/* { dg-final { scan-assembler-not {\torr\tz[0-9]+\.s, z[0-9]+\.s, #0x22\n} } } */
+/* { dg-final { scan-assembler-not {\torr\tz[0-9]+\.s, z[0-9]+\.s, #0x23\n} } } */
+/* { dg-final { scan-assembler-times {\torr\tz[0-9]+\.s, z[0-9]+\.s, #0xfc\n} 1 } } */
+/* { dg-final { scan-assembler-not {\torr\tz[0-9]+\.s, z[0-9]+\.s, #0xfd\n} } } */
+/* { dg-final { scan-assembler-times {\torr\tz[0-9]+\.s, z[0-9]+\.s, #0xfe\n} 1 } } */
+/* { dg-final { scan-assembler-times {\torr\tz[0-9]+\.s, z[0-9]+\.s, #0xff\n} 1 } } */
+/* { dg-final { scan-assembler-times {\torr\tz[0-9]+\.s, z[0-9]+\.s, #0x100\n} 1 } } */
+/* { dg-final { scan-assembler-not {\torr\tz[0-9]+\.s, z[0-9]+\.s, #0x101\n} } } */
+/* { dg-final { scan-assembler-times {\torr\tz[0-9]+\.s, z[0-9]+\.s, #0xffff\n} 1 } } */
+/* { dg-final { scan-assembler-times {\torr\tz[0-9]+\.s, z[0-9]+\.s, #0x10000\n} 1 } } */
+/* { dg-final { scan-assembler-times {\torr\tz[0-9]+\.s, z[0-9]+\.s, #0x10001\n} 1 } } */
+/* { dg-final { scan-assembler-times {\torr\tz[0-9]+\.s, z[0-9]+\.s, #0x7ffffffe\n} 1 } } */
+/* { dg-final { scan-assembler-times {\torr\tz[0-9]+\.s, z[0-9]+\.s, #0x7fffffff\n} 1 } } */
+/* { dg-final { scan-assembler-times {\torr\tz[0-9]+\.s, z[0-9]+\.s, #0x80000000\n} 1 } } */
+/* { dg-final { scan-assembler-not {\torr\tz[0-9]+\.s, z[0-9]+\.s, #0xffffffff\n} } } */
+/* { dg-final { scan-assembler-times {\torr\tz[0-9]+\.s, z[0-9]+\.s, #0xfffffffe\n} 1 } } */
+/* { dg-final { scan-assembler-times {\torr\tz[0-9]+\.s, z[0-9]+\.s, #0xfffffffd\n} 1 } } */
+/* { dg-final { scan-assembler-times {\torr\tz[0-9]+\.s, z[0-9]+\.s, #0xfffffffc\n} 1 } } */
+/* { dg-final { scan-assembler-times {\torr\tz[0-9]+\.s, z[0-9]+\.s, #0xfffffffb\n} 1 } } */
+/* { dg-final { scan-assembler-not {\torr\tz[0-9]+\.s, z[0-9]+\.s, #0xfffffffa\n} } } */
+/* { dg-final { scan-assembler-times {\torr\tz[0-9]+\.s, z[0-9]+\.s, #0xfffffff9\n} 1 } } */
+/* { dg-final { scan-assembler-times {\torr\tz[0-9]+\.s, z[0-9]+\.s, #0xfffffff8\n} 1 } } */
+/* { dg-final { scan-assembler-times {\torr\tz[0-9]+\.s, z[0-9]+\.s, #0xfffffff7\n} 1 } } */
+
+/* { dg-final { scan-assembler-times {\torr\tz[0-9]+\.d, z[0-9]+\.d, z[0-9]+\.d\n} 22 } } */
+
+/* { dg-final { scan-assembler-times {\teor\tz[0-9]+\.s, z[0-9]+\.s, #0x1\n} 1 } } */
+/* { dg-final { scan-assembler-times {\teor\tz[0-9]+\.s, z[0-9]+\.s, #0x2\n} 1 } } */
+/* { dg-final { scan-assembler-times {\teor\tz[0-9]+\.s, z[0-9]+\.s, #0x3\n} 1 } } */
+/* { dg-final { scan-assembler-times {\teor\tz[0-9]+\.s, z[0-9]+\.s, #0x4\n} 1 } } */
+/* { dg-final { scan-assembler-not {\teor\tz[0-9]+\.s, z[0-9]+\.s, #0x5\n} } } */
+/* { dg-final { scan-assembler-times {\teor\tz[0-9]+\.s, z[0-9]+\.s, #0x6\n} 1 } } */
+/* { dg-final { scan-assembler-times {\teor\tz[0-9]+\.s, z[0-9]+\.s, #0x7\n} 1 } } */
+/* { dg-final { scan-assembler-times {\teor\tz[0-9]+\.s, z[0-9]+\.s, #0x8\n} 1 } } */
+/* { dg-final { scan-assembler-not {\teor\tz[0-9]+\.s, z[0-9]+\.s, #0x9\n} } } */
+/* { dg-final { scan-assembler-not {\teor\tz[0-9]+\.s, z[0-9]+\.s, #0xa\n} } } */
+/* { dg-final { scan-assembler-not {\teor\tz[0-9]+\.s, z[0-9]+\.s, #0xb\n} } } */
+/* { dg-final { scan-assembler-times {\teor\tz[0-9]+\.s, z[0-9]+\.s, #0xc\n} 1 } } */
+/* { dg-final { scan-assembler-not {\teor\tz[0-9]+\.s, z[0-9]+\.s, #0xd\n} } } */
+/* { dg-final { scan-assembler-times {\teor\tz[0-9]+\.s, z[0-9]+\.s, #0xe\n} 1 } } */
+/* { dg-final { scan-assembler-times {\teor\tz[0-9]+\.s, z[0-9]+\.s, #0xf\n} 1 } } */
+/* { dg-final { scan-assembler-times {\teor\tz[0-9]+\.s, z[0-9]+\.s, #0x10\n} 1 } } */
+/* { dg-final { scan-assembler-not {\teor\tz[0-9]+\.s, z[0-9]+\.s, #0x11\n} } } */
+/* { dg-final { scan-assembler-not {\teor\tz[0-9]+\.s, z[0-9]+\.s, #0x12\n} } } */
+/* { dg-final { scan-assembler-not {\teor\tz[0-9]+\.s, z[0-9]+\.s, #0x13\n} } } */
+/* { dg-final { scan-assembler-not {\teor\tz[0-9]+\.s, z[0-9]+\.s, #0x14\n} } } */
+/* { dg-final { scan-assembler-not {\teor\tz[0-9]+\.s, z[0-9]+\.s, #0x15\n} } } */
+/* { dg-final { scan-assembler-not {\teor\tz[0-9]+\.s, z[0-9]+\.s, #0x16\n} } } */
+/* { dg-final { scan-assembler-not {\teor\tz[0-9]+\.s, z[0-9]+\.s, #0x17\n} } } */
+/* { dg-final { scan-assembler-times {\teor\tz[0-9]+\.s, z[0-9]+\.s, #0x18\n} 1 } } */
+/* { dg-final { scan-assembler-not {\teor\tz[0-9]+\.s, z[0-9]+\.s, #0x19\n} } } */
+/* { dg-final { scan-assembler-not {\teor\tz[0-9]+\.s, z[0-9]+\.s, #0x1a\n} } } */
+/* { dg-final { scan-assembler-not {\teor\tz[0-9]+\.s, z[0-9]+\.s, #0x1b\n} } } */
+/* { dg-final { scan-assembler-times {\teor\tz[0-9]+\.s, z[0-9]+\.s, #0x1c\n} 1 } } */
+/* { dg-final { scan-assembler-not {\teor\tz[0-9]+\.s, z[0-9]+\.s, #0x1d\n} } } */
+/* { dg-final { scan-assembler-times {\teor\tz[0-9]+\.s, z[0-9]+\.s, #0x1e\n} 1 } } */
+/* { dg-final { scan-assembler-times {\teor\tz[0-9]+\.s, z[0-9]+\.s, #0x1f\n} 1 } } */
+/* { dg-final { scan-assembler-times {\teor\tz[0-9]+\.s, z[0-9]+\.s, #0x20\n} 1 } } */
+/* { dg-final { scan-assembler-not {\teor\tz[0-9]+\.s, z[0-9]+\.s, #0x21\n} } } */
+/* { dg-final { scan-assembler-not {\teor\tz[0-9]+\.s, z[0-9]+\.s, #0x22\n} } } */
+/* { dg-final { scan-assembler-not {\teor\tz[0-9]+\.s, z[0-9]+\.s, #0x23\n} } } */
+/* { dg-final { scan-assembler-times {\teor\tz[0-9]+\.s, z[0-9]+\.s, #0xfc\n} 1 } } */
+/* { dg-final { scan-assembler-not {\teor\tz[0-9]+\.s, z[0-9]+\.s, #0xfd\n} } } */
+/* { dg-final { scan-assembler-times {\teor\tz[0-9]+\.s, z[0-9]+\.s, #0xfe\n} 1 } } */
+/* { dg-final { scan-assembler-times {\teor\tz[0-9]+\.s, z[0-9]+\.s, #0xff\n} 1 } } */
+/* { dg-final { scan-assembler-times {\teor\tz[0-9]+\.s, z[0-9]+\.s, #0x100\n} 1 } } */
+/* { dg-final { scan-assembler-not {\teor\tz[0-9]+\.s, z[0-9]+\.s, #0x101\n} } } */
+/* { dg-final { scan-assembler-times {\teor\tz[0-9]+\.s, z[0-9]+\.s, #0xffff\n} 1 } } */
+/* { dg-final { scan-assembler-times {\teor\tz[0-9]+\.s, z[0-9]+\.s, #0x10000\n} 1 } } */
+/* { dg-final { scan-assembler-times {\teor\tz[0-9]+\.s, z[0-9]+\.s, #0x10001\n} 1 } } */
+/* { dg-final { scan-assembler-times {\teor\tz[0-9]+\.s, z[0-9]+\.s, #0x7ffffffe\n} 1 } } */
+/* { dg-final { scan-assembler-times {\teor\tz[0-9]+\.s, z[0-9]+\.s, #0x7fffffff\n} 1 } } */
+/* { dg-final { scan-assembler-times {\teor\tz[0-9]+\.s, z[0-9]+\.s, #0x80000000\n} 1 } } */
+/* { dg-final { scan-assembler-not {\teor\tz[0-9]+\.s, z[0-9]+\.s, #0xffffffff\n} } } */
+/* { dg-final { scan-assembler-times {\teor\tz[0-9]+\.s, z[0-9]+\.s, #0xfffffffe\n} 1 } } */
+/* { dg-final { scan-assembler-times {\teor\tz[0-9]+\.s, z[0-9]+\.s, #0xfffffffd\n} 1 } } */
+/* { dg-final { scan-assembler-times {\teor\tz[0-9]+\.s, z[0-9]+\.s, #0xfffffffc\n} 1 } } */
+/* { dg-final { scan-assembler-times {\teor\tz[0-9]+\.s, z[0-9]+\.s, #0xfffffffb\n} 1 } } */
+/* { dg-final { scan-assembler-not {\teor\tz[0-9]+\.s, z[0-9]+\.s, #0xfffffffa\n} } } */
+/* { dg-final { scan-assembler-times {\teor\tz[0-9]+\.s, z[0-9]+\.s, #0xfffffff9\n} 1 } } */
+/* { dg-final { scan-assembler-times {\teor\tz[0-9]+\.s, z[0-9]+\.s, #0xfffffff8\n} 1 } } */
+/* { dg-final { scan-assembler-times {\teor\tz[0-9]+\.s, z[0-9]+\.s, #0xfffffff7\n} 1 } } */
+
+/* { dg-final { scan-assembler-times {\teor\tz[0-9]+\.d, z[0-9]+\.d, z[0-9]+\.d\n} 22 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/loop_add_1.c b/gcc/testsuite/gcc.target/aarch64/sve/loop_add_1.c
new file mode 100644 (file)
index 0000000..21d2cdb
--- /dev/null
@@ -0,0 +1,13 @@
+/* { dg-do compile } */
+/* { dg-options "-O3" } */
+
+void __attribute__((noinline, noclone))  /* Keep vadd out of line and uncloned.  */
+vadd (int *dst, int *op1, int *op2, int count)  /* dst[i] = op1[i] + op2[i] for i in [0, count).  */
+{
+  for (int i = 0; i < count; ++i)  /* The dg-final scans below expect two ld1w, one add and one st1w.  */
+    dst[i] = op1[i] + op2[i];
+}
+
+/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.s, p[0-7]/z,} 2 } } */
+/* { dg-final { scan-assembler-times {\tst1w\tz[0-9]+\.s, p[0-7],} 1 } } */
+/* { dg-final { scan-assembler-times {\tadd\tz[0-9]+\.s, z[0-9]+\.s, z[0-9]+\.s\n} 1 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/loop_add_1_run.c b/gcc/testsuite/gcc.target/aarch64/sve/loop_add_1_run.c
new file mode 100644 (file)
index 0000000..35fafe6
--- /dev/null
@@ -0,0 +1,23 @@
+/* { dg-do run { target aarch64_sve_hw } } */
+/* { dg-options "-O3" } */
+
+#include "loop_add_1.c"
+
+#define ELEMS 10  /* Number of elements in each test array.  */
+
+int __attribute__ ((optimize (1)))  /* main itself is built with the optimize (1) attribute.  */
+main (void)
+{
+  int in1[ELEMS] = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 };
+  int in2[ELEMS] = { 2, 3, 4, 5, 6, 7, 8, 9, 10, 11 };
+  int out[ELEMS];
+  int check[ELEMS] = { 3, 5, 7, 9, 11, 13, 15, 17, 19, 21 };  /* Expected in1[i] + in2[i].  */
+
+  vadd (out, in1, in2, ELEMS);  /* vadd comes from the included loop_add_1.c.  */
+
+  for (int i = 0; i < ELEMS; ++i)
+    if (out[i] != check[i])
+      __builtin_abort ();  /* Any mismatch aborts, failing the run test.  */
+
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/mad_1.c b/gcc/testsuite/gcc.target/aarch64/sve/mad_1.c
new file mode 100644 (file)
index 0000000..0378b3e
--- /dev/null
@@ -0,0 +1,34 @@
+/* { dg-do assemble { target aarch64_asm_sve_ok } } */
+/* { dg-options "-O3 -msve-vector-bits=256 --save-temps" } */
+
+#include <stdint.h>
+
+typedef int8_t vnx16qi __attribute__((vector_size(32)));  /* 32-byte vector of int8_t lanes.  */
+typedef int16_t vnx8hi __attribute__((vector_size(32)));  /* 32-byte vector of int16_t lanes.  */
+typedef int32_t vnx4si __attribute__((vector_size(32)));  /* 32-byte vector of int32_t lanes.  */
+typedef int64_t vnx2di __attribute__((vector_size(32)));  /* 32-byte vector of int64_t lanes.  */
+
+#define DO_OP(TYPE)  /* Define vmla_<TYPE>: *x = (*x * y) + z, with operands in fixed z registers.  */ \
+void vmla_##TYPE (TYPE *x, TYPE y, TYPE z)                     \
+{                                                              \
+  register TYPE dst  asm("z0");  /* Request z0, z2 and z4, the registers the mad scans name.  */ \
+  register TYPE src1 asm("z2");                                        \
+  register TYPE src2 asm("z4");                                        \
+  dst = *x;                                                    \
+  src1 = y;                                                    \
+  src2 = z;                                                    \
+  asm volatile ("" :: "w" (dst), "w" (src1), "w" (src2));  /* Empty asm taking all three values as "w" inputs.  */ \
+  dst = (dst * src1) + src2;  /* Multiply-add whose product includes dst; the scans expect one mad.  */ \
+  asm volatile ("" :: "w" (dst));  /* Empty asm taking the result as a "w" input.  */ \
+  *x = dst;                                                    \
+}
+
+DO_OP (vnx16qi)  /* Byte lanes; matched by the z0.b scan.  */
+DO_OP (vnx8hi)  /* Halfword lanes; matched by the z0.h scan.  */
+DO_OP (vnx4si)  /* Word lanes; matched by the z0.s scan.  */
+DO_OP (vnx2di)  /* Doubleword lanes; matched by the z0.d scan.  */
+
+/* { dg-final { scan-assembler-times {\tmad\tz0\.b, p[0-7]/m, z2\.b, z4\.b} 1 } } */
+/* { dg-final { scan-assembler-times {\tmad\tz0\.h, p[0-7]/m, z2\.h, z4\.h} 1 } } */
+/* { dg-final { scan-assembler-times {\tmad\tz0\.s, p[0-7]/m, z2\.s, z4\.s} 1 } } */
+/* { dg-final { scan-assembler-times {\tmad\tz0\.d, p[0-7]/m, z2\.d, z4\.d} 1 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/maxmin_1.c b/gcc/testsuite/gcc.target/aarch64/sve/maxmin_1.c
new file mode 100644 (file)
index 0000000..0b2c820
--- /dev/null
@@ -0,0 +1,70 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -ftree-vectorize -ffast-math" } */
+
+#include <stdint.h>
+
+#define NUM_ELEMS(TYPE) (320 / sizeof (TYPE))  /* Element count that makes each array 320 bytes.  */
+
+#define DEF_MAXMIN(TYPE, NAME, CMP_OP)  /* Define fun_<NAME>_<TYPE>: elementwise select using CMP_OP.  */ \
+void __attribute__ ((noinline, noclone))                       \
+fun_##NAME##_##TYPE (TYPE *restrict r, TYPE *restrict a,       \
+                    TYPE *restrict b)                          \
+{                                                              \
+  for (int i = 0; i < NUM_ELEMS (TYPE); i++)                   \
+    r[i] = a[i] CMP_OP b[i] ? a[i] : b[i];  /* Pick a[i] when the comparison holds, else b[i].  */ \
+}
+
+#define TEST_ALL(T)  /* Apply T to 8 integer and 3 floating-point types, for max (>) and min (<).  */ \
+  T (int8_t, max, >)                   \
+  T (int16_t, max, >)                  \
+  T (int32_t, max, >)                  \
+  T (int64_t, max, >)                  \
+  T (uint8_t, max, >)                  \
+  T (uint16_t, max, >)                 \
+  T (uint32_t, max, >)                 \
+  T (uint64_t, max, >)                 \
+  T (_Float16, max, >)                 \
+  T (float, max, >)                    \
+  T (double, max, >)                   \
+                                       \
+  T (int8_t, min, <)                   \
+  T (int16_t, min, <)                  \
+  T (int32_t, min, <)                  \
+  T (int64_t, min, <)                  \
+  T (uint8_t, min, <)                  \
+  T (uint16_t, min, <)                 \
+  T (uint32_t, min, <)                 \
+  T (uint64_t, min, <)                 \
+  T (_Float16, min, <)                 \
+  T (float, min, <)                    \
+  T (double, min, <)
+
+TEST_ALL (DEF_MAXMIN)  /* Emit one fun_<NAME>_<TYPE> definition per entry above.  */
+
+/* { dg-final { scan-assembler-times {\tsmax\tz[0-9]+\.b, p[0-7]/m, z[0-9]+\.b, z[0-9]+\.b\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tsmax\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tsmax\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tsmax\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, z[0-9]+\.d\n} 1 } } */
+
+/* { dg-final { scan-assembler-times {\tumax\tz[0-9]+\.b, p[0-7]/m, z[0-9]+\.b, z[0-9]+\.b\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tumax\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tumax\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tumax\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, z[0-9]+\.d\n} 1 } } */
+
+/* { dg-final { scan-assembler-times {\tfmaxnm\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tfmaxnm\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tfmaxnm\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, z[0-9]+\.d\n} 1 } } */
+
+/* { dg-final { scan-assembler-times {\tsmin\tz[0-9]+\.b, p[0-7]/m, z[0-9]+\.b, z[0-9]+\.b\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tsmin\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tsmin\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tsmin\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, z[0-9]+\.d\n} 1 } } */
+
+/* { dg-final { scan-assembler-times {\tumin\tz[0-9]+\.b, p[0-7]/m, z[0-9]+\.b, z[0-9]+\.b\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tumin\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tumin\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tumin\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, z[0-9]+\.d\n} 1 } } */
+
+/* { dg-final { scan-assembler-times {\tfminnm\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tfminnm\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tfminnm\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, z[0-9]+\.d\n} 1 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/maxmin_1_run.c b/gcc/testsuite/gcc.target/aarch64/sve/maxmin_1_run.c
new file mode 100644 (file)
index 0000000..a734c6a
--- /dev/null
@@ -0,0 +1,27 @@
+/* { dg-do run { target { aarch64_sve_hw } } } */
+/* { dg-options "-O2 -ftree-vectorize -ffast-math" } */
+
+#include "maxmin_1.c"
+
+#define TEST_LOOP(TYPE, NAME, CMP_OP)  /* Fill a and b, call fun_<NAME>_<TYPE>, then check r.  */ \
+  {                                                    \
+    TYPE a[NUM_ELEMS (TYPE)];                          \
+    TYPE b[NUM_ELEMS (TYPE)];                          \
+    TYPE r[NUM_ELEMS (TYPE)];                          \
+    for (int i = 0; i < NUM_ELEMS (TYPE); i++)         \
+      {                                                        \
+       a[i] = ((i * 2) % 3) * (i & 1 ? 1 : -1);  /* Magnitude 0..2; positive for odd i, negated for even i.  */ \
+       b[i] = (1 + (i % 4)) * (i & 1 ? -1 : 1);  /* Magnitude 1..4; negated for odd i, positive for even i.  */ \
+       asm volatile ("" ::: "memory");  /* Empty asm with a "memory" clobber after each pair of stores.  */ \
+      }                                                        \
+    fun_##NAME##_##TYPE (r, a, b);                     \
+    for (int i = 0; i < NUM_ELEMS (TYPE); i++)         \
+      if (r[i] != (a[i] CMP_OP b[i] ? a[i] : b[i]))  /* Recompute the expected select in place.  */ \
+       __builtin_abort ();                             \
+  }
+
+int main ()
+{
+  TEST_ALL (TEST_LOOP)  /* Expands to one TEST_LOOP block per TEST_ALL entry.  */
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/maxmin_strict_1.c b/gcc/testsuite/gcc.target/aarch64/sve/maxmin_strict_1.c
new file mode 100644 (file)
index 0000000..e90afa2
--- /dev/null
@@ -0,0 +1,30 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -ftree-vectorize" } */
+
+#include <math.h>
+
+#define NUM_ELEMS(TYPE) (320 / sizeof (TYPE))  /* Element count that makes each array 320 bytes.  */
+
+#define DEF_MAXMIN(TYPE, FUN)  /* Define test_<FUN>_<TYPE>: elementwise FUN (a[i], b[i]).  */ \
+void __attribute__ ((noinline, noclone))                       \
+test_##FUN##_##TYPE (TYPE *restrict r, TYPE *restrict a,       \
+                    TYPE *restrict b)                          \
+{                                                              \
+  for (int i = 0; i < NUM_ELEMS (TYPE); i++)                   \
+    r[i] = FUN (a[i], b[i]);  /* FUN is one of the <math.h> functions listed in TEST_ALL.  */ \
+}
+
+#define TEST_ALL(T)  /* Apply T to fmaxf/fmax and fminf/fmin with their float/double types.  */ \
+  T (float, fmaxf)                             \
+  T (double, fmax)                             \
+                                               \
+  T (float, fminf)                             \
+  T (double, fmin)
+
+TEST_ALL (DEF_MAXMIN)  /* Emit one test_<FUN>_<TYPE> definition per entry above.  */
+
+/* { dg-final { scan-assembler-times {\tfmaxnm\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tfmaxnm\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, z[0-9]+\.d\n} 1 } } */
+
+/* { dg-final { scan-assembler-times {\tfminnm\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tfminnm\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, z[0-9]+\.d\n} 1 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/maxmin_strict_1_run.c b/gcc/testsuite/gcc.target/aarch64/sve/maxmin_strict_1_run.c
new file mode 100644 (file)
index 0000000..c905027
--- /dev/null
@@ -0,0 +1,27 @@
+/* { dg-do run { target { aarch64_sve_hw } } } */
+/* { dg-options "-O2 -ftree-vectorize" } */
+
+#include "maxmin_strict_1.c"
+
+/* Exercise test_<FUN>_<TYPE>: fill two input arrays with small integers of
+   alternating sign, run the function under test and abort if any element
+   of the result differs from a direct FUN call on the same inputs.  */
+#define TEST_LOOP(TYPE, FUN)                                   \
+  {                                                            \
+    TYPE lhs[NUM_ELEMS (TYPE)], rhs[NUM_ELEMS (TYPE)];         \
+    TYPE out[NUM_ELEMS (TYPE)];                                \
+    for (int idx = 0; idx < NUM_ELEMS (TYPE); idx++)           \
+      {                                                        \
+        /* +1 for odd elements, -1 for even ones.  */          \
+        int sgn = (idx & 1) ? 1 : -1;                          \
+        lhs[idx] = ((idx * 2) % 3) * sgn;                      \
+        rhs[idx] = (1 + (idx % 4)) * -sgn;                     \
+        asm volatile ("" ::: "memory");                        \
+      }                                                        \
+    test_##FUN##_##TYPE (out, lhs, rhs);                       \
+    for (int idx = 0; idx < NUM_ELEMS (TYPE); idx++)           \
+      {                                                        \
+        if (out[idx] == FUN (lhs[idx], rhs[idx]))              \
+          continue;                                            \
+        __builtin_abort ();                                    \
+      }                                                        \
+  }
+
+/* Run TEST_LOOP on every (type, function) pair that TEST_ALL lists; a
+   mismatch inside TEST_LOOP aborts, otherwise return 0.  */
+int main ()
+{
+  TEST_ALL (TEST_LOOP)
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/mla_1.c b/gcc/testsuite/gcc.target/aarch64/sve/mla_1.c
new file mode 100644 (file)
index 0000000..b496338
--- /dev/null
@@ -0,0 +1,34 @@
+/* { dg-do assemble { target aarch64_asm_sve_ok } } */
+/* { dg-options "-O3 -msve-vector-bits=256 --save-temps" } */
+
+#include <stdint.h>
+
+/* 32-byte vector types, one for each integer element width from 8 to 64
+   bits.  */
+typedef int8_t vnx16qi __attribute__((vector_size(32)));
+typedef int16_t vnx8hi __attribute__((vector_size(32)));
+typedef int32_t vnx4si __attribute__((vector_size(32)));
+typedef int64_t vnx2di __attribute__((vector_size(32)));
+
+/* Define vmla_<TYPE>, which computes *x = y * z + *x on whole vectors.
+   The accumulator and the two multiplicands are held in register
+   variables bound to z0, z2 and z4 respectively.  Empty asm statements
+   take those variables as "w" inputs immediately before and after the
+   arithmetic.  */
+#define DO_OP(TYPE)                                            \
+void vmla_##TYPE (TYPE *x, TYPE y, TYPE z)                     \
+{                                                              \
+  register TYPE dst  asm("z0");                                        \
+  register TYPE src1 asm("z2");                                        \
+  register TYPE src2 asm("z4");                                        \
+  dst = *x;                                                    \
+  src1 = y;                                                    \
+  src2 = z;                                                    \
+  asm volatile ("" :: "w" (dst), "w" (src1), "w" (src2));      \
+  dst = (src1 * src2) + dst;                                   \
+  asm volatile ("" :: "w" (dst));                              \
+  *x = dst;                                                    \
+}
+
+DO_OP (vnx16qi)
+DO_OP (vnx8hi)
+DO_OP (vnx4si)
+DO_OP (vnx2di)
+
+/* { dg-final { scan-assembler-times {\tmla\tz0\.b, p[0-7]/m, z2\.b, z4\.b\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tmla\tz0\.h, p[0-7]/m, z2\.h, z4\.h\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tmla\tz0\.s, p[0-7]/m, z2\.s, z4\.s\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tmla\tz0\.d, p[0-7]/m, z2\.d, z4\.d\n} 1 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/mls_1.c b/gcc/testsuite/gcc.target/aarch64/sve/mls_1.c
new file mode 100644 (file)
index 0000000..797ebdb
--- /dev/null
@@ -0,0 +1,34 @@
+/* { dg-do assemble { target aarch64_asm_sve_ok } } */
+/* { dg-options "-O3 -msve-vector-bits=256 --save-temps" } */
+
+#include <stdint.h>
+
+/* 32-byte vector types, one for each integer element width from 8 to 64
+   bits.  */
+typedef int8_t vnx16qi __attribute__((vector_size(32)));
+typedef int16_t vnx8hi __attribute__((vector_size(32)));
+typedef int32_t vnx4si __attribute__((vector_size(32)));
+typedef int64_t vnx2di __attribute__((vector_size(32)));
+
+/* Define vmla_<TYPE>, which computes *x = *x - y * z on whole vectors.
+   NOTE: the function keeps the "vmla_" prefix even though the operation
+   is a multiply-subtract.  The accumulator and the two multiplicands are
+   held in register variables bound to z0, z2 and z4 respectively.  Empty
+   asm statements take those variables as "w" inputs immediately before
+   and after the arithmetic.  */
+#define DO_OP(TYPE)                                            \
+void vmla_##TYPE (TYPE *x, TYPE y, TYPE z)                     \
+{                                                              \
+  register TYPE dst  asm("z0");                                        \
+  register TYPE src1 asm("z2");                                        \
+  register TYPE src2 asm("z4");                                        \
+  dst = *x;                                                    \
+  src1 = y;                                                    \
+  src2 = z;                                                    \
+  asm volatile ("" :: "w" (dst), "w" (src1), "w" (src2));      \
+  dst = dst - (src1 * src2);                                   \
+  asm volatile ("" :: "w" (dst));                              \
+  *x = dst;                                                    \
+}
+
+DO_OP (vnx16qi)
+DO_OP (vnx8hi)
+DO_OP (vnx4si)
+DO_OP (vnx2di)
+
+/* { dg-final { scan-assembler-times {\tmls\tz0\.b, p[0-7]/m, z2\.b, z4\.b\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tmls\tz0\.h, p[0-7]/m, z2\.h, z4\.h\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tmls\tz0\.s, p[0-7]/m, z2\.s, z4\.s\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tmls\tz0\.d, p[0-7]/m, z2\.d, z4\.d\n} 1 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/mov_rr_1.c b/gcc/testsuite/gcc.target/aarch64/sve/mov_rr_1.c
new file mode 100644 (file)
index 0000000..20b2344
--- /dev/null
@@ -0,0 +1,14 @@
+/* { dg-do assemble { target aarch64_asm_sve_ok } } */
+/* { dg-options "-O3 -msve-vector-bits=256 --save-temps" } */
+
+/* Copy one 32-byte vector register variable to another.  X is bound to
+   z1 and Y to z2; the first asm produces X as a "w" output, the plain
+   assignment copies it, and the second asm consumes Y as a "w" input.  */
+void sve_copy_rr (void)
+{
+  typedef int vnx4si __attribute__((vector_size(32)));
+  register vnx4si x asm ("z1");
+  register vnx4si y asm ("z2");
+  asm volatile ("#foo" : "=w" (x));
+  y = x;
+  asm volatile ("#foo" :: "w" (y));
+}
+
+/* { dg-final { scan-assembler-times {\tmov\tz[0-9]+\.d, z[0-9]+\.d\n} 1 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/msb_1.c b/gcc/testsuite/gcc.target/aarch64/sve/msb_1.c
new file mode 100644 (file)
index 0000000..5686af5
--- /dev/null
@@ -0,0 +1,34 @@
+/* { dg-do assemble { target aarch64_asm_sve_ok } } */
+/* { dg-options "-O3 -msve-vector-bits=256 --save-temps" } */
+
+#include <stdint.h>
+
+/* 32-byte vector types, one for each integer element width from 8 to 64
+   bits.  */
+typedef int8_t vnx16qi __attribute__((vector_size(32)));
+typedef int16_t vnx8hi __attribute__((vector_size(32)));
+typedef int32_t vnx4si __attribute__((vector_size(32)));
+typedef int64_t vnx2di __attribute__((vector_size(32)));
+
+/* Define vmla_<TYPE>, which computes *x = z - *x * y on whole vectors, so
+   the value loaded from *x is a multiplicand rather than the addend.
+   NOTE: the function keeps the "vmla_" prefix even though the operation
+   is a multiply-subtract.  dst, src1 and src2 are register variables
+   bound to z0, z2 and z4 respectively.  Empty asm statements take those
+   variables as "w" inputs immediately before and after the arithmetic.  */
+#define DO_OP(TYPE)                                            \
+void vmla_##TYPE (TYPE *x, TYPE y, TYPE z)                     \
+{                                                              \
+  register TYPE dst  asm("z0");                                        \
+  register TYPE src1 asm("z2");                                        \
+  register TYPE src2 asm("z4");                                        \
+  dst = *x;                                                    \
+  src1 = y;                                                    \
+  src2 = z;                                                    \
+  asm volatile ("" :: "w" (dst), "w" (src1), "w" (src2));      \
+  dst = src2 - (dst * src1);                          \
+  asm volatile ("" :: "w" (dst));                              \
+  *x = dst;                                                    \
+}
+
+DO_OP (vnx16qi)
+DO_OP (vnx8hi)
+DO_OP (vnx4si)
+DO_OP (vnx2di)
+
+/* { dg-final { scan-assembler-times {\tmsb\tz0\.b, p[0-7]/m, z2\.b, z4\.b} 1 } } */
+/* { dg-final { scan-assembler-times {\tmsb\tz0\.h, p[0-7]/m, z2\.h, z4\.h} 1 } } */
+/* { dg-final { scan-assembler-times {\tmsb\tz0\.s, p[0-7]/m, z2\.s, z4\.s} 1 } } */
+/* { dg-final { scan-assembler-times {\tmsb\tz0\.d, p[0-7]/m, z2\.d, z4\.d} 1 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/mul_1.c b/gcc/testsuite/gcc.target/aarch64/sve/mul_1.c
new file mode 100644 (file)
index 0000000..5856a73
--- /dev/null
@@ -0,0 +1,64 @@
+/* { dg-do assemble { target aarch64_asm_sve_ok } } */
+/* { dg-options "-O3 --save-temps" } */
+
+#include <stdint.h>
+
+/* Define varith_<TYPE>_<NAME>, which applies the binary operator OP
+   element by element: dst[i] = dst[i] OP src[i] for the first COUNT
+   elements.  */
+#define DO_REGREG_OPS(TYPE, OP, NAME)                          \
+void varith_##TYPE##_##NAME (TYPE *dst, TYPE *src, int count)  \
+{                                                              \
+  for (int i = 0; i < count; ++i)                              \
+    dst[i] = dst[i] OP src[i];                                 \
+}
+
+/* Define varithimm_<NAME>_<TYPE>, which applies OP with the constant
+   VALUE as right-hand operand to each of the first COUNT elements of
+   DST.  */
+#define DO_IMMEDIATE_OPS(VALUE, TYPE, OP, NAME)                \
+void varithimm_##NAME##_##TYPE (TYPE *dst, int count)  \
+{                                                      \
+  for (int i = 0; i < count; ++i)                      \
+    dst[i] = dst[i] OP VALUE;                          \
+}
+
+/* Instantiate, for one TYPE and operator, the two-array form plus one
+   constant-operand form for each of 0, 86, 109, 141, -1, -110 and -141.
+   NAME is pasted with a suffix that identifies the constant so that each
+   generated function has a distinct name.  */
+#define DO_ARITH_OPS(TYPE, OP, NAME)                   \
+  DO_REGREG_OPS (TYPE, OP, NAME);                      \
+  DO_IMMEDIATE_OPS (0, TYPE, OP, NAME ## 0);           \
+  DO_IMMEDIATE_OPS (86, TYPE, OP, NAME ## 86);         \
+  DO_IMMEDIATE_OPS (109, TYPE, OP, NAME ## 109);       \
+  DO_IMMEDIATE_OPS (141, TYPE, OP, NAME ## 141);       \
+  DO_IMMEDIATE_OPS (-1, TYPE, OP, NAME ## minus1);     \
+  DO_IMMEDIATE_OPS (-110, TYPE, OP, NAME ## minus110); \
+  DO_IMMEDIATE_OPS (-141, TYPE, OP, NAME ## minus141);
+
+DO_ARITH_OPS (int8_t, *, mul)
+DO_ARITH_OPS (int16_t, *, mul)
+DO_ARITH_OPS (int32_t, *, mul)
+DO_ARITH_OPS (int64_t, *, mul)
+
+/* { dg-final { scan-assembler-times {\tmul\tz[0-9]+\.b, p[0-7]/m, z[0-9]+\.b, z[0-9]+\.b\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tmul\tz[0-9]+\.b, z[0-9]+\.b, #86\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tmul\tz[0-9]+\.b, z[0-9]+\.b, #109\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tmul\tz[0-9]+\.b, z[0-9]+\.b, #115\n} 1 } } */
+/* { dg-final { scan-assembler-not {\tmul\tz[0-9]+\.b, z[0-9]+\.b, #141\n} } } */
+/* { dg-final { scan-assembler-not {\tmul\tz[0-9]+\.b, z[0-9]+\.b, #-1\n} } } */
+/* { dg-final { scan-assembler-times {\tmul\tz[0-9]+\.b, z[0-9]+\.b, #-110\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tmul\tz[0-9]+\.b, z[0-9]+\.b, #-115\n} 1 } } */
+/* { dg-final { scan-assembler-not {\tmul\tz[0-9]+\.b, z[0-9]+\.b, #-141\n} } } */
+
+/* { dg-final { scan-assembler-times {\tmul\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 3 } } */
+/* { dg-final { scan-assembler-times {\tmul\tz[0-9]+\.h, z[0-9]+\.h, #86\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tmul\tz[0-9]+\.h, z[0-9]+\.h, #109\n} 1 } } */
+/* { dg-final { scan-assembler-not {\tmul\tz[0-9]+\.h, z[0-9]+\.h, #141\n} } } */
+/* { dg-final { scan-assembler-times {\tmul\tz[0-9]+\.h, z[0-9]+\.h, #-110\n} 1 } } */
+/* { dg-final { scan-assembler-not {\tmul\tz[0-9]+\.h, z[0-9]+\.h, #-141\n} } } */
+
+/* { dg-final { scan-assembler-times {\tmul\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 3 } } */
+/* { dg-final { scan-assembler-times {\tmul\tz[0-9]+\.s, z[0-9]+\.s, #86\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tmul\tz[0-9]+\.s, z[0-9]+\.s, #109\n} 1 } } */
+/* { dg-final { scan-assembler-not {\tmul\tz[0-9]+\.s, z[0-9]+\.s, #141\n} } } */
+/* { dg-final { scan-assembler-times {\tmul\tz[0-9]+\.s, z[0-9]+\.s, #-110\n} 1 } } */
+/* { dg-final { scan-assembler-not {\tmul\tz[0-9]+\.s, z[0-9]+\.s, #-141\n} } } */
+
+/* { dg-final { scan-assembler-times {\tmul\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, z[0-9]+\.d\n} 3 } } */
+/* { dg-final { scan-assembler-times {\tmul\tz[0-9]+\.d, z[0-9]+\.d, #86\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tmul\tz[0-9]+\.d, z[0-9]+\.d, #109\n} 1 } } */
+/* { dg-final { scan-assembler-not {\tmul\tz[0-9]+\.d, z[0-9]+\.d, #141\n} } } */
+/* { dg-final { scan-assembler-times {\tmul\tz[0-9]+\.d, z[0-9]+\.d, #-110\n} 1 } } */
+/* { dg-final { scan-assembler-not {\tmul\tz[0-9]+\.d, z[0-9]+\.d, #-141\n} } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/neg_1.c b/gcc/testsuite/gcc.target/aarch64/sve/neg_1.c
new file mode 100644 (file)
index 0000000..2690fcf
--- /dev/null
@@ -0,0 +1,21 @@
+/* { dg-do assemble { target aarch64_asm_sve_ok } } */
+/* { dg-options "-O3 --save-temps" } */
+
+#include <stdint.h>
+
+/* Define vneg_<TYPE>, which stores the negation of each of the first
+   COUNT elements of SRC in the corresponding element of DST.  */
+#define DO_OPS(TYPE)                                   \
+void vneg_##TYPE (TYPE *dst, TYPE *src, int count)     \
+{                                                      \
+  for (int i = 0; i < count; ++i)                      \
+    dst[i] = -src[i];                                  \
+}
+
+DO_OPS (int8_t)
+DO_OPS (int16_t)
+DO_OPS (int32_t)
+DO_OPS (int64_t)
+
+/* { dg-final { scan-assembler-times {\tneg\tz[0-9]+\.b, p[0-7]/m, z[0-9]+\.b\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tneg\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tneg\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tneg\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d\n} 1 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/nlogical_1.c b/gcc/testsuite/gcc.target/aarch64/sve/nlogical_1.c
new file mode 100644 (file)
index 0000000..a025ae7
--- /dev/null
@@ -0,0 +1,33 @@
+/* { dg-do assemble { target aarch64_asm_sve_ok } } */
+/* { dg-options "-O3 --save-temps" } */
+
+#include <stdint.h>
+
+/* Define two noinline, noclone functions for TYPE:
+   vnlogical_not_<TYPE> replaces each of the first COUNT elements of DST
+   with its bitwise complement, and vnlogical_bic_<TYPE> clears in each
+   element of DST the bits that are set in the matching element of SRC
+   (dst & ~src).  */
+#define DO_VNLOGICAL(TYPE)                             \
+void __attribute__ ((noinline, noclone))               \
+vnlogical_not_##TYPE (TYPE *dst, int count)            \
+{                                                      \
+  for (int i = 0; i < count; i++)                      \
+    dst[i] = ~dst[i];                                  \
+}                                                      \
+                                                       \
+void __attribute__ ((noinline, noclone))               \
+vnlogical_bic_##TYPE (TYPE *dst, TYPE *src, int count) \
+{                                                      \
+  for (int i = 0; i < count; i++)                      \
+    dst[i] = dst[i] & ~src[i];                         \
+}
+
+/* Apply T to each signed integer element type from 8 to 64 bits.  */
+#define TEST_ALL(T)                            \
+  T (int8_t)                                   \
+  T (int16_t)                                  \
+  T (int32_t)                                  \
+  T (int64_t)
+
+TEST_ALL (DO_VNLOGICAL)
+
+/* { dg-final { scan-assembler-times {\tnot\tz[0-9]+\.b, p[0-7]/m, z[0-9]+\.b\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tnot\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tnot\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tnot\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tbic\tz[0-9]+\.d, z[0-9]+\.d, z[0-9]+\.d\n} 4 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/nlogical_1_run.c b/gcc/testsuite/gcc.target/aarch64/sve/nlogical_1_run.c
new file mode 100644 (file)
index 0000000..63d6c23
--- /dev/null
@@ -0,0 +1,37 @@
+/* { dg-do run { target { aarch64_sve_hw } } } */
+/* { dg-options "-O3" } */
+
+#include "nlogical_1.c"
+
+#define N 128
+
+/* Run-time check of vnlogical_not_<TYPE> and vnlogical_bic_<TYPE> on
+   N-element arrays.  Element idx starts as idx ^ 42 and, for the bic
+   test, the mask is idx % 5.  Each result is compared with the same
+   expression evaluated in int and converted to TYPE; any mismatch
+   aborts.  */
+#define TEST_VNLOGICAL(TYPE)                                   \
+  {                                                            \
+    TYPE value[N], mask[N];                                    \
+    int idx;                                                   \
+    for (idx = 0; idx < N; ++idx)                              \
+      {                                                        \
+        value[idx] = idx ^ 42;                                 \
+        asm volatile ("" ::: "memory");                        \
+      }                                                        \
+    vnlogical_not_##TYPE (value, N);                           \
+    for (idx = 0; idx < N; ++idx)                              \
+      {                                                        \
+        TYPE want = (TYPE) ~(idx ^ 42);                        \
+        if (value[idx] != want)                                \
+          __builtin_abort ();                                  \
+      }                                                        \
+    for (idx = 0; idx < N; ++idx)                              \
+      {                                                        \
+        value[idx] = idx ^ 42;                                 \
+        mask[idx] = idx % 5;                                   \
+        asm volatile ("" ::: "memory");                        \
+      }                                                        \
+    vnlogical_bic_##TYPE (value, mask, N);                     \
+    for (idx = 0; idx < N; ++idx)                              \
+      {                                                        \
+        TYPE want = (TYPE) ((idx ^ 42) & ~(idx % 5));          \
+        if (value[idx] != want)                                \
+          __builtin_abort ();                                  \
+      }                                                        \
+  }
+
+/* Run TEST_VNLOGICAL for every type that TEST_ALL lists.  main carries
+   the optimize (1) attribute, unlike the functions under test.  */
+int __attribute__ ((optimize (1)))
+main (void)
+{
+  TEST_ALL (TEST_VNLOGICAL)
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pack_1.c b/gcc/testsuite/gcc.target/aarch64/sve/pack_1.c
new file mode 100644 (file)
index 0000000..d9de996
--- /dev/null
@@ -0,0 +1,26 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -ftree-vectorize" } */
+
+#include <stdint.h>
+
+/* Define pack_<TYPED>_<TYPES>, a noinline, noclone function that adds 1
+   to each of the first SIZE elements of S (element type TYPES) and
+   stores the result, converted to TYPED, in D.  */
+#define PACK(TYPED, TYPES)                             \
+void __attribute__ ((noinline, noclone))               \
+pack_##TYPED##_##TYPES (TYPED *d, TYPES *s, int size)  \
+{                                                      \
+  for (int i = 0; i < size; i++)                       \
+    d[i] = s[i] + 1;                                   \
+}
+
+/* Apply T to each (destination, source) type pair.  In every pair the
+   destination is half the width of the source; signed and unsigned
+   variants are both covered.  */
+#define TEST_ALL(T)                            \
+  T (int32_t, int64_t)                         \
+  T (int16_t, int32_t)                         \
+  T (int8_t, int16_t)                          \
+  T (uint32_t, uint64_t)                       \
+  T (uint16_t, uint32_t)                       \
+  T (uint8_t, uint16_t)
+
+TEST_ALL (PACK)
+
+/* { dg-final { scan-assembler-times {\tuzp1\tz[0-9]+\.s, z[0-9]+\.s, z[0-9]+\.s\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tuzp1\tz[0-9]+\.h, z[0-9]+\.h, z[0-9]+\.h\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tuzp1\tz[0-9]+\.b, z[0-9]+\.b, z[0-9]+\.b\n} 2 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pack_1_run.c b/gcc/testsuite/gcc.target/aarch64/sve/pack_1_run.c
new file mode 100644 (file)
index 0000000..1b39b91
--- /dev/null
@@ -0,0 +1,28 @@
+/* { dg-do run { target aarch64_sve_hw } } */
+/* { dg-options "-O2 -ftree-vectorize" } */
+
+#include "pack_1.c"
+
+#define ARRAY_SIZE 57
+
+/* Run-time check of pack_<TYPED>_<TYPES>.  Source element idx holds
+   (idx - 10) * 3 converted to TYPES.  After the call, each destination
+   element must equal that source value plus 1, converted to TYPED;
+   any mismatch aborts.  */
+#define TEST_LOOP(TYPED, TYPES)                                \
+  {                                                            \
+    TYPES wide[ARRAY_SIZE];                                    \
+    TYPED narrow[ARRAY_SIZE];                                  \
+    for (int idx = 0; idx < ARRAY_SIZE; idx++)                 \
+      {                                                        \
+        wide[idx] = (idx - 10) * 3;                            \
+        asm volatile ("" ::: "memory");                        \
+      }                                                        \
+    pack_##TYPED##_##TYPES (narrow, wide, ARRAY_SIZE);         \
+    for (int idx = 0; idx < ARRAY_SIZE; idx++)                 \
+      {                                                        \
+        TYPES in = (TYPES) ((idx - 10) * 3);                   \
+        TYPED want = (TYPED) (in + 1);                         \
+        if (narrow[idx] != want)                               \
+          __builtin_abort ();                                  \
+      }                                                        \
+  }
+
+/* Run TEST_LOOP for every type pair that TEST_ALL lists.  main carries
+   the optimize (1) attribute, unlike the functions under test.  */
+int __attribute__ ((optimize (1)))
+main (void)
+{
+  TEST_ALL (TEST_LOOP)
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pack_fcvt_signed_1.c b/gcc/testsuite/gcc.target/aarch64/sve/pack_fcvt_signed_1.c
new file mode 100644 (file)
index 0000000..367fbd9
--- /dev/null
@@ -0,0 +1,14 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -ftree-vectorize" } */
+
+#include <stdint.h>
+
+/* For each of the first SIZE elements, add 3 to the double in S and store
+   the sum, converted to int32_t, in D.  The addition is done in double
+   before the conversion.  */
+void __attribute__ ((noinline, noclone))
+pack_int_double_plus_3 (int32_t *d, double *s, int size)
+{
+  for (int i = 0; i < size; i++)
+    d[i] = s[i] + 3;
+}
+
+/* { dg-final { scan-assembler-times {\tfcvtzs\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.d\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tuzp1\tz[0-9]+\.s, z[0-9]+\.s, z[0-9]+\.s\n} 1 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pack_fcvt_signed_1_run.c b/gcc/testsuite/gcc.target/aarch64/sve/pack_fcvt_signed_1_run.c
new file mode 100644 (file)
index 0000000..2fcd18b
--- /dev/null
@@ -0,0 +1,28 @@
+/* { dg-do run { target aarch64_sve_hw } } */
+/* { dg-options "-O2 -ftree-vectorize" } */
+
+#include "pack_fcvt_signed_1.c"
+
+#define ARRAY_SIZE 107
+
+/* Source value for element i.  Expands in terms of the variable `i' that
+   is in scope at the point of use; the result is negative for i < 19.  */
+#define VAL1 ((i * 345.434) - (19 * 345.434))
+
+/* Fill the source array with VAL1, run pack_int_double_plus_3 and abort
+   if any destination element differs from the expected value.  An empty
+   asm with a memory clobber follows each source store.  */
+int __attribute__ ((optimize (1)))
+main (void)
+{
+  static int32_t array_dest[ARRAY_SIZE];
+  double array_source[ARRAY_SIZE];
+
+  for (int i = 0; i < ARRAY_SIZE; i++)
+    {
+      array_source[i] = VAL1;
+      asm volatile ("" ::: "memory");
+    }
+
+  pack_int_double_plus_3 (array_dest, array_source, ARRAY_SIZE);
+  for (int i = 0; i < ARRAY_SIZE; i++)
+    /* Mirror the tested loop exactly: add 3 in double first and only then
+       truncate to int32_t.  Truncating before the addition yields a value
+       one greater for non-integral inputs strictly between -3 and 0,
+       which VAL1 can produce near i == 19 if the subtraction is not
+       evaluated exactly.  */
+    if (array_dest[i] != (int32_t) (VAL1 + 3))
+      __builtin_abort ();
+
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pack_fcvt_unsigned_1.c b/gcc/testsuite/gcc.target/aarch64/sve/pack_fcvt_unsigned_1.c
new file mode 100644 (file)
index 0000000..c5da480
--- /dev/null
@@ -0,0 +1,14 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -ftree-vectorize" } */
+
+#include <stdint.h>
+
+/* For each of the first SIZE elements, add 7 to the double in S and store
+   the sum, converted to uint32_t, in D.  The addition is done in double
+   before the conversion.  */
+void __attribute__ ((noinline, noclone))
+pack_int_double_plus_7 (uint32_t *d, double *s, int size)
+{
+  for (int i = 0; i < size; i++)
+    d[i] = s[i] + 7;
+}
+
+/* { dg-final { scan-assembler-times {\tfcvtzu\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.d\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tuzp1\tz[0-9]+\.s, z[0-9]+\.s, z[0-9]+\.s\n} 1 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pack_fcvt_unsigned_1_run.c b/gcc/testsuite/gcc.target/aarch64/sve/pack_fcvt_unsigned_1_run.c
new file mode 100644 (file)
index 0000000..7c54f1b
--- /dev/null
@@ -0,0 +1,28 @@
+/* { dg-do run { target aarch64_sve_hw } } */
+/* { dg-options "-O2 -ftree-vectorize" } */
+
+#include "pack_fcvt_unsigned_1.c"
+
+#define ARRAY_SIZE 157
+
+/* Source value for element i.  Expands in terms of the variable `i' that
+   is in scope at the point of use; never negative for the indices used
+   here.  */
+#define VAL1 (i * 9584.3432)
+
+/* Fill the source array with VAL1, run pack_int_double_plus_7 and abort
+   if any destination element differs from the expected value.  An empty
+   asm with a memory clobber follows each source store.  */
+int __attribute__ ((optimize (1)))
+main (void)
+{
+  static uint32_t array_dest[ARRAY_SIZE];
+  double array_source[ARRAY_SIZE];
+
+  for (int i = 0; i < ARRAY_SIZE; i++)
+    {
+      array_source[i] = VAL1;
+      asm volatile ("" ::: "memory");
+    }
+
+  pack_int_double_plus_7 (array_dest, array_source, ARRAY_SIZE);
+  for (int i = 0; i < ARRAY_SIZE; i++)
+    /* Mirror the tested loop exactly: add 7 in double first and only then
+       convert to uint32_t.  For the non-negative inputs used here this
+       equals converting first, but the check no longer depends on that
+       property of the data.  */
+    if (array_dest[i] != (uint32_t) (VAL1 + 7))
+      __builtin_abort ();
+
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pack_float_1.c b/gcc/testsuite/gcc.target/aarch64/sve/pack_float_1.c
new file mode 100644 (file)
index 0000000..2683a87
--- /dev/null
@@ -0,0 +1,12 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -ftree-vectorize" } */
+
+/* For each of the first SIZE elements, add the double constant 1.1 to the
+   double in S and store the sum, converted to float, in D.  */
+void __attribute__ ((noinline, noclone))
+pack_float_plus_1point1 (float *d, double *s, int size)
+{
+  for (int i = 0; i < size; i++)
+    d[i] = s[i] + 1.1;
+}
+
+/* { dg-final { scan-assembler-times {\tfcvt\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.d\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tuzp1\tz[0-9]+\.s, z[0-9]+\.s, z[0-9]+\.s\n} 1 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pack_float_1_run.c b/gcc/testsuite/gcc.target/aarch64/sve/pack_float_1_run.c
new file mode 100644 (file)
index 0000000..3d7a175
--- /dev/null
@@ -0,0 +1,28 @@
+/* { dg-do run { target aarch64_sve_hw } } */
+/* { dg-options "-O2 -ftree-vectorize" } */
+
+#include "pack_float_1.c"
+
+#define ARRAY_SIZE 107
+
+/* Source value for element i; expands in terms of the variable `i' that
+   is in scope at the point of use.  */
+#define VAL1 ((i * 886.556) - (43 * 886.556))
+
+/* Fill the source array with VAL1, run pack_float_plus_1point1 and abort
+   if any destination element differs from VAL1 + 1.1 converted to
+   float.  */
+int __attribute__ ((optimize (1)))
+main (void)
+{
+  double array_source[ARRAY_SIZE];
+  float array_dest[ARRAY_SIZE];
+  int i;
+
+  i = 0;
+  while (i < ARRAY_SIZE)
+    {
+      array_source[i] = VAL1;
+      asm volatile ("" ::: "memory");
+      i++;
+    }
+
+  pack_float_plus_1point1 (array_dest, array_source, ARRAY_SIZE);
+
+  for (i = 0; i < ARRAY_SIZE; i++)
+    {
+      float expected = (float) (VAL1 + 1.1);
+      if (array_dest[i] != expected)
+        __builtin_abort ();
+    }
+
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/popcount_1.c b/gcc/testsuite/gcc.target/aarch64/sve/popcount_1.c
new file mode 100644 (file)
index 0000000..188127b
--- /dev/null
@@ -0,0 +1,22 @@
+/* { dg-do assemble { target aarch64_asm_sve_ok } } */
+/* { dg-options "-O2 -ftree-vectorize --save-temps" } */
+
+#include <stdint.h>
+
+/* Store the number of set bits in each of the first SIZE 32-bit elements
+   of SRC in the corresponding element of DST.  DST and SRC are
+   restrict-qualified.  */
+void __attribute__ ((noinline, noclone))
+popcount_32 (unsigned int *restrict dst, uint32_t *restrict src, int size)
+{
+  for (int i = 0; i < size; ++i)
+    dst[i] = __builtin_popcount (src[i]);
+}
+
+/* Store the number of set bits in each of the first SIZE 64-bit elements
+   of SRC in the corresponding element of DST.  DST and SRC are
+   restrict-qualified.  __builtin_popcountll is used because its
+   unsigned long long argument is at least 64 bits wide on every ABI;
+   the unsigned long argument of __builtin_popcountl is only 32 bits
+   when long is 32 bits (e.g. with -mabi=ilp32), which would drop the
+   upper half of each element.  */
+void __attribute__ ((noinline, noclone))
+popcount_64 (unsigned int *restrict dst, uint64_t *restrict src, int size)
+{
+  for (int i = 0; i < size; ++i)
+    dst[i] = __builtin_popcountll (src[i]);
+}
+
+/* { dg-final { scan-assembler-times {\tcnt\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tcnt\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tuzp1\tz[0-9]+\.s, z[0-9]+\.s, z[0-9]+\.s\n} 1 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/popcount_1_run.c b/gcc/testsuite/gcc.target/aarch64/sve/popcount_1_run.c
new file mode 100644 (file)
index 0000000..7494dfd
--- /dev/null
@@ -0,0 +1,50 @@
+/* { dg-do run { target aarch64_sve_hw } } */
+/* { dg-options "-O2 -ftree-vectorize" } */
+
+#include "popcount_1.c"
+
+/* Declared directly; main calls it when a result is wrong.  */
+extern void abort (void) __attribute__ ((noreturn));
+
+/* Test vectors stored as consecutive pairs: a 32-bit input value followed
+   by the number of bits set in it.  */
+unsigned int data[] = {
+  0x11111100, 6,
+  0xe0e0f0f0, 14,
+  0x9900aab3, 13,
+  0x00040003, 3,
+  0x000e000c, 5,
+  0x22227777, 16,
+  0x12341234, 10,
+  0x0, 0
+};
+
+/* Check popcount_32 and popcount_64 against the (value, count) pairs in
+   DATA.  The 32-bit test uses every pair as is.  The 64-bit test joins
+   two neighbouring pairs: the first value forms the high half, the second
+   the low half, and the expected count is the sum of the two counts.  */
+int __attribute__ ((optimize (1)))
+main (void)
+{
+  unsigned int n32 = sizeof (data) / sizeof (data[0]) / 2;
+
+  uint32_t in32[n32];
+  unsigned int out32[n32];
+  for (unsigned int k = 0; k < n32; ++k)
+    {
+      const unsigned int *pair = &data[k * 2];
+      in32[k] = pair[0];
+      asm volatile ("" ::: "memory");
+    }
+  popcount_32 (out32, in32, n32);
+  for (unsigned int k = 0; k < n32; ++k)
+    {
+      const unsigned int *pair = &data[k * 2];
+      if (out32[k] != pair[1])
+        abort ();
+    }
+
+  unsigned int n64 = n32 / 2;
+  uint64_t in64[n64];
+  unsigned int out64[n64];
+  for (unsigned int k = 0; k < n64; ++k)
+    {
+      const unsigned int *quad = &data[k * 4];
+      in64[k] = ((uint64_t) quad[0] << 32) | quad[2];
+      asm volatile ("" ::: "memory");
+    }
+  popcount_64 (out64, in64, n64);
+  for (unsigned int k = 0; k < n64; ++k)
+    {
+      const unsigned int *quad = &data[k * 4];
+      if (out64[k] != quad[1] + quad[3])
+        abort ();
+    }
+
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/reduc_1.c b/gcc/testsuite/gcc.target/aarch64/sve/reduc_1.c
new file mode 100644 (file)
index 0000000..f86966b
--- /dev/null
@@ -0,0 +1,135 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -ftree-vectorize -ffast-math" } */
+
+#include <stdint.h>
+
+/* Define reduc_plus_<TYPE>, a noinline, noclone function that returns the
+   sum of the first N elements of A, accumulated in a TYPE variable that
+   starts at 0.  */
+#define DEF_REDUC_PLUS(TYPE)                   \
+TYPE __attribute__ ((noinline, noclone))       \
+reduc_plus_##TYPE (TYPE *a, int n)             \
+{                                              \
+  TYPE r = 0;                                  \
+  for (int i = 0; i < n; ++i)                  \
+    r += a[i];                                 \
+  return r;                                    \
+}
+
+/* Apply T to every element type used for the sum reduction: signed and
+   unsigned integers from 8 to 64 bits, then _Float16, float and double.  */
+#define TEST_PLUS(T)                           \
+  T (int8_t)                                   \
+  T (int16_t)                                  \
+  T (int32_t)                                  \
+  T (int64_t)                                  \
+  T (uint8_t)                                  \
+  T (uint16_t)                                 \
+  T (uint32_t)                                 \
+  T (uint64_t)                                 \
+  T (_Float16)                                 \
+  T (float)                                    \
+  T (double)
+
+TEST_PLUS (DEF_REDUC_PLUS)
+
+/* Define reduc_<NAME>_<TYPE>, a noinline, noclone function that starts
+   from the value 13 and, for each of the first N elements of A, replaces
+   the running result with a[i] whenever a[i] CMP_OP result holds.  The
+   initial 13 therefore takes part in the reduction.  */
+#define DEF_REDUC_MAXMIN(TYPE, NAME, CMP_OP)   \
+TYPE __attribute__ ((noinline, noclone))       \
+reduc_##NAME##_##TYPE (TYPE *a, int n)         \
+{                                              \
+  TYPE r = 13;                                 \
+  for (int i = 0; i < n; ++i)                  \
+    r = a[i] CMP_OP r ? a[i] : r;              \
+  return r;                                    \
+}
+
+/* Apply T to every (type, name, comparison) triple: "max" with > and
+   "min" with <, each over the same eleven element types as TEST_PLUS.  */
+#define TEST_MAXMIN(T)                         \
+  T (int8_t, max, >)                           \
+  T (int16_t, max, >)                          \
+  T (int32_t, max, >)                          \
+  T (int64_t, max, >)                          \
+  T (uint8_t, max, >)                          \
+  T (uint16_t, max, >)                         \
+  T (uint32_t, max, >)                         \
+  T (uint64_t, max, >)                         \
+  T (_Float16, max, >)                         \
+  T (float, max, >)                            \
+  T (double, max, >)                           \
+                                               \
+  T (int8_t, min, <)                           \
+  T (int16_t, min, <)                          \
+  T (int32_t, min, <)                          \
+  T (int64_t, min, <)                          \
+  T (uint8_t, min, <)                          \
+  T (uint16_t, min, <)                         \
+  T (uint32_t, min, <)                         \
+  T (uint64_t, min, <)                         \
+  T (_Float16, min, <)                         \
+  T (float, min, <)                            \
+  T (double, min, <)
+
+TEST_MAXMIN (DEF_REDUC_MAXMIN)
+
+/* { dg-final { scan-assembler-times {\tadd\tz[0-9]+\.b, z[0-9]+\.b, z[0-9]+\.b\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tadd\tz[0-9]+\.h, z[0-9]+\.h, z[0-9]+\.h\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tadd\tz[0-9]+\.s, z[0-9]+\.s, z[0-9]+\.s\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tadd\tz[0-9]+\.d, z[0-9]+\.d, z[0-9]+\.d\n} 2 } } */
+
+/* { dg-final { scan-assembler-times {\tsmin\tz[0-9]+\.b, p[0-7]/m, z[0-9]+\.b, z[0-9]+\.b\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tsmin\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tsmin\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tsmin\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, z[0-9]+\.d\n} 1 } } */
+
+/* { dg-final { scan-assembler-times {\tsmax\tz[0-9]+\.b, p[0-7]/m, z[0-9]+\.b, z[0-9]+\.b\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tsmax\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tsmax\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tsmax\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, z[0-9]+\.d\n} 1 } } */
+
+/* { dg-final { scan-assembler-times {\tumin\tz[0-9]+\.b, p[0-7]/m, z[0-9]+\.b, z[0-9]+\.b\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tumin\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tumin\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tumin\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, z[0-9]+\.d\n} 1 } } */
+
+/* { dg-final { scan-assembler-times {\tumax\tz[0-9]+\.b, p[0-7]/m, z[0-9]+\.b, z[0-9]+\.b\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tumax\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tumax\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tumax\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, z[0-9]+\.d\n} 1 } } */
+
+/* { dg-final { scan-assembler-times {\tfadd\tz[0-9]+\.h, z[0-9]+\.h, z[0-9]+\.h\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tfadd\tz[0-9]+\.s, z[0-9]+\.s, z[0-9]+\.s\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tfadd\tz[0-9]+\.d, z[0-9]+\.d, z[0-9]+\.d\n} 1 } } */
+
+/* { dg-final { scan-assembler-times {\tfmaxnm\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tfmaxnm\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tfmaxnm\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, z[0-9]+\.d\n} 1 } } */
+
+/* { dg-final { scan-assembler-times {\tfminnm\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tfminnm\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tfminnm\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, z[0-9]+\.d\n} 1 } } */
+
+/* { dg-final { scan-assembler-times {\tuaddv\td[0-9]+, p[0-7], z[0-9]+\.b\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tuaddv\td[0-9]+, p[0-7], z[0-9]+\.h\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tuaddv\td[0-9]+, p[0-7], z[0-9]+\.s\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tuaddv\td[0-9]+, p[0-7], z[0-9]+\.d\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tfaddv\th[0-9]+, p[0-7], z[0-9]+\.h\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tfaddv\ts[0-9]+, p[0-7], z[0-9]+\.s\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tfaddv\td[0-9]+, p[0-7], z[0-9]+\.d\n} 1 } } */
+
+/* { dg-final { scan-assembler-times {\tsmaxv\tb[0-9]+, p[0-7], z[0-9]+\.b\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tsmaxv\th[0-9]+, p[0-7], z[0-9]+\.h\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tsmaxv\ts[0-9]+, p[0-7], z[0-9]+\.s\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tsmaxv\td[0-9]+, p[0-7], z[0-9]+\.d\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tumaxv\tb[0-9]+, p[0-7], z[0-9]+\.b\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tumaxv\th[0-9]+, p[0-7], z[0-9]+\.h\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tumaxv\ts[0-9]+, p[0-7], z[0-9]+\.s\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tumaxv\td[0-9]+, p[0-7], z[0-9]+\.d\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tfmaxnmv\th[0-9]+, p[0-7], z[0-9]+\.h\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tfmaxnmv\ts[0-9]+, p[0-7], z[0-9]+\.s\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tfmaxnmv\td[0-9]+, p[0-7], z[0-9]+\.d\n} 1 } } */
+
+/* { dg-final { scan-assembler-times {\tsminv\tb[0-9]+, p[0-7], z[0-9]+\.b\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tsminv\th[0-9]+, p[0-7], z[0-9]+\.h\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tsminv\ts[0-9]+, p[0-7], z[0-9]+\.s\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tsminv\td[0-9]+, p[0-7], z[0-9]+\.d\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tuminv\tb[0-9]+, p[0-7], z[0-9]+\.b\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tuminv\th[0-9]+, p[0-7], z[0-9]+\.h\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tuminv\ts[0-9]+, p[0-7], z[0-9]+\.s\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tuminv\td[0-9]+, p[0-7], z[0-9]+\.d\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tfminnmv\th[0-9]+, p[0-7], z[0-9]+\.h\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tfminnmv\ts[0-9]+, p[0-7], z[0-9]+\.s\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tfminnmv\td[0-9]+, p[0-7], z[0-9]+\.d\n} 1 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/reduc_1_run.c b/gcc/testsuite/gcc.target/aarch64/sve/reduc_1_run.c
new file mode 100644 (file)
index 0000000..3fcb7fb
--- /dev/null
@@ -0,0 +1,44 @@
+/* { dg-do run { target { aarch64_sve_hw } } } */
+/* { dg-options "-O2 -ftree-vectorize -ffast-math" } */
+
+#include "reduc_1.c"
+
+#define NUM_ELEMS(TYPE) (73 + sizeof (TYPE)) /* Elements reduced per type: 73 plus the element size, so the count differs with TYPE's size.  */
+
+#define INIT_VECTOR(TYPE) /* Declare array a (NUM_ELEMS + 1 elements) and fill every element.  */ \
+  TYPE a[NUM_ELEMS (TYPE) + 1];                                \
+  for (int i = 0; i < NUM_ELEMS (TYPE) + 1; i++)       \
+    {                                                  \
+      a[i] = (i * 2) * (i & 1 ? 1 : -1); /* +2i for odd i, -2i for even i, converted to TYPE.  */ \
+      asm volatile ("" ::: "memory"); /* Empty asm with a memory clobber after each store; presumably keeps this setup loop from being vectorized.  */ \
+    }
+
+#define TEST_REDUC_PLUS(TYPE) /* Compare reduc_plus_TYPE with a scalar sum of the same data; abort on mismatch.  */ \
+  {                                                    \
+    INIT_VECTOR (TYPE);                                        \
+    TYPE r1 = reduc_plus_##TYPE (a, NUM_ELEMS (TYPE)); /* Function under test; not defined in this file, presumably by the included reduc_1.c.  */ \
+    volatile TYPE r2 = 0; /* Reference accumulator, volatile-qualified.  */ \
+    for (int i = 0; i < NUM_ELEMS (TYPE); ++i) /* Only the first NUM_ELEMS elements of a are summed.  */ \
+      r2 += a[i];                                      \
+    if (r1 != r2) /* Exact comparison for every type.  */ \
+      __builtin_abort ();                              \
+  }
+
+#define TEST_REDUC_MAXMIN(TYPE, NAME, CMP_OP) /* Compare reduc_NAME_TYPE with a scalar CMP_OP fold of the same data; abort on mismatch.  */ \
+  {                                                            \
+    INIT_VECTOR (TYPE);                                                \
+    TYPE r1 = reduc_##NAME##_##TYPE (a, NUM_ELEMS (TYPE));     \
+    volatile TYPE r2 = 13; /* Reference fold starts at 13; presumably the seed used by the functions in reduc_1.c -- TODO confirm.  */ \
+    for (int i = 0; i < NUM_ELEMS (TYPE); ++i)                 \
+      r2 = a[i] CMP_OP r2 ? a[i] : r2; /* Keep a[i] when it compares CMP_OP against the running value.  */ \
+    if (r1 != r2)                                              \
+      __builtin_abort ();                                      \
+  }
+
+int main () /* Expand every PLUS and MAX/MIN check in sequence; returns 0 if none aborted.  */
+{
+  TEST_PLUS (TEST_REDUC_PLUS) /* TEST_PLUS and TEST_MAXMIN are not defined in this file; presumably they come from the included reduc_1.c.  */
+  TEST_MAXMIN (TEST_REDUC_MAXMIN)
+
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/reduc_2.c b/gcc/testsuite/gcc.target/aarch64/sve/reduc_2.c
new file mode 100644 (file)
index 0000000..adc3699
--- /dev/null
@@ -0,0 +1,106 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -ftree-vectorize -ffast-math" } */
+
+#include <stdint.h>
+
+#define NUM_ELEMS(TYPE) (1024 / sizeof (TYPE)) /* Elements per row, so that each row occupies 1024 bytes.  */
+
+#define DEF_REDUC_PLUS(TYPE) /* Define reduc_plus_TYPE: for each of n rows, r[i] = sum of row a[i].  */ \
+void __attribute__ ((noinline, noclone))                       \
+reduc_plus_##TYPE (TYPE (*restrict a)[NUM_ELEMS (TYPE)],       \
+                  TYPE *restrict r, int n)                     \
+{                                                              \
+  for (int i = 0; i < n; i++)                                  \
+    {                                                          \
+      r[i] = 0;                                                        \
+      for (int j = 0; j < NUM_ELEMS (TYPE); j++) /* Inner trip count is the fixed row length.  */ \
+        r[i] += a[i][j]; /* Accumulates directly into r[i].  */ \
+    }                                                          \
+}
+
+#define TEST_PLUS(T) /* Apply macro T once to each element type covered by the PLUS reductions.  */ \
+  T (int8_t)                                   \
+  T (int16_t)                                  \
+  T (int32_t)                                  \
+  T (int64_t)                                  \
+  T (uint8_t)                                  \
+  T (uint16_t)                                 \
+  T (uint32_t)                                 \
+  T (uint64_t)                                 \
+  T (_Float16)                                 \
+  T (float)                                    \
+  T (double)
+
+TEST_PLUS (DEF_REDUC_PLUS) /* Defines one reduc_plus_* function per listed type.  */
+
+#define DEF_REDUC_MAXMIN(TYPE, NAME, CMP_OP) /* Define reduc_NAME_TYPE: for each of n rows, r[i] = row a[i] folded with CMP_OP.  */ \
+void __attribute__ ((noinline, noclone))                       \
+reduc_##NAME##_##TYPE (TYPE (*restrict a)[NUM_ELEMS (TYPE)],   \
+                      TYPE *restrict r, int n)                 \
+{                                                              \
+  for (int i = 0; i < n; i++)                                  \
+    {                                                          \
+      r[i] = a[i][0]; /* Seed with the first element of the row.  */ \
+      for (int j = 0; j < NUM_ELEMS (TYPE); j++) /* Starts at j = 0, so element 0 is also compared against the seed.  */ \
+        r[i] = a[i][j] CMP_OP r[i] ? a[i][j] : r[i];           \
+    }                                                          \
+}
+
+#define TEST_MAXMIN(T) /* Apply macro T to each (type, name, comparison) triple: max uses >, min uses <.  */ \
+  T (int8_t, max, >)                           \
+  T (int16_t, max, >)                          \
+  T (int32_t, max, >)                          \
+  T (int64_t, max, >)                          \
+  T (uint8_t, max, >)                          \
+  T (uint16_t, max, >)                         \
+  T (uint32_t, max, >)                         \
+  T (uint64_t, max, >)                         \
+  T (_Float16, max, >)                         \
+  T (float, max, >)                            \
+  T (double, max, >)                           \
+                                               \
+  T (int8_t, min, <)                           \
+  T (int16_t, min, <)                          \
+  T (int32_t, min, <)                          \
+  T (int64_t, min, <)                          \
+  T (uint8_t, min, <)                          \
+  T (uint16_t, min, <)                         \
+  T (uint32_t, min, <)                         \
+  T (uint64_t, min, <)                         \
+  T (_Float16, min, <)                         \
+  T (float, min, <)                            \
+  T (double, min, <)
+
+TEST_MAXMIN (DEF_REDUC_MAXMIN) /* Defines one reduc_max_* and one reduc_min_* function per listed type.  */
+
+/* { dg-final { scan-assembler-times {\tuaddv\td[0-9]+, p[0-7], z[0-9]+\.b\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tuaddv\td[0-9]+, p[0-7], z[0-9]+\.h\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tuaddv\td[0-9]+, p[0-7], z[0-9]+\.s\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tuaddv\td[0-9]+, p[0-7], z[0-9]+\.d\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tfaddv\th[0-9]+, p[0-7], z[0-9]+\.h\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tfaddv\ts[0-9]+, p[0-7], z[0-9]+\.s\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tfaddv\td[0-9]+, p[0-7], z[0-9]+\.d\n} 1 } } */
+
+/* { dg-final { scan-assembler-times {\tsmaxv\tb[0-9]+, p[0-7], z[0-9]+\.b\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tsmaxv\th[0-9]+, p[0-7], z[0-9]+\.h\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tsmaxv\ts[0-9]+, p[0-7], z[0-9]+\.s\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tsmaxv\td[0-9]+, p[0-7], z[0-9]+\.d\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tumaxv\tb[0-9]+, p[0-7], z[0-9]+\.b\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tumaxv\th[0-9]+, p[0-7], z[0-9]+\.h\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tumaxv\ts[0-9]+, p[0-7], z[0-9]+\.s\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tumaxv\td[0-9]+, p[0-7], z[0-9]+\.d\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tfmaxnmv\th[0-9]+, p[0-7], z[0-9]+\.h\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tfmaxnmv\ts[0-9]+, p[0-7], z[0-9]+\.s\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tfmaxnmv\td[0-9]+, p[0-7], z[0-9]+\.d\n} 1 } } */
+
+/* { dg-final { scan-assembler-times {\tsminv\tb[0-9]+, p[0-7], z[0-9]+\.b\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tsminv\th[0-9]+, p[0-7], z[0-9]+\.h\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tsminv\ts[0-9]+, p[0-7], z[0-9]+\.s\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tsminv\td[0-9]+, p[0-7], z[0-9]+\.d\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tuminv\tb[0-9]+, p[0-7], z[0-9]+\.b\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tuminv\th[0-9]+, p[0-7], z[0-9]+\.h\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tuminv\ts[0-9]+, p[0-7], z[0-9]+\.s\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tuminv\td[0-9]+, p[0-7], z[0-9]+\.d\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tfminnmv\th[0-9]+, p[0-7], z[0-9]+\.h\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tfminnmv\ts[0-9]+, p[0-7], z[0-9]+\.s\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tfminnmv\td[0-9]+, p[0-7], z[0-9]+\.d\n} 1 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/reduc_2_run.c b/gcc/testsuite/gcc.target/aarch64/sve/reduc_2_run.c
new file mode 100644 (file)
index 0000000..f48e348
--- /dev/null
@@ -0,0 +1,65 @@
+/* { dg-do run { target { aarch64_sve_hw } } } */
+/* { dg-options "-O2 -ftree-vectorize -ffast-math" } */
+
+#include "reduc_2.c"
+
+#define NROWS 53 /* Number of matrix rows built and reduced by each check.  */
+
+/* Result comparison for PLUS, one CMP_<type> macro per element type.  _Float16 gets a -ffast-math fuzz: X must lie in [0.875 * Y, 1.125 * Y]; every other type must match exactly.  NOTE(review): the _Float16 window is empty when Y is negative; the _Float16 row sums built by INIT_MATRIX are positive, so that case does not arise here.  */
+#define CMP__Float16(X, Y) ((X) >= (Y) * 0.875 && (X) <= (Y) * 1.125)
+#define CMP_float(X, Y) ((X) == (Y))
+#define CMP_double(X, Y) ((X) == (Y))
+#define CMP_int8_t(X, Y) ((X) == (Y))
+#define CMP_int16_t(X, Y) ((X) == (Y))
+#define CMP_int32_t(X, Y) ((X) == (Y))
+#define CMP_int64_t(X, Y) ((X) == (Y))
+#define CMP_uint8_t(X, Y) ((X) == (Y))
+#define CMP_uint16_t(X, Y) ((X) == (Y))
+#define CMP_uint32_t(X, Y) ((X) == (Y))
+#define CMP_uint64_t(X, Y) ((X) == (Y))
+
+#define INIT_MATRIX(TYPE) /* Declare mat (NROWS x NUM_ELEMS) and result array r, then fill mat; r is left for the reduction call to write.  */ \
+  TYPE mat[NROWS][NUM_ELEMS (TYPE)];                   \
+  TYPE r[NROWS];                                       \
+  for (int i = 0; i < NROWS; i++)                      \
+    for (int j = 0; j < NUM_ELEMS (TYPE); j++)         \
+      {                                                        \
+       mat[i][j] = i + (j * 2) * (j & 1 ? 1 : -1); /* Row index plus +2j for odd j or -2j for even j, converted to TYPE.  */ \
+       asm volatile ("" ::: "memory"); /* Memory clobber after each store; presumably keeps this setup loop from being vectorized.  */ \
+      }
+
+#define TEST_REDUC_PLUS(TYPE) /* Run reduc_plus_TYPE on a fresh matrix and check every row result with CMP_TYPE; abort on mismatch.  */ \
+  {                                                    \
+    INIT_MATRIX (TYPE);                                        \
+    reduc_plus_##TYPE (mat, r, NROWS); /* Function under test writes r[0..NROWS-1].  */ \
+    for (int i = 0; i < NROWS; i++)                    \
+      {                                                        \
+       volatile TYPE r2 = 0; /* Scalar reference sum for row i, volatile-qualified.  */ \
+       for (int j = 0; j < NUM_ELEMS (TYPE); ++j)      \
+         r2 += mat[i][j];                              \
+       if (!CMP_##TYPE (r[i], r2)) /* Per-type comparison: fuzzy for _Float16, exact otherwise.  */ \
+         __builtin_abort ();                           \
+      }                                                        \
+    }
+
+#define TEST_REDUC_MAXMIN(TYPE, NAME, CMP_OP) /* Run reduc_NAME_TYPE on a fresh matrix and check every row result against a scalar CMP_OP fold.  */ \
+  {                                                    \
+    INIT_MATRIX (TYPE);                                        \
+    reduc_##NAME##_##TYPE (mat, r, NROWS);             \
+    for (int i = 0; i < NROWS; i++)                    \
+      {                                                        \
+       volatile TYPE r2 = mat[i][0]; /* Reference fold is seeded with the row's first element.  */ \
+       for (int j = 0; j < NUM_ELEMS (TYPE); ++j)      \
+         r2 = mat[i][j] CMP_OP r2 ? mat[i][j] : r2;    \
+       if (r[i] != r2) /* Exact comparison; no fuzz is applied for max/min.  */ \
+         __builtin_abort ();                           \
+      }                                                        \
+    }
+
+int main () /* Expand every PLUS and MAX/MIN check in sequence; returns 0 if none aborted.  */
+{
+  TEST_PLUS (TEST_REDUC_PLUS) /* TEST_PLUS and TEST_MAXMIN come from the included reduc_2.c.  */
+  TEST_MAXMIN (TEST_REDUC_MAXMIN)
+
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/reduc_3.c b/gcc/testsuite/gcc.target/aarch64/sve/reduc_3.c
new file mode 100644 (file)
index 0000000..4561199
--- /dev/null
@@ -0,0 +1,52 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -ftree-vectorize -ffast-math" } */
+
+#include <stdint.h>
+
+#define NUM_ELEMS(TYPE) (32 / sizeof (TYPE)) /* NOTE(review): not referenced by anything else in this test file.  */
+
+#define REDUC_PTR(DSTTYPE, SRCTYPE) /* Define reduc_ptr_DSTTYPE_SRCTYPE: *sum = sum of array[0..count-1].  */ \
+void reduc_ptr_##DSTTYPE##_##SRCTYPE (DSTTYPE *restrict sum,   \
+                                     SRCTYPE *restrict array,  \
+                                     int count)                \
+{                                                              \
+  *sum = 0;                                                    \
+  for (int i = 0; i < count; ++i)                              \
+    *sum += array[i]; /* Accumulates through the pointer; the running total is stored as DSTTYPE.  */ \
+}
+
+REDUC_PTR (int8_t, int8_t) /* Same-type reductions: destination and source types match.  */
+REDUC_PTR (int16_t, int16_t)
+
+REDUC_PTR (int32_t, int32_t)
+REDUC_PTR (int64_t, int64_t)
+
+REDUC_PTR (_Float16, _Float16)
+REDUC_PTR (float, float)
+REDUC_PTR (double, double)
+
+/* Widening reductions: the destination type is wider than the source type.  */
+REDUC_PTR (int32_t, int8_t)
+REDUC_PTR (int32_t, int16_t)
+
+REDUC_PTR (int64_t, int8_t)
+REDUC_PTR (int64_t, int16_t)
+REDUC_PTR (int64_t, int32_t)
+
+REDUC_PTR (float, _Float16)
+REDUC_PTR (double, float)
+
+/* Float<>Int conversions: same-width integer and floating-point types in both directions.  */
+REDUC_PTR (_Float16, int16_t)
+REDUC_PTR (float, int32_t)
+REDUC_PTR (double, int64_t)
+
+REDUC_PTR (int16_t, _Float16)
+REDUC_PTR (int32_t, float)
+REDUC_PTR (int64_t, double)
+
+/* { dg-final { scan-assembler-times {\tuaddv\td[0-9]+, p[0-7], z[0-9]+\.s\n} 3 } } */
+/* { dg-final { scan-assembler-times {\tuaddv\td[0-9]+, p[0-7], z[0-9]+\.d\n} 4 } } */
+/* { dg-final { scan-assembler-times {\tfaddv\th[0-9]+, p[0-7], z[0-9]+\.h\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tfaddv\ts[0-9]+, p[0-7], z[0-9]+\.s\n} 3 } } */
+/* { dg-final { scan-assembler-times {\tfaddv\td[0-9]+, p[0-7], z[0-9]+\.d\n} 3 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/rev_1.c b/gcc/testsuite/gcc.target/aarch64/sve/rev_1.c
new file mode 100644 (file)
index 0000000..732ca70
--- /dev/null
@@ -0,0 +1,49 @@
+/* { dg-do assemble { target aarch64_asm_sve_ok } } */
+/* { dg-options "-O -msve-vector-bits=256 --save-temps" } */
+
+#include <stdint.h>
+
+typedef int8_t vnx16qi __attribute__((vector_size (32))); /* Every vector type here is 32 bytes (256 bits), the size given by -msve-vector-bits=256 in the options.  */
+typedef int16_t vnx8hi __attribute__((vector_size (32)));
+typedef int32_t vnx4si __attribute__((vector_size (32)));
+typedef int64_t vnx2di __attribute__((vector_size (32)));
+typedef _Float16 vnx8hf __attribute__((vector_size (32)));
+typedef float vnx4sf __attribute__((vector_size (32)));
+typedef double vnx2df __attribute__((vector_size (32)));
+
+#define MASK_2(X, Y) (Y) - 1 - (X), (Y) - 2 - (X) /* Two descending indices: Y-1-X, then Y-2-X.  */
+#define MASK_4(X, Y) MASK_2 (X, Y), MASK_2 (X + 2, Y) /* Each MASK_<N> is two MASK_<N/2> halves, so MASK_<N> (0, N) lists N-1 down to 0.  */
+#define MASK_8(X, Y) MASK_4 (X, Y), MASK_4 (X + 4, Y)
+#define MASK_16(X, Y) MASK_8 (X, Y), MASK_8 (X + 8, Y)
+#define MASK_32(X, Y) MASK_16 (X, Y), MASK_16 (X + 16, Y)
+
+#define INDEX_32 vnx16qi /* INDEX_<N>: the integer vector type with N elements, used as the shuffle selector type.  */
+#define INDEX_16 vnx8hi
+#define INDEX_8 vnx4si
+#define INDEX_4 vnx2di
+
+#define PERMUTE(TYPE, NUNITS) /* Define permute_TYPE: shuffle with selector NUNITS-1 ... 0, i.e. a full element reversal.  */ \
+  TYPE permute_##TYPE (TYPE values1, TYPE values2)                     \
+  {                                                                    \
+    return __builtin_shuffle                                           \
+      (values1, values2,                                               \
+       ((INDEX_##NUNITS) { MASK_##NUNITS (0, NUNITS) })); /* Every index is below NUNITS, so only values1 elements are selected.  */ \
+  }
+
+#define TEST_ALL(T) /* Apply macro T to each (vector type, element count) pair.  */ \
+  T (vnx16qi, 32)                              \
+  T (vnx8hi, 16)                               \
+  T (vnx4si, 8)                                        \
+  T (vnx2di, 4)                                        \
+  T (vnx8hf, 16)                               \
+  T (vnx4sf, 8)                                        \
+  T (vnx2df, 4)
+
+TEST_ALL (PERMUTE) /* Defines one permute_* function per vector type.  */
+
+/* { dg-final { scan-assembler-not {\ttbl\t} } } */
+
+/* { dg-final { scan-assembler-times {\trev\tz[0-9]+\.b, z[0-9]+\.b\n} 1 } } */
+/* { dg-final { scan-assembler-times {\trev\tz[0-9]+\.h, z[0-9]+\.h\n} 2 } } */
+/* { dg-final { scan-assembler-times {\trev\tz[0-9]+\.s, z[0-9]+\.s\n} 2 } } */
+/* { dg-final { scan-assembler-times {\trev\tz[0-9]+\.d, z[0-9]+\.d\n} 2 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/revb_1.c b/gcc/testsuite/gcc.target/aarch64/sve/revb_1.c
new file mode 100644 (file)
index 0000000..1a3d9b4
--- /dev/null
@@ -0,0 +1,35 @@
+/* { dg-do assemble { target aarch64_asm_sve_ok } } */
+/* { dg-options "-O -msve-vector-bits=256 --save-temps" } */
+
+#include <stdint.h>
+
+typedef int8_t vnx16qi __attribute__((vector_size (32))); /* 32 one-byte elements (256 bits).  */
+
+#define MASK_2(X, Y) (X) ^ (Y), (X + 1) ^ (Y) /* Two consecutive indices, each XORed with Y.  */
+#define MASK_4(X, Y) MASK_2 (X, Y), MASK_2 (X + 2, Y) /* Each MASK_<N> is two MASK_<N/2> halves covering indices X .. X+N-1.  */
+#define MASK_8(X, Y) MASK_4 (X, Y), MASK_4 (X + 4, Y)
+#define MASK_16(X, Y) MASK_8 (X, Y), MASK_8 (X + 8, Y)
+#define MASK_32(X, Y) MASK_16 (X, Y), MASK_16 (X + 16, Y)
+
+#define INDEX_32 vnx16qi /* Selector vector type for 32-element shuffles.  */
+
+#define PERMUTE(TYPE, NUNITS, REV_NUNITS) /* Define permute_TYPE_REV_NUNITS: element i is taken from index i ^ (REV_NUNITS - 1).  */ \
+  TYPE permute_##TYPE##_##REV_NUNITS (TYPE values1, TYPE values2)      \
+  {                                                                    \
+    return __builtin_shuffle                                           \
+      (values1, values2,                                               \
+       ((INDEX_##NUNITS) { MASK_##NUNITS (0, REV_NUNITS - 1) })); /* For the power-of-two REV_NUNITS used here this reverses the bytes within each group of REV_NUNITS.  */ \
+  }
+
+#define TEST_ALL(T) /* Byte vectors reversed within groups of 2, 4 and 8 bytes.  */ \
+  T (vnx16qi, 32, 2)                           \
+  T (vnx16qi, 32, 4)                           \
+  T (vnx16qi, 32, 8)
+
+TEST_ALL (PERMUTE) /* Defines permute_vnx16qi_2, _4 and _8.  */
+
+/* { dg-final { scan-assembler-not {\ttbl\t} } } */
+
+/* { dg-final { scan-assembler-times {\trevb\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d} 1 } } */
+/* { dg-final { scan-assembler-times {\trevb\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s} 1 } } */
+/* { dg-final { scan-assembler-times {\trevb\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h} 1 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/revh_1.c b/gcc/testsuite/gcc.target/aarch64/sve/revh_1.c
new file mode 100644 (file)
index 0000000..7614581
--- /dev/null
@@ -0,0 +1,36 @@
+/* { dg-do assemble { target aarch64_asm_sve_ok } } */
+/* { dg-options "-O -msve-vector-bits=256 --save-temps" } */
+
+#include <stdint.h>
+
+typedef uint16_t vnx8hi __attribute__((vector_size (32))); /* 16 two-byte elements (256 bits).  */
+typedef _Float16 vnx8hf __attribute__((vector_size (32)));
+
+#define MASK_2(X, Y) (X) ^ (Y), (X + 1) ^ (Y) /* Two consecutive indices, each XORed with Y.  */
+#define MASK_4(X, Y) MASK_2 (X, Y), MASK_2 (X + 2, Y) /* Each MASK_<N> is two MASK_<N/2> halves covering indices X .. X+N-1.  */
+#define MASK_8(X, Y) MASK_4 (X, Y), MASK_4 (X + 4, Y)
+#define MASK_16(X, Y) MASK_8 (X, Y), MASK_8 (X + 8, Y)
+#define MASK_32(X, Y) MASK_16 (X, Y), MASK_16 (X + 16, Y) /* NOTE(review): the PERMUTE instances in this file only use NUNITS = 16, so MASK_32 appears unused.  */
+
+#define INDEX_16 vnx8hi /* Selector vector type for 16-element shuffles (integer type, also used for the _Float16 vectors).  */
+
+#define PERMUTE(TYPE, NUNITS, REV_NUNITS) /* Define permute_TYPE_REV_NUNITS: element i is taken from index i ^ (REV_NUNITS - 1).  */ \
+  TYPE permute_##TYPE##_##REV_NUNITS (TYPE values1, TYPE values2)      \
+  {                                                                    \
+    return __builtin_shuffle                                           \
+      (values1, values2,                                               \
+       ((INDEX_##NUNITS) { MASK_##NUNITS (0, REV_NUNITS - 1) })); /* For the power-of-two REV_NUNITS used here this reverses the halfwords within each group of REV_NUNITS.  */ \
+  }
+
+#define TEST_ALL(T) /* Integer and _Float16 halfword vectors, reversed within groups of 2 and 4 elements.  */ \
+  T (vnx8hi, 16, 2)                            \
+  T (vnx8hi, 16, 4)                            \
+  T (vnx8hf, 16, 2)                            \
+  T (vnx8hf, 16, 4)
+
+TEST_ALL (PERMUTE) /* Defines four permute_* functions.  */
+
+/* { dg-final { scan-assembler-not {\ttbl\t} } } */
+
+/* { dg-final { scan-assembler-times {\trevh\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d} 2 } } */
+/* { dg-final { scan-assembler-times {\trevh\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s} 2 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/revw_1.c b/gcc/testsuite/gcc.target/aarch64/sve/revw_1.c
new file mode 100644 (file)
index 0000000..8ac68b7
--- /dev/null
@@ -0,0 +1,31 @@
+/* { dg-do assemble { target aarch64_asm_sve_ok } } */
+/* { dg-options "-O -msve-vector-bits=256 --save-temps" } */
+
+#include <stdint.h>
+
+typedef uint32_t vnx4si __attribute__((vector_size (32))); /* 8 four-byte elements (256 bits).  */
+typedef float vnx4sf __attribute__((vector_size (32)));
+
+#define MASK_2(X, Y) (X) ^ (Y), (X + 1) ^ (Y) /* Two consecutive indices, each XORed with Y.  */
+#define MASK_4(X, Y) MASK_2 (X, Y), MASK_2 (X + 2, Y) /* Each MASK_<N> is two MASK_<N/2> halves covering indices X .. X+N-1.  */
+#define MASK_8(X, Y) MASK_4 (X, Y), MASK_4 (X + 4, Y)
+
+#define INDEX_8 vnx4si /* Selector vector type for 8-element shuffles (integer type, also used for the float vectors).  */
+
+#define PERMUTE(TYPE, NUNITS, REV_NUNITS) /* Define permute_TYPE_REV_NUNITS: element i is taken from index i ^ (REV_NUNITS - 1).  */ \
+  TYPE permute_##TYPE##_##REV_NUNITS (TYPE values1, TYPE values2)      \
+  {                                                                    \
+    return __builtin_shuffle                                           \
+      (values1, values2,                                               \
+       ((INDEX_##NUNITS) { MASK_##NUNITS (0, REV_NUNITS - 1) })); /* With REV_NUNITS = 2 this swaps each adjacent pair of words.  */ \
+  }
+
+#define TEST_ALL(T) /* Integer and float word vectors, swapped within pairs.  */ \
+  T (vnx4si, 8, 2)                             \
+  T (vnx4sf, 8, 2)
+
+TEST_ALL (PERMUTE) /* Defines permute_vnx4si_2 and permute_vnx4sf_2.  */
+
+/* { dg-final { scan-assembler-not {\ttbl\t} } } */
+
+/* { dg-final { scan-assembler-times {\trevw\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d} 2 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/shift_1.c b/gcc/testsuite/gcc.target/aarch64/sve/shift_1.c
new file mode 100644 (file)
index 0000000..f4c5ebd
--- /dev/null
@@ -0,0 +1,108 @@
+/* { dg-do assemble { target aarch64_asm_sve_ok } } */
+/* { dg-options "-O2 -ftree-vectorize --save-temps" } */
+
+#include <stdint.h>
+
+#define DO_REG_OPS(TYPE) /* Define six in-place shift loops for TYPE: three with one scalar amount, three (v-prefixed) with a per-element amount.  */ \
+void ashiftr_##TYPE (TYPE *dst, TYPE src, int count) /* Right shift of signed elements by scalar src.  */ \
+{                                                              \
+  for (int i = 0; i < count; ++i)                              \
+    dst[i] = dst[i] >> src;                                    \
+}                                                              \
+void lshiftr_##TYPE (u##TYPE *dst, u##TYPE src, int count) /* u##TYPE pastes a u prefix, e.g. int32_t becomes uint32_t.  */ \
+{                                                              \
+  for (int i = 0; i < count; ++i)                              \
+    dst[i] = dst[i] >> src;                                    \
+}                                                              \
+void lshiftl_##TYPE (u##TYPE *dst, u##TYPE src, int count) /* Left shift of unsigned elements by scalar src.  */ \
+{                                                              \
+  for (int i = 0; i < count; ++i)                              \
+    dst[i] = dst[i] << src;                                    \
+}                                                              \
+void vashiftr_##TYPE (TYPE *dst, TYPE *src, int count) /* Right shift of signed elements by src[i].  */ \
+{                                                              \
+  for (int i = 0; i < count; ++i)                              \
+    dst[i] = dst[i] >> src[i];                                 \
+}                                                              \
+void vlshiftr_##TYPE (u##TYPE *dst, u##TYPE *src, int count) /* Right shift of unsigned elements by src[i].  */ \
+{                                                              \
+  for (int i = 0; i < count; ++i)                              \
+    dst[i] = dst[i] >> src[i];                                 \
+}                                                              \
+void vlshiftl_##TYPE (u##TYPE *dst, u##TYPE *src, int count) /* Left shift of unsigned elements by src[i].  */ \
+{                                                              \
+  for (int i = 0; i < count; ++i)                              \
+    dst[i] = dst[i] << src[i];                                 \
+}
+
+#define DO_IMMEDIATE_OPS(VALUE, TYPE, NAME) /* Define three in-place loops that shift by the constant VALUE; NAME only distinguishes the function names.  */ \
+void vashiftr_imm_##NAME##_##TYPE (TYPE *dst, int count) /* Right shift of signed elements.  */ \
+{                                                              \
+  for (int i = 0; i < count; ++i)                              \
+    dst[i] = dst[i] >> VALUE;                                  \
+}                                                              \
+void vlshiftr_imm_##NAME##_##TYPE (u##TYPE *dst, int count) /* Right shift of unsigned elements.  */ \
+{                                                              \
+  for (int i = 0; i < count; ++i)                              \
+    dst[i] = dst[i] >> VALUE;                                  \
+}                                                              \
+void vlshiftl_imm_##NAME##_##TYPE (u##TYPE *dst, int count) /* Left shift of unsigned elements.  */ \
+{                                                              \
+  for (int i = 0; i < count; ++i)                              \
+    dst[i] = dst[i] << VALUE;                                  \
+}
+
+DO_REG_OPS (int32_t); /* Register-amount shifts are instantiated for 32-bit and 64-bit elements only.  */
+DO_REG_OPS (int64_t);
+
+DO_IMMEDIATE_OPS (0, int8_t, 0); /* Each element size is shifted by 0, by 5, and by its bit width minus one.  */
+DO_IMMEDIATE_OPS (5, int8_t, 5);
+DO_IMMEDIATE_OPS (7, int8_t, 7);
+
+DO_IMMEDIATE_OPS (0, int16_t, 0);
+DO_IMMEDIATE_OPS (5, int16_t, 5);
+DO_IMMEDIATE_OPS (15, int16_t, 15);
+
+DO_IMMEDIATE_OPS (0, int32_t, 0);
+DO_IMMEDIATE_OPS (5, int32_t, 5);
+DO_IMMEDIATE_OPS (31, int32_t, 31);
+
+DO_IMMEDIATE_OPS (0, int64_t, 0);
+DO_IMMEDIATE_OPS (5, int64_t, 5);
+DO_IMMEDIATE_OPS (63, int64_t, 63);
+
+/* { dg-final { scan-assembler-times {\tasr\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tlsr\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tlsl\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 2 } } */
+
+/* { dg-final { scan-assembler-times {\tasr\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, z[0-9]+\.d\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tlsr\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, z[0-9]+\.d\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tlsl\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, z[0-9]+\.d\n} 2 } } */
+
+/* { dg-final { scan-assembler-times {\tasr\tz[0-9]+\.b, z[0-9]+\.b, #5\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tlsr\tz[0-9]+\.b, z[0-9]+\.b, #5\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tlsl\tz[0-9]+\.b, z[0-9]+\.b, #5\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tasr\tz[0-9]+\.b, z[0-9]+\.b, #7\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tlsr\tz[0-9]+\.b, z[0-9]+\.b, #7\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tlsl\tz[0-9]+\.b, z[0-9]+\.b, #7\n} 1 } } */
+
+/* { dg-final { scan-assembler-times {\tasr\tz[0-9]+\.h, z[0-9]+\.h, #5\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tlsr\tz[0-9]+\.h, z[0-9]+\.h, #5\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tlsl\tz[0-9]+\.h, z[0-9]+\.h, #5\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tasr\tz[0-9]+\.h, z[0-9]+\.h, #15\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tlsr\tz[0-9]+\.h, z[0-9]+\.h, #15\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tlsl\tz[0-9]+\.h, z[0-9]+\.h, #15\n} 1 } } */
+
+/* { dg-final { scan-assembler-times {\tasr\tz[0-9]+\.s, z[0-9]+\.s, #5\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tlsr\tz[0-9]+\.s, z[0-9]+\.s, #5\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tlsl\tz[0-9]+\.s, z[0-9]+\.s, #5\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tasr\tz[0-9]+\.s, z[0-9]+\.s, #31\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tlsr\tz[0-9]+\.s, z[0-9]+\.s, #31\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tlsl\tz[0-9]+\.s, z[0-9]+\.s, #31\n} 1 } } */
+
+/* { dg-final { scan-assembler-times {\tasr\tz[0-9]+\.d, z[0-9]+\.d, #5\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tlsr\tz[0-9]+\.d, z[0-9]+\.d, #5\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tlsl\tz[0-9]+\.d, z[0-9]+\.d, #5\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tasr\tz[0-9]+\.d, z[0-9]+\.d, #63\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tlsr\tz[0-9]+\.d, z[0-9]+\.d, #63\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tlsl\tz[0-9]+\.d, z[0-9]+\.d, #63\n} 1 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/single_1.c b/gcc/testsuite/gcc.target/aarch64/sve/single_1.c
new file mode 100644 (file)
index 0000000..a5dd5eb
--- /dev/null
@@ -0,0 +1,56 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -ftree-vectorize -fopenmp-simd -msve-vector-bits=256" } */
+
+#ifndef N /* A file that includes this one may define N first.  */
+#define N 32 /* Default number of bytes stored by each test loop (the trip count is N / sizeof (TYPE)).  */
+#endif
+
+#include <stdint.h>
+
+#define TEST_LOOP(TYPE, VALUE) /* Define test_TYPE: store VALUE into the first N / sizeof (TYPE) elements of data.  */ \
+  void                                         \
+  test_##TYPE (TYPE *data)                     \
+  {                                            \
+    _Pragma ("omp simd") /* Pairs with -fopenmp-simd in the test options.  */ \
+    for (int i = 0; i < N / sizeof (TYPE); ++i)        \
+      data[i] = VALUE;                         \
+  }
+
+TEST_LOOP (uint8_t, 1) /* Each type stores a different constant, so the scan patterns can tell the loops apart.  */
+TEST_LOOP (int8_t, 2)
+TEST_LOOP (uint16_t, 3)
+TEST_LOOP (int16_t, 4)
+TEST_LOOP (uint32_t, 5)
+TEST_LOOP (int32_t, 6)
+TEST_LOOP (uint64_t, 7)
+TEST_LOOP (int64_t, 8)
+TEST_LOOP (_Float16, 1.0f)
+TEST_LOOP (float, 2.0f)
+TEST_LOOP (double, 3.0)
+
+/* { dg-final { scan-assembler-times {\tmov\tz[0-9]+\.b, #1\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tmov\tz[0-9]+\.b, #2\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tmov\tz[0-9]+\.h, #3\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tmov\tz[0-9]+\.h, #4\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tmov\tz[0-9]+\.s, #5\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tmov\tz[0-9]+\.s, #6\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tmov\tz[0-9]+\.d, #7\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tmov\tz[0-9]+\.d, #8\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tmov\tz[0-9]+\.h, #15360\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tfmov\tz[0-9]+\.s, #2\.0e\+0\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tfmov\tz[0-9]+\.d, #3\.0e\+0\n} 1 } } */
+
+/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.b, vl32\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.h, vl16\n} 3 } } */
+/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.s, vl8\n} 3 } } */
+/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.d, vl4\n} 3 } } */
+
+/* { dg-final { scan-assembler-times {\tst1b\tz[0-9]+\.b,} 2 } } */
+/* { dg-final { scan-assembler-times {\tst1h\tz[0-9]+\.h,} 3 } } */
+/* { dg-final { scan-assembler-times {\tst1w\tz[0-9]+\.s,} 3 } } */
+/* { dg-final { scan-assembler-times {\tst1d\tz[0-9]+\.d,} 3 } } */
+
+/* { dg-final { scan-assembler-not {\twhile} } } */
+/* { dg-final { scan-assembler-not {\tb} } } */
+/* { dg-final { scan-assembler-not {\tcmp} } } */
+/* { dg-final { scan-assembler-not {\tindex} } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/single_2.c b/gcc/testsuite/gcc.target/aarch64/sve/single_2.c
new file mode 100644 (file)
index 0000000..1ef72b6
--- /dev/null
@@ -0,0 +1,32 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -ftree-vectorize -fopenmp-simd -msve-vector-bits=512" } */
+
+#define N 64 /* 64 bytes = 512 bits, the vector size given by -msve-vector-bits=512 in the options.  */
+#include "single_1.c" /* Reuses the single_1.c loops with this N.  */
+
+/* { dg-final { scan-assembler-times {\tmov\tz[0-9]+\.b, #1\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tmov\tz[0-9]+\.b, #2\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tmov\tz[0-9]+\.h, #3\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tmov\tz[0-9]+\.h, #4\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tmov\tz[0-9]+\.s, #5\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tmov\tz[0-9]+\.s, #6\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tmov\tz[0-9]+\.d, #7\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tmov\tz[0-9]+\.d, #8\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tmov\tz[0-9]+\.h, #15360\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tfmov\tz[0-9]+\.s, #2\.0e\+0\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tfmov\tz[0-9]+\.d, #3\.0e\+0\n} 1 } } */
+
+/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.b, vl64\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.h, vl32\n} 3 } } */
+/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.s, vl16\n} 3 } } */
+/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.d, vl8\n} 3 } } */
+
+/* { dg-final { scan-assembler-times {\tst1b\tz[0-9]+\.b,} 2 } } */
+/* { dg-final { scan-assembler-times {\tst1h\tz[0-9]+\.h,} 3 } } */
+/* { dg-final { scan-assembler-times {\tst1w\tz[0-9]+\.s,} 3 } } */
+/* { dg-final { scan-assembler-times {\tst1d\tz[0-9]+\.d,} 3 } } */
+
+/* { dg-final { scan-assembler-not {\twhile} } } */
+/* { dg-final { scan-assembler-not {\tb} } } */
+/* { dg-final { scan-assembler-not {\tcmp} } } */
+/* { dg-final { scan-assembler-not {\tindex} } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/single_3.c b/gcc/testsuite/gcc.target/aarch64/sve/single_3.c
new file mode 100644 (file)
index 0000000..fefbfbf
--- /dev/null
@@ -0,0 +1,32 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -ftree-vectorize -fopenmp-simd -msve-vector-bits=1024" } */
+/* Build the shared single_1.c test body with N predefined as 128, the byte length of the 1024-bit vectors requested in dg-options.  */
+#define N 128
+#include "single_1.c"
+
+/* { dg-final { scan-assembler-times {\tmov\tz[0-9]+\.b, #1\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tmov\tz[0-9]+\.b, #2\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tmov\tz[0-9]+\.h, #3\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tmov\tz[0-9]+\.h, #4\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tmov\tz[0-9]+\.s, #5\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tmov\tz[0-9]+\.s, #6\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tmov\tz[0-9]+\.d, #7\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tmov\tz[0-9]+\.d, #8\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tmov\tz[0-9]+\.h, #15360\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tfmov\tz[0-9]+\.s, #2\.0e\+0\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tfmov\tz[0-9]+\.d, #3\.0e\+0\n} 1 } } */
+
+/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.b, vl128\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.h, vl64\n} 3 } } */
+/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.s, vl32\n} 3 } } */
+/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.d, vl16\n} 3 } } */
+
+/* { dg-final { scan-assembler-times {\tst1b\tz[0-9]+\.b,} 2 } } */
+/* { dg-final { scan-assembler-times {\tst1h\tz[0-9]+\.h,} 3 } } */
+/* { dg-final { scan-assembler-times {\tst1w\tz[0-9]+\.s,} 3 } } */
+/* { dg-final { scan-assembler-times {\tst1d\tz[0-9]+\.d,} 3 } } */
+
+/* { dg-final { scan-assembler-not {\twhile} } } */
+/* { dg-final { scan-assembler-not {\tb} } } */
+/* { dg-final { scan-assembler-not {\tcmp} } } */
+/* { dg-final { scan-assembler-not {\tindex} } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/single_4.c b/gcc/testsuite/gcc.target/aarch64/sve/single_4.c
new file mode 100644 (file)
index 0000000..c1b1af6
--- /dev/null
@@ -0,0 +1,32 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -ftree-vectorize -fopenmp-simd -msve-vector-bits=2048" } */
+/* Build the shared single_1.c test body with N predefined as 256, the byte length of the 2048-bit vectors requested in dg-options.  */
+#define N 256
+#include "single_1.c"
+
+/* { dg-final { scan-assembler-times {\tmov\tz[0-9]+\.b, #1\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tmov\tz[0-9]+\.b, #2\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tmov\tz[0-9]+\.h, #3\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tmov\tz[0-9]+\.h, #4\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tmov\tz[0-9]+\.s, #5\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tmov\tz[0-9]+\.s, #6\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tmov\tz[0-9]+\.d, #7\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tmov\tz[0-9]+\.d, #8\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tmov\tz[0-9]+\.h, #15360\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tfmov\tz[0-9]+\.s, #2\.0e\+0\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tfmov\tz[0-9]+\.d, #3\.0e\+0\n} 1 } } */
+
+/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.b, vl256\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.h, vl128\n} 3 } } */
+/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.s, vl64\n} 3 } } */
+/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.d, vl32\n} 3 } } */
+
+/* { dg-final { scan-assembler-times {\tst1b\tz[0-9]+\.b,} 2 } } */
+/* { dg-final { scan-assembler-times {\tst1h\tz[0-9]+\.h,} 3 } } */
+/* { dg-final { scan-assembler-times {\tst1w\tz[0-9]+\.s,} 3 } } */
+/* { dg-final { scan-assembler-times {\tst1d\tz[0-9]+\.d,} 3 } } */
+
+/* { dg-final { scan-assembler-not {\twhile} } } */
+/* { dg-final { scan-assembler-not {\tb} } } */
+/* { dg-final { scan-assembler-not {\tcmp} } } */
+/* { dg-final { scan-assembler-not {\tindex} } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/spill_1.c b/gcc/testsuite/gcc.target/aarch64/sve/spill_1.c
new file mode 100644 (file)
index 0000000..328a677
--- /dev/null
@@ -0,0 +1,28 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -ftree-vectorize" } */
+
+#include <stdint.h>
+/* External sink for the updated array; declared here but defined nowhere in this file.  */
+void consumer (void *);
+/* Define double_loop_TYPE, which adds VAL to x[0] ... x[99] and passes x to consumer, then does both steps a second time.  */
+#define TEST_LOOP(TYPE, VAL)                   \
+  void                                         \
+  double_loop_##TYPE (TYPE *x)                 \
+  {                                            \
+    for (int i = 0; i < 100; ++i)              \
+      x[i] += VAL;                             \
+    consumer (x);                              \
+    for (int i = 0; i < 100; ++i)              \
+      x[i] += VAL;                             \
+    consumer (x);                              \
+  }
+/* Instantiate the test for 16-, 32- and 64-bit unsigned elements with the constant 511.  */
+TEST_LOOP (uint16_t, 511);
+TEST_LOOP (uint32_t, 511);
+TEST_LOOP (uint64_t, 511);
+
+/* { dg-final { scan-assembler-times {\tmov\tz[0-9]+\.h, #511\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tmov\tz[0-9]+\.s, #511\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tmov\tz[0-9]+\.d, #511\n} 2 } } */
+/* { dg-final { scan-assembler-not {\tldr\tz[0-9]} } } */
+/* { dg-final { scan-assembler-not {\tstr\tz[0-9]} } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/store_scalar_offset_1.c b/gcc/testsuite/gcc.target/aarch64/sve/store_scalar_offset_1.c
new file mode 100644 (file)
index 0000000..4f0655f
--- /dev/null
@@ -0,0 +1,55 @@
+/* { dg-do assemble { target aarch64_asm_sve_ok } } */
+/* { dg-options "-O3 -msve-vector-bits=256 --save-temps" } */
+
+#include <stdint.h>
+/* 32-byte GNU vector types, i.e. the 256-bit length selected by -msve-vector-bits=256 in dg-options.  */
+typedef int64_t vnx2di __attribute__((vector_size(32)));
+typedef int32_t vnx4si __attribute__((vector_size(32)));
+typedef int16_t vnx8hi __attribute__((vector_size(32)));
+typedef int8_t vnx16qi __attribute__((vector_size(32)));
+/* Bind a "w"-constrained output of an empty asm to the vnx2di at &a[i] (uint64_t elements, unsigned index), so the compiler must emit the store.  */
+void sve_store_64_z_lsl (uint64_t *a, unsigned long i)
+{
+  asm volatile ("" : "=w" (*(vnx2di *) &a[i]));
+}
+/* As sve_store_64_z_lsl, but with int64_t elements and a signed index.  */
+void sve_store_64_s_lsl (int64_t *a, signed long i)
+{
+  asm volatile ("" : "=w" (*(vnx2di *) &a[i]));
+}
+/* Bind a "w"-constrained output of an empty asm to the vnx4si at &a[i] (uint32_t elements, unsigned index), so the compiler must emit the store.  */
+void sve_store_32_z_lsl (uint32_t *a, unsigned long i)
+{
+  asm volatile ("" : "=w" (*(vnx4si *) &a[i]));
+}
+/* As sve_store_32_z_lsl, but with int32_t elements and a signed index.  */
+void sve_store_32_s_lsl (int32_t *a, signed long i)
+{
+  asm volatile ("" : "=w" (*(vnx4si *) &a[i]));
+}
+/* Bind a "w"-constrained output of an empty asm to the vnx8hi at &a[i] (uint16_t elements, unsigned index), so the compiler must emit the store.  */
+void sve_store_16_z_lsl (uint16_t *a, unsigned long i)
+{
+  asm volatile ("" : "=w" (*(vnx8hi *) &a[i]));
+}
+/* As sve_store_16_z_lsl, but with int16_t elements and a signed index.  */
+void sve_store_16_s_lsl (int16_t *a, signed long i)
+{
+  asm volatile ("" : "=w" (*(vnx8hi *) &a[i]));
+}
+/* Byte-element variant: the index is the first argument and the uint8_t pointer the second, and the asm output is the vnx16qi at &a[i].  */
+/* ??? The other argument order leads to a redundant move.  */
+void sve_store_8_z (unsigned long i, uint8_t *a)
+{
+  asm volatile ("" : "=w" (*(vnx16qi *) &a[i]));
+}
+/* As sve_store_8_z, but with a signed index and int8_t elements.  */
+void sve_store_8_s (signed long i, int8_t *a)
+{
+  asm volatile ("" : "=w" (*(vnx16qi *) &a[i]));
+}
+
+/* { dg-final { scan-assembler-times {\tst1d\tz0\.d, p[0-7], \[x0, x1, lsl 3\]\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tst1w\tz0\.s, p[0-7], \[x0, x1, lsl 2\]\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tst1h\tz0\.h, p[0-7], \[x0, x1, lsl 1\]\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tst1b\tz0\.b, p[0-7], \[x1, x0\]\n} 2 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/subr_1.c b/gcc/testsuite/gcc.target/aarch64/sve/subr_1.c
new file mode 100644 (file)
index 0000000..734c9e6
--- /dev/null
@@ -0,0 +1,64 @@
+/* { dg-do assemble { target aarch64_asm_sve_ok } } */
+/* { dg-options "-O3 --save-temps" } */
+
+#include <stdint.h>
+/* Define vsubr_arithimm_NAME_TYPE, which replaces each of the first COUNT elements of DST with VALUE minus that element.  */
+#define DO_IMMEDIATE_OPS(VALUE, TYPE, NAME)                    \
+void vsubr_arithimm_##NAME##_##TYPE (TYPE *dst, int count)     \
+{                                                              \
+  for (int i = 0; i < count; ++i)                              \
+    dst[i] = VALUE - dst[i];                                   \
+}
+/* Instantiate DO_IMMEDIATE_OPS for TYPE with the constants 0, 5, 255, 256, 257, 65280, 65281 and -1; the last one is named minus1 because "-1" cannot be pasted into an identifier.  */
+#define DO_ARITH_OPS(TYPE)                     \
+  DO_IMMEDIATE_OPS (0, TYPE, 0);               \
+  DO_IMMEDIATE_OPS (5, TYPE, 5);               \
+  DO_IMMEDIATE_OPS (255, TYPE, 255);           \
+  DO_IMMEDIATE_OPS (256, TYPE, 256);           \
+  DO_IMMEDIATE_OPS (257, TYPE, 257);           \
+  DO_IMMEDIATE_OPS (65280, TYPE, 65280);       \
+  DO_IMMEDIATE_OPS (65281, TYPE, 65281);       \
+  DO_IMMEDIATE_OPS (-1, TYPE, minus1);
+/* Generate the reversed-subtraction functions for 8-, 16-, 32- and 64-bit signed element types.  */
+DO_ARITH_OPS (int8_t)
+DO_ARITH_OPS (int16_t)
+DO_ARITH_OPS (int32_t)
+DO_ARITH_OPS (int64_t)
+
+/* { dg-final { scan-assembler-not {\tsub\tz[0-9]+\.b, z[0-9]+\.b, z[0-9]+\.b\n} } } */
+/* { dg-final { scan-assembler-times {\tsub\tz[0-9]+\.h, z[0-9]+\.h, z[0-9]+\.h\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tsub\tz[0-9]+\.s, z[0-9]+\.s, z[0-9]+\.s\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tsub\tz[0-9]+\.d, z[0-9]+\.d, z[0-9]+\.d\n} 2 } } */
+
+/* { dg-final { scan-assembler-times {\tsubr\tz[0-9]+\.b, z[0-9]+\.b, #1\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tsubr\tz[0-9]+\.b, z[0-9]+\.b, #5\n} 1 } } */
+/* { dg-final { scan-assembler-not   {\tsubr\tz[0-9]+\.b, z[0-9]+\.b, #255\n} } } */
+/* { dg-final { scan-assembler-not   {\tsubr\tz[0-9]+\.b, z[0-9]+\.b, #256\n} } } */
+/* { dg-final { scan-assembler-not   {\tsubr\tz[0-9]+\.b, z[0-9]+\.b, #257\n} } } */
+/* { dg-final { scan-assembler-not   {\tsubr\tz[0-9]+\.b, z[0-9]+\.b, #65280\n} } } */
+/* { dg-final { scan-assembler-not   {\tsubr\tz[0-9]+\.b, z[0-9]+\.b, #65281\n} } } */
+/* { dg-final { scan-assembler-not   {\tsubr\tz[0-9]+\.b, z[0-9]+\.b, #-1\n} } } */
+
+/* { dg-final { scan-assembler-times {\tsubr\tz[0-9]+\.h, z[0-9]+\.h, #5\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tsubr\tz[0-9]+\.h, z[0-9]+\.h, #255\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tsubr\tz[0-9]+\.h, z[0-9]+\.h, #256\n} 1 } } */
+/* { dg-final { scan-assembler-not   {\tsubr\tz[0-9]+\.h, z[0-9]+\.h, #257\n} } } */
+/* { dg-final { scan-assembler-times {\tsubr\tz[0-9]+\.h, z[0-9]+\.h, #65280\n} 1 } } */
+/* { dg-final { scan-assembler-not   {\tsubr\tz[0-9]+\.h, z[0-9]+\.h, #65281\n} } } */
+/* { dg-final { scan-assembler-not   {\tsubr\tz[0-9]+\.h, z[0-9]+\.h, #-1\n} } } */
+
+/* { dg-final { scan-assembler-times {\tsubr\tz[0-9]+\.s, z[0-9]+\.s, #5\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tsubr\tz[0-9]+\.s, z[0-9]+\.s, #255\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tsubr\tz[0-9]+\.s, z[0-9]+\.s, #256\n} 1 } } */
+/* { dg-final { scan-assembler-not   {\tsubr\tz[0-9]+\.s, z[0-9]+\.s, #257\n} } } */
+/* { dg-final { scan-assembler-times {\tsubr\tz[0-9]+\.s, z[0-9]+\.s, #65280\n} 1 } } */
+/* { dg-final { scan-assembler-not   {\tsubr\tz[0-9]+\.s, z[0-9]+\.s, #65281\n} } } */
+/* { dg-final { scan-assembler-not   {\tsubr\tz[0-9]+\.s, z[0-9]+\.s, #-1\n} } } */
+
+/* { dg-final { scan-assembler-times {\tsubr\tz[0-9]+\.d, z[0-9]+\.d, #5\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tsubr\tz[0-9]+\.d, z[0-9]+\.d, #255\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tsubr\tz[0-9]+\.d, z[0-9]+\.d, #256\n} 1 } } */
+/* { dg-final { scan-assembler-not   {\tsubr\tz[0-9]+\.d, z[0-9]+\.d, #257\n} } } */
+/* { dg-final { scan-assembler-times {\tsubr\tz[0-9]+\.d, z[0-9]+\.d, #65280\n} 1 } } */
+/* { dg-final { scan-assembler-not   {\tsubr\tz[0-9]+\.d, z[0-9]+\.d, #65281\n} } } */
+/* { dg-final { scan-assembler-not   {\tsubr\tz[0-9]+\.d, z[0-9]+\.d, #-1\n} } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/trn1_1.c b/gcc/testsuite/gcc.target/aarch64/sve/trn1_1.c
new file mode 100644 (file)
index 0000000..f1246ad
--- /dev/null
@@ -0,0 +1,53 @@
+/* { dg-do compile } */
+/* { dg-options "-O -msve-vector-bits=256" } */
+/* BIAS is the first index of every shuffle mask: 0 unless the including file defines it first (trn2_1.c sets it to 1).  */
+#ifndef BIAS
+#define BIAS 0
+#endif
+
+#include <stdint.h>
+/* 32-byte GNU vector types, i.e. the 256-bit length selected by -msve-vector-bits=256 in dg-options.  */
+typedef int64_t vnx2di __attribute__((vector_size (32)));
+typedef int32_t vnx4si __attribute__((vector_size (32)));
+typedef int16_t vnx8hi __attribute__((vector_size (32)));
+typedef int8_t vnx16qi __attribute__((vector_size (32)));
+typedef double vnx2df __attribute__((vector_size (32)));
+typedef float vnx4sf __attribute__((vector_size (32)));
+typedef _Float16 vnx8hf __attribute__((vector_size (32)));
+/* MASK_<n> (X, Y) expands to the n indices X, Y + X, X + 2, Y + X + 2, ...; with Y equal to the element count these alternate between the first and second shuffle input.  */
+#define MASK_2(X, Y) X, Y + X
+#define MASK_4(X, Y) MASK_2 (X, Y), MASK_2 (X + 2, Y)
+#define MASK_8(X, Y) MASK_4 (X, Y), MASK_4 (X + 4, Y)
+#define MASK_16(X, Y) MASK_8 (X, Y), MASK_8 (X + 8, Y)
+#define MASK_32(X, Y) MASK_16 (X, Y), MASK_16 (X + 16, Y)
+/* INDEX_<n> names the 32-byte integer vector type that has n elements; it is the type of the shuffle mask.  */
+#define INDEX_4 vnx2di
+#define INDEX_8 vnx4si
+#define INDEX_16 vnx8hi
+#define INDEX_32 vnx16qi
+/* Define permute_TYPE, which shuffles VALUES1 and VALUES2 with the NUNITS-element constant mask MASK_<NUNITS> (BIAS, NUNITS).  */
+#define PERMUTE(TYPE, NUNITS)                                  \
+  TYPE permute_##TYPE (TYPE values1, TYPE values2)             \
+  {                                                            \
+    return __builtin_shuffle                                   \
+      (values1, values2,                                       \
+       ((INDEX_##NUNITS) { MASK_##NUNITS (BIAS, NUNITS) }));   \
+  }
+/* Apply T to every (vector type, element count) pair under test: four integer and three floating-point vector types.  */
+#define TEST_ALL(T)                            \
+  T (vnx2di, 4)                                        \
+  T (vnx4si, 8)                                        \
+  T (vnx8hi, 16)                               \
+  T (vnx16qi, 32)                              \
+  T (vnx2df, 4)                                        \
+  T (vnx4sf, 8)                                        \
+  T (vnx8hf, 16)
+/* Emit one permute_<type> function per pair.  */
+TEST_ALL (PERMUTE)
+
+/* { dg-final { scan-assembler-not {\ttbl\t} } } */
+
+/* { dg-final { scan-assembler-times {\ttrn1\tz[0-9]+\.d, z[0-9]+\.d, z[0-9]+\.d} 2 } } */
+/* { dg-final { scan-assembler-times {\ttrn1\tz[0-9]+\.s, z[0-9]+\.s, z[0-9]+\.s} 2 } } */
+/* { dg-final { scan-assembler-times {\ttrn1\tz[0-9]+\.h, z[0-9]+\.h, z[0-9]+\.h} 2 } } */
+/* { dg-final { scan-assembler-times {\ttrn1\tz[0-9]+\.b, z[0-9]+\.b, z[0-9]+\.b} 1 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/trn2_1.c b/gcc/testsuite/gcc.target/aarch64/sve/trn2_1.c
new file mode 100644 (file)
index 0000000..97e48a8
--- /dev/null
@@ -0,0 +1,12 @@
+/* { dg-do compile } */
+/* { dg-options "-O -msve-vector-bits=256" } */
+/* Reuse the trn1_1.c functions with BIAS predefined as 1, so every shuffle mask starts at element 1 instead of element 0.  */
+#define BIAS 1
+#include "trn1_1.c"
+
+/* { dg-final { scan-assembler-not {\ttbl\t} } } */
+
+/* { dg-final { scan-assembler-times {\ttrn2\tz[0-9]+\.d, z[0-9]+\.d, z[0-9]+\.d} 2 } } */
+/* { dg-final { scan-assembler-times {\ttrn2\tz[0-9]+\.s, z[0-9]+\.s, z[0-9]+\.s} 2 } } */
+/* { dg-final { scan-assembler-times {\ttrn2\tz[0-9]+\.h, z[0-9]+\.h, z[0-9]+\.h} 2 } } */
+/* { dg-final { scan-assembler-times {\ttrn2\tz[0-9]+\.b, z[0-9]+\.b, z[0-9]+\.b} 1 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/unpack_fcvt_signed_1.c b/gcc/testsuite/gcc.target/aarch64/sve/unpack_fcvt_signed_1.c
new file mode 100644 (file)
index 0000000..83ffe85
--- /dev/null
@@ -0,0 +1,15 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -ftree-vectorize -fno-inline" } */
+
+#include <stdint.h>
+/* Set d[i] to s[i] + 8 for i in [0, size): the sum is formed in integer arithmetic and then converted to double.  */
+void __attribute__ ((noinline, noclone))
+unpack_double_int_plus8 (double *d, int32_t *s, int size)
+{
+  for (int i = 0; i < size; i++)
+    d[i] = s[i] + 8;
+}
+
+/* { dg-final { scan-assembler-times {\tuunpklo\tz[0-9]+\.d, z[0-9]+\.s\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tuunpkhi\tz[0-9]+\.d, z[0-9]+\.s\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tscvtf\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.s\n} 2 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/unpack_fcvt_signed_1_run.c b/gcc/testsuite/gcc.target/aarch64/sve/unpack_fcvt_signed_1_run.c
new file mode 100644 (file)
index 0000000..e2a0937
--- /dev/null
@@ -0,0 +1,28 @@
+/* { dg-do run { target aarch64_sve_hw } } */
+/* { dg-options "-O2 -ftree-vectorize" } */
+
+#include "unpack_fcvt_signed_1.c"
+/* Number of elements in the source and destination arrays.  */
+#define ARRAY_SIZE 89
+/* Input value for index i (reads the variable i in scope where it is used); negative for i below 33.  */
+#define VAL1 ((i * 88654) - (33 * 88654))
+/* Run unpack_double_int_plus8 on ARRAY_SIZE inputs and abort on any wrong element; main carries the optimize (1) attribute.  */
+int __attribute__ ((optimize (1)))
+main (void)
+{
+  double array_dest[ARRAY_SIZE];
+  int32_t array_source[ARRAY_SIZE];
+  /* Fill the input; the empty asm with a "memory" clobber is a compiler barrier, presumably to keep this loop from being vectorized -- confirm.  */
+  for (int i = 0; i < ARRAY_SIZE; i++)
+    {
+      array_source[i] = VAL1;
+      asm volatile ("" ::: "memory");
+    }
+  /* Call the function under test, then compare each result with the value recomputed here.  */
+  unpack_double_int_plus8 (array_dest, array_source, ARRAY_SIZE);
+  for (int i = 0; i < ARRAY_SIZE; i++)
+    if (array_dest[i] != (double) (VAL1 + 8))
+      __builtin_abort ();
+
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/unpack_fcvt_unsigned_1.c b/gcc/testsuite/gcc.target/aarch64/sve/unpack_fcvt_unsigned_1.c
new file mode 100644 (file)
index 0000000..e2f6b1a
--- /dev/null
@@ -0,0 +1,15 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -ftree-vectorize" } */
+
+#include <stdint.h>
+/* Set d[i] to s[i] + 9 for i in [0, size): the sum is formed on the unsigned integer value and then converted to double.  */
+void __attribute__ ((noinline, noclone))
+unpack_double_int_plus9 (double *d, uint32_t *s, int size)
+{
+  for (int i = 0; i < size; i++)
+    d[i] = (double) (s[i] + 9);
+}
+
+/* { dg-final { scan-assembler-times {\tuunpklo\tz[0-9]+\.d, z[0-9]+\.s\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tuunpkhi\tz[0-9]+\.d, z[0-9]+\.s\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tucvtf\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.s\n} 2 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/unpack_fcvt_unsigned_1_run.c b/gcc/testsuite/gcc.target/aarch64/sve/unpack_fcvt_unsigned_1_run.c
new file mode 100644 (file)
index 0000000..829a851
--- /dev/null
@@ -0,0 +1,28 @@
+/* { dg-do run { target aarch64_sve_hw } } */
+/* { dg-options "-O2 -ftree-vectorize" } */
+
+#include "unpack_fcvt_unsigned_1.c"
+/* Number of elements in the source and destination arrays.  */
+#define ARRAY_SIZE 153
+/* Input value for index i (reads the variable i in scope where it is used); the int result is negative for i below 21 before the cast to unsigned int.  */
+#define VAL1 ((unsigned int) ((i * 345435) - (21 * 345435)))
+/* Run unpack_double_int_plus9 on ARRAY_SIZE inputs and abort on any wrong element; main carries the optimize (1) attribute.  */
+int __attribute__ ((optimize (1)))
+main (void)
+{
+  double array_dest[ARRAY_SIZE];
+  uint32_t array_source[ARRAY_SIZE];
+  /* Fill the input; the empty asm with a "memory" clobber is a compiler barrier, presumably to keep this loop from being vectorized -- confirm.  */
+  for (int i = 0; i < ARRAY_SIZE; i++)
+    {
+      array_source[i] = VAL1;
+      asm volatile ("" ::: "memory");
+    }
+  /* Call the function under test, then compare each result with the value recomputed here.  */
+  unpack_double_int_plus9 (array_dest, array_source, ARRAY_SIZE);
+  for (int i = 0; i < ARRAY_SIZE; i++)
+    if (array_dest[i] != (double) (VAL1 + 9))
+      __builtin_abort ();
+
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/unpack_float_1.c b/gcc/testsuite/gcc.target/aarch64/sve/unpack_float_1.c
new file mode 100644 (file)
index 0000000..14a636b
--- /dev/null
@@ -0,0 +1,13 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -ftree-vectorize" } */
+/* Set d[i] to s[i] + 7.9 for i in [0, size); 7.9 is a double constant, so each float input is widened to double before the addition.  */
+void __attribute__ ((noinline, noclone))
+unpack_float_plus_7point9 (double *d, float *s, int size)
+{
+  for (int i = 0; i < size; i++)
+    d[i] = s[i] + 7.9;
+}
+
+/* { dg-final { scan-assembler-times {\tuunpklo\tz[0-9]+\.d, z[0-9]+\.s\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tuunpkhi\tz[0-9]+\.d, z[0-9]+\.s\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tfcvt\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.s\n} 2 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/unpack_float_1_run.c b/gcc/testsuite/gcc.target/aarch64/sve/unpack_float_1_run.c
new file mode 100644 (file)
index 0000000..5f363a8
--- /dev/null
@@ -0,0 +1,28 @@
+/* { dg-do run { target aarch64_sve_hw } } */
+/* { dg-options "-O2 -ftree-vectorize -fno-inline" } */
+
+#include "unpack_float_1.c"
+/* Number of elements in the source and destination arrays.  */
+#define ARRAY_SIZE 199
+/* Input value for index i (reads the variable i in scope where it is used): a double expression converted to float.  */
+#define VAL1 ((float) ((i * 645.56665) - (645.56665)))
+/* Run unpack_float_plus_7point9 on ARRAY_SIZE inputs and abort on any wrong element; main carries the optimize (1) attribute.  */
+int __attribute__ ((optimize (1)))
+main (void)
+{
+  double array_dest[ARRAY_SIZE];
+  float array_source[ARRAY_SIZE];
+  /* Fill the input; the empty asm with a "memory" clobber is a compiler barrier, presumably to keep this loop from being vectorized -- confirm.  */
+  for (int i = 0; i < ARRAY_SIZE; i++)
+    {
+      array_source[i] = VAL1;
+      asm volatile ("" ::: "memory");
+    }
+  /* Call the function under test, then compare each result with the value recomputed here.  */
+  unpack_float_plus_7point9 (array_dest, array_source, ARRAY_SIZE);
+  for (int i = 0; i < ARRAY_SIZE; i++)
+    if (array_dest[i] != (double) (VAL1 + 7.9))
+      __builtin_abort ();
+
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/unpack_signed_1.c b/gcc/testsuite/gcc.target/aarch64/sve/unpack_signed_1.c
new file mode 100644 (file)
index 0000000..d4da369
--- /dev/null
@@ -0,0 +1,30 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -ftree-vectorize -fno-inline" } */
+
+#include <stdint.h>
+/* Define unpack_TYPED_TYPES, which stores (TYPES) (s[i] | mask) into d[i] for i in [0, size), converting each TYPES value to TYPED.  */
+#define UNPACK(TYPED, TYPES)                                           \
+void __attribute__ ((noinline, noclone))                               \
+unpack_##TYPED##_##TYPES (TYPED *d, TYPES *s, TYPES mask, int size)    \
+{                                                                      \
+  for (int i = 0; i < size; i++)                                       \
+    d[i] = (TYPES) (s[i] | mask);                                      \
+}
+/* Pair each signed source type (second argument) with the signed and the unsigned destination type of twice its width (first argument).  */
+#define TEST_ALL(T)                    \
+  T (int64_t, int32_t)                 \
+  T (int32_t, int16_t)                 \
+  T (int16_t, int8_t)                  \
+  T (uint64_t, int32_t)                        \
+  T (uint32_t, int16_t)                        \
+  T (uint16_t, int8_t)
+/* Emit one unpack_TYPED_TYPES function per pair.  */
+TEST_ALL (UNPACK)
+
+/* { dg-final { scan-assembler-times {\tsunpkhi\tz[0-9]+\.d, z[0-9]+\.s\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tsunpkhi\tz[0-9]+\.s, z[0-9]+\.h\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tsunpkhi\tz[0-9]+\.h, z[0-9]+\.b\n} 2 } } */
+
+/* { dg-final { scan-assembler-times {\tsunpklo\tz[0-9]+\.d, z[0-9]+\.s\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tsunpklo\tz[0-9]+\.s, z[0-9]+\.h\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tsunpklo\tz[0-9]+\.h, z[0-9]+\.b\n} 2 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/unpack_signed_1_run.c b/gcc/testsuite/gcc.target/aarch64/sve/unpack_signed_1_run.c
new file mode 100644 (file)
index 0000000..956340f
--- /dev/null
@@ -0,0 +1,28 @@
+/* { dg-do run { target aarch64_sve_hw } } */
+/* { dg-options "-O2 -ftree-vectorize -fno-inline" } */
+
+#include "unpack_signed_1.c"
+/* Number of elements in each source and destination array.  */
+#define ARRAY_SIZE 33
+/* Fill arrays[i] with (i - 10) * 3, call unpack_TYPED_TYPES with mask 7 and abort unless every arrayd[i] equals (TYPED) (TYPES) (((i - 10) * 3) | 7).  */
+#define TEST_LOOP(TYPED, TYPES)                                        \
+  {                                                            \
+    TYPED arrayd[ARRAY_SIZE];                                  \
+    TYPES arrays[ARRAY_SIZE];                                  \
+    for (int i = 0; i < ARRAY_SIZE; i++)                       \
+      {                                                                \
+       arrays[i] = (i - 10) * 3;                               \
+       asm volatile ("" ::: "memory");                         \
+      }                                                                \
+    unpack_##TYPED##_##TYPES (arrayd, arrays, 7, ARRAY_SIZE);  \
+    for (int i = 0; i < ARRAY_SIZE; i++)                       \
+      if (arrayd[i] != (TYPED) (TYPES) (((i - 10) * 3) | 7))   \
+       __builtin_abort ();                                     \
+  }
+/* Run TEST_LOOP for every (TYPED, TYPES) pair listed by TEST_ALL; main carries the optimize (1) attribute.  */
+int __attribute__ ((optimize (1)))
+main (void)
+{
+  TEST_ALL (TEST_LOOP)
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/unpack_unsigned_1.c b/gcc/testsuite/gcc.target/aarch64/sve/unpack_unsigned_1.c
new file mode 100644 (file)
index 0000000..6d77ced
--- /dev/null
@@ -0,0 +1,30 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -ftree-vectorize -fno-inline" } */
+
+#include <stdint.h>
+/* Define unpack_TYPED_TYPES, which stores (TYPES) (s[i] + 1) into d[i] for i in [0, size); the cast brings the sum back to the source type before it is converted to TYPED.  */
+#define UNPACK(TYPED, TYPES)                           \
+void __attribute__ ((noinline, noclone))               \
+unpack_##TYPED##_##TYPES (TYPED *d, TYPES *s, int size)        \
+{                                                      \
+  for (int i = 0; i < size; i++)                       \
+    d[i] = (TYPES) (s[i] + 1);                         \
+}
+/* Pair each unsigned source type (second argument) with the signed and the unsigned destination type of twice its width (first argument).  */
+#define TEST_ALL(T)                    \
+  T (int64_t, uint32_t)                        \
+  T (int32_t, uint16_t)                        \
+  T (int16_t, uint8_t)                 \
+  T (uint64_t, uint32_t)               \
+  T (uint32_t, uint16_t)               \
+  T (uint16_t, uint8_t)
+/* Emit one unpack_TYPED_TYPES function per pair.  */
+TEST_ALL (UNPACK)
+
+/* { dg-final { scan-assembler-times {\tuunpkhi\tz[0-9]+\.d, z[0-9]+\.s\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tuunpkhi\tz[0-9]+\.s, z[0-9]+\.h\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tuunpkhi\tz[0-9]+\.h, z[0-9]+\.b\n} 2 } } */
+
+/* { dg-final { scan-assembler-times {\tuunpklo\tz[0-9]+\.d, z[0-9]+\.s\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tuunpklo\tz[0-9]+\.s, z[0-9]+\.h\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tuunpklo\tz[0-9]+\.h, z[0-9]+\.b\n} 2 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/unpack_unsigned_1_run.c b/gcc/testsuite/gcc.target/aarch64/sve/unpack_unsigned_1_run.c
new file mode 100644 (file)
index 0000000..3cc3778
--- /dev/null
@@ -0,0 +1,28 @@
+/* { dg-do run { target aarch64_sve_hw } } */
+/* { dg-options "-O2 -ftree-vectorize -fno-inline" } */
+
+#include "unpack_unsigned_1.c"
+/* Number of elements in each source and destination array.  */
+#define ARRAY_SIZE 85
+/* Fill arrays[i] with (i - 10) * 3, call unpack_TYPED_TYPES and abort unless every arrayd[i] equals (TYPED) (TYPES) (((i - 10) * 3) + 1).  */
+#define TEST_LOOP(TYPED, TYPES)                                        \
+  {                                                            \
+    TYPED arrayd[ARRAY_SIZE];                                  \
+    TYPES arrays[ARRAY_SIZE];                                  \
+    for (int i = 0; i < ARRAY_SIZE; i++)                       \
+      {                                                                \
+       arrays[i] = (i - 10) * 3;                               \
+       asm volatile ("" ::: "memory");                         \
+      }                                                                \
+    unpack_##TYPED##_##TYPES (arrayd, arrays, ARRAY_SIZE);     \
+    for (int i = 0; i < ARRAY_SIZE; i++)                       \
+      if (arrayd[i] != (TYPED) (TYPES) (((i - 10) * 3) + 1))   \
+       __builtin_abort ();                                     \
+  }
+/* Run TEST_LOOP for every (TYPED, TYPES) pair listed by TEST_ALL; main carries the optimize (1) attribute.  */
+int __attribute__ ((optimize (1)))
+main (void)
+{
+  TEST_ALL (TEST_LOOP)
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/uzp1_1.c b/gcc/testsuite/gcc.target/aarch64/sve/uzp1_1.c
new file mode 100644 (file)
index 0000000..789fb0c
--- /dev/null
@@ -0,0 +1,42 @@
+/* { dg-do compile } */
+/* { dg-options "-O -msve-vector-bits=256" } */
+
+#include <stdint.h>
+/* 32-byte GNU vector types, i.e. the 256-bit length selected by -msve-vector-bits=256 in dg-options.  */
+typedef int64_t vnx2di __attribute__((vector_size (32)));
+typedef int32_t vnx4si __attribute__((vector_size (32)));
+typedef int16_t vnx8hi __attribute__((vector_size (32)));
+typedef int8_t vnx16qi __attribute__((vector_size (32)));
+typedef double vnx2df __attribute__((vector_size (32)));
+typedef float vnx4sf __attribute__((vector_size (32)));
+typedef _Float16 vnx8hf __attribute__((vector_size (32)));
+/* Define uzp1_TYPE, which shuffles VALUES1 and VALUES2 using the constant index vector MASK.  */
+#define UZP1(TYPE, MASK)                               \
+TYPE uzp1_##TYPE (TYPE values1, TYPE values2)          \
+{                                                      \
+  return __builtin_shuffle (values1, values2, MASK);   \
+}
+/* Each mask lists the even indices 0, 2, 4, ... of the two concatenated inputs.  */
+/* The floating-point vector types take an integer mask with the same element count.  */
+UZP1 (vnx2di,  ((vnx2di) { 0, 2, 4, 6 }));
+UZP1 (vnx4si,  ((vnx4si) { 0, 2, 4, 6, 8, 10, 12, 14 }));
+UZP1 (vnx8hi,  ((vnx8hi) { 0, 2, 4, 6, 8, 10, 12, 14,
+                          16, 18, 20, 22, 24, 26, 28, 30 }));
+UZP1 (vnx16qi, ((vnx16qi) { 0, 2, 4, 6, 8, 10, 12, 14,
+                           16, 18, 20, 22, 24, 26, 28, 30,
+                           32, 34, 36, 38, 40, 42, 44, 46,
+                           48, 50, 52, 54, 56, 58, 60, 62 }));
+UZP1 (vnx2df,  ((vnx2di) { 0, 2, 4, 6 }));
+UZP1 (vnx4sf,  ((vnx4si) { 0, 2, 4, 6, 8, 10, 12, 14 }));
+UZP1 (vnx8hf,  ((vnx8hi) { 0, 2, 4, 6, 8, 10, 12, 14,
+                          16, 18, 20, 22, 24, 26, 28, 30 }));
+
+/* { dg-final { scan-assembler-not {\ttbl\tz[0-9]+\.d, z[0-9]+\.d, z[0-9]+\.d\n} } } */
+/* { dg-final { scan-assembler-not {\ttbl\tz[0-9]+\.s, z[0-9]+\.s, z[0-9]+\.s\n} } } */
+/* { dg-final { scan-assembler-not {\ttbl\tz[0-9]+\.h, z[0-9]+\.h, z[0-9]+\.h\n} } } */
+/* { dg-final { scan-assembler-not {\ttbl\tz[0-9]+\.b, z[0-9]+\.b, z[0-9]+\.b\n} } } */
+
+/* { dg-final { scan-assembler-times {\tuzp1\tz[0-9]+\.d, z[0-9]+\.d, z[0-9]+\.d\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tuzp1\tz[0-9]+\.s, z[0-9]+\.s, z[0-9]+\.s\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tuzp1\tz[0-9]+\.h, z[0-9]+\.h, z[0-9]+\.h\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tuzp1\tz[0-9]+\.b, z[0-9]+\.b, z[0-9]+\.b\n} 1 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/uzp1_1_run.c b/gcc/testsuite/gcc.target/aarch64/sve/uzp1_1_run.c
new file mode 100644 (file)
index 0000000..b0abd88
--- /dev/null
@@ -0,0 +1,63 @@
+/* { dg-do run { target aarch64_sve_hw } } */
+/* { dg-options "-O" } */
+
+#include "uzp1_1.c"
+/* Call uzp1_TYPE (VALUES1, VALUES2) and abort unless the result is byte-for-byte equal to EXPECTED_RESULT.  */
+#define TEST_UZP1(TYPE, EXPECTED_RESULT, VALUES1, VALUES2)             \
+{                                                                      \
+  TYPE expected_result = EXPECTED_RESULT;                              \
+  TYPE values1 = VALUES1;                                              \
+  TYPE values2 = VALUES2;                                              \
+  TYPE dest;                                                           \
+  dest = uzp1_##TYPE (values1, values2);                               \
+  if (__builtin_memcmp (&dest, &expected_result, sizeof (TYPE)) != 0)  \
+    __builtin_abort ();                                                        \
+}
+/* Check uzp1_TYPE for every vector type; each expected vector holds the even-indexed elements of VALUES1 followed by those of VALUES2.  */
+int main (void)
+{
+  TEST_UZP1 (vnx2di,
+            ((vnx2di) { 4, 6, 12, 36 }),
+            ((vnx2di) { 4, 5, 6, 7 }),
+            ((vnx2di) { 12, 24, 36, 48 }));
+  TEST_UZP1 (vnx4si,
+            ((vnx4si) { 3, 5, 7, 9, 33, 35, 37, 39 }),
+            ((vnx4si) { 3, 4, 5, 6, 7, 8, 9, 10 }),
+            ((vnx4si) { 33, 34, 35, 36, 37, 38, 39, 40 }));
+  TEST_UZP1 (vnx8hi,
+            ((vnx8hi) { 3, 5, 7, 9, 11, 13, 15, 17,
+                        33, 35, 37, 39, 41, 43, 45, 47 }),
+            ((vnx8hi) { 3, 4, 5, 6, 7, 8, 9, 10,
+                        11, 12, 13, 14, 15, 16, 17, 18 }),
+            ((vnx8hi) { 33, 34, 35, 36, 37, 38, 39, 40,
+                        41, 42, 43, 44, 45, 46, 47, 48 }));
+  TEST_UZP1 (vnx16qi,
+            ((vnx16qi) { 4, 6, 4, 6, 4, 6, 4, 6,
+                         4, 6, 4, 6, 4, 6, 4, 6,
+                         12, 36, 12, 36, 12, 36, 12, 36,
+                         12, 36, 12, 36, 12, 36, 12, 36 }),
+            ((vnx16qi) { 4, 5, 6, 7, 4, 5, 6, 7,
+                         4, 5, 6, 7, 4, 5, 6, 7,
+                         4, 5, 6, 7, 4, 5, 6, 7,
+                         4, 5, 6, 7, 4, 5, 6, 7 }),
+            ((vnx16qi) { 12, 24, 36, 48, 12, 24, 36, 48,
+                         12, 24, 36, 48, 12, 24, 36, 48,
+                         12, 24, 36, 48, 12, 24, 36, 48,
+                         12, 24, 36, 48, 12, 24, 36, 48 }));
+  TEST_UZP1 (vnx2df,
+            ((vnx2df) { 4.0, 6.0, 12.0, 36.0 }),
+            ((vnx2df) { 4.0, 5.0, 6.0, 7.0 }),
+            ((vnx2df) { 12.0, 24.0, 36.0, 48.0 }));
+  TEST_UZP1 (vnx4sf,
+            ((vnx4sf) { 3.0, 5.0, 7.0, 9.0, 33.0, 35.0, 37.0, 39.0 }),
+            ((vnx4sf) { 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0 }),
+            ((vnx4sf) { 33.0, 34.0, 35.0, 36.0, 37.0, 38.0, 39.0, 40.0 }));
+  TEST_UZP1 (vnx8hf,
+            ((vnx8hf) { 3.0, 5.0, 7.0, 9.0, 11.0, 13.0, 15.0, 17.0,
+                        33.0, 35.0, 37.0, 39.0, 41.0, 43.0, 45.0, 47.0 }),
+            ((vnx8hf) { 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0,
+                        11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0, 18.0 }),
+            ((vnx8hf) { 33.0, 34.0, 35.0, 36.0, 37.0, 38.0, 39.0, 40.0,
+                        41.0, 42.0, 43.0, 44.0, 45.0, 46.0, 47.0, 48.0 }));
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/uzp2_1.c b/gcc/testsuite/gcc.target/aarch64/sve/uzp2_1.c
new file mode 100644 (file)
index 0000000..def490d
--- /dev/null
@@ -0,0 +1,41 @@
+/* { dg-do compile } */
+/* { dg-options "-O -msve-vector-bits=256" } */
+
+#include <stdint.h>
+/* 32-byte GNU vector types, i.e. the 256-bit length selected by -msve-vector-bits=256 in dg-options.  */
+typedef int64_t vnx2di __attribute__((vector_size (32)));
+typedef int32_t vnx4si __attribute__((vector_size (32)));
+typedef int16_t vnx8hi __attribute__((vector_size (32)));
+typedef int8_t vnx16qi __attribute__((vector_size (32)));
+typedef double vnx2df __attribute__((vector_size (32)));
+typedef float vnx4sf __attribute__((vector_size (32)));
+typedef _Float16 vnx8hf __attribute__((vector_size (32)));
+/* Define uzp2_TYPE, which shuffles VALUES1 and VALUES2 using the constant index vector MASK.  */
+#define UZP2(TYPE, MASK)                               \
+TYPE uzp2_##TYPE (TYPE values1, TYPE values2)          \
+{                                                      \
+  return __builtin_shuffle (values1, values2, MASK);   \
+}
+/* Each mask lists the odd indices 1, 3, 5, ... of the two concatenated inputs; the floating-point vector types take an integer mask with the same element count.  */
+UZP2 (vnx2di,  ((vnx2di) { 1, 3, 5, 7 }));
+UZP2 (vnx4si,  ((vnx4si) { 1, 3, 5, 7, 9, 11, 13, 15 }));
+UZP2 (vnx8hi,  ((vnx8hi) { 1, 3, 5, 7, 9, 11, 13, 15,
+                          17, 19, 21, 23, 25, 27, 29, 31 }));
+UZP2 (vnx16qi, ((vnx16qi) { 1, 3, 5, 7, 9, 11, 13, 15,
+                           17, 19, 21, 23, 25, 27, 29, 31,
+                           33, 35, 37, 39, 41, 43, 45, 47,
+                           49, 51, 53, 55, 57, 59, 61, 63 }));
+UZP2 (vnx2df,  ((vnx2di) { 1, 3, 5, 7 }));
+UZP2 (vnx4sf,  ((vnx4si) { 1, 3, 5, 7, 9, 11, 13, 15 }));
+UZP2 (vnx8hf,  ((vnx8hi) { 1, 3, 5, 7, 9, 11, 13, 15,
+                          17, 19, 21, 23, 25, 27, 29, 31 }));
+
+/* { dg-final { scan-assembler-not {\ttbl\tz[0-9]+\.d, z[0-9]+\.d, z[0-9]+\.d\n} } } */
+/* { dg-final { scan-assembler-not {\ttbl\tz[0-9]+\.s, z[0-9]+\.s, z[0-9]+\.s\n} } } */
+/* { dg-final { scan-assembler-not {\ttbl\tz[0-9]+\.h, z[0-9]+\.h, z[0-9]+\.h\n} } } */
+/* { dg-final { scan-assembler-not {\ttbl\tz[0-9]+\.b, z[0-9]+\.b, z[0-9]+\.b\n} } } */
+
+/* { dg-final { scan-assembler-times {\tuzp2\tz[0-9]+\.d, z[0-9]+\.d, z[0-9]+\.d\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tuzp2\tz[0-9]+\.s, z[0-9]+\.s, z[0-9]+\.s\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tuzp2\tz[0-9]+\.h, z[0-9]+\.h, z[0-9]+\.h\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tuzp2\tz[0-9]+\.b, z[0-9]+\.b, z[0-9]+\.b\n} 1 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/uzp2_1_run.c b/gcc/testsuite/gcc.target/aarch64/sve/uzp2_1_run.c
new file mode 100644 (file)
index 0000000..2a6f8ba
--- /dev/null
@@ -0,0 +1,63 @@
+/* { dg-do run { target aarch64_sve_hw } } */
+/* { dg-options "-O" } */
+
+#include "uzp2_1.c"
+
+#define TEST_UZP2(TYPE, EXPECTED_RESULT, VALUES1, VALUES2)             \
+{                                                                      \
+  TYPE expected_result = EXPECTED_RESULT;                              \
+  TYPE values1 = VALUES1;                                              \
+  TYPE values2 = VALUES2;                                              \
+  TYPE dest;                                                           \
+  dest = uzp2_##TYPE (values1, values2);                               \
+  if (__builtin_memcmp (&dest, &expected_result, sizeof (TYPE)) != 0)  \
+    __builtin_abort ();                                                        \
+}
+
+/* Check uzp2 for each vector type.  Each expected vector holds the
+   odd-indexed elements of the first input followed by the odd-indexed
+   elements of the second input.  */
+int main (void)
+{
+  TEST_UZP2 (vnx2di,
+            ((vnx2di) { 5, 7, 24, 48 }),
+            ((vnx2di) { 4, 5, 6, 7 }),
+            ((vnx2di) { 12, 24, 36, 48 }));
+  TEST_UZP2 (vnx4si,
+            ((vnx4si) { 4, 6, 8, 10, 34, 36, 38, 40 }),
+            ((vnx4si) { 3, 4, 5, 6, 7, 8, 9, 10 }),
+            ((vnx4si) { 33, 34, 35, 36, 37, 38, 39, 40 }));
+  TEST_UZP2 (vnx8hi,
+            ((vnx8hi) { 4, 6, 8, 10, 12, 14, 16, 18,
+                        34, 36, 38, 40, 42, 44, 46, 48 }),
+            ((vnx8hi) { 3, 4, 5, 6, 7, 8, 9, 10,
+                        11, 12, 13, 14, 15, 16, 17, 18 }),
+            ((vnx8hi) { 33, 34, 35, 36, 37, 38, 39, 40,
+                        41, 42, 43, 44, 45, 46, 47, 48 }));
+  TEST_UZP2 (vnx16qi,
+            ((vnx16qi) { 5, 7, 5, 7, 5, 7, 5, 7,
+                         5, 7, 5, 7, 5, 7, 5, 7,
+                         24, 48, 24, 48, 24, 48, 24, 48,
+                         24, 48, 24, 48, 24, 48, 24, 48 }),
+            ((vnx16qi) { 4, 5, 6, 7, 4, 5, 6, 7,
+                         4, 5, 6, 7, 4, 5, 6, 7,
+                         4, 5, 6, 7, 4, 5, 6, 7,
+                         4, 5, 6, 7, 4, 5, 6, 7 }),
+            ((vnx16qi) { 12, 24, 36, 48, 12, 24, 36, 48,
+                         12, 24, 36, 48, 12, 24, 36, 48,
+                         12, 24, 36, 48, 12, 24, 36, 48,
+                         12, 24, 36, 48, 12, 24, 36, 48 }));
+  TEST_UZP2 (vnx2df,
+            ((vnx2df) { 5.0, 7.0, 24.0, 48.0 }),
+            ((vnx2df) { 4.0, 5.0, 6.0, 7.0 }),
+            ((vnx2df) { 12.0, 24.0, 36.0, 48.0 }));
+  TEST_UZP2 (vnx4sf,
+            ((vnx4sf) { 4.0, 6.0, 8.0, 10.0, 34.0, 36.0, 38.0, 40.0 }),
+            ((vnx4sf) { 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0 }),
+            ((vnx4sf) { 33.0, 34.0, 35.0, 36.0, 37.0, 38.0, 39.0, 40.0 }));
+  TEST_UZP2 (vnx8hf,
+            ((vnx8hf) { 4.0, 6.0, 8.0, 10.0, 12.0, 14.0, 16.0, 18.0,
+                        34.0, 36.0, 38.0, 40.0, 42.0, 44.0, 46.0, 48.0 }),
+            ((vnx8hf) { 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0,
+                        11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0, 18.0 }),
+            ((vnx8hf) { 33.0, 34.0, 35.0, 36.0, 37.0, 38.0, 39.0, 40.0,
+                        41.0, 42.0, 43.0, 44.0, 45.0, 46.0, 47.0, 48.0 }));
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/vcond_1.C b/gcc/testsuite/gcc.target/aarch64/sve/vcond_1.C
new file mode 100644 (file)
index 0000000..6fd6b42
--- /dev/null
@@ -0,0 +1,245 @@
+/* { dg-do assemble { target aarch64_asm_sve_ok } } */
+/* { dg-options "-O -msve-vector-bits=256 --save-temps" } */
+
+#include <stdint.h>
+
+/* 32-byte vectors of signed 8-, 16-, 32- and 64-bit elements.  */
+typedef int8_t vnx16qi __attribute__((vector_size(32)));
+typedef int16_t vnx8hi __attribute__((vector_size(32)));
+typedef int32_t vnx4si __attribute__((vector_size(32)));
+typedef int64_t vnx2di __attribute__((vector_size(32)));
+
+/* 32-byte vectors of unsigned 8-, 16-, 32- and 64-bit elements.  */
+typedef uint8_t v32qu __attribute__((vector_size(32)));
+typedef uint16_t v16hu __attribute__((vector_size(32)));
+typedef uint32_t v8su __attribute__((vector_size(32)));
+typedef uint64_t v4du __attribute__((vector_size(32)));
+
+/* Define vcond_<TYPE>_<SUFFIX>, which returns the vector expression
+   "a COND b ? x : y" for vectors of type TYPE.  */
+#define DEF_VCOND_VAR(TYPE, COND, SUFFIX)                      \
+TYPE vcond_##TYPE##_##SUFFIX (TYPE x, TYPE y, TYPE a, TYPE b)  \
+{                                                              \
+  TYPE r;                                                      \
+  r = a COND b ? x : y;                                                \
+  return r;                                                    \
+}
+
+/* Define vcond_imm_<TYPE>_<SUFFIX>, which returns the vector expression
+   "a COND IMM ? x : y", comparing vector A with the constant IMM.  */
+#define DEF_VCOND_IMM(TYPE, COND, IMM, SUFFIX)                 \
+TYPE vcond_imm_##TYPE##_##SUFFIX (TYPE x, TYPE y, TYPE a)      \
+{                                                              \
+  TYPE r;                                                      \
+  r = a COND IMM ? x : y;                                      \
+  return r;                                                    \
+}
+
+/* Invoke T (TYPE, COND, SUFFIX) once for each signed vector type.  */
+#define TEST_COND_VAR_SIGNED_ALL(T, COND, SUFFIX)      \
+  T (vnx16qi, COND, SUFFIX)                            \
+  T (vnx8hi, COND, SUFFIX)                             \
+  T (vnx4si, COND, SUFFIX)                             \
+  T (vnx2di, COND, SUFFIX)
+
+/* Invoke T (TYPE, COND, SUFFIX) once for each unsigned vector type.  */
+#define TEST_COND_VAR_UNSIGNED_ALL(T, COND, SUFFIX)    \
+  T (v32qu, COND, SUFFIX)                              \
+  T (v16hu, COND, SUFFIX)                              \
+  T (v8su, COND, SUFFIX)                               \
+  T (v4du, COND, SUFFIX)
+
+/* Invoke T for every signed and then every unsigned vector type.  */
+#define TEST_COND_VAR_ALL(T, COND, SUFFIX)             \
+  TEST_COND_VAR_SIGNED_ALL (T, COND, SUFFIX)           \
+  TEST_COND_VAR_UNSIGNED_ALL (T, COND, SUFFIX)
+
+/* Invoke T for every vector type with each of the six comparison
+   operators; the suffix names the operator.  */
+#define TEST_VAR_ALL(T)                                \
+  TEST_COND_VAR_ALL (T, >, gt)                 \
+  TEST_COND_VAR_ALL (T, <, lt)                 \
+  TEST_COND_VAR_ALL (T, >=, ge)                        \
+  TEST_COND_VAR_ALL (T, <=, le)                        \
+  TEST_COND_VAR_ALL (T, ==, eq)                        \
+  TEST_COND_VAR_ALL (T, !=, ne)
+
+/* Invoke T (TYPE, COND, IMM, SUFFIX) once for each signed vector type.  */
+#define TEST_COND_IMM_SIGNED_ALL(T, COND, IMM, SUFFIX) \
+  T (vnx16qi, COND, IMM, SUFFIX)                               \
+  T (vnx8hi, COND, IMM, SUFFIX)                                \
+  T (vnx4si, COND, IMM, SUFFIX)                                \
+  T (vnx2di, COND, IMM, SUFFIX)
+
+/* Invoke T (TYPE, COND, IMM, SUFFIX) once for each unsigned vector type.  */
+#define TEST_COND_IMM_UNSIGNED_ALL(T, COND, IMM, SUFFIX)       \
+  T (v32qu, COND, IMM, SUFFIX)                                 \
+  T (v16hu, COND, IMM, SUFFIX)                                 \
+  T (v8su, COND, IMM, SUFFIX)                                  \
+  T (v4du, COND, IMM, SUFFIX)
+
+/* Invoke T for every signed and then every unsigned vector type.  */
+#define TEST_COND_IMM_ALL(T, COND, IMM, SUFFIX)                \
+  TEST_COND_IMM_SIGNED_ALL (T, COND, IMM, SUFFIX)      \
+  TEST_COND_IMM_UNSIGNED_ALL (T, COND, IMM, SUFFIX)
+
+/* Invoke T for every immediate comparison under test.  The immediates
+   used are 5 (all types), 15 and -16 (signed types only), 0 and 31
+   (unsigned types only) and 32 (all types); each group has its own
+   numbered suffix so that the generated names stay distinct.  */
+#define TEST_IMM_ALL(T)                                                        \
+  /* Expect immediates to make it into the encoding.  */               \
+  TEST_COND_IMM_ALL (T, >, 5, gt)                                      \
+  TEST_COND_IMM_ALL (T, <, 5, lt)                                      \
+  TEST_COND_IMM_ALL (T, >=, 5, ge)                                     \
+  TEST_COND_IMM_ALL (T, <=, 5, le)                                     \
+  TEST_COND_IMM_ALL (T, ==, 5, eq)                                     \
+  TEST_COND_IMM_ALL (T, !=, 5, ne)                                     \
+                                                                       \
+  TEST_COND_IMM_SIGNED_ALL (T, >, 15, gt2)                             \
+  TEST_COND_IMM_SIGNED_ALL (T, <, 15, lt2)                             \
+  TEST_COND_IMM_SIGNED_ALL (T, >=, 15, ge2)                            \
+  TEST_COND_IMM_SIGNED_ALL (T, <=, 15, le2)                            \
+  TEST_COND_IMM_SIGNED_ALL (T, ==, 15, eq2)                            \
+  TEST_COND_IMM_SIGNED_ALL (T, !=, 15, ne2)                            \
+                                                                       \
+  TEST_COND_IMM_SIGNED_ALL (T, >, -16, gt3)                            \
+  TEST_COND_IMM_SIGNED_ALL (T, <, -16, lt3)                            \
+  TEST_COND_IMM_SIGNED_ALL (T, >=, -16, ge3)                           \
+  TEST_COND_IMM_SIGNED_ALL (T, <=, -16, le3)                           \
+  TEST_COND_IMM_SIGNED_ALL (T, ==, -16, eq3)                           \
+  TEST_COND_IMM_SIGNED_ALL (T, !=, -16, ne3)                           \
+                                                                       \
+  TEST_COND_IMM_UNSIGNED_ALL (T, >, 0, gt4)                            \
+  /* Testing if an unsigned value >= 0 or < 0 is pointless as it will  \
+     get folded away by the compiler.  */                              \
+  TEST_COND_IMM_UNSIGNED_ALL (T, <=, 0, le4)                           \
+                                                                       \
+  TEST_COND_IMM_UNSIGNED_ALL (T, >, 31, gt5)                           \
+  TEST_COND_IMM_UNSIGNED_ALL (T, <, 31, lt5)                           \
+  TEST_COND_IMM_UNSIGNED_ALL (T, >=, 31, ge5)                          \
+  TEST_COND_IMM_UNSIGNED_ALL (T, <=, 31, le5)                          \
+                                                                       \
+  /* Expect immediates to NOT make it into the encoding, and instead be        \
+     forced into a register.  */                                       \
+  TEST_COND_IMM_ALL (T, >, 32, gt6)                                    \
+  TEST_COND_IMM_ALL (T, <, 32, lt6)                                    \
+  TEST_COND_IMM_ALL (T, >=, 32, ge6)                                   \
+  TEST_COND_IMM_ALL (T, <=, 32, le6)                                   \
+  TEST_COND_IMM_ALL (T, ==, 32, eq6)                                   \
+  TEST_COND_IMM_ALL (T, !=, 32, ne6)
+
+/* Emit one function for every variable and immediate comparison.  */
+TEST_VAR_ALL (DEF_VCOND_VAR)
+TEST_IMM_ALL (DEF_VCOND_IMM)
+
+/* { dg-final { scan-assembler {\tsel\tz[0-9]+\.b, p[0-7], z[0-9]+\.b, z[0-9]+\.b\n} } } */
+/* { dg-final { scan-assembler {\tsel\tz[0-9]+\.h, p[0-7], z[0-9]+\.h, z[0-9]+\.h\n} } } */
+/* { dg-final { scan-assembler {\tsel\tz[0-9]+\.s, p[0-7], z[0-9]+\.s, z[0-9]+\.s\n} } } */
+/* { dg-final { scan-assembler {\tsel\tz[0-9]+\.d, p[0-7], z[0-9]+\.d, z[0-9]+\.d\n} } } */
+
+/* { dg-final { scan-assembler {\tcmpgt\tp[0-7]\.b, p[0-7]/z, z[0-9]+\.b, z[0-9]+\.b\n} } } */
+/* { dg-final { scan-assembler {\tcmpgt\tp[0-7]\.h, p[0-7]/z, z[0-9]+\.h, z[0-9]+\.h\n} } } */
+/* { dg-final { scan-assembler {\tcmpgt\tp[0-7]\.s, p[0-7]/z, z[0-9]+\.s, z[0-9]+\.s\n} } } */
+/* { dg-final { scan-assembler {\tcmpgt\tp[0-7]\.d, p[0-7]/z, z[0-9]+\.d, z[0-9]+\.d\n} } } */
+
+/* { dg-final { scan-assembler {\tcmphi\tp[0-7]\.b, p[0-7]/z, z[0-9]+\.b, z[0-9]+\.b\n} } } */
+/* { dg-final { scan-assembler {\tcmphi\tp[0-7]\.h, p[0-7]/z, z[0-9]+\.h, z[0-9]+\.h\n} } } */
+/* { dg-final { scan-assembler {\tcmphi\tp[0-7]\.s, p[0-7]/z, z[0-9]+\.s, z[0-9]+\.s\n} } } */
+/* { dg-final { scan-assembler {\tcmphi\tp[0-7]\.d, p[0-7]/z, z[0-9]+\.d, z[0-9]+\.d\n} } } */
+
+/* { dg-final { scan-assembler {\tcmphs\tp[0-7]\.b, p[0-7]/z, z[0-9]+\.b, z[0-9]+\.b\n} } } */
+/* { dg-final { scan-assembler {\tcmphs\tp[0-7]\.h, p[0-7]/z, z[0-9]+\.h, z[0-9]+\.h\n} } } */
+/* { dg-final { scan-assembler {\tcmphs\tp[0-7]\.s, p[0-7]/z, z[0-9]+\.s, z[0-9]+\.s\n} } } */
+/* { dg-final { scan-assembler {\tcmphs\tp[0-7]\.d, p[0-7]/z, z[0-9]+\.d, z[0-9]+\.d\n} } } */
+
+/* { dg-final { scan-assembler {\tcmpge\tp[0-7]\.b, p[0-7]/z, z[0-9]+\.b, z[0-9]+\.b\n} } } */
+/* { dg-final { scan-assembler {\tcmpge\tp[0-7]\.h, p[0-7]/z, z[0-9]+\.h, z[0-9]+\.h\n} } } */
+/* { dg-final { scan-assembler {\tcmpge\tp[0-7]\.s, p[0-7]/z, z[0-9]+\.s, z[0-9]+\.s\n} } } */
+/* { dg-final { scan-assembler {\tcmpge\tp[0-7]\.d, p[0-7]/z, z[0-9]+\.d, z[0-9]+\.d\n} } } */
+
+/* { dg-final { scan-assembler {\tcmpeq\tp[0-7]\.b, p[0-7]/z, z[0-9]+\.b, z[0-9]+\.b\n} } } */
+/* { dg-final { scan-assembler {\tcmpeq\tp[0-7]\.h, p[0-7]/z, z[0-9]+\.h, z[0-9]+\.h\n} } } */
+/* { dg-final { scan-assembler {\tcmpeq\tp[0-7]\.s, p[0-7]/z, z[0-9]+\.s, z[0-9]+\.s\n} } } */
+/* { dg-final { scan-assembler {\tcmpeq\tp[0-7]\.d, p[0-7]/z, z[0-9]+\.d, z[0-9]+\.d\n} } } */
+
+/* { dg-final { scan-assembler {\tcmpne\tp[0-7]\.b, p[0-7]/z, z[0-9]+\.b, z[0-9]+\.b\n} } } */
+/* { dg-final { scan-assembler {\tcmpne\tp[0-7]\.h, p[0-7]/z, z[0-9]+\.h, z[0-9]+\.h\n} } } */
+/* { dg-final { scan-assembler {\tcmpne\tp[0-7]\.s, p[0-7]/z, z[0-9]+\.s, z[0-9]+\.s\n} } } */
+/* { dg-final { scan-assembler {\tcmpne\tp[0-7]\.d, p[0-7]/z, z[0-9]+\.d, z[0-9]+\.d\n} } } */
+
+
+
+/* { dg-final { scan-assembler {\tcmpgt\tp[0-7]\.b, p[0-7]/z, z[0-9]+\.b, #15\n} } } */
+/* { dg-final { scan-assembler {\tcmpgt\tp[0-7]\.h, p[0-7]/z, z[0-9]+\.h, #15\n} } } */
+/* { dg-final { scan-assembler {\tcmpgt\tp[0-7]\.s, p[0-7]/z, z[0-9]+\.s, #15\n} } } */
+/* { dg-final { scan-assembler {\tcmpgt\tp[0-7]\.d, p[0-7]/z, z[0-9]+\.d, #15\n} } } */
+
+/* { dg-final { scan-assembler {\tcmplt\tp[0-7]\.b, p[0-7]/z, z[0-9]+\.b, #15\n} } } */
+/* { dg-final { scan-assembler {\tcmplt\tp[0-7]\.h, p[0-7]/z, z[0-9]+\.h, #15\n} } } */
+/* { dg-final { scan-assembler {\tcmplt\tp[0-7]\.s, p[0-7]/z, z[0-9]+\.s, #15\n} } } */
+/* { dg-final { scan-assembler {\tcmplt\tp[0-7]\.d, p[0-7]/z, z[0-9]+\.d, #15\n} } } */
+
+/* { dg-final { scan-assembler {\tcmpge\tp[0-7]\.b, p[0-7]/z, z[0-9]+\.b, #15\n} } } */
+/* { dg-final { scan-assembler {\tcmpge\tp[0-7]\.h, p[0-7]/z, z[0-9]+\.h, #15\n} } } */
+/* { dg-final { scan-assembler {\tcmpge\tp[0-7]\.s, p[0-7]/z, z[0-9]+\.s, #15\n} } } */
+/* { dg-final { scan-assembler {\tcmpge\tp[0-7]\.d, p[0-7]/z, z[0-9]+\.d, #15\n} } } */
+
+/* { dg-final { scan-assembler {\tcmple\tp[0-7]\.b, p[0-7]/z, z[0-9]+\.b, #15\n} } } */
+/* { dg-final { scan-assembler {\tcmple\tp[0-7]\.h, p[0-7]/z, z[0-9]+\.h, #15\n} } } */
+/* { dg-final { scan-assembler {\tcmple\tp[0-7]\.s, p[0-7]/z, z[0-9]+\.s, #15\n} } } */
+/* { dg-final { scan-assembler {\tcmple\tp[0-7]\.d, p[0-7]/z, z[0-9]+\.d, #15\n} } } */
+
+/* { dg-final { scan-assembler {\tcmpeq\tp[0-7]\.b, p[0-7]/z, z[0-9]+\.b, #15\n} } } */
+/* { dg-final { scan-assembler {\tcmpeq\tp[0-7]\.h, p[0-7]/z, z[0-9]+\.h, #15\n} } } */
+/* { dg-final { scan-assembler {\tcmpeq\tp[0-7]\.s, p[0-7]/z, z[0-9]+\.s, #15\n} } } */
+/* { dg-final { scan-assembler {\tcmpeq\tp[0-7]\.d, p[0-7]/z, z[0-9]+\.d, #15\n} } } */
+
+/* { dg-final { scan-assembler {\tcmpne\tp[0-7]\.b, p[0-7]/z, z[0-9]+\.b, #15\n} } } */
+/* { dg-final { scan-assembler {\tcmpne\tp[0-7]\.h, p[0-7]/z, z[0-9]+\.h, #15\n} } } */
+/* { dg-final { scan-assembler {\tcmpne\tp[0-7]\.s, p[0-7]/z, z[0-9]+\.s, #15\n} } } */
+/* { dg-final { scan-assembler {\tcmpne\tp[0-7]\.d, p[0-7]/z, z[0-9]+\.d, #15\n} } } */
+
+/* { dg-final { scan-assembler {\tcmpgt\tp[0-7]\.b, p[0-7]/z, z[0-9]+\.b, #-16\n} } } */
+/* { dg-final { scan-assembler {\tcmpgt\tp[0-7]\.h, p[0-7]/z, z[0-9]+\.h, #-16\n} } } */
+/* { dg-final { scan-assembler {\tcmpgt\tp[0-7]\.s, p[0-7]/z, z[0-9]+\.s, #-16\n} } } */
+/* { dg-final { scan-assembler {\tcmpgt\tp[0-7]\.d, p[0-7]/z, z[0-9]+\.d, #-16\n} } } */
+
+/* { dg-final { scan-assembler {\tcmplt\tp[0-7]\.b, p[0-7]/z, z[0-9]+\.b, #-16\n} } } */
+/* { dg-final { scan-assembler {\tcmplt\tp[0-7]\.h, p[0-7]/z, z[0-9]+\.h, #-16\n} } } */
+/* { dg-final { scan-assembler {\tcmplt\tp[0-7]\.s, p[0-7]/z, z[0-9]+\.s, #-16\n} } } */
+/* { dg-final { scan-assembler {\tcmplt\tp[0-7]\.d, p[0-7]/z, z[0-9]+\.d, #-16\n} } } */
+
+/* { dg-final { scan-assembler {\tcmpge\tp[0-7]\.b, p[0-7]/z, z[0-9]+\.b, #-16\n} } } */
+/* { dg-final { scan-assembler {\tcmpge\tp[0-7]\.h, p[0-7]/z, z[0-9]+\.h, #-16\n} } } */
+/* { dg-final { scan-assembler {\tcmpge\tp[0-7]\.s, p[0-7]/z, z[0-9]+\.s, #-16\n} } } */
+/* { dg-final { scan-assembler {\tcmpge\tp[0-7]\.d, p[0-7]/z, z[0-9]+\.d, #-16\n} } } */
+
+/* { dg-final { scan-assembler {\tcmple\tp[0-7]\.b, p[0-7]/z, z[0-9]+\.b, #-16\n} } } */
+/* { dg-final { scan-assembler {\tcmple\tp[0-7]\.h, p[0-7]/z, z[0-9]+\.h, #-16\n} } } */
+/* { dg-final { scan-assembler {\tcmple\tp[0-7]\.s, p[0-7]/z, z[0-9]+\.s, #-16\n} } } */
+/* { dg-final { scan-assembler {\tcmple\tp[0-7]\.d, p[0-7]/z, z[0-9]+\.d, #-16\n} } } */
+
+/* { dg-final { scan-assembler {\tcmpeq\tp[0-7]\.b, p[0-7]/z, z[0-9]+\.b, #-16\n} } } */
+/* { dg-final { scan-assembler {\tcmpeq\tp[0-7]\.h, p[0-7]/z, z[0-9]+\.h, #-16\n} } } */
+/* { dg-final { scan-assembler {\tcmpeq\tp[0-7]\.s, p[0-7]/z, z[0-9]+\.s, #-16\n} } } */
+/* { dg-final { scan-assembler {\tcmpeq\tp[0-7]\.d, p[0-7]/z, z[0-9]+\.d, #-16\n} } } */
+
+/* { dg-final { scan-assembler {\tcmpne\tp[0-7]\.b, p[0-7]/z, z[0-9]+\.b, #-16\n} } } */
+/* { dg-final { scan-assembler {\tcmpne\tp[0-7]\.h, p[0-7]/z, z[0-9]+\.h, #-16\n} } } */
+/* { dg-final { scan-assembler {\tcmpne\tp[0-7]\.s, p[0-7]/z, z[0-9]+\.s, #-16\n} } } */
+/* { dg-final { scan-assembler {\tcmpne\tp[0-7]\.d, p[0-7]/z, z[0-9]+\.d, #-16\n} } } */
+
+
+
+/* { dg-final { scan-assembler {\tcmphi\tp[0-7]\.b, p[0-7]/z, z[0-9]+\.b, #0\n} } } */
+/* { dg-final { scan-assembler {\tcmphi\tp[0-7]\.h, p[0-7]/z, z[0-9]+\.h, #0\n} } } */
+/* { dg-final { scan-assembler {\tcmphi\tp[0-7]\.s, p[0-7]/z, z[0-9]+\.s, #0\n} } } */
+/* { dg-final { scan-assembler {\tcmphi\tp[0-7]\.d, p[0-7]/z, z[0-9]+\.d, #0\n} } } */
+
+/* { dg-final { scan-assembler {\tcmpls\tp[0-7]\.b, p[0-7]/z, z[0-9]+\.b, #0\n} } } */
+/* { dg-final { scan-assembler {\tcmpls\tp[0-7]\.h, p[0-7]/z, z[0-9]+\.h, #0\n} } } */
+/* { dg-final { scan-assembler {\tcmpls\tp[0-7]\.s, p[0-7]/z, z[0-9]+\.s, #0\n} } } */
+/* { dg-final { scan-assembler {\tcmpls\tp[0-7]\.d, p[0-7]/z, z[0-9]+\.d, #0\n} } } */
+
+
+/* { dg-final { scan-assembler {\tcmphi\tp[0-7]\.b, p[0-7]/z, z[0-9]+\.b, #31\n} } } */
+/* { dg-final { scan-assembler {\tcmphi\tp[0-7]\.h, p[0-7]/z, z[0-9]+\.h, #31\n} } } */
+/* { dg-final { scan-assembler {\tcmphi\tp[0-7]\.s, p[0-7]/z, z[0-9]+\.s, #31\n} } } */
+/* { dg-final { scan-assembler {\tcmphi\tp[0-7]\.d, p[0-7]/z, z[0-9]+\.d, #31\n} } } */
+
+/* { dg-final { scan-assembler {\tcmplo\tp[0-7]\.b, p[0-7]/z, z[0-9]+\.b, #31\n} } } */
+/* { dg-final { scan-assembler {\tcmplo\tp[0-7]\.h, p[0-7]/z, z[0-9]+\.h, #31\n} } } */
+/* { dg-final { scan-assembler {\tcmplo\tp[0-7]\.s, p[0-7]/z, z[0-9]+\.s, #31\n} } } */
+/* { dg-final { scan-assembler {\tcmplo\tp[0-7]\.d, p[0-7]/z, z[0-9]+\.d, #31\n} } } */
+
+/* { dg-final { scan-assembler {\tcmphs\tp[0-7]\.b, p[0-7]/z, z[0-9]+\.b, #31\n} } } */
+/* { dg-final { scan-assembler {\tcmphs\tp[0-7]\.h, p[0-7]/z, z[0-9]+\.h, #31\n} } } */
+/* { dg-final { scan-assembler {\tcmphs\tp[0-7]\.s, p[0-7]/z, z[0-9]+\.s, #31\n} } } */
+/* { dg-final { scan-assembler {\tcmphs\tp[0-7]\.d, p[0-7]/z, z[0-9]+\.d, #31\n} } } */
+
+/* { dg-final { scan-assembler {\tcmpls\tp[0-7]\.b, p[0-7]/z, z[0-9]+\.b, #31\n} } } */
+/* { dg-final { scan-assembler {\tcmpls\tp[0-7]\.h, p[0-7]/z, z[0-9]+\.h, #31\n} } } */
+/* { dg-final { scan-assembler {\tcmpls\tp[0-7]\.s, p[0-7]/z, z[0-9]+\.s, #31\n} } } */
+/* { dg-final { scan-assembler {\tcmpls\tp[0-7]\.d, p[0-7]/z, z[0-9]+\.d, #31\n} } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/vcond_1_run.C b/gcc/testsuite/gcc.target/aarch64/sve/vcond_1_run.C
new file mode 100644 (file)
index 0000000..2df3371
--- /dev/null
@@ -0,0 +1,46 @@
+/* { dg-do run { target aarch64_sve_hw } } */
+/* { dg-options "-O" } */
+/* { dg-options "-O -msve-vector-bits=256" { target aarch64_sve256_hw } } */
+
+#include "vcond_1.C"
+
+/* Number of elements in vector (or array) X.  X is parenthesized before
+   being subscripted so that an expression argument binds as intended.  */
+#define NUM_ELEMS(X) (sizeof (X) / sizeof ((X)[0]))
+
+#define TEST_VCOND_VAR(TYPE, COND, SUFFIX)             \
+{                                                      \
+  TYPE x, y, a, b;                                     \
+  for (int i = 0; i < NUM_ELEMS (x); ++i)              \
+    {                                                  \
+      a[i] = i - 2;                                    \
+      b[i] = NUM_ELEMS (x) - 2 - i;                    \
+      x[i] = i * 2;                                    \
+      y[i] = -i * 3;                                   \
+    }                                                  \
+  TYPE r = vcond_##TYPE##_##SUFFIX (x, y, a, b);       \
+  for (int i = 0; i < NUM_ELEMS (x); ++i)              \
+    if (r[i] != (a[i] COND b[i] ? x[i] : y[i]))                \
+      __builtin_abort ();                              \
+}
+
+#define TEST_VCOND_IMM(TYPE, COND, IMM, SUFFIX)                \
+{                                                      \
+  TYPE x, y, a;                                                \
+  for (int i = 0; i < NUM_ELEMS (x); ++i)              \
+    {                                                  \
+      a[i] = IMM - 2 + i;                              \
+      x[i] = i * 2;                                    \
+      y[i] = -i * 3;                                   \
+    }                                                  \
+  TYPE r = vcond_imm_##TYPE##_##SUFFIX (x, y, a);      \
+  for (int i = 0; i < NUM_ELEMS (x); ++i)              \
+    if (r[i] != (a[i] COND IMM ? x[i] : y[i]))         \
+      __builtin_abort ();                              \
+}
+
+
+/* Run the run-time check for every comparison listed by TEST_VAR_ALL and
+   TEST_IMM_ALL, both of which come from the included vcond_1.C.  */
+int main (int argc, char **argv)
+{
+  TEST_VAR_ALL (TEST_VCOND_VAR)
+  TEST_IMM_ALL (TEST_VCOND_IMM)
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/vcond_2.c b/gcc/testsuite/gcc.target/aarch64/sve/vcond_2.c
new file mode 100644 (file)
index 0000000..53baf86
--- /dev/null
@@ -0,0 +1,318 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -ftree-vectorize" } */
+
+#include <stdint.h>
+
+/* Define the noinline, noclone function vcond_var_<CMP_TYPE>_<SUFFIX>.
+   For each I below N it loads x[I], y[I], a[I] and b[I] and stores
+   "a[I] COND b[I] ? x[I] : y[I]" to r[I].  DATA_TYPE is the type of the
+   selected values and CMP_TYPE the type of the compared values.  */
+#define DEF_VCOND_VAR(DATA_TYPE, CMP_TYPE, COND, SUFFIX)       \
+  void __attribute__ ((noinline, noclone))                     \
+  vcond_var_##CMP_TYPE##_##SUFFIX (DATA_TYPE *__restrict__ r,  \
+                                  DATA_TYPE *__restrict__ x,   \
+                                  DATA_TYPE *__restrict__ y,   \
+                                  CMP_TYPE *__restrict__ a,    \
+                                  CMP_TYPE *__restrict__ b,    \
+                                  int n)                       \
+  {                                                            \
+    for (int i = 0; i < n; i++)                                        \
+      {                                                                \
+       DATA_TYPE xval = x[i], yval = y[i];                     \
+       CMP_TYPE aval = a[i], bval = b[i];                      \
+       r[i] = aval COND bval ? xval : yval;                    \
+      }                                                                \
+  }
+
+/* Define the noinline, noclone function vcond_imm_<CMP_TYPE>_<SUFFIX>.
+   For each I below N it loads x[I], y[I] and a[I] and stores
+   "a[I] COND (CMP_TYPE) IMM ? x[I] : y[I]" to r[I]; IMM is cast to
+   CMP_TYPE before the comparison.  */
+#define DEF_VCOND_IMM(DATA_TYPE, CMP_TYPE, COND, IMM, SUFFIX)  \
+  void __attribute__ ((noinline, noclone))                     \
+  vcond_imm_##CMP_TYPE##_##SUFFIX (DATA_TYPE *__restrict__ r,  \
+                                  DATA_TYPE *__restrict__ x,   \
+                                  DATA_TYPE *__restrict__ y,   \
+                                  CMP_TYPE *__restrict__ a,    \
+                                  int n)                       \
+  {                                                            \
+    for (int i = 0; i < n; i++)                                        \
+      {                                                                \
+       DATA_TYPE xval = x[i], yval = y[i];                     \
+       CMP_TYPE aval = a[i];                                   \
+       r[i] = aval COND (CMP_TYPE) IMM ? xval : yval;          \
+      }                                                                \
+  }
+
+/* Invoke T (DATA_TYPE, CMP_TYPE, COND, SUFFIX) for each signed integer
+   type used as both data and comparison type, then for the
+   _Float16/int16_t, float/int32_t and double/int64_t data/comparison
+   pairs, which get an extra suffix naming the data type.  */
+#define TEST_COND_VAR_SIGNED_ALL(T, COND, SUFFIX)      \
+  T (int8_t, int8_t, COND, SUFFIX)                     \
+  T (int16_t, int16_t, COND, SUFFIX)                   \
+  T (int32_t, int32_t, COND, SUFFIX)                   \
+  T (int64_t, int64_t, COND, SUFFIX)                   \
+  T (_Float16, int16_t, COND, SUFFIX##_float16)                \
+  T (float, int32_t, COND, SUFFIX##_float)             \
+  T (double, int64_t, COND, SUFFIX##_double)
+
+/* Likewise, with unsigned integer comparison types.  */
+#define TEST_COND_VAR_UNSIGNED_ALL(T, COND, SUFFIX)    \
+  T (uint8_t, uint8_t, COND, SUFFIX)                   \
+  T (uint16_t, uint16_t, COND, SUFFIX)                 \
+  T (uint32_t, uint32_t, COND, SUFFIX)                 \
+  T (uint64_t, uint64_t, COND, SUFFIX)                 \
+  T (_Float16, uint16_t, COND, SUFFIX##_float16)       \
+  T (float, uint32_t, COND, SUFFIX##_float)            \
+  T (double, uint64_t, COND, SUFFIX##_double)
+
+/* Invoke T for the signed and then the unsigned combinations.  */
+#define TEST_COND_VAR_ALL(T, COND, SUFFIX)     \
+  TEST_COND_VAR_SIGNED_ALL (T, COND, SUFFIX)   \
+  TEST_COND_VAR_UNSIGNED_ALL (T, COND, SUFFIX)
+
+/* Invoke T for every type combination with each of the six comparison
+   operators; the suffix names the operator.  */
+#define TEST_VAR_ALL(T)                                \
+  TEST_COND_VAR_ALL (T, >, _gt)                        \
+  TEST_COND_VAR_ALL (T, <, _lt)                        \
+  TEST_COND_VAR_ALL (T, >=, _ge)               \
+  TEST_COND_VAR_ALL (T, <=, _le)               \
+  TEST_COND_VAR_ALL (T, ==, _eq)               \
+  TEST_COND_VAR_ALL (T, !=, _ne)
+
+/* Invoke T (DATA_TYPE, CMP_TYPE, COND, IMM, SUFFIX) for each signed
+   integer type used as both data and comparison type, then for the
+   _Float16/int16_t, float/int32_t and double/int64_t data/comparison
+   pairs, which get an extra suffix naming the data type.  */
+#define TEST_COND_IMM_SIGNED_ALL(T, COND, IMM, SUFFIX) \
+  T (int8_t, int8_t, COND, IMM, SUFFIX)                        \
+  T (int16_t, int16_t, COND, IMM, SUFFIX)              \
+  T (int32_t, int32_t, COND, IMM, SUFFIX)              \
+  T (int64_t, int64_t, COND, IMM, SUFFIX)              \
+  T (_Float16, int16_t, COND, IMM, SUFFIX##_float16)   \
+  T (float, int32_t, COND, IMM, SUFFIX##_float)                \
+  T (double, int64_t, COND, IMM, SUFFIX##_double)
+
+/* Likewise, with unsigned integer comparison types.  */
+#define TEST_COND_IMM_UNSIGNED_ALL(T, COND, IMM, SUFFIX)       \
+  T (uint8_t, uint8_t, COND, IMM, SUFFIX)                      \
+  T (uint16_t, uint16_t, COND, IMM, SUFFIX)                    \
+  T (uint32_t, uint32_t, COND, IMM, SUFFIX)                    \
+  T (uint64_t, uint64_t, COND, IMM, SUFFIX)                    \
+  T (_Float16, uint16_t, COND, IMM, SUFFIX##_float16)          \
+  T (float, uint32_t, COND, IMM, SUFFIX##_float)               \
+  T (double, uint64_t, COND, IMM, SUFFIX##_double)
+
+/* Invoke T for the signed and then the unsigned combinations.  */
+#define TEST_COND_IMM_ALL(T, COND, IMM, SUFFIX)                \
+  TEST_COND_IMM_SIGNED_ALL (T, COND, IMM, SUFFIX)      \
+  TEST_COND_IMM_UNSIGNED_ALL (T, COND, IMM, SUFFIX)
+
+/* Invoke T for every immediate comparison under test.  For the
+   immediates 15, 16, -16 and -17 the ordered comparisons are generated
+   for signed comparison types only, while == and != are generated for
+   all types.  The immediates 0, 127 and 128 are used with unsigned
+   comparison types only.  Each group has its own numbered suffix so that
+   the generated names stay distinct.  */
+#define TEST_IMM_ALL(T)                                                        \
+  /* Expect immediates to make it into the encoding.  */               \
+  TEST_COND_IMM_ALL (T, >, 5, _gt)                                     \
+  TEST_COND_IMM_ALL (T, <, 5, _lt)                                     \
+  TEST_COND_IMM_ALL (T, >=, 5, _ge)                                    \
+  TEST_COND_IMM_ALL (T, <=, 5, _le)                                    \
+  TEST_COND_IMM_ALL (T, ==, 5, _eq)                                    \
+  TEST_COND_IMM_ALL (T, !=, 5, _ne)                                    \
+                                                                       \
+  TEST_COND_IMM_SIGNED_ALL (T, >, 15, _gt2)                            \
+  TEST_COND_IMM_SIGNED_ALL (T, <, 15, _lt2)                            \
+  TEST_COND_IMM_SIGNED_ALL (T, >=, 15, _ge2)                           \
+  TEST_COND_IMM_SIGNED_ALL (T, <=, 15, _le2)                           \
+  TEST_COND_IMM_ALL (T, ==, 15, _eq2)                                  \
+  TEST_COND_IMM_ALL (T, !=, 15, _ne2)                                  \
+                                                                       \
+  TEST_COND_IMM_SIGNED_ALL (T, >, 16, _gt3)                            \
+  TEST_COND_IMM_SIGNED_ALL (T, <, 16, _lt3)                            \
+  TEST_COND_IMM_SIGNED_ALL (T, >=, 16, _ge3)                           \
+  TEST_COND_IMM_SIGNED_ALL (T, <=, 16, _le3)                           \
+  TEST_COND_IMM_ALL (T, ==, 16, _eq3)                                  \
+  TEST_COND_IMM_ALL (T, !=, 16, _ne3)                                  \
+                                                                       \
+  TEST_COND_IMM_SIGNED_ALL (T, >, -16, _gt4)                           \
+  TEST_COND_IMM_SIGNED_ALL (T, <, -16, _lt4)                           \
+  TEST_COND_IMM_SIGNED_ALL (T, >=, -16, _ge4)                          \
+  TEST_COND_IMM_SIGNED_ALL (T, <=, -16, _le4)                          \
+  TEST_COND_IMM_ALL (T, ==, -16, _eq4)                                 \
+  TEST_COND_IMM_ALL (T, !=, -16, _ne4)                                 \
+                                                                       \
+  TEST_COND_IMM_SIGNED_ALL (T, >, -17, _gt5)                           \
+  TEST_COND_IMM_SIGNED_ALL (T, <, -17, _lt5)                           \
+  TEST_COND_IMM_SIGNED_ALL (T, >=, -17, _ge5)                          \
+  TEST_COND_IMM_SIGNED_ALL (T, <=, -17, _le5)                          \
+  TEST_COND_IMM_ALL (T, ==, -17, _eq5)                                 \
+  TEST_COND_IMM_ALL (T, !=, -17, _ne5)                                 \
+                                                                       \
+  TEST_COND_IMM_UNSIGNED_ALL (T, >, 0, _gt6)                           \
+  /* Testing if an unsigned value >= 0 or < 0 is pointless as it will  \
+     get folded away by the compiler.  */                              \
+  TEST_COND_IMM_UNSIGNED_ALL (T, <=, 0, _le6)                          \
+                                                                       \
+  TEST_COND_IMM_UNSIGNED_ALL (T, >, 127, _gt7)                         \
+  TEST_COND_IMM_UNSIGNED_ALL (T, <, 127, _lt7)                         \
+  TEST_COND_IMM_UNSIGNED_ALL (T, >=, 127, _ge7)                                \
+  TEST_COND_IMM_UNSIGNED_ALL (T, <=, 127, _le7)                                \
+                                                                       \
+  /* Expect immediates to NOT make it into the encoding, and instead be \
+     forced into a register.  */                                       \
+  TEST_COND_IMM_UNSIGNED_ALL (T, >, 128, _gt8)                         \
+  TEST_COND_IMM_UNSIGNED_ALL (T, <, 128, _lt8)                         \
+  TEST_COND_IMM_UNSIGNED_ALL (T, >=, 128, _ge8)                                \
+  TEST_COND_IMM_UNSIGNED_ALL (T, <=, 128, _le8)
+
+/* Emit one loop function for every variable and immediate comparison.  */
+TEST_VAR_ALL (DEF_VCOND_VAR)
+TEST_IMM_ALL (DEF_VCOND_IMM)
+
+/* { dg-final { scan-assembler-times {\tsel\tz[0-9]+\.b, p[0-7], z[0-9]+\.b, z[0-9]+\.b\n} 66 } } */
+/* { dg-final { scan-assembler-times {\tsel\tz[0-9]+\.h, p[0-7], z[0-9]+\.h, z[0-9]+\.h\n} 132 } } */
+/* { dg-final { scan-assembler-times {\tsel\tz[0-9]+\.s, p[0-7], z[0-9]+\.s, z[0-9]+\.s\n} 132 } } */
+/* { dg-final { scan-assembler-times {\tsel\tz[0-9]+\.d, p[0-7], z[0-9]+\.d, z[0-9]+\.d\n} 132 } } */
+
+/* There are two signed ordered register comparisons for .b, one for a
+   variable comparison and one for one of the two out-of-range constant
+   comparisons.  The other out-of-range constant comparison can be
+   adjusted to an in-range value by inverting the handling of equality.
+
+   The same pattern appears twice for .h, .s and .d, once for integer data
+   and once for floating-point data.  */
+/* { dg-final { scan-assembler-times {\tcmpgt\tp[0-7]\.b, p[0-7]/z, z[0-9]+\.b, z[0-9]+\.b\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tcmpgt\tp[0-7]\.h, p[0-7]/z, z[0-9]+\.h, z[0-9]+\.h\n} 4 } } */
+/* { dg-final { scan-assembler-times {\tcmpgt\tp[0-7]\.s, p[0-7]/z, z[0-9]+\.s, z[0-9]+\.s\n} 4 } } */
+/* { dg-final { scan-assembler-times {\tcmpgt\tp[0-7]\.d, p[0-7]/z, z[0-9]+\.d, z[0-9]+\.d\n} 4 } } */
+
+/* { dg-final { scan-assembler-times {\tcmple\tp[0-7]\.b, p[0-7]/z, z[0-9]+\.b, z[0-9]+\.b\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tcmple\tp[0-7]\.h, p[0-7]/z, z[0-9]+\.h, z[0-9]+\.h\n} 4 } } */
+/* { dg-final { scan-assembler-times {\tcmple\tp[0-7]\.s, p[0-7]/z, z[0-9]+\.s, z[0-9]+\.s\n} 4 } } */
+/* { dg-final { scan-assembler-times {\tcmple\tp[0-7]\.d, p[0-7]/z, z[0-9]+\.d, z[0-9]+\.d\n} 4 } } */
+
+/* { dg-final { scan-assembler-times {\tcmplt\tp[0-7]\.b, p[0-7]/z, z[0-9]+\.b, z[0-9]+\.b\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tcmplt\tp[0-7]\.h, p[0-7]/z, z[0-9]+\.h, z[0-9]+\.h\n} 4 } } */
+/* { dg-final { scan-assembler-times {\tcmplt\tp[0-7]\.s, p[0-7]/z, z[0-9]+\.s, z[0-9]+\.s\n} 4 } } */
+/* { dg-final { scan-assembler-times {\tcmplt\tp[0-7]\.d, p[0-7]/z, z[0-9]+\.d, z[0-9]+\.d\n} 4 } } */
+
+/* { dg-final { scan-assembler-times {\tcmpge\tp[0-7]\.b, p[0-7]/z, z[0-9]+\.b, z[0-9]+\.b\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tcmpge\tp[0-7]\.h, p[0-7]/z, z[0-9]+\.h, z[0-9]+\.h\n} 4 } } */
+/* { dg-final { scan-assembler-times {\tcmpge\tp[0-7]\.s, p[0-7]/z, z[0-9]+\.s, z[0-9]+\.s\n} 4 } } */
+/* { dg-final { scan-assembler-times {\tcmpge\tp[0-7]\.d, p[0-7]/z, z[0-9]+\.d, z[0-9]+\.d\n} 4 } } */
+
+/* { dg-final { scan-assembler-times {\tcmphi\tp[0-7]\.b, p[0-7]/z, z[0-9]+\.b, z[0-9]+\.b\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tcmphi\tp[0-7]\.h, p[0-7]/z, z[0-9]+\.h, z[0-9]+\.h\n} 4 } } */
+/* { dg-final { scan-assembler-times {\tcmphi\tp[0-7]\.s, p[0-7]/z, z[0-9]+\.s, z[0-9]+\.s\n} 4 } } */
+/* { dg-final { scan-assembler-times {\tcmphi\tp[0-7]\.d, p[0-7]/z, z[0-9]+\.d, z[0-9]+\.d\n} 4 } } */
+
+/* { dg-final { scan-assembler-times {\tcmpls\tp[0-7]\.b, p[0-7]/z, z[0-9]+\.b, z[0-9]+\.b\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tcmpls\tp[0-7]\.h, p[0-7]/z, z[0-9]+\.h, z[0-9]+\.h\n} 4 } } */
+/* { dg-final { scan-assembler-times {\tcmpls\tp[0-7]\.s, p[0-7]/z, z[0-9]+\.s, z[0-9]+\.s\n} 4 } } */
+/* { dg-final { scan-assembler-times {\tcmpls\tp[0-7]\.d, p[0-7]/z, z[0-9]+\.d, z[0-9]+\.d\n} 4 } } */
+
+/* Out-of-range >= is converted to in-range >.  */
+/* { dg-final { scan-assembler-times {\tcmphs\tp[0-7]\.b, p[0-7]/z, z[0-9]+\.b, z[0-9]+\.b\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tcmphs\tp[0-7]\.h, p[0-7]/z, z[0-9]+\.h, z[0-9]+\.h\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tcmphs\tp[0-7]\.s, p[0-7]/z, z[0-9]+\.s, z[0-9]+\.s\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tcmphs\tp[0-7]\.d, p[0-7]/z, z[0-9]+\.d, z[0-9]+\.d\n} 2 } } */
+
+/* Out-of-range < is converted to in-range <=.  */
+/* { dg-final { scan-assembler-times {\tcmplo\tp[0-7]\.b, p[0-7]/z, z[0-9]+\.b, z[0-9]+\.b\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tcmplo\tp[0-7]\.h, p[0-7]/z, z[0-9]+\.h, z[0-9]+\.h\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tcmplo\tp[0-7]\.s, p[0-7]/z, z[0-9]+\.s, z[0-9]+\.s\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tcmplo\tp[0-7]\.d, p[0-7]/z, z[0-9]+\.d, z[0-9]+\.d\n} 2 } } */
+
+/* 6 for .b: {signed, unsigned} x {variable, too high, too low}.  */
+/* 12 for .h, .s and .d: the above 6 repeated for integer and floating-point
+   data.  */
+/* { dg-final { scan-assembler-times {\tcmpeq\tp[0-7]\.b, p[0-7]/z, z[0-9]+\.b, z[0-9]+\.b\n} 6 } } */
+/* { dg-final { scan-assembler-times {\tcmpeq\tp[0-7]\.h, p[0-7]/z, z[0-9]+\.h, z[0-9]+\.h\n} 12 } } */
+/* { dg-final { scan-assembler-times {\tcmpeq\tp[0-7]\.s, p[0-7]/z, z[0-9]+\.s, z[0-9]+\.s\n} 12 } } */
+/* { dg-final { scan-assembler-times {\tcmpeq\tp[0-7]\.d, p[0-7]/z, z[0-9]+\.d, z[0-9]+\.d\n} 12 } } */
+
+/* { dg-final { scan-assembler-times {\tcmpne\tp[0-7]\.b, p[0-7]/z, z[0-9]+\.b, z[0-9]+\.b\n} 6 } } */
+/* { dg-final { scan-assembler-times {\tcmpne\tp[0-7]\.h, p[0-7]/z, z[0-9]+\.h, z[0-9]+\.h\n} 12 } } */
+/* { dg-final { scan-assembler-times {\tcmpne\tp[0-7]\.s, p[0-7]/z, z[0-9]+\.s, z[0-9]+\.s\n} 12 } } */
+/* { dg-final { scan-assembler-times {\tcmpne\tp[0-7]\.d, p[0-7]/z, z[0-9]+\.d, z[0-9]+\.d\n} 12 } } */
+
+/* Also used for >= 16. */
+/* { dg-final { scan-assembler-times {\tcmpgt\tp[0-7]\.b, p[0-7]/z, z[0-9]+\.b, #15\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tcmpgt\tp[0-7]\.h, p[0-7]/z, z[0-9]+\.h, #15\n} 4 } } */
+/* { dg-final { scan-assembler-times {\tcmpgt\tp[0-7]\.s, p[0-7]/z, z[0-9]+\.s, #15\n} 4 } } */
+/* { dg-final { scan-assembler-times {\tcmpgt\tp[0-7]\.d, p[0-7]/z, z[0-9]+\.d, #15\n} 4 } } */
+
+/* gcc converts "a < 15" into "a <= 14".  */
+/* { dg-final { scan-assembler-times {\tcmple\tp[0-7]\.b, p[0-7]/z, z[0-9]+\.b, #14\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tcmple\tp[0-7]\.h, p[0-7]/z, z[0-9]+\.h, #14\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tcmple\tp[0-7]\.s, p[0-7]/z, z[0-9]+\.s, #14\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tcmple\tp[0-7]\.d, p[0-7]/z, z[0-9]+\.d, #14\n} 2 } } */
+
+/* gcc converts "a >= 15" into "a > 14".  */
+/* { dg-final { scan-assembler-times {\tcmpgt\tp[0-7]\.b, p[0-7]/z, z[0-9]+\.b, #14\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tcmpgt\tp[0-7]\.h, p[0-7]/z, z[0-9]+\.h, #14\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tcmpgt\tp[0-7]\.s, p[0-7]/z, z[0-9]+\.s, #14\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tcmpgt\tp[0-7]\.d, p[0-7]/z, z[0-9]+\.d, #14\n} 2 } } */
+
+/* Also used for < 16.  */
+/* { dg-final { scan-assembler-times {\tcmple\tp[0-7]\.b, p[0-7]/z, z[0-9]+\.b, #15\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tcmple\tp[0-7]\.h, p[0-7]/z, z[0-9]+\.h, #15\n} 4 } } */
+/* { dg-final { scan-assembler-times {\tcmple\tp[0-7]\.s, p[0-7]/z, z[0-9]+\.s, #15\n} 4 } } */
+/* { dg-final { scan-assembler-times {\tcmple\tp[0-7]\.d, p[0-7]/z, z[0-9]+\.d, #15\n} 4 } } */
+
+/* Appears once for each signedness.  */
+/* { dg-final { scan-assembler-times {\tcmpeq\tp[0-7]\.b, p[0-7]/z, z[0-9]+\.b, #15\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tcmpeq\tp[0-7]\.h, p[0-7]/z, z[0-9]+\.h, #15\n} 4 } } */
+/* { dg-final { scan-assembler-times {\tcmpeq\tp[0-7]\.s, p[0-7]/z, z[0-9]+\.s, #15\n} 4 } } */
+/* { dg-final { scan-assembler-times {\tcmpeq\tp[0-7]\.d, p[0-7]/z, z[0-9]+\.d, #15\n} 4 } } */
+
+/* { dg-final { scan-assembler-times {\tcmpne\tp[0-7]\.b, p[0-7]/z, z[0-9]+\.b, #15\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tcmpne\tp[0-7]\.h, p[0-7]/z, z[0-9]+\.h, #15\n} 4 } } */
+/* { dg-final { scan-assembler-times {\tcmpne\tp[0-7]\.s, p[0-7]/z, z[0-9]+\.s, #15\n} 4 } } */
+/* { dg-final { scan-assembler-times {\tcmpne\tp[0-7]\.d, p[0-7]/z, z[0-9]+\.d, #15\n} 4 } } */
+
+/* { dg-final { scan-assembler-times {\tcmpeq\tp[0-7]\.b, p[0-7]/z, z[0-9]+\.b, #-16\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tcmpeq\tp[0-7]\.h, p[0-7]/z, z[0-9]+\.h, #-16\n} 4 } } */
+/* { dg-final { scan-assembler-times {\tcmpeq\tp[0-7]\.s, p[0-7]/z, z[0-9]+\.s, #-16\n} 4 } } */
+/* { dg-final { scan-assembler-times {\tcmpeq\tp[0-7]\.d, p[0-7]/z, z[0-9]+\.d, #-16\n} 4 } } */
+
+/* { dg-final { scan-assembler-times {\tcmpne\tp[0-7]\.b, p[0-7]/z, z[0-9]+\.b, #-16\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tcmpne\tp[0-7]\.h, p[0-7]/z, z[0-9]+\.h, #-16\n} 4 } } */
+/* { dg-final { scan-assembler-times {\tcmpne\tp[0-7]\.s, p[0-7]/z, z[0-9]+\.s, #-16\n} 4 } } */
+/* { dg-final { scan-assembler-times {\tcmpne\tp[0-7]\.d, p[0-7]/z, z[0-9]+\.d, #-16\n} 4 } } */
+
+/* gcc converts "a > -16" into "a >= -15".  */
+/* { dg-final { scan-assembler-times {\tcmpge\tp[0-7]\.b, p[0-7]/z, z[0-9]+\.b, #-15\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tcmpge\tp[0-7]\.h, p[0-7]/z, z[0-9]+\.h, #-15\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tcmpge\tp[0-7]\.s, p[0-7]/z, z[0-9]+\.s, #-15\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tcmpge\tp[0-7]\.d, p[0-7]/z, z[0-9]+\.d, #-15\n} 2 } } */
+
+/* Also used for <= -17.  */
+/* { dg-final { scan-assembler-times {\tcmplt\tp[0-7]\.b, p[0-7]/z, z[0-9]+\.b, #-16\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tcmplt\tp[0-7]\.h, p[0-7]/z, z[0-9]+\.h, #-16\n} 4 } } */
+/* { dg-final { scan-assembler-times {\tcmplt\tp[0-7]\.s, p[0-7]/z, z[0-9]+\.s, #-16\n} 4 } } */
+/* { dg-final { scan-assembler-times {\tcmplt\tp[0-7]\.d, p[0-7]/z, z[0-9]+\.d, #-16\n} 4 } } */
+
+/* Also used for > -17.  */
+/* { dg-final { scan-assembler-times {\tcmpge\tp[0-7]\.b, p[0-7]/z, z[0-9]+\.b, #-16\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tcmpge\tp[0-7]\.h, p[0-7]/z, z[0-9]+\.h, #-16\n} 4 } } */
+/* { dg-final { scan-assembler-times {\tcmpge\tp[0-7]\.s, p[0-7]/z, z[0-9]+\.s, #-16\n} 4 } } */
+/* { dg-final { scan-assembler-times {\tcmpge\tp[0-7]\.d, p[0-7]/z, z[0-9]+\.d, #-16\n} 4 } } */
+
+/* gcc converts "a <= -16" into "a < -15".  */
+/* { dg-final { scan-assembler-times {\tcmplt\tp[0-7]\.b, p[0-7]/z, z[0-9]+\.b, #-15\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tcmplt\tp[0-7]\.h, p[0-7]/z, z[0-9]+\.h, #-15\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tcmplt\tp[0-7]\.s, p[0-7]/z, z[0-9]+\.s, #-15\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tcmplt\tp[0-7]\.d, p[0-7]/z, z[0-9]+\.d, #-15\n} 2 } } */
+
+/* gcc converts "a > 0" into "a != 0".  */
+/* { dg-final { scan-assembler-times {\tcmpne\tp[0-7]\.b, p[0-7]/z, z[0-9]+\.b, #0\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tcmpne\tp[0-7]\.h, p[0-7]/z, z[0-9]+\.h, #0\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tcmpne\tp[0-7]\.s, p[0-7]/z, z[0-9]+\.s, #0\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tcmpne\tp[0-7]\.d, p[0-7]/z, z[0-9]+\.d, #0\n} 2 } } */
+
+/* gcc converts "a <= 0" into "a == 0".  */
+/* { dg-final { scan-assembler-times {\tcmpeq\tp[0-7]\.b, p[0-7]/z, z[0-9]+\.b, #0\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tcmpeq\tp[0-7]\.h, p[0-7]/z, z[0-9]+\.h, #0\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tcmpeq\tp[0-7]\.s, p[0-7]/z, z[0-9]+\.s, #0\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tcmpeq\tp[0-7]\.d, p[0-7]/z, z[0-9]+\.d, #0\n} 2 } } */
+
+/* Also used for >= 128.  */
+/* { dg-final { scan-assembler-times {\tcmphi\tp[0-7]\.b, p[0-7]/z, z[0-9]+\.b, #127\n} 2 { xfail *-*-* } } } */
+/* { dg-final { scan-assembler-times {\tcmphi\tp[0-7]\.h, p[0-7]/z, z[0-9]+\.h, #127\n} 4 } } */
+/* { dg-final { scan-assembler-times {\tcmphi\tp[0-7]\.s, p[0-7]/z, z[0-9]+\.s, #127\n} 4 } } */
+/* { dg-final { scan-assembler-times {\tcmphi\tp[0-7]\.d, p[0-7]/z, z[0-9]+\.d, #127\n} 4 } } */
+
+/* gcc converts "a < 127" into "a <= 126".  */
+/* { dg-final { scan-assembler-times {\tcmpls\tp[0-7]\.b, p[0-7]/z, z[0-9]+\.b, #126\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tcmpls\tp[0-7]\.h, p[0-7]/z, z[0-9]+\.h, #126\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tcmpls\tp[0-7]\.s, p[0-7]/z, z[0-9]+\.s, #126\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tcmpls\tp[0-7]\.d, p[0-7]/z, z[0-9]+\.d, #126\n} 2 } } */
+
+/* gcc converts "a >= 127" into "a > 126".  */
+/* { dg-final { scan-assembler-times {\tcmphi\tp[0-7]\.b, p[0-7]/z, z[0-9]+\.b, #126\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tcmphi\tp[0-7]\.h, p[0-7]/z, z[0-9]+\.h, #126\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tcmphi\tp[0-7]\.s, p[0-7]/z, z[0-9]+\.s, #126\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tcmphi\tp[0-7]\.d, p[0-7]/z, z[0-9]+\.d, #126\n} 2 } } */
+
+/* Also used for < 128.  */
+/* { dg-final { scan-assembler-times {\tcmpls\tp[0-7]\.b, p[0-7]/z, z[0-9]+\.b, #127\n} 2 { xfail *-*-* } } } */
+/* { dg-final { scan-assembler-times {\tcmpls\tp[0-7]\.h, p[0-7]/z, z[0-9]+\.h, #127\n} 4 } } */
+/* { dg-final { scan-assembler-times {\tcmpls\tp[0-7]\.s, p[0-7]/z, z[0-9]+\.s, #127\n} 4 } } */
+/* { dg-final { scan-assembler-times {\tcmpls\tp[0-7]\.d, p[0-7]/z, z[0-9]+\.d, #127\n} 4 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/vcond_2_run.c b/gcc/testsuite/gcc.target/aarch64/sve/vcond_2_run.c
new file mode 100644 (file)
index 0000000..5432f59
--- /dev/null
@@ -0,0 +1,49 @@
+/* { dg-do run { target aarch64_sve_hw } } */
+/* { dg-options "-O2 -ftree-vectorize" } */
+
+#include "vcond_2.c"
+
+#define N 97
+
+#define TEST_VCOND_VAR(DATA_TYPE, CMP_TYPE, COND, SUFFIX)      \
+{                                                              \
+  DATA_TYPE x[N], y[N], r[N];                                  \
+  CMP_TYPE a[N], b[N];                                         \
+  for (int i = 0; i < N; ++i)                                  \
+    {                                                          \
+      x[i] = i;                                                        \
+      y[i] = (i & 1) + 5;                                      \
+      a[i] = i - N / 3;                                                \
+      b[i] = N - N / 3 - i;                                    \
+      asm volatile ("" ::: "memory");                          \
+    }                                                          \
+  vcond_var_##CMP_TYPE##_##SUFFIX (r, x, y, a, b, N);          \
+  for (int i = 0; i < N; ++i)                                  \
+    if (r[i] != (a[i] COND b[i] ? x[i] : y[i]))                        \
+      __builtin_abort ();                                      \
+}
+
+#define TEST_VCOND_IMM(DATA_TYPE, CMP_TYPE, COND, IMM, SUFFIX) \
+{                                                              \
+  DATA_TYPE x[N], y[N], r[N];                                  \
+  CMP_TYPE a[N];                                               \
+  for (int i = 0; i < N; ++i)                                  \
+    {                                                          \
+      x[i] = i;                                                        \
+      y[i] = (i & 1) + 5;                                      \
+      a[i] = IMM - N / 3 + i;                                  \
+      asm volatile ("" ::: "memory");                          \
+    }                                                          \
+  vcond_imm_##CMP_TYPE##_##SUFFIX (r, x, y, a, N);             \
+  for (int i = 0; i < N; ++i)                                  \
+    if (r[i] != (a[i] COND (CMP_TYPE) IMM ? x[i] : y[i]))      \
+      __builtin_abort ();                                      \
+}
+
+int __attribute__ ((optimize (1)))
+main (int argc, char **argv)
+{
+  TEST_VAR_ALL (TEST_VCOND_VAR)
+  TEST_IMM_ALL (TEST_VCOND_IMM)
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/vcond_3.c b/gcc/testsuite/gcc.target/aarch64/sve/vcond_3.c
new file mode 100644 (file)
index 0000000..7dee996
--- /dev/null
@@ -0,0 +1,68 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -ftree-vectorize" } */
+
+#include <stdint.h>
+
+#define DEF_SEL_IMM(TYPE, SUFFIX, IMM)                                 \
+void                                                                   \
+sel_##TYPE##_##SUFFIX (TYPE *restrict a, TYPE *restrict b, int n)      \
+{                                                                      \
+  for (int i = 0; i < n; i++)                                          \
+    a[i] = b[i] != 0 ? IMM : 0;                                                \
+}
+
+#define DEF_SEL_VAR(TYPE)                                              \
+void                                                                   \
+sel_##TYPE##_var (TYPE *restrict a, TYPE *restrict b, TYPE val, int n) \
+{                                                                      \
+  for (int i = 0; i < n; i++)                                          \
+    a[i] = b[i] != 0 ? val : 0;                                                \
+}
+
+#define TEST_TYPE8(TYPE)                       \
+  DEF_SEL_VAR (TYPE)                           \
+  DEF_SEL_IMM (TYPE, m128, -128)               \
+  DEF_SEL_IMM (TYPE, m127, -127)               \
+  DEF_SEL_IMM (TYPE, 2, 2)                     \
+  DEF_SEL_IMM (TYPE, 127, 127)
+
+#define TEST_TYPE16(TYPE)                      \
+  TEST_TYPE8 (TYPE)                            \
+  DEF_SEL_IMM (TYPE, m32768, -32768)           \
+  DEF_SEL_IMM (TYPE, m32767, -32767)           \
+  DEF_SEL_IMM (TYPE, m32512, -32512)           \
+  DEF_SEL_IMM (TYPE, m32511, -32511)           \
+  DEF_SEL_IMM (TYPE, m256, -256)               \
+  DEF_SEL_IMM (TYPE, m255, -255)               \
+  DEF_SEL_IMM (TYPE, m129, -129)               \
+  DEF_SEL_IMM (TYPE, 128, 128)                 \
+  DEF_SEL_IMM (TYPE, 256, 256)                 \
+  DEF_SEL_IMM (TYPE, 32511, 32511)             \
+  DEF_SEL_IMM (TYPE, 32512, 32512)             \
+  DEF_SEL_IMM (TYPE, 32767, 32767)
+
+#define TEST_TYPE32(TYPE)                      \
+  TEST_TYPE16 (TYPE)                           \
+  DEF_SEL_IMM (TYPE, m65536, -65536)           \
+  DEF_SEL_IMM (TYPE, m32769, -32769)           \
+  DEF_SEL_IMM (TYPE, 32768, 32768)
+
+TEST_TYPE8 (int8_t)
+TEST_TYPE16 (int16_t)
+TEST_TYPE32 (int32_t)
+TEST_TYPE32 (int64_t)
+
+/* { dg-final { scan-assembler-times {\tmov\tz[0-9]+\.b, p[0-7]/z, #-128\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tmov\tz[0-9]+\.b, p[0-7]/z, #-127\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tmov\tz[0-9]+\.b, p[0-7]/z, #2\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tmov\tz[0-9]+\.b, p[0-7]/z, #127\n} 1 } } */
+
+/* { dg-final { scan-assembler-times {\tmov\tz[0-9]+\.[hsd], p[0-7]/z, #-32768\n} 3 } } */
+/* { dg-final { scan-assembler-times {\tmov\tz[0-9]+\.[hsd], p[0-7]/z, #-32512\n} 3 } } */
+/* { dg-final { scan-assembler-times {\tmov\tz[0-9]+\.[hsd], p[0-7]/z, #-256\n} 3 } } */
+/* { dg-final { scan-assembler-times {\tmov\tz[0-9]+\.[hsd], p[0-7]/z, #-128\n} 3 } } */
+/* { dg-final { scan-assembler-times {\tmov\tz[0-9]+\.[hsd], p[0-7]/z, #-127\n} 3 } } */
+/* { dg-final { scan-assembler-times {\tmov\tz[0-9]+\.[hsd], p[0-7]/z, #2\n} 3 } } */
+/* { dg-final { scan-assembler-times {\tmov\tz[0-9]+\.[hsd], p[0-7]/z, #127\n} 3 } } */
+/* { dg-final { scan-assembler-times {\tmov\tz[0-9]+\.[hsd], p[0-7]/z, #256\n} 3 } } */
+/* { dg-final { scan-assembler-times {\tmov\tz[0-9]+\.[hsd], p[0-7]/z, #32512\n} 3 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/vcond_4.c b/gcc/testsuite/gcc.target/aarch64/sve/vcond_4.c
new file mode 100644 (file)
index 0000000..8d13701
--- /dev/null
@@ -0,0 +1,139 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -ftree-vectorize" } */
+
+#include <stdint.h>
+
+#define eq(A, B) ((A) == (B))
+#define ne(A, B) ((A) != (B))
+#define olt(A, B) ((A) < (B))
+#define ole(A, B) ((A) <= (B))
+#define oge(A, B) ((A) >= (B))
+#define ogt(A, B) ((A) > (B))
+#define ordered(A, B) (!__builtin_isunordered (A, B))
+#define unordered(A, B) (__builtin_isunordered (A, B))
+#define ueq(A, B) (!__builtin_islessgreater (A, B))
+#define ult(A, B) (__builtin_isless (A, B))
+#define ule(A, B) (__builtin_islessequal (A, B))
+#define uge(A, B) (__builtin_isgreaterequal (A, B))
+#define ugt(A, B) (__builtin_isgreater (A, B))
+#define nueq(A, B) (__builtin_islessgreater (A, B))
+#define nult(A, B) (!__builtin_isless (A, B))
+#define nule(A, B) (!__builtin_islessequal (A, B))
+#define nuge(A, B) (!__builtin_isgreaterequal (A, B))
+#define nugt(A, B) (!__builtin_isgreater (A, B))
+
+#define TEST_LOOP(TYPE1, TYPE2, CMP)                           \
+  void __attribute__ ((noinline, noclone))                     \
+  test_##TYPE1##_##TYPE2##_##CMP##_var (TYPE1 *restrict dest,  \
+                                       TYPE1 *restrict src,    \
+                                       TYPE1 fallback,         \
+                                       TYPE2 *restrict a,      \
+                                       TYPE2 *restrict b,      \
+                                       int count)              \
+  {                                                            \
+    for (int i = 0; i < count; ++i)                            \
+      dest[i] = CMP (a[i], b[i]) ? src[i] : fallback;          \
+  }                                                            \
+                                                               \
+  void __attribute__ ((noinline, noclone))                     \
+  test_##TYPE1##_##TYPE2##_##CMP##_zero (TYPE1 *restrict dest, \
+                                        TYPE1 *restrict src,   \
+                                        TYPE1 fallback,        \
+                                        TYPE2 *restrict a,     \
+                                        int count)             \
+  {                                                            \
+    for (int i = 0; i < count; ++i)                            \
+      dest[i] = CMP (a[i], 0) ? src[i] : fallback;             \
+  }                                                            \
+                                                               \
+  void __attribute__ ((noinline, noclone))                     \
+  test_##TYPE1##_##TYPE2##_##CMP##_sel (TYPE1 *restrict dest,  \
+                                       TYPE1 if_true,          \
+                                       TYPE1 if_false,         \
+                                       TYPE2 *restrict a,      \
+                                       TYPE2 b, int count)     \
+  {                                                            \
+    for (int i = 0; i < count; ++i)                            \
+      dest[i] = CMP (a[i], b) ? if_true : if_false;            \
+  }
+
+#define TEST_CMP(CMP) \
+  TEST_LOOP (int32_t, float, CMP) \
+  TEST_LOOP (uint32_t, float, CMP) \
+  TEST_LOOP (int64_t, float, CMP) \
+  TEST_LOOP (uint64_t, float, CMP) \
+  TEST_LOOP (float, float, CMP) \
+  TEST_LOOP (int32_t, double, CMP) \
+  TEST_LOOP (uint32_t, double, CMP) \
+  TEST_LOOP (int64_t, double, CMP) \
+  TEST_LOOP (uint64_t, double, CMP) \
+  TEST_LOOP (double, double, CMP)
+
+TEST_CMP (eq)
+TEST_CMP (ne)
+TEST_CMP (olt)
+TEST_CMP (ole)
+TEST_CMP (oge)
+TEST_CMP (ogt)
+TEST_CMP (ordered)
+TEST_CMP (unordered)
+TEST_CMP (ueq)
+TEST_CMP (ult)
+TEST_CMP (ule)
+TEST_CMP (uge)
+TEST_CMP (ugt)
+TEST_CMP (nueq)
+TEST_CMP (nult)
+TEST_CMP (nule)
+TEST_CMP (nuge)
+TEST_CMP (nugt)
+
+/* { dg-final { scan-assembler-times {\tfcmeq\tp[0-9]+\.s, p[0-7]/z, z[0-9]+\.s, #0\.0\n} 5 { xfail *-*-* } } } */
+/* { dg-final { scan-assembler-times {\tfcmeq\tp[0-9]+\.s, p[0-7]/z, z[0-9]+\.s, z[0-9]+\.s\n} 10 { xfail *-*-* } } } */
+
+/* 5 for ne, 5 for ueq and 5 for nueq.  */
+/* { dg-final { scan-assembler-times {\tfcmne\tp[0-9]+\.s, p[0-7]/z, z[0-9]+\.s, #0\.0\n} 15 { xfail *-*-* } } } */
+/* { dg-final { scan-assembler-times {\tfcmne\tp[0-9]+\.s, p[0-7]/z, z[0-9]+\.s, z[0-9]+\.s\n} 30 { xfail *-*-* } } } */
+
+/* 5 for lt, 5 for ult and 5 for nult.  */
+/* { dg-final { scan-assembler-times {\tfcmlt\tp[0-9]+\.s, p[0-7]/z, z[0-9]+\.s, #0\.0\n} 15 } } */
+/* { dg-final { scan-assembler-times {\tfcmlt\tp[0-9]+\.s, p[0-7]/z, z[0-9]+\.s, z[0-9]+\.s\n} 30 } } */
+
+/* 5 for le, 5 for ule and 5 for nule.  */
+/* { dg-final { scan-assembler-times {\tfcmle\tp[0-9]+\.s, p[0-7]/z, z[0-9]+\.s, #0\.0\n} 15 } } */
+/* { dg-final { scan-assembler-times {\tfcmle\tp[0-9]+\.s, p[0-7]/z, z[0-9]+\.s, z[0-9]+\.s\n} 30 } } */
+
+/* 5 for gt, 5 for ugt and 5 for nugt.  */
+/* { dg-final { scan-assembler-times {\tfcmgt\tp[0-9]+\.s, p[0-7]/z, z[0-9]+\.s, #0\.0\n} 15 } } */
+/* { dg-final { scan-assembler-times {\tfcmgt\tp[0-9]+\.s, p[0-7]/z, z[0-9]+\.s, z[0-9]+\.s\n} 30 } } */
+
+/* 5 for ge, 5 for uge and 5 for nuge.  */
+/* { dg-final { scan-assembler-times {\tfcmge\tp[0-9]+\.s, p[0-7]/z, z[0-9]+\.s, #0\.0\n} 15 } } */
+/* { dg-final { scan-assembler-times {\tfcmge\tp[0-9]+\.s, p[0-7]/z, z[0-9]+\.s, z[0-9]+\.s\n} 30 } } */
+
+/* { dg-final { scan-assembler-not {\tfcmuo\tp[0-9]+\.s, p[0-7]/z, z[0-9]+\.s, #0\.0\n} } } */
+/* 3 loops * 5 invocations for all 12 unordered comparisons.  */
+/* { dg-final { scan-assembler-times {\tfcmuo\tp[0-9]+\.s, p[0-7]/z, z[0-9]+\.s, z[0-9]+\.s\n} 180 } } */
+
+/* { dg-final { scan-assembler-times {\tfcmeq\tp[0-9]+\.d, p[0-7]/z, z[0-9]+\.d, #0\.0\n} 7 { xfail *-*-* } } } */
+/* { dg-final { scan-assembler-times {\tfcmeq\tp[0-9]+\.d, p[0-7]/z, z[0-9]+\.d, z[0-9]+\.d\n} 14 { xfail *-*-* } } } */
+
+/* { dg-final { scan-assembler-times {\tfcmne\tp[0-9]+\.d, p[0-7]/z, z[0-9]+\.d, #0\.0\n} 21 { xfail *-*-* } } } */
+/* { dg-final { scan-assembler-times {\tfcmne\tp[0-9]+\.d, p[0-7]/z, z[0-9]+\.d, z[0-9]+\.d\n} 42 { xfail *-*-* } } } */
+
+/* { dg-final { scan-assembler-times {\tfcmlt\tp[0-9]+\.d, p[0-7]/z, z[0-9]+\.d, #0\.0\n} 21 } } */
+/* { dg-final { scan-assembler-times {\tfcmlt\tp[0-9]+\.d, p[0-7]/z, z[0-9]+\.d, z[0-9]+\.d\n} 42 } } */
+
+/* { dg-final { scan-assembler-times {\tfcmle\tp[0-9]+\.d, p[0-7]/z, z[0-9]+\.d, #0\.0\n} 21 } } */
+/* { dg-final { scan-assembler-times {\tfcmle\tp[0-9]+\.d, p[0-7]/z, z[0-9]+\.d, z[0-9]+\.d\n} 42 } } */
+
+/* { dg-final { scan-assembler-times {\tfcmgt\tp[0-9]+\.d, p[0-7]/z, z[0-9]+\.d, #0\.0\n} 21 } } */
+/* { dg-final { scan-assembler-times {\tfcmgt\tp[0-9]+\.d, p[0-7]/z, z[0-9]+\.d, z[0-9]+\.d\n} 42 } } */
+
+/* { dg-final { scan-assembler-times {\tfcmge\tp[0-9]+\.d, p[0-7]/z, z[0-9]+\.d, #0\.0\n} 21 } } */
+/* { dg-final { scan-assembler-times {\tfcmge\tp[0-9]+\.d, p[0-7]/z, z[0-9]+\.d, z[0-9]+\.d\n} 42 } } */
+
+/* { dg-final { scan-assembler-not {\tfcmuo\tp[0-9]+\.d, p[0-7]/z, z[0-9]+\.d, #0\.0\n} } } */
+/* 3 loops * 5 invocations, with 2 invocations having ncopies == 2,
+   for all 12 unordered comparisons.  */
+/* { dg-final { scan-assembler-times {\tfcmuo\tp[0-9]+\.d, p[0-7]/z, z[0-9]+\.d, z[0-9]+\.d\n} 252 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/vcond_4_run.c b/gcc/testsuite/gcc.target/aarch64/sve/vcond_4_run.c
new file mode 100644 (file)
index 0000000..c345087
--- /dev/null
@@ -0,0 +1,88 @@
+/* { dg-do run { target aarch64_sve_hw } } */
+/* { dg-options "-O2 -ftree-vectorize" } */
+/* { dg-require-effective-target fenv_exceptions } */
+
+#ifndef TEST_EXCEPTIONS
+#define TEST_EXCEPTIONS 1
+#endif
+
+#include <fenv.h>
+
+#include "vcond_4.c"
+
+#define N 401
+
+#define RUN_LOOP(TYPE1, TYPE2, CMP, EXPECT_INVALID)                    \
+  {                                                                    \
+    TYPE1 dest1[N], dest2[N], dest3[N], src[N];                                \
+    TYPE2 a[N], b[N];                                                  \
+    for (int i = 0; i < N; ++i)                                                \
+      {                                                                        \
+       src[i] = i * i;                                                 \
+       if (i % 5 == 0)                                                 \
+         a[i] = 0;                                                     \
+       else if (i % 3)                                                 \
+         a[i] = i * 0.1;                                               \
+       else                                                            \
+         a[i] = i;                                                     \
+       if (i % 7 == 0)                                                 \
+         b[i] = __builtin_nan ("");                                    \
+       else if (i % 6)                                                 \
+         b[i] = i * 0.1;                                               \
+       else                                                            \
+         b[i] = i;                                                     \
+       asm volatile ("" ::: "memory");                                 \
+      }                                                                        \
+    feclearexcept (FE_ALL_EXCEPT);                                     \
+    test_##TYPE1##_##TYPE2##_##CMP##_var (dest1, src, 11, a, b, N);    \
+    test_##TYPE1##_##TYPE2##_##CMP##_zero (dest2, src, 22, a, N);      \
+    test_##TYPE1##_##TYPE2##_##CMP##_sel (dest3, 33, 44, a, 9, N);     \
+    if (TEST_EXCEPTIONS                                                        \
+       && !fetestexcept (FE_INVALID) != !(EXPECT_INVALID))             \
+      __builtin_abort ();                                              \
+    for (int i = 0; i < N; ++i)                                                \
+      {                                                                        \
+       if (dest1[i] != (CMP (a[i], b[i]) ? src[i] : 11))               \
+         __builtin_abort ();                                           \
+       if (dest2[i] != (CMP (a[i], 0) ? src[i] : 22))                  \
+         __builtin_abort ();                                           \
+       if (dest3[i] != (CMP (a[i], 9) ? 33 : 44))                      \
+         __builtin_abort ();                                           \
+      }                                                                        \
+  }
+
+#define RUN_CMP(CMP, EXPECT_INVALID) \
+  RUN_LOOP (int32_t, float, CMP, EXPECT_INVALID) \
+  RUN_LOOP (uint32_t, float, CMP, EXPECT_INVALID) \
+  RUN_LOOP (int64_t, float, CMP, EXPECT_INVALID) \
+  RUN_LOOP (uint64_t, float, CMP, EXPECT_INVALID) \
+  RUN_LOOP (float, float, CMP, EXPECT_INVALID) \
+  RUN_LOOP (int32_t, double, CMP, EXPECT_INVALID) \
+  RUN_LOOP (uint32_t, double, CMP, EXPECT_INVALID) \
+  RUN_LOOP (int64_t, double, CMP, EXPECT_INVALID) \
+  RUN_LOOP (uint64_t, double, CMP, EXPECT_INVALID) \
+  RUN_LOOP (double, double, CMP, EXPECT_INVALID)
+
+int __attribute__ ((optimize (1)))
+main (void)
+{
+  RUN_CMP (eq, 0)
+  RUN_CMP (ne, 0)
+  RUN_CMP (olt, 1)
+  RUN_CMP (ole, 1)
+  RUN_CMP (oge, 1)
+  RUN_CMP (ogt, 1)
+  RUN_CMP (ordered, 0)
+  RUN_CMP (unordered, 0)
+  RUN_CMP (ueq, 0)
+  RUN_CMP (ult, 0)
+  RUN_CMP (ule, 0)
+  RUN_CMP (uge, 0)
+  RUN_CMP (ugt, 0)
+  RUN_CMP (nueq, 0)
+  RUN_CMP (nult, 0)
+  RUN_CMP (nule, 0)
+  RUN_CMP (nuge, 0)
+  RUN_CMP (nugt, 0)
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/vcond_5.c b/gcc/testsuite/gcc.target/aarch64/sve/vcond_5.c
new file mode 100644 (file)
index 0000000..ddc8038
--- /dev/null
@@ -0,0 +1,56 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -ftree-vectorize -fno-trapping-math" } */
+
+/* The difference here is that nueq can use LTGT.  */
+
+#include "vcond_4.c"
+
+/* 5 for eq and 5 for ueq.  */
+/* { dg-final { scan-assembler-times {\tfcmeq\tp[0-9]+\.s, p[0-7]/z, z[0-9]+\.s, #0\.0} 10 { xfail *-*-* } } } */
+/* { dg-final { scan-assembler-times {\tfcmeq\tp[0-9]+\.s, p[0-7]/z, z[0-9]+\.s, z[0-9]+\.s} 20 { xfail *-*-* } } } */
+
+/* { dg-final { scan-assembler-times {\tfcmne\tp[0-9]+\.s, p[0-7]/z, z[0-9]+\.s, #0\.0} 5 { xfail *-*-* } } } */
+/* { dg-final { scan-assembler-times {\tfcmne\tp[0-9]+\.s, p[0-7]/z, z[0-9]+\.s, z[0-9]+\.s} 10 { xfail *-*-* } } } */
+
+/* 5 for lt, 5 for ult, 5 for nueq and 5 for nult.  */
+/* { dg-final { scan-assembler-times {\tfcmlt\tp[0-9]+\.s, p[0-7]/z, z[0-9]+\.s, #0\.0} 20 { xfail *-*-* } } } */
+/* { dg-final { scan-assembler-times {\tfcmlt\tp[0-9]+\.s, p[0-7]/z, z[0-9]+\.s, z[0-9]+\.s} 40 { xfail *-*-* } } } */
+
+/* 5 for le, 5 for ule and 5 for nule.  */
+/* { dg-final { scan-assembler-times {\tfcmle\tp[0-9]+\.s, p[0-7]/z, z[0-9]+\.s, #0\.0} 15 } } */
+/* { dg-final { scan-assembler-times {\tfcmle\tp[0-9]+\.s, p[0-7]/z, z[0-9]+\.s, z[0-9]+\.s} 30 } } */
+
+/* 5 for gt, 5 for ugt, 5 for nueq and 5 for nugt.  */
+/* { dg-final { scan-assembler-times {\tfcmgt\tp[0-9]+\.s, p[0-7]/z, z[0-9]+\.s, #0\.0} 20 { xfail *-*-* } } } */
+/* { dg-final { scan-assembler-times {\tfcmgt\tp[0-9]+\.s, p[0-7]/z, z[0-9]+\.s, z[0-9]+\.s} 40 { xfail *-*-* } } } */
+
+/* 5 for ge, 5 for uge and 5 for nuge.  */
+/* { dg-final { scan-assembler-times {\tfcmge\tp[0-9]+\.s, p[0-7]/z, z[0-9]+\.s, #0\.0} 15 } } */
+/* { dg-final { scan-assembler-times {\tfcmge\tp[0-9]+\.s, p[0-7]/z, z[0-9]+\.s, z[0-9]+\.s} 30 } } */
+
+/* { dg-final { scan-assembler-not {\tfcmuo\tp[0-9]+\.s, p[0-7]/z, z[0-9]+\.s, #0\.0} } } */
+/* 3 loops * 5 invocations for ordered, unordered and ueq.  */
+/* { dg-final { scan-assembler-times {\tfcmuo\tp[0-9]+\.s, p[0-7]/z, z[0-9]+\.s, z[0-9]+\.s} 45 { xfail *-*-* } } } */
+
+/* { dg-final { scan-assembler-times {\tfcmeq\tp[0-9]+\.d, p[0-7]/z, z[0-9]+\.d, #0\.0} 14 { xfail *-*-* } } } */
+/* { dg-final { scan-assembler-times {\tfcmeq\tp[0-9]+\.d, p[0-7]/z, z[0-9]+\.d, z[0-9]+\.d} 28 { xfail *-*-* } } } */
+
+/* { dg-final { scan-assembler-times {\tfcmne\tp[0-9]+\.d, p[0-7]/z, z[0-9]+\.d, #0\.0} 7 { xfail *-*-* } } } */
+/* { dg-final { scan-assembler-times {\tfcmne\tp[0-9]+\.d, p[0-7]/z, z[0-9]+\.d, z[0-9]+\.d} 14 { xfail *-*-* } } } */
+
+/* { dg-final { scan-assembler-times {\tfcmlt\tp[0-9]+\.d, p[0-7]/z, z[0-9]+\.d, #0\.0} 28 { xfail *-*-* } } } */
+/* { dg-final { scan-assembler-times {\tfcmlt\tp[0-9]+\.d, p[0-7]/z, z[0-9]+\.d, z[0-9]+\.d} 56 { xfail *-*-* } } } */
+
+/* { dg-final { scan-assembler-times {\tfcmle\tp[0-9]+\.d, p[0-7]/z, z[0-9]+\.d, #0\.0} 21 } } */
+/* { dg-final { scan-assembler-times {\tfcmle\tp[0-9]+\.d, p[0-7]/z, z[0-9]+\.d, z[0-9]+\.d} 42 } } */
+
+/* { dg-final { scan-assembler-times {\tfcmgt\tp[0-9]+\.d, p[0-7]/z, z[0-9]+\.d, #0\.0} 28 { xfail *-*-* } } } */
+/* { dg-final { scan-assembler-times {\tfcmgt\tp[0-9]+\.d, p[0-7]/z, z[0-9]+\.d, z[0-9]+\.d} 56 { xfail *-*-* } } } */
+
+/* { dg-final { scan-assembler-times {\tfcmge\tp[0-9]+\.d, p[0-7]/z, z[0-9]+\.d, #0\.0} 21 } } */
+/* { dg-final { scan-assembler-times {\tfcmge\tp[0-9]+\.d, p[0-7]/z, z[0-9]+\.d, z[0-9]+\.d} 42 } } */
+
+/* { dg-final { scan-assembler-not {\tfcmuo\tp[0-9]+\.d, p[0-7]/z, z[0-9]+\.d, #0\.0} } } */
+/* 3 loops * 5 invocations, with 2 invocations having ncopies == 2,
+   for ordered, unordered and ueq.  */
+/* { dg-final { scan-assembler-times {\tfcmuo\tp[0-9]+\.d, p[0-7]/z, z[0-9]+\.d, z[0-9]+\.d} 63 { xfail *-*-* } } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/vcond_5_run.c b/gcc/testsuite/gcc.target/aarch64/sve/vcond_5_run.c
new file mode 100644 (file)
index 0000000..deec44c
--- /dev/null
@@ -0,0 +1,6 @@
+/* { dg-do run { target aarch64_sve_hw } } */
+/* { dg-options "-O2 -ftree-vectorize -fno-trapping-math" } */
+/* { dg-require-effective-target fenv_exceptions } */
+
+#define TEST_EXCEPTIONS 0
+#include "vcond_4_run.c"
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/vcond_6.c b/gcc/testsuite/gcc.target/aarch64/sve/vcond_6.c
new file mode 100644 (file)
index 0000000..6c6a84a
--- /dev/null
@@ -0,0 +1,50 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -ftree-vectorize" } */
+
+#include <stdint.h>
+
+#define b_and(A, B) ((A) & (B))  /* Combiners passed as BINOP below: A AND B.  */
+#define b_orr(A, B) ((A) | (B))  /* A OR B.  */
+#define b_eor(A, B) ((A) ^ (B))  /* A XOR B.  */
+#define b_nand(A, B) (!((A) & (B)))  /* NOT (A AND B), using logical "!".  */
+#define b_nor(A, B) (!((A) | (B)))  /* NOT (A OR B), using logical "!".  */
+#define b_bic(A, B) ((A) & !(B))  /* A AND (NOT B); "!" yields 0/1, so A is presumably 0/1 too -- confirm.  */
+#define b_orn(A, B) ((A) | !(B))  /* A OR (NOT B); same 0/1 assumption as b_bic.  */
+
+#define LOOP(TYPE, BINOP)                                              \
+  void __attribute__ ((noinline, noclone))                             \
+  test_##TYPE##_##BINOP (TYPE *restrict dest, TYPE *restrict src,      \
+                        TYPE *restrict a, TYPE *restrict b,            \
+                        TYPE *restrict c, TYPE *restrict d,            \
+                        TYPE fallback, int count)                      \
+  {                                                                    \
+    for (int i = 0; i < count; ++i)                                    \
+      dest[i] = (BINOP (__builtin_isunordered (a[i], b[i]),            \
+                       __builtin_isunordered (c[i], d[i]))             \
+                ? src[i] : fallback);                                  \
+  }
+
+#define TEST_BINOP(T, BINOP) \
+  T (_Float16, BINOP) \
+  T (float, BINOP) \
+  T (double, BINOP)
+
+#define TEST_ALL(T) \
+  TEST_BINOP (T, b_and) \
+  TEST_BINOP (T, b_orr) \
+  TEST_BINOP (T, b_eor) \
+  TEST_BINOP (T, b_nand) \
+  TEST_BINOP (T, b_nor) \
+  TEST_BINOP (T, b_bic) \
+  TEST_BINOP (T, b_orn)
+
+TEST_ALL (LOOP)
+
+/* { dg-final { scan-assembler-times {\tand\tp[0-9]+\.b, p[0-9]+/z, p[0-9]+\.b, p[0-9]+\.b} 3 } } */
+/* { dg-final { scan-assembler {\tand\tp[0-9]+\.b, p[0-9]+/z, p[0-9]+\.b, p[0-9]+\.b} } } */
+/* { dg-final { scan-assembler-times {\torr\tp[0-9]+\.b, p[0-9]+/z, p[0-9]+\.b, p[0-9]+\.b} 3 } } */
+/* { dg-final { scan-assembler-times {\teor\tp[0-9]+\.b, p[0-9]+/z, p[0-9]+\.b, p[0-9]+\.b} 3 } } */
+/* { dg-final { scan-assembler-times {\tnand\tp[0-9]+\.b, p[0-9]+/z, p[0-9]+\.b, p[0-9]+\.b} 3 } } */
+/* { dg-final { scan-assembler-times {\tnor\tp[0-9]+\.b, p[0-9]+/z, p[0-9]+\.b, p[0-9]+\.b} 3 } } */
+/* { dg-final { scan-assembler-times {\tbic\tp[0-9]+\.b, p[0-9]+/z, p[0-9]+\.b, p[0-9]+\.b} 3 } } */
+/* { dg-final { scan-assembler-times {\torn\tp[0-9]+\.b, p[0-9]+/z, p[0-9]+\.b, p[0-9]+\.b} 3 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/vcond_6_run.c b/gcc/testsuite/gcc.target/aarch64/sve/vcond_6_run.c
new file mode 100644 (file)
index 0000000..40b68ce
--- /dev/null
@@ -0,0 +1,35 @@
+/* { dg-do run { target aarch64_sve_hw } } */
+/* { dg-options "-O2 -ftree-vectorize" } */
+
+#include "vcond_6.c"
+
+#define N 401
+
+#define RUN_LOOP(TYPE, BINOP)                                          \
+  {                                                                    \
+    TYPE dest[N], src[N], a[N], b[N], c[N], d[N];                      \
+    for (int i = 0; i < N; ++i)                                                \
+      {                                                                        \
+       src[i] = i * i;                                                 \
+       a[i] = i % 5 < 3 ? __builtin_nan("") : i;                       \
+       b[i] = i % 7 < 4 ? __builtin_nan("") : i;                       \
+       c[i] = i % 9 < 5 ? __builtin_nan("") : i;                       \
+       d[i] = i % 11 < 6 ? __builtin_nan("") : i;                      \
+       asm volatile ("" ::: "memory");                                 \
+      }                                                                        \
+    test_##TYPE##_##BINOP (dest, src, a, b, c, d, 100, N);             \
+    for (int i = 0; i < N; ++i)                                                \
+      {                                                                        \
+       int res = BINOP (__builtin_isunordered (a[i], b[i]),            \
+                        __builtin_isunordered (c[i], d[i]));           \
+       if (dest[i] != (res ? src[i] : 100.0))                          \
+         __builtin_abort ();                                           \
+      }                                                                        \
+  }
+
+int __attribute__ ((optimize (1)))
+main (void)
+{
+  TEST_ALL (RUN_LOOP)
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/vec_init_1.c b/gcc/testsuite/gcc.target/aarch64/sve/vec_init_1.c
new file mode 100644 (file)
index 0000000..6042606
--- /dev/null
@@ -0,0 +1,30 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -ftree-vectorize" } */
+
+#include <stdint.h>
+
+#define NUM_ELEMS(TYPE) (128 / sizeof (TYPE))  /* Number of TYPE elements that occupy 128 bytes.  */
+
+#define DUP_FN(TYPE)                           \
+void __attribute__ ((noinline, noclone))       \
+dup_##TYPE (TYPE *r, TYPE v)                   \
+{                                              \
+  for (int i = 0; i < NUM_ELEMS (TYPE); i++)   \
+    r[i] = v;                                  \
+}
+
+DUP_FN (int8_t)
+DUP_FN (int16_t)
+DUP_FN (int32_t)
+DUP_FN (int64_t)
+DUP_FN (_Float16)
+DUP_FN (float)
+DUP_FN (double)
+
+/* { dg-final { scan-assembler-times {\tmov\tz[0-9]+\.b, w[0-9]+\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tmov\tz[0-9]+\.h, w[0-9]+\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tmov\tz[0-9]+\.s, w[0-9]+\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tmov\tz[0-9]+\.d, x[0-9]+\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tmov\tz[0-9]+\.h, h[0-9]+\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tmov\tz[0-9]+\.s, s[0-9]+\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tmov\tz[0-9]+\.d, d[0-9]+\n} 1 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/vec_init_1_run.c b/gcc/testsuite/gcc.target/aarch64/sve/vec_init_1_run.c
new file mode 100644 (file)
index 0000000..539ba2c
--- /dev/null
@@ -0,0 +1,28 @@
+/* { dg-do run { target aarch64_sve_hw } } */
+/* { dg-options "-O2 -ftree-vectorize" } */
+
+#include "vec_init_1.c"
+
+#define TEST_INIT_VECTOR(TYPE, VAL)            \
+  {                                            \
+  TYPE r[NUM_ELEMS (TYPE)];                    \
+  dup_##TYPE (r, VAL);                         \
+  for (int i = 0; i < NUM_ELEMS (TYPE); i++)   \
+    if (r[i] != VAL)                           \
+      __builtin_abort ();                      \
+  }
+
+int __attribute__ ((optimize (1)))
+main (void)
+{
+  TEST_INIT_VECTOR (int8_t, 0x2a);
+  TEST_INIT_VECTOR (int16_t, 0x3976);
+  TEST_INIT_VECTOR (int32_t, 0x31232976);
+  TEST_INIT_VECTOR (int64_t, 0x9489363731232976LL);
+
+  TEST_INIT_VECTOR (_Float16, -0x1.fp10);
+  TEST_INIT_VECTOR (float, -0x1.fe02p10);
+  TEST_INIT_VECTOR (double, 0x1.fe02eeeee1p10);
+
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/vec_init_2.c b/gcc/testsuite/gcc.target/aarch64/sve/vec_init_2.c
new file mode 100644 (file)
index 0000000..e346146
--- /dev/null
@@ -0,0 +1,10 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -ftree-vectorize -msve-vector-bits=256" } */
+
+typedef unsigned int vnx4si __attribute__ ((vector_size(32)));  /* 32-byte vector of unsigned int.  */
+
+void
+f (vnx4si *ptr, int x)  /* Add a vector built from X and constants to *PTR.  */
+{
+  *ptr += (vnx4si) { x, x, 1, 2, 3, x, x, 4 };  /* Lanes 0, 1, 5 and 6 hold X; the others are constants.  */
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/vec_perm_1.c b/gcc/testsuite/gcc.target/aarch64/sve/vec_perm_1.c
new file mode 100644 (file)
index 0000000..74a48bf
--- /dev/null
@@ -0,0 +1,32 @@
+/* { dg-do compile } */
+/* { dg-options "-O -msve-vector-bits=256" } */
+
+#include <stdint.h>
+
+typedef int64_t vnx2di __attribute__((vector_size (32)));
+typedef int32_t vnx4si __attribute__((vector_size (32)));
+typedef int16_t vnx8hi __attribute__((vector_size (32)));
+typedef int8_t vnx16qi __attribute__((vector_size (32)));
+typedef double vnx2df __attribute__((vector_size (32)));
+typedef float vnx4sf __attribute__((vector_size (32)));
+typedef _Float16 vnx8hf __attribute__((vector_size (32)));
+
+#define VEC_PERM(TYPE, MASKTYPE)                                       \
+TYPE __attribute__ ((noinline, noclone))                               \
+vec_perm_##TYPE (TYPE values1, TYPE values2, MASKTYPE mask)            \
+{                                                                      \
+  return __builtin_shuffle (values1, values2, mask);                   \
+}
+
+VEC_PERM (vnx2di, vnx2di);
+VEC_PERM (vnx4si, vnx4si);
+VEC_PERM (vnx8hi, vnx8hi);
+VEC_PERM (vnx16qi, vnx16qi);
+VEC_PERM (vnx2df, vnx2di);
+VEC_PERM (vnx4sf, vnx4si);
+VEC_PERM (vnx8hf, vnx8hi);
+
+/* { dg-final { scan-assembler-times {\ttbl\tz[0-9]+\.d, z[0-9]+\.d, z[0-9]+\.d\n} 4 } } */
+/* { dg-final { scan-assembler-times {\ttbl\tz[0-9]+\.s, z[0-9]+\.s, z[0-9]+\.s\n} 4 } } */
+/* { dg-final { scan-assembler-times {\ttbl\tz[0-9]+\.h, z[0-9]+\.h, z[0-9]+\.h\n} 4 } } */
+/* { dg-final { scan-assembler-times {\ttbl\tz[0-9]+\.b, z[0-9]+\.b, z[0-9]+\.b\n} 2 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/vec_perm_1_overrange_run.c b/gcc/testsuite/gcc.target/aarch64/sve/vec_perm_1_overrange_run.c
new file mode 100644 (file)
index 0000000..317cae6
--- /dev/null
@@ -0,0 +1,111 @@
+/* { dg-do run { target aarch64_sve_hw } } */
+/* { dg-options "-O" } */
+/* { dg-options "-O -msve-vector-bits=256" { target aarch64_sve256_hw } } */
+
+#include "vec_perm_1.c"
+
+#define TEST_VEC_PERM(TYPE, MASK_TYPE, EXPECTED_RESULT,                        \
+                     VALUES1, VALUES2, MASK)                           \
+{                                                                      \
+  TYPE expected_result = EXPECTED_RESULT;                              \
+  TYPE values1 = VALUES1;                                              \
+  TYPE values2 = VALUES2;                                              \
+  MASK_TYPE mask = MASK;                                               \
+  TYPE dest;                                                           \
+  dest = vec_perm_##TYPE (values1, values2, mask);                     \
+  if (__builtin_memcmp (&dest, &expected_result, sizeof (TYPE)) != 0)  \
+    __builtin_abort ();                                                        \
+}
+
+int main (void)
+{
+  TEST_VEC_PERM (vnx2di, vnx2di,
+                ((vnx2di) { 5, 36, 7, 48 }),
+                ((vnx2di) { 4, 5, 6, 7 }),
+                ((vnx2di) { 12, 24, 36, 48 }),
+                ((vnx2di) { 1 + (8 * 1), 6 + (8 * 3),
+                            3 + (8 * 1), 7 + (8 * 5) }));
+  TEST_VEC_PERM (vnx4si, vnx4si,
+                ((vnx4si) { 34, 38, 40, 10, 9, 8, 7, 35 }),
+                ((vnx4si) { 3, 4, 5, 6, 7, 8, 9, 10 }),
+                ((vnx4si) { 33, 34, 35, 36, 37, 38, 39, 40 }),
+                ((vnx4si) { 9 + (16 * 2), 13 + (16 * 5),
+                            15 + (16 * 1), 7 + (16 * 0),
+                            6 + (16 * 8), 5 + (16 * 2),
+                            4 + (16 * 3), 10 + (16 * 2) }));
+  TEST_VEC_PERM (vnx8hi, vnx8hi,
+                ((vnx8hi) { 12, 16, 18, 10, 42, 43, 44, 34,
+                            7, 48, 3, 35, 9, 8, 7, 13 }),
+                ((vnx8hi) { 3, 4, 5, 6, 7, 8, 9, 10,
+                            11, 12, 13, 14, 15, 16, 17, 18 }),
+                ((vnx8hi) { 33, 34, 35, 36, 37, 38, 39, 40,
+                            41, 42, 43, 44, 45, 46, 47, 48 }),
+                ((vnx8hi) { 9 + (32 * 2), 13 + (32 * 2),
+                            15 + (32 * 8), 7 + (32 * 9),
+                            25 + (32 * 4), 26 + (32 * 3),
+                            27 + (32 * 1), 17 + (32 * 2),
+                            4 + (32 * 6), 31 + (32 * 7),
+                            0 + (32 * 8), 18 + (32 * 9),
+                            6 + (32 * 6), 5 + (32 * 7),
+                            4 + (32 * 2), 10 + (32 * 2) }));
+  TEST_VEC_PERM (vnx16qi, vnx16qi,
+                ((vnx16qi) { 5, 6, 7, 4, 5, 6, 4, 5,
+                             6, 7, 12, 24, 36, 48, 12, 24,
+                             5, 6, 7, 4, 5, 6, 4, 5,
+                             6, 7, 12, 24, 36, 48, 12, 24 }),
+                ((vnx16qi) { 4, 5, 6, 7, 4, 5, 6, 7,
+                             4, 5, 6, 7, 4, 5, 6, 7,
+                             4, 5, 6, 7, 4, 5, 6, 7,
+                             4, 5, 6, 7, 4, 5, 6, 7 }),
+                ((vnx16qi) { 12, 24, 36, 48, 12, 24, 36, 48,
+                             12, 24, 36, 48, 12, 24, 36, 48,
+                             12, 24, 36, 48, 12, 24, 36, 48,
+                             12, 24, 36, 48, 12, 24, 36, 48 }),
+                ((vnx16qi) { 5 + (64 * 3), 6 + (64 * 1),
+                             7 + (64 * 2), 8 + (64 * 1),
+                             9 + (64 * 3), 10 + (64 * 1),
+                             28 + (64 * 3), 29 + (64 * 3),
+                             30 + (64 * 1), 31 + (64 * 1),
+                             32 + (64 * 3), 33 + (64 * 2),
+                             54 + (64 * 2), 55 + (64 * 2),
+                             56 + (64 * 1), 61 + (64 * 2),
+                             5 + (64 * 2), 6 + (64 * 1),
+                             7 + (64 * 2), 8 + (64 * 2),
+                             9 + (64 * 2), 10 + (64 * 1),
+                             28 + (64 * 3), 29 + (64 * 1),
+                             30 + (64 * 3), 31 + (64 * 3),
+                             32 + (64 * 1), 33 + (64 * 1),
+                             54 + (64 * 2), 55 + (64 * 2),
+                             56 + (64 * 2), 61 + (64 * 2) }));
+  TEST_VEC_PERM (vnx2df, vnx2di,
+                ((vnx2df) { 5.1, 36.1, 7.1, 48.1 }),
+                ((vnx2df) { 4.1, 5.1, 6.1, 7.1 }),
+                ((vnx2df) { 12.1, 24.1, 36.1, 48.1 }),
+                ((vnx2di) { 1 + (8 * 3), 6 + (8 * 10),
+                            3 + (8 * 8), 7 + (8 * 2) }));
+  TEST_VEC_PERM (vnx4sf, vnx4si,
+                ((vnx4sf) { 34.2, 38.2, 40.2, 10.2, 9.2, 8.2, 7.2, 35.2 }),
+                ((vnx4sf) { 3.2, 4.2, 5.2, 6.2, 7.2, 8.2, 9.2, 10.2 }),
+                ((vnx4sf) { 33.2, 34.2, 35.2, 36.2,
+                            37.2, 38.2, 39.2, 40.2 }),
+                ((vnx4si) { 9 + (16 * 1), 13 + (16 * 5),
+                            15 + (16 * 4), 7 + (16 * 4),
+                            6 + (16 * 3), 5 + (16 * 2),
+                            4 + (16 * 1), 10 + (16 * 0) }));
+  TEST_VEC_PERM (vnx8hf, vnx8hi,
+                ((vnx8hf) { 12.0, 16.0, 18.0, 10.0, 42.0, 43.0, 44.0, 34.0,
+                            7.0, 48.0, 3.0, 35.0, 9.0, 8.0, 7.0, 13.0 }),
+                ((vnx8hf) { 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0,
+                            11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0, 18.0 }),
+                ((vnx8hf) { 33.0, 34.0, 35.0, 36.0, 37.0, 38.0, 39.0, 40.0,
+                            41.0, 42.0, 43.0, 44.0, 45.0, 46.0, 47.0, 48.0 }),
+                ((vnx8hi) { 9 + (32 * 2), 13 + (32 * 2),
+                            15 + (32 * 8), 7 + (32 * 9),
+                            25 + (32 * 4), 26 + (32 * 3),
+                            27 + (32 * 1), 17 + (32 * 2),
+                            4 + (32 * 6), 31 + (32 * 7),
+                            0 + (32 * 8), 18 + (32 * 9),
+                            6 + (32 * 6), 5 + (32 * 7),
+                            4 + (32 * 2), 10 + (32 * 2) }));
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/vec_perm_1_run.c b/gcc/testsuite/gcc.target/aarch64/sve/vec_perm_1_run.c
new file mode 100644 (file)
index 0000000..bed57f5
--- /dev/null
@@ -0,0 +1,79 @@
+/* { dg-do run { target aarch64_sve_hw } } */
+/* { dg-options "-O" } */
+/* { dg-options "-O -msve-vector-bits=256" { target aarch64_sve256_hw } } */
+
+#include "vec_perm_1.c"
+
+#define TEST_VEC_PERM(TYPE, MASK_TYPE, EXPECTED_RESULT,                        \
+                     VALUES1, VALUES2, MASK)                           \
+{                                                                      \
+  TYPE expected_result = EXPECTED_RESULT;                              \
+  TYPE values1 = VALUES1;                                              \
+  TYPE values2 = VALUES2;                                              \
+  MASK_TYPE mask = MASK;                                               \
+  TYPE dest;                                                           \
+  dest = vec_perm_##TYPE (values1, values2, mask);                     \
+  if (__builtin_memcmp (&dest, &expected_result, sizeof (TYPE)) != 0)  \
+    __builtin_abort ();                                                        \
+}
+
+int main (void)
+{
+  TEST_VEC_PERM (vnx2di, vnx2di,
+                ((vnx2di) { 5, 36, 7, 48 }),
+                ((vnx2di) { 4, 5, 6, 7 }),
+                ((vnx2di) { 12, 24, 36, 48 }),
+                ((vnx2di) { 1, 6, 3, 7 }));
+  TEST_VEC_PERM (vnx4si, vnx4si,
+                ((vnx4si) { 34, 38, 40, 10, 9, 8, 7, 35 }),
+                ((vnx4si) { 3, 4, 5, 6, 7, 8, 9, 10 }),
+                ((vnx4si) { 33, 34, 35, 36, 37, 38, 39, 40 }),
+                ((vnx4si) { 9, 13, 15, 7, 6, 5, 4, 10 }));
+  TEST_VEC_PERM (vnx8hi, vnx8hi,
+                ((vnx8hi) { 12, 16, 18, 10, 42, 43, 44, 34,
+                            7, 48, 3, 35, 9, 8, 7, 13 }),
+                ((vnx8hi) { 3, 4, 5, 6, 7, 8, 9, 10,
+                            11, 12, 13, 14, 15, 16, 17, 18 }),
+                ((vnx8hi) { 33, 34, 35, 36, 37, 38, 39, 40,
+                            41, 42, 43, 44, 45, 46, 47, 48 }),
+                ((vnx8hi) { 9, 13, 15, 7, 25, 26, 27, 17,
+                            4, 31, 0, 18, 6, 5, 4, 10 }));
+  TEST_VEC_PERM (vnx16qi, vnx16qi,
+                ((vnx16qi) { 5, 6, 7, 4, 5, 6, 4, 5,
+                             6, 7, 12, 24, 36, 48, 12, 24,
+                             5, 6, 7, 4, 5, 6, 4, 5,
+                             6, 7, 12, 24, 36, 48, 12, 24 }),
+                ((vnx16qi) { 4, 5, 6, 7, 4, 5, 6, 7,
+                             4, 5, 6, 7, 4, 5, 6, 7,
+                             4, 5, 6, 7, 4, 5, 6, 7,
+                             4, 5, 6, 7, 4, 5, 6, 7 }),
+                ((vnx16qi) { 12, 24, 36, 48, 12, 24, 36, 48,
+                             12, 24, 36, 48, 12, 24, 36, 48,
+                             12, 24, 36, 48, 12, 24, 36, 48,
+                             12, 24, 36, 48, 12, 24, 36, 48 }),
+                ((vnx16qi) { 5, 6, 7, 8, 9, 10, 28, 29,
+                             30, 31, 32, 33, 54, 55, 56, 61,
+                             5, 6, 7, 8, 9, 10, 28, 29,
+                             30, 31, 32, 33, 54, 55, 56, 61 }));
+  TEST_VEC_PERM (vnx2df, vnx2di,
+                ((vnx2df) { 5.1, 36.1, 7.1, 48.1 }),
+                ((vnx2df) { 4.1, 5.1, 6.1, 7.1 }),
+                ((vnx2df) { 12.1, 24.1, 36.1, 48.1 }),
+                ((vnx2di) { 1, 6, 3, 7 }));
+  TEST_VEC_PERM (vnx4sf, vnx4si,
+                ((vnx4sf) { 34.2, 38.2, 40.2, 10.2, 9.2, 8.2, 7.2, 35.2 }),
+                ((vnx4sf) { 3.2, 4.2, 5.2, 6.2, 7.2, 8.2, 9.2, 10.2 }),
+                ((vnx4sf) { 33.2, 34.2, 35.2, 36.2,
+                            37.2, 38.2, 39.2, 40.2 }),
+                ((vnx4si) { 9, 13, 15, 7, 6, 5, 4, 10 }));
+  TEST_VEC_PERM (vnx8hf, vnx8hi,
+                ((vnx8hf) { 12.0, 16.0, 18.0, 10.0, 42.0, 43.0, 44.0, 34.0,
+                            7.0, 48.0, 3.0, 35.0, 9.0, 8.0, 7.0, 13.0 }),
+                ((vnx8hf) { 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0,
+                            11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0, 18.0 }),
+                ((vnx8hf) { 33.0, 34.0, 35.0, 36.0, 37.0, 38.0, 39.0, 40.0,
+                            41.0, 42.0, 43.0, 44.0, 45.0, 46.0, 47.0, 48.0 }),
+                ((vnx8hi) { 9, 13, 15, 7, 25, 26, 27, 17,
+                            4, 31, 0, 18, 6, 5, 4, 10 }));
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/vec_perm_const_1.c b/gcc/testsuite/gcc.target/aarch64/sve/vec_perm_const_1.c
new file mode 100644 (file)
index 0000000..3194342
--- /dev/null
@@ -0,0 +1,37 @@
+/* { dg-do compile } */
+/* { dg-options "-O -msve-vector-bits=256" } */
+
+#include <stdint.h>
+
+typedef int64_t vnx2di __attribute__((vector_size (32)));
+typedef int32_t vnx4si __attribute__((vector_size (32)));
+typedef int16_t vnx8hi __attribute__((vector_size (32)));
+typedef int8_t vnx16qi __attribute__((vector_size (32)));
+typedef double vnx2df __attribute__((vector_size (32)));
+typedef float vnx4sf __attribute__((vector_size (32)));
+typedef _Float16 vnx8hf __attribute__((vector_size (32)));
+
+#define VEC_PERM_CONST(TYPE, MASK)                     \
+TYPE __attribute__ ((noinline, noclone))               \
+vec_perm_##TYPE (TYPE values1, TYPE values2)           \
+{                                                      \
+  return __builtin_shuffle (values1, values2, MASK);   \
+}
+
+VEC_PERM_CONST (vnx2di,  ((vnx2di)  { 4, 3, 6, 1 }));
+VEC_PERM_CONST (vnx4si,  ((vnx4si)  { 3, 9, 11, 12, 2, 4, 4, 2 }));
+VEC_PERM_CONST (vnx8hi,  ((vnx8hi)  { 8, 27, 5, 4, 21, 12, 13, 0,
+                                     22, 1, 8, 9, 3, 24, 15, 1 }));
+VEC_PERM_CONST (vnx16qi, ((vnx16qi) { 13, 31, 11, 2, 48, 28, 3, 4,
+                                     54, 11, 30, 1, 0, 61, 2, 3,
+                                     4, 5, 11, 63, 24, 11, 42, 39,
+                                     2, 57, 22, 11, 6, 16, 18, 21 }));
+VEC_PERM_CONST (vnx2df,  ((vnx2di) { 7, 3, 2, 1 }));
+VEC_PERM_CONST (vnx4sf,  ((vnx4si) { 1, 9, 13, 11, 2, 5, 4, 2 }));
+VEC_PERM_CONST (vnx8hf,  ((vnx8hi) { 8, 27, 5, 4, 21, 12, 13, 0,
+                                    22, 1, 8, 9, 3, 24, 15, 1 }));
+
+/* { dg-final { scan-assembler-times {\ttbl\tz[0-9]+\.d, z[0-9]+\.d, z[0-9]+\.d\n} 4 } } */
+/* { dg-final { scan-assembler-times {\ttbl\tz[0-9]+\.s, z[0-9]+\.s, z[0-9]+\.s\n} 4 } } */
+/* { dg-final { scan-assembler-times {\ttbl\tz[0-9]+\.h, z[0-9]+\.h, z[0-9]+\.h\n} 4 } } */
+/* { dg-final { scan-assembler-times {\ttbl\tz[0-9]+\.b, z[0-9]+\.b, z[0-9]+\.b\n} 2 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/vec_perm_const_1_overrun.c b/gcc/testsuite/gcc.target/aarch64/sve/vec_perm_const_1_overrun.c
new file mode 100644 (file)
index 0000000..b0732d0
--- /dev/null
@@ -0,0 +1,68 @@
+/* { dg-do compile } */
+/* { dg-options "-O -msve-vector-bits=256" } */
+
+#include <stdint.h>
+
+typedef int64_t vnx2di __attribute__((vector_size (32)));
+typedef int32_t vnx4si __attribute__((vector_size (32)));
+typedef int16_t vnx8hi __attribute__((vector_size (32)));
+typedef int8_t vnx16qi __attribute__((vector_size (32)));
+typedef double vnx2df __attribute__((vector_size (32)));
+typedef float vnx4sf __attribute__((vector_size (32)));
+typedef _Float16 vnx8hf __attribute__((vector_size (32)));
+
+#define VEC_PERM_CONST_OVERRUN(TYPE, MASK)                     \
+TYPE vec_perm_overrun_##TYPE (TYPE values1, TYPE values2)      \
+{                                                              \
+  return __builtin_shuffle (values1, values2, MASK);           \
+}
+
+VEC_PERM_CONST_OVERRUN (vnx2di,  ((vnx2di)  { 4 + (8 * 1), 3 + (8 * 1),
+                                             6 + (8 * 2), 1 + (8 * 3) }));
+VEC_PERM_CONST_OVERRUN (vnx4si,  ((vnx4si)  { 3 + (16 * 3), 9 + (16 * 4),
+                                             11 + (16 * 5), 12 + (16 * 3),
+                                             2 + (16 * 2), 4 + (16 * 1),
+                                             4 + (16 * 2), 2 + (16 * 1) }));
+VEC_PERM_CONST_OVERRUN (vnx8hi,  ((vnx8hi)  { 8 + (32 * 3), 27 + (32 * 1),
+                                             5 + (32 * 3), 4 + (32 * 3),
+                                             21 + (32 * 1), 12 + (32 * 3),
+                                             13 + (32 * 3), 0 + (32 * 1),
+                                             22 + (32 * 2), 1 + (32 * 2),
+                                             8 + (32 * 2), 9 + (32 * 1),
+                                             3 + (32 * 2), 24 + (32 * 2),
+                                             15 + (32 * 1), 1 + (32 * 1) }));
+VEC_PERM_CONST_OVERRUN (vnx16qi, ((vnx16qi) { 13 + (64 * 2), 31 + (64 * 2),
+                                             11 + (64 * 2), 2 + (64 * 1),
+                                             48 + (64 * 1), 28 + (64 * 2),
+                                             3 + (64 * 2), 4 + (64 * 3),
+                                             54 + (64 * 1), 11 + (64 * 2),
+                                             30 + (64 * 2), 1 + (64 * 1),
+                                             0 + (64 * 1), 61 + (64 * 2),
+                                             2 + (64 * 3), 3 + (64 * 2),
+                                             4 + (64 * 3), 5 + (64 * 3),
+                                             11 + (64 * 3), 63 + (64 * 1),
+                                             24 + (64 * 1), 11 + (64 * 3),
+                                             42 + (64 * 3), 39 + (64 * 2),
+                                             2 + (64 * 2), 57 + (64 * 3),
+                                             22 + (64 * 3), 11 + (64 * 2),
+                                             6 + (64 * 2), 16 + (64 * 2),
+                                             18 + (64 * 2), 21 + (64 * 3) }));
+VEC_PERM_CONST_OVERRUN (vnx2df,  ((vnx2di)  { 7 + (8 * 1), 3 + (8 * 3),
+                                             2 + (8 * 5), 1 + (8 * 3) }));
+VEC_PERM_CONST_OVERRUN (vnx4sf,  ((vnx4si)  { 1 + (16 * 1), 9 + (16 * 2),
+                                             13 + (16 * 2), 11 + (16 * 3),
+                                             2 + (16 * 2), 5 + (16 * 2),
+                                             4 + (16 * 4), 2 + (16 * 3) }));
+VEC_PERM_CONST_OVERRUN (vnx8hf,  ((vnx8hi)  { 8 + (32 * 3), 27 + (32 * 1),
+                                             5 + (32 * 3), 4 + (32 * 3),
+                                             21 + (32 * 1), 12 + (32 * 3),
+                                             13 + (32 * 3), 0 + (32 * 1),
+                                             22 + (32 * 2), 1 + (32 * 2),
+                                             8 + (32 * 2), 9 + (32 * 1),
+                                             3 + (32 * 2), 24 + (32 * 2),
+                                             15 + (32 * 1), 1 + (32 * 1) }));
+
+/* { dg-final { scan-assembler-times {\ttbl\tz[0-9]+\.d, z[0-9]+\.d, z[0-9]+\.d\n} 4 } } */
+/* { dg-final { scan-assembler-times {\ttbl\tz[0-9]+\.s, z[0-9]+\.s, z[0-9]+\.s\n} 4 } } */
+/* { dg-final { scan-assembler-times {\ttbl\tz[0-9]+\.h, z[0-9]+\.h, z[0-9]+\.h\n} 4 } } */
+/* { dg-final { scan-assembler-times {\ttbl\tz[0-9]+\.b, z[0-9]+\.b, z[0-9]+\.b\n} 2 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/vec_perm_const_1_run.c b/gcc/testsuite/gcc.target/aarch64/sve/vec_perm_const_1_run.c
new file mode 100644 (file)
index 0000000..72ddf65
--- /dev/null
@@ -0,0 +1,70 @@
+/* { dg-do run { target aarch64_sve_hw } } */
+/* { dg-options "-O" } */
+/* { dg-options "-O -msve-vector-bits=256" { target aarch64_sve256_hw } } */
+
+#include "vec_perm_const_1.c"
+#include "vec_perm_const_1_overrun.c"
+
+#define TEST_VEC_PERM(TYPE, EXPECTED_RESULT, VALUES1, VALUES2)         \
+{ /* Both shuffles of VALUES1/VALUES2 must equal EXPECTED_RESULT.  */  \
+  TYPE expected_result = EXPECTED_RESULT;                              \
+  TYPE values1 = VALUES1;                                              \
+  TYPE values2 = VALUES2;                                              \
+  TYPE dest; /* Result of vec_perm_<TYPE>.  */                         \
+  dest = vec_perm_##TYPE (values1, values2);                           \
+  if (__builtin_memcmp (&dest, &expected_result, sizeof (TYPE)) != 0)  \
+    __builtin_abort ();                                                \
+  TYPE dest2; /* Result of vec_perm_overrun_<TYPE>.  */                \
+  dest2 = vec_perm_overrun_##TYPE (values1, values2);                  \
+  if (__builtin_memcmp (&dest2, &expected_result, sizeof (TYPE)) != 0) \
+    __builtin_abort ();                                                \
+}
+
+int main (void)
+{
+  TEST_VEC_PERM (vnx2di,
+                ((vnx2di) { 12, 7, 36, 5 }),
+                ((vnx2di) { 4, 5, 6, 7 }),
+                ((vnx2di) { 12, 24, 36, 48 }));
+  TEST_VEC_PERM (vnx4si,
+                ((vnx4si) { 6, 34, 36, 37, 5, 7, 7, 5 }),
+                ((vnx4si) { 3, 4, 5, 6, 7, 8, 9, 10 }),
+                ((vnx4si) { 33, 34, 35, 36, 37, 38, 39, 40 }));
+  TEST_VEC_PERM (vnx8hi,
+                ((vnx8hi) { 11, 44, 8, 7, 38, 15, 16, 3,
+                            39, 4, 11, 12, 6, 41, 18, 4 }),
+                ((vnx8hi) { 3, 4, 5, 6, 7, 8, 9, 10, 11,
+                            12, 13, 14, 15, 16, 17, 18 }),
+                ((vnx8hi) { 33, 34, 35, 36, 37, 38, 39, 40,
+                            41, 42, 43, 44, 45, 46, 47, 48 }));
+  TEST_VEC_PERM (vnx16qi,
+                ((vnx16qi) { 5, 7, 7, 6, 12, 4, 7, 4,
+                             36, 7, 6, 5, 4, 24, 6, 7,
+                             4, 5, 7, 48, 4, 7, 36, 48,
+                             6, 24, 6, 7, 6, 4, 6, 5 }),
+                ((vnx16qi) { 4, 5, 6, 7, 4, 5, 6, 7,
+                             4, 5, 6, 7, 4, 5, 6, 7,
+                             4, 5, 6, 7, 4, 5, 6, 7,
+                             4, 5, 6, 7, 4, 5, 6, 7 }),
+                ((vnx16qi) { 12, 24, 36, 48, 12, 24, 36, 48,
+                             12, 24, 36, 48, 12, 24, 36, 48,
+                             12, 24, 36, 48, 12, 24, 36, 48,
+                             12, 24, 36, 48, 12, 24, 36, 48 }));
+  TEST_VEC_PERM (vnx2df,
+                ((vnx2df) { 48.5, 7.5, 6.5, 5.5 }),
+                ((vnx2df) { 4.5, 5.5, 6.5, 7.5 }),
+                ((vnx2df) { 12.5, 24.5, 36.5, 48.5 }));
+  TEST_VEC_PERM (vnx4sf,
+                ((vnx4sf) { 4.5, 34.5, 38.5, 36.5, 5.5, 8.5, 7.5, 5.5 }),
+                ((vnx4sf) { 3.5, 4.5, 5.5, 6.5, 7.5, 8.5, 9.5, 10.5 }),
+                ((vnx4sf) { 33.5, 34.5, 35.5, 36.5,
+                            37.5, 38.5, 39.5, 40.5 }));
+  TEST_VEC_PERM (vnx8hf,
+                ((vnx8hf) { 11.0, 44.0, 8.0, 7.0, 38.0, 15.0, 16.0, 3.0,
+                            39.0, 4.0, 11.0, 12.0, 6.0, 41.0, 18.0, 4.0 }),
+                ((vnx8hf) { 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0,
+                            12.0, 13.0, 14.0, 15.0, 16.0, 17.0, 18.0 }),
+                ((vnx8hf) { 33.0, 34.0, 35.0, 36.0, 37.0, 38.0, 39.0, 40.0,
+                            41.0, 42.0, 43.0, 44.0, 45.0, 46.0, 47.0, 48.0 }));
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/vec_perm_const_single_1.c b/gcc/testsuite/gcc.target/aarch64/sve/vec_perm_const_single_1.c
new file mode 100644 (file)
index 0000000..61122ba
--- /dev/null
@@ -0,0 +1,36 @@
+/* { dg-do compile } */
+/* { dg-options "-O -msve-vector-bits=256" } */
+
+#include <stdint.h>
+
+typedef int64_t vnx2di __attribute__((vector_size (32)));
+typedef int32_t vnx4si __attribute__((vector_size (32)));
+typedef int16_t vnx8hi __attribute__((vector_size (32)));
+typedef int8_t vnx16qi __attribute__((vector_size (32)));
+typedef double vnx2df __attribute__((vector_size (32)));
+typedef float vnx4sf __attribute__((vector_size (32)));
+typedef _Float16 vnx8hf __attribute__((vector_size (32)));
+
+#define VEC_PERM_SINGLE(TYPE, MASK)                    \
+TYPE vec_perm_##TYPE (TYPE values1, TYPE values2)      \
+{                                                      \
+  return __builtin_shuffle (values1, values2, MASK);   \
+}
+
+VEC_PERM_SINGLE (vnx2di,  ((vnx2di)  { 0, 3, 2, 1 }));
+VEC_PERM_SINGLE (vnx4si,  ((vnx4si)  { 3, 7, 1, 0, 2, 4, 4, 2 }));
+VEC_PERM_SINGLE (vnx8hi,  ((vnx8hi)  { 8, 7, 5, 4, 11, 12, 13, 0,
+                                      1, 1, 8, 9, 3, 14, 15, 1 }));
+VEC_PERM_SINGLE (vnx16qi, ((vnx16qi) { 13, 21, 11, 2, 8, 28, 3, 4,
+                                      14, 11, 30, 1, 0, 31, 2, 3,
+                                      4, 5, 11, 23, 24, 11, 12, 9,
+                                      2, 7, 22, 11, 6, 16, 18, 21 }));
+VEC_PERM_SINGLE (vnx2df,  ((vnx2di)  { 3, 3, 1, 1 }));
+VEC_PERM_SINGLE (vnx4sf,  ((vnx4si)  { 4, 5, 6, 0, 2, 7, 4, 2 }));
+VEC_PERM_SINGLE (vnx8hf,  ((vnx8hi)  { 8, 7, 5, 4, 11, 12, 13, 0,
+                                      1, 1, 8, 9, 3, 14, 15, 1 }));
+
+/* { dg-final { scan-assembler-times {\ttbl\tz[0-9]+\.d, z[0-9]+\.d, z[0-9]+\.d\n} 2 } } */
+/* { dg-final { scan-assembler-times {\ttbl\tz[0-9]+\.s, z[0-9]+\.s, z[0-9]+\.s\n} 2 } } */
+/* { dg-final { scan-assembler-times {\ttbl\tz[0-9]+\.h, z[0-9]+\.h, z[0-9]+\.h\n} 2 } } */
+/* { dg-final { scan-assembler-times {\ttbl\tz[0-9]+\.b, z[0-9]+\.b, z[0-9]+\.b\n} 1 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/vec_perm_const_single_1_run.c b/gcc/testsuite/gcc.target/aarch64/sve/vec_perm_const_single_1_run.c
new file mode 100644 (file)
index 0000000..6c8e20d
--- /dev/null
@@ -0,0 +1,65 @@
+/* { dg-do run { target aarch64_sve_hw } } */
+/* { dg-options "-O" } */
+/* { dg-options "-O -msve-vector-bits=256" { target aarch64_sve256_hw } } */
+
+#include "vec_perm_const_single_1.c"
+
+#define TEST_VEC_PERM(TYPE, EXPECTED_RESULT, VALUES1, VALUES2)         \
+{                                                                      \
+  TYPE expected_result = EXPECTED_RESULT;                              \
+  TYPE values1 = VALUES1;                                              \
+  TYPE values2 = VALUES2;                                              \
+  TYPE dest;                                                           \
+  dest = vec_perm_##TYPE (values1, values2);                           \
+  if (__builtin_memcmp (&dest, &expected_result, sizeof (TYPE)) != 0)  \
+    __builtin_abort ();                                                        \
+}
+
+int main (void) /* Call each vec_perm_TYPE once; TEST_VEC_PERM aborts on a mismatch.  */
+{
+  TEST_VEC_PERM (vnx2di,
+                ((vnx2di) { 4, 7, 6, 5 }), /* Expected result.  */
+                ((vnx2di) { 4, 5, 6, 7 }), /* values1.  */
+                ((vnx2di) { 12, 24, 36, 48 })); /* values2; none of its elements appears in the expected result.  */
+  TEST_VEC_PERM (vnx4si,
+                ((vnx4si) { 6, 10, 4, 3, 5, 7, 7, 5 }),
+                ((vnx4si) { 3, 4, 5, 6, 7, 8, 9, 10 }),
+                ((vnx4si) { 33, 34, 35, 36, 37, 38, 39, 40 }));
+  TEST_VEC_PERM (vnx8hi,
+                ((vnx8hi) { 11, 10, 8, 7, 14, 15, 16, 3,
+                            4, 4, 11, 12, 6, 17, 18, 4 }),
+                ((vnx8hi) { 3, 4, 5, 6, 7, 8, 9, 10,
+                            11, 12, 13, 14, 15, 16, 17, 18 }),
+                ((vnx8hi) { 33, 34, 35, 36, 37, 38, 39, 40,
+                            41, 42, 43, 44, 45, 46, 47, 48 }));
+  TEST_VEC_PERM (vnx16qi,
+                ((vnx16qi) { 5, 5, 7, 6, 4, 4, 7, 4,
+                             6, 7, 6, 5, 4, 7, 6, 7,
+                             4, 5, 7, 7, 4, 7, 4, 5,
+                             6, 7, 6, 7, 6, 4, 6, 5 }),
+                ((vnx16qi) { 4, 5, 6, 7, 4, 5, 6, 7,
+                             4, 5, 6, 7, 4, 5, 6, 7,
+                             4, 5, 6, 7, 4, 5, 6, 7,
+                             4, 5, 6, 7, 4, 5, 6, 7 }),
+                ((vnx16qi) { 12, 24, 36, 48, 12, 24, 36, 48,
+                             12, 24, 36, 48, 12, 24, 36, 48,
+                             12, 24, 36, 48, 12, 24, 36, 48,
+                             12, 24, 36, 48, 12, 24, 36, 48 }));
+  TEST_VEC_PERM (vnx2df,
+                ((vnx2df) { 7.5, 7.5, 5.5, 5.5 }),
+                ((vnx2df) { 4.5, 5.5, 6.5, 7.5 }),
+                ((vnx2df) { 12.5, 24.5, 36.5, 48.5 }));
+  TEST_VEC_PERM (vnx4sf,
+                ((vnx4sf) { 7.5, 8.5, 9.5, 3.5, 5.5, 10.5, 7.5, 5.5 }),
+                ((vnx4sf) { 3.5, 4.5, 5.5, 6.5, 7.5, 8.5, 9.5, 10.5 }),
+                ((vnx4sf) { 33.5, 34.5, 35.5, 36.5,
+                            37.5, 38.5, 39.5, 40.5 }));
+  TEST_VEC_PERM (vnx8hf,
+                ((vnx8hf) { 11.0, 10.0, 8.0, 7.0, 14.0, 15.0, 16.0, 3.0,
+                            4.0, 4.0, 11.0, 12.0, 6.0, 17.0, 18.0, 4.0 }),
+                ((vnx8hf) { 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0,
+                            11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0, 18.0 }),
+                ((vnx8hf) { 33.0, 34.0, 35.0, 36.0, 37.0, 38.0, 39.0, 40.0,
+                            41.0, 42.0, 43.0, 44.0, 45.0, 46.0, 47.0, 48.0 }));
+  return 0; /* Reached only when no check aborted.  */
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/vec_perm_single_1.c b/gcc/testsuite/gcc.target/aarch64/sve/vec_perm_single_1.c
new file mode 100644 (file)
index 0000000..41646d3
--- /dev/null
@@ -0,0 +1,31 @@
+/* { dg-do compile } */
+/* { dg-options "-O -msve-vector-bits=256" } */
+
+#include <stdint.h>
+
+typedef int64_t vnx2di __attribute__((vector_size (32))); /* 32-byte vector: 4 x int64_t.  */
+typedef int32_t vnx4si __attribute__((vector_size (32))); /* 8 x int32_t.  */
+typedef int16_t vnx8hi __attribute__((vector_size (32))); /* 16 x int16_t.  */
+typedef int8_t vnx16qi __attribute__((vector_size (32))); /* 32 x int8_t.  */
+typedef double vnx2df __attribute__((vector_size (32))); /* 4 x double, assuming an 8-byte double.  */
+typedef float vnx4sf __attribute__((vector_size (32))); /* 8 x float, assuming a 4-byte float.  */
+typedef _Float16 vnx8hf __attribute__((vector_size (32))); /* 16 x _Float16.  */
+
+#define VEC_PERM(TYPE, MASKTYPE) /* Defines vec_perm_TYPE.  */ \
+TYPE vec_perm_##TYPE (TYPE values, MASKTYPE mask)      \
+{ /* The selector is a run-time argument here.  */      \
+  return __builtin_shuffle (values, mask);             \
+}
+
+VEC_PERM (vnx2di, vnx2di) /* One function per vector type: (data type, selector type).  */
+VEC_PERM (vnx4si, vnx4si)
+VEC_PERM (vnx8hi, vnx8hi)
+VEC_PERM (vnx16qi, vnx16qi)
+VEC_PERM (vnx2df, vnx2di) /* Floating-point data pairs with the integer selector type of equal element size.  */
+VEC_PERM (vnx4sf, vnx4si)
+VEC_PERM (vnx8hf, vnx8hi)
+
+/* { dg-final { scan-assembler-times {\ttbl\tz[0-9]+\.d, z[0-9]+\.d, z[0-9]+\.d\n} 2 } } */
+/* { dg-final { scan-assembler-times {\ttbl\tz[0-9]+\.s, z[0-9]+\.s, z[0-9]+\.s\n} 2 } } */
+/* { dg-final { scan-assembler-times {\ttbl\tz[0-9]+\.h, z[0-9]+\.h, z[0-9]+\.h\n} 2 } } */
+/* { dg-final { scan-assembler-times {\ttbl\tz[0-9]+\.b, z[0-9]+\.b, z[0-9]+\.b\n} 1 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/vec_perm_single_1_run.c b/gcc/testsuite/gcc.target/aarch64/sve/vec_perm_single_1_run.c
new file mode 100644 (file)
index 0000000..eaa6a88
--- /dev/null
@@ -0,0 +1,65 @@
+/* { dg-do run { target aarch64_sve_hw } } */
+/* { dg-options "-O" } */
+/* { dg-options "-O -msve-vector-bits=256" { target aarch64_sve256_hw } } */
+
+#include "vec_perm_single_1.c"
+extern void abort (void);
+
+#define TEST_VEC_PERM(TYPE, MASK_TYPE, EXPECTED_RESULT, VALUES, MASK)  \
+{                                                                      \
+  TYPE expected_result = EXPECTED_RESULT;                              \
+  TYPE values = VALUES;                                                        \
+  MASK_TYPE mask = MASK;                                               \
+  TYPE dest;                                                           \
+  dest = vec_perm_##TYPE (values, mask);                               \
+  if (__builtin_memcmp (&dest, &expected_result, sizeof (TYPE)) != 0)  \
+    __builtin_abort ();                                                        \
+}
+
+int main (void) /* Call each vec_perm_TYPE once; TEST_VEC_PERM aborts on a mismatch.  */
+{
+  TEST_VEC_PERM (vnx2di, vnx2di,
+                ((vnx2di) { 5, 6, 7, 5 }), /* Expected result.  */
+                ((vnx2di) { 4, 5, 6, 7 }), /* Input values.  */
+                ((vnx2di) { 1, 6, 3, 5 })); /* Selector; 6 and 5 are past the 4 elements and are expected to pick elements 2 and 1.  */
+  TEST_VEC_PERM (vnx4si, vnx4si,
+                ((vnx4si) { 4, 8, 10, 10, 9, 8, 7, 5 }),
+                ((vnx4si) { 3, 4, 5, 6, 7, 8, 9, 10 }),
+                ((vnx4si) { 9, 13, 15, 7, 6, 5, 4, 10 }));
+  TEST_VEC_PERM (vnx8hi, vnx8hi,
+                ((vnx8hi) { 12, 16, 18, 10, 12, 13, 14, 4,
+                            7, 18, 3, 5, 9, 8, 7, 13 }),
+                ((vnx8hi) { 3, 4, 5, 6, 7, 8, 9, 10,
+                            11, 12, 13, 14, 15, 16, 17, 18 }),
+                ((vnx8hi) { 9, 13, 15, 7, 25, 26, 27, 17,
+                            4, 31, 0, 18, 6, 5, 4, 10 }));
+  TEST_VEC_PERM (vnx16qi, vnx16qi,
+                ((vnx16qi) { 5, 6, 7, 4, 5, 6, 4, 5,
+                             6, 7, 4, 5, 6, 7, 4, 5,
+                             5, 6, 7, 4, 5, 6, 4, 5,
+                             6, 7, 4, 5, 6, 7, 4, 5 }),
+                ((vnx16qi) { 4, 5, 6, 7, 4, 5, 6, 7,
+                             4, 5, 6, 7, 4, 5, 6, 7,
+                             4, 5, 6, 7, 4, 5, 6, 7,
+                             4, 5, 6, 7, 4, 5, 6, 7 }),
+                ((vnx16qi) { 5, 6, 7, 8, 9, 10, 28, 29,
+                             30, 31, 32, 33, 54, 55, 56, 61,
+                             5, 6, 7, 8, 9, 10, 28, 29,
+                             30, 31, 32, 33, 54, 55, 56, 61 }));
+  TEST_VEC_PERM (vnx2df, vnx2di,
+                ((vnx2df) { 5.1, 6.1, 7.1, 5.1 }),
+                ((vnx2df) { 4.1, 5.1, 6.1, 7.1 }),
+                ((vnx2di) { 1, 6, 3, 5 }));
+  TEST_VEC_PERM (vnx4sf, vnx4si,
+                ((vnx4sf) { 4.2, 8.2, 10.2, 10.2, 9.2, 8.2, 7.2, 5.2 }),
+                ((vnx4sf) { 3.2, 4.2, 5.2, 6.2, 7.2, 8.2, 9.2, 10.2 }),
+                ((vnx4si) { 9, 13, 15, 7, 6, 5, 4, 10 }));
+  TEST_VEC_PERM (vnx8hf, vnx8hi,
+                ((vnx8hf) { 12.0, 16.0, 18.0, 10.0, 12.0, 13.0, 14.0, 4.0,
+                            7.0, 18.0, 3.0, 5.0, 9.0, 8.0, 7.0, 13.0 }),
+                ((vnx8hf) { 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0,
+                            11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0, 18.0 }),
+                ((vnx8hi) { 9, 13, 15, 7, 25, 26, 27, 17,
+                            4, 31, 0, 18, 6, 5, 4, 10 }));
+  return 0; /* Reached only when no check aborted.  */
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/zip1_1.c b/gcc/testsuite/gcc.target/aarch64/sve/zip1_1.c
new file mode 100644 (file)
index 0000000..051df31
--- /dev/null
@@ -0,0 +1,54 @@
+/* { dg-do compile } */
+/* { dg-options "-O -msve-vector-bits=256" } */
+
+#ifndef BIAS
+#define BIAS 0
+#endif
+
+#include <stdint.h>
+
+typedef int64_t vnx2di __attribute__((vector_size (32))); /* 32-byte vector: 4 x int64_t.  */
+typedef int32_t vnx4si __attribute__((vector_size (32))); /* 8 x int32_t.  */
+typedef int16_t vnx8hi __attribute__((vector_size (32))); /* 16 x int16_t.  */
+typedef int8_t vnx16qi __attribute__((vector_size (32))); /* 32 x int8_t.  */
+typedef double vnx2df __attribute__((vector_size (32))); /* 4 x double, assuming an 8-byte double.  */
+typedef float vnx4sf __attribute__((vector_size (32))); /* 8 x float, assuming a 4-byte float.  */
+typedef _Float16 vnx8hf __attribute__((vector_size (32))); /* 16 x _Float16.  */
+
+#define MASK_2(X, Y) X, Y + X /* Two indices: X, then the one Y places after it.  */
+#define MASK_4(X, Y) MASK_2 (X, Y), MASK_2 (X + 1, Y) /* X, Y + X, X + 1, Y + X + 1.  */
+#define MASK_8(X, Y) MASK_4 (X, Y), MASK_4 (X + 2, Y) /* Same pattern continued from X + 2.  */
+#define MASK_16(X, Y) MASK_8 (X, Y), MASK_8 (X + 4, Y) /* ... continued from X + 4.  */
+#define MASK_32(X, Y) MASK_16 (X, Y), MASK_16 (X + 8, Y) /* ... continued from X + 8.  */
+
+#define INDEX_4 vnx2di /* Selector type used by PERMUTE when NUNITS is 4.  */
+#define INDEX_8 vnx4si /* ... when NUNITS is 8.  */
+#define INDEX_16 vnx8hi /* ... when NUNITS is 16.  */
+#define INDEX_32 vnx16qi /* ... when NUNITS is 32.  */
+
+#define PERMUTE(TYPE, NUNITS) /* Defines permute_TYPE.  */      \
+  TYPE permute_##TYPE (TYPE values1, TYPE values2)             \
+  { /* Selector: X, NUNITS + X, X + 1, ... from the X below. */ \
+    return __builtin_shuffle                                   \
+      (values1, values2,                                       \
+       ((INDEX_##NUNITS) { MASK_##NUNITS (BIAS * (NUNITS / 2), \
+                                         NUNITS) }));          \
+  }
+
+#define TEST_ALL(T) /* Applies T to each (type, count) pair. */ \
+  T (vnx2di, 4)                                        \
+  T (vnx4si, 8)                                        \
+  T (vnx8hi, 16)                               \
+  T (vnx16qi, 32)                              \
+  T (vnx2df, 4)                                        \
+  T (vnx4sf, 8)                                        \
+  T (vnx8hf, 16)
+
+TEST_ALL (PERMUTE) /* Emits the seven permute_TYPE functions.  */
+
+/* { dg-final { scan-assembler-not {\ttbl\t} } } */
+
+/* { dg-final { scan-assembler-times {\tzip1\tz[0-9]+\.d, z[0-9]+\.d, z[0-9]+\.d} 2 } } */
+/* { dg-final { scan-assembler-times {\tzip1\tz[0-9]+\.s, z[0-9]+\.s, z[0-9]+\.s} 2 } } */
+/* { dg-final { scan-assembler-times {\tzip1\tz[0-9]+\.h, z[0-9]+\.h, z[0-9]+\.h} 2 } } */
+/* { dg-final { scan-assembler-times {\tzip1\tz[0-9]+\.b, z[0-9]+\.b, z[0-9]+\.b} 1 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/zip2_1.c b/gcc/testsuite/gcc.target/aarch64/sve/zip2_1.c
new file mode 100644 (file)
index 0000000..85d545a
--- /dev/null
@@ -0,0 +1,12 @@
+/* { dg-do compile } */
+/* { dg-options "-O -msve-vector-bits=256" } */
+
+#define BIAS 1 /* zip1_1.c starts its selector at BIAS * (NUNITS / 2) and only defaults BIAS to 0 when it is undefined.  */
+#include "zip1_1.c"
+
+/* { dg-final { scan-assembler-not {\ttbl\t} } } */
+
+/* { dg-final { scan-assembler-times {\tzip2\tz[0-9]+\.d, z[0-9]+\.d, z[0-9]+\.d} 2 } } */
+/* { dg-final { scan-assembler-times {\tzip2\tz[0-9]+\.s, z[0-9]+\.s, z[0-9]+\.s} 2 } } */
+/* { dg-final { scan-assembler-times {\tzip2\tz[0-9]+\.h, z[0-9]+\.h, z[0-9]+\.h} 2 } } */
+/* { dg-final { scan-assembler-times {\tzip2\tz[0-9]+\.b, z[0-9]+\.b, z[0-9]+\.b} 1 } } */
index 3d55ecfdb323d72472c26296b9b51f20f7ff1f62..14dff87d7f057ce2b862a845481af9e9dded6421 100644 (file)
@@ -1,6 +1,8 @@
 /* { dg-do compile } */
 /* { dg-options "-O3" } */
 
+#pragma GCC target "+nosve"
+
 int 
 t6(int len, void * dummy, unsigned short * __restrict x)
 {
index fd3b578c0bb23c4aaf5098fa01f487b7cebc0b67..79d0d094fc3d152cdd8ef6cb5088564c2e01bc13 100644 (file)
@@ -1,6 +1,8 @@
 /* { dg-do compile } */
 /* { dg-options "-O3" } */
 
+#pragma GCC target "+nosve"
+
 int 
 t6(int len, void * dummy, unsigned short * __restrict x)
 {
index 499af5115212c0b4a12a2137016e03bbc323938a..39cbd6b6cc238c613ac48a2ba1df9aa662f4421c 100644 (file)
@@ -1,6 +1,8 @@
 /* { dg-do compile } */
 /* { dg-options "-O3" } */
 
+#pragma GCC target "+nosve"
+
 int 
 t6(int len, void * dummy, char * __restrict x)
 {
index 69afff1dd14b799bdc58c72b0de6d12ac1e9f714..a327b632ef06872cfed62b3caf83a48cfe8b7bf4 100644 (file)
@@ -3,6 +3,8 @@
 /* { dg-do compile } */
 /* { dg-options "-O2 -ftree-vectorize" } */
 
+#pragma GCC target "+nosve"
+
 #define COUNT1(X) if (X) count += 1
 #define COUNT2(X) if (X) count -= 1
 #define COUNT3(X) count += (X)
index 33130aab55d6ae14423291173e68d80ee49fd331..82bc16f9ec9278ab8d1a8a329411868169b4b464 100644 (file)
@@ -1,7 +1,8 @@
-
 /* { dg-do compile } */
 /* { dg-options "-O3" } */
 
+#pragma GCC target "+nosve"
+
 #include "vect.x"
 
 /* { dg-final { scan-assembler "orn\\tv" } } */
index cce9240343f0f9c47f589a39ad4d26d9b41ab209..4d1a8b9af76963f3dfaf5a7f77d4abb1fd94a688 100644 (file)
@@ -1,7 +1,8 @@
-
 /* { dg-do compile } */
 /* { dg-options "-O3 -ffast-math" } */
 
+#pragma GCC target "+nosve"
+
 #include "vect-faddv.x"
 
 /* { dg-final { scan-assembler-times "faddp\\tv" 2} } */
index 4640f571715dcfecfa37cccdeb2a3e0388911e8d..75dbf6369dd9bd9ceb27e476867e42b368443b18 100644 (file)
@@ -1,6 +1,8 @@
 /* { dg-do run } */
 /* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-all -fno-unroll-loops --save-temps -fno-inline -fno-vect-cost-model" } */
 
+#pragma GCC target "+nosve"
+
 #define FTYPE double
 #define ITYPE long
 #define OP ==
index 6d5fdb59c8117707143d624a625b256408c57e42..d97a0ebac601df90bd6562ea15269bcc631cd124 100644 (file)
@@ -1,6 +1,8 @@
 /* { dg-do run } */
 /* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-all -fno-unroll-loops --save-temps -fno-inline" } */
 
+#pragma GCC target "+nosve"
+
 #define FTYPE float
 #define ITYPE int
 #define OP ==
index f5b6329142dc58265a2cfbecffef9bf2ddad715b..594542dda65771ba26556348866980721c65efcc 100644 (file)
@@ -1,6 +1,8 @@
 /* { dg-do run } */
 /* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-all -fno-unroll-loops --save-temps -fno-inline -fno-vect-cost-model" } */
 
+#pragma GCC target "+nosve"
+
 #define FTYPE double
 #define ITYPE long
 #define OP >=
index 9561b7159de8baf4969db05a9ab282de07fc5acb..7eeb98830f993a4794e34c0bef4f75ccbb9a4f1b 100644 (file)
@@ -1,6 +1,8 @@
 /* { dg-do run } */
 /* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-all -fno-unroll-loops --save-temps -fno-inline" } */
 
+#pragma GCC target "+nosve"
+
 #define FTYPE float
 #define ITYPE int
 #define OP >=
index 28d7ab6c443f284df545eb92a6586f781fe3b08c..4c863f417c2f89c8e9d9e2e23e4c6d54e4e5e13d 100644 (file)
@@ -1,6 +1,8 @@
 /* { dg-do run } */
 /* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-all -fno-unroll-loops --save-temps -fno-inline -fno-vect-cost-model" } */
 
+#pragma GCC target "+nosve"
+
 #define FTYPE double
 #define ITYPE long
 #define OP >
index 20abbd544cad61aa2b0f47104bf1db9a32145b84..c56e78ad0ecc1085705523677b1595e5d3d131e8 100644 (file)
@@ -1,6 +1,8 @@
 /* { dg-do run } */
 /* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-all -fno-unroll-loops --save-temps -fno-inline" } */
 
+#pragma GCC target "+nosve"
+
 #define FTYPE float
 #define ITYPE int
 #define OP >
index 1285a506320dafdff60e7e18ee4e22157ee3c493..7a5972ad4a69c33cb5273b68a3fed24d63f0b7b9 100644 (file)
@@ -1,6 +1,8 @@
 /* { dg-do compile } */
 /* { dg-options "-O3 -ffast-math" } */
 
+#pragma GCC target "+nosve"
+
 #include "vect-fmax-fmin.x"
 
 /* { dg-final { scan-assembler "fmaxnm\\tv" } } */
index 975cef9c58454d3d324c4717d6afcd2109700206..355ab7e51e7418902faa1e63462ff0ba038e4c61 100644 (file)
@@ -1,7 +1,8 @@
-
 /* { dg-do compile } */
 /* { dg-options "-O3 -ffast-math -fno-vect-cost-model" } */
 
+#pragma GCC target "+nosve"
+
 #include "vect-fmaxv-fminv.x"
 
 /* { dg-final { scan-assembler "fminnmv" } } */
index bfd327cb3460e63ede9b2d1f0886903f6e92408f..c987f5fb83bf0d7f2605763a1ae5be9b32037f9a 100644 (file)
@@ -1,6 +1,8 @@
 /* { dg-do compile } */
 /* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-all -fno-vect-cost-model" } */
 
+#pragma GCC target "+nosve"
+
 #define N 32
 
 void
index 53aa66cff54d7b15dcd89e6f798e9dc4afae886b..85bd5c5fffe797797135553e339094157d56f3da 100644 (file)
@@ -1,6 +1,8 @@
 /* { dg-do compile } */
 /* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-all -fno-vect-cost-model" } */
 
+#pragma GCC target "+nosve"
+
 #define N 32
 
 void
index 8eb8a65eb997b1f231b3d052a99f57fe8786b983..22a0535433a412044bb0428c8a8439f605b2708b 100644 (file)
@@ -1,6 +1,8 @@
 /* { dg-do compile } */
 /* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-all -fno-vect-cost-model" } */
 
+#pragma GCC target "+nosve"
+
 #define N 32
 
 void
index 90d911cae6e99143d2937fc2542fca7dfb026f39..2869b27517c69562b7b6d90a691d9b59122ce60e 100644 (file)
@@ -1,6 +1,8 @@
 /* { dg-do compile } */
 /* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-all -fno-vect-cost-model" } */
 
+#pragma GCC target "+nosve"
+
 #define N 32
 
 void
index 47ef100e87bda15a2eb0ebf60c7cbd6d64836657..22164ef3335e8f081654ec65dc6345a2b743a43b 100644 (file)
@@ -1,8 +1,8 @@
-
-
 /* { dg-do compile } */
 /* { dg-options "-O3" } */
 
+#pragma GCC target "+nosve"
+
 #include "vect-fp.x"
 
 /* { dg-final { scan-assembler "fadd\\tv" } } */
index 4711c612002069bd59c2c9c54cbdfe89cde14f18..30d54b3b066705ec1d5504a049e8930e1088fb0c 100644 (file)
@@ -1,6 +1,8 @@
 /* { dg-do compile } */
 /* { dg-options "-O3 -fno-vect-cost-model" } */
 
+#pragma GCC target "+nosve"
+
 #include "stdint.h"
 #include "vect-ld1r.x"
 
index 761777f794c5ecb93b0f4f22eba8465396e2d44c..30219e62d79d77d0a7c153f6c8f525355a2ca250 100644 (file)
@@ -1,6 +1,8 @@
 /* { dg-do compile } */
 /* { dg-options "-O3 -fno-vect-cost-model" } */
 
+#pragma GCC target "+nosve"
+
 #include "stdint.h"
 #include "vect-ld1r.x"
 
index 311d3dafaec73e76591abc96df50827e5bd6b94c..16e6842012b174acf86336e21bfe2e20113dd63a 100644 (file)
@@ -1,6 +1,8 @@
 /* { dg-do run } */
 /* { dg-options "-O3 --save-temps -fno-inline" } */
 
+#pragma GCC target "+nosve"
+
 extern void abort (void);
 
 #define N 16
index e90c97ff32647345002a9f2c8dbee651982f915e..c9e4dccbd9815ff826550df682f704018961e278 100644 (file)
@@ -1,7 +1,8 @@
-
 /* { dg-do compile } */
 /* { dg-options "-O3" } */
 
+#pragma GCC target "+nosve"
+
 #define N 16
 
 #include "vect-mull.x"
index 6261e9d1ea6fa8949d392543e08b880477a1ed5d..918822a7d004baf36b4538d81222f3dff10d5451 100644 (file)
@@ -2,6 +2,8 @@
 /* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-all -fno-vect-cost-model" } */
 /* Write a reduction loop to be reduced using whole vector right shift.  */
 
+#pragma GCC target "+nosve"
+
 extern void abort (void);
 
 unsigned char in[8] __attribute__((__aligned__(16)));
index 7501825b0cb56ae7f1154cd3813c9ba4cb5969f7..41e9157dbecf43e445ec6c7ec84221770485582b 100644 (file)
@@ -1,6 +1,8 @@
 /* { dg-do run } */
 /* { dg-options "-O3 --save-temps -ffast-math" } */
 
+#pragma GCC target "+nosve"
+
 #include <arm_neon.h>
 
 extern void abort (void);
index 659b949c6c47b462407f36f9f64322ef7bd8ecc0..1d56e0534d93851e2bce9be478043fe8180c7aad 100644 (file)
@@ -1,6 +1,8 @@
 /* { dg-do run } */
 /* { dg-options "-O3 -fno-inline -save-temps -fno-vect-cost-model -fno-ipa-icf" } */
 
+#pragma GCC target "+nosve"
+
 typedef signed char S8_t;
 typedef signed short S16_t;
 typedef signed int S32_t;
index c191d2eba102b5c58c660c6f327640172531ffc6..7cbf2a4d069b8100594ab76b78cbf854a7edff95 100644 (file)
@@ -1,6 +1,8 @@
 /* { dg-do run } */
 /* { dg-options "-O3 -fno-inline -save-temps -fno-vect-cost-model -fno-ipa-icf" } */
 
+#pragma GCC target "+nosve"
+
 typedef signed char S8_t;
 typedef signed short S16_t;
 typedef signed int S32_t;
index bf43f1cd72ec4e636dbd45534c9ab348bee8febc..aecf8262706c3b0365989d30e162f22f57a6a8c9 100644 (file)
@@ -49,5 +49,6 @@ f12 (void)
   return sum;
 }
 
-
-/* { dg-final { scan-assembler-not "sp" } } */
+/* Fails for fixed-length SVE because we lack a vec_init pattern.
+   A later patch fixes this in generic code.  */
+/* { dg-final { scan-assembler-not "sp" { xfail { aarch64_sve && { ! vect_variable_length } } } } } */
index f525426eb40034f4dbea17644293016b4970bb2d..43ca15dfae64bba7125115dd1a4280797e33a471 100644 (file)
@@ -8601,7 +8601,7 @@ proc check_effective_target_aarch64_tiny { } {
 # Create functions to check that the AArch64 assembler supports the
 # various architecture extensions via the .arch_extension pseudo-op.
 
-foreach { aarch64_ext } { "fp" "simd" "crypto" "crc" "lse" "dotprod"} {
+foreach { aarch64_ext } { "fp" "simd" "crypto" "crc" "lse" "dotprod" "sve"} {
     eval [string map [list FUNC $aarch64_ext] {
        proc check_effective_target_aarch64_asm_FUNC_ok { } {
          if { [istarget aarch64*-*-*] } {