From 9f4cbab84d3bc5d4f35258cbc2e6df48d8821497 Mon Sep 17 00:00:00 2001
From: Richard Sandiford
Date: Sat, 13 Jan 2018 17:57:36 +0000
Subject: [PATCH] [AArch64] SVE load/store_lanes support

This patch adds support for SVE LD[234], ST[234] and associated
structure modes.  Unlike Advanced SIMD, these modes are extra-long
vector modes instead of integer modes.

2018-01-13  Richard Sandiford
	    Alan Hayward
	    David Sherwood

gcc/
	* config/aarch64/aarch64-modes.def: Define x2, x3 and x4 vector
	modes for SVE.
	* config/aarch64/aarch64-protos.h
	(aarch64_sve_struct_memory_operand_p): Declare.
	* config/aarch64/iterators.md (SVE_STRUCT): New mode iterator.
	(vector_count, insn_length, VSINGLE, vsingle): New mode attributes.
	(VPRED, vpred): Handle SVE structure modes.
	* config/aarch64/constraints.md (Utx): New constraint.
	* config/aarch64/predicates.md (aarch64_sve_struct_memory_operand)
	(aarch64_sve_struct_nonimmediate_operand): New predicates.
	* config/aarch64/aarch64.md (UNSPEC_LDN, UNSPEC_STN): New unspecs.
	* config/aarch64/aarch64-sve.md (mov<mode>, *aarch64_sve_mov<mode>_le)
	(*aarch64_sve_mov<mode>_be, pred_mov<mode>): New patterns for
	structure modes.  Split into pieces after RA.
	(vec_load_lanes<mode><vsingle>, vec_mask_load_lanes<mode><vsingle>)
	(vec_store_lanes<mode><vsingle>, vec_mask_store_lanes<mode><vsingle>):
	New patterns.
	* config/aarch64/aarch64.c (aarch64_classify_vector_mode): Handle
	SVE structure modes.
	(aarch64_classify_address): Likewise.
	(sizetochar): Move earlier in file.
	(aarch64_print_operand): Handle SVE register lists.
	(aarch64_array_mode): New function.
	(aarch64_sve_struct_memory_operand_p): Likewise.
	(TARGET_ARRAY_MODE): Redefine.

gcc/testsuite/
	* lib/target-supports.exp (check_effective_target_vect_load_lanes):
	Return true for SVE too.
	* g++.dg/vect/pr36648.cc: XFAIL for variable-length vectors
	if load/store lanes are supported.
	* gcc.dg/vect/slp-10.c: Likewise.
	* gcc.dg/vect/slp-12c.c: Likewise.
	* gcc.dg/vect/slp-17.c: Likewise.
	* gcc.dg/vect/slp-33.c: Likewise.
	* gcc.dg/vect/slp-6.c: Likewise.
	* gcc.dg/vect/slp-cond-1.c: Likewise.
	* gcc.dg/vect/slp-multitypes-11-big-array.c: Likewise.
	* gcc.dg/vect/slp-multitypes-11.c: Likewise.
	* gcc.dg/vect/slp-multitypes-12.c: Likewise.
	* gcc.dg/vect/slp-perm-5.c: Remove XFAIL for variable-length SVE.
	* gcc.dg/vect/slp-perm-6.c: Likewise.
	* gcc.dg/vect/slp-perm-9.c: Likewise.
	* gcc.dg/vect/slp-reduc-6.c: Remove XFAIL for variable-length vectors.
	* gcc.dg/vect/vect-load-lanes-peeling-1.c: Expect an epilogue loop
	for variable-length vectors.
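As a concrete illustration (not part of the patch itself; the function below
is a made-up example and its names are arbitrary), the kind of access this
series targets is a gap-free interleaved load.  With these changes the group
of two loaded vectors is represented by the new VNx8SI structure mode (two
VNx4SI subvectors) rather than by an integer mode, and a loop like the one
below can be vectorized with an LD2W group load on variable-length SVE:

/* Illustrative only.  in[] holds interleaved {a, b} pairs; a load-lanes
   access (LD2W) fills a VNx8SI group whose two VNx4SI subvectors are then
   added and stored contiguously with ST1W.  */
void
sum_pairs (int *restrict out, const int *restrict in, int n)
{
  for (int i = 0; i < n; ++i)
    out[i] = in[2 * i] + in[2 * i + 1];
}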
Co-Authored-By: Alan Hayward
Co-Authored-By: David Sherwood

From-SVN: r256618
---
 gcc/ChangeLog                                 |  30 ++++
 gcc/config/aarch64/aarch64-modes.def          |   3 +
 gcc/config/aarch64/aarch64-protos.h           |   1 +
 gcc/config/aarch64/aarch64-sve.md             | 153 ++++++++++++++++++
 gcc/config/aarch64/aarch64.c                  | 102 +++++++++---
 gcc/config/aarch64/aarch64.md                 |   2 +
 gcc/config/aarch64/constraints.md             |   6 +
 gcc/config/aarch64/iterators.md               | 103 +++++++++++-
 gcc/config/aarch64/predicates.md              |   8 +
 gcc/testsuite/ChangeLog                       |  24 +++
 gcc/testsuite/g++.dg/vect/pr36648.cc          |   2 +-
 gcc/testsuite/gcc.dg/vect/slp-10.c            |   2 +-
 gcc/testsuite/gcc.dg/vect/slp-12c.c           |   2 +-
 gcc/testsuite/gcc.dg/vect/slp-17.c            |   2 +-
 gcc/testsuite/gcc.dg/vect/slp-33.c            |   2 +-
 gcc/testsuite/gcc.dg/vect/slp-6.c             |   2 +-
 gcc/testsuite/gcc.dg/vect/slp-cond-1.c        |   2 +-
 .../gcc.dg/vect/slp-multitypes-11-big-array.c |   2 +-
 gcc/testsuite/gcc.dg/vect/slp-multitypes-11.c |   2 +-
 gcc/testsuite/gcc.dg/vect/slp-multitypes-12.c |   2 +-
 gcc/testsuite/gcc.dg/vect/slp-perm-5.c        |   4 +-
 gcc/testsuite/gcc.dg/vect/slp-perm-6.c        |   6 +-
 gcc/testsuite/gcc.dg/vect/slp-perm-9.c        |   4 +-
 gcc/testsuite/gcc.dg/vect/slp-reduc-6.c       |   2 +-
 .../gcc.dg/vect/vect-load-lanes-peeling-1.c   |   2 +-
 gcc/testsuite/lib/target-supports.exp         |   3 +-
 26 files changed, 425 insertions(+), 48 deletions(-)

diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index ed20d31774b..fba27e19675 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,33 @@
+2018-01-13  Richard Sandiford
+	    Alan Hayward
+	    David Sherwood
+
+	* config/aarch64/aarch64-modes.def: Define x2, x3 and x4 vector
+	modes for SVE.
+	* config/aarch64/aarch64-protos.h
+	(aarch64_sve_struct_memory_operand_p): Declare.
+	* config/aarch64/iterators.md (SVE_STRUCT): New mode iterator.
+	(vector_count, insn_length, VSINGLE, vsingle): New mode attributes.
+	(VPRED, vpred): Handle SVE structure modes.
+	* config/aarch64/constraints.md (Utx): New constraint.
+	* config/aarch64/predicates.md (aarch64_sve_struct_memory_operand)
+	(aarch64_sve_struct_nonimmediate_operand): New predicates.
+	* config/aarch64/aarch64.md (UNSPEC_LDN, UNSPEC_STN): New unspecs.
+	* config/aarch64/aarch64-sve.md (mov<mode>, *aarch64_sve_mov<mode>_le)
+	(*aarch64_sve_mov<mode>_be, pred_mov<mode>): New patterns for
+	structure modes.  Split into pieces after RA.
+	(vec_load_lanes<mode><vsingle>, vec_mask_load_lanes<mode><vsingle>)
+	(vec_store_lanes<mode><vsingle>, vec_mask_store_lanes<mode><vsingle>):
+	New patterns.
+	* config/aarch64/aarch64.c (aarch64_classify_vector_mode): Handle
+	SVE structure modes.
+	(aarch64_classify_address): Likewise.
+	(sizetochar): Move earlier in file.
+	(aarch64_print_operand): Handle SVE register lists.
+	(aarch64_array_mode): New function.
+	(aarch64_sve_struct_memory_operand_p): Likewise.
+	(TARGET_ARRAY_MODE): Redefine.
+
 2018-01-13  Richard Sandiford
 	    Alan Hayward
 	    David Sherwood
diff --git a/gcc/config/aarch64/aarch64-modes.def b/gcc/config/aarch64/aarch64-modes.def
index 4e9da29d321..1a05b6cc708 100644
--- a/gcc/config/aarch64/aarch64-modes.def
+++ b/gcc/config/aarch64/aarch64-modes.def
@@ -88,6 +88,9 @@ INT_MODE (XI, 64);
 /* Give SVE vectors the names normally used for 256-bit vectors.
    The actual number depends on command-line flags.  */
 SVE_MODES (1, VNx16, VNx8, VNx4, VNx2)
+SVE_MODES (2, VNx32, VNx16, VNx8, VNx4)
+SVE_MODES (3, VNx48, VNx24, VNx12, VNx6)
+SVE_MODES (4, VNx64, VNx32, VNx16, VNx8)
 
 /* Quad float: 128-bit floating mode for long doubles.  */
 FLOAT_MODE (TF, 16, ieee_quad_format);
diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h
index 4f1fc15d39d..2d705d28647 100644
--- a/gcc/config/aarch64/aarch64-protos.h
+++ b/gcc/config/aarch64/aarch64-protos.h
@@ -432,6 +432,7 @@ rtx aarch64_simd_gen_const_vector_dup (machine_mode, HOST_WIDE_INT);
 bool aarch64_simd_mem_operand_p (rtx);
 bool aarch64_sve_ld1r_operand_p (rtx);
 bool aarch64_sve_ldr_operand_p (rtx);
+bool aarch64_sve_struct_memory_operand_p (rtx);
 rtx aarch64_simd_vect_par_cnst_half (machine_mode, int, bool);
 rtx aarch64_tls_get_addr (void);
 tree aarch64_fold_builtin (tree, int, tree *, bool);
diff --git a/gcc/config/aarch64/aarch64-sve.md b/gcc/config/aarch64/aarch64-sve.md
index 352c3065094..e83d4f6c718 100644
--- a/gcc/config/aarch64/aarch64-sve.md
+++ b/gcc/config/aarch64/aarch64-sve.md
@@ -189,6 +189,105 @@
   "st1<Vesize>\t%1.<Vetype>, %2, %0"
 )
 
+;; SVE structure moves.
+(define_expand "mov<mode>"
+  [(set (match_operand:SVE_STRUCT 0 "nonimmediate_operand")
+	(match_operand:SVE_STRUCT 1 "general_operand"))]
+  "TARGET_SVE"
+  {
+    /* Big-endian loads and stores need to be done via LD1 and ST1;
+       see the comment at the head of the file for details.  */
+    if ((MEM_P (operands[0]) || MEM_P (operands[1]))
+	&& BYTES_BIG_ENDIAN)
+      {
+	gcc_assert (can_create_pseudo_p ());
+	aarch64_expand_sve_mem_move (operands[0], operands[1], <VPRED>mode);
+	DONE;
+      }
+
+    if (CONSTANT_P (operands[1]))
+      {
+	aarch64_expand_mov_immediate (operands[0], operands[1]);
+	DONE;
+      }
+  }
+)
+
+;; Unpredicated structure moves (little-endian).
+(define_insn "*aarch64_sve_mov<mode>_le"
+  [(set (match_operand:SVE_STRUCT 0 "aarch64_sve_nonimmediate_operand" "=w, Utr, w, w")
+	(match_operand:SVE_STRUCT 1 "aarch64_sve_general_operand" "Utr, w, w, Dn"))]
+  "TARGET_SVE && !BYTES_BIG_ENDIAN"
+  "#"
+  [(set_attr "length" "<insn_length>")]
+)
+
+;; Unpredicated structure moves (big-endian).  Memory accesses require
+;; secondary reloads.
+(define_insn "*aarch64_sve_mov<mode>_be"
+  [(set (match_operand:SVE_STRUCT 0 "register_operand" "=w, w")
+	(match_operand:SVE_STRUCT 1 "aarch64_nonmemory_operand" "w, Dn"))]
+  "TARGET_SVE && BYTES_BIG_ENDIAN"
+  "#"
+  [(set_attr "length" "<insn_length>")]
+)
+
+;; Split unpredicated structure moves into pieces.  This is the same
+;; for both big-endian and little-endian code, although it only needs
+;; to handle memory operands for little-endian code.
+(define_split
+  [(set (match_operand:SVE_STRUCT 0 "aarch64_sve_nonimmediate_operand")
+	(match_operand:SVE_STRUCT 1 "aarch64_sve_general_operand"))]
+  "TARGET_SVE && reload_completed"
+  [(const_int 0)]
+  {
+    rtx dest = operands[0];
+    rtx src = operands[1];
+    if (REG_P (dest) && REG_P (src))
+      aarch64_simd_emit_reg_reg_move (operands, <VSINGLE>mode, <vector_count>);
+    else
+      for (unsigned int i = 0; i < <vector_count>; ++i)
+	{
+	  rtx subdest = simplify_gen_subreg (<VSINGLE>mode, dest, <MODE>mode,
+					     i * BYTES_PER_SVE_VECTOR);
+	  rtx subsrc = simplify_gen_subreg (<VSINGLE>mode, src, <MODE>mode,
+					    i * BYTES_PER_SVE_VECTOR);
+	  emit_insn (gen_rtx_SET (subdest, subsrc));
+	}
+    DONE;
+  }
+)
+
+;; Predicated structure moves.  This works for both endiannesses but in
+;; practice is only useful for big-endian.
+(define_insn_and_split "pred_mov<mode>"
+  [(set (match_operand:SVE_STRUCT 0 "aarch64_sve_struct_nonimmediate_operand" "=w, Utx")
+	(unspec:SVE_STRUCT
+	  [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
+	   (match_operand:SVE_STRUCT 2 "aarch64_sve_struct_nonimmediate_operand" "Utx, w")]
+	  UNSPEC_MERGE_PTRUE))]
+  "TARGET_SVE
+   && (register_operand (operands[0], <MODE>mode)
+       || register_operand (operands[2], <MODE>mode))"
+  "#"
+  "&& reload_completed"
+  [(const_int 0)]
+  {
+    for (unsigned int i = 0; i < <vector_count>; ++i)
+      {
+	rtx subdest = simplify_gen_subreg (<VSINGLE>mode, operands[0],
+					   <MODE>mode,
+					   i * BYTES_PER_SVE_VECTOR);
+	rtx subsrc = simplify_gen_subreg (<VSINGLE>mode, operands[2],
+					  <MODE>mode,
+					  i * BYTES_PER_SVE_VECTOR);
+	aarch64_emit_sve_pred_move (subdest, operands[1], subsrc);
+      }
+    DONE;
+  }
+  [(set_attr "length" "<insn_length>")]
+)
+
 (define_expand "mov<mode>"
   [(set (match_operand:PRED_ALL 0 "nonimmediate_operand")
 	(match_operand:PRED_ALL 1 "general_operand"))]
@@ -460,6 +559,60 @@
   }
 )
 
+;; Unpredicated LD[234].
+(define_expand "vec_load_lanes<mode><vsingle>"
+  [(set (match_operand:SVE_STRUCT 0 "register_operand")
+	(unspec:SVE_STRUCT
+	  [(match_dup 2)
+	   (match_operand:SVE_STRUCT 1 "memory_operand")]
+	  UNSPEC_LDN))]
+  "TARGET_SVE"
+  {
+    operands[2] = force_reg (<VPRED>mode, CONSTM1_RTX (<VPRED>mode));
+  }
+)
+
+;; Predicated LD[234].
+(define_insn "vec_mask_load_lanes<mode><vsingle>"
+  [(set (match_operand:SVE_STRUCT 0 "register_operand" "=w")
+	(unspec:SVE_STRUCT
+	  [(match_operand:<VPRED> 2 "register_operand" "Upl")
+	   (match_operand:SVE_STRUCT 1 "memory_operand" "m")]
+	  UNSPEC_LDN))]
+  "TARGET_SVE"
+  "ld<vector_count><Vesize>\t%0, %2/z, %1"
+)
+
+;; Unpredicated ST[234].  This is always a full update, so the dependence
+;; on the old value of the memory location (via (match_dup 0)) is redundant.
+;; There doesn't seem to be any obvious benefit to treating the all-true
+;; case differently though.  In particular, it's very unlikely that we'll
+;; only find out during RTL that a store_lanes is dead.
+(define_expand "vec_store_lanes<mode><vsingle>"
+  [(set (match_operand:SVE_STRUCT 0 "memory_operand")
+	(unspec:SVE_STRUCT
+	  [(match_dup 2)
+	   (match_operand:SVE_STRUCT 1 "register_operand")
+	   (match_dup 0)]
+	  UNSPEC_STN))]
+  "TARGET_SVE"
+  {
+    operands[2] = force_reg (<VPRED>mode, CONSTM1_RTX (<VPRED>mode));
+  }
+)
+
+;; Predicated ST[234].
+(define_insn "vec_mask_store_lanes<mode><vsingle>"
+  [(set (match_operand:SVE_STRUCT 0 "memory_operand" "+m")
+	(unspec:SVE_STRUCT
+	  [(match_operand:<VPRED> 2 "register_operand" "Upl")
+	   (match_operand:SVE_STRUCT 1 "register_operand" "w")
+	   (match_dup 0)]
+	  UNSPEC_STN))]
+  "TARGET_SVE"
+  "st<vector_count><Vesize>\t%1, %2, %0"
+)
+
 (define_expand "vec_perm<mode>"
   [(match_operand:SVE_ALL 0 "register_operand")
    (match_operand:SVE_ALL 1 "register_operand")
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index c5ed870ef57..964cc540d85 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -1178,9 +1178,15 @@ aarch64_classify_vector_mode (machine_mode mode)
 	  || inner == DImode
 	  || inner == DFmode))
     {
-      if (TARGET_SVE
-	  && known_eq (GET_MODE_BITSIZE (mode), BITS_PER_SVE_VECTOR))
-	return VEC_SVE_DATA;
+      if (TARGET_SVE)
+	{
+	  if (known_eq (GET_MODE_BITSIZE (mode), BITS_PER_SVE_VECTOR))
+	    return VEC_SVE_DATA;
+	  if (known_eq (GET_MODE_BITSIZE (mode), BITS_PER_SVE_VECTOR * 2)
+	      || known_eq (GET_MODE_BITSIZE (mode), BITS_PER_SVE_VECTOR * 3)
+	      || known_eq (GET_MODE_BITSIZE (mode), BITS_PER_SVE_VECTOR * 4))
+	    return VEC_SVE_DATA | VEC_STRUCT;
+	}
 
       /* This includes V1DF but not V1DI (which doesn't exist).
*/ if (TARGET_SIMD @@ -1208,6 +1214,18 @@ aarch64_sve_data_mode_p (machine_mode mode) return aarch64_classify_vector_mode (mode) & VEC_SVE_DATA; } +/* Implement target hook TARGET_ARRAY_MODE. */ +static opt_machine_mode +aarch64_array_mode (machine_mode mode, unsigned HOST_WIDE_INT nelems) +{ + if (aarch64_classify_vector_mode (mode) == VEC_SVE_DATA + && IN_RANGE (nelems, 2, 4)) + return mode_for_vector (GET_MODE_INNER (mode), + GET_MODE_NUNITS (mode) * nelems); + + return opt_machine_mode (); +} + /* Implement target hook TARGET_ARRAY_MODE_SUPPORTED_P. */ static bool aarch64_array_mode_supported_p (machine_mode mode, @@ -5590,6 +5608,18 @@ aarch64_classify_address (struct aarch64_address_info *info, ? offset_4bit_signed_scaled_p (mode, offset) : offset_9bit_signed_scaled_p (mode, offset)); + if (vec_flags == (VEC_SVE_DATA | VEC_STRUCT)) + { + poly_int64 end_offset = (offset + + GET_MODE_SIZE (mode) + - BYTES_PER_SVE_VECTOR); + return (type == ADDR_QUERY_M + ? offset_4bit_signed_scaled_p (mode, offset) + : (offset_9bit_signed_scaled_p (SVE_BYTE_MODE, offset) + && offset_9bit_signed_scaled_p (SVE_BYTE_MODE, + end_offset))); + } + if (vec_flags == VEC_SVE_PRED) return offset_9bit_signed_scaled_p (mode, offset); @@ -6302,6 +6332,20 @@ aarch64_print_vector_float_operand (FILE *f, rtx x, bool negate) return true; } +/* Return the equivalent letter for size. */ +static char +sizetochar (int size) +{ + switch (size) + { + case 64: return 'd'; + case 32: return 's'; + case 16: return 'h'; + case 8 : return 'b'; + default: gcc_unreachable (); + } +} + /* Print operand X to file F in a target specific manner according to CODE. The acceptable formatting commands given by CODE are: 'c': An integer or symbol address without a preceding # @@ -6589,7 +6633,18 @@ aarch64_print_operand (FILE *f, rtx x, int code) { case REG: if (aarch64_sve_data_mode_p (GET_MODE (x))) - asm_fprintf (f, "z%d", REGNO (x) - V0_REGNUM); + { + if (REG_NREGS (x) == 1) + asm_fprintf (f, "z%d", REGNO (x) - V0_REGNUM); + else + { + char suffix + = sizetochar (GET_MODE_UNIT_BITSIZE (GET_MODE (x))); + asm_fprintf (f, "{z%d.%c - z%d.%c}", + REGNO (x) - V0_REGNUM, suffix, + END_REGNO (x) - V0_REGNUM - 1, suffix); + } + } else asm_fprintf (f, "%s", reg_names [REGNO (x)]); break; @@ -12760,20 +12815,6 @@ aarch64_final_prescan_insn (rtx_insn *insn) } -/* Return the equivalent letter for size. */ -static char -sizetochar (int size) -{ - switch (size) - { - case 64: return 'd'; - case 32: return 's'; - case 16: return 'h'; - case 8 : return 'b'; - default: gcc_unreachable (); - } -} - /* Return true if BASE_OR_STEP is a valid immediate operand for an SVE INDEX instruction. */ @@ -13368,6 +13409,28 @@ aarch64_sve_ldr_operand_p (rtx op) && addr.type == ADDRESS_REG_IMM); } +/* Return true if OP is a valid MEM operand for an SVE_STRUCT mode. + We need to be able to access the individual pieces, so the range + is different from LD[234] and ST[234]. 
*/ +bool +aarch64_sve_struct_memory_operand_p (rtx op) +{ + if (!MEM_P (op)) + return false; + + machine_mode mode = GET_MODE (op); + struct aarch64_address_info addr; + if (!aarch64_classify_address (&addr, XEXP (op, 0), SVE_BYTE_MODE, false, + ADDR_QUERY_ANY) + || addr.type != ADDRESS_REG_IMM) + return false; + + poly_int64 first = addr.const_offset; + poly_int64 last = first + GET_MODE_SIZE (mode) - BYTES_PER_SVE_VECTOR; + return (offset_4bit_signed_scaled_p (SVE_BYTE_MODE, first) + && offset_4bit_signed_scaled_p (SVE_BYTE_MODE, last)); +} + /* Emit a register copy from operand to operand, taking care not to early-clobber source registers in the process. @@ -17376,6 +17439,9 @@ aarch64_libgcc_floating_mode_supported_p #define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT \ aarch64_builtin_support_vector_misalignment +#undef TARGET_ARRAY_MODE +#define TARGET_ARRAY_MODE aarch64_array_mode + #undef TARGET_ARRAY_MODE_SUPPORTED_P #define TARGET_ARRAY_MODE_SUPPORTED_P aarch64_array_mode_supported_p diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md index 728136a7fba..33eff586c2a 100644 --- a/gcc/config/aarch64/aarch64.md +++ b/gcc/config/aarch64/aarch64.md @@ -161,6 +161,8 @@ UNSPEC_PACK UNSPEC_FLOAT_CONVERT UNSPEC_WHILE_LO + UNSPEC_LDN + UNSPEC_STN ]) (define_c_enum "unspecv" [ diff --git a/gcc/config/aarch64/constraints.md b/gcc/config/aarch64/constraints.md index b004f7888e1..6cc4cadfd10 100644 --- a/gcc/config/aarch64/constraints.md +++ b/gcc/config/aarch64/constraints.md @@ -249,6 +249,12 @@ (and (match_code "mem") (match_test "aarch64_sve_ld1r_operand_p (op)"))) +(define_memory_constraint "Utx" + "@internal + An address valid for SVE structure mov patterns (as distinct from + LD[234] and ST[234] patterns)." + (match_operand 0 "aarch64_sve_struct_memory_operand")) + (define_constraint "Ufc" "A floating point constant which can be used with an\ FMOV immediate operation." diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md index 0fe42edbc61..607f9c3027a 100644 --- a/gcc/config/aarch64/iterators.md +++ b/gcc/config/aarch64/iterators.md @@ -250,6 +250,14 @@ (define_mode_iterator SVE_ALL [VNx16QI VNx8HI VNx4SI VNx2DI VNx8HF VNx4SF VNx2DF]) +;; All SVE vector structure modes. +(define_mode_iterator SVE_STRUCT [VNx32QI VNx16HI VNx8SI VNx4DI + VNx16HF VNx8SF VNx4DF + VNx48QI VNx24HI VNx12SI VNx6DI + VNx24HF VNx12SF VNx6DF + VNx64QI VNx32HI VNx16SI VNx8DI + VNx32HF VNx16SF VNx8DF]) + ;; All SVE vector modes that have 8-bit or 16-bit elements. (define_mode_iterator SVE_BH [VNx16QI VNx8HI VNx8HF]) @@ -604,9 +612,16 @@ ;; Equivalent of "size" for a vector element. (define_mode_attr Vesize [(VNx16QI "b") - (VNx8HI "h") (VNx8HF "h") - (VNx4SI "w") (VNx4SF "w") - (VNx2DI "d") (VNx2DF "d")]) + (VNx8HI "h") (VNx8HF "h") + (VNx4SI "w") (VNx4SF "w") + (VNx2DI "d") (VNx2DF "d") + (VNx32QI "b") (VNx48QI "b") (VNx64QI "b") + (VNx16HI "h") (VNx24HI "h") (VNx32HI "h") + (VNx16HF "h") (VNx24HF "h") (VNx32HF "h") + (VNx8SI "w") (VNx12SI "w") (VNx16SI "w") + (VNx8SF "w") (VNx12SF "w") (VNx16SF "w") + (VNx4DI "d") (VNx6DI "d") (VNx8DI "d") + (VNx4DF "d") (VNx6DF "d") (VNx8DF "d")]) ;; Vetype is used everywhere in scheduling type and assembly output, ;; sometimes they are not the same, for example HF modes on some @@ -983,17 +998,93 @@ (define_code_attr f16mac [(plus "a") (minus "s")]) -;; The predicate mode associated with an SVE data mode. +;; The number of subvectors in an SVE_STRUCT. 
+(define_mode_attr vector_count [(VNx32QI "2") (VNx16HI "2")
+				(VNx8SI "2") (VNx4DI "2")
+				(VNx16HF "2") (VNx8SF "2") (VNx4DF "2")
+				(VNx48QI "3") (VNx24HI "3")
+				(VNx12SI "3") (VNx6DI "3")
+				(VNx24HF "3") (VNx12SF "3") (VNx6DF "3")
+				(VNx64QI "4") (VNx32HI "4")
+				(VNx16SI "4") (VNx8DI "4")
+				(VNx32HF "4") (VNx16SF "4") (VNx8DF "4")])
+
+;; The number of instruction bytes needed for an SVE_STRUCT move.  This is
+;; equal to vector_count * 4.
+(define_mode_attr insn_length [(VNx32QI "8") (VNx16HI "8")
+			       (VNx8SI "8") (VNx4DI "8")
+			       (VNx16HF "8") (VNx8SF "8") (VNx4DF "8")
+			       (VNx48QI "12") (VNx24HI "12")
+			       (VNx12SI "12") (VNx6DI "12")
+			       (VNx24HF "12") (VNx12SF "12") (VNx6DF "12")
+			       (VNx64QI "16") (VNx32HI "16")
+			       (VNx16SI "16") (VNx8DI "16")
+			       (VNx32HF "16") (VNx16SF "16") (VNx8DF "16")])
+
+;; The type of a subvector in an SVE_STRUCT.
+(define_mode_attr VSINGLE [(VNx32QI "VNx16QI")
+			   (VNx16HI "VNx8HI") (VNx16HF "VNx8HF")
+			   (VNx8SI "VNx4SI") (VNx8SF "VNx4SF")
+			   (VNx4DI "VNx2DI") (VNx4DF "VNx2DF")
+			   (VNx48QI "VNx16QI")
+			   (VNx24HI "VNx8HI") (VNx24HF "VNx8HF")
+			   (VNx12SI "VNx4SI") (VNx12SF "VNx4SF")
+			   (VNx6DI "VNx2DI") (VNx6DF "VNx2DF")
+			   (VNx64QI "VNx16QI")
+			   (VNx32HI "VNx8HI") (VNx32HF "VNx8HF")
+			   (VNx16SI "VNx4SI") (VNx16SF "VNx4SF")
+			   (VNx8DI "VNx2DI") (VNx8DF "VNx2DF")])
+
+;; ...and again in lower case.
+(define_mode_attr vsingle [(VNx32QI "vnx16qi")
+			   (VNx16HI "vnx8hi") (VNx16HF "vnx8hf")
+			   (VNx8SI "vnx4si") (VNx8SF "vnx4sf")
+			   (VNx4DI "vnx2di") (VNx4DF "vnx2df")
+			   (VNx48QI "vnx16qi")
+			   (VNx24HI "vnx8hi") (VNx24HF "vnx8hf")
+			   (VNx12SI "vnx4si") (VNx12SF "vnx4sf")
+			   (VNx6DI "vnx2di") (VNx6DF "vnx2df")
+			   (VNx64QI "vnx16qi")
+			   (VNx32HI "vnx8hi") (VNx32HF "vnx8hf")
+			   (VNx16SI "vnx4si") (VNx16SF "vnx4sf")
+			   (VNx8DI "vnx2di") (VNx8DF "vnx2df")])
+
+;; The predicate mode associated with an SVE data mode.  For structure modes
+;; this is equivalent to the <VPRED> of the subvector mode.
 (define_mode_attr VPRED [(VNx16QI "VNx16BI")
 			 (VNx8HI "VNx8BI") (VNx8HF "VNx8BI")
 			 (VNx4SI "VNx4BI") (VNx4SF "VNx4BI")
-			 (VNx2DI "VNx2BI") (VNx2DF "VNx2BI")])
+			 (VNx2DI "VNx2BI") (VNx2DF "VNx2BI")
+			 (VNx32QI "VNx16BI")
+			 (VNx16HI "VNx8BI") (VNx16HF "VNx8BI")
+			 (VNx8SI "VNx4BI") (VNx8SF "VNx4BI")
+			 (VNx4DI "VNx2BI") (VNx4DF "VNx2BI")
+			 (VNx48QI "VNx16BI")
+			 (VNx24HI "VNx8BI") (VNx24HF "VNx8BI")
+			 (VNx12SI "VNx4BI") (VNx12SF "VNx4BI")
+			 (VNx6DI "VNx2BI") (VNx6DF "VNx2BI")
+			 (VNx64QI "VNx16BI")
+			 (VNx32HI "VNx8BI") (VNx32HF "VNx8BI")
+			 (VNx16SI "VNx4BI") (VNx16SF "VNx4BI")
+			 (VNx8DI "VNx2BI") (VNx8DF "VNx2BI")])
 
 ;; ...and again in lower case.
 (define_mode_attr vpred [(VNx16QI "vnx16bi")
 			 (VNx8HI "vnx8bi") (VNx8HF "vnx8bi")
 			 (VNx4SI "vnx4bi") (VNx4SF "vnx4bi")
-			 (VNx2DI "vnx2bi") (VNx2DF "vnx2bi")])
+			 (VNx2DI "vnx2bi") (VNx2DF "vnx2bi")
+			 (VNx32QI "vnx16bi")
+			 (VNx16HI "vnx8bi") (VNx16HF "vnx8bi")
+			 (VNx8SI "vnx4bi") (VNx8SF "vnx4bi")
+			 (VNx4DI "vnx2bi") (VNx4DF "vnx2bi")
+			 (VNx48QI "vnx16bi")
+			 (VNx24HI "vnx8bi") (VNx24HF "vnx8bi")
+			 (VNx12SI "vnx4bi") (VNx12SF "vnx4bi")
+			 (VNx6DI "vnx2bi") (VNx6DF "vnx2bi")
+			 (VNx64QI "vnx16bi")
+			 (VNx32HI "vnx8bi") (VNx32HF "vnx8bi")
+			 (VNx16SI "vnx4bi") (VNx16SF "vnx4bi")
+			 (VNx8DI "vnx2bi") (VNx8DF "vnx2bi")])
 
 ;; -------------------------------------------------------------------
 ;; Code Iterators
diff --git a/gcc/config/aarch64/predicates.md b/gcc/config/aarch64/predicates.md
index 7424f506a5c..701789a08d1 100644
--- a/gcc/config/aarch64/predicates.md
+++ b/gcc/config/aarch64/predicates.md
@@ -496,6 +496,14 @@
 	    (match_operand 0 "aarch64_sve_ldr_operand")
 	    (match_test "aarch64_mov_operand_p (op, mode)"))))
 
+(define_predicate "aarch64_sve_struct_memory_operand"
+  (and (match_code "mem")
+       (match_test "aarch64_sve_struct_memory_operand_p (op)")))
+
+(define_predicate "aarch64_sve_struct_nonimmediate_operand"
+  (ior (match_operand 0 "register_operand")
+       (match_operand 0 "aarch64_sve_struct_memory_operand")))
+
 ;; Doesn't include immediates, since those are handled by the move
 ;; patterns instead.
 (define_predicate "aarch64_sve_dup_operand"
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index 9997b287d9d..02cd4181344 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,3 +1,27 @@
+2018-01-13  Richard Sandiford
+	    Alan Hayward
+	    David Sherwood
+
+	* lib/target-supports.exp (check_effective_target_vect_load_lanes):
+	Return true for SVE too.
+	* g++.dg/vect/pr36648.cc: XFAIL for variable-length vectors
+	if load/store lanes are supported.
+	* gcc.dg/vect/slp-10.c: Likewise.
+	* gcc.dg/vect/slp-12c.c: Likewise.
+	* gcc.dg/vect/slp-17.c: Likewise.
+	* gcc.dg/vect/slp-33.c: Likewise.
+	* gcc.dg/vect/slp-6.c: Likewise.
+	* gcc.dg/vect/slp-cond-1.c: Likewise.
+	* gcc.dg/vect/slp-multitypes-11-big-array.c: Likewise.
+	* gcc.dg/vect/slp-multitypes-11.c: Likewise.
+	* gcc.dg/vect/slp-multitypes-12.c: Likewise.
+	* gcc.dg/vect/slp-perm-5.c: Remove XFAIL for variable-length SVE.
+	* gcc.dg/vect/slp-perm-6.c: Likewise.
+	* gcc.dg/vect/slp-perm-9.c: Likewise.
+	* gcc.dg/vect/slp-reduc-6.c: Remove XFAIL for variable-length vectors.
+	* gcc.dg/vect/vect-load-lanes-peeling-1.c: Expect an epilogue loop
+	for variable-length vectors.
+
 2018-01-13  Richard Sandiford
 	    Alan Hayward
 	    David Sherwood
diff --git a/gcc/testsuite/g++.dg/vect/pr36648.cc b/gcc/testsuite/g++.dg/vect/pr36648.cc
index 7bda82899d0..8d24d3d445d 100644
--- a/gcc/testsuite/g++.dg/vect/pr36648.cc
+++ b/gcc/testsuite/g++.dg/vect/pr36648.cc
@@ -25,6 +25,6 @@ int main() { }
    targets, ! vect_no_align is a sufficient test.  */
 
 /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target { { { ! vect_no_align } && { ! powerpc*-*-* } } || { powerpc*-*-* && vect_hw_misalign } } } } } */
-/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" { target { { { ! vect_no_align } && { ! powerpc*-*-* } } || { powerpc*-*-* && vect_hw_misalign } } } } } */
+/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" { target { { { ! vect_no_align } && { !
powerpc*-*-* } } || { powerpc*-*-* && vect_hw_misalign } } xfail { vect_variable_length && vect_load_lanes } } } } */ diff --git a/gcc/testsuite/gcc.dg/vect/slp-10.c b/gcc/testsuite/gcc.dg/vect/slp-10.c index 4701f7c5156..d5775ef737b 100644 --- a/gcc/testsuite/gcc.dg/vect/slp-10.c +++ b/gcc/testsuite/gcc.dg/vect/slp-10.c @@ -107,7 +107,7 @@ int main (void) /* { dg-final { scan-tree-dump-times "vectorized 3 loops" 1 "vect" {target {vect_uintfloat_cvt && vect_int_mult} } } } */ /* { dg-final { scan-tree-dump-times "vectorized 2 loops" 1 "vect" {target {{! { vect_uintfloat_cvt}} && vect_int_mult} } } } */ /* { dg-final { scan-tree-dump-times "vectorized 0 loops" 1 "vect" {target {{! { vect_uintfloat_cvt}} && { ! {vect_int_mult}}} } } } */ -/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 3 "vect" {target { vect_uintfloat_cvt && vect_int_mult } xfail vect_variable_length } } } */ +/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 3 "vect" {target { vect_uintfloat_cvt && vect_int_mult } xfail { vect_variable_length && vect_load_lanes } } } } */ /* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 2 "vect" {target {{! { vect_uintfloat_cvt}} && vect_int_mult} } } } */ /* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 0 "vect" {target {{! { vect_uintfloat_cvt}} && { ! {vect_int_mult}}} } } } */ diff --git a/gcc/testsuite/gcc.dg/vect/slp-12c.c b/gcc/testsuite/gcc.dg/vect/slp-12c.c index 6269faa09fe..df760327b5d 100644 --- a/gcc/testsuite/gcc.dg/vect/slp-12c.c +++ b/gcc/testsuite/gcc.dg/vect/slp-12c.c @@ -48,5 +48,5 @@ int main (void) /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target { vect_int_mult } } } } */ /* { dg-final { scan-tree-dump-times "vectorized 0 loops" 1 "vect" { target { ! vect_int_mult } } } } */ -/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" { target vect_int_mult xfail vect_variable_length } } } */ +/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" { target vect_int_mult xfail { vect_variable_length && vect_load_lanes } } } } */ /* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 0 "vect" { target { ! vect_int_mult } } } } */ diff --git a/gcc/testsuite/gcc.dg/vect/slp-17.c b/gcc/testsuite/gcc.dg/vect/slp-17.c index 75c5802f477..7f26884388a 100644 --- a/gcc/testsuite/gcc.dg/vect/slp-17.c +++ b/gcc/testsuite/gcc.dg/vect/slp-17.c @@ -51,5 +51,5 @@ int main (void) } /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */ -/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 2 "vect" { xfail vect_variable_length } } } */ +/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 2 "vect" { xfail { vect_variable_length && vect_load_lanes } } } } */ diff --git a/gcc/testsuite/gcc.dg/vect/slp-33.c b/gcc/testsuite/gcc.dg/vect/slp-33.c index ad74daf5dce..2404a5f19b4 100644 --- a/gcc/testsuite/gcc.dg/vect/slp-33.c +++ b/gcc/testsuite/gcc.dg/vect/slp-33.c @@ -105,7 +105,7 @@ int main (void) /* { dg-final { scan-tree-dump-times "vectorized 3 loops" 1 "vect" {target {vect_uintfloat_cvt && vect_int_mult} } } } */ /* { dg-final { scan-tree-dump-times "vectorized 2 loops" 1 "vect" {target {{! { vect_uintfloat_cvt}} && vect_int_mult} } } } */ /* { dg-final { scan-tree-dump-times "vectorized 0 loops" 1 "vect" {target {{! { vect_uintfloat_cvt}} && {! 
{vect_int_mult}}} } } } */ -/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 3 "vect" {target {vect_uintfloat_cvt && vect_int_mult} } } } */ +/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 3 "vect" {target {vect_uintfloat_cvt && vect_int_mult} xfail { vect_variable_length && vect_load_lanes } } } } */ /* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 2 "vect" {target {{! { vect_uintfloat_cvt}} && vect_int_mult} } } } */ /* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 0 "vect" {target {{! { vect_uintfloat_cvt}} && {! {vect_int_mult}}} } } } */ diff --git a/gcc/testsuite/gcc.dg/vect/slp-6.c b/gcc/testsuite/gcc.dg/vect/slp-6.c index 03d02a5381e..8205d542f4d 100644 --- a/gcc/testsuite/gcc.dg/vect/slp-6.c +++ b/gcc/testsuite/gcc.dg/vect/slp-6.c @@ -116,6 +116,6 @@ int main (void) /* { dg-final { scan-tree-dump-times "vectorized 3 loops" 1 "vect" {target vect_int_mult} } } */ /* { dg-final { scan-tree-dump-times "vectorized 2 loops" 1 "vect" {target { ! { vect_int_mult } } } } } */ -/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 3 "vect" {target vect_int_mult xfail vect_variable_length } } } */ +/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 3 "vect" {target vect_int_mult xfail { vect_variable_length && vect_load_lanes } } } } */ /* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 2 "vect" {target { ! { vect_int_mult } } } } } */ diff --git a/gcc/testsuite/gcc.dg/vect/slp-cond-1.c b/gcc/testsuite/gcc.dg/vect/slp-cond-1.c index f2bd1dc977e..fd9165fec81 100644 --- a/gcc/testsuite/gcc.dg/vect/slp-cond-1.c +++ b/gcc/testsuite/gcc.dg/vect/slp-cond-1.c @@ -122,4 +122,4 @@ main () return 0; } -/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 3 "vect" { xfail vect_variable_length } } } */ +/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 3 "vect" { xfail { vect_variable_length && vect_load_lanes } } } } */ diff --git a/gcc/testsuite/gcc.dg/vect/slp-multitypes-11-big-array.c b/gcc/testsuite/gcc.dg/vect/slp-multitypes-11-big-array.c index 7793862f06f..a3d0670cea9 100644 --- a/gcc/testsuite/gcc.dg/vect/slp-multitypes-11-big-array.c +++ b/gcc/testsuite/gcc.dg/vect/slp-multitypes-11-big-array.c @@ -55,5 +55,5 @@ int main (void) } /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target vect_unpack } } } */ -/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" { target vect_unpack } } } */ +/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" { target vect_unpack xfail { vect_variable_length && vect_load_lanes } } } } */ diff --git a/gcc/testsuite/gcc.dg/vect/slp-multitypes-11.c b/gcc/testsuite/gcc.dg/vect/slp-multitypes-11.c index 308153ba033..5200ed1cd94 100644 --- a/gcc/testsuite/gcc.dg/vect/slp-multitypes-11.c +++ b/gcc/testsuite/gcc.dg/vect/slp-multitypes-11.c @@ -49,5 +49,5 @@ int main (void) } /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target vect_unpack } } } */ -/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" { target vect_unpack } } } */ +/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" { target vect_unpack xfail { vect_variable_length && vect_load_lanes } } } } */ diff --git a/gcc/testsuite/gcc.dg/vect/slp-multitypes-12.c b/gcc/testsuite/gcc.dg/vect/slp-multitypes-12.c index 616dcab64a2..d37434593d4 100644 --- a/gcc/testsuite/gcc.dg/vect/slp-multitypes-12.c +++ 
b/gcc/testsuite/gcc.dg/vect/slp-multitypes-12.c @@ -62,5 +62,5 @@ int main (void) } /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */ -/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 3 "vect" { xfail vect_variable_length } } } */ +/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 3 "vect" { xfail { vect_variable_length && vect_load_lanes } } } } */ diff --git a/gcc/testsuite/gcc.dg/vect/slp-perm-5.c b/gcc/testsuite/gcc.dg/vect/slp-perm-5.c index e0760084129..52939133ca8 100644 --- a/gcc/testsuite/gcc.dg/vect/slp-perm-5.c +++ b/gcc/testsuite/gcc.dg/vect/slp-perm-5.c @@ -104,9 +104,7 @@ int main (int argc, const char* argv[]) } /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target vect_perm } } } */ -/* Fails for variable-length SVE because we fall back to Advanced SIMD - and use LD3/ST3. Will be fixed when SVE LOAD_LANES support is added. */ -/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 2 "vect" { target { vect_perm3_int && {! vect_load_lanes } } xfail { aarch64_sve && vect_variable_length } } } } */ +/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 2 "vect" { target { vect_perm3_int && { ! vect_load_lanes } } } } } */ /* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 0 "vect" { target vect_load_lanes } } } */ /* { dg-final { scan-tree-dump "note: Built SLP cancelled: can use load/store-lanes" "vect" { target { vect_perm3_int && vect_load_lanes } } } } */ /* { dg-final { scan-tree-dump "LOAD_LANES" "vect" { target vect_load_lanes } } } */ diff --git a/gcc/testsuite/gcc.dg/vect/slp-perm-6.c b/gcc/testsuite/gcc.dg/vect/slp-perm-6.c index 3ee2b926b53..b7d7657939f 100644 --- a/gcc/testsuite/gcc.dg/vect/slp-perm-6.c +++ b/gcc/testsuite/gcc.dg/vect/slp-perm-6.c @@ -103,10 +103,8 @@ int main (int argc, const char* argv[]) } /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target vect_perm } } } */ -/* Fails for variable-length SVE because we fall back to Advanced SIMD - and use LD3/ST3. Will be fixed when SVE LOAD_LANES support is added. */ -/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 2 "vect" { target { vect_perm3_int && {! vect_load_lanes } } xfail { aarch64_sve && vect_variable_length } } } } */ -/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" { target vect_load_lanes } } } */ +/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 2 "vect" { target { vect_perm3_int && { ! vect_load_lanes } } } } } */ +/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" { target vect_load_lanes xfail { vect_variable_length && vect_load_lanes } } } } */ /* { dg-final { scan-tree-dump "note: Built SLP cancelled: can use load/store-lanes" "vect" { target { vect_perm3_int && vect_load_lanes } } } } */ /* { dg-final { scan-tree-dump "LOAD_LANES" "vect" { target vect_load_lanes } } } */ /* { dg-final { scan-tree-dump "STORE_LANES" "vect" { target vect_load_lanes } } } */ diff --git a/gcc/testsuite/gcc.dg/vect/slp-perm-9.c b/gcc/testsuite/gcc.dg/vect/slp-perm-9.c index f7010799f6c..b01d493b6e7 100644 --- a/gcc/testsuite/gcc.dg/vect/slp-perm-9.c +++ b/gcc/testsuite/gcc.dg/vect/slp-perm-9.c @@ -57,9 +57,7 @@ int main (int argc, const char* argv[]) return 0; } -/* Fails for variable-length SVE because we fall back to Advanced SIMD - and use LD3/ST3. Will be fixed when SVE LOAD_LANES support is added. 
*/ -/* { dg-final { scan-tree-dump-times "vectorized 0 loops" 2 "vect" { target { ! { vect_perm_short || vect_load_lanes } } xfail { aarch64_sve && vect_variable_length } } } } */ +/* { dg-final { scan-tree-dump-times "vectorized 0 loops" 2 "vect" { target { ! { vect_perm_short || vect_load_lanes } } } } } */ /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target { vect_perm_short || vect_load_lanes } } } } */ /* { dg-final { scan-tree-dump-times "permutation requires at least three vectors" 1 "vect" { target { vect_perm_short && { ! vect_perm3_short } } } } } */ /* { dg-final { scan-tree-dump-not "permutation requires at least three vectors" "vect" { target vect_perm3_short } } } */ diff --git a/gcc/testsuite/gcc.dg/vect/slp-reduc-6.c b/gcc/testsuite/gcc.dg/vect/slp-reduc-6.c index 49a0ffc2168..88591c5bdcb 100644 --- a/gcc/testsuite/gcc.dg/vect/slp-reduc-6.c +++ b/gcc/testsuite/gcc.dg/vect/slp-reduc-6.c @@ -44,5 +44,5 @@ int main (void) /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 2 "vect" { xfail { vect_no_int_add || { ! { vect_unpack || vect_strided2 } } } } } } */ /* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 0 "vect" } } */ -/* { dg-final { scan-tree-dump-times "different interleaving chains in one node" 1 "vect" { target { ! vect_no_int_add } xfail vect_variable_length } } } */ +/* { dg-final { scan-tree-dump-times "different interleaving chains in one node" 1 "vect" { target { ! vect_no_int_add } } } } */ diff --git a/gcc/testsuite/gcc.dg/vect/vect-load-lanes-peeling-1.c b/gcc/testsuite/gcc.dg/vect/vect-load-lanes-peeling-1.c index c9cd104e8e5..447b32df97a 100644 --- a/gcc/testsuite/gcc.dg/vect/vect-load-lanes-peeling-1.c +++ b/gcc/testsuite/gcc.dg/vect/vect-load-lanes-peeling-1.c @@ -10,4 +10,4 @@ f (int *__restrict a, int *__restrict b) } /* { dg-final { scan-tree-dump-not "Data access with gaps" "vect" } } */ -/* { dg-final { scan-tree-dump-not "epilog loop required" "vect" } } */ +/* { dg-final { scan-tree-dump-not "epilog loop required" "vect" { xfail vect_variable_length } } } */ diff --git a/gcc/testsuite/lib/target-supports.exp b/gcc/testsuite/lib/target-supports.exp index 43ca15dfae6..aedb7980da2 100644 --- a/gcc/testsuite/lib/target-supports.exp +++ b/gcc/testsuite/lib/target-supports.exp @@ -6579,8 +6579,7 @@ proc check_effective_target_vect_load_lanes { } { } else { set et_vect_load_lanes 0 if { ([istarget arm*-*-*] && [check_effective_target_arm_neon_ok]) - || ([istarget aarch64*-*-*] - && ![check_effective_target_aarch64_sve]) } { + || [istarget aarch64*-*-*] } { set et_vect_load_lanes 1 } } -- 2.30.2
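As a companion to the testsuite changes, here is a hedged sketch of the kind
of three-way interleaved loop that the updated vect_load_lanes effective
target now covers on SVE.  The function is illustrative only and does not
appear in the testsuite:

/* Illustrative only.  A gap-free group of three consecutive loads per
   iteration is expected to become one LD3W group load per vector of
   results, with the result stored by a normal contiguous ST1W; as the
   vect-load-lanes-peeling-1.c change above notes, variable-length
   vectors may still need an epilogue loop.  */
void
sum_triples (int *restrict out, const int *restrict in, int n)
{
  for (int i = 0; i < n; ++i)
    out[i] = in[3 * i] + in[3 * i + 1] + in[3 * i + 2];
}

On fixed-length Advanced SIMD the same loop already used LD3; the new SVE
structure modes let variable-length vectorization take the same route
instead of falling back.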