+2018-01-13 Richard Sandiford <richard.sandiford@linaro.org>
+ Alan Hayward <alan.hayward@arm.com>
+ David Sherwood <david.sherwood@arm.com>
+
+ * config/aarch64/aarch64-modes.def: Define x2, x3 and x4 vector
+ modes for SVE.
+ * config/aarch64/aarch64-protos.h
+ (aarch64_sve_struct_memory_operand_p): Declare.
+ * config/aarch64/iterators.md (SVE_STRUCT): New mode iterator.
+ (vector_count, insn_length, VSINGLE, vsingle): New mode attributes.
+ (VPRED, vpred): Handle SVE structure modes.
+ * config/aarch64/constraints.md (Utx): New constraint.
+ * config/aarch64/predicates.md (aarch64_sve_struct_memory_operand)
+ (aarch64_sve_struct_nonimmediate_operand): New predicates.
+ * config/aarch64/aarch64.md (UNSPEC_LDN, UNSPEC_STN): New unspecs.
+ * config/aarch64/aarch64-sve.md (mov<mode>, *aarch64_sve_mov<mode>_le)
+ (*aarch64_sve_mov<mode>_be, pred_mov<mode>): New patterns for
+ structure modes. Split into pieces after RA.
+ (vec_load_lanes<mode><vsingle>, vec_mask_load_lanes<mode><vsingle>)
+ (vec_store_lanes<mode><vsingle>, vec_mask_store_lanes<mode><vsingle>):
+ New patterns.
+ * config/aarch64/aarch64.c (aarch64_classify_vector_mode): Handle
+ SVE structure modes.
+ (aarch64_classify_address): Likewise.
+ (sizetochar): Move earlier in file.
+ (aarch64_print_operand): Handle SVE register lists.
+ (aarch64_array_mode): New function.
+ (aarch64_sve_struct_memory_operand_p): Likewise.
+ (TARGET_ARRAY_MODE): Redefine.
+
2018-01-13 Richard Sandiford <richard.sandiford@linaro.org>
Alan Hayward <alan.hayward@arm.com>
David Sherwood <david.sherwood@arm.com>
/* Give SVE vectors the names normally used for 256-bit vectors.
The actual number depends on command-line flags. */
SVE_MODES (1, VNx16, VNx8, VNx4, VNx2)
+SVE_MODES (2, VNx32, VNx16, VNx8, VNx4)
+SVE_MODES (3, VNx48, VNx24, VNx12, VNx6)
+SVE_MODES (4, VNx64, VNx32, VNx16, VNx8)
/* Quad float: 128-bit floating mode for long doubles. */
FLOAT_MODE (TF, 16, ieee_quad_format);
bool aarch64_simd_mem_operand_p (rtx);
bool aarch64_sve_ld1r_operand_p (rtx);
bool aarch64_sve_ldr_operand_p (rtx);
+bool aarch64_sve_struct_memory_operand_p (rtx);
rtx aarch64_simd_vect_par_cnst_half (machine_mode, int, bool);
rtx aarch64_tls_get_addr (void);
tree aarch64_fold_builtin (tree, int, tree *, bool);
"st1<Vesize>\t%1.<Vetype>, %2, %0"
)
+;; SVE structure moves.
+(define_expand "mov<mode>"
+ [(set (match_operand:SVE_STRUCT 0 "nonimmediate_operand")
+ (match_operand:SVE_STRUCT 1 "general_operand"))]
+ "TARGET_SVE"
+ {
+ /* Big-endian loads and stores need to be done via LD1 and ST1;
+ see the comment at the head of the file for details. */
+ if ((MEM_P (operands[0]) || MEM_P (operands[1]))
+ && BYTES_BIG_ENDIAN)
+ {
+ gcc_assert (can_create_pseudo_p ());
+ aarch64_expand_sve_mem_move (operands[0], operands[1], <VPRED>mode);
+ DONE;
+ }
+
+ if (CONSTANT_P (operands[1]))
+ {
+ aarch64_expand_mov_immediate (operands[0], operands[1]);
+ DONE;
+ }
+ }
+)
+
+;; Unpredicated structure moves (little-endian).
+(define_insn "*aarch64_sve_mov<mode>_le"
+ [(set (match_operand:SVE_STRUCT 0 "aarch64_sve_nonimmediate_operand" "=w, Utr, w, w")
+ (match_operand:SVE_STRUCT 1 "aarch64_sve_general_operand" "Utr, w, w, Dn"))]
+ "TARGET_SVE && !BYTES_BIG_ENDIAN"
+ "#"
+ [(set_attr "length" "<insn_length>")]
+)
+
+;; Unpredicated structure moves (big-endian). Memory accesses require
+;; secondary reloads.
+(define_insn "*aarch64_sve_mov<mode>_be"
+ [(set (match_operand:SVE_STRUCT 0 "register_operand" "=w, w")
+ (match_operand:SVE_STRUCT 1 "aarch64_nonmemory_operand" "w, Dn"))]
+ "TARGET_SVE && BYTES_BIG_ENDIAN"
+ "#"
+ [(set_attr "length" "<insn_length>")]
+)
+
+;; Split unpredicated structure moves into pieces. This is the same
+;; for both big-endian and little-endian code, although it only needs
+;; to handle memory operands for little-endian code.
+(define_split
+ [(set (match_operand:SVE_STRUCT 0 "aarch64_sve_nonimmediate_operand")
+ (match_operand:SVE_STRUCT 1 "aarch64_sve_general_operand"))]
+ "TARGET_SVE && reload_completed"
+ [(const_int 0)]
+ {
+ rtx dest = operands[0];
+ rtx src = operands[1];
+ if (REG_P (dest) && REG_P (src))
+ aarch64_simd_emit_reg_reg_move (operands, <VSINGLE>mode, <vector_count>);
+ else
+ for (unsigned int i = 0; i < <vector_count>; ++i)
+ {
+ rtx subdest = simplify_gen_subreg (<VSINGLE>mode, dest, <MODE>mode,
+ i * BYTES_PER_SVE_VECTOR);
+ rtx subsrc = simplify_gen_subreg (<VSINGLE>mode, src, <MODE>mode,
+ i * BYTES_PER_SVE_VECTOR);
+ emit_insn (gen_rtx_SET (subdest, subsrc));
+ }
+ DONE;
+ }
+)
+
+;; Predicated structure moves. This works for both endiannesses but in
+;; practice is only useful for big-endian.
+(define_insn_and_split "pred_mov<mode>"
+ [(set (match_operand:SVE_STRUCT 0 "aarch64_sve_struct_nonimmediate_operand" "=w, Utx")
+ (unspec:SVE_STRUCT
+ [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
+ (match_operand:SVE_STRUCT 2 "aarch64_sve_struct_nonimmediate_operand" "Utx, w")]
+ UNSPEC_MERGE_PTRUE))]
+ "TARGET_SVE
+ && (register_operand (operands[0], <MODE>mode)
+ || register_operand (operands[2], <MODE>mode))"
+ "#"
+ "&& reload_completed"
+ [(const_int 0)]
+ {
+ for (unsigned int i = 0; i < <vector_count>; ++i)
+ {
+ rtx subdest = simplify_gen_subreg (<VSINGLE>mode, operands[0],
+ <MODE>mode,
+ i * BYTES_PER_SVE_VECTOR);
+ rtx subsrc = simplify_gen_subreg (<VSINGLE>mode, operands[2],
+ <MODE>mode,
+ i * BYTES_PER_SVE_VECTOR);
+ aarch64_emit_sve_pred_move (subdest, operands[1], subsrc);
+ }
+ DONE;
+ }
+ [(set_attr "length" "<insn_length>")]
+)
+
(define_expand "mov<mode>"
[(set (match_operand:PRED_ALL 0 "nonimmediate_operand")
(match_operand:PRED_ALL 1 "general_operand"))]
}
)
+;; Unpredicated LD[234].
+(define_expand "vec_load_lanes<mode><vsingle>"
+ [(set (match_operand:SVE_STRUCT 0 "register_operand")
+ (unspec:SVE_STRUCT
+ [(match_dup 2)
+ (match_operand:SVE_STRUCT 1 "memory_operand")]
+ UNSPEC_LDN))]
+ "TARGET_SVE"
+ {
+ operands[2] = force_reg (<VPRED>mode, CONSTM1_RTX (<VPRED>mode));
+ }
+)
+
+;; Predicated LD[234].
+(define_insn "vec_mask_load_lanes<mode><vsingle>"
+ [(set (match_operand:SVE_STRUCT 0 "register_operand" "=w")
+ (unspec:SVE_STRUCT
+ [(match_operand:<VPRED> 2 "register_operand" "Upl")
+ (match_operand:SVE_STRUCT 1 "memory_operand" "m")]
+ UNSPEC_LDN))]
+ "TARGET_SVE"
+ "ld<vector_count><Vesize>\t%0, %2/z, %1"
+)
+
+;; Unpredicated ST[234]. This is always a full update, so the dependence
+;; on the old value of the memory location (via (match_dup 0)) is redundant.
+;; There doesn't seem to be any obvious benefit to treating the all-true
+;; case differently though. In particular, it's very unlikely that we'll
+;; only find out during RTL that a store_lanes is dead.
+(define_expand "vec_store_lanes<mode><vsingle>"
+ [(set (match_operand:SVE_STRUCT 0 "memory_operand")
+ (unspec:SVE_STRUCT
+ [(match_dup 2)
+ (match_operand:SVE_STRUCT 1 "register_operand")
+ (match_dup 0)]
+ UNSPEC_STN))]
+ "TARGET_SVE"
+ {
+ operands[2] = force_reg (<VPRED>mode, CONSTM1_RTX (<VPRED>mode));
+ }
+)
+
+;; Predicated ST[234].
+(define_insn "vec_mask_store_lanes<mode><vsingle>"
+ [(set (match_operand:SVE_STRUCT 0 "memory_operand" "+m")
+ (unspec:SVE_STRUCT
+ [(match_operand:<VPRED> 2 "register_operand" "Upl")
+ (match_operand:SVE_STRUCT 1 "register_operand" "w")
+ (match_dup 0)]
+ UNSPEC_STN))]
+ "TARGET_SVE"
+ "st<vector_count><Vesize>\t%1, %2, %0"
+)
+
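;; Illustrative note, not part of the patch: for <MODE> == VNx32QI the
;; predicated load pattern above expands to a single LD2B.  Assuming the
;; destination is allocated to z0/z1, the governing predicate to p0 and
;; the address is a plain register base, the emitted assembly would be:
;;
;;	ld2b	{z0.b - z1.b}, p0/z, [x0]
;;
;; where the "{z0.b - z1.b}" register list comes from the new
;; aarch64_print_operand handling of multi-register SVE values.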
(define_expand "vec_perm<mode>"
[(match_operand:SVE_ALL 0 "register_operand")
(match_operand:SVE_ALL 1 "register_operand")
|| inner == DImode
|| inner == DFmode))
{
- if (TARGET_SVE
- && known_eq (GET_MODE_BITSIZE (mode), BITS_PER_SVE_VECTOR))
- return VEC_SVE_DATA;
+ if (TARGET_SVE)
+ {
+ if (known_eq (GET_MODE_BITSIZE (mode), BITS_PER_SVE_VECTOR))
+ return VEC_SVE_DATA;
+ if (known_eq (GET_MODE_BITSIZE (mode), BITS_PER_SVE_VECTOR * 2)
+ || known_eq (GET_MODE_BITSIZE (mode), BITS_PER_SVE_VECTOR * 3)
+ || known_eq (GET_MODE_BITSIZE (mode), BITS_PER_SVE_VECTOR * 4))
+ return VEC_SVE_DATA | VEC_STRUCT;
+ }
/* This includes V1DF but not V1DI (which doesn't exist). */
if (TARGET_SIMD
return aarch64_classify_vector_mode (mode) & VEC_SVE_DATA;
}
+/* Implement target hook TARGET_ARRAY_MODE. */
+static opt_machine_mode
+aarch64_array_mode (machine_mode mode, unsigned HOST_WIDE_INT nelems)
+{
+ if (aarch64_classify_vector_mode (mode) == VEC_SVE_DATA
+ && IN_RANGE (nelems, 2, 4))
+ return mode_for_vector (GET_MODE_INNER (mode),
+ GET_MODE_NUNITS (mode) * nelems);
+
+ return opt_machine_mode ();
+}
+
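/* Illustrative note, not part of the patch: the hook composes the
   structure mode from the element mode and a scaled unit count.  A
   hypothetical query for an array of three VNx4SI vectors would go
   through mode_for_vector (SImode, 3 * GET_MODE_NUNITS (VNx4SImode))
   and yield VNx12SImode, one of the modes added by SVE_MODES (3, ...)
   in aarch64-modes.def.  For example (hook member name assumed to
   follow the usual TARGET_ARRAY_MODE -> targetm.array_mode mapping):

     opt_machine_mode t = targetm.array_mode (VNx4SImode, 3);
     gcc_assert (t.require () == VNx12SImode);  */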
/* Implement target hook TARGET_ARRAY_MODE_SUPPORTED_P. */
static bool
aarch64_array_mode_supported_p (machine_mode mode,
? offset_4bit_signed_scaled_p (mode, offset)
: offset_9bit_signed_scaled_p (mode, offset));
+ if (vec_flags == (VEC_SVE_DATA | VEC_STRUCT))
+ {
+ poly_int64 end_offset = (offset
+ + GET_MODE_SIZE (mode)
+ - BYTES_PER_SVE_VECTOR);
+ return (type == ADDR_QUERY_M
+ ? offset_4bit_signed_scaled_p (mode, offset)
+ : (offset_9bit_signed_scaled_p (SVE_BYTE_MODE, offset)
+ && offset_9bit_signed_scaled_p (SVE_BYTE_MODE,
+ end_offset)));
+ }
+
if (vec_flags == VEC_SVE_PRED)
return offset_9bit_signed_scaled_p (mode, offset);
return true;
}
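/* Illustrative note, not part of the patch: a worked example of the
   structure-mode offset check above.  Assuming offset_9bit_signed_scaled_p
   accepts multiples of the vector length in the range [-256, 255], then
   for a 4-vector structure mode under ADDR_QUERY_ANY:

     offset     = k * BYTES_PER_SVE_VECTOR
     end_offset = offset + 4 * BYTES_PER_SVE_VECTOR - BYTES_PER_SVE_VECTOR
                = (k + 3) * BYTES_PER_SVE_VECTOR

   so both checks hold only for -256 <= k && k + 3 <= 255, i.e. base
   offsets of k * VL with k in [-256, 252], keeping every constituent
   vector reachable by an individual LDR/STR.  */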
+/* Return the equivalent letter for size. */
+static char
+sizetochar (int size)
+{
+ switch (size)
+ {
+ case 64: return 'd';
+ case 32: return 's';
+ case 16: return 'h';
+ case 8 : return 'b';
+ default: gcc_unreachable ();
+ }
+}
+
/* Print operand X to file F in a target specific manner according to CODE.
The acceptable formatting commands given by CODE are:
'c': An integer or symbol address without a preceding #
{
case REG:
if (aarch64_sve_data_mode_p (GET_MODE (x)))
- asm_fprintf (f, "z%d", REGNO (x) - V0_REGNUM);
+ {
+ if (REG_NREGS (x) == 1)
+ asm_fprintf (f, "z%d", REGNO (x) - V0_REGNUM);
+ else
+ {
+ char suffix
+ = sizetochar (GET_MODE_UNIT_BITSIZE (GET_MODE (x)));
+ asm_fprintf (f, "{z%d.%c - z%d.%c}",
+ REGNO (x) - V0_REGNUM, suffix,
+ END_REGNO (x) - V0_REGNUM - 1, suffix);
+ }
+ }
else
asm_fprintf (f, "%s", reg_names [REGNO (x)]);
break;
}
-/* Return the equivalent letter for size. */
-static char
-sizetochar (int size)
-{
- switch (size)
- {
- case 64: return 'd';
- case 32: return 's';
- case 16: return 'h';
- case 8 : return 'b';
- default: gcc_unreachable ();
- }
-}
-
/* Return true if BASE_OR_STEP is a valid immediate operand for an SVE INDEX
instruction. */
&& addr.type == ADDRESS_REG_IMM);
}
+/* Return true if OP is a valid MEM operand for an SVE_STRUCT mode.
+ We need to be able to access the individual pieces, so the range
+ is different from LD[234] and ST[234]. */
+bool
+aarch64_sve_struct_memory_operand_p (rtx op)
+{
+ if (!MEM_P (op))
+ return false;
+
+ machine_mode mode = GET_MODE (op);
+ struct aarch64_address_info addr;
+ if (!aarch64_classify_address (&addr, XEXP (op, 0), SVE_BYTE_MODE, false,
+ ADDR_QUERY_ANY)
+ || addr.type != ADDRESS_REG_IMM)
+ return false;
+
+ poly_int64 first = addr.const_offset;
+ poly_int64 last = first + GET_MODE_SIZE (mode) - BYTES_PER_SVE_VECTOR;
+ return (offset_4bit_signed_scaled_p (SVE_BYTE_MODE, first)
+ && offset_4bit_signed_scaled_p (SVE_BYTE_MODE, last));
+}
+
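/* Illustrative note, not part of the patch: a worked example of why this
   predicate is narrower than the LD[234]/ST[234] addressing modes.  The
   structure move patterns split into one access per constituent vector,
   and each piece must satisfy the same 4-bit signed scaled range, assumed
   here to be multiples of the vector length in [-8, 7].  For a 3-vector
   mode:

     first = k * BYTES_PER_SVE_VECTOR
     last  = (k + 2) * BYTES_PER_SVE_VECTOR

   so only k in [-8, 5] is accepted, whereas LD3/ST3 scale their immediate
   differently and can reach offsets this predicate must reject.  */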
/* Emit a register copy from operand to operand, taking care not to
early-clobber source registers in the process.
#define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT \
aarch64_builtin_support_vector_misalignment
+#undef TARGET_ARRAY_MODE
+#define TARGET_ARRAY_MODE aarch64_array_mode
+
#undef TARGET_ARRAY_MODE_SUPPORTED_P
#define TARGET_ARRAY_MODE_SUPPORTED_P aarch64_array_mode_supported_p
UNSPEC_PACK
UNSPEC_FLOAT_CONVERT
UNSPEC_WHILE_LO
+ UNSPEC_LDN
+ UNSPEC_STN
])
(define_c_enum "unspecv" [
(and (match_code "mem")
(match_test "aarch64_sve_ld1r_operand_p (op)")))
+(define_memory_constraint "Utx"
+ "@internal
+ An address valid for SVE structure mov patterns (as distinct from
+ LD[234] and ST[234] patterns)."
+ (match_operand 0 "aarch64_sve_struct_memory_operand"))
+
(define_constraint "Ufc"
"A floating point constant which can be used with an\
FMOV immediate operation."
(define_mode_iterator SVE_ALL [VNx16QI VNx8HI VNx4SI VNx2DI
VNx8HF VNx4SF VNx2DF])
+;; All SVE vector structure modes.
+(define_mode_iterator SVE_STRUCT [VNx32QI VNx16HI VNx8SI VNx4DI
+ VNx16HF VNx8SF VNx4DF
+ VNx48QI VNx24HI VNx12SI VNx6DI
+ VNx24HF VNx12SF VNx6DF
+ VNx64QI VNx32HI VNx16SI VNx8DI
+ VNx32HF VNx16SF VNx8DF])
+
;; All SVE vector modes that have 8-bit or 16-bit elements.
(define_mode_iterator SVE_BH [VNx16QI VNx8HI VNx8HF])
;; Equivalent of "size" for a vector element.
(define_mode_attr Vesize [(VNx16QI "b")
- (VNx8HI "h") (VNx8HF "h")
- (VNx4SI "w") (VNx4SF "w")
- (VNx2DI "d") (VNx2DF "d")])
+ (VNx8HI "h") (VNx8HF "h")
+ (VNx4SI "w") (VNx4SF "w")
+ (VNx2DI "d") (VNx2DF "d")
+ (VNx32QI "b") (VNx48QI "b") (VNx64QI "b")
+ (VNx16HI "h") (VNx24HI "h") (VNx32HI "h")
+ (VNx16HF "h") (VNx24HF "h") (VNx32HF "h")
+ (VNx8SI "w") (VNx12SI "w") (VNx16SI "w")
+ (VNx8SF "w") (VNx12SF "w") (VNx16SF "w")
+ (VNx4DI "d") (VNx6DI "d") (VNx8DI "d")
+ (VNx4DF "d") (VNx6DF "d") (VNx8DF "d")])
;; Vetype is used everywhere in scheduling type and assembly output,
;; sometimes they are not the same, for example HF modes on some
(define_code_attr f16mac [(plus "a") (minus "s")])
-;; The predicate mode associated with an SVE data mode.
+;; The number of subvectors in an SVE_STRUCT.
+(define_mode_attr vector_count [(VNx32QI "2") (VNx16HI "2")
+ (VNx8SI "2") (VNx4DI "2")
+ (VNx16HF "2") (VNx8SF "2") (VNx4DF "2")
+ (VNx48QI "3") (VNx24HI "3")
+ (VNx12SI "3") (VNx6DI "3")
+ (VNx24HF "3") (VNx12SF "3") (VNx6DF "3")
+ (VNx64QI "4") (VNx32HI "4")
+ (VNx16SI "4") (VNx8DI "4")
+ (VNx32HF "4") (VNx16SF "4") (VNx8DF "4")])
+
+;; The number of instruction bytes needed for an SVE_STRUCT move. This is
+;; equal to vector_count * 4.
+(define_mode_attr insn_length [(VNx32QI "8") (VNx16HI "8")
+ (VNx8SI "8") (VNx4DI "8")
+ (VNx16HF "8") (VNx8SF "8") (VNx4DF "8")
+ (VNx48QI "12") (VNx24HI "12")
+ (VNx12SI "12") (VNx6DI "12")
+ (VNx24HF "12") (VNx12SF "12") (VNx6DF "12")
+ (VNx64QI "16") (VNx32HI "16")
+ (VNx16SI "16") (VNx8DI "16")
+ (VNx32HF "16") (VNx16SF "16") (VNx8DF "16")])
+
+;; The type of a subvector in an SVE_STRUCT.
+(define_mode_attr VSINGLE [(VNx32QI "VNx16QI")
+ (VNx16HI "VNx8HI") (VNx16HF "VNx8HF")
+ (VNx8SI "VNx4SI") (VNx8SF "VNx4SF")
+ (VNx4DI "VNx2DI") (VNx4DF "VNx2DF")
+ (VNx48QI "VNx16QI")
+ (VNx24HI "VNx8HI") (VNx24HF "VNx8HF")
+ (VNx12SI "VNx4SI") (VNx12SF "VNx4SF")
+ (VNx6DI "VNx2DI") (VNx6DF "VNx2DF")
+ (VNx64QI "VNx16QI")
+ (VNx32HI "VNx8HI") (VNx32HF "VNx8HF")
+ (VNx16SI "VNx4SI") (VNx16SF "VNx4SF")
+ (VNx8DI "VNx2DI") (VNx8DF "VNx2DF")])
+
+;; ...and again in lower case.
+(define_mode_attr vsingle [(VNx32QI "vnx16qi")
+ (VNx16HI "vnx8hi") (VNx16HF "vnx8hf")
+ (VNx8SI "vnx4si") (VNx8SF "vnx4sf")
+ (VNx4DI "vnx2di") (VNx4DF "vnx2df")
+ (VNx48QI "vnx16qi")
+ (VNx24HI "vnx8hi") (VNx24HF "vnx8hf")
+ (VNx12SI "vnx4si") (VNx12SF "vnx4sf")
+ (VNx6DI "vnx2di") (VNx6DF "vnx2df")
+ (VNx64QI "vnx16qi")
+ (VNx32HI "vnx8hi") (VNx32HF "vnx8hf")
+ (VNx16SI "vnx4si") (VNx16SF "vnx4sf")
+ (VNx8DI "vnx2di") (VNx8DF "vnx2df")])
+
+;; The predicate mode associated with an SVE data mode. For structure modes
+;; this is equivalent to the <VPRED> of the subvector mode.
(define_mode_attr VPRED [(VNx16QI "VNx16BI")
(VNx8HI "VNx8BI") (VNx8HF "VNx8BI")
(VNx4SI "VNx4BI") (VNx4SF "VNx4BI")
- (VNx2DI "VNx2BI") (VNx2DF "VNx2BI")])
+ (VNx2DI "VNx2BI") (VNx2DF "VNx2BI")
+ (VNx32QI "VNx16BI")
+ (VNx16HI "VNx8BI") (VNx16HF "VNx8BI")
+ (VNx8SI "VNx4BI") (VNx8SF "VNx4BI")
+ (VNx4DI "VNx2BI") (VNx4DF "VNx2BI")
+ (VNx48QI "VNx16BI")
+ (VNx24HI "VNx8BI") (VNx24HF "VNx8BI")
+ (VNx12SI "VNx4BI") (VNx12SF "VNx4BI")
+ (VNx6DI "VNx2BI") (VNx6DF "VNx2BI")
+ (VNx64QI "VNx16BI")
+ (VNx32HI "VNx8BI") (VNx32HF "VNx8BI")
+ (VNx16SI "VNx4BI") (VNx16SF "VNx4BI")
+ (VNx8DI "VNx2BI") (VNx8DF "VNx2BI")])
;; ...and again in lower case.
(define_mode_attr vpred [(VNx16QI "vnx16bi")
(VNx8HI "vnx8bi") (VNx8HF "vnx8bi")
(VNx4SI "vnx4bi") (VNx4SF "vnx4bi")
- (VNx2DI "vnx2bi") (VNx2DF "vnx2bi")])
+ (VNx2DI "vnx2bi") (VNx2DF "vnx2bi")
+ (VNx32QI "vnx16bi")
+ (VNx16HI "vnx8bi") (VNx16HF "vnx8bi")
+ (VNx8SI "vnx4bi") (VNx8SF "vnx4bi")
+ (VNx4DI "vnx2bi") (VNx4DF "vnx2bi")
+ (VNx48QI "vnx16bi")
+ (VNx24HI "vnx8bi") (VNx24HF "vnx8bi")
+ (VNx12SI "vnx4bi") (VNx12SF "vnx4bi")
+ (VNx6DI "vnx2bi") (VNx6DF "vnx2bi")
+ (VNx64QI "vnx16bi")
+ (VNx32HI "vnx8bi") (VNx32HF "vnx8bi")
+ (VNx16SI "vnx4bi") (VNx16SF "vnx4bi")
+ (VNx8DI "vnx2bi") (VNx8DF "vnx2bi")])
;; -------------------------------------------------------------------
;; Code Iterators
(match_operand 0 "aarch64_sve_ldr_operand")
(match_test "aarch64_mov_operand_p (op, mode)"))))
+(define_predicate "aarch64_sve_struct_memory_operand"
+ (and (match_code "mem")
+ (match_test "aarch64_sve_struct_memory_operand_p (op)")))
+
+(define_predicate "aarch64_sve_struct_nonimmediate_operand"
+ (ior (match_operand 0 "register_operand")
+ (match_operand 0 "aarch64_sve_struct_memory_operand")))
+
;; Doesn't include immediates, since those are handled by the move
;; patterns instead.
(define_predicate "aarch64_sve_dup_operand"
+2018-01-13 Richard Sandiford <richard.sandiford@linaro.org>
+ Alan Hayward <alan.hayward@arm.com>
+ David Sherwood <david.sherwood@arm.com>
+
+ * lib/target-supports.exp (check_effective_target_vect_load_lanes):
+ Return true for SVE too.
+ * g++.dg/vect/pr36648.cc: XFAIL for variable-length vectors
+ if load/store lanes are supported.
+ * gcc.dg/vect/slp-10.c: Likewise.
+ * gcc.dg/vect/slp-12c.c: Likewise.
+ * gcc.dg/vect/slp-17.c: Likewise.
+ * gcc.dg/vect/slp-33.c: Likewise.
+ * gcc.dg/vect/slp-6.c: Likewise.
+ * gcc.dg/vect/slp-cond-1.c: Likewise.
+ * gcc.dg/vect/slp-multitypes-11-big-array.c: Likewise.
+ * gcc.dg/vect/slp-multitypes-11.c: Likewise.
+ * gcc.dg/vect/slp-multitypes-12.c: Likewise.
+ * gcc.dg/vect/slp-perm-5.c: Remove XFAIL for variable-length SVE.
+ * gcc.dg/vect/slp-perm-6.c: Likewise.
+ * gcc.dg/vect/slp-perm-9.c: Likewise.
+ * gcc.dg/vect/slp-reduc-6.c: Remove XFAIL for variable-length vectors.
+ * gcc.dg/vect/vect-load-lanes-peeling-1.c: Expect an epilogue loop
+ for variable-length vectors.
+
2018-01-13 Richard Sandiford <richard.sandiford@linaro.org>
Alan Hayward <alan.hayward@arm.com>
David Sherwood <david.sherwood@arm.com>
targets, ! vect_no_align is a sufficient test. */
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target { { { ! vect_no_align } && { ! powerpc*-*-* } } || { powerpc*-*-* && vect_hw_misalign } } } } } */
-/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" { target { { { ! vect_no_align } && { ! powerpc*-*-* } } || { powerpc*-*-* && vect_hw_misalign } } } } } */
+/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" { target { { { ! vect_no_align } && { ! powerpc*-*-* } } || { powerpc*-*-* && vect_hw_misalign } } xfail { vect_variable_length && vect_load_lanes } } } } */
/* { dg-final { scan-tree-dump-times "vectorized 3 loops" 1 "vect" {target {vect_uintfloat_cvt && vect_int_mult} } } } */
/* { dg-final { scan-tree-dump-times "vectorized 2 loops" 1 "vect" {target {{! { vect_uintfloat_cvt}} && vect_int_mult} } } } */
/* { dg-final { scan-tree-dump-times "vectorized 0 loops" 1 "vect" {target {{! { vect_uintfloat_cvt}} && { ! {vect_int_mult}}} } } } */
-/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 3 "vect" {target { vect_uintfloat_cvt && vect_int_mult } xfail vect_variable_length } } } */
+/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 3 "vect" {target { vect_uintfloat_cvt && vect_int_mult } xfail { vect_variable_length && vect_load_lanes } } } } */
/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 2 "vect" {target {{! { vect_uintfloat_cvt}} && vect_int_mult} } } } */
/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 0 "vect" {target {{! { vect_uintfloat_cvt}} && { ! {vect_int_mult}}} } } } */
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target { vect_int_mult } } } } */
/* { dg-final { scan-tree-dump-times "vectorized 0 loops" 1 "vect" { target { ! vect_int_mult } } } } */
-/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" { target vect_int_mult xfail vect_variable_length } } } */
+/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" { target vect_int_mult xfail { vect_variable_length && vect_load_lanes } } } } */
/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 0 "vect" { target { ! vect_int_mult } } } } */
}
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
-/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 2 "vect" { xfail vect_variable_length } } } */
+/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 2 "vect" { xfail { vect_variable_length && vect_load_lanes } } } } */
/* { dg-final { scan-tree-dump-times "vectorized 3 loops" 1 "vect" {target {vect_uintfloat_cvt && vect_int_mult} } } } */
/* { dg-final { scan-tree-dump-times "vectorized 2 loops" 1 "vect" {target {{! { vect_uintfloat_cvt}} && vect_int_mult} } } } */
/* { dg-final { scan-tree-dump-times "vectorized 0 loops" 1 "vect" {target {{! { vect_uintfloat_cvt}} && {! {vect_int_mult}}} } } } */
-/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 3 "vect" {target {vect_uintfloat_cvt && vect_int_mult} } } } */
+/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 3 "vect" {target {vect_uintfloat_cvt && vect_int_mult} xfail { vect_variable_length && vect_load_lanes } } } } */
/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 2 "vect" {target {{! { vect_uintfloat_cvt}} && vect_int_mult} } } } */
/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 0 "vect" {target {{! { vect_uintfloat_cvt}} && {! {vect_int_mult}}} } } } */
/* { dg-final { scan-tree-dump-times "vectorized 3 loops" 1 "vect" {target vect_int_mult} } } */
/* { dg-final { scan-tree-dump-times "vectorized 2 loops" 1 "vect" {target { ! { vect_int_mult } } } } } */
-/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 3 "vect" {target vect_int_mult xfail vect_variable_length } } } */
+/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 3 "vect" {target vect_int_mult xfail { vect_variable_length && vect_load_lanes } } } } */
/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 2 "vect" {target { ! { vect_int_mult } } } } } */
return 0;
}
-/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 3 "vect" { xfail vect_variable_length } } } */
+/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 3 "vect" { xfail { vect_variable_length && vect_load_lanes } } } } */
}
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target vect_unpack } } } */
-/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" { target vect_unpack } } } */
+/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" { target vect_unpack xfail { vect_variable_length && vect_load_lanes } } } } */
}
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target vect_unpack } } } */
-/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" { target vect_unpack } } } */
+/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" { target vect_unpack xfail { vect_variable_length && vect_load_lanes } } } } */
}
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
-/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 3 "vect" { xfail vect_variable_length } } } */
+/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 3 "vect" { xfail { vect_variable_length && vect_load_lanes } } } } */
}
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target vect_perm } } } */
-/* Fails for variable-length SVE because we fall back to Advanced SIMD
- and use LD3/ST3. Will be fixed when SVE LOAD_LANES support is added. */
-/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 2 "vect" { target { vect_perm3_int && {! vect_load_lanes } } xfail { aarch64_sve && vect_variable_length } } } } */
+/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 2 "vect" { target { vect_perm3_int && { ! vect_load_lanes } } } } } */
/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 0 "vect" { target vect_load_lanes } } } */
/* { dg-final { scan-tree-dump "note: Built SLP cancelled: can use load/store-lanes" "vect" { target { vect_perm3_int && vect_load_lanes } } } } */
/* { dg-final { scan-tree-dump "LOAD_LANES" "vect" { target vect_load_lanes } } } */
}
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target vect_perm } } } */
-/* Fails for variable-length SVE because we fall back to Advanced SIMD
- and use LD3/ST3. Will be fixed when SVE LOAD_LANES support is added. */
-/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 2 "vect" { target { vect_perm3_int && {! vect_load_lanes } } xfail { aarch64_sve && vect_variable_length } } } } */
-/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" { target vect_load_lanes } } } */
+/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 2 "vect" { target { vect_perm3_int && { ! vect_load_lanes } } } } } */
+/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" { target vect_load_lanes xfail { vect_variable_length && vect_load_lanes } } } } */
/* { dg-final { scan-tree-dump "note: Built SLP cancelled: can use load/store-lanes" "vect" { target { vect_perm3_int && vect_load_lanes } } } } */
/* { dg-final { scan-tree-dump "LOAD_LANES" "vect" { target vect_load_lanes } } } */
/* { dg-final { scan-tree-dump "STORE_LANES" "vect" { target vect_load_lanes } } } */
return 0;
}
-/* Fails for variable-length SVE because we fall back to Advanced SIMD
- and use LD3/ST3. Will be fixed when SVE LOAD_LANES support is added. */
-/* { dg-final { scan-tree-dump-times "vectorized 0 loops" 2 "vect" { target { ! { vect_perm_short || vect_load_lanes } } xfail { aarch64_sve && vect_variable_length } } } } */
+/* { dg-final { scan-tree-dump-times "vectorized 0 loops" 2 "vect" { target { ! { vect_perm_short || vect_load_lanes } } } } } */
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target { vect_perm_short || vect_load_lanes } } } } */
/* { dg-final { scan-tree-dump-times "permutation requires at least three vectors" 1 "vect" { target { vect_perm_short && { ! vect_perm3_short } } } } } */
/* { dg-final { scan-tree-dump-not "permutation requires at least three vectors" "vect" { target vect_perm3_short } } } */
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 2 "vect" { xfail { vect_no_int_add || { ! { vect_unpack || vect_strided2 } } } } } } */
/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 0 "vect" } } */
-/* { dg-final { scan-tree-dump-times "different interleaving chains in one node" 1 "vect" { target { ! vect_no_int_add } xfail vect_variable_length } } } */
+/* { dg-final { scan-tree-dump-times "different interleaving chains in one node" 1 "vect" { target { ! vect_no_int_add } } } } */
}
/* { dg-final { scan-tree-dump-not "Data access with gaps" "vect" } } */
-/* { dg-final { scan-tree-dump-not "epilog loop required" "vect" } } */
+/* { dg-final { scan-tree-dump-not "epilog loop required" "vect" { xfail vect_variable_length } } } */
} else {
set et_vect_load_lanes 0
if { ([istarget arm*-*-*] && [check_effective_target_arm_neon_ok])
- || ([istarget aarch64*-*-*]
- && ![check_effective_target_aarch64_sve]) } {
+ || [istarget aarch64*-*-*] } {
set et_vect_load_lanes 1
}
}