From 9b0700571fe390afcca32dcb3b2122640e628c95 Mon Sep 17 00:00:00 2001 From: Richard Sandiford Date: Fri, 10 Jan 2020 18:44:39 +0000 Subject: [PATCH] [AArch64] Make -msve-vector-bits=128 generate VL-specific code related_vector_mode and compatible_vector_types_p make it possible to generate 128-bit SVE code while still maintaining the distinction between SVE vectors and Advanced SIMD vectors. We can therefore generate VL-specific code for -msve-vector-bits=128 on little-endian targets. In theory we could do the same for big-endian targets, but it could have quite a high overhead; see the comment in the patch for details. 2020-01-10 Richard Sandiford gcc/ * doc/invoke.texi (-msve-vector-bits=): Document that -msve-vector-bits=128 now generates VL-specific code for little-endian targets. * config/aarch64/aarch64-sve-builtins.cc (register_builtin_types): Use build_vector_type_for_mode to construct the data vector types. * config/aarch64/aarch64.c (aarch64_convert_sve_vector_bits): Generate VL-specific code for -msve-vector-bits=128 on little-endian targets. (aarch64_simd_container_mode): Always prefer Advanced SIMD modes for 128-bit vectors. gcc/testsuite/ * gcc.target/aarch64/sve/struct_vect_1.c (N): Protect with #ifndef. * gcc.target/aarch64/sve/pcs/return_1_128.c: New test. * gcc.target/aarch64/sve/pcs/return_4_128.c: Likewise. * gcc.target/aarch64/sve/pcs/return_5_128.c: Likewise. * gcc.target/aarch64/sve/pcs/return_6_128.c: Likewise. * gcc.target/aarch64/sve/pcs/stack_clash_1_128.c: Likewise. * gcc.target/aarch64/sve/pcs/stack_clash_2_128.c: Likewise. * gcc.target/aarch64/sve/single_5.c: Likewise. * gcc.target/aarch64/sve/struct_vect_25.c: Likewise. * gcc.target/aarch64/sve/struct_vect_26.c: Likewise. From-SVN: r280125 --- gcc/ChangeLog | 12 + gcc/config/aarch64/aarch64-sve-builtins.cc | 10 +- gcc/config/aarch64/aarch64.c | 20 +- gcc/doc/invoke.texi | 9 +- gcc/testsuite/ChangeLog | 13 + .../gcc.target/aarch64/sve/pcs/return_1_128.c | 31 ++ .../gcc.target/aarch64/sve/pcs/return_4_128.c | 237 +++++++++++++++ .../gcc.target/aarch64/sve/pcs/return_5_128.c | 237 +++++++++++++++ .../gcc.target/aarch64/sve/pcs/return_6_128.c | 232 ++++++++++++++ .../aarch64/sve/pcs/stack_clash_1_128.c | 184 ++++++++++++ .../aarch64/sve/pcs/stack_clash_2_128.c | 284 ++++++++++++++++++ .../gcc.target/aarch64/sve/single_5.c | 27 ++ .../gcc.target/aarch64/sve/struct_vect_1.c | 2 + .../gcc.target/aarch64/sve/struct_vect_25.c | 38 +++ .../gcc.target/aarch64/sve/struct_vect_26.c | 35 +++ 15 files changed, 1358 insertions(+), 13 deletions(-) create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/pcs/return_1_128.c create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/pcs/return_4_128.c create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/pcs/return_5_128.c create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/pcs/return_6_128.c create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/pcs/stack_clash_1_128.c create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/pcs/stack_clash_2_128.c create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/single_5.c create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/struct_vect_25.c create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/struct_vect_26.c diff --git a/gcc/ChangeLog b/gcc/ChangeLog index ab5131a7337..d054902d428 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,15 @@ +2020-01-10 Richard Sandiford + + * doc/invoke.texi (-msve-vector-bits=): Document that + -msve-vector-bits=128 now generates VL-specific code for + little-endian targets. + * config/aarch64/aarch64-sve-builtins.cc (register_builtin_types): Use + build_vector_type_for_mode to construct the data vector types. + * config/aarch64/aarch64.c (aarch64_convert_sve_vector_bits): Generate + VL-specific code for -msve-vector-bits=128 on little-endian targets. + (aarch64_simd_container_mode): Always prefer Advanced SIMD modes + for 128-bit vectors. + 2020-01-10 Richard Sandiford * config/aarch64/aarch64.c (aarch64_evpc_sel): Fix gen_vcond_mask diff --git a/gcc/config/aarch64/aarch64-sve-builtins.cc b/gcc/config/aarch64/aarch64-sve-builtins.cc index 7aab5bdd0e9..cadfa15b6ea 100644 --- a/gcc/config/aarch64/aarch64-sve-builtins.cc +++ b/gcc/config/aarch64/aarch64-sve-builtins.cc @@ -3230,11 +3230,15 @@ register_builtin_types () } else { - unsigned int elbytes = tree_to_uhwi (TYPE_SIZE_UNIT (eltype)); + scalar_mode elmode = SCALAR_TYPE_MODE (eltype); + unsigned int elbytes = GET_MODE_SIZE (elmode); poly_uint64 nunits = exact_div (BYTES_PER_SVE_VECTOR, elbytes); - vectype = build_vector_type (eltype, nunits); + machine_mode mode + = aarch64_sve_data_mode (elmode, nunits).require (); + vectype = build_vector_type_for_mode (eltype, mode); gcc_assert (VECTOR_MODE_P (TYPE_MODE (vectype)) - && TYPE_MODE (vectype) == TYPE_MODE_RAW (vectype) + && TYPE_MODE (vectype) == mode + && TYPE_MODE_RAW (vectype) == mode && TYPE_ALIGN (vectype) == 128 && known_eq (wi::to_poly_offset (TYPE_SIZE (vectype)), BITS_PER_SVE_VECTOR)); diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c index 190380b9a2e..4288aaa3e33 100644 --- a/gcc/config/aarch64/aarch64.c +++ b/gcc/config/aarch64/aarch64.c @@ -13937,11 +13937,17 @@ aarch64_get_arch (enum aarch64_arch arch) static poly_uint16 aarch64_convert_sve_vector_bits (aarch64_sve_vector_bits_enum value) { - /* For now generate vector-length agnostic code for -msve-vector-bits=128. - This ensures we can clearly distinguish SVE and Advanced SIMD modes when - deciding which .md file patterns to use and when deciding whether - something is a legitimate address or constant. */ - if (value == SVE_SCALABLE || value == SVE_128) + /* 128-bit SVE and Advanced SIMD modes use different register layouts + on big-endian targets, so we would need to forbid subregs that convert + from one to the other. By default a reinterpret sequence would then + involve a store to memory in one mode and a load back in the other. + Even if we optimize that sequence using reverse instructions, + it would still be a significant potential overhead. + + For now, it seems better to generate length-agnostic code for that + case instead. */ + if (value == SVE_SCALABLE + || (value == SVE_128 && BYTES_BIG_ENDIAN)) return poly_uint16 (2, 2); else return (int) value / 64; @@ -16121,7 +16127,9 @@ aarch64_vq_mode (scalar_mode mode) static machine_mode aarch64_simd_container_mode (scalar_mode mode, poly_int64 width) { - if (TARGET_SVE && known_eq (width, BITS_PER_SVE_VECTOR)) + if (TARGET_SVE + && maybe_ne (width, 128) + && known_eq (width, BITS_PER_SVE_VECTOR)) return aarch64_full_sve_mode (mode).else_mode (word_mode); gcc_assert (known_eq (width, 64) || known_eq (width, 128)); diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi index 53df4b1fdf9..ba87fcce672 100644 --- a/gcc/doc/invoke.texi +++ b/gcc/doc/invoke.texi @@ -16262,10 +16262,11 @@ The possible values of @samp{bits} are: @samp{scalable}, @samp{128}, @samp{256}, @samp{512}, @samp{1024} and @samp{2048}. Specifying @samp{scalable} selects vector-length agnostic output. At present @samp{-msve-vector-bits=128} also generates vector-length -agnostic output. All other values generate vector-length specific code. -The behavior of these values may change in future releases and no value except -@samp{scalable} should be relied on for producing code that is portable across -different hardware SVE vector lengths. +agnostic output for big-endian targets. All other values generate +vector-length specific code. The behavior of these values may change +in future releases and no value except @samp{scalable} should be +relied on for producing code that is portable across different +hardware SVE vector lengths. The default is @samp{-msve-vector-bits=scalable}, which produces vector-length agnostic code. diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index 4c7a61591a6..948eb74edf3 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,16 @@ +2020-01-10 Richard Sandiford + + * gcc.target/aarch64/sve/struct_vect_1.c (N): Protect with #ifndef. + * gcc.target/aarch64/sve/pcs/return_1_128.c: New test. + * gcc.target/aarch64/sve/pcs/return_4_128.c: Likewise. + * gcc.target/aarch64/sve/pcs/return_5_128.c: Likewise. + * gcc.target/aarch64/sve/pcs/return_6_128.c: Likewise. + * gcc.target/aarch64/sve/pcs/stack_clash_1_128.c: Likewise. + * gcc.target/aarch64/sve/pcs/stack_clash_2_128.c: Likewise. + * gcc.target/aarch64/sve/single_5.c: Likewise. + * gcc.target/aarch64/sve/struct_vect_25.c: Likewise. + * gcc.target/aarch64/sve/struct_vect_26.c: Likewise. + 2020-01-10 Martin Sebor PR c/93132 diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pcs/return_1_128.c b/gcc/testsuite/gcc.target/aarch64/sve/pcs/return_1_128.c new file mode 100644 index 00000000000..633c15ec658 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/pcs/return_1_128.c @@ -0,0 +1,31 @@ +/* { dg-do compile { target aarch64_little_endian } } */ +/* { dg-options "-O -msve-vector-bits=128 -g" } */ +/* { dg-final { check-function-bodies "**" "" } } */ + +/* +** callee_pred: +** ldr p0, \[x0\] +** ret +*/ +__SVBool_t __attribute__((noipa)) +callee_pred (__SVBool_t *ptr) +{ + return *ptr; +} + +#include + +/* +** caller_pred: +** ... +** bl callee_pred +** cntp x0, p0, p0.b +** ldp x29, x30, \[sp\], 16 +** ret +*/ +uint64_t __attribute__((noipa)) +caller_pred (__SVBool_t *ptr1) +{ + __SVBool_t p = callee_pred (ptr1); + return svcntp_b8 (p, p); +} diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pcs/return_4_128.c b/gcc/testsuite/gcc.target/aarch64/sve/pcs/return_4_128.c new file mode 100644 index 00000000000..a111b04462f --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/pcs/return_4_128.c @@ -0,0 +1,237 @@ +/* { dg-do compile } */ +/* { dg-options "-O -msve-vector-bits=128 -g" } */ +/* { dg-final { check-function-bodies "**" "" } } */ + +#define CALLEE(SUFFIX, TYPE) \ + TYPE __attribute__((noipa)) \ + callee_##SUFFIX (TYPE *ptr) \ + { \ + return *ptr; \ + } + +/* +** callee_s8: +** ptrue (p[0-7])\.b, vl16 +** ld1b z0\.b, \1/z, \[x0\] +** ret +*/ +CALLEE (s8, __SVInt8_t) + +/* +** callee_u8: +** ptrue (p[0-7])\.b, vl16 +** ld1b z0\.b, \1/z, \[x0\] +** ret +*/ +CALLEE (u8, __SVUint8_t) + +/* +** callee_s16: +** ptrue (p[0-7])\.b, vl16 +** ld1h z0\.h, \1/z, \[x0\] +** ret +*/ +CALLEE (s16, __SVInt16_t) + +/* +** callee_u16: +** ptrue (p[0-7])\.b, vl16 +** ld1h z0\.h, \1/z, \[x0\] +** ret +*/ +CALLEE (u16, __SVUint16_t) + +/* +** callee_f16: +** ptrue (p[0-7])\.b, vl16 +** ld1h z0\.h, \1/z, \[x0\] +** ret +*/ +CALLEE (f16, __SVFloat16_t) + +/* +** callee_s32: +** ptrue (p[0-7])\.b, vl16 +** ld1w z0\.s, \1/z, \[x0\] +** ret +*/ +CALLEE (s32, __SVInt32_t) + +/* +** callee_u32: +** ptrue (p[0-7])\.b, vl16 +** ld1w z0\.s, \1/z, \[x0\] +** ret +*/ +CALLEE (u32, __SVUint32_t) + +/* +** callee_f32: +** ptrue (p[0-7])\.b, vl16 +** ld1w z0\.s, \1/z, \[x0\] +** ret +*/ +CALLEE (f32, __SVFloat32_t) + +/* +** callee_s64: +** ptrue (p[0-7])\.b, vl16 +** ld1d z0\.d, \1/z, \[x0\] +** ret +*/ +CALLEE (s64, __SVInt64_t) + +/* +** callee_u64: +** ptrue (p[0-7])\.b, vl16 +** ld1d z0\.d, \1/z, \[x0\] +** ret +*/ +CALLEE (u64, __SVUint64_t) + +/* +** callee_f64: +** ptrue (p[0-7])\.b, vl16 +** ld1d z0\.d, \1/z, \[x0\] +** ret +*/ +CALLEE (f64, __SVFloat64_t) + +#include + +#define CALLER(SUFFIX, TYPE) \ + typeof (svaddv (svptrue_b8 (), *(TYPE *) 0)) \ + __attribute__((noipa)) \ + caller_##SUFFIX (TYPE *ptr1) \ + { \ + return svaddv (svptrue_b8 (), callee_##SUFFIX (ptr1)); \ + } + +/* +** caller_s8: +** ... +** bl callee_s8 +** ptrue (p[0-7])\.b, vl16 +** saddv (d[0-9]+), \1, z0\.b +** fmov x0, \2 +** ldp x29, x30, \[sp\], 16 +** ret +*/ +CALLER (s8, __SVInt8_t) + +/* +** caller_u8: +** ... +** bl callee_u8 +** ptrue (p[0-7])\.b, vl16 +** uaddv (d[0-9]+), \1, z0\.b +** fmov x0, \2 +** ldp x29, x30, \[sp\], 16 +** ret +*/ +CALLER (u8, __SVUint8_t) + +/* +** caller_s16: +** ... +** bl callee_s16 +** ptrue (p[0-7])\.b, vl16 +** saddv (d[0-9]+), \1, z0\.h +** fmov x0, \2 +** ldp x29, x30, \[sp\], 16 +** ret +*/ +CALLER (s16, __SVInt16_t) + +/* +** caller_u16: +** ... +** bl callee_u16 +** ptrue (p[0-7])\.b, vl16 +** uaddv (d[0-9]+), \1, z0\.h +** fmov x0, \2 +** ldp x29, x30, \[sp\], 16 +** ret +*/ +CALLER (u16, __SVUint16_t) + +/* +** caller_f16: +** ... +** bl callee_f16 +** ptrue (p[0-7])\.b, vl16 +** faddv h0, \1, z0\.h +** ldp x29, x30, \[sp\], 16 +** ret +*/ +CALLER (f16, __SVFloat16_t) + +/* +** caller_s32: +** ... +** bl callee_s32 +** ptrue (p[0-7])\.b, vl16 +** saddv (d[0-9]+), \1, z0\.s +** fmov x0, \2 +** ldp x29, x30, \[sp\], 16 +** ret +*/ +CALLER (s32, __SVInt32_t) + +/* +** caller_u32: +** ... +** bl callee_u32 +** ptrue (p[0-7])\.b, vl16 +** uaddv (d[0-9]+), \1, z0\.s +** fmov x0, \2 +** ldp x29, x30, \[sp\], 16 +** ret +*/ +CALLER (u32, __SVUint32_t) + +/* +** caller_f32: +** ... +** bl callee_f32 +** ptrue (p[0-7])\.b, vl16 +** faddv s0, \1, z0\.s +** ldp x29, x30, \[sp\], 16 +** ret +*/ +CALLER (f32, __SVFloat32_t) + +/* +** caller_s64: +** ... +** bl callee_s64 +** ptrue (p[0-7])\.b, vl16 +** uaddv (d[0-9]+), \1, z0\.d +** fmov x0, \2 +** ldp x29, x30, \[sp\], 16 +** ret +*/ +CALLER (s64, __SVInt64_t) + +/* +** caller_u64: +** ... +** bl callee_u64 +** ptrue (p[0-7])\.b, vl16 +** uaddv (d[0-9]+), \1, z0\.d +** fmov x0, \2 +** ldp x29, x30, \[sp\], 16 +** ret +*/ +CALLER (u64, __SVUint64_t) + +/* +** caller_f64: +** ... +** bl callee_f64 +** ptrue (p[0-7])\.b, vl16 +** faddv d0, \1, z0\.d +** ldp x29, x30, \[sp\], 16 +** ret +*/ +CALLER (f64, __SVFloat64_t) diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pcs/return_5_128.c b/gcc/testsuite/gcc.target/aarch64/sve/pcs/return_5_128.c new file mode 100644 index 00000000000..5a674b44762 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/pcs/return_5_128.c @@ -0,0 +1,237 @@ +/* { dg-do compile } */ +/* { dg-options "-O -msve-vector-bits=128 -g" } */ +/* { dg-final { check-function-bodies "**" "" } } */ + +#include + +#define CALLEE(SUFFIX, TYPE) \ + TYPE __attribute__((noipa)) \ + callee_##SUFFIX (TYPE *ptr) \ + { \ + return *ptr; \ + } + +/* +** callee_s8: +** ptrue (p[0-7])\.b, vl16 +** ld1b z0\.b, \1/z, \[x0\] +** ret +*/ +CALLEE (s8, svint8_t) + +/* +** callee_u8: +** ptrue (p[0-7])\.b, vl16 +** ld1b z0\.b, \1/z, \[x0\] +** ret +*/ +CALLEE (u8, svuint8_t) + +/* +** callee_s16: +** ptrue (p[0-7])\.b, vl16 +** ld1h z0\.h, \1/z, \[x0\] +** ret +*/ +CALLEE (s16, svint16_t) + +/* +** callee_u16: +** ptrue (p[0-7])\.b, vl16 +** ld1h z0\.h, \1/z, \[x0\] +** ret +*/ +CALLEE (u16, svuint16_t) + +/* +** callee_f16: +** ptrue (p[0-7])\.b, vl16 +** ld1h z0\.h, \1/z, \[x0\] +** ret +*/ +CALLEE (f16, svfloat16_t) + +/* +** callee_s32: +** ptrue (p[0-7])\.b, vl16 +** ld1w z0\.s, \1/z, \[x0\] +** ret +*/ +CALLEE (s32, svint32_t) + +/* +** callee_u32: +** ptrue (p[0-7])\.b, vl16 +** ld1w z0\.s, \1/z, \[x0\] +** ret +*/ +CALLEE (u32, svuint32_t) + +/* +** callee_f32: +** ptrue (p[0-7])\.b, vl16 +** ld1w z0\.s, \1/z, \[x0\] +** ret +*/ +CALLEE (f32, svfloat32_t) + +/* +** callee_s64: +** ptrue (p[0-7])\.b, vl16 +** ld1d z0\.d, \1/z, \[x0\] +** ret +*/ +CALLEE (s64, svint64_t) + +/* +** callee_u64: +** ptrue (p[0-7])\.b, vl16 +** ld1d z0\.d, \1/z, \[x0\] +** ret +*/ +CALLEE (u64, svuint64_t) + +/* +** callee_f64: +** ptrue (p[0-7])\.b, vl16 +** ld1d z0\.d, \1/z, \[x0\] +** ret +*/ +CALLEE (f64, svfloat64_t) + +#define CALLER(SUFFIX, TYPE) \ + typeof (svaddv (svptrue_b8 (), *(TYPE *) 0)) \ + __attribute__((noipa)) \ + caller_##SUFFIX (TYPE *ptr1) \ + { \ + return svaddv (svptrue_b8 (), callee_##SUFFIX (ptr1)); \ + } + +/* +** caller_s8: +** ... +** bl callee_s8 +** ptrue (p[0-7])\.b, vl16 +** saddv (d[0-9]+), \1, z0\.b +** fmov x0, \2 +** ldp x29, x30, \[sp\], 16 +** ret +*/ +CALLER (s8, svint8_t) + +/* +** caller_u8: +** ... +** bl callee_u8 +** ptrue (p[0-7])\.b, vl16 +** uaddv (d[0-9]+), \1, z0\.b +** fmov x0, \2 +** ldp x29, x30, \[sp\], 16 +** ret +*/ +CALLER (u8, svuint8_t) + +/* +** caller_s16: +** ... +** bl callee_s16 +** ptrue (p[0-7])\.b, vl16 +** saddv (d[0-9]+), \1, z0\.h +** fmov x0, \2 +** ldp x29, x30, \[sp\], 16 +** ret +*/ +CALLER (s16, svint16_t) + +/* +** caller_u16: +** ... +** bl callee_u16 +** ptrue (p[0-7])\.b, vl16 +** uaddv (d[0-9]+), \1, z0\.h +** fmov x0, \2 +** ldp x29, x30, \[sp\], 16 +** ret +*/ +CALLER (u16, svuint16_t) + +/* +** caller_f16: +** ... +** bl callee_f16 +** ptrue (p[0-7])\.b, vl16 +** faddv h0, \1, z0\.h +** ldp x29, x30, \[sp\], 16 +** ret +*/ +CALLER (f16, svfloat16_t) + +/* +** caller_s32: +** ... +** bl callee_s32 +** ptrue (p[0-7])\.b, vl16 +** saddv (d[0-9]+), \1, z0\.s +** fmov x0, \2 +** ldp x29, x30, \[sp\], 16 +** ret +*/ +CALLER (s32, svint32_t) + +/* +** caller_u32: +** ... +** bl callee_u32 +** ptrue (p[0-7])\.b, vl16 +** uaddv (d[0-9]+), \1, z0\.s +** fmov x0, \2 +** ldp x29, x30, \[sp\], 16 +** ret +*/ +CALLER (u32, svuint32_t) + +/* +** caller_f32: +** ... +** bl callee_f32 +** ptrue (p[0-7])\.b, vl16 +** faddv s0, \1, z0\.s +** ldp x29, x30, \[sp\], 16 +** ret +*/ +CALLER (f32, svfloat32_t) + +/* +** caller_s64: +** ... +** bl callee_s64 +** ptrue (p[0-7])\.b, vl16 +** uaddv (d[0-9]+), \1, z0\.d +** fmov x0, \2 +** ldp x29, x30, \[sp\], 16 +** ret +*/ +CALLER (s64, svint64_t) + +/* +** caller_u64: +** ... +** bl callee_u64 +** ptrue (p[0-7])\.b, vl16 +** uaddv (d[0-9]+), \1, z0\.d +** fmov x0, \2 +** ldp x29, x30, \[sp\], 16 +** ret +*/ +CALLER (u64, svuint64_t) + +/* +** caller_f64: +** ... +** bl callee_f64 +** ptrue (p[0-7])\.b, vl16 +** faddv d0, \1, z0\.d +** ldp x29, x30, \[sp\], 16 +** ret +*/ +CALLER (f64, svfloat64_t) diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pcs/return_6_128.c b/gcc/testsuite/gcc.target/aarch64/sve/pcs/return_6_128.c new file mode 100644 index 00000000000..a470d9dbcf1 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/pcs/return_6_128.c @@ -0,0 +1,232 @@ +/* { dg-do compile { target aarch64_little_endian } } */ +/* { dg-options "-O -msve-vector-bits=128 -g" } */ +/* { dg-final { check-function-bodies "**" "" } } */ + +#include + +typedef int8_t svint8_t __attribute__ ((vector_size (16))); +typedef uint8_t svuint8_t __attribute__ ((vector_size (16))); + +typedef int16_t svint16_t __attribute__ ((vector_size (16))); +typedef uint16_t svuint16_t __attribute__ ((vector_size (16))); +typedef __fp16 svfloat16_t __attribute__ ((vector_size (16))); + +typedef int32_t svint32_t __attribute__ ((vector_size (16))); +typedef uint32_t svuint32_t __attribute__ ((vector_size (16))); +typedef float svfloat32_t __attribute__ ((vector_size (16))); + +typedef int64_t svint64_t __attribute__ ((vector_size (16))); +typedef uint64_t svuint64_t __attribute__ ((vector_size (16))); +typedef double svfloat64_t __attribute__ ((vector_size (16))); + +#define CALLEE(SUFFIX, TYPE) \ + TYPE __attribute__((noipa)) \ + callee_##SUFFIX (TYPE *ptr) \ + { \ + return *ptr; \ + } + +/* +** callee_s8: +** ldr q0, \[x0\] +** ret +*/ +CALLEE (s8, svint8_t) + +/* +** callee_u8: +** ldr q0, \[x0\] +** ret +*/ +CALLEE (u8, svuint8_t) + +/* +** callee_s16: +** ldr q0, \[x0\] +** ret +*/ +CALLEE (s16, svint16_t) + +/* +** callee_u16: +** ldr q0, \[x0\] +** ret +*/ +CALLEE (u16, svuint16_t) + +/* +** callee_f16: +** ldr q0, \[x0\] +** ret +*/ +CALLEE (f16, svfloat16_t) + +/* +** callee_s32: +** ldr q0, \[x0\] +** ret +*/ +CALLEE (s32, svint32_t) + +/* +** callee_u32: +** ldr q0, \[x0\] +** ret +*/ +CALLEE (u32, svuint32_t) + +/* +** callee_f32: +** ldr q0, \[x0\] +** ret +*/ +CALLEE (f32, svfloat32_t) + +/* +** callee_s64: +** ldr q0, \[x0\] +** ret +*/ +CALLEE (s64, svint64_t) + +/* +** callee_u64: +** ldr q0, \[x0\] +** ret +*/ +CALLEE (u64, svuint64_t) + +/* +** callee_f64: +** ldr q0, \[x0\] +** ret +*/ +CALLEE (f64, svfloat64_t) + +#define CALLER(SUFFIX, TYPE) \ + void __attribute__((noipa)) \ + caller_##SUFFIX (TYPE *ptr1, TYPE *ptr2) \ + { \ + *ptr2 = callee_##SUFFIX (ptr1); \ + } + +/* +** caller_s8: +** ... +** bl callee_s8 +** ... +** str q0, \[[^]]*\] +** ... +** ret +*/ +CALLER (s8, svint8_t) + +/* +** caller_u8: +** ... +** bl callee_u8 +** ... +** str q0, \[[^]]*\] +** ... +** ret +*/ +CALLER (u8, svuint8_t) + +/* +** caller_s16: +** ... +** bl callee_s16 +** ... +** str q0, \[[^]]*\] +** ... +** ret +*/ +CALLER (s16, svint16_t) + +/* +** caller_u16: +** ... +** bl callee_u16 +** ... +** str q0, \[[^]]*\] +** ... +** ret +*/ +CALLER (u16, svuint16_t) + +/* +** caller_f16: +** ... +** bl callee_f16 +** ... +** str q0, \[[^]]*\] +** ... +** ret +*/ +CALLER (f16, svfloat16_t) + +/* +** caller_s32: +** ... +** bl callee_s32 +** ... +** str q0, \[[^]]*\] +** ... +** ret +*/ +CALLER (s32, svint32_t) + +/* +** caller_u32: +** ... +** bl callee_u32 +** ... +** str q0, \[[^]]*\] +** ... +** ret +*/ +CALLER (u32, svuint32_t) + +/* +** caller_f32: +** ... +** bl callee_f32 +** ... +** str q0, \[[^]]*\] +** ... +** ret +*/ +CALLER (f32, svfloat32_t) + +/* +** caller_s64: +** ... +** bl callee_s64 +** ... +** str q0, \[[^]]*\] +** ... +** ret +*/ +CALLER (s64, svint64_t) + +/* +** caller_u64: +** ... +** bl callee_u64 +** ... +** str q0, \[[^]]*\] +** ... +** ret +*/ +CALLER (u64, svuint64_t) + +/* +** caller_f64: +** ... +** bl callee_f64 +** ... +** str q0, \[[^]]*\] +** ... +** ret +*/ +CALLER (f64, svfloat64_t) diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pcs/stack_clash_1_128.c b/gcc/testsuite/gcc.target/aarch64/sve/pcs/stack_clash_1_128.c new file mode 100644 index 00000000000..0eb7d10f7a0 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/pcs/stack_clash_1_128.c @@ -0,0 +1,184 @@ +/* { dg-do compile } */ +/* { dg-options "-O -mlittle-endian -fshrink-wrap -fstack-clash-protection -msve-vector-bits=128 -g" } */ +/* { dg-final { check-function-bodies "**" "" } } */ + +#pragma GCC aarch64 "arm_sve.h" + +/* +** test_1: +** sub sp, sp, #272 +** str p4, \[sp\] +** str p5, \[sp, #1, mul vl\] +** str p6, \[sp, #2, mul vl\] +** str p7, \[sp, #3, mul vl\] +** str p8, \[sp, #4, mul vl\] +** str p9, \[sp, #5, mul vl\] +** str p10, \[sp, #6, mul vl\] +** str p11, \[sp, #7, mul vl\] +** str z8, \[sp, #1, mul vl\] +** str z9, \[sp, #2, mul vl\] +** str z10, \[sp, #3, mul vl\] +** str z11, \[sp, #4, mul vl\] +** str z12, \[sp, #5, mul vl\] +** str z13, \[sp, #6, mul vl\] +** str z14, \[sp, #7, mul vl\] +** str z15, \[sp, #8, mul vl\] +** str z16, \[sp, #9, mul vl\] +** str z17, \[sp, #10, mul vl\] +** str z18, \[sp, #11, mul vl\] +** str z19, \[sp, #12, mul vl\] +** str z20, \[sp, #13, mul vl\] +** str z21, \[sp, #14, mul vl\] +** str z22, \[sp, #15, mul vl\] +** str z23, \[sp, #16, mul vl\] +** ptrue p0\.b, vl16 +** ldr z8, \[sp, #1, mul vl\] +** ldr z9, \[sp, #2, mul vl\] +** ldr z10, \[sp, #3, mul vl\] +** ldr z11, \[sp, #4, mul vl\] +** ldr z12, \[sp, #5, mul vl\] +** ldr z13, \[sp, #6, mul vl\] +** ldr z14, \[sp, #7, mul vl\] +** ldr z15, \[sp, #8, mul vl\] +** ldr z16, \[sp, #9, mul vl\] +** ldr z17, \[sp, #10, mul vl\] +** ldr z18, \[sp, #11, mul vl\] +** ldr z19, \[sp, #12, mul vl\] +** ldr z20, \[sp, #13, mul vl\] +** ldr z21, \[sp, #14, mul vl\] +** ldr z22, \[sp, #15, mul vl\] +** ldr z23, \[sp, #16, mul vl\] +** ldr p4, \[sp\] +** ldr p5, \[sp, #1, mul vl\] +** ldr p6, \[sp, #2, mul vl\] +** ldr p7, \[sp, #3, mul vl\] +** ldr p8, \[sp, #4, mul vl\] +** ldr p9, \[sp, #5, mul vl\] +** ldr p10, \[sp, #6, mul vl\] +** ldr p11, \[sp, #7, mul vl\] +** add sp, sp, #?272 +** ret +*/ +svbool_t +test_1 (void) +{ + asm volatile ("" ::: + "z0", "z1", "z2", "z3", "z4", "z5", "z6", "z7", + "z8", "z9", "z10", "z11", "z12", "z13", "z14", "z15", + "z16", "z17", "z18", "z19", "z20", "z21", "z22", "z23", + "z24", "z25", "z26", "z27", "z28", "z29", "z30", "z31", + "p0", "p1", "p2", "p3", "p4", "p5", "p6", "p7", + "p8", "p9", "p10", "p11", "p12", "p13", "p14", "p15"); + return svptrue_b8 (); +} + +/* +** test_2: +** ptrue p0\.b, vl16 +** ret +*/ +svbool_t +test_2 (void) +{ + asm volatile ("" ::: + "z0", "z1", "z2", "z3", "z4", "z5", "z6", "z7", + "z24", "z25", "z26", "z27", "z28", "z29", "z30", "z31", + "p0", "p1", "p2", "p3", "p12", "p13", "p14", "p15"); + return svptrue_b8 (); +} + +/* +** test_3: +** sub sp, sp, #96 +** str p5, \[sp\] +** str p6, \[sp, #1, mul vl\] +** str p11, \[sp, #2, mul vl\] +** str z8, \[sp, #1, mul vl\] +** str z13, \[sp, #2, mul vl\] +** str z19, \[sp, #3, mul vl\] +** str z20, \[sp, #4, mul vl\] +** str z22, \[sp, #5, mul vl\] +** ptrue p0\.b, vl16 +** ldr z8, \[sp, #1, mul vl\] +** ldr z13, \[sp, #2, mul vl\] +** ldr z19, \[sp, #3, mul vl\] +** ldr z20, \[sp, #4, mul vl\] +** ldr z22, \[sp, #5, mul vl\] +** ldr p5, \[sp\] +** ldr p6, \[sp, #1, mul vl\] +** ldr p11, \[sp, #2, mul vl\] +** add sp, sp, #?96 +** ret +*/ +svbool_t +test_3 (void) +{ + asm volatile ("" ::: + "z8", "z13", "z19", "z20", "z22", + "p5", "p6", "p11"); + return svptrue_b8 (); +} + +/* +** test_4: +** sub sp, sp, #16 +** str p4, \[sp\] +** ptrue p0\.b, vl16 +** ldr p4, \[sp\] +** add sp, sp, #?16 +** ret +*/ +svbool_t +test_4 (void) +{ + asm volatile ("" ::: "p4"); + return svptrue_b8 (); +} + +/* +** test_5: +** sub sp, sp, #16 +** str z15, \[sp\] +** ptrue p0\.b, vl16 +** ldr z15, \[sp\] +** add sp, sp, #?16 +** ret +*/ +svbool_t +test_5 (void) +{ + asm volatile ("" ::: "z15"); + return svptrue_b8 (); +} + +/* +** test_6: +** sub sp, sp, #16 +** str z15, \[sp\] +** mov z0\.b, #1 +** ldr z15, \[sp\] +** add sp, sp, #?16 +** ret +*/ +svint8_t +test_6 (svbool_t p0, svbool_t p1, svbool_t p2, svbool_t p3) +{ + asm volatile ("" :: "Upa" (p0), "Upa" (p1), "Upa" (p2), "Upa" (p3) : "z15"); + return svdup_s8 (1); +} + +/* +** test_7: +** sub sp, sp, #16 +** str z16, \[sp\] +** ptrue p0\.b, vl16 +** ldr z16, \[sp\] +** add sp, sp, #?16 +** ret +*/ +svbool_t +test_7 (void) +{ + asm volatile ("" ::: "z16"); + return svptrue_b8 (); +} diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pcs/stack_clash_2_128.c b/gcc/testsuite/gcc.target/aarch64/sve/pcs/stack_clash_2_128.c new file mode 100644 index 00000000000..50242d5524d --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/pcs/stack_clash_2_128.c @@ -0,0 +1,284 @@ +/* { dg-do compile } */ +/* { dg-options "-O -fshrink-wrap -fstack-clash-protection -msve-vector-bits=128 -g" } */ +/* { dg-final { check-function-bodies "**" "" } } */ + +#pragma GCC aarch64 "arm_sve.h" + +svbool_t take_stack_args (volatile void *, void *, int, int, int, + int, int, int, int); + +/* +** test_1: +** sub sp, sp, #32 +** str p4, \[sp\] +** ... +** ptrue p0\.b, vl16 +** ldr p4, \[sp\] +** add sp, sp, #?32 +** ret +*/ +svbool_t +test_1 (void) +{ + volatile int x = 1; + asm volatile ("" ::: "p4"); + return svptrue_b8 (); +} + +/* +** test_2: +** sub sp, sp, #64 +** stp x24, x25, \[sp, 16\] +** str x26, \[sp, 32\] +** str p4, \[sp\] +** ... +** ptrue p0\.b, vl16 +** ldr p4, \[sp\] +** ldp x24, x25, \[sp, 16\] +** ldr x26, \[sp, 32\] +** add sp, sp, #?64 +** ret +*/ +svbool_t +test_2 (void) +{ + volatile int x = 1; + asm volatile ("" ::: "p4", "x24", "x25", "x26"); + return svptrue_b8 (); +} + +/* +** test_3: +** mov x12, #?4144 +** sub sp, sp, x12 +** stp x24, x25, \[sp, 16\] +** str x26, \[sp, 32\] +** str p4, \[sp\] +** ... +** ptrue p0\.b, vl16 +** ldr p4, \[sp\] +** ldp x24, x25, \[sp, 16\] +** ldr x26, \[sp, 32\] +** add sp, sp, x12 +** ret +*/ +svbool_t +test_3 (void) +{ + volatile int x[1024]; + asm volatile ("" :: "r" (x) : "p4", "x24", "x25", "x26"); + return svptrue_b8 (); +} + +/* +** test_4: +** sub sp, sp, #32 +** str p4, \[sp\] +** ... +** ptrue p0\.h, vl8 +** ldr p4, \[sp\] +** add sp, sp, #?32 +** ret +*/ +svbool_t +test_4 (void) +{ + volatile svint32_t b; + b = svdup_s32 (1); + asm volatile ("" ::: "p4"); + return svptrue_b16 (); +} + +/* +** test_5: +** sub sp, sp, #64 +** stp x24, x25, \[sp, 16\] +** str x26, \[sp, 32\] +** str p4, \[sp\] +** ... +** ptrue p0\.h, vl8 +** ldr p4, \[sp\] +** ldp x24, x25, \[sp, 16\] +** ldr x26, \[sp, 32\] +** add sp, sp, #?64 +** ret +*/ +svbool_t +test_5 (void) +{ + volatile svint32_t b; + b = svdup_s32 (1); + asm volatile ("" ::: "p4", "x24", "x25", "x26"); + return svptrue_b16 (); +} + +/* +** test_6: +** stp x29, x30, \[sp, -16\]! +** mov x29, sp +** sub sp, sp, #16 +** str p4, \[sp\] +** ... +** ptrue p0\.b, vl16 +** add sp, sp, #?16 +** ldr p4, \[sp\] +** add sp, sp, #?16 +** ldp x29, x30, \[sp\], 16 +** ret +*/ +svbool_t +test_6 (void) +{ + take_stack_args (0, 0, 1, 2, 3, 4, 5, 6, 7); + asm volatile ("" ::: "p4"); + return svptrue_b8 (); +} + +/* +** test_7: +** mov x12, #?4128 +** sub sp, sp, x12 +** stp x29, x30, \[sp, 16\] +** add x29, sp, #?16 +** str p4, \[sp\] +** sub sp, sp, #16 +** ... +** ptrue p0\.b, vl16 +** add sp, sp, #?16 +** ldr p4, \[sp\] +** add sp, sp, #?16 +** ldp x29, x30, \[sp\] +** mov x12, #?4112 +** add sp, sp, x12 +** ret +*/ +svbool_t +test_7 (void) +{ + volatile int x[1024]; + take_stack_args (x, 0, 1, 2, 3, 4, 5, 6, 7); + asm volatile ("" ::: "p4"); + return svptrue_b8 (); +} + +/* +** test_8: +** mov x12, #?4160 +** sub sp, sp, x12 +** stp x29, x30, \[sp, 16\] +** add x29, sp, #?16 +** stp x24, x25, \[sp, 32\] +** str x26, \[sp, 48\] +** str p4, \[sp\] +** sub sp, sp, #16 +** ... +** ptrue p0\.b, vl16 +** add sp, sp, #?16 +** ldr p4, \[sp\] +** add sp, sp, #?16 +** ldp x24, x25, \[sp, 16\] +** ldr x26, \[sp, 32\] +** ldp x29, x30, \[sp\] +** mov x12, #?4144 +** add sp, sp, x12 +** ret +*/ +svbool_t +test_8 (void) +{ + volatile int x[1024]; + take_stack_args (x, 0, 1, 2, 3, 4, 5, 6, 7); + asm volatile ("" ::: "p4", "x24", "x25", "x26"); + return svptrue_b8 (); +} + +/* +** test_9: +** mov x12, #?4128 +** sub sp, sp, x12 +** stp x29, x30, \[sp, 16\] +** add x29, sp, #?16 +** str p4, \[sp\] +** sub sp, sp, #16 +** ... +** ptrue p0\.b, vl16 +** sub sp, x29, #16 +** ldr p4, \[sp\] +** add sp, sp, #?16 +** ldp x29, x30, \[sp\] +** mov x12, #?4112 +** add sp, sp, x12 +** ret +*/ +svbool_t +test_9 (int n) +{ + volatile int x[1024]; + take_stack_args (x, __builtin_alloca (n), 1, 2, 3, 4, 5, 6, 7); + asm volatile ("" ::: "p4"); + return svptrue_b8 (); +} + +/* +** test_10: +** mov x12, #?4160 +** sub sp, sp, x12 +** stp x29, x30, \[sp, 16\] +** add x29, sp, #?16 +** stp x24, x25, \[sp, 32\] +** str x26, \[sp, 48\] +** str p4, \[sp\] +** sub sp, sp, #16 +** ... +** ptrue p0\.b, vl16 +** sub sp, x29, #16 +** ldr p4, \[sp\] +** add sp, sp, #?16 +** ldp x24, x25, \[sp, 16\] +** ldr x26, \[sp, 32\] +** ldp x29, x30, \[sp\] +** mov x12, #?4144 +** add sp, sp, x12 +** ret +*/ +svbool_t +test_10 (int n) +{ + volatile int x[1024]; + take_stack_args (x, __builtin_alloca (n), 1, 2, 3, 4, 5, 6, 7); + asm volatile ("" ::: "p4", "x24", "x25", "x26"); + return svptrue_b8 (); +} + +/* +** test_11: +** sub sp, sp, #65536 +** str xzr, \[sp, 1024\] +** mov x12, #?64464 +** sub sp, sp, x12 +** stp x29, x30, \[sp, 16\] +** add x29, sp, #?16 +** stp x24, x25, \[sp, 32\] +** str x26, \[sp, 48\] +** str p4, \[sp\] +** sub sp, sp, #16 +** ... +** ptrue p0\.b, vl16 +** sub sp, x29, #16 +** ldr p4, \[sp\] +** add sp, sp, #?16 +** ldp x24, x25, \[sp, 16\] +** ldr x26, \[sp, 32\] +** ldp x29, x30, \[sp\] +** add sp, sp, #?3008 +** add sp, sp, #?126976 +** ret +*/ +svbool_t +test_11 (int n) +{ + volatile int x[0x7ee4]; + take_stack_args (x, __builtin_alloca (n), 1, 2, 3, 4, 5, 6, 7); + asm volatile ("" ::: "p4", "x24", "x25", "x26"); + return svptrue_b8 (); +} diff --git a/gcc/testsuite/gcc.target/aarch64/sve/single_5.c b/gcc/testsuite/gcc.target/aarch64/sve/single_5.c new file mode 100644 index 00000000000..7d16205b28c --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/single_5.c @@ -0,0 +1,27 @@ +/* { dg-do compile { target aarch64_little_endian } } */ +/* { dg-options "-O2 -ftree-vectorize -fopenmp-simd -msve-vector-bits=128 -fno-tree-loop-distribute-patterns" } */ + +#define N 16 + +#include "single_1.c" + +/* { dg-final { scan-assembler-times {\tmovi\tv[0-9]+\.16b, 0x1\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tmovi\tv[0-9]+\.16b, 0x2\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tmovi\tv[0-9]+\.8h, 0x3\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tmovi\tv[0-9]+\.8h, 0x4\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tmovi\tv[0-9]+\.4s, 0x5\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tmovi\tv[0-9]+\.4s, 0x6\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tmov\tz[0-9]+\.d, #7\n} 1 { xfail *-*-* } } } */ +/* { dg-final { scan-assembler-times {\tmov\tz[0-9]+\.d, #8\n} 1 { xfail *-*-* } } } */ +/* { dg-final { scan-assembler-times {\tfmov\tv[0-9]+\.8h, 1\.0e\+0\n} 1 { xfail *-*-* } } } */ +/* { dg-final { scan-assembler-times {\tfmov\tv[0-9]+\.4s, 2\.0e\+0\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tfmov\tv[0-9]+\.2d, 3\.0e\+0\n} 1 } } */ + +/* { dg-final { scan-assembler-times {\tstr\tq[0-9]+,} 11 { xfail *-*-* } } } */ +/* { dg-final { scan-assembler-times {\tstr\tq[0-9]+,} 10 } } */ + +/* { dg-final { scan-assembler-not {\twhile} } } */ +/* { dg-final { scan-assembler-not {\tb} } } */ +/* { dg-final { scan-assembler-not {\tcmp} } } */ +/* { dg-final { scan-assembler-not {\tindex} } } */ +/* { dg-final { scan-assembler-not {\tptrue\t} { xfail *-*-* } } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve/struct_vect_1.c b/gcc/testsuite/gcc.target/aarch64/sve/struct_vect_1.c index 918a5813828..8964e784cfe 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve/struct_vect_1.c +++ b/gcc/testsuite/gcc.target/aarch64/sve/struct_vect_1.c @@ -11,7 +11,9 @@ #define NAME(X) X #endif +#ifndef N #define N 1024 +#endif void __attribute__ ((noinline, noclone)) NAME(f2) (TYPE *__restrict a, TYPE *__restrict b, TYPE *__restrict c) diff --git a/gcc/testsuite/gcc.target/aarch64/sve/struct_vect_25.c b/gcc/testsuite/gcc.target/aarch64/sve/struct_vect_25.c new file mode 100644 index 00000000000..bde6d04679c --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/struct_vect_25.c @@ -0,0 +1,38 @@ +/* { dg-do compile { target { aarch64_little_endian && aarch64_asm_sve_ok } } } */ +/* { dg-options "-O2 -ftree-vectorize -msve-vector-bits=128 --save-temps" } */ + +#include "struct_vect_14.c" + +/* { dg-final { scan-assembler-times {\tld2\t{v[0-9]+\.16b - v[0-9]+\.16b}, \[x[0-9]+\], #?32\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tld3\t{v[0-9]+\.16b - v[0-9]+\.16b}, \[x[0-9]+\], #?48\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tld4\t{v[0-9]+\.16b - v[0-9]+\.16b}, \[x[0-9]+\], #?64\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tst2\t{v[0-9]+\.16b - v[0-9]+\.16b}, \[x[0-9]+\], #?32\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tst3\t{v[0-9]+\.16b - v[0-9]+\.16b}, \[x[0-9]+\], #?48\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tst4\t{v[0-9]+\.16b - v[0-9]+\.16b}, \[x[0-9]+\], #?64\n} 1 } } */ + +/* { dg-final { scan-assembler-times {\tld2\t{v[0-9]+\.8h - v[0-9]+\.8h}, \[x[0-9]+\], #?32\n} 2 { xfail *-*-* } } } */ +/* { dg-final { scan-assembler-times {\tld3\t{v[0-9]+\.8h - v[0-9]+\.8h}, \[x[0-9]+\], #?48\n} 2 { xfail *-*-* } } } */ +/* { dg-final { scan-assembler-times {\tld4\t{v[0-9]+\.8h - v[0-9]+\.8h}, \[x[0-9]+\], #?64\n} 2 { xfail *-*-* } } } */ +/* { dg-final { scan-assembler-times {\tst2\t{v[0-9]+\.8h - v[0-9]+\.8h}, \[x[0-9]+\], #?32\n} 2 { xfail *-*-* } } } */ +/* { dg-final { scan-assembler-times {\tst3\t{v[0-9]+\.8h - v[0-9]+\.8h}, \[x[0-9]+\], #?48\n} 2 { xfail *-*-* } } } */ +/* { dg-final { scan-assembler-times {\tst4\t{v[0-9]+\.8h - v[0-9]+\.8h}, \[x[0-9]+\], #?64\n} 2 { xfail *-*-* } } } */ +/* { dg-final { scan-assembler-times {\tld2\t{v[0-9]+\.8h - v[0-9]+\.8h}, \[x[0-9]+\], #?32\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tld3\t{v[0-9]+\.8h - v[0-9]+\.8h}, \[x[0-9]+\], #?48\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tld4\t{v[0-9]+\.8h - v[0-9]+\.8h}, \[x[0-9]+\], #?64\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tst2\t{v[0-9]+\.8h - v[0-9]+\.8h}, \[x[0-9]+\], #?32\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tst3\t{v[0-9]+\.8h - v[0-9]+\.8h}, \[x[0-9]+\], #?48\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tst4\t{v[0-9]+\.8h - v[0-9]+\.8h}, \[x[0-9]+\], #?64\n} 1 } } */ + +/* { dg-final { scan-assembler-times {\tld2\t{v[0-9]+\.4s - v[0-9]+\.4s}, \[x[0-9]+\], #?32\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tld3\t{v[0-9]+\.4s - v[0-9]+\.4s}, \[x[0-9]+\], #?48\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tld4\t{v[0-9]+\.4s - v[0-9]+\.4s}, \[x[0-9]+\], #?64\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tst2\t{v[0-9]+\.4s - v[0-9]+\.4s}, \[x[0-9]+\], #?32\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tst3\t{v[0-9]+\.4s - v[0-9]+\.4s}, \[x[0-9]+\], #?48\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tst4\t{v[0-9]+\.4s - v[0-9]+\.4s}, \[x[0-9]+\], #?64\n} 2 } } */ + +/* { dg-final { scan-assembler-times {\tld2\t{v[0-9]+\.2d - v[0-9]+\.2d}, \[x[0-9]+\], #?32\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tld3\t{v[0-9]+\.2d - v[0-9]+\.2d}, \[x[0-9]+\], #?48\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tld4\t{v[0-9]+\.2d - v[0-9]+\.2d}, \[x[0-9]+\], #?64\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tst2\t{v[0-9]+\.2d - v[0-9]+\.2d}, \[x[0-9]+\], #?32\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tst3\t{v[0-9]+\.2d - v[0-9]+\.2d}, \[x[0-9]+\], #?48\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tst4\t{v[0-9]+\.2d - v[0-9]+\.2d}, \[x[0-9]+\], #?64\n} 2 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve/struct_vect_26.c b/gcc/testsuite/gcc.target/aarch64/sve/struct_vect_26.c new file mode 100644 index 00000000000..cb3588cf471 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/struct_vect_26.c @@ -0,0 +1,35 @@ +/* { dg-do compile { target { aarch64_little_endian && aarch64_asm_sve_ok } } } */ +/* { dg-options "-O2 -ftree-vectorize -msve-vector-bits=128 --save-temps" } */ + +int n; +#define N n + +#include "struct_vect_14.c" + +/* { dg-final { scan-assembler-times {\tld2b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7]/z, \[x[0-9]+, x[0-9]+\]\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tld3b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7]/z, \[x[0-9]+\]\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tld4b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7]/z, \[x[0-9]+, x[0-9]+\]\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tst2b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7], \[x[0-9]+, x[0-9]+\]\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tst3b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7], \[x[0-9]+\]\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tst4b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7], \[x[0-9]+, x[0-9]+\]\n} 1 } } */ + +/* { dg-final { scan-assembler-times {\tld2h\t{z[0-9]+.h - z[0-9]+.h}, p[0-7]/z, \[x[0-9]+\]\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tld3h\t{z[0-9]+.h - z[0-9]+.h}, p[0-7]/z, \[x[0-9]+\]\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tld4h\t{z[0-9]+.h - z[0-9]+.h}, p[0-7]/z, \[x[0-9]+\]\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tst2h\t{z[0-9]+.h - z[0-9]+.h}, p[0-7], \[x[0-9]+\]\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tst3h\t{z[0-9]+.h - z[0-9]+.h}, p[0-7], \[x[0-9]+\]\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tst4h\t{z[0-9]+.h - z[0-9]+.h}, p[0-7], \[x[0-9]+\]\n} 2 } } */ + +/* { dg-final { scan-assembler-times {\tld2w\t{z[0-9]+.s - z[0-9]+.s}, p[0-7]/z, \[x[0-9]+\]\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tld3w\t{z[0-9]+.s - z[0-9]+.s}, p[0-7]/z, \[x[0-9]+\]\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tld4w\t{z[0-9]+.s - z[0-9]+.s}, p[0-7]/z, \[x[0-9]+\]\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tst2w\t{z[0-9]+.s - z[0-9]+.s}, p[0-7], \[x[0-9]+\]\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tst3w\t{z[0-9]+.s - z[0-9]+.s}, p[0-7], \[x[0-9]+\]\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tst4w\t{z[0-9]+.s - z[0-9]+.s}, p[0-7], \[x[0-9]+\]\n} 2 } } */ + +/* { dg-final { scan-assembler-times {\tld2d\t{z[0-9]+.d - z[0-9]+.d}, p[0-7]/z, \[x[0-9]+\]\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tld3d\t{z[0-9]+.d - z[0-9]+.d}, p[0-7]/z, \[x[0-9]+\]\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tld4d\t{z[0-9]+.d - z[0-9]+.d}, p[0-7]/z, \[x[0-9]+\]\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tst2d\t{z[0-9]+.d - z[0-9]+.d}, p[0-7], \[x[0-9]+\]\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tst3d\t{z[0-9]+.d - z[0-9]+.d}, p[0-7], \[x[0-9]+\]\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tst4d\t{z[0-9]+.d - z[0-9]+.d}, p[0-7], \[x[0-9]+\]\n} 2 } } */ -- 2.30.2