+2020-01-31 Dennis Zhang <dennis.zhang@arm.com>
+ Matthew Malcomson <matthew.malcomson@arm.com>
+ Richard Sandiford <richard.sandiford@arm.com>
+
+ * doc/invoke.texi (f32mm): Document new AArch64 -march= extension.
+ * config/aarch64/aarch64-c.c (aarch64_update_cpp_builtins): Define
+ __ARM_FEATURE_SVE_MATMUL_INT8, __ARM_FEATURE_SVE_MATMUL_FP32 and
+ __ARM_FEATURE_SVE_MATMUL_FP64 as appropriate. Don't define
+ __ARM_FEATURE_MATMUL_FP64.
+ * config/aarch64/aarch64-option-extensions.def (fp, simd, fp16)
+ (sve): Add AARCH64_FL_F32MM to the list of extensions that should
+ be disabled at the same time.
+ (f32mm): New extension.
+ * config/aarch64/aarch64.h (AARCH64_FL_F32MM): New macro.
+ (AARCH64_FL_F64MM): Bump to the next bit up.
+ (AARCH64_ISA_F32MM, TARGET_SVE_I8MM, TARGET_F32MM, TARGET_SVE_F32MM)
+ (TARGET_SVE_F64MM): New macros.
+ * config/aarch64/iterators.md (SVE_MATMULF): New mode iterator.
+ (UNSPEC_FMMLA, UNSPEC_SMATMUL, UNSPEC_UMATMUL, UNSPEC_USMATMUL)
+ (UNSPEC_TRN1Q, UNSPEC_TRN2Q, UNSPEC_UZP1Q, UNSPEC_UZP2Q, UNSPEC_ZIP1Q)
+ (UNSPEC_ZIP2Q): New unspecs.
+ (DOTPROD_US_ONLY, PERMUTEQ, MATMUL, FMMLA): New int iterators.
+ (optab, sur, perm_insn): Handle the new unspecs.
+ (sve_fp_op): Handle UNSPEC_FMMLA. Resort.
+ * config/aarch64/aarch64-sve.md (@aarch64_sve_ld1ro<mode>): Use
+ TARGET_SVE_F64MM instead of separate tests.
+ (@aarch64_<DOTPROD_US_ONLY:sur>dot_prod<vsi2qi>): New pattern.
+ (@aarch64_<DOTPROD_I8MM:sur>dot_prod_lane<vsi2qi>): Likewise.
+ (@aarch64_sve_add_<MATMUL:optab><vsi2qi>): Likewise.
+ (@aarch64_sve_<FMMLA:sve_fp_op><mode>): Likewise.
+ (@aarch64_sve_<PERMUTEQ:optab><mode>): Likewise.
+ * config/aarch64/aarch64-sve-builtins.cc (TYPES_s_float): New macro.
+ (TYPES_s_float_hsd_integer, TYPES_s_float_sd_integer): Use it.
+ (TYPES_s_signed): New macro.
+ (TYPES_s_integer): Use it.
+ (TYPES_d_float): New macro.
+ (TYPES_d_data): Use it.
+ * config/aarch64/aarch64-sve-builtins-shapes.h (mmla): Declare.
+ (ternary_intq_uintq_lane, ternary_intq_uintq_opt_n, ternary_uintq_intq)
+ (ternary_uintq_intq_lane, ternary_uintq_intq_opt_n): Likewise.
+ * config/aarch64/aarch64-sve-builtins-shapes.cc (mmla_def): New class.
+ (mmla): New shape.
+ (ternary_resize2_opt_n_base): Add TYPE_CLASS2 and TYPE_CLASS3
+ template parameters.
+ (ternary_resize2_lane_base): Likewise.
+ (ternary_resize2_base): New class.
+ (ternary_qq_lane_base): Likewise.
+ (ternary_intq_uintq_lane_def): Likewise.
+ (ternary_intq_uintq_lane): New shape.
+ (ternary_intq_uintq_opt_n_def): New class.
+ (ternary_intq_uintq_opt_n): New shape.
+ (ternary_qq_lane_def): Inherit from ternary_qq_lane_base.
+ (ternary_uintq_intq_def): New class.
+ (ternary_uintq_intq): New shape.
+ (ternary_uintq_intq_lane_def): New class.
+ (ternary_uintq_intq_lane): New shape.
+ (ternary_uintq_intq_opt_n_def): New class.
+ (ternary_uintq_intq_opt_n): New shape.
+ * config/aarch64/aarch64-sve-builtins-base.h (svmmla, svsudot)
+ (svsudot_lane, svtrn1q, svtrn2q, svusdot, svusdot_lane, svusmmla)
+ (svuzp1q, svuzp2q, svzip1q, svzip2q): Declare.
+ * config/aarch64/aarch64-sve-builtins-base.cc (svdot_lane_impl):
+ Generalize to...
+ (svdotprod_lane_impl): ...this new class.
+ (svmmla_impl, svusdot_impl): New classes.
+ (svdot_lane): Update to use svdotprod_lane_impl.
+ (svmmla, svsudot, svsudot_lane, svtrn1q, svtrn2q, svusdot)
+ (svusdot_lane, svusmmla, svuzp1q, svuzp2q, svzip1q, svzip2q): New
+ functions.
+ * config/aarch64/aarch64-sve-builtins-base.def (svmmla): New base
+ function, with no types defined.
+ (svmmla, svusmmla, svsudot, svsudot_lane, svusdot, svusdot_lane): New
+ AARCH64_FL_I8MM functions.
+ (svmmla): New AARCH64_FL_F32MM function.
+ (svld1ro): Depend only on AARCH64_FL_F64MM, not on AARCH64_FL_V8_6.
+ (svmmla, svtrn1q, svtrn2q, svuzp1q, svuzp2q, svzip1q, svzip2q): New
+ AARCH64_FL_F64MM functions.
+
2020-01-31 Andrew Stubbs <ams@codesourcery.com>
* config/gcn/gcn-valu.md (addv64di3_exec): Allow one '0' in each
bits = 0;
builtin_define_with_int_value ("__ARM_FEATURE_SVE_BITS", bits);
}
+ aarch64_def_or_undef (TARGET_SVE_I8MM,
+ "__ARM_FEATURE_SVE_MATMUL_INT8", pfile);
+ aarch64_def_or_undef (TARGET_SVE_F32MM,
+ "__ARM_FEATURE_SVE_MATMUL_FP32", pfile);
+ aarch64_def_or_undef (TARGET_SVE_F64MM,
+ "__ARM_FEATURE_SVE_MATMUL_FP64", pfile);
aarch64_def_or_undef (TARGET_SVE2, "__ARM_FEATURE_SVE2", pfile);
aarch64_def_or_undef (TARGET_SVE2_AES, "__ARM_FEATURE_SVE2_AES", pfile);
aarch64_def_or_undef (TARGET_SVE2_BITPERM,
aarch64_def_or_undef (TARGET_MEMTAG, "__ARM_FEATURE_MEMORY_TAGGING", pfile);
aarch64_def_or_undef (TARGET_I8MM, "__ARM_FEATURE_MATMUL_INT8", pfile);
- aarch64_def_or_undef (TARGET_F64MM, "__ARM_FEATURE_MATMUL_FP64", pfile);
aarch64_def_or_undef (TARGET_BF16_SIMD,
"__ARM_FEATURE_BF16_VECTOR_ARITHMETIC", pfile);
aarch64_def_or_undef (TARGET_BF16_FP,
/* Enabling "fp" just enables "fp".
Disabling "fp" also disables "simd", "crypto", "fp16", "aes", "sha2",
"sha3", sm3/sm4, "sve", "sve2", "sve2-aes", "sve2-sha3", "sve2-sm4",
- "sve2-bitperm", "i8mm", "f64mm", and "bf16". */
+ "sve2-bitperm", "i8mm", "f32mm", "f64mm", and "bf16". */
AARCH64_OPT_EXTENSION("fp", AARCH64_FL_FP, 0, AARCH64_FL_SIMD | \
AARCH64_FL_CRYPTO | AARCH64_FL_F16 | AARCH64_FL_AES | \
AARCH64_FL_SHA2 | AARCH64_FL_SHA3 | AARCH64_FL_SM4 | \
AARCH64_FL_SVE | AARCH64_FL_SVE2 | AARCH64_FL_SVE2_AES | \
AARCH64_FL_SVE2_SHA3 | AARCH64_FL_SVE2_SM4 | \
AARCH64_FL_SVE2_BITPERM | AARCH64_FL_I8MM | \
- AARCH64_FL_F64MM | AARCH64_FL_BF16, false, "fp")
+ AARCH64_FL_F32MM | AARCH64_FL_F64MM | AARCH64_FL_BF16,
+ false, "fp")
/* Enabling "simd" also enables "fp".
Disabling "simd" also disables "crypto", "dotprod", "aes", "sha2", "sha3",
"sm3/sm4", "sve", "sve2", "sve2-aes", "sve2-sha3", "sve2-sm4",
- "sve2-bitperm", "i8mm", and "f64mm". */
+ "sve2-bitperm", "i8mm", "f32mm" and "f64mm". */
AARCH64_OPT_EXTENSION("simd", AARCH64_FL_SIMD, AARCH64_FL_FP, \
AARCH64_FL_CRYPTO | AARCH64_FL_DOTPROD | \
AARCH64_FL_AES | AARCH64_FL_SHA2 | AARCH64_FL_SHA3 | \
AARCH64_FL_SM4 | AARCH64_FL_SVE | AARCH64_FL_SVE2 | \
AARCH64_FL_SVE2_AES | AARCH64_FL_SVE2_SHA3 | \
AARCH64_FL_SVE2_SM4 | AARCH64_FL_SVE2_BITPERM | \
- AARCH64_FL_I8MM | AARCH64_FL_F64MM, false, \
- "asimd")
+ AARCH64_FL_I8MM | AARCH64_FL_F32MM | AARCH64_FL_F64MM, \
+ false, "asimd")
/* Enabling "crypto" also enables "fp", "simd", "aes" and "sha2".
Disabling "crypto" disables "crypto", "aes", "sha2", "sha3" and "sm3/sm4",
/* Enabling "fp16" also enables "fp".
Disabling "fp16" disables "fp16", "fp16fml", "sve", "sve2",
- "sve2-aes", "sve2-sha3", "sve2-sm4", "sve2-bitperm", and "f64mm". */
+ "sve2-aes", "sve2-sha3", "sve2-sm4", "sve2-bitperm", "f32mm" and
+ "f64mm". */
AARCH64_OPT_EXTENSION("fp16", AARCH64_FL_F16, AARCH64_FL_FP, \
- AARCH64_FL_F16FML | AARCH64_FL_SVE | AARCH64_FL_F64MM | \
- AARCH64_FL_SVE2 | AARCH64_FL_SVE2_AES | \
- AARCH64_FL_SVE2_SHA3 | AARCH64_FL_SVE2_SM4 | \
- AARCH64_FL_SVE2_BITPERM, false, \
+ AARCH64_FL_F16FML | AARCH64_FL_SVE | AARCH64_FL_F32MM | \
+ AARCH64_FL_F64MM | AARCH64_FL_SVE2 | \
+ AARCH64_FL_SVE2_AES | AARCH64_FL_SVE2_SHA3 | \
+ AARCH64_FL_SVE2_SM4 | AARCH64_FL_SVE2_BITPERM, false, \
"fphp asimdhp")
/* Enabling or disabling "rcpc" only changes "rcpc". */
AARCH64_FL_FP | AARCH64_FL_F16, 0, false, "asimdfhm")
/* Enabling "sve" also enables "fp16", "fp" and "simd".
- Disabling "sve" disables "sve", "f64mm", "sve2", "sve2-aes", "sve2-sha3",
- "sve2-sm4" and "sve2-bitperm". */
+ Disabling "sve" disables "sve", "f32mm", "f64mm", "sve2", "sve2-aes",
+ "sve2-sha3", "sve2-sm4" and "sve2-bitperm". */
AARCH64_OPT_EXTENSION("sve", AARCH64_FL_SVE, AARCH64_FL_FP | AARCH64_FL_SIMD | \
- AARCH64_FL_F16, AARCH64_FL_F64MM | AARCH64_FL_SVE2 | \
- AARCH64_FL_SVE2_AES | AARCH64_FL_SVE2_SHA3 | \
- AARCH64_FL_SVE2_SM4 | AARCH64_FL_SVE2_BITPERM, \
- false, "sve")
+ AARCH64_FL_F16, AARCH64_FL_F32MM | AARCH64_FL_F64MM | \
+ AARCH64_FL_SVE2 | AARCH64_FL_SVE2_AES | \
+ AARCH64_FL_SVE2_SHA3 | AARCH64_FL_SVE2_SM4 | \
+ AARCH64_FL_SVE2_BITPERM, false, "sve")
/* Enabling/Disabling "profile" does not enable/disable any other feature. */
AARCH64_OPT_EXTENSION("profile", AARCH64_FL_PROFILE, 0, 0, false, "")
AARCH64_OPT_EXTENSION("i8mm", AARCH64_FL_I8MM, \
AARCH64_FL_SIMD | AARCH64_FL_FP, 0, false, "i8mm")
+/* Enabling "f32mm" also enables "sve", "fp16", "fp", and "simd".
+ Disabling "f32mm" only disables "f32mm". */
+AARCH64_OPT_EXTENSION("f32mm", AARCH64_FL_F32MM, \
+ AARCH64_FL_SVE | AARCH64_FL_F16 | AARCH64_FL_FP | \
+ AARCH64_FL_SIMD, 0, false, "f32mm")
+
/* Enabling "f64mm" also enables "sve", "fp16", "fp", and "simd".
Disabling "f64mm" only disables "f64mm". */
AARCH64_OPT_EXTENSION("f64mm", AARCH64_FL_F64MM, \
}
};
-class svdot_lane_impl : public function_base
+class svdotprod_lane_impl : public unspec_based_function_base
{
public:
+ CONSTEXPR svdotprod_lane_impl (int unspec_for_sint,
+ int unspec_for_uint,
+ int unspec_for_float)
+ : unspec_based_function_base (unspec_for_sint,
+ unspec_for_uint,
+ unspec_for_float) {}
+
rtx
expand (function_expander &e) const OVERRIDE
{
/* Use the same ordering as the dot_prod_optab, with the
accumulator last. */
e.rotate_inputs_left (0, 4);
- int unspec = (e.type_suffix (0).unsigned_p ? UNSPEC_UDOT : UNSPEC_SDOT);
+ int unspec = unspec_for (e);
machine_mode mode = e.vector_mode (0);
return e.use_exact_insn (code_for_aarch64_dot_prod_lane (unspec, mode));
}
}
};
+class svmmla_impl : public function_base
+{
+public:
+ rtx
+ expand (function_expander &e) const OVERRIDE
+ {
+ insn_code icode;
+ if (e.type_suffix (0).integer_p)
+ {
+ if (e.type_suffix (0).unsigned_p)
+ icode = code_for_aarch64_sve_add (UNSPEC_UMATMUL, e.vector_mode (0));
+ else
+ icode = code_for_aarch64_sve_add (UNSPEC_SMATMUL, e.vector_mode (0));
+ }
+ else
+ icode = code_for_aarch64_sve (UNSPEC_FMMLA, e.vector_mode (0));
+ return e.use_exact_insn (icode);
+ }
+};
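+
+/* An illustrative summary of the dispatch above (an assumption drawn from
+   the unspec names, not a statement from the original sources): signed
+   integer type suffixes select the SMMLA pattern, unsigned integer
+   suffixes select UMMLA, and floating-point suffixes select FMMLA.  */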
+
class svmsb_impl : public function_base
{
public:
bool m_high_p;
};
+/* Also implements svsudot. */
+class svusdot_impl : public function_base
+{
+public:
+ CONSTEXPR svusdot_impl (bool su) : m_su (su) {}
+
+ rtx
+ expand (function_expander &e) const OVERRIDE
+ {
+ /* The implementation of the ACLE function svsudot (for the non-lane
+ version) is through the USDOT instruction but with the second and third
+ inputs swapped. */
+ if (m_su)
+ e.rotate_inputs_left (1, 2);
+ /* The ACLE function has the same order requirements as for svdot.
+ While there's no requirement for the RTL pattern to have the same sort
+ of order as that for <sur>dot_prod, it's easier to read.
+ Hence we do the same rotation on arguments as svdot_impl does. */
+ e.rotate_inputs_left (0, 3);
+ machine_mode mode = e.vector_mode (0);
+ insn_code icode = code_for_aarch64_dot_prod (UNSPEC_USDOT, mode);
+ return e.use_exact_insn (icode);
+ }
+
+private:
+ bool m_su;
+};
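+
+/* A sketch of the operand handling above (illustrative, not part of the
+   original sources): for svsudot (acc, signed_vec, unsigned_vec), the
+   m_su path first swaps the signed and unsigned inputs so that the
+   USDOT-based pattern sees (unsigned_vec, signed_vec), and the second
+   rotation then moves the accumulator to the end, giving the
+   (data1, data2, acc) order that the RTL pattern expects.  */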
+
/* Implements svuzp1 and svuzp2. */
class svuzp_impl : public binary_permute
{
FUNCTION (svdiv, rtx_code_function, (DIV, UDIV, UNSPEC_COND_FDIV))
FUNCTION (svdivr, rtx_code_function_rotated, (DIV, UDIV, UNSPEC_COND_FDIV))
FUNCTION (svdot, svdot_impl,)
-FUNCTION (svdot_lane, svdot_lane_impl,)
+FUNCTION (svdot_lane, svdotprod_lane_impl, (UNSPEC_SDOT, UNSPEC_UDOT, -1))
FUNCTION (svdup, svdup_impl,)
FUNCTION (svdup_lane, svdup_lane_impl,)
FUNCTION (svdupq, svdupq_impl,)
FUNCTION (svmla_lane, svmla_lane_impl,)
FUNCTION (svmls, svmls_impl,)
FUNCTION (svmls_lane, svmls_lane_impl,)
+FUNCTION (svmmla, svmmla_impl,)
FUNCTION (svmov, svmov_impl,)
FUNCTION (svmsb, svmsb_impl,)
FUNCTION (svmul, rtx_code_function, (MULT, MULT, UNSPEC_COND_FMUL))
FUNCTION (svstnt1, svstnt1_impl,)
FUNCTION (svsub, svsub_impl,)
FUNCTION (svsubr, rtx_code_function_rotated, (MINUS, MINUS, UNSPEC_COND_FSUB))
+FUNCTION (svsudot, svusdot_impl, (true))
+FUNCTION (svsudot_lane, svdotprod_lane_impl, (UNSPEC_SUDOT, -1, -1))
FUNCTION (svtbl, svtbl_impl,)
FUNCTION (svtmad, CODE_FOR_MODE0 (aarch64_sve_tmad),)
FUNCTION (svtrn1, svtrn_impl, (0))
+FUNCTION (svtrn1q, unspec_based_function, (UNSPEC_TRN1Q, UNSPEC_TRN1Q,
+ UNSPEC_TRN1Q))
FUNCTION (svtrn2, svtrn_impl, (1))
+FUNCTION (svtrn2q, unspec_based_function, (UNSPEC_TRN2Q, UNSPEC_TRN2Q,
+ UNSPEC_TRN2Q))
FUNCTION (svtsmul, unspec_based_function, (-1, -1, UNSPEC_FTSMUL))
FUNCTION (svtssel, unspec_based_function, (-1, -1, UNSPEC_FTSSEL))
FUNCTION (svundef, svundef_impl, (1))
FUNCTION (svundef4, svundef_impl, (4))
FUNCTION (svunpkhi, svunpk_impl, (true))
FUNCTION (svunpklo, svunpk_impl, (false))
+FUNCTION (svusdot, svusdot_impl, (false))
+FUNCTION (svusdot_lane, svdotprod_lane_impl, (UNSPEC_USDOT, -1, -1))
+FUNCTION (svusmmla, unspec_based_add_function, (UNSPEC_USMATMUL, -1, -1))
FUNCTION (svuzp1, svuzp_impl, (0))
+FUNCTION (svuzp1q, unspec_based_function, (UNSPEC_UZP1Q, UNSPEC_UZP1Q,
+ UNSPEC_UZP1Q))
FUNCTION (svuzp2, svuzp_impl, (1))
+FUNCTION (svuzp2q, unspec_based_function, (UNSPEC_UZP2Q, UNSPEC_UZP2Q,
+ UNSPEC_UZP2Q))
FUNCTION (svwhilele, svwhilelx_impl, (UNSPEC_WHILELE, UNSPEC_WHILELS, true))
FUNCTION (svwhilelt, svwhilelx_impl, (UNSPEC_WHILELT, UNSPEC_WHILELO, false))
FUNCTION (svwrffr, svwrffr_impl,)
FUNCTION (svzip1, svzip_impl, (0))
+FUNCTION (svzip1q, unspec_based_function, (UNSPEC_ZIP1Q, UNSPEC_ZIP1Q,
+ UNSPEC_ZIP1Q))
FUNCTION (svzip2, svzip_impl, (1))
+FUNCTION (svzip2q, unspec_based_function, (UNSPEC_ZIP2Q, UNSPEC_ZIP2Q,
+ UNSPEC_ZIP2Q))
} /* end namespace aarch64_sve */
DEF_SVE_FUNCTION (svmla_lane, ternary_lane, all_float, none)
DEF_SVE_FUNCTION (svmls, ternary_opt_n, all_data, mxz)
DEF_SVE_FUNCTION (svmls_lane, ternary_lane, all_float, none)
+DEF_SVE_FUNCTION (svmmla, mmla, none, none)
DEF_SVE_FUNCTION (svmov, unary, b, z)
DEF_SVE_FUNCTION (svmsb, ternary_opt_n, all_data, mxz)
DEF_SVE_FUNCTION (svmul, binary_opt_n, all_data, mxz)
DEF_SVE_FUNCTION (svzip2, binary_pred, all_pred, none)
#undef REQUIRED_EXTENSIONS
-#define REQUIRED_EXTENSIONS AARCH64_FL_V8_6 | AARCH64_FL_F64MM
+#define REQUIRED_EXTENSIONS AARCH64_FL_I8MM
+DEF_SVE_FUNCTION (svmmla, mmla, s_integer, none)
+DEF_SVE_FUNCTION (svusmmla, ternary_uintq_intq, s_signed, none)
+DEF_SVE_FUNCTION (svsudot, ternary_intq_uintq_opt_n, s_signed, none)
+DEF_SVE_FUNCTION (svsudot_lane, ternary_intq_uintq_lane, s_signed, none)
+DEF_SVE_FUNCTION (svusdot, ternary_uintq_intq_opt_n, s_signed, none)
+DEF_SVE_FUNCTION (svusdot_lane, ternary_uintq_intq_lane, s_signed, none)
+#undef REQUIRED_EXTENSIONS
+
+#define REQUIRED_EXTENSIONS AARCH64_FL_F32MM
+DEF_SVE_FUNCTION (svmmla, mmla, s_float, none)
+#undef REQUIRED_EXTENSIONS
+
+#define REQUIRED_EXTENSIONS AARCH64_FL_F64MM
DEF_SVE_FUNCTION (svld1ro, load_replicate, all_data, implicit)
+DEF_SVE_FUNCTION (svmmla, mmla, d_float, none)
+DEF_SVE_FUNCTION (svtrn1q, binary, all_data, none)
+DEF_SVE_FUNCTION (svtrn2q, binary, all_data, none)
+DEF_SVE_FUNCTION (svuzp1q, binary, all_data, none)
+DEF_SVE_FUNCTION (svuzp2q, binary, all_data, none)
+DEF_SVE_FUNCTION (svzip1q, binary, all_data, none)
+DEF_SVE_FUNCTION (svzip2q, binary, all_data, none)
#undef REQUIRED_EXTENSIONS
extern const function_base *const svmla_lane;
extern const function_base *const svmls;
extern const function_base *const svmls_lane;
+ extern const function_base *const svmmla;
extern const function_base *const svmov;
extern const function_base *const svmsb;
extern const function_base *const svmul;
extern const function_base *const svstnt1;
extern const function_base *const svsub;
extern const function_base *const svsubr;
+ extern const function_base *const svsudot;
+ extern const function_base *const svsudot_lane;
extern const function_base *const svtbl;
extern const function_base *const svtmad;
extern const function_base *const svtrn1;
+ extern const function_base *const svtrn1q;
extern const function_base *const svtrn2;
+ extern const function_base *const svtrn2q;
extern const function_base *const svtsmul;
extern const function_base *const svtssel;
extern const function_base *const svundef;
extern const function_base *const svundef4;
extern const function_base *const svunpkhi;
extern const function_base *const svunpklo;
+ extern const function_base *const svusdot;
+ extern const function_base *const svusdot_lane;
+ extern const function_base *const svusmmla;
extern const function_base *const svuzp1;
+ extern const function_base *const svuzp1q;
extern const function_base *const svuzp2;
+ extern const function_base *const svuzp2q;
extern const function_base *const svwhilele;
extern const function_base *const svwhilelt;
extern const function_base *const svwrffr;
extern const function_base *const svzip1;
+ extern const function_base *const svzip1q;
extern const function_base *const svzip2;
+ extern const function_base *const svzip2q;
}
}
}
};
+/* sv<t0>_t svfoo[_t0](sv<t0>_t, sv<t0:quarter>_t,
+ sv<t0:quarter>_t) (for integer t0)
+ sv<t0>_t svfoo[_t0](sv<t0>_t, sv<t0>_t, sv<t0>_t) (for floating-point t0)
+
+ The functions act like the equivalent of "ternary_qq" for integer elements
+ and normal vector-only ternary functions for floating-point elements. */
+struct mmla_def : public overloaded_base<0>
+{
+ void
+ build (function_builder &b, const function_group_info &group) const OVERRIDE
+ {
+ b.add_overloaded_functions (group, MODE_none);
+ /* svmmla is distributed over several extensions. Allow the common
+ denominator to define the overloaded svmmla function without
+ defining any specific versions. */
+ if (group.types[0][0] != NUM_TYPE_SUFFIXES)
+ {
+ if (type_suffixes[group.types[0][0]].float_p)
+ build_all (b, "v0,v0,v0,v0", group, MODE_none);
+ else
+ build_all (b, "v0,v0,vq0,vq0", group, MODE_none);
+ }
+ }
+
+ tree
+ resolve (function_resolver &r) const OVERRIDE
+ {
+ unsigned int i, nargs;
+ type_suffix_index type;
+ if (!r.check_gp_argument (3, i, nargs)
+ || (type = r.infer_vector_type (i)) == NUM_TYPE_SUFFIXES)
+ return error_mark_node;
+
+ /* Make sure that the function exists now, since not all forms
+ follow a set pattern after this point. */
+ tree res = r.resolve_to (r.mode_suffix_id, type);
+ if (res == error_mark_node)
+ return res;
+
+ bool float_p = type_suffixes[type].float_p;
+ unsigned int modifier = float_p ? r.SAME_SIZE : r.QUARTER_SIZE;
+ if (!r.require_derived_vector_type (i + 1, i, type, r.SAME_TYPE_CLASS,
+ modifier)
+ || !r.require_derived_vector_type (i + 2, i, type, r.SAME_TYPE_CLASS,
+ modifier))
+ return error_mark_node;
+
+ return res;
+ }
+};
+SHAPE (mmla)
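+
+/* For reference, a sketch of the overloads this shape yields (assuming the
+   standard ACLE type names; illustrative only):
+     svint32_t   svmmla (svint32_t, svint8_t, svint8_t);
+     svuint32_t  svmmla (svuint32_t, svuint8_t, svuint8_t);
+     svfloat32_t svmmla (svfloat32_t, svfloat32_t, svfloat32_t);
+     svfloat64_t svmmla (svfloat64_t, svfloat64_t, svfloat64_t);  */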
+
/* Base class for prefetch_gather_index and prefetch_gather_offset,
which differ only in the units of the displacement. */
struct prefetch_gather_base : public overloaded_base<0>
/* Base class for ternary operations in which the first argument has the
same element type as the result, and in which the second and third
- arguments have an element type that is derived the first. MODIFIER
- is the number of element bits in the second and third arguments,
- or a function_resolver modifier that says how this precision is
- derived from the first argument's elements. */
-template<unsigned int MODIFIER>
+ arguments have an element type that is derived from the first.
+
+ MODIFIER is the number of element bits in the second and third
+ arguments, or a function_resolver modifier that says how this
+ precision is derived from the first argument's elements.
+
+ TYPE_CLASS2 and TYPE_CLASS3 are the type classes of the second and
+ third arguments, or function_resolver::SAME_TYPE_CLASS if the type
+ class is the same as the first argument. */
+template<unsigned int MODIFIER,
+ type_class_index TYPE_CLASS2 = function_resolver::SAME_TYPE_CLASS,
+ type_class_index TYPE_CLASS3 = function_resolver::SAME_TYPE_CLASS>
struct ternary_resize2_opt_n_base : public overloaded_base<0>
{
tree
type_suffix_index type;
if (!r.check_gp_argument (3, i, nargs)
|| (type = r.infer_vector_type (i)) == NUM_TYPE_SUFFIXES
- || !r.require_derived_vector_type (i + 1, i, type, r.SAME_TYPE_CLASS,
+ || !r.require_derived_vector_type (i + 1, i, type, TYPE_CLASS2,
+ MODIFIER))
+ return error_mark_node;
+
+ return r.finish_opt_n_resolution (i + 2, i, type, TYPE_CLASS3, MODIFIER);
+ }
+};
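+
+/* For instance (a sketch that mirrors ternary_uintq_intq_opt_n_def later
+   in this file), a ternary operation whose second operand is an unsigned
+   quarter-width vector and whose third operand is a signed quarter-width
+   vector instantiates:
+     ternary_resize2_opt_n_base<function_resolver::QUARTER_SIZE,
+				TYPE_unsigned, TYPE_signed>  */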
+
+/* Like ternary_resize2_opt_n_base, but for functions that don't take
+ a final scalar argument. */
+template<unsigned int MODIFIER,
+ type_class_index TYPE_CLASS2 = function_resolver::SAME_TYPE_CLASS,
+ type_class_index TYPE_CLASS3 = function_resolver::SAME_TYPE_CLASS>
+struct ternary_resize2_base : public overloaded_base<0>
+{
+ tree
+ resolve (function_resolver &r) const OVERRIDE
+ {
+ unsigned int i, nargs;
+ type_suffix_index type;
+ if (!r.check_gp_argument (3, i, nargs)
+ || (type = r.infer_vector_type (i)) == NUM_TYPE_SUFFIXES
+ || !r.require_derived_vector_type (i + 1, i, type, TYPE_CLASS2,
+ MODIFIER)
+ || !r.require_derived_vector_type (i + 2, i, type, TYPE_CLASS3,
MODIFIER))
return error_mark_node;
- return r.finish_opt_n_resolution (i + 2, i, type, r.SAME_TYPE_CLASS,
- MODIFIER);
+ return r.resolve_to (r.mode_suffix_id, type);
}
};
/* Like ternary_resize2_opt_n_base, but for functions that take a final
lane argument. */
-template<unsigned int MODIFIER>
+template<unsigned int MODIFIER,
+ type_class_index TYPE_CLASS2 = function_resolver::SAME_TYPE_CLASS,
+ type_class_index TYPE_CLASS3 = function_resolver::SAME_TYPE_CLASS>
struct ternary_resize2_lane_base : public overloaded_base<0>
{
tree
type_suffix_index type;
if (!r.check_gp_argument (4, i, nargs)
|| (type = r.infer_vector_type (i)) == NUM_TYPE_SUFFIXES
- || !r.require_derived_vector_type (i + 1, i, type, r.SAME_TYPE_CLASS,
+ || !r.require_derived_vector_type (i + 1, i, type, TYPE_CLASS2,
MODIFIER)
- || !r.require_derived_vector_type (i + 2, i, type, r.SAME_TYPE_CLASS,
+ || !r.require_derived_vector_type (i + 2, i, type, TYPE_CLASS3,
MODIFIER)
|| !r.require_integer_immediate (i + 3))
return error_mark_node;
}
};
+/* A specialization of ternary_resize2_lane_base for quarter-sized
+ elements. */
+template<type_class_index TYPE_CLASS2 = function_resolver::SAME_TYPE_CLASS,
+ type_class_index TYPE_CLASS3 = function_resolver::SAME_TYPE_CLASS>
+struct ternary_qq_lane_base
+ : public ternary_resize2_lane_base<function_resolver::QUARTER_SIZE,
+ TYPE_CLASS2, TYPE_CLASS3>
+{
+ bool
+ check (function_checker &c) const OVERRIDE
+ {
+ return c.require_immediate_lane_index (3, 4);
+ }
+};
+
/* Base class for narrowing bottom unary functions. The result is half
the size of input and has class CLASS. */
template<type_class_index CLASS = function_resolver::SAME_TYPE_CLASS>
};
SHAPE (tbl_tuple)
+/* sv<t0>_t svfoo[_t0](sv<t0>_t, sv<t0:int:quarter>_t, sv<t0:uint:quarter>_t,
+ uint64_t)
+
+ where the final argument is an integer constant expression in the range
+ [0, 16 / sizeof (<t0>_t) - 1]. */
+struct ternary_intq_uintq_lane_def
+ : public ternary_qq_lane_base<TYPE_signed, TYPE_unsigned>
+{
+ void
+ build (function_builder &b, const function_group_info &group) const OVERRIDE
+ {
+ b.add_overloaded_functions (group, MODE_none);
+ build_all (b, "v0,v0,vqs0,vqu0,su64", group, MODE_none);
+ }
+};
+SHAPE (ternary_intq_uintq_lane)
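+
+/* As a worked instance of the range above (assuming the ACLE svsudot_lane
+   function, whose <t0> is int32_t): the lane index must be a constant in
+   [0, 16 / sizeof (int32_t) - 1], i.e. [0, 3].  */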
+
+/* sv<t0>_t svfoo[_t0](sv<t0>_t, sv<t0:int:quarter>_t, sv<t0:uint:quarter>_t)
+ sv<t0>_t svfoo[_n_t0](sv<t0>_t, sv<t0:int:quarter>_t,
+ <t0:uint:quarter>_t). */
+struct ternary_intq_uintq_opt_n_def
+ : public ternary_resize2_opt_n_base<function_resolver::QUARTER_SIZE,
+ TYPE_signed, TYPE_unsigned>
+{
+ void
+ build (function_builder &b, const function_group_info &group) const OVERRIDE
+ {
+ b.add_overloaded_functions (group, MODE_none);
+ build_all (b, "v0,v0,vqs0,vqu0", group, MODE_none);
+ build_all (b, "v0,v0,vqs0,squ0", group, MODE_n);
+ }
+};
+SHAPE (ternary_intq_uintq_opt_n)
+
/* svbool_t svfoo[_<t0>](sv<t0>_t, sv<t0>_t, sv<t0>_t, uint64_t)
where the final argument is an integer constant expression in the
};
SHAPE (ternary_opt_n)
-/* sv<t0>_t svfoo[_t0](sv<t0>_t, sv<t0.quarter>_t, sv<t0.quarter>_t, uint64_t)
+/* sv<t0>_t svfoo[_t0](sv<t0>_t, sv<t0:quarter>_t, sv<t0:quarter>_t, uint64_t)
where the final argument is an integer constant expression in the range
[0, 16 / sizeof (<t0>_t) - 1]. */
-struct ternary_qq_lane_def
- : public ternary_resize2_lane_base<function_resolver::QUARTER_SIZE>
+struct ternary_qq_lane_def : public ternary_qq_lane_base<>
{
void
build (function_builder &b, const function_group_info &group) const OVERRIDE
b.add_overloaded_functions (group, MODE_none);
build_all (b, "v0,v0,vq0,vq0,su64", group, MODE_none);
}
-
- bool
- check (function_checker &c) const OVERRIDE
- {
- return c.require_immediate_lane_index (3, 4);
- }
};
SHAPE (ternary_qq_lane)
};
SHAPE (ternary_qq_lane_rotate)
-/* sv<t0>_t svfoo[_t0](sv<t0>_t, sv<t0.quarter>_t, sv<t0.quarter>_t)
- sv<t0>_t svfoo[_n_t0](sv<t0>_t, sv<t0.quarter>_t, <t0.quarter>_t)
+/* sv<t0>_t svfoo[_t0](sv<t0>_t, sv<t0:quarter>_t, sv<t0:quarter>_t)
+ sv<t0>_t svfoo[_n_t0](sv<t0>_t, sv<t0:quarter>_t, <t0:quarter>_t)
i.e. a version of the standard ternary shape ternary_opt_n in which
the element type of the last two arguments is the quarter-sized
};
SHAPE (ternary_uint)
+/* sv<t0>_t svfoo[_t0](sv<t0>_t, sv<t0:uint:quarter>_t,
+ sv<t0:int:quarter>_t). */
+struct ternary_uintq_intq_def
+ : public ternary_resize2_base<function_resolver::QUARTER_SIZE,
+ TYPE_unsigned, TYPE_signed>
+{
+ void
+ build (function_builder &b, const function_group_info &group) const OVERRIDE
+ {
+ b.add_overloaded_functions (group, MODE_none);
+ build_all (b, "v0,v0,vqu0,vqs0", group, MODE_none);
+ }
+};
+SHAPE (ternary_uintq_intq)
+
+/* sv<t0>_t svfoo[_t0](sv<t0>_t, sv<t0:uint:quarter>_t, sv<t0:int:quarter>_t,
+ uint64_t)
+
+ where the final argument is an integer constant expression in the range
+ [0, 16 / sizeof (<t0>_t) - 1]. */
+struct ternary_uintq_intq_lane_def
+ : public ternary_qq_lane_base<TYPE_unsigned, TYPE_signed>
+{
+ void
+ build (function_builder &b, const function_group_info &group) const OVERRIDE
+ {
+ b.add_overloaded_functions (group, MODE_none);
+ build_all (b, "v0,v0,vqu0,vqs0,su64", group, MODE_none);
+ }
+};
+SHAPE (ternary_uintq_intq_lane)
+
+/* sv<t0>_t svfoo[_t0](sv<t0>_t, sv<t0:uint:quarter>_t, sv<t0:int:quarter>_t)
+ sv<t0>_t svfoo[_n_t0](sv<t0>_t, sv<t0:uint:quarter>_t,
+ <t0:int:quarter>_t). */
+struct ternary_uintq_intq_opt_n_def
+ : public ternary_resize2_opt_n_base<function_resolver::QUARTER_SIZE,
+ TYPE_unsigned, TYPE_signed>
+{
+ void
+ build (function_builder &b, const function_group_info &group) const OVERRIDE
+ {
+ b.add_overloaded_functions (group, MODE_none);
+ build_all (b, "v0,v0,vqu0,vqs0", group, MODE_none);
+ build_all (b, "v0,v0,vqu0,sqs0", group, MODE_n);
+ }
+};
+SHAPE (ternary_uintq_intq_opt_n)
+
/* svbool_t svfoo[_<t0>](sv<t0>_t, sv<t0>_t, uint64_t)
where the final argument is an integer constant expression in the
extern const function_shape *const load_gather_sv_restricted;
extern const function_shape *const load_gather_vs;
extern const function_shape *const load_replicate;
+ extern const function_shape *const mmla;
extern const function_shape *const pattern_pred;
extern const function_shape *const prefetch;
extern const function_shape *const prefetch_gather_index;
extern const function_shape *const store_scatter_offset;
extern const function_shape *const store_scatter_offset_restricted;
extern const function_shape *const tbl_tuple;
+ extern const function_shape *const ternary_intq_uintq_lane;
+ extern const function_shape *const ternary_intq_uintq_opt_n;
extern const function_shape *const ternary_lane;
extern const function_shape *const ternary_lane_rotate;
extern const function_shape *const ternary_long_lane;
extern const function_shape *const ternary_shift_left_imm;
extern const function_shape *const ternary_shift_right_imm;
extern const function_shape *const ternary_uint;
+ extern const function_shape *const ternary_uintq_intq;
+ extern const function_shape *const ternary_uintq_intq_lane;
+ extern const function_shape *const ternary_uintq_intq_opt_n;
extern const function_shape *const tmad;
extern const function_shape *const unary;
extern const function_shape *const unary_convert;
#define TYPES_hsd_integer(S, D) \
TYPES_hsd_signed (S, D), S (u16), S (u32), S (u64)
+/* _f32. */
+#define TYPES_s_float(S, D) \
+ S (f32)
+
/* _f32
_s16 _s32 _s64
_u16 _u32 _u64. */
#define TYPES_s_float_hsd_integer(S, D) \
- S (f32), TYPES_hsd_integer (S, D)
+ TYPES_s_float (S, D), TYPES_hsd_integer (S, D)
/* _f32
_s32 _s64
_u32 _u64. */
#define TYPES_s_float_sd_integer(S, D) \
- S (f32), TYPES_sd_integer (S, D)
+ TYPES_s_float (S, D), TYPES_sd_integer (S, D)
+
+/* _s32. */
+#define TYPES_s_signed(S, D) \
+ S (s32)
/* _u32. */
#define TYPES_s_unsigned(S, D) \
/* _s32 _u32. */
#define TYPES_s_integer(S, D) \
- S (s32), TYPES_s_unsigned (S, D)
+ TYPES_s_signed (S, D), TYPES_s_unsigned (S, D)
/* _s32 _s64. */
#define TYPES_sd_signed(S, D) \
#define TYPES_all_float_and_sd_integer(S, D) \
TYPES_all_float (S, D), TYPES_sd_integer (S, D)
+/* _f64. */
+#define TYPES_d_float(S, D) \
+ S (f64)
+
/* _u64. */
#define TYPES_d_unsigned(S, D) \
S (u64)
_s64
_u64. */
#define TYPES_d_data(S, D) \
- S (f64), TYPES_d_integer (S, D)
+ TYPES_d_float (S, D), TYPES_d_integer (S, D)
/* All the type combinations allowed by svcvt. */
#define TYPES_cvt(S, D) \
DEF_SVE_TYPES_ARRAY (hd_unsigned);
DEF_SVE_TYPES_ARRAY (hsd_signed);
DEF_SVE_TYPES_ARRAY (hsd_integer);
+DEF_SVE_TYPES_ARRAY (s_float);
DEF_SVE_TYPES_ARRAY (s_float_hsd_integer);
DEF_SVE_TYPES_ARRAY (s_float_sd_integer);
+DEF_SVE_TYPES_ARRAY (s_signed);
DEF_SVE_TYPES_ARRAY (s_unsigned);
DEF_SVE_TYPES_ARRAY (s_integer);
DEF_SVE_TYPES_ARRAY (sd_signed);
DEF_SVE_TYPES_ARRAY (sd_integer);
DEF_SVE_TYPES_ARRAY (sd_data);
DEF_SVE_TYPES_ARRAY (all_float_and_sd_integer);
+DEF_SVE_TYPES_ARRAY (d_float);
DEF_SVE_TYPES_ARRAY (d_unsigned);
DEF_SVE_TYPES_ARRAY (d_integer);
DEF_SVE_TYPES_ARRAY (d_data);
;; ---- [INT] MLS and MSB
;; ---- [INT] Dot product
;; ---- [INT] Sum of absolute differences
+;; ---- [INT] Matrix multiply-accumulate
;; ---- [FP] General ternary arithmetic corresponding to unspecs
;; ---- [FP] Complex multiply-add
;; ---- [FP] Trigonometric multiply-add
+;; ---- [FP] Matrix multiply-accumulate
;;
;; == Comparisons and selects
;; ---- [INT,FP] Select based on predicates
;; - LD1RD
;; - LD1RH
;; - LD1RW
+;; - LD1ROB (F64MM)
+;; - LD1ROD (F64MM)
+;; - LD1ROH (F64MM)
+;; - LD1ROW (F64MM)
;; - LD1RQB
;; - LD1RQD
;; - LD1RQH
(match_operand:OI 1 "aarch64_sve_ld1ro_operand_<Vesize>"
"UO<Vesize>")]
UNSPEC_LD1RO))]
- "TARGET_SVE && TARGET_F64MM"
+ "TARGET_SVE_F64MM"
{
operands[1] = gen_rtx_MEM (<VEL>mode, XEXP (operands[1], 0));
return "ld1ro<Vesize>\t%0.<Vetype>, %2/z, %1";
;; -------------------------------------------------------------------------
;; Includes:
;; - SDOT
+;; - SUDOT (I8MM)
;; - UDOT
+;; - USDOT (I8MM)
;; -------------------------------------------------------------------------
;; Four-element integer dot-product with accumulation.
[(set_attr "movprfx" "*,yes")]
)
+(define_insn "@aarch64_<sur>dot_prod<vsi2qi>"
+ [(set (match_operand:VNx4SI_ONLY 0 "register_operand" "=w, ?&w")
+ (plus:VNx4SI_ONLY
+ (unspec:VNx4SI_ONLY
+ [(match_operand:<VSI2QI> 1 "register_operand" "w, w")
+ (match_operand:<VSI2QI> 2 "register_operand" "w, w")]
+ DOTPROD_US_ONLY)
+ (match_operand:VNx4SI_ONLY 3 "register_operand" "0, w")))]
+ "TARGET_SVE_I8MM"
+ "@
+ <sur>dot\\t%0.s, %1.b, %2.b
+ movprfx\t%0, %3\;<sur>dot\\t%0.s, %1.b, %2.b"
+ [(set_attr "movprfx" "*,yes")]
+)
+
+(define_insn "@aarch64_<sur>dot_prod_lane<vsi2qi>"
+ [(set (match_operand:VNx4SI_ONLY 0 "register_operand" "=w, ?&w")
+ (plus:VNx4SI_ONLY
+ (unspec:VNx4SI_ONLY
+ [(match_operand:<VSI2QI> 1 "register_operand" "w, w")
+ (unspec:<VSI2QI>
+ [(match_operand:<VSI2QI> 2 "register_operand" "y, y")
+ (match_operand:SI 3 "const_int_operand")]
+ UNSPEC_SVE_LANE_SELECT)]
+ DOTPROD_I8MM)
+ (match_operand:VNx4SI_ONLY 4 "register_operand" "0, w")))]
+ "TARGET_SVE_I8MM"
+ "@
+ <sur>dot\\t%0.s, %1.b, %2.b[%3]
+ movprfx\t%0, %4\;<sur>dot\\t%0.s, %1.b, %2.b[%3]"
+ [(set_attr "movprfx" "*,yes")]
+)
+
;; -------------------------------------------------------------------------
;; ---- [INT] Sum of absolute differences
;; -------------------------------------------------------------------------
}
)
+;; -------------------------------------------------------------------------
+;; ---- [INT] Matrix multiply-accumulate
+;; -------------------------------------------------------------------------
+;; Includes:
+;; - SMMLA (I8MM)
+;; - UMMLA (I8MM)
+;; - USMMLA (I8MM)
+;; -------------------------------------------------------------------------
+
+(define_insn "@aarch64_sve_add_<optab><vsi2qi>"
+ [(set (match_operand:VNx4SI_ONLY 0 "register_operand" "=w, ?&w")
+ (plus:VNx4SI_ONLY
+ (unspec:VNx4SI_ONLY
+ [(match_operand:<VSI2QI> 2 "register_operand" "w, w")
+ (match_operand:<VSI2QI> 3 "register_operand" "w, w")]
+ MATMUL)
+ (match_operand:VNx4SI_ONLY 1 "register_operand" "0, w")))]
+ "TARGET_SVE_I8MM"
+ "@
+ <sur>mmla\\t%0.s, %2.b, %3.b
+ movprfx\t%0, %1\;<sur>mmla\\t%0.s, %2.b, %3.b"
+ [(set_attr "movprfx" "*,yes")]
+)
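+
+;; Note (architectural summary, stated here as background rather than
+;; taken from this file): SMMLA, UMMLA and USMMLA treat each 128-bit
+;; segment of the accumulator as a 2x2 matrix of 32-bit elements and the
+;; corresponding segments of the two sources as 2x8 and 8x2 matrices of
+;; 8-bit elements, accumulating each 2x2 product into the destination.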
+
;; -------------------------------------------------------------------------
;; ---- [FP] General ternary arithmetic corresponding to unspecs
;; -------------------------------------------------------------------------
[(set_attr "movprfx" "*,yes")]
)
+;; -------------------------------------------------------------------------
+;; ---- [FP] Matrix multiply-accumulate
+;; -------------------------------------------------------------------------
+;; Includes:
+;; - FMMLA (F32MM,F64MM)
+;; -------------------------------------------------------------------------
+
+;; The mode iterator enforces the target requirements.
+(define_insn "@aarch64_sve_<sve_fp_op><mode>"
+ [(set (match_operand:SVE_MATMULF 0 "register_operand" "=w, ?&w")
+ (unspec:SVE_MATMULF
+ [(match_operand:SVE_MATMULF 2 "register_operand" "w, w")
+ (match_operand:SVE_MATMULF 3 "register_operand" "w, w")
+ (match_operand:SVE_MATMULF 1 "register_operand" "0, w")]
+ FMMLA))]
+ "TARGET_SVE"
+ "@
+ <sve_fp_op>\\t%0.<Vetype>, %2.<Vetype>, %3.<Vetype>
+ movprfx\t%0, %1\;<sve_fp_op>\\t%0.<Vetype>, %2.<Vetype>, %3.<Vetype>"
+ [(set_attr "movprfx" "*,yes")]
+)
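+
+;; Note (architectural summary, included as background): for the .s form
+;; FMMLA multiplies 2x2 matrices of single-precision values held in each
+;; 128-bit segment, while the .d form operates on 2x2 matrices of
+;; double-precision values held in each 256-bit segment, accumulating the
+;; product into the destination.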
+
;; =========================================================================
;; == Comparisons and selects
;; =========================================================================
"<perm_insn>\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>"
)
+;; Apply PERMUTE to 128-bit sequences. The behavior of these patterns
+;; doesn't depend on the mode.
+(define_insn "@aarch64_sve_<optab><mode>"
+ [(set (match_operand:SVE_FULL 0 "register_operand" "=w")
+ (unspec:SVE_FULL
+ [(match_operand:SVE_FULL 1 "register_operand" "w")
+ (match_operand:SVE_FULL 2 "register_operand" "w")]
+ PERMUTEQ))]
+ "TARGET_SVE_F64MM"
+ "<perm_insn>\t%0.q, %1.q, %2.q"
+)
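+
+;; For example (illustrative only): zip1q on any element type emits
+;; ZIP1 Zd.Q, Zn.Q, Zm.Q, which interleaves the low-numbered 128-bit
+;; quadwords of the two inputs regardless of the element size.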
+
;; Concatenate two vectors and extract a subvector. Note that the
;; immediate (third) operand is the lane index not the byte index.
(define_insn "@aarch64_sve_ext<mode>"
/* Brain half-precision floating-point (BFloat16) Extension. */
#define AARCH64_FL_BF16 (1ULL << 36)
-/* 8-bit Integer Matrix Multiply (F64MM) extensions. */
-#define AARCH64_FL_F64MM (1ULL << 37)
+/* 32-bit Floating-point Matrix Multiply (F32MM) extensions. */
+#define AARCH64_FL_F32MM (1ULL << 37)
+
+/* 64-bit Floating-point Matrix Multiply (F64MM) extensions. */
+#define AARCH64_FL_F64MM (1ULL << 38)
/* Has FP and SIMD. */
#define AARCH64_FL_FPSIMD (AARCH64_FL_FP | AARCH64_FL_SIMD)
#define AARCH64_ISA_MEMTAG (aarch64_isa_flags & AARCH64_FL_MEMTAG)
#define AARCH64_ISA_V8_6 (aarch64_isa_flags & AARCH64_FL_V8_6)
#define AARCH64_ISA_I8MM (aarch64_isa_flags & AARCH64_FL_I8MM)
+#define AARCH64_ISA_F32MM (aarch64_isa_flags & AARCH64_FL_F32MM)
#define AARCH64_ISA_F64MM (aarch64_isa_flags & AARCH64_FL_F64MM)
#define AARCH64_ISA_BF16 (aarch64_isa_flags & AARCH64_FL_BF16)
/* I8MM instructions are enabled through +i8mm. */
#define TARGET_I8MM (AARCH64_ISA_I8MM)
+#define TARGET_SVE_I8MM (TARGET_SVE && AARCH64_ISA_I8MM)
+
+/* F32MM instructions are enabled through +f32mm. */
+#define TARGET_F32MM (AARCH64_ISA_F32MM)
+#define TARGET_SVE_F32MM (TARGET_SVE && AARCH64_ISA_F32MM)
/* F64MM instructions are enabled through +f64mm. */
#define TARGET_F64MM (AARCH64_ISA_F64MM)
+#define TARGET_SVE_F64MM (TARGET_SVE && AARCH64_ISA_F64MM)
/* BF16 instructions are enabled through +bf16. */
#define TARGET_BF16_FP (AARCH64_ISA_BF16)
;; elements.
(define_mode_iterator SVE_FULL_SDF [VNx4SF VNx2DF])
+;; Same, but with the appropriate conditions for FMMLA support.
+(define_mode_iterator SVE_MATMULF [(VNx4SF "TARGET_SVE_F32MM")
+ (VNx2DF "TARGET_SVE_F64MM")])
+
;; Fully-packed SVE vector modes that have 32-bit elements.
(define_mode_iterator SVE_FULL_S [VNx4SI VNx4SF])
UNSPEC_FMLA ; Used in aarch64-sve.md.
UNSPEC_FMLS ; Used in aarch64-sve.md.
UNSPEC_FEXPA ; Used in aarch64-sve.md.
+ UNSPEC_FMMLA ; Used in aarch64-sve.md.
UNSPEC_FTMAD ; Used in aarch64-sve.md.
UNSPEC_FTSMUL ; Used in aarch64-sve.md.
UNSPEC_FTSSEL ; Used in aarch64-sve.md.
+ UNSPEC_SMATMUL ; Used in aarch64-sve.md.
+ UNSPEC_UMATMUL ; Used in aarch64-sve.md.
+ UNSPEC_USMATMUL ; Used in aarch64-sve.md.
+ UNSPEC_TRN1Q ; Used in aarch64-sve.md.
+ UNSPEC_TRN2Q ; Used in aarch64-sve.md.
+ UNSPEC_UZP1Q ; Used in aarch64-sve.md.
+ UNSPEC_UZP2Q ; Used in aarch64-sve.md.
+ UNSPEC_ZIP1Q ; Used in aarch64-sve.md.
+ UNSPEC_ZIP2Q ; Used in aarch64-sve.md.
UNSPEC_COND_CMPEQ_WIDE ; Used in aarch64-sve.md.
UNSPEC_COND_CMPGE_WIDE ; Used in aarch64-sve.md.
UNSPEC_COND_CMPGT_WIDE ; Used in aarch64-sve.md.
(define_int_iterator DOTPROD [UNSPEC_SDOT UNSPEC_UDOT])
(define_int_iterator DOTPROD_I8MM [UNSPEC_USDOT UNSPEC_SUDOT])
+(define_int_iterator DOTPROD_US_ONLY [UNSPEC_USDOT])
(define_int_iterator ADDSUBHN [UNSPEC_ADDHN UNSPEC_RADDHN
UNSPEC_SUBHN UNSPEC_RSUBHN])
UNSPEC_TRN1 UNSPEC_TRN2
UNSPEC_UZP1 UNSPEC_UZP2])
+(define_int_iterator PERMUTEQ [UNSPEC_ZIP1Q UNSPEC_ZIP2Q
+ UNSPEC_TRN1Q UNSPEC_TRN2Q
+ UNSPEC_UZP1Q UNSPEC_UZP2Q])
+
(define_int_iterator OPTAB_PERMUTE [UNSPEC_ZIP1 UNSPEC_ZIP2
UNSPEC_UZP1 UNSPEC_UZP2])
(define_int_iterator SVE_PITER [UNSPEC_PFIRST UNSPEC_PNEXT])
+(define_int_iterator MATMUL [UNSPEC_SMATMUL UNSPEC_UMATMUL
+ UNSPEC_USMATMUL])
+
+(define_int_iterator FMMLA [UNSPEC_FMMLA])
+
;; Iterators for atomic operations.
(define_int_iterator ATOMIC_LDOP
(UNSPEC_PMULLB_PAIR "pmullb_pair")
(UNSPEC_PMULLT "pmullt")
(UNSPEC_PMULLT_PAIR "pmullt_pair")
+ (UNSPEC_SMATMUL "smatmul")
(UNSPEC_SQCADD90 "sqcadd90")
(UNSPEC_SQCADD270 "sqcadd270")
(UNSPEC_SQRDCMLAH "sqrdcmlah")
(UNSPEC_SQRDCMLAH90 "sqrdcmlah90")
(UNSPEC_SQRDCMLAH180 "sqrdcmlah180")
(UNSPEC_SQRDCMLAH270 "sqrdcmlah270")
+ (UNSPEC_TRN1Q "trn1q")
+ (UNSPEC_TRN2Q "trn2q")
+ (UNSPEC_UMATMUL "umatmul")
+ (UNSPEC_USMATMUL "usmatmul")
+ (UNSPEC_UZP1Q "uzp1q")
+ (UNSPEC_UZP2Q "uzp2q")
(UNSPEC_WHILERW "vec_check_raw_alias")
(UNSPEC_WHILEWR "vec_check_war_alias")
+ (UNSPEC_ZIP1Q "zip1q")
+ (UNSPEC_ZIP2Q "zip2q")
(UNSPEC_COND_FABS "abs")
(UNSPEC_COND_FADD "add")
(UNSPEC_COND_FCADD90 "cadd90")
(UNSPEC_UQRSHL "u") (UNSPEC_SQRSHL "s")
(UNSPEC_SDOT "s") (UNSPEC_UDOT "u")
(UNSPEC_USDOT "us") (UNSPEC_SUDOT "su")
+ (UNSPEC_SMATMUL "s") (UNSPEC_UMATMUL "u")
+ (UNSPEC_USMATMUL "us")
])
(define_int_attr r [(UNSPEC_SQDMULH "") (UNSPEC_SQRDMULH "r")
(UNSPEC_AUTIB1716 "14")])
(define_int_attr perm_insn [(UNSPEC_ZIP1 "zip1") (UNSPEC_ZIP2 "zip2")
+ (UNSPEC_ZIP1Q "zip1") (UNSPEC_ZIP2Q "zip2")
(UNSPEC_TRN1 "trn1") (UNSPEC_TRN2 "trn2")
- (UNSPEC_UZP1 "uzp1") (UNSPEC_UZP2 "uzp2")])
+ (UNSPEC_TRN1Q "trn1") (UNSPEC_TRN2Q "trn2")
+ (UNSPEC_UZP1 "uzp1") (UNSPEC_UZP2 "uzp2")
+ (UNSPEC_UZP1Q "uzp1") (UNSPEC_UZP2Q "uzp2")])
; op code for REV instructions (size within which elements are reversed).
(define_int_attr rev_op [(UNSPEC_REV64 "64") (UNSPEC_REV32 "32")
(UNSPEC_RSQRTS "frsqrts")
(UNSPEC_FADDP "faddp")
(UNSPEC_FADDV "faddv")
+ (UNSPEC_FEXPA "fexpa")
(UNSPEC_FMAXNMP "fmaxnmp")
(UNSPEC_FMAXNMV "fmaxnmv")
(UNSPEC_FMAXP "fmaxp")
(UNSPEC_FMLS "fmls")
(UNSPEC_FMLSLB "fmlslb")
(UNSPEC_FMLSLT "fmlslt")
- (UNSPEC_FEXPA "fexpa")
+ (UNSPEC_FMMLA "fmmla")
(UNSPEC_FTSMUL "ftsmul")
(UNSPEC_FTSSEL "ftssel")
(UNSPEC_COND_FABS "fabs")
Advanced SIMD and floating-point instructions. This option is enabled by
default for @option{-march=armv8.6-a}. Use of this option with architectures
prior to Armv8.2-A is not supported.
+@item f32mm
+Enable 32-bit Floating point Matrix Multiply instructions. This also enables
+SVE instructions. Use of this option with architectures prior to Armv8.2-A is
+not supported.
@item f64mm
Enable 64-bit Floating point Matrix Multiply instructions. This also enables
SVE instructions. Use of this option with architectures prior to Armv8.2-A is
+2020-01-31 Dennis Zhang <dennis.zhang@arm.com>
+ Matthew Malcomson <matthew.malcomson@arm.com>
+ Richard Sandiford <richard.sandiford@arm.com>
+
+ * lib/target-supports.exp (check_effective_target_aarch64_asm_i8mm_ok)
+ (check_effective_target_aarch64_asm_f32mm_ok): New target selectors.
+ * gcc.target/aarch64/pragma_cpp_predefs_2.c: Test handling of
+ __ARM_FEATURE_SVE_MATMUL_INT8, __ARM_FEATURE_SVE_MATMUL_FP32 and
+ __ARM_FEATURE_SVE_MATMUL_FP64.
+ * gcc.target/aarch64/sve/acle/asm/test_sve_acle.h (TEST_TRIPLE_Z)
+ (TEST_TRIPLE_Z_REV2, TEST_TRIPLE_Z_REV, TEST_TRIPLE_LANE_REG)
+ (TEST_TRIPLE_ZX): New macros.
+ * gcc.target/aarch64/sve/acle/asm/ld1ro_f16.c: Remove +sve and
+ rely on +f64mm to enable it.
+ * gcc.target/aarch64/sve/acle/asm/ld1ro_f32.c: Likewise.
+ * gcc.target/aarch64/sve/acle/asm/ld1ro_f64.c: Likewise.
+ * gcc.target/aarch64/sve/acle/asm/ld1ro_s16.c: Likewise.
+ * gcc.target/aarch64/sve/acle/asm/ld1ro_s32.c: Likewise.
+ * gcc.target/aarch64/sve/acle/asm/ld1ro_s64.c: Likewise.
+ * gcc.target/aarch64/sve/acle/asm/ld1ro_s8.c: Likewise.
+ * gcc.target/aarch64/sve/acle/asm/ld1ro_u16.c: Likewise.
+ * gcc.target/aarch64/sve/acle/asm/ld1ro_u32.c: Likewise.
+ * gcc.target/aarch64/sve/acle/asm/ld1ro_u64.c: Likewise.
+ * gcc.target/aarch64/sve/acle/asm/ld1ro_u8.c: Likewise.
+ * gcc.target/aarch64/sve/acle/asm/mmla_f32.c: New test.
+ * gcc.target/aarch64/sve/acle/asm/mmla_f64.c: Likewise.
+ * gcc.target/aarch64/sve/acle/asm/mmla_s32.c: Likewise.
+ * gcc.target/aarch64/sve/acle/asm/mmla_u32.c: Likewise.
+ * gcc.target/aarch64/sve/acle/asm/sudot_lane_s32.c: Likewise.
+ * gcc.target/aarch64/sve/acle/asm/sudot_s32.c: Likewise.
+ * gcc.target/aarch64/sve/acle/asm/trn1q_f16.c: Likewise.
+ * gcc.target/aarch64/sve/acle/asm/trn1q_f32.c: Likewise.
+ * gcc.target/aarch64/sve/acle/asm/trn1q_f64.c: Likewise.
+ * gcc.target/aarch64/sve/acle/asm/trn1q_s16.c: Likewise.
+ * gcc.target/aarch64/sve/acle/asm/trn1q_s32.c: Likewise.
+ * gcc.target/aarch64/sve/acle/asm/trn1q_s64.c: Likewise.
+ * gcc.target/aarch64/sve/acle/asm/trn1q_s8.c: Likewise.
+ * gcc.target/aarch64/sve/acle/asm/trn1q_u16.c: Likewise.
+ * gcc.target/aarch64/sve/acle/asm/trn1q_u32.c: Likewise.
+ * gcc.target/aarch64/sve/acle/asm/trn1q_u64.c: Likewise.
+ * gcc.target/aarch64/sve/acle/asm/trn1q_u8.c: Likewise.
+ * gcc.target/aarch64/sve/acle/asm/trn2q_f16.c: Likewise.
+ * gcc.target/aarch64/sve/acle/asm/trn2q_f32.c: Likewise.
+ * gcc.target/aarch64/sve/acle/asm/trn2q_f64.c: Likewise.
+ * gcc.target/aarch64/sve/acle/asm/trn2q_s16.c: Likewise.
+ * gcc.target/aarch64/sve/acle/asm/trn2q_s32.c: Likewise.
+ * gcc.target/aarch64/sve/acle/asm/trn2q_s64.c: Likewise.
+ * gcc.target/aarch64/sve/acle/asm/trn2q_s8.c: Likewise.
+ * gcc.target/aarch64/sve/acle/asm/trn2q_u16.c: Likewise.
+ * gcc.target/aarch64/sve/acle/asm/trn2q_u32.c: Likewise.
+ * gcc.target/aarch64/sve/acle/asm/trn2q_u64.c: Likewise.
+ * gcc.target/aarch64/sve/acle/asm/trn2q_u8.c: Likewise.
+ * gcc.target/aarch64/sve/acle/asm/usdot_lane_s32.c: Likewise.
+ * gcc.target/aarch64/sve/acle/asm/usdot_s32.c: Likewise.
+ * gcc.target/aarch64/sve/acle/asm/usmmla_s32.c: Likewise.
+ * gcc.target/aarch64/sve/acle/asm/uzp1q_f16.c: Likewise.
+ * gcc.target/aarch64/sve/acle/asm/uzp1q_f32.c: Likewise.
+ * gcc.target/aarch64/sve/acle/asm/uzp1q_f64.c: Likewise.
+ * gcc.target/aarch64/sve/acle/asm/uzp1q_s16.c: Likewise.
+ * gcc.target/aarch64/sve/acle/asm/uzp1q_s32.c: Likewise.
+ * gcc.target/aarch64/sve/acle/asm/uzp1q_s64.c: Likewise.
+ * gcc.target/aarch64/sve/acle/asm/uzp1q_s8.c: Likewise.
+ * gcc.target/aarch64/sve/acle/asm/uzp1q_u16.c: Likewise.
+ * gcc.target/aarch64/sve/acle/asm/uzp1q_u32.c: Likewise.
+ * gcc.target/aarch64/sve/acle/asm/uzp1q_u64.c: Likewise.
+ * gcc.target/aarch64/sve/acle/asm/uzp1q_u8.c: Likewise.
+ * gcc.target/aarch64/sve/acle/asm/uzp2q_f16.c: Likewise.
+ * gcc.target/aarch64/sve/acle/asm/uzp2q_f32.c: Likewise.
+ * gcc.target/aarch64/sve/acle/asm/uzp2q_f64.c: Likewise.
+ * gcc.target/aarch64/sve/acle/asm/uzp2q_s16.c: Likewise.
+ * gcc.target/aarch64/sve/acle/asm/uzp2q_s32.c: Likewise.
+ * gcc.target/aarch64/sve/acle/asm/uzp2q_s64.c: Likewise.
+ * gcc.target/aarch64/sve/acle/asm/uzp2q_s8.c: Likewise.
+ * gcc.target/aarch64/sve/acle/asm/uzp2q_u16.c: Likewise.
+ * gcc.target/aarch64/sve/acle/asm/uzp2q_u32.c: Likewise.
+ * gcc.target/aarch64/sve/acle/asm/uzp2q_u64.c: Likewise.
+ * gcc.target/aarch64/sve/acle/asm/uzp2q_u8.c: Likewise.
+ * gcc.target/aarch64/sve/acle/asm/zip1q_f16.c: Likewise.
+ * gcc.target/aarch64/sve/acle/asm/zip1q_f32.c: Likewise.
+ * gcc.target/aarch64/sve/acle/asm/zip1q_f64.c: Likewise.
+ * gcc.target/aarch64/sve/acle/asm/zip1q_s16.c: Likewise.
+ * gcc.target/aarch64/sve/acle/asm/zip1q_s32.c: Likewise.
+ * gcc.target/aarch64/sve/acle/asm/zip1q_s64.c: Likewise.
+ * gcc.target/aarch64/sve/acle/asm/zip1q_s8.c: Likewise.
+ * gcc.target/aarch64/sve/acle/asm/zip1q_u16.c: Likewise.
+ * gcc.target/aarch64/sve/acle/asm/zip1q_u32.c: Likewise.
+ * gcc.target/aarch64/sve/acle/asm/zip1q_u64.c: Likewise.
+ * gcc.target/aarch64/sve/acle/asm/zip1q_u8.c: Likewise.
+ * gcc.target/aarch64/sve/acle/asm/zip2q_f16.c: Likewise.
+ * gcc.target/aarch64/sve/acle/asm/zip2q_f32.c: Likewise.
+ * gcc.target/aarch64/sve/acle/asm/zip2q_f64.c: Likewise.
+ * gcc.target/aarch64/sve/acle/asm/zip2q_s16.c: Likewise.
+ * gcc.target/aarch64/sve/acle/asm/zip2q_s32.c: Likewise.
+ * gcc.target/aarch64/sve/acle/asm/zip2q_s64.c: Likewise.
+ * gcc.target/aarch64/sve/acle/asm/zip2q_s8.c: Likewise.
+ * gcc.target/aarch64/sve/acle/asm/zip2q_u16.c: Likewise.
+ * gcc.target/aarch64/sve/acle/asm/zip2q_u32.c: Likewise.
+ * gcc.target/aarch64/sve/acle/asm/zip2q_u64.c: Likewise.
+ * gcc.target/aarch64/sve/acle/asm/zip2q_u8.c: Likewise.
+ * gcc.target/aarch64/sve/acle/general-c/mmla_1.c: Likewise.
+ * gcc.target/aarch64/sve/acle/general-c/mmla_2.c: Likewise.
+ * gcc.target/aarch64/sve/acle/general-c/mmla_3.c: Likewise.
+ * gcc.target/aarch64/sve/acle/general-c/mmla_4.c: Likewise.
+ * gcc.target/aarch64/sve/acle/general-c/mmla_5.c: Likewise.
+ * gcc.target/aarch64/sve/acle/general-c/mmla_6.c: Likewise.
+ * gcc.target/aarch64/sve/acle/general-c/mmla_7.c: Likewise.
+ * gcc.target/aarch64/sve/acle/general-c/ternary_intq_uintq_lane_1.c:
+ Likewise.
+ * gcc.target/aarch64/sve/acle/general-c/ternary_intq_uintq_opt_n_1.c:
+ Likewise.
+ * gcc.target/aarch64/sve/acle/general-c/ternary_uintq_intq_1.c:
+ Likewise.
+ * gcc.target/aarch64/sve/acle/general-c/ternary_uintq_intq_lane_1.c:
+ Likewise.
+ * gcc.target/aarch64/sve/acle/general-c/ternary_uintq_intq_opt_n_1.c:
+ Likewise.
+
2020-01-31 Richard Sandiford <richard.sandiford@arm.com>
* gcc.target/aarch64/sve/pcs/args_1.c: Require lp64 for
#ifdef __ARM_FEATURE_MATMUL_INT8
#error "__ARM_FEATURE_MATMUL_INT8 is defined but should not be!"
#endif
-
-#ifdef __ARM_FEATURE_MATMUL_FP64
-#error "__ARM_FEATURE_MATMUL_FP64 is defined but should not be!"
+#ifdef __ARM_FEATURE_SVE_MATMUL_INT8
+#error "__ARM_FEATURE_SVE_MATMUL_INT8 is defined but should not be!"
+#endif
+#ifdef __ARM_FEATURE_SVE_MATMUL_FP32
+#error "__ARM_FEATURE_SVE_MATMUL_FP32 is defined but should not be!"
+#endif
+#ifdef __ARM_FEATURE_SVE_MATMUL_FP64
+#error "__ARM_FEATURE_SVE_MATMUL_FP64 is defined but should not be!"
#endif
#pragma GCC push_options
#ifndef __ARM_FEATURE_MATMUL_INT8
#error "__ARM_FEATURE_MATMUL_INT8 is not defined but should be!"
#endif
-#ifdef __ARM_FEATURE_MATMUL_FP64
-#error "__ARM_FEATURE_MATMUL_FP64 is defined but should not be!"
+#ifdef __ARM_FEATURE_SVE
+#error "__ARM_FEATURE_SVE is defined but should not be!"
+#endif
+#ifdef __ARM_FEATURE_SVE_MATMUL_INT8
+#error "__ARM_FEATURE_SVE_MATMUL_INT8 is defined but should not be!"
+#endif
+#ifdef __ARM_FEATURE_SVE_MATMUL_FP32
+#error "__ARM_FEATURE_SVE_MATMUL_FP32 is defined but should not be!"
+#endif
+#ifdef __ARM_FEATURE_SVE_MATMUL_FP64
+#error "__ARM_FEATURE_SVE_MATMUL_FP64 is defined but should not be!"
#endif
#pragma GCC pop_options
+#pragma GCC push_options
+#pragma GCC target ("arch=armv8.6-a+sve")
+#ifndef __ARM_FEATURE_MATMUL_INT8
+#error "__ARM_FEATURE_MATMUL_INT8 is not defined but should be!"
+#endif
+#ifndef __ARM_FEATURE_SVE
+#error "__ARM_FEATURE_SVE is not defined but should be!"
+#endif
+#ifndef __ARM_FEATURE_SVE_MATMUL_INT8
+#error "__ARM_FEATURE_SVE_MATMUL_INT8 is not defined but should be!"
+#endif
+#ifdef __ARM_FEATURE_SVE_MATMUL_FP32
+#error "__ARM_FEATURE_SVE_MATMUL_FP32 is defined but should not be!"
+#endif
+#ifdef __ARM_FEATURE_SVE_MATMUL_FP64
+#error "__ARM_FEATURE_SVE_MATMUL_FP64 is defined but should not be!"
+#endif
+#pragma GCC pop_options
+
#pragma GCC push_options
#pragma GCC target ("arch=armv8.2-a+i8mm")
#ifndef __ARM_FEATURE_MATMUL_INT8
#error "__ARM_FEATURE_MATMUL_INT8 is not defined but should be!"
#endif
+#ifdef __ARM_FEATURE_SVE
+#error "__ARM_FEATURE_SVE is defined but should not be!"
+#endif
+#ifdef __ARM_FEATURE_SVE_MATMUL_INT8
+#error "__ARM_FEATURE_SVE_MATMUL_INT8 is defined but should not be!"
+#endif
+#pragma GCC pop_options
+
+#pragma GCC push_options
+#pragma GCC target ("arch=armv8.2-a+i8mm+sve")
+#ifndef __ARM_FEATURE_MATMUL_INT8
+#error "__ARM_FEATURE_MATMUL_INT8 is not defined but should be!"
+#endif
+#ifndef __ARM_FEATURE_SVE
+#error "__ARM_FEATURE_SVE is not defined but should be!"
+#endif
+#ifndef __ARM_FEATURE_SVE_MATMUL_INT8
+#error "__ARM_FEATURE_SVE_MATMUL_INT8 is not defined but should be!"
+#endif
+#ifdef __ARM_FEATURE_SVE_MATMUL_FP32
+#error "__ARM_FEATURE_SVE_MATMUL_FP32 is defined but should not be!"
+#endif
+#ifdef __ARM_FEATURE_SVE_MATMUL_FP64
+#error "__ARM_FEATURE_SVE_MATMUL_FP64 is defined but should not be!"
+#endif
#pragma GCC pop_options
+#pragma GCC push_options
+#pragma GCC target ("arch=armv8.2-a+f32mm")
+#ifndef __ARM_FEATURE_SVE
+#error "__ARM_FEATURE_SVE is not defined but should be!"
+#endif
+#ifdef __ARM_FEATURE_SVE_MATMUL_INT8
+#error "__ARM_FEATURE_SVE_MATMUL_INT8 is defined but should not be!"
+#endif
+#ifndef __ARM_FEATURE_SVE_MATMUL_FP32
+#error "__ARM_FEATURE_SVE_MATMUL_FP32 is not defined but should be!"
+#endif
+#ifdef __ARM_FEATURE_SVE_MATMUL_FP64
+#error "__ARM_FEATURE_SVE_MATMUL_FP64 is defined but should not be!"
+#endif
+#pragma GCC pop_options
+
#pragma GCC push_options
#pragma GCC target ("arch=armv8.2-a+f64mm")
-#ifndef __ARM_FEATURE_MATMUL_FP64
-#error "__ARM_FEATURE_MATMUL_FP64 is not defined but should be!"
+#ifndef __ARM_FEATURE_SVE
+#error "__ARM_FEATURE_SVE is not defined but should be!"
+#endif
+#ifdef __ARM_FEATURE_SVE_MATMUL_INT8
+#error "__ARM_FEATURE_SVE_MATMUL_INT8 is defined but should not be!"
+#endif
+#ifdef __ARM_FEATURE_SVE_MATMUL_FP32
+#error "__ARM_FEATURE_SVE_MATMUL_FP32 is defined but should not be!"
+#endif
+#ifndef __ARM_FEATURE_SVE_MATMUL_FP64
+#error "__ARM_FEATURE_SVE_MATMUL_FP64 is not defined but should be!"
#endif
#pragma GCC pop_options
#ifdef __ARM_FEATURE_MATMUL_INT8
#error "__ARM_FEATURE_MATMUL_INT8 is defined but should not be!"
#endif
+#ifdef __ARM_FEATURE_SVE_MATMUL_FP32
+#error "__ARM_FEATURE_SVE_MATMUL_FP32 is defined but should not be!"
+#endif
+#ifdef __ARM_FEATURE_SVE_MATMUL_FP64
+#error "__ARM_FEATURE_SVE_MATMUL_FP64 is defined but should not be!"
+#endif
#pragma GCC pop_options
#pragma GCC push_options
#ifdef __ARM_FEATURE_MATMUL_INT8
#error "__ARM_FEATURE_MATMUL_INT8 is defined but should not be!"
#endif
+#ifdef __ARM_FEATURE_SVE_MATMUL_FP32
+#error "__ARM_FEATURE_SVE_MATMUL_FP32 is defined but should not be!"
+#endif
+#ifdef __ARM_FEATURE_SVE_MATMUL_FP64
+#error "__ARM_FEATURE_SVE_MATMUL_FP64 is defined but should not be!"
+#endif
#pragma GCC pop_options
#ifdef __ARM_FEATURE_BF16_SCALAR_ARITHMETIC
/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */
-/* { dg-additional-options "-march=armv8.6-a+sve+f64mm" } */
+/* { dg-additional-options "-march=armv8.6-a+f64mm" } */
/* { dg-require-effective-target aarch64_asm_f64mm_ok } */
#include "test_sve_acle.h"
/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */
-/* { dg-additional-options "-march=armv8.6-a+sve+f64mm" } */
+/* { dg-additional-options "-march=armv8.6-a+f64mm" } */
/* { dg-require-effective-target aarch64_asm_f64mm_ok } */
#include "test_sve_acle.h"
/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */
-/* { dg-additional-options "-march=armv8.6-a+sve+f64mm" } */
+/* { dg-additional-options "-march=armv8.6-a+f64mm" } */
/* { dg-require-effective-target aarch64_asm_f64mm_ok } */
#include "test_sve_acle.h"
/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */
-/* { dg-additional-options "-march=armv8.6-a+sve+f64mm" } */
+/* { dg-additional-options "-march=armv8.6-a+f64mm" } */
/* { dg-require-effective-target aarch64_asm_f64mm_ok } */
#include "test_sve_acle.h"
/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */
-/* { dg-additional-options "-march=armv8.6-a+sve+f64mm" } */
+/* { dg-additional-options "-march=armv8.6-a+f64mm" } */
/* { dg-require-effective-target aarch64_asm_f64mm_ok } */
#include "test_sve_acle.h"
/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */
-/* { dg-additional-options "-march=armv8.6-a+sve+f64mm" } */
+/* { dg-additional-options "-march=armv8.6-a+f64mm" } */
/* { dg-require-effective-target aarch64_asm_f64mm_ok } */
#include "test_sve_acle.h"
/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */
-/* { dg-additional-options "-march=armv8.6-a+sve+f64mm" } */
+/* { dg-additional-options "-march=armv8.6-a+f64mm" } */
/* { dg-require-effective-target aarch64_asm_f64mm_ok } */
#include "test_sve_acle.h"
/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */
-/* { dg-additional-options "-march=armv8.6-a+sve+f64mm" } */
+/* { dg-additional-options "-march=armv8.6-a+f64mm" } */
/* { dg-require-effective-target aarch64_asm_f64mm_ok } */
#include "test_sve_acle.h"
/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */
-/* { dg-additional-options "-march=armv8.6-a+sve+f64mm" } */
+/* { dg-additional-options "-march=armv8.6-a+f64mm" } */
/* { dg-require-effective-target aarch64_asm_f64mm_ok } */
#include "test_sve_acle.h"
/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */
-/* { dg-additional-options "-march=armv8.6-a+sve+f64mm" } */
+/* { dg-additional-options "-march=armv8.6-a+f64mm" } */
/* { dg-require-effective-target aarch64_asm_f64mm_ok } */
#include "test_sve_acle.h"
/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */
-/* { dg-additional-options "-march=armv8.6-a+sve+f64mm" } */
+/* { dg-additional-options "-march=armv8.6-a+f64mm" } */
/* { dg-require-effective-target aarch64_asm_f64mm_ok } */
#include "test_sve_acle.h"
--- /dev/null
+/* { dg-require-effective-target aarch64_asm_f32mm_ok } */
+/* { dg-additional-options "-march=armv8.2-a+f32mm" } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sve_acle.h"
+
+/*
+** mmla_f32_tied1:
+** fmmla z0\.s, z4\.s, z5\.s
+** ret
+*/
+TEST_DUAL_Z (mmla_f32_tied1, svfloat32_t, svfloat32_t,
+ z0 = svmmla_f32 (z0, z4, z5),
+ z0 = svmmla (z0, z4, z5))
+
+/*
+** mmla_f32_tied2:
+** mov (z[0-9]+)\.d, z0\.d
+** movprfx z0, z4
+** fmmla z0\.s, \1\.s, z1\.s
+** ret
+*/
+TEST_DUAL_Z_REV (mmla_f32_tied2, svfloat32_t, svfloat32_t,
+ z0_res = svmmla_f32 (z4, z0, z1),
+ z0_res = svmmla (z4, z0, z1))
+
+/*
+** mmla_f32_tied3:
+** mov (z[0-9]+)\.d, z0\.d
+** movprfx z0, z4
+** fmmla z0\.s, z1\.s, \1\.s
+** ret
+*/
+TEST_DUAL_Z_REV (mmla_f32_tied3, svfloat32_t, svfloat32_t,
+ z0_res = svmmla_f32 (z4, z1, z0),
+ z0_res = svmmla (z4, z1, z0))
+
+/*
+** mmla_f32_untied:
+** movprfx z0, z1
+** fmmla z0\.s, z4\.s, z5\.s
+** ret
+*/
+TEST_DUAL_Z (mmla_f32_untied, svfloat32_t, svfloat32_t,
+ z0 = svmmla_f32 (z1, z4, z5),
+ z0 = svmmla (z1, z4, z5))
--- /dev/null
+/* { dg-require-effective-target aarch64_asm_f64mm_ok } */
+/* { dg-additional-options "-march=armv8.2-a+f64mm" } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sve_acle.h"
+
+/*
+** mmla_f64_tied1:
+** fmmla z0\.d, z4\.d, z5\.d
+** ret
+*/
+TEST_DUAL_Z (mmla_f64_tied1, svfloat64_t, svfloat64_t,
+ z0 = svmmla_f64 (z0, z4, z5),
+ z0 = svmmla (z0, z4, z5))
+
+/*
+** mmla_f64_tied2:
+** mov (z[0-9]+\.d), z0\.d
+** movprfx z0, z4
+** fmmla z0\.d, \1, z1\.d
+** ret
+*/
+TEST_DUAL_Z_REV (mmla_f64_tied2, svfloat64_t, svfloat64_t,
+ z0_res = svmmla_f64 (z4, z0, z1),
+ z0_res = svmmla (z4, z0, z1))
+
+/*
+** mmla_f64_tied3:
+** mov (z[0-9]+\.d), z0\.d
+** movprfx z0, z4
+** fmmla z0\.d, z1\.d, \1
+** ret
+*/
+TEST_DUAL_Z_REV (mmla_f64_tied3, svfloat64_t, svfloat64_t,
+ z0_res = svmmla_f64 (z4, z1, z0),
+ z0_res = svmmla (z4, z1, z0))
+
+/*
+** mmla_f64_untied:
+** movprfx z0, z1
+** fmmla z0\.d, z4\.d, z5\.d
+** ret
+*/
+TEST_DUAL_Z (mmla_f64_untied, svfloat64_t, svfloat64_t,
+ z0 = svmmla_f64 (z1, z4, z5),
+ z0 = svmmla (z1, z4, z5))
--- /dev/null
+/* { dg-require-effective-target aarch64_asm_i8mm_ok } */
+/* { dg-additional-options "-march=armv8.2-a+sve+i8mm" } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sve_acle.h"
+
+/*
+** mmla_s32_tied1:
+** smmla z0\.s, z4\.b, z5\.b
+** ret
+*/
+TEST_DUAL_Z (mmla_s32_tied1, svint32_t, svint8_t,
+ z0 = svmmla_s32 (z0, z4, z5),
+ z0 = svmmla (z0, z4, z5))
+
+/*
+** mmla_s32_tied2:
+** mov (z[0-9]+)\.d, z0\.d
+** movprfx z0, z4
+** smmla z0\.s, \1\.b, z1\.b
+** ret
+*/
+TEST_DUAL_Z_REV (mmla_s32_tied2, svint32_t, svint8_t,
+ z0_res = svmmla_s32 (z4, z0, z1),
+ z0_res = svmmla (z4, z0, z1))
+
+/*
+** mmla_s32_tied3:
+** mov (z[0-9]+)\.d, z0\.d
+** movprfx z0, z4
+** smmla z0\.s, z1\.b, \1\.b
+** ret
+*/
+TEST_DUAL_Z_REV (mmla_s32_tied3, svint32_t, svint8_t,
+ z0_res = svmmla_s32 (z4, z1, z0),
+ z0_res = svmmla (z4, z1, z0))
+
+/*
+** mmla_s32_untied:
+** movprfx z0, z1
+** smmla z0\.s, z4\.b, z5\.b
+** ret
+*/
+TEST_DUAL_Z (mmla_s32_untied, svint32_t, svint8_t,
+ z0 = svmmla_s32 (z1, z4, z5),
+ z0 = svmmla (z1, z4, z5))
--- /dev/null
+/* { dg-require-effective-target aarch64_asm_i8mm_ok } */
+/* { dg-additional-options "-march=armv8.2-a+sve+i8mm" } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sve_acle.h"
+
+/*
+** mmla_u32_tied1:
+** ummla z0\.s, z4\.b, z5\.b
+** ret
+*/
+TEST_DUAL_Z (mmla_u32_tied1, svuint32_t, svuint8_t,
+ z0 = svmmla_u32 (z0, z4, z5),
+ z0 = svmmla (z0, z4, z5))
+
+/*
+** mmla_u32_tied2:
+** mov (z[0-9]+)\.d, z0\.d
+** movprfx z0, z4
+** ummla z0\.s, \1\.b, z1\.b
+** ret
+*/
+TEST_DUAL_Z_REV (mmla_u32_tied2, svuint32_t, svuint8_t,
+ z0_res = svmmla_u32 (z4, z0, z1),
+ z0_res = svmmla (z4, z0, z1))
+
+/*
+** mmla_u32_tied3:
+** mov (z[0-9]+)\.d, z0\.d
+** movprfx z0, z4
+** ummla z0\.s, z1\.b, \1\.b
+** ret
+*/
+TEST_DUAL_Z_REV (mmla_u32_tied3, svuint32_t, svuint8_t,
+ z0_res = svmmla_u32 (z4, z1, z0),
+ z0_res = svmmla (z4, z1, z0))
+
+/*
+** mmla_u32_untied:
+** movprfx z0, z1
+** ummla z0\.s, z4\.b, z5\.b
+** ret
+*/
+TEST_DUAL_Z (mmla_u32_untied, svuint32_t, svuint8_t,
+ z0 = svmmla_u32 (z1, z4, z5),
+ z0 = svmmla (z1, z4, z5))
--- /dev/null
+/* { dg-require-effective-target aarch64_asm_i8mm_ok } */
+/* { dg-additional-options "-march=armv8.2-a+sve+i8mm" } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sve_acle.h"
+
+/*
+** sudot_lane_0_s32_tied1:
+** sudot z0\.s, z2\.b, z4\.b\[0\]
+** ret
+*/
+TEST_TRIPLE_Z (sudot_lane_0_s32_tied1, svint32_t, svint8_t, svuint8_t,
+ z0 = svsudot_lane_s32 (z0, z2, z4, 0),
+ z0 = svsudot_lane (z0, z2, z4, 0))
+
+/*
+** sudot_lane_0_s32_tied2:
+** mov (z[0-9]+)\.d, z0\.d
+** movprfx z0, z2
+** sudot z0\.s, \1\.b, z4\.b\[0\]
+** ret
+*/
+TEST_TRIPLE_Z_REV2 (sudot_lane_0_s32_tied2, svint32_t, svint8_t, svuint8_t,
+ z0_res = svsudot_lane_s32 (z2, z0, z4, 0),
+ z0_res = svsudot_lane (z2, z0, z4, 0))
+
+/*
+** sudot_lane_0_s32_tied3:
+** mov (z[0-9]+)\.d, z0\.d
+** movprfx z0, z4
+** sudot z0\.s, z2\.b, \1\.b\[0\]
+** ret
+*/
+TEST_TRIPLE_Z_REV (sudot_lane_0_s32_tied3, svint32_t, svint8_t, svuint8_t,
+ z0_res = svsudot_lane_s32 (z4, z2, z0, 0),
+ z0_res = svsudot_lane (z4, z2, z0, 0))
+
+/*
+** sudot_lane_0_s32_untied:
+** movprfx z0, z1
+** sudot z0\.s, z2\.b, z4\.b\[0\]
+** ret
+*/
+TEST_TRIPLE_Z (sudot_lane_0_s32_untied, svint32_t, svint8_t, svuint8_t,
+ z0 = svsudot_lane_s32 (z1, z2, z4, 0),
+ z0 = svsudot_lane (z1, z2, z4, 0))
+
+/*
+** sudot_lane_1_s32:
+** sudot z0\.s, z2\.b, z5\.b\[1\]
+** ret
+*/
+TEST_TRIPLE_Z (sudot_lane_1_s32, svint32_t, svint8_t, svuint8_t,
+ z0 = svsudot_lane_s32 (z0, z2, z5, 1),
+ z0 = svsudot_lane (z0, z2, z5, 1))
+
+/*
+** sudot_lane_2_s32:
+** sudot z0\.s, z2\.b, z5\.b\[2\]
+** ret
+*/
+TEST_TRIPLE_Z (sudot_lane_2_s32, svint32_t, svint8_t, svuint8_t,
+ z0 = svsudot_lane_s32 (z0, z2, z5, 2),
+ z0 = svsudot_lane (z0, z2, z5, 2))
+
+/*
+** sudot_lane_3_s32:
+** sudot z0\.s, z2\.b, z5\.b\[3\]
+** ret
+*/
+TEST_TRIPLE_Z (sudot_lane_3_s32, svint32_t, svint8_t, svuint8_t,
+ z0 = svsudot_lane_s32 (z0, z2, z5, 3),
+ z0 = svsudot_lane (z0, z2, z5, 3))
+
+/*
+** sudot_lane_z8_s32:
+** str d8, \[sp, -16\]!
+** mov (z[0-7])\.d, z8\.d
+** sudot z0\.s, z1\.b, \1\.b\[1\]
+** ldr d8, \[sp\], 16
+** ret
+*/
+TEST_TRIPLE_LANE_REG (sudot_lane_z8_s32, svint32_t, svint8_t, svuint8_t,
+ z8,
+ z0 = svsudot_lane_s32 (z0, z1, z8, 1),
+ z0 = svsudot_lane (z0, z1, z8, 1))
+
+/*
+** sudot_lane_z16_s32:
+** mov (z[0-7])\.d, z16\.d
+** sudot z0\.s, z1\.b, \1\.b\[1\]
+** ret
+*/
+TEST_TRIPLE_LANE_REG (sudot_lane_z16_s32, svint32_t, svint8_t, svuint8_t,
+ z16,
+ z0 = svsudot_lane_s32 (z0, z1, z16, 1),
+ z0 = svsudot_lane (z0, z1, z16, 1))
--- /dev/null
+/* { dg-require-effective-target aarch64_asm_i8mm_ok } */
+/* { dg-additional-options "-march=armv8.2-a+sve+i8mm" } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sve_acle.h"
+
+/*
+** sudot_s32_tied1:
+** usdot z0\.s, z4\.b, z2\.b
+** ret
+*/
+TEST_TRIPLE_Z (sudot_s32_tied1, svint32_t, svint8_t, svuint8_t,
+ z0 = svsudot_s32 (z0, z2, z4),
+ z0 = svsudot (z0, z2, z4))
+
+/*
+** sudot_s32_tied2:
+** mov (z[0-9]+)\.d, z0\.d
+** movprfx z0, z4
+** usdot z0\.s, \1\.b, z2\.b
+** ret
+*/
+TEST_TRIPLE_Z_REV (sudot_s32_tied2, svint32_t, svint8_t, svuint8_t,
+ z0_res = svsudot_s32 (z4, z2, z0),
+ z0_res = svsudot (z4, z2, z0))
+
+/*
+** sudot_w0_s32_tied:
+** mov (z[0-9]+\.b), w0
+** usdot z0\.s, \1, z2\.b
+** ret
+*/
+TEST_TRIPLE_ZX (sudot_w0_s32_tied, svint32_t, svint8_t, uint8_t,
+ z0 = svsudot_n_s32 (z0, z2, x0),
+ z0 = svsudot (z0, z2, x0))
+
+/*
+** sudot_9_s32_tied:
+** mov (z[0-9]+\.b), #9
+** usdot z0\.s, \1, z2\.b
+** ret
+*/
+TEST_TRIPLE_Z (sudot_9_s32_tied, svint32_t, svint8_t, uint8_t,
+ z0 = svsudot_n_s32 (z0, z2, 9),
+ z0 = svsudot (z0, z2, 9))
return z0_res; \
}
+#define TEST_TRIPLE_Z(NAME, TYPE1, TYPE2, TYPE3, CODE1, CODE2) \
+ PROTO (NAME, TYPE1, (TYPE1 z0, TYPE1 z1, TYPE2 z2, TYPE2 z3, \
+ TYPE3 z4, TYPE3 z5, \
+ svbool_t p0, svbool_t p1)) \
+ { \
+ INVOKE (CODE1, CODE2); \
+ return z0; \
+ }
+
+#define TEST_TRIPLE_Z_REV2(NAME, TYPE1, TYPE2, TYPE3, CODE1, CODE2)\
+ PROTO (NAME, TYPE1, (TYPE2 z0, TYPE2 z1, TYPE1 z2, TYPE1 z3, \
+ TYPE3 z4, TYPE3 z5, \
+ svbool_t p0, svbool_t p1)) \
+ { \
+ TYPE1 z0_res; \
+ INVOKE (CODE1, CODE2); \
+ return z0_res; \
+ }
+
+#define TEST_TRIPLE_Z_REV(NAME, TYPE1, TYPE2, TYPE3, CODE1, CODE2)\
+ PROTO (NAME, TYPE1, (TYPE3 z0, TYPE3 z1, TYPE2 z2, TYPE2 z3, \
+ TYPE1 z4, TYPE1 z5, \
+ svbool_t p0, svbool_t p1)) \
+ { \
+ TYPE1 z0_res; \
+ INVOKE (CODE1, CODE2); \
+ return z0_res; \
+ }
+
#define TEST_DUAL_LANE_REG(NAME, ZTYPE1, ZTYPE2, REG, CODE1, CODE2) \
PROTO (NAME, void, (void)) \
{ \
__asm volatile ("" :: "w" (z0)); \
}
+#define TEST_TRIPLE_LANE_REG(NAME, ZTYPE1, ZTYPE2, ZTYPE3, REG, CODE1, CODE2) \
+ PROTO (NAME, void, (void)) \
+ { \
+ register ZTYPE1 z0 __asm ("z0"); \
+ register ZTYPE2 z1 __asm ("z1"); \
+ register ZTYPE3 REG __asm (#REG); \
+ __asm volatile ("" : "=w" (z0), "=w" (z1), "=w" (REG)); \
+ INVOKE (CODE1, CODE2); \
+ __asm volatile ("" :: "w" (z0)); \
+ }
+
#define TEST_TYPE_CHANGE_Z(NAME, TYPE1, TYPE2, CODE1, CODE2) \
PROTO (NAME, TYPE1, (TYPE2 z0, TYPE2 z1, TYPE2 z2, TYPE2 z3, \
svbool_t p0, svbool_t p1)) \
return z0; \
}
+#define TEST_TRIPLE_ZX(NAME, TYPE1, TYPE2, TYPE3, CODE1, CODE2) \
+ PROTO (NAME, TYPE1, (TYPE1 z0, TYPE1 z1, TYPE2 z2, TYPE2 z3, \
+ TYPE3 x0, TYPE3 x1, \
+ svbool_t p0, svbool_t p1)) \
+ { \
+ INVOKE (CODE1, CODE2); \
+ return z0; \
+ }
+
#define TEST_TYPE_CHANGE_ZX(NAME, ZTYPE1, ZTYPE2, STYPE, CODE1, CODE2) \
PROTO (NAME, ZTYPE1, (ZTYPE2 z0, ZTYPE2 z1, ZTYPE2 z2, \
ZTYPE2 z3, svbool_t p0, svbool_t p1, \
--- /dev/null
+/* { dg-require-effective-target aarch64_asm_f64mm_ok } */
+/* { dg-additional-options "-march=armv8.2-a+f64mm" } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sve_acle.h"
+
+/*
+** trn1q_f16_tied1:
+** trn1 z0\.q, z0\.q, z1\.q
+** ret
+*/
+TEST_UNIFORM_Z (trn1q_f16_tied1, svfloat16_t,
+ z0 = svtrn1q_f16 (z0, z1),
+ z0 = svtrn1q (z0, z1))
+
+/*
+** trn1q_f16_tied2:
+** trn1 z0\.q, z1\.q, z0\.q
+** ret
+*/
+TEST_UNIFORM_Z (trn1q_f16_tied2, svfloat16_t,
+ z0 = svtrn1q_f16 (z1, z0),
+ z0 = svtrn1q (z1, z0))
+
+/*
+** trn1q_f16_untied:
+** trn1 z0\.q, z1\.q, z2\.q
+** ret
+*/
+TEST_UNIFORM_Z (trn1q_f16_untied, svfloat16_t,
+ z0 = svtrn1q_f16 (z1, z2),
+ z0 = svtrn1q (z1, z2))
--- /dev/null
+/* { dg-require-effective-target aarch64_asm_f64mm_ok } */
+/* { dg-additional-options "-march=armv8.2-a+f64mm" } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sve_acle.h"
+
+/*
+** trn1q_f32_tied1:
+** trn1 z0\.q, z0\.q, z1\.q
+** ret
+*/
+TEST_UNIFORM_Z (trn1q_f32_tied1, svfloat32_t,
+ z0 = svtrn1q_f32 (z0, z1),
+ z0 = svtrn1q (z0, z1))
+
+/*
+** trn1q_f32_tied2:
+** trn1 z0\.q, z1\.q, z0\.q
+** ret
+*/
+TEST_UNIFORM_Z (trn1q_f32_tied2, svfloat32_t,
+ z0 = svtrn1q_f32 (z1, z0),
+ z0 = svtrn1q (z1, z0))
+
+/*
+** trn1q_f32_untied:
+** trn1 z0\.q, z1\.q, z2\.q
+** ret
+*/
+TEST_UNIFORM_Z (trn1q_f32_untied, svfloat32_t,
+ z0 = svtrn1q_f32 (z1, z2),
+ z0 = svtrn1q (z1, z2))
--- /dev/null
+/* { dg-require-effective-target aarch64_asm_f64mm_ok } */
+/* { dg-additional-options "-march=armv8.2-a+f64mm" } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sve_acle.h"
+
+/*
+** trn1q_f64_tied1:
+** trn1 z0\.q, z0\.q, z1\.q
+** ret
+*/
+TEST_UNIFORM_Z (trn1q_f64_tied1, svfloat64_t,
+ z0 = svtrn1q_f64 (z0, z1),
+ z0 = svtrn1q (z0, z1))
+
+/*
+** trn1q_f64_tied2:
+** trn1 z0\.q, z1\.q, z0\.q
+** ret
+*/
+TEST_UNIFORM_Z (trn1q_f64_tied2, svfloat64_t,
+ z0 = svtrn1q_f64 (z1, z0),
+ z0 = svtrn1q (z1, z0))
+
+/*
+** trn1q_f64_untied:
+** trn1 z0\.q, z1\.q, z2\.q
+** ret
+*/
+TEST_UNIFORM_Z (trn1q_f64_untied, svfloat64_t,
+ z0 = svtrn1q_f64 (z1, z2),
+ z0 = svtrn1q (z1, z2))
--- /dev/null
+/* { dg-require-effective-target aarch64_asm_f64mm_ok } */
+/* { dg-additional-options "-march=armv8.2-a+f64mm" } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sve_acle.h"
+
+/*
+** trn1q_s16_tied1:
+** trn1 z0\.q, z0\.q, z1\.q
+** ret
+*/
+TEST_UNIFORM_Z (trn1q_s16_tied1, svint16_t,
+ z0 = svtrn1q_s16 (z0, z1),
+ z0 = svtrn1q (z0, z1))
+
+/*
+** trn1q_s16_tied2:
+** trn1 z0\.q, z1\.q, z0\.q
+** ret
+*/
+TEST_UNIFORM_Z (trn1q_s16_tied2, svint16_t,
+ z0 = svtrn1q_s16 (z1, z0),
+ z0 = svtrn1q (z1, z0))
+
+/*
+** trn1q_s16_untied:
+** trn1 z0\.q, z1\.q, z2\.q
+** ret
+*/
+TEST_UNIFORM_Z (trn1q_s16_untied, svint16_t,
+ z0 = svtrn1q_s16 (z1, z2),
+ z0 = svtrn1q (z1, z2))
--- /dev/null
+/* { dg-require-effective-target aarch64_asm_f64mm_ok } */
+/* { dg-additional-options "-march=armv8.2-a+f64mm" } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sve_acle.h"
+
+/*
+** trn1q_s32_tied1:
+** trn1 z0\.q, z0\.q, z1\.q
+** ret
+*/
+TEST_UNIFORM_Z (trn1q_s32_tied1, svint32_t,
+ z0 = svtrn1q_s32 (z0, z1),
+ z0 = svtrn1q (z0, z1))
+
+/*
+** trn1q_s32_tied2:
+** trn1 z0\.q, z1\.q, z0\.q
+** ret
+*/
+TEST_UNIFORM_Z (trn1q_s32_tied2, svint32_t,
+ z0 = svtrn1q_s32 (z1, z0),
+ z0 = svtrn1q (z1, z0))
+
+/*
+** trn1q_s32_untied:
+** trn1 z0\.q, z1\.q, z2\.q
+** ret
+*/
+TEST_UNIFORM_Z (trn1q_s32_untied, svint32_t,
+ z0 = svtrn1q_s32 (z1, z2),
+ z0 = svtrn1q (z1, z2))
--- /dev/null
+/* { dg-require-effective-target aarch64_asm_f64mm_ok } */
+/* { dg-additional-options "-march=armv8.2-a+f64mm" } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sve_acle.h"
+
+/*
+** trn1q_s64_tied1:
+** trn1 z0\.q, z0\.q, z1\.q
+** ret
+*/
+TEST_UNIFORM_Z (trn1q_s64_tied1, svint64_t,
+ z0 = svtrn1q_s64 (z0, z1),
+ z0 = svtrn1q (z0, z1))
+
+/*
+** trn1q_s64_tied2:
+** trn1 z0\.q, z1\.q, z0\.q
+** ret
+*/
+TEST_UNIFORM_Z (trn1q_s64_tied2, svint64_t,
+ z0 = svtrn1q_s64 (z1, z0),
+ z0 = svtrn1q (z1, z0))
+
+/*
+** trn1q_s64_untied:
+** trn1 z0\.q, z1\.q, z2\.q
+** ret
+*/
+TEST_UNIFORM_Z (trn1q_s64_untied, svint64_t,
+ z0 = svtrn1q_s64 (z1, z2),
+ z0 = svtrn1q (z1, z2))
--- /dev/null
+/* { dg-require-effective-target aarch64_asm_f64mm_ok } */
+/* { dg-additional-options "-march=armv8.2-a+f64mm" } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sve_acle.h"
+
+/*
+** trn1q_s8_tied1:
+** trn1 z0\.q, z0\.q, z1\.q
+** ret
+*/
+TEST_UNIFORM_Z (trn1q_s8_tied1, svint8_t,
+ z0 = svtrn1q_s8 (z0, z1),
+ z0 = svtrn1q (z0, z1))
+
+/*
+** trn1q_s8_tied2:
+** trn1 z0\.q, z1\.q, z0\.q
+** ret
+*/
+TEST_UNIFORM_Z (trn1q_s8_tied2, svint8_t,
+ z0 = svtrn1q_s8 (z1, z0),
+ z0 = svtrn1q (z1, z0))
+
+/*
+** trn1q_s8_untied:
+** trn1 z0\.q, z1\.q, z2\.q
+** ret
+*/
+TEST_UNIFORM_Z (trn1q_s8_untied, svint8_t,
+ z0 = svtrn1q_s8 (z1, z2),
+ z0 = svtrn1q (z1, z2))
--- /dev/null
+/* { dg-require-effective-target aarch64_asm_f64mm_ok } */
+/* { dg-additional-options "-march=armv8.2-a+f64mm" } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sve_acle.h"
+
+/*
+** trn1q_u16_tied1:
+** trn1 z0\.q, z0\.q, z1\.q
+** ret
+*/
+TEST_UNIFORM_Z (trn1q_u16_tied1, svuint16_t,
+ z0 = svtrn1q_u16 (z0, z1),
+ z0 = svtrn1q (z0, z1))
+
+/*
+** trn1q_u16_tied2:
+** trn1 z0\.q, z1\.q, z0\.q
+** ret
+*/
+TEST_UNIFORM_Z (trn1q_u16_tied2, svuint16_t,
+ z0 = svtrn1q_u16 (z1, z0),
+ z0 = svtrn1q (z1, z0))
+
+/*
+** trn1q_u16_untied:
+** trn1 z0\.q, z1\.q, z2\.q
+** ret
+*/
+TEST_UNIFORM_Z (trn1q_u16_untied, svuint16_t,
+ z0 = svtrn1q_u16 (z1, z2),
+ z0 = svtrn1q (z1, z2))
--- /dev/null
+/* { dg-require-effective-target aarch64_asm_f64mm_ok } */
+/* { dg-additional-options "-march=armv8.2-a+f64mm" } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sve_acle.h"
+
+/*
+** trn1q_u32_tied1:
+** trn1 z0\.q, z0\.q, z1\.q
+** ret
+*/
+TEST_UNIFORM_Z (trn1q_u32_tied1, svuint32_t,
+ z0 = svtrn1q_u32 (z0, z1),
+ z0 = svtrn1q (z0, z1))
+
+/*
+** trn1q_u32_tied2:
+** trn1 z0\.q, z1\.q, z0\.q
+** ret
+*/
+TEST_UNIFORM_Z (trn1q_u32_tied2, svuint32_t,
+ z0 = svtrn1q_u32 (z1, z0),
+ z0 = svtrn1q (z1, z0))
+
+/*
+** trn1q_u32_untied:
+** trn1 z0\.q, z1\.q, z2\.q
+** ret
+*/
+TEST_UNIFORM_Z (trn1q_u32_untied, svuint32_t,
+ z0 = svtrn1q_u32 (z1, z2),
+ z0 = svtrn1q (z1, z2))
--- /dev/null
+/* { dg-require-effective-target aarch64_asm_f64mm_ok } */
+/* { dg-additional-options "-march=armv8.2-a+f64mm" } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sve_acle.h"
+
+/*
+** trn1q_u64_tied1:
+** trn1 z0\.q, z0\.q, z1\.q
+** ret
+*/
+TEST_UNIFORM_Z (trn1q_u64_tied1, svuint64_t,
+ z0 = svtrn1q_u64 (z0, z1),
+ z0 = svtrn1q (z0, z1))
+
+/*
+** trn1q_u64_tied2:
+** trn1 z0\.q, z1\.q, z0\.q
+** ret
+*/
+TEST_UNIFORM_Z (trn1q_u64_tied2, svuint64_t,
+ z0 = svtrn1q_u64 (z1, z0),
+ z0 = svtrn1q (z1, z0))
+
+/*
+** trn1q_u64_untied:
+** trn1 z0\.q, z1\.q, z2\.q
+** ret
+*/
+TEST_UNIFORM_Z (trn1q_u64_untied, svuint64_t,
+ z0 = svtrn1q_u64 (z1, z2),
+ z0 = svtrn1q (z1, z2))
--- /dev/null
+/* { dg-require-effective-target aarch64_asm_f64mm_ok } */
+/* { dg-additional-options "-march=armv8.2-a+f64mm" } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sve_acle.h"
+
+/*
+** trn1q_u8_tied1:
+** trn1 z0\.q, z0\.q, z1\.q
+** ret
+*/
+TEST_UNIFORM_Z (trn1q_u8_tied1, svuint8_t,
+ z0 = svtrn1q_u8 (z0, z1),
+ z0 = svtrn1q (z0, z1))
+
+/*
+** trn1q_u8_tied2:
+** trn1 z0\.q, z1\.q, z0\.q
+** ret
+*/
+TEST_UNIFORM_Z (trn1q_u8_tied2, svuint8_t,
+ z0 = svtrn1q_u8 (z1, z0),
+ z0 = svtrn1q (z1, z0))
+
+/*
+** trn1q_u8_untied:
+** trn1 z0\.q, z1\.q, z2\.q
+** ret
+*/
+TEST_UNIFORM_Z (trn1q_u8_untied, svuint8_t,
+ z0 = svtrn1q_u8 (z1, z2),
+ z0 = svtrn1q (z1, z2))
--- /dev/null
+/* { dg-require-effective-target aarch64_asm_f64mm_ok } */
+/* { dg-additional-options "-march=armv8.2-a+f64mm" } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sve_acle.h"
+
+/*
+** trn2q_f16_tied1:
+** trn2 z0\.q, z0\.q, z1\.q
+** ret
+*/
+TEST_UNIFORM_Z (trn2q_f16_tied1, svfloat16_t,
+ z0 = svtrn2q_f16 (z0, z1),
+ z0 = svtrn2q (z0, z1))
+
+/*
+** trn2q_f16_tied2:
+** trn2 z0\.q, z1\.q, z0\.q
+** ret
+*/
+TEST_UNIFORM_Z (trn2q_f16_tied2, svfloat16_t,
+ z0 = svtrn2q_f16 (z1, z0),
+ z0 = svtrn2q (z1, z0))
+
+/*
+** trn2q_f16_untied:
+** trn2 z0\.q, z1\.q, z2\.q
+** ret
+*/
+TEST_UNIFORM_Z (trn2q_f16_untied, svfloat16_t,
+ z0 = svtrn2q_f16 (z1, z2),
+ z0 = svtrn2q (z1, z2))
--- /dev/null
+/* { dg-require-effective-target aarch64_asm_f64mm_ok } */
+/* { dg-additional-options "-march=armv8.2-a+f64mm" } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sve_acle.h"
+
+/*
+** trn2q_f32_tied1:
+** trn2 z0\.q, z0\.q, z1\.q
+** ret
+*/
+TEST_UNIFORM_Z (trn2q_f32_tied1, svfloat32_t,
+ z0 = svtrn2q_f32 (z0, z1),
+ z0 = svtrn2q (z0, z1))
+
+/*
+** trn2q_f32_tied2:
+** trn2 z0\.q, z1\.q, z0\.q
+** ret
+*/
+TEST_UNIFORM_Z (trn2q_f32_tied2, svfloat32_t,
+ z0 = svtrn2q_f32 (z1, z0),
+ z0 = svtrn2q (z1, z0))
+
+/*
+** trn2q_f32_untied:
+** trn2 z0\.q, z1\.q, z2\.q
+** ret
+*/
+TEST_UNIFORM_Z (trn2q_f32_untied, svfloat32_t,
+ z0 = svtrn2q_f32 (z1, z2),
+ z0 = svtrn2q (z1, z2))
--- /dev/null
+/* { dg-require-effective-target aarch64_asm_f64mm_ok } */
+/* { dg-additional-options "-march=armv8.2-a+f64mm" } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sve_acle.h"
+
+/*
+** trn2q_f64_tied1:
+** trn2 z0\.q, z0\.q, z1\.q
+** ret
+*/
+TEST_UNIFORM_Z (trn2q_f64_tied1, svfloat64_t,
+ z0 = svtrn2q_f64 (z0, z1),
+ z0 = svtrn2q (z0, z1))
+
+/*
+** trn2q_f64_tied2:
+** trn2 z0\.q, z1\.q, z0\.q
+** ret
+*/
+TEST_UNIFORM_Z (trn2q_f64_tied2, svfloat64_t,
+ z0 = svtrn2q_f64 (z1, z0),
+ z0 = svtrn2q (z1, z0))
+
+/*
+** trn2q_f64_untied:
+** trn2 z0\.q, z1\.q, z2\.q
+** ret
+*/
+TEST_UNIFORM_Z (trn2q_f64_untied, svfloat64_t,
+ z0 = svtrn2q_f64 (z1, z2),
+ z0 = svtrn2q (z1, z2))
--- /dev/null
+/* { dg-require-effective-target aarch64_asm_f64mm_ok } */
+/* { dg-additional-options "-march=armv8.2-a+f64mm" } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sve_acle.h"
+
+/*
+** trn2q_s16_tied1:
+** trn2 z0\.q, z0\.q, z1\.q
+** ret
+*/
+TEST_UNIFORM_Z (trn2q_s16_tied1, svint16_t,
+ z0 = svtrn2q_s16 (z0, z1),
+ z0 = svtrn2q (z0, z1))
+
+/*
+** trn2q_s16_tied2:
+** trn2 z0\.q, z1\.q, z0\.q
+** ret
+*/
+TEST_UNIFORM_Z (trn2q_s16_tied2, svint16_t,
+ z0 = svtrn2q_s16 (z1, z0),
+ z0 = svtrn2q (z1, z0))
+
+/*
+** trn2q_s16_untied:
+** trn2 z0\.q, z1\.q, z2\.q
+** ret
+*/
+TEST_UNIFORM_Z (trn2q_s16_untied, svint16_t,
+ z0 = svtrn2q_s16 (z1, z2),
+ z0 = svtrn2q (z1, z2))
--- /dev/null
+/* { dg-require-effective-target aarch64_asm_f64mm_ok } */
+/* { dg-additional-options "-march=armv8.2-a+f64mm" } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sve_acle.h"
+
+/*
+** trn2q_s32_tied1:
+** trn2 z0\.q, z0\.q, z1\.q
+** ret
+*/
+TEST_UNIFORM_Z (trn2q_s32_tied1, svint32_t,
+ z0 = svtrn2q_s32 (z0, z1),
+ z0 = svtrn2q (z0, z1))
+
+/*
+** trn2q_s32_tied2:
+** trn2 z0\.q, z1\.q, z0\.q
+** ret
+*/
+TEST_UNIFORM_Z (trn2q_s32_tied2, svint32_t,
+ z0 = svtrn2q_s32 (z1, z0),
+ z0 = svtrn2q (z1, z0))
+
+/*
+** trn2q_s32_untied:
+** trn2 z0\.q, z1\.q, z2\.q
+** ret
+*/
+TEST_UNIFORM_Z (trn2q_s32_untied, svint32_t,
+ z0 = svtrn2q_s32 (z1, z2),
+ z0 = svtrn2q (z1, z2))
--- /dev/null
+/* { dg-require-effective-target aarch64_asm_f64mm_ok } */
+/* { dg-additional-options "-march=armv8.2-a+f64mm" } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sve_acle.h"
+
+/*
+** trn2q_s64_tied1:
+** trn2 z0\.q, z0\.q, z1\.q
+** ret
+*/
+TEST_UNIFORM_Z (trn2q_s64_tied1, svint64_t,
+ z0 = svtrn2q_s64 (z0, z1),
+ z0 = svtrn2q (z0, z1))
+
+/*
+** trn2q_s64_tied2:
+** trn2 z0\.q, z1\.q, z0\.q
+** ret
+*/
+TEST_UNIFORM_Z (trn2q_s64_tied2, svint64_t,
+ z0 = svtrn2q_s64 (z1, z0),
+ z0 = svtrn2q (z1, z0))
+
+/*
+** trn2q_s64_untied:
+** trn2 z0\.q, z1\.q, z2\.q
+** ret
+*/
+TEST_UNIFORM_Z (trn2q_s64_untied, svint64_t,
+ z0 = svtrn2q_s64 (z1, z2),
+ z0 = svtrn2q (z1, z2))
--- /dev/null
+/* { dg-require-effective-target aarch64_asm_f64mm_ok } */
+/* { dg-additional-options "-march=armv8.2-a+f64mm" } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sve_acle.h"
+
+/*
+** trn2q_s8_tied1:
+** trn2 z0\.q, z0\.q, z1\.q
+** ret
+*/
+TEST_UNIFORM_Z (trn2q_s8_tied1, svint8_t,
+ z0 = svtrn2q_s8 (z0, z1),
+ z0 = svtrn2q (z0, z1))
+
+/*
+** trn2q_s8_tied2:
+** trn2 z0\.q, z1\.q, z0\.q
+** ret
+*/
+TEST_UNIFORM_Z (trn2q_s8_tied2, svint8_t,
+ z0 = svtrn2q_s8 (z1, z0),
+ z0 = svtrn2q (z1, z0))
+
+/*
+** trn2q_s8_untied:
+** trn2 z0\.q, z1\.q, z2\.q
+** ret
+*/
+TEST_UNIFORM_Z (trn2q_s8_untied, svint8_t,
+ z0 = svtrn2q_s8 (z1, z2),
+ z0 = svtrn2q (z1, z2))
--- /dev/null
+/* { dg-require-effective-target aarch64_asm_f64mm_ok } */
+/* { dg-additional-options "-march=armv8.2-a+f64mm" } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sve_acle.h"
+
+/*
+** trn2q_u16_tied1:
+** trn2 z0\.q, z0\.q, z1\.q
+** ret
+*/
+TEST_UNIFORM_Z (trn2q_u16_tied1, svuint16_t,
+ z0 = svtrn2q_u16 (z0, z1),
+ z0 = svtrn2q (z0, z1))
+
+/*
+** trn2q_u16_tied2:
+** trn2 z0\.q, z1\.q, z0\.q
+** ret
+*/
+TEST_UNIFORM_Z (trn2q_u16_tied2, svuint16_t,
+ z0 = svtrn2q_u16 (z1, z0),
+ z0 = svtrn2q (z1, z0))
+
+/*
+** trn2q_u16_untied:
+** trn2 z0\.q, z1\.q, z2\.q
+** ret
+*/
+TEST_UNIFORM_Z (trn2q_u16_untied, svuint16_t,
+ z0 = svtrn2q_u16 (z1, z2),
+ z0 = svtrn2q (z1, z2))
--- /dev/null
+/* { dg-require-effective-target aarch64_asm_f64mm_ok } */
+/* { dg-additional-options "-march=armv8.2-a+f64mm" } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sve_acle.h"
+
+/*
+** trn2q_u32_tied1:
+** trn2 z0\.q, z0\.q, z1\.q
+** ret
+*/
+TEST_UNIFORM_Z (trn2q_u32_tied1, svuint32_t,
+ z0 = svtrn2q_u32 (z0, z1),
+ z0 = svtrn2q (z0, z1))
+
+/*
+** trn2q_u32_tied2:
+** trn2 z0\.q, z1\.q, z0\.q
+** ret
+*/
+TEST_UNIFORM_Z (trn2q_u32_tied2, svuint32_t,
+ z0 = svtrn2q_u32 (z1, z0),
+ z0 = svtrn2q (z1, z0))
+
+/*
+** trn2q_u32_untied:
+** trn2 z0\.q, z1\.q, z2\.q
+** ret
+*/
+TEST_UNIFORM_Z (trn2q_u32_untied, svuint32_t,
+ z0 = svtrn2q_u32 (z1, z2),
+ z0 = svtrn2q (z1, z2))
--- /dev/null
+/* { dg-require-effective-target aarch64_asm_f64mm_ok } */
+/* { dg-additional-options "-march=armv8.2-a+f64mm" } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sve_acle.h"
+
+/*
+** trn2q_u64_tied1:
+** trn2 z0\.q, z0\.q, z1\.q
+** ret
+*/
+TEST_UNIFORM_Z (trn2q_u64_tied1, svuint64_t,
+ z0 = svtrn2q_u64 (z0, z1),
+ z0 = svtrn2q (z0, z1))
+
+/*
+** trn2q_u64_tied2:
+** trn2 z0\.q, z1\.q, z0\.q
+** ret
+*/
+TEST_UNIFORM_Z (trn2q_u64_tied2, svuint64_t,
+ z0 = svtrn2q_u64 (z1, z0),
+ z0 = svtrn2q (z1, z0))
+
+/*
+** trn2q_u64_untied:
+** trn2 z0\.q, z1\.q, z2\.q
+** ret
+*/
+TEST_UNIFORM_Z (trn2q_u64_untied, svuint64_t,
+ z0 = svtrn2q_u64 (z1, z2),
+ z0 = svtrn2q (z1, z2))
--- /dev/null
+/* { dg-require-effective-target aarch64_asm_f64mm_ok } */
+/* { dg-additional-options "-march=armv8.2-a+f64mm" } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sve_acle.h"
+
+/*
+** trn2q_u8_tied1:
+** trn2 z0\.q, z0\.q, z1\.q
+** ret
+*/
+TEST_UNIFORM_Z (trn2q_u8_tied1, svuint8_t,
+ z0 = svtrn2q_u8 (z0, z1),
+ z0 = svtrn2q (z0, z1))
+
+/*
+** trn2q_u8_tied2:
+** trn2 z0\.q, z1\.q, z0\.q
+** ret
+*/
+TEST_UNIFORM_Z (trn2q_u8_tied2, svuint8_t,
+ z0 = svtrn2q_u8 (z1, z0),
+ z0 = svtrn2q (z1, z0))
+
+/*
+** trn2q_u8_untied:
+** trn2 z0\.q, z1\.q, z2\.q
+** ret
+*/
+TEST_UNIFORM_Z (trn2q_u8_untied, svuint8_t,
+ z0 = svtrn2q_u8 (z1, z2),
+ z0 = svtrn2q (z1, z2))
--- /dev/null
+/* { dg-require-effective-target aarch64_asm_i8mm_ok } */
+/* { dg-additional-options "-march=armv8.2-a+sve+i8mm" } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sve_acle.h"
+
+/*
+** usdot_lane_0_s32_tied1:
+** usdot z0\.s, z2\.b, z4\.b\[0\]
+** ret
+*/
+TEST_TRIPLE_Z (usdot_lane_0_s32_tied1, svint32_t, svuint8_t, svint8_t,
+ z0 = svusdot_lane_s32 (z0, z2, z4, 0),
+ z0 = svusdot_lane (z0, z2, z4, 0))
+
+/*
+** usdot_lane_0_s32_tied2:
+** mov (z[0-9]+)\.d, z0\.d
+** movprfx z0, z2
+** usdot z0\.s, \1\.b, z4\.b\[0\]
+** ret
+*/
+TEST_TRIPLE_Z_REV2 (usdot_lane_0_s32_tied2, svint32_t, svuint8_t, svint8_t,
+ z0_res = svusdot_lane_s32 (z2, z0, z4, 0),
+ z0_res = svusdot_lane (z2, z0, z4, 0))
+
+/*
+** usdot_lane_0_s32_tied3:
+** mov (z[0-9]+)\.d, z0\.d
+** movprfx z0, z4
+** usdot z0\.s, z2\.b, \1\.b\[0\]
+** ret
+*/
+TEST_TRIPLE_Z_REV (usdot_lane_0_s32_tied3, svint32_t, svuint8_t, svint8_t,
+ z0_res = svusdot_lane_s32 (z4, z2, z0, 0),
+ z0_res = svusdot_lane (z4, z2, z0, 0))
+
+/*
+** usdot_lane_0_s32_untied:
+** movprfx z0, z1
+** usdot z0\.s, z2\.b, z4\.b\[0\]
+** ret
+*/
+TEST_TRIPLE_Z (usdot_lane_0_s32_untied, svint32_t, svuint8_t, svint8_t,
+ z0 = svusdot_lane_s32 (z1, z2, z4, 0),
+ z0 = svusdot_lane (z1, z2, z4, 0))
+
+/*
+** usdot_lane_1_s32:
+** usdot z0\.s, z2\.b, z5\.b\[1\]
+** ret
+*/
+TEST_TRIPLE_Z (usdot_lane_1_s32, svint32_t, svuint8_t, svint8_t,
+ z0 = svusdot_lane_s32 (z0, z2, z5, 1),
+ z0 = svusdot_lane (z0, z2, z5, 1))
+
+/*
+** usdot_lane_2_s32:
+** usdot z0\.s, z2\.b, z5\.b\[2\]
+** ret
+*/
+TEST_TRIPLE_Z (usdot_lane_2_s32, svint32_t, svuint8_t, svint8_t,
+ z0 = svusdot_lane_s32 (z0, z2, z5, 2),
+ z0 = svusdot_lane (z0, z2, z5, 2))
+
+/*
+** usdot_lane_3_s32:
+** usdot z0\.s, z2\.b, z5\.b\[3\]
+** ret
+*/
+TEST_TRIPLE_Z (usdot_lane_3_s32, svint32_t, svuint8_t, svint8_t,
+ z0 = svusdot_lane_s32 (z0, z2, z5, 3),
+ z0 = svusdot_lane (z0, z2, z5, 3))
+
+/*
+** usdot_lane_z8_s32:
+** str d8, \[sp, -16\]!
+** mov (z[0-7])\.d, z8\.d
+** usdot z0\.s, z1\.b, \1\.b\[1\]
+** ldr d8, \[sp\], 16
+** ret
+*/
+TEST_TRIPLE_LANE_REG (usdot_lane_z8_s32, svint32_t, svuint8_t, svint8_t,
+ z8,
+ z0 = svusdot_lane_s32 (z0, z1, z8, 1),
+ z0 = svusdot_lane (z0, z1, z8, 1))
+
+/*
+** usdot_lane_z16_s32:
+** mov (z[0-7])\.d, z16\.d
+** usdot z0\.s, z1\.b, \1\.b\[1\]
+** ret
+*/
+TEST_TRIPLE_LANE_REG (usdot_lane_z16_s32, svint32_t, svuint8_t, svint8_t,
+ z16,
+ z0 = svusdot_lane_s32 (z0, z1, z16, 1),
+ z0 = svusdot_lane (z0, z1, z16, 1))
--- /dev/null
+/* { dg-require-effective-target aarch64_asm_i8mm_ok } */
+/* { dg-additional-options "-march=armv8.2-a+sve+i8mm" } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sve_acle.h"
+
+/*
+** usdot_s32_tied1:
+** usdot z0\.s, z2\.b, z4\.b
+** ret
+*/
+TEST_TRIPLE_Z (usdot_s32_tied1, svint32_t, svuint8_t, svint8_t,
+ z0 = svusdot_s32 (z0, z2, z4),
+ z0 = svusdot (z0, z2, z4))
+
+/*
+** usdot_s32_tied2:
+** mov (z[0-9]+)\.d, z0\.d
+** movprfx z0, z4
+** usdot z0\.s, z2\.b, \1\.b
+** ret
+*/
+TEST_TRIPLE_Z_REV (usdot_s32_tied2, svint32_t, svuint8_t, svint8_t,
+ z0_res = svusdot_s32 (z4, z2, z0),
+ z0_res = svusdot (z4, z2, z0))
+
+/*
+** usdot_w0_s32_tied:
+** mov (z[0-9]+\.b), w0
+** usdot z0\.s, z2\.b, \1
+** ret
+*/
+TEST_TRIPLE_ZX (usdot_w0_s32_tied, svint32_t, svuint8_t, int8_t,
+ z0 = svusdot_n_s32 (z0, z2, x0),
+ z0 = svusdot (z0, z2, x0))
+
+/*
+** usdot_9_s32_tied:
+** mov (z[0-9]+\.b), #9
+** usdot z0\.s, z2\.b, \1
+** ret
+*/
+TEST_TRIPLE_Z (usdot_9_s32_tied, svint32_t, svuint8_t, int8_t,
+ z0 = svusdot_n_s32 (z0, z2, 9),
+ z0 = svusdot (z0, z2, 9))
--- /dev/null
+/* { dg-require-effective-target aarch64_asm_i8mm_ok } */
+/* { dg-additional-options "-march=armv8.2-a+sve+i8mm" } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sve_acle.h"
+
+/*
+** usmmla_s32_tied1:
+** usmmla z0\.s, z2\.b, z4\.b
+** ret
+*/
+TEST_TRIPLE_Z (usmmla_s32_tied1, svint32_t, svuint8_t, svint8_t,
+ z0 = svusmmla_s32 (z0, z2, z4),
+ z0 = svusmmla (z0, z2, z4))
+
+/*
+** usmmla_s32_tied2:
+** mov (z[0-9]+)\.d, z0\.d
+** movprfx z0, z2
+** usmmla z0\.s, \1\.b, z4\.b
+** ret
+*/
+TEST_TRIPLE_Z_REV2 (usmmla_s32_tied2, svint32_t, svuint8_t, svint8_t,
+ z0_res = svusmmla_s32 (z2, z0, z4),
+ z0_res = svusmmla (z2, z0, z4))
+
+/*
+** usmmla_s32_tied3:
+** mov (z[0-9]+)\.d, z0\.d
+** movprfx z0, z4
+** usmmla z0\.s, z2\.b, \1\.b
+** ret
+*/
+TEST_TRIPLE_Z_REV (usmmla_s32_tied3, svint32_t, svuint8_t, svint8_t,
+ z0_res = svusmmla_s32 (z4, z2, z0),
+ z0_res = svusmmla (z4, z2, z0))
+
+/*
+** usmmla_s32_untied:
+** movprfx z0, z1
+** usmmla z0\.s, z2\.b, z4\.b
+** ret
+*/
+TEST_TRIPLE_Z (usmmla_s32_untied, svint32_t, svuint8_t, svint8_t,
+ z0 = svusmmla_s32 (z1, z2, z4),
+ z0 = svusmmla (z1, z2, z4))
--- /dev/null
+/* { dg-require-effective-target aarch64_asm_f64mm_ok } */
+/* { dg-additional-options "-march=armv8.2-a+f64mm" } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sve_acle.h"
+
+/*
+** uzp1q_f16_tied1:
+** uzp1 z0\.q, z0\.q, z1\.q
+** ret
+*/
+TEST_UNIFORM_Z (uzp1q_f16_tied1, svfloat16_t,
+ z0 = svuzp1q_f16 (z0, z1),
+ z0 = svuzp1q (z0, z1))
+
+/*
+** uzp1q_f16_tied2:
+** uzp1 z0\.q, z1\.q, z0\.q
+** ret
+*/
+TEST_UNIFORM_Z (uzp1q_f16_tied2, svfloat16_t,
+ z0 = svuzp1q_f16 (z1, z0),
+ z0 = svuzp1q (z1, z0))
+
+/*
+** uzp1q_f16_untied:
+** uzp1 z0\.q, z1\.q, z2\.q
+** ret
+*/
+TEST_UNIFORM_Z (uzp1q_f16_untied, svfloat16_t,
+ z0 = svuzp1q_f16 (z1, z2),
+ z0 = svuzp1q (z1, z2))
--- /dev/null
+/* { dg-require-effective-target aarch64_asm_f64mm_ok } */
+/* { dg-additional-options "-march=armv8.2-a+f64mm" } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sve_acle.h"
+
+/*
+** uzp1q_f32_tied1:
+** uzp1 z0\.q, z0\.q, z1\.q
+** ret
+*/
+TEST_UNIFORM_Z (uzp1q_f32_tied1, svfloat32_t,
+ z0 = svuzp1q_f32 (z0, z1),
+ z0 = svuzp1q (z0, z1))
+
+/*
+** uzp1q_f32_tied2:
+** uzp1 z0\.q, z1\.q, z0\.q
+** ret
+*/
+TEST_UNIFORM_Z (uzp1q_f32_tied2, svfloat32_t,
+ z0 = svuzp1q_f32 (z1, z0),
+ z0 = svuzp1q (z1, z0))
+
+/*
+** uzp1q_f32_untied:
+** uzp1 z0\.q, z1\.q, z2\.q
+** ret
+*/
+TEST_UNIFORM_Z (uzp1q_f32_untied, svfloat32_t,
+ z0 = svuzp1q_f32 (z1, z2),
+ z0 = svuzp1q (z1, z2))
--- /dev/null
+/* { dg-require-effective-target aarch64_asm_f64mm_ok } */
+/* { dg-additional-options "-march=armv8.2-a+f64mm" } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sve_acle.h"
+
+/*
+** uzp1q_f64_tied1:
+** uzp1 z0\.q, z0\.q, z1\.q
+** ret
+*/
+TEST_UNIFORM_Z (uzp1q_f64_tied1, svfloat64_t,
+ z0 = svuzp1q_f64 (z0, z1),
+ z0 = svuzp1q (z0, z1))
+
+/*
+** uzp1q_f64_tied2:
+** uzp1 z0\.q, z1\.q, z0\.q
+** ret
+*/
+TEST_UNIFORM_Z (uzp1q_f64_tied2, svfloat64_t,
+ z0 = svuzp1q_f64 (z1, z0),
+ z0 = svuzp1q (z1, z0))
+
+/*
+** uzp1q_f64_untied:
+** uzp1 z0\.q, z1\.q, z2\.q
+** ret
+*/
+TEST_UNIFORM_Z (uzp1q_f64_untied, svfloat64_t,
+ z0 = svuzp1q_f64 (z1, z2),
+ z0 = svuzp1q (z1, z2))
--- /dev/null
+/* { dg-require-effective-target aarch64_asm_f64mm_ok } */
+/* { dg-additional-options "-march=armv8.2-a+f64mm" } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sve_acle.h"
+
+/*
+** uzp1q_s16_tied1:
+** uzp1 z0\.q, z0\.q, z1\.q
+** ret
+*/
+TEST_UNIFORM_Z (uzp1q_s16_tied1, svint16_t,
+ z0 = svuzp1q_s16 (z0, z1),
+ z0 = svuzp1q (z0, z1))
+
+/*
+** uzp1q_s16_tied2:
+** uzp1 z0\.q, z1\.q, z0\.q
+** ret
+*/
+TEST_UNIFORM_Z (uzp1q_s16_tied2, svint16_t,
+ z0 = svuzp1q_s16 (z1, z0),
+ z0 = svuzp1q (z1, z0))
+
+/*
+** uzp1q_s16_untied:
+** uzp1 z0\.q, z1\.q, z2\.q
+** ret
+*/
+TEST_UNIFORM_Z (uzp1q_s16_untied, svint16_t,
+ z0 = svuzp1q_s16 (z1, z2),
+ z0 = svuzp1q (z1, z2))
--- /dev/null
+/* { dg-require-effective-target aarch64_asm_f64mm_ok } */
+/* { dg-additional-options "-march=armv8.2-a+f64mm" } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sve_acle.h"
+
+/*
+** uzp1q_s32_tied1:
+** uzp1 z0\.q, z0\.q, z1\.q
+** ret
+*/
+TEST_UNIFORM_Z (uzp1q_s32_tied1, svint32_t,
+ z0 = svuzp1q_s32 (z0, z1),
+ z0 = svuzp1q (z0, z1))
+
+/*
+** uzp1q_s32_tied2:
+** uzp1 z0\.q, z1\.q, z0\.q
+** ret
+*/
+TEST_UNIFORM_Z (uzp1q_s32_tied2, svint32_t,
+ z0 = svuzp1q_s32 (z1, z0),
+ z0 = svuzp1q (z1, z0))
+
+/*
+** uzp1q_s32_untied:
+** uzp1 z0\.q, z1\.q, z2\.q
+** ret
+*/
+TEST_UNIFORM_Z (uzp1q_s32_untied, svint32_t,
+ z0 = svuzp1q_s32 (z1, z2),
+ z0 = svuzp1q (z1, z2))
--- /dev/null
+/* { dg-require-effective-target aarch64_asm_f64mm_ok } */
+/* { dg-additional-options "-march=armv8.2-a+f64mm" } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sve_acle.h"
+
+/*
+** uzp1q_s64_tied1:
+** uzp1 z0\.q, z0\.q, z1\.q
+** ret
+*/
+TEST_UNIFORM_Z (uzp1q_s64_tied1, svint64_t,
+ z0 = svuzp1q_s64 (z0, z1),
+ z0 = svuzp1q (z0, z1))
+
+/*
+** uzp1q_s64_tied2:
+** uzp1 z0\.q, z1\.q, z0\.q
+** ret
+*/
+TEST_UNIFORM_Z (uzp1q_s64_tied2, svint64_t,
+ z0 = svuzp1q_s64 (z1, z0),
+ z0 = svuzp1q (z1, z0))
+
+/*
+** uzp1q_s64_untied:
+** uzp1 z0\.q, z1\.q, z2\.q
+** ret
+*/
+TEST_UNIFORM_Z (uzp1q_s64_untied, svint64_t,
+ z0 = svuzp1q_s64 (z1, z2),
+ z0 = svuzp1q (z1, z2))
--- /dev/null
+/* { dg-require-effective-target aarch64_asm_f64mm_ok } */
+/* { dg-additional-options "-march=armv8.2-a+f64mm" } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sve_acle.h"
+
+/*
+** uzp1q_s8_tied1:
+** uzp1 z0\.q, z0\.q, z1\.q
+** ret
+*/
+TEST_UNIFORM_Z (uzp1q_s8_tied1, svint8_t,
+ z0 = svuzp1q_s8 (z0, z1),
+ z0 = svuzp1q (z0, z1))
+
+/*
+** uzp1q_s8_tied2:
+** uzp1 z0\.q, z1\.q, z0\.q
+** ret
+*/
+TEST_UNIFORM_Z (uzp1q_s8_tied2, svint8_t,
+ z0 = svuzp1q_s8 (z1, z0),
+ z0 = svuzp1q (z1, z0))
+
+/*
+** uzp1q_s8_untied:
+** uzp1 z0\.q, z1\.q, z2\.q
+** ret
+*/
+TEST_UNIFORM_Z (uzp1q_s8_untied, svint8_t,
+ z0 = svuzp1q_s8 (z1, z2),
+ z0 = svuzp1q (z1, z2))
--- /dev/null
+/* { dg-require-effective-target aarch64_asm_f64mm_ok } */
+/* { dg-additional-options "-march=armv8.2-a+f64mm" } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sve_acle.h"
+
+/*
+** uzp1q_u16_tied1:
+** uzp1 z0\.q, z0\.q, z1\.q
+** ret
+*/
+TEST_UNIFORM_Z (uzp1q_u16_tied1, svuint16_t,
+ z0 = svuzp1q_u16 (z0, z1),
+ z0 = svuzp1q (z0, z1))
+
+/*
+** uzp1q_u16_tied2:
+** uzp1 z0\.q, z1\.q, z0\.q
+** ret
+*/
+TEST_UNIFORM_Z (uzp1q_u16_tied2, svuint16_t,
+ z0 = svuzp1q_u16 (z1, z0),
+ z0 = svuzp1q (z1, z0))
+
+/*
+** uzp1q_u16_untied:
+** uzp1 z0\.q, z1\.q, z2\.q
+** ret
+*/
+TEST_UNIFORM_Z (uzp1q_u16_untied, svuint16_t,
+ z0 = svuzp1q_u16 (z1, z2),
+ z0 = svuzp1q (z1, z2))
--- /dev/null
+/* { dg-require-effective-target aarch64_asm_f64mm_ok } */
+/* { dg-additional-options "-march=armv8.2-a+f64mm" } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sve_acle.h"
+
+/*
+** uzp1q_u32_tied1:
+** uzp1 z0\.q, z0\.q, z1\.q
+** ret
+*/
+TEST_UNIFORM_Z (uzp1q_u32_tied1, svuint32_t,
+ z0 = svuzp1q_u32 (z0, z1),
+ z0 = svuzp1q (z0, z1))
+
+/*
+** uzp1q_u32_tied2:
+** uzp1 z0\.q, z1\.q, z0\.q
+** ret
+*/
+TEST_UNIFORM_Z (uzp1q_u32_tied2, svuint32_t,
+ z0 = svuzp1q_u32 (z1, z0),
+ z0 = svuzp1q (z1, z0))
+
+/*
+** uzp1q_u32_untied:
+** uzp1 z0\.q, z1\.q, z2\.q
+** ret
+*/
+TEST_UNIFORM_Z (uzp1q_u32_untied, svuint32_t,
+ z0 = svuzp1q_u32 (z1, z2),
+ z0 = svuzp1q (z1, z2))
--- /dev/null
+/* { dg-require-effective-target aarch64_asm_f64mm_ok } */
+/* { dg-additional-options "-march=armv8.2-a+f64mm" } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sve_acle.h"
+
+/*
+** uzp1q_u64_tied1:
+** uzp1 z0\.q, z0\.q, z1\.q
+** ret
+*/
+TEST_UNIFORM_Z (uzp1q_u64_tied1, svuint64_t,
+ z0 = svuzp1q_u64 (z0, z1),
+ z0 = svuzp1q (z0, z1))
+
+/*
+** uzp1q_u64_tied2:
+** uzp1 z0\.q, z1\.q, z0\.q
+** ret
+*/
+TEST_UNIFORM_Z (uzp1q_u64_tied2, svuint64_t,
+ z0 = svuzp1q_u64 (z1, z0),
+ z0 = svuzp1q (z1, z0))
+
+/*
+** uzp1q_u64_untied:
+** uzp1 z0\.q, z1\.q, z2\.q
+** ret
+*/
+TEST_UNIFORM_Z (uzp1q_u64_untied, svuint64_t,
+ z0 = svuzp1q_u64 (z1, z2),
+ z0 = svuzp1q (z1, z2))
--- /dev/null
+/* { dg-require-effective-target aarch64_asm_f64mm_ok } */
+/* { dg-additional-options "-march=armv8.2-a+f64mm" } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sve_acle.h"
+
+/*
+** uzp1q_u8_tied1:
+** uzp1 z0\.q, z0\.q, z1\.q
+** ret
+*/
+TEST_UNIFORM_Z (uzp1q_u8_tied1, svuint8_t,
+ z0 = svuzp1q_u8 (z0, z1),
+ z0 = svuzp1q (z0, z1))
+
+/*
+** uzp1q_u8_tied2:
+** uzp1 z0\.q, z1\.q, z0\.q
+** ret
+*/
+TEST_UNIFORM_Z (uzp1q_u8_tied2, svuint8_t,
+ z0 = svuzp1q_u8 (z1, z0),
+ z0 = svuzp1q (z1, z0))
+
+/*
+** uzp1q_u8_untied:
+** uzp1 z0\.q, z1\.q, z2\.q
+** ret
+*/
+TEST_UNIFORM_Z (uzp1q_u8_untied, svuint8_t,
+ z0 = svuzp1q_u8 (z1, z2),
+ z0 = svuzp1q (z1, z2))
--- /dev/null
+/* { dg-require-effective-target aarch64_asm_f64mm_ok } */
+/* { dg-additional-options "-march=armv8.2-a+f64mm" } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sve_acle.h"
+
+/*
+** uzp2q_f16_tied1:
+** uzp2 z0\.q, z0\.q, z1\.q
+** ret
+*/
+TEST_UNIFORM_Z (uzp2q_f16_tied1, svfloat16_t,
+ z0 = svuzp2q_f16 (z0, z1),
+ z0 = svuzp2q (z0, z1))
+
+/*
+** uzp2q_f16_tied2:
+** uzp2 z0\.q, z1\.q, z0\.q
+** ret
+*/
+TEST_UNIFORM_Z (uzp2q_f16_tied2, svfloat16_t,
+ z0 = svuzp2q_f16 (z1, z0),
+ z0 = svuzp2q (z1, z0))
+
+/*
+** uzp2q_f16_untied:
+** uzp2 z0\.q, z1\.q, z2\.q
+** ret
+*/
+TEST_UNIFORM_Z (uzp2q_f16_untied, svfloat16_t,
+ z0 = svuzp2q_f16 (z1, z2),
+ z0 = svuzp2q (z1, z2))
--- /dev/null
+/* { dg-require-effective-target aarch64_asm_f64mm_ok } */
+/* { dg-additional-options "-march=armv8.2-a+f64mm" } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sve_acle.h"
+
+/*
+** uzp2q_f32_tied1:
+** uzp2 z0\.q, z0\.q, z1\.q
+** ret
+*/
+TEST_UNIFORM_Z (uzp2q_f32_tied1, svfloat32_t,
+ z0 = svuzp2q_f32 (z0, z1),
+ z0 = svuzp2q (z0, z1))
+
+/*
+** uzp2q_f32_tied2:
+** uzp2 z0\.q, z1\.q, z0\.q
+** ret
+*/
+TEST_UNIFORM_Z (uzp2q_f32_tied2, svfloat32_t,
+ z0 = svuzp2q_f32 (z1, z0),
+ z0 = svuzp2q (z1, z0))
+
+/*
+** uzp2q_f32_untied:
+** uzp2 z0\.q, z1\.q, z2\.q
+** ret
+*/
+TEST_UNIFORM_Z (uzp2q_f32_untied, svfloat32_t,
+ z0 = svuzp2q_f32 (z1, z2),
+ z0 = svuzp2q (z1, z2))
--- /dev/null
+/* { dg-require-effective-target aarch64_asm_f64mm_ok } */
+/* { dg-additional-options "-march=armv8.2-a+f64mm" } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sve_acle.h"
+
+/*
+** uzp2q_f64_tied1:
+** uzp2 z0\.q, z0\.q, z1\.q
+** ret
+*/
+TEST_UNIFORM_Z (uzp2q_f64_tied1, svfloat64_t,
+ z0 = svuzp2q_f64 (z0, z1),
+ z0 = svuzp2q (z0, z1))
+
+/*
+** uzp2q_f64_tied2:
+** uzp2 z0\.q, z1\.q, z0\.q
+** ret
+*/
+TEST_UNIFORM_Z (uzp2q_f64_tied2, svfloat64_t,
+ z0 = svuzp2q_f64 (z1, z0),
+ z0 = svuzp2q (z1, z0))
+
+/*
+** uzp2q_f64_untied:
+** uzp2 z0\.q, z1\.q, z2\.q
+** ret
+*/
+TEST_UNIFORM_Z (uzp2q_f64_untied, svfloat64_t,
+ z0 = svuzp2q_f64 (z1, z2),
+ z0 = svuzp2q (z1, z2))
--- /dev/null
+/* { dg-require-effective-target aarch64_asm_f64mm_ok } */
+/* { dg-additional-options "-march=armv8.2-a+f64mm" } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sve_acle.h"
+
+/*
+** uzp2q_s16_tied1:
+** uzp2 z0\.q, z0\.q, z1\.q
+** ret
+*/
+TEST_UNIFORM_Z (uzp2q_s16_tied1, svint16_t,
+ z0 = svuzp2q_s16 (z0, z1),
+ z0 = svuzp2q (z0, z1))
+
+/*
+** uzp2q_s16_tied2:
+** uzp2 z0\.q, z1\.q, z0\.q
+** ret
+*/
+TEST_UNIFORM_Z (uzp2q_s16_tied2, svint16_t,
+ z0 = svuzp2q_s16 (z1, z0),
+ z0 = svuzp2q (z1, z0))
+
+/*
+** uzp2q_s16_untied:
+** uzp2 z0\.q, z1\.q, z2\.q
+** ret
+*/
+TEST_UNIFORM_Z (uzp2q_s16_untied, svint16_t,
+ z0 = svuzp2q_s16 (z1, z2),
+ z0 = svuzp2q (z1, z2))
--- /dev/null
+/* { dg-require-effective-target aarch64_asm_f64mm_ok } */
+/* { dg-additional-options "-march=armv8.2-a+f64mm" } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sve_acle.h"
+
+/*
+** uzp2q_s32_tied1:
+** uzp2 z0\.q, z0\.q, z1\.q
+** ret
+*/
+TEST_UNIFORM_Z (uzp2q_s32_tied1, svint32_t,
+ z0 = svuzp2q_s32 (z0, z1),
+ z0 = svuzp2q (z0, z1))
+
+/*
+** uzp2q_s32_tied2:
+** uzp2 z0\.q, z1\.q, z0\.q
+** ret
+*/
+TEST_UNIFORM_Z (uzp2q_s32_tied2, svint32_t,
+ z0 = svuzp2q_s32 (z1, z0),
+ z0 = svuzp2q (z1, z0))
+
+/*
+** uzp2q_s32_untied:
+** uzp2 z0\.q, z1\.q, z2\.q
+** ret
+*/
+TEST_UNIFORM_Z (uzp2q_s32_untied, svint32_t,
+ z0 = svuzp2q_s32 (z1, z2),
+ z0 = svuzp2q (z1, z2))
--- /dev/null
+/* { dg-require-effective-target aarch64_asm_f64mm_ok } */
+/* { dg-additional-options "-march=armv8.2-a+f64mm" } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sve_acle.h"
+
+/*
+** uzp2q_s64_tied1:
+** uzp2 z0\.q, z0\.q, z1\.q
+** ret
+*/
+TEST_UNIFORM_Z (uzp2q_s64_tied1, svint64_t,
+ z0 = svuzp2q_s64 (z0, z1),
+ z0 = svuzp2q (z0, z1))
+
+/*
+** uzp2q_s64_tied2:
+** uzp2 z0\.q, z1\.q, z0\.q
+** ret
+*/
+TEST_UNIFORM_Z (uzp2q_s64_tied2, svint64_t,
+ z0 = svuzp2q_s64 (z1, z0),
+ z0 = svuzp2q (z1, z0))
+
+/*
+** uzp2q_s64_untied:
+** uzp2 z0\.q, z1\.q, z2\.q
+** ret
+*/
+TEST_UNIFORM_Z (uzp2q_s64_untied, svint64_t,
+ z0 = svuzp2q_s64 (z1, z2),
+ z0 = svuzp2q (z1, z2))
--- /dev/null
+/* { dg-require-effective-target aarch64_asm_f64mm_ok } */
+/* { dg-additional-options "-march=armv8.2-a+f64mm" } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sve_acle.h"
+
+/*
+** uzp2q_s8_tied1:
+** uzp2 z0\.q, z0\.q, z1\.q
+** ret
+*/
+TEST_UNIFORM_Z (uzp2q_s8_tied1, svint8_t,
+ z0 = svuzp2q_s8 (z0, z1),
+ z0 = svuzp2q (z0, z1))
+
+/*
+** uzp2q_s8_tied2:
+** uzp2 z0\.q, z1\.q, z0\.q
+** ret
+*/
+TEST_UNIFORM_Z (uzp2q_s8_tied2, svint8_t,
+ z0 = svuzp2q_s8 (z1, z0),
+ z0 = svuzp2q (z1, z0))
+
+/*
+** uzp2q_s8_untied:
+** uzp2 z0\.q, z1\.q, z2\.q
+** ret
+*/
+TEST_UNIFORM_Z (uzp2q_s8_untied, svint8_t,
+ z0 = svuzp2q_s8 (z1, z2),
+ z0 = svuzp2q (z1, z2))
--- /dev/null
+/* { dg-require-effective-target aarch64_asm_f64mm_ok } */
+/* { dg-additional-options "-march=armv8.2-a+f64mm" } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sve_acle.h"
+
+/*
+** uzp2q_u16_tied1:
+** uzp2 z0\.q, z0\.q, z1\.q
+** ret
+*/
+TEST_UNIFORM_Z (uzp2q_u16_tied1, svuint16_t,
+ z0 = svuzp2q_u16 (z0, z1),
+ z0 = svuzp2q (z0, z1))
+
+/*
+** uzp2q_u16_tied2:
+** uzp2 z0\.q, z1\.q, z0\.q
+** ret
+*/
+TEST_UNIFORM_Z (uzp2q_u16_tied2, svuint16_t,
+ z0 = svuzp2q_u16 (z1, z0),
+ z0 = svuzp2q (z1, z0))
+
+/*
+** uzp2q_u16_untied:
+** uzp2 z0\.q, z1\.q, z2\.q
+** ret
+*/
+TEST_UNIFORM_Z (uzp2q_u16_untied, svuint16_t,
+ z0 = svuzp2q_u16 (z1, z2),
+ z0 = svuzp2q (z1, z2))
--- /dev/null
+/* { dg-require-effective-target aarch64_asm_f64mm_ok } */
+/* { dg-additional-options "-march=armv8.2-a+f64mm" } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sve_acle.h"
+
+/*
+** uzp2q_u32_tied1:
+** uzp2 z0\.q, z0\.q, z1\.q
+** ret
+*/
+TEST_UNIFORM_Z (uzp2q_u32_tied1, svuint32_t,
+ z0 = svuzp2q_u32 (z0, z1),
+ z0 = svuzp2q (z0, z1))
+
+/*
+** uzp2q_u32_tied2:
+** uzp2 z0\.q, z1\.q, z0\.q
+** ret
+*/
+TEST_UNIFORM_Z (uzp2q_u32_tied2, svuint32_t,
+ z0 = svuzp2q_u32 (z1, z0),
+ z0 = svuzp2q (z1, z0))
+
+/*
+** uzp2q_u32_untied:
+** uzp2 z0\.q, z1\.q, z2\.q
+** ret
+*/
+TEST_UNIFORM_Z (uzp2q_u32_untied, svuint32_t,
+ z0 = svuzp2q_u32 (z1, z2),
+ z0 = svuzp2q (z1, z2))
--- /dev/null
+/* { dg-require-effective-target aarch64_asm_f64mm_ok } */
+/* { dg-additional-options "-march=armv8.2-a+f64mm" } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sve_acle.h"
+
+/*
+** uzp2q_u64_tied1:
+** uzp2 z0\.q, z0\.q, z1\.q
+** ret
+*/
+TEST_UNIFORM_Z (uzp2q_u64_tied1, svuint64_t,
+ z0 = svuzp2q_u64 (z0, z1),
+ z0 = svuzp2q (z0, z1))
+
+/*
+** uzp2q_u64_tied2:
+** uzp2 z0\.q, z1\.q, z0\.q
+** ret
+*/
+TEST_UNIFORM_Z (uzp2q_u64_tied2, svuint64_t,
+ z0 = svuzp2q_u64 (z1, z0),
+ z0 = svuzp2q (z1, z0))
+
+/*
+** uzp2q_u64_untied:
+** uzp2 z0\.q, z1\.q, z2\.q
+** ret
+*/
+TEST_UNIFORM_Z (uzp2q_u64_untied, svuint64_t,
+ z0 = svuzp2q_u64 (z1, z2),
+ z0 = svuzp2q (z1, z2))
--- /dev/null
+/* { dg-require-effective-target aarch64_asm_f64mm_ok } */
+/* { dg-additional-options "-march=armv8.2-a+f64mm" } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sve_acle.h"
+
+/*
+** uzp2q_u8_tied1:
+** uzp2 z0\.q, z0\.q, z1\.q
+** ret
+*/
+TEST_UNIFORM_Z (uzp2q_u8_tied1, svuint8_t,
+ z0 = svuzp2q_u8 (z0, z1),
+ z0 = svuzp2q (z0, z1))
+
+/*
+** uzp2q_u8_tied2:
+** uzp2 z0\.q, z1\.q, z0\.q
+** ret
+*/
+TEST_UNIFORM_Z (uzp2q_u8_tied2, svuint8_t,
+ z0 = svuzp2q_u8 (z1, z0),
+ z0 = svuzp2q (z1, z0))
+
+/*
+** uzp2q_u8_untied:
+** uzp2 z0\.q, z1\.q, z2\.q
+** ret
+*/
+TEST_UNIFORM_Z (uzp2q_u8_untied, svuint8_t,
+ z0 = svuzp2q_u8 (z1, z2),
+ z0 = svuzp2q (z1, z2))
--- /dev/null
+/* { dg-require-effective-target aarch64_asm_f64mm_ok } */
+/* { dg-additional-options "-march=armv8.2-a+f64mm" } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sve_acle.h"
+
+/*
+** zip1q_f16_tied1:
+** zip1 z0\.q, z0\.q, z1\.q
+** ret
+*/
+TEST_UNIFORM_Z (zip1q_f16_tied1, svfloat16_t,
+ z0 = svzip1q_f16 (z0, z1),
+ z0 = svzip1q (z0, z1))
+
+/*
+** zip1q_f16_tied2:
+** zip1 z0\.q, z1\.q, z0\.q
+** ret
+*/
+TEST_UNIFORM_Z (zip1q_f16_tied2, svfloat16_t,
+ z0 = svzip1q_f16 (z1, z0),
+ z0 = svzip1q (z1, z0))
+
+/*
+** zip1q_f16_untied:
+** zip1 z0\.q, z1\.q, z2\.q
+** ret
+*/
+TEST_UNIFORM_Z (zip1q_f16_untied, svfloat16_t,
+ z0 = svzip1q_f16 (z1, z2),
+ z0 = svzip1q (z1, z2))
--- /dev/null
+/* { dg-require-effective-target aarch64_asm_f64mm_ok } */
+/* { dg-additional-options "-march=armv8.2-a+f64mm" } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sve_acle.h"
+
+/*
+** zip1q_f32_tied1:
+** zip1 z0\.q, z0\.q, z1\.q
+** ret
+*/
+TEST_UNIFORM_Z (zip1q_f32_tied1, svfloat32_t,
+ z0 = svzip1q_f32 (z0, z1),
+ z0 = svzip1q (z0, z1))
+
+/*
+** zip1q_f32_tied2:
+** zip1 z0\.q, z1\.q, z0\.q
+** ret
+*/
+TEST_UNIFORM_Z (zip1q_f32_tied2, svfloat32_t,
+ z0 = svzip1q_f32 (z1, z0),
+ z0 = svzip1q (z1, z0))
+
+/*
+** zip1q_f32_untied:
+** zip1 z0\.q, z1\.q, z2\.q
+** ret
+*/
+TEST_UNIFORM_Z (zip1q_f32_untied, svfloat32_t,
+ z0 = svzip1q_f32 (z1, z2),
+ z0 = svzip1q (z1, z2))
--- /dev/null
+/* { dg-require-effective-target aarch64_asm_f64mm_ok } */
+/* { dg-additional-options "-march=armv8.2-a+f64mm" } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sve_acle.h"
+
+/*
+** zip1q_f64_tied1:
+** zip1 z0\.q, z0\.q, z1\.q
+** ret
+*/
+TEST_UNIFORM_Z (zip1q_f64_tied1, svfloat64_t,
+ z0 = svzip1q_f64 (z0, z1),
+ z0 = svzip1q (z0, z1))
+
+/*
+** zip1q_f64_tied2:
+** zip1 z0\.q, z1\.q, z0\.q
+** ret
+*/
+TEST_UNIFORM_Z (zip1q_f64_tied2, svfloat64_t,
+ z0 = svzip1q_f64 (z1, z0),
+ z0 = svzip1q (z1, z0))
+
+/*
+** zip1q_f64_untied:
+** zip1 z0\.q, z1\.q, z2\.q
+** ret
+*/
+TEST_UNIFORM_Z (zip1q_f64_untied, svfloat64_t,
+ z0 = svzip1q_f64 (z1, z2),
+ z0 = svzip1q (z1, z2))
--- /dev/null
+/* { dg-require-effective-target aarch64_asm_f64mm_ok } */
+/* { dg-additional-options "-march=armv8.2-a+f64mm" } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sve_acle.h"
+
+/*
+** zip1q_s16_tied1:
+** zip1 z0\.q, z0\.q, z1\.q
+** ret
+*/
+TEST_UNIFORM_Z (zip1q_s16_tied1, svint16_t,
+ z0 = svzip1q_s16 (z0, z1),
+ z0 = svzip1q (z0, z1))
+
+/*
+** zip1q_s16_tied2:
+** zip1 z0\.q, z1\.q, z0\.q
+** ret
+*/
+TEST_UNIFORM_Z (zip1q_s16_tied2, svint16_t,
+ z0 = svzip1q_s16 (z1, z0),
+ z0 = svzip1q (z1, z0))
+
+/*
+** zip1q_s16_untied:
+** zip1 z0\.q, z1\.q, z2\.q
+** ret
+*/
+TEST_UNIFORM_Z (zip1q_s16_untied, svint16_t,
+ z0 = svzip1q_s16 (z1, z2),
+ z0 = svzip1q (z1, z2))
--- /dev/null
+/* { dg-require-effective-target aarch64_asm_f64mm_ok } */
+/* { dg-additional-options "-march=armv8.2-a+f64mm" } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sve_acle.h"
+
+/*
+** zip1q_s32_tied1:
+** zip1 z0\.q, z0\.q, z1\.q
+** ret
+*/
+TEST_UNIFORM_Z (zip1q_s32_tied1, svint32_t,
+ z0 = svzip1q_s32 (z0, z1),
+ z0 = svzip1q (z0, z1))
+
+/*
+** zip1q_s32_tied2:
+** zip1 z0\.q, z1\.q, z0\.q
+** ret
+*/
+TEST_UNIFORM_Z (zip1q_s32_tied2, svint32_t,
+ z0 = svzip1q_s32 (z1, z0),
+ z0 = svzip1q (z1, z0))
+
+/*
+** zip1q_s32_untied:
+** zip1 z0\.q, z1\.q, z2\.q
+** ret
+*/
+TEST_UNIFORM_Z (zip1q_s32_untied, svint32_t,
+ z0 = svzip1q_s32 (z1, z2),
+ z0 = svzip1q (z1, z2))
--- /dev/null
+/* { dg-require-effective-target aarch64_asm_f64mm_ok } */
+/* { dg-additional-options "-march=armv8.2-a+f64mm" } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sve_acle.h"
+
+/*
+** zip1q_s64_tied1:
+** zip1 z0\.q, z0\.q, z1\.q
+** ret
+*/
+TEST_UNIFORM_Z (zip1q_s64_tied1, svint64_t,
+ z0 = svzip1q_s64 (z0, z1),
+ z0 = svzip1q (z0, z1))
+
+/*
+** zip1q_s64_tied2:
+** zip1 z0\.q, z1\.q, z0\.q
+** ret
+*/
+TEST_UNIFORM_Z (zip1q_s64_tied2, svint64_t,
+ z0 = svzip1q_s64 (z1, z0),
+ z0 = svzip1q (z1, z0))
+
+/*
+** zip1q_s64_untied:
+** zip1 z0\.q, z1\.q, z2\.q
+** ret
+*/
+TEST_UNIFORM_Z (zip1q_s64_untied, svint64_t,
+ z0 = svzip1q_s64 (z1, z2),
+ z0 = svzip1q (z1, z2))
--- /dev/null
+/* { dg-require-effective-target aarch64_asm_f64mm_ok } */
+/* { dg-additional-options "-march=armv8.2-a+f64mm" } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sve_acle.h"
+
+/*
+** zip1q_s8_tied1:
+** zip1 z0\.q, z0\.q, z1\.q
+** ret
+*/
+TEST_UNIFORM_Z (zip1q_s8_tied1, svint8_t,
+ z0 = svzip1q_s8 (z0, z1),
+ z0 = svzip1q (z0, z1))
+
+/*
+** zip1q_s8_tied2:
+** zip1 z0\.q, z1\.q, z0\.q
+** ret
+*/
+TEST_UNIFORM_Z (zip1q_s8_tied2, svint8_t,
+ z0 = svzip1q_s8 (z1, z0),
+ z0 = svzip1q (z1, z0))
+
+/*
+** zip1q_s8_untied:
+** zip1 z0\.q, z1\.q, z2\.q
+** ret
+*/
+TEST_UNIFORM_Z (zip1q_s8_untied, svint8_t,
+ z0 = svzip1q_s8 (z1, z2),
+ z0 = svzip1q (z1, z2))
--- /dev/null
+/* { dg-require-effective-target aarch64_asm_f64mm_ok } */
+/* { dg-additional-options "-march=armv8.2-a+f64mm" } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sve_acle.h"
+
+/*
+** zip1q_u16_tied1:
+** zip1 z0\.q, z0\.q, z1\.q
+** ret
+*/
+TEST_UNIFORM_Z (zip1q_u16_tied1, svuint16_t,
+ z0 = svzip1q_u16 (z0, z1),
+ z0 = svzip1q (z0, z1))
+
+/*
+** zip1q_u16_tied2:
+** zip1 z0\.q, z1\.q, z0\.q
+** ret
+*/
+TEST_UNIFORM_Z (zip1q_u16_tied2, svuint16_t,
+ z0 = svzip1q_u16 (z1, z0),
+ z0 = svzip1q (z1, z0))
+
+/*
+** zip1q_u16_untied:
+** zip1 z0\.q, z1\.q, z2\.q
+** ret
+*/
+TEST_UNIFORM_Z (zip1q_u16_untied, svuint16_t,
+ z0 = svzip1q_u16 (z1, z2),
+ z0 = svzip1q (z1, z2))
--- /dev/null
+/* { dg-require-effective-target aarch64_asm_f64mm_ok } */
+/* { dg-additional-options "-march=armv8.2-a+f64mm" } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sve_acle.h"
+
+/*
+** zip1q_u32_tied1:
+** zip1 z0\.q, z0\.q, z1\.q
+** ret
+*/
+TEST_UNIFORM_Z (zip1q_u32_tied1, svuint32_t,
+ z0 = svzip1q_u32 (z0, z1),
+ z0 = svzip1q (z0, z1))
+
+/*
+** zip1q_u32_tied2:
+** zip1 z0\.q, z1\.q, z0\.q
+** ret
+*/
+TEST_UNIFORM_Z (zip1q_u32_tied2, svuint32_t,
+ z0 = svzip1q_u32 (z1, z0),
+ z0 = svzip1q (z1, z0))
+
+/*
+** zip1q_u32_untied:
+** zip1 z0\.q, z1\.q, z2\.q
+** ret
+*/
+TEST_UNIFORM_Z (zip1q_u32_untied, svuint32_t,
+ z0 = svzip1q_u32 (z1, z2),
+ z0 = svzip1q (z1, z2))
--- /dev/null
+/* { dg-require-effective-target aarch64_asm_f64mm_ok } */
+/* { dg-additional-options "-march=armv8.2-a+f64mm" } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sve_acle.h"
+
+/*
+** zip1q_u64_tied1:
+** zip1 z0\.q, z0\.q, z1\.q
+** ret
+*/
+TEST_UNIFORM_Z (zip1q_u64_tied1, svuint64_t,
+ z0 = svzip1q_u64 (z0, z1),
+ z0 = svzip1q (z0, z1))
+
+/*
+** zip1q_u64_tied2:
+** zip1 z0\.q, z1\.q, z0\.q
+** ret
+*/
+TEST_UNIFORM_Z (zip1q_u64_tied2, svuint64_t,
+ z0 = svzip1q_u64 (z1, z0),
+ z0 = svzip1q (z1, z0))
+
+/*
+** zip1q_u64_untied:
+** zip1 z0\.q, z1\.q, z2\.q
+** ret
+*/
+TEST_UNIFORM_Z (zip1q_u64_untied, svuint64_t,
+ z0 = svzip1q_u64 (z1, z2),
+ z0 = svzip1q (z1, z2))
--- /dev/null
+/* { dg-require-effective-target aarch64_asm_f64mm_ok } */
+/* { dg-additional-options "-march=armv8.2-a+f64mm" } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sve_acle.h"
+
+/*
+** zip1q_u8_tied1:
+** zip1 z0\.q, z0\.q, z1\.q
+** ret
+*/
+TEST_UNIFORM_Z (zip1q_u8_tied1, svuint8_t,
+ z0 = svzip1q_u8 (z0, z1),
+ z0 = svzip1q (z0, z1))
+
+/*
+** zip1q_u8_tied2:
+** zip1 z0\.q, z1\.q, z0\.q
+** ret
+*/
+TEST_UNIFORM_Z (zip1q_u8_tied2, svuint8_t,
+ z0 = svzip1q_u8 (z1, z0),
+ z0 = svzip1q (z1, z0))
+
+/*
+** zip1q_u8_untied:
+** zip1 z0\.q, z1\.q, z2\.q
+** ret
+*/
+TEST_UNIFORM_Z (zip1q_u8_untied, svuint8_t,
+ z0 = svzip1q_u8 (z1, z2),
+ z0 = svzip1q (z1, z2))
--- /dev/null
+/* { dg-require-effective-target aarch64_asm_f64mm_ok } */
+/* { dg-additional-options "-march=armv8.2-a+f64mm" } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sve_acle.h"
+
+/*
+** zip2q_f16_tied1:
+** zip2 z0\.q, z0\.q, z1\.q
+** ret
+*/
+TEST_UNIFORM_Z (zip2q_f16_tied1, svfloat16_t,
+ z0 = svzip2q_f16 (z0, z1),
+ z0 = svzip2q (z0, z1))
+
+/*
+** zip2q_f16_tied2:
+** zip2 z0\.q, z1\.q, z0\.q
+** ret
+*/
+TEST_UNIFORM_Z (zip2q_f16_tied2, svfloat16_t,
+ z0 = svzip2q_f16 (z1, z0),
+ z0 = svzip2q (z1, z0))
+
+/*
+** zip2q_f16_untied:
+** zip2 z0\.q, z1\.q, z2\.q
+** ret
+*/
+TEST_UNIFORM_Z (zip2q_f16_untied, svfloat16_t,
+ z0 = svzip2q_f16 (z1, z2),
+ z0 = svzip2q (z1, z2))
--- /dev/null
+/* { dg-require-effective-target aarch64_asm_f64mm_ok } */
+/* { dg-additional-options "-march=armv8.2-a+f64mm" } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sve_acle.h"
+
+/*
+** zip2q_f32_tied1:
+** zip2 z0\.q, z0\.q, z1\.q
+** ret
+*/
+TEST_UNIFORM_Z (zip2q_f32_tied1, svfloat32_t,
+ z0 = svzip2q_f32 (z0, z1),
+ z0 = svzip2q (z0, z1))
+
+/*
+** zip2q_f32_tied2:
+** zip2 z0\.q, z1\.q, z0\.q
+** ret
+*/
+TEST_UNIFORM_Z (zip2q_f32_tied2, svfloat32_t,
+ z0 = svzip2q_f32 (z1, z0),
+ z0 = svzip2q (z1, z0))
+
+/*
+** zip2q_f32_untied:
+** zip2 z0\.q, z1\.q, z2\.q
+** ret
+*/
+TEST_UNIFORM_Z (zip2q_f32_untied, svfloat32_t,
+ z0 = svzip2q_f32 (z1, z2),
+ z0 = svzip2q (z1, z2))
--- /dev/null
+/* { dg-require-effective-target aarch64_asm_f64mm_ok } */
+/* { dg-additional-options "-march=armv8.2-a+f64mm" } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sve_acle.h"
+
+/*
+** zip2q_f64_tied1:
+** zip2 z0\.q, z0\.q, z1\.q
+** ret
+*/
+TEST_UNIFORM_Z (zip2q_f64_tied1, svfloat64_t,
+ z0 = svzip2q_f64 (z0, z1),
+ z0 = svzip2q (z0, z1))
+
+/*
+** zip2q_f64_tied2:
+** zip2 z0\.q, z1\.q, z0\.q
+** ret
+*/
+TEST_UNIFORM_Z (zip2q_f64_tied2, svfloat64_t,
+ z0 = svzip2q_f64 (z1, z0),
+ z0 = svzip2q (z1, z0))
+
+/*
+** zip2q_f64_untied:
+** zip2 z0\.q, z1\.q, z2\.q
+** ret
+*/
+TEST_UNIFORM_Z (zip2q_f64_untied, svfloat64_t,
+ z0 = svzip2q_f64 (z1, z2),
+ z0 = svzip2q (z1, z2))
--- /dev/null
+/* { dg-require-effective-target aarch64_asm_f64mm_ok } */
+/* { dg-additional-options "-march=armv8.2-a+f64mm" } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sve_acle.h"
+
+/*
+** zip2q_s16_tied1:
+** zip2 z0\.q, z0\.q, z1\.q
+** ret
+*/
+TEST_UNIFORM_Z (zip2q_s16_tied1, svint16_t,
+ z0 = svzip2q_s16 (z0, z1),
+ z0 = svzip2q (z0, z1))
+
+/*
+** zip2q_s16_tied2:
+** zip2 z0\.q, z1\.q, z0\.q
+** ret
+*/
+TEST_UNIFORM_Z (zip2q_s16_tied2, svint16_t,
+ z0 = svzip2q_s16 (z1, z0),
+ z0 = svzip2q (z1, z0))
+
+/*
+** zip2q_s16_untied:
+** zip2 z0\.q, z1\.q, z2\.q
+** ret
+*/
+TEST_UNIFORM_Z (zip2q_s16_untied, svint16_t,
+ z0 = svzip2q_s16 (z1, z2),
+ z0 = svzip2q (z1, z2))
--- /dev/null
+/* { dg-require-effective-target aarch64_asm_f64mm_ok } */
+/* { dg-additional-options "-march=armv8.2-a+f64mm" } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sve_acle.h"
+
+/*
+** zip2q_s32_tied1:
+** zip2 z0\.q, z0\.q, z1\.q
+** ret
+*/
+TEST_UNIFORM_Z (zip2q_s32_tied1, svint32_t,
+ z0 = svzip2q_s32 (z0, z1),
+ z0 = svzip2q (z0, z1))
+
+/*
+** zip2q_s32_tied2:
+** zip2 z0\.q, z1\.q, z0\.q
+** ret
+*/
+TEST_UNIFORM_Z (zip2q_s32_tied2, svint32_t,
+ z0 = svzip2q_s32 (z1, z0),
+ z0 = svzip2q (z1, z0))
+
+/*
+** zip2q_s32_untied:
+** zip2 z0\.q, z1\.q, z2\.q
+** ret
+*/
+TEST_UNIFORM_Z (zip2q_s32_untied, svint32_t,
+ z0 = svzip2q_s32 (z1, z2),
+ z0 = svzip2q (z1, z2))
--- /dev/null
+/* { dg-require-effective-target aarch64_asm_f64mm_ok } */
+/* { dg-additional-options "-march=armv8.2-a+f64mm" } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sve_acle.h"
+
+/*
+** zip2q_s64_tied1:
+** zip2 z0\.q, z0\.q, z1\.q
+** ret
+*/
+TEST_UNIFORM_Z (zip2q_s64_tied1, svint64_t,
+ z0 = svzip2q_s64 (z0, z1),
+ z0 = svzip2q (z0, z1))
+
+/*
+** zip2q_s64_tied2:
+** zip2 z0\.q, z1\.q, z0\.q
+** ret
+*/
+TEST_UNIFORM_Z (zip2q_s64_tied2, svint64_t,
+ z0 = svzip2q_s64 (z1, z0),
+ z0 = svzip2q (z1, z0))
+
+/*
+** zip2q_s64_untied:
+** zip2 z0\.q, z1\.q, z2\.q
+** ret
+*/
+TEST_UNIFORM_Z (zip2q_s64_untied, svint64_t,
+ z0 = svzip2q_s64 (z1, z2),
+ z0 = svzip2q (z1, z2))
--- /dev/null
+/* { dg-require-effective-target aarch64_asm_f64mm_ok } */
+/* { dg-additional-options "-march=armv8.2-a+f64mm" } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sve_acle.h"
+
+/*
+** zip2q_s8_tied1:
+** zip2 z0\.q, z0\.q, z1\.q
+** ret
+*/
+TEST_UNIFORM_Z (zip2q_s8_tied1, svint8_t,
+ z0 = svzip2q_s8 (z0, z1),
+ z0 = svzip2q (z0, z1))
+
+/*
+** zip2q_s8_tied2:
+** zip2 z0\.q, z1\.q, z0\.q
+** ret
+*/
+TEST_UNIFORM_Z (zip2q_s8_tied2, svint8_t,
+ z0 = svzip2q_s8 (z1, z0),
+ z0 = svzip2q (z1, z0))
+
+/*
+** zip2q_s8_untied:
+** zip2 z0\.q, z1\.q, z2\.q
+** ret
+*/
+TEST_UNIFORM_Z (zip2q_s8_untied, svint8_t,
+ z0 = svzip2q_s8 (z1, z2),
+ z0 = svzip2q (z1, z2))
--- /dev/null
+/* { dg-require-effective-target aarch64_asm_f64mm_ok } */
+/* { dg-additional-options "-march=armv8.2-a+f64mm" } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sve_acle.h"
+
+/*
+** zip2q_u16_tied1:
+** zip2 z0\.q, z0\.q, z1\.q
+** ret
+*/
+TEST_UNIFORM_Z (zip2q_u16_tied1, svuint16_t,
+ z0 = svzip2q_u16 (z0, z1),
+ z0 = svzip2q (z0, z1))
+
+/*
+** zip2q_u16_tied2:
+** zip2 z0\.q, z1\.q, z0\.q
+** ret
+*/
+TEST_UNIFORM_Z (zip2q_u16_tied2, svuint16_t,
+ z0 = svzip2q_u16 (z1, z0),
+ z0 = svzip2q (z1, z0))
+
+/*
+** zip2q_u16_untied:
+** zip2 z0\.q, z1\.q, z2\.q
+** ret
+*/
+TEST_UNIFORM_Z (zip2q_u16_untied, svuint16_t,
+ z0 = svzip2q_u16 (z1, z2),
+ z0 = svzip2q (z1, z2))
--- /dev/null
+/* { dg-require-effective-target aarch64_asm_f64mm_ok } */
+/* { dg-additional-options "-march=armv8.2-a+f64mm" } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sve_acle.h"
+
+/*
+** zip2q_u32_tied1:
+** zip2 z0\.q, z0\.q, z1\.q
+** ret
+*/
+TEST_UNIFORM_Z (zip2q_u32_tied1, svuint32_t,
+ z0 = svzip2q_u32 (z0, z1),
+ z0 = svzip2q (z0, z1))
+
+/*
+** zip2q_u32_tied2:
+** zip2 z0\.q, z1\.q, z0\.q
+** ret
+*/
+TEST_UNIFORM_Z (zip2q_u32_tied2, svuint32_t,
+ z0 = svzip2q_u32 (z1, z0),
+ z0 = svzip2q (z1, z0))
+
+/*
+** zip2q_u32_untied:
+** zip2 z0\.q, z1\.q, z2\.q
+** ret
+*/
+TEST_UNIFORM_Z (zip2q_u32_untied, svuint32_t,
+ z0 = svzip2q_u32 (z1, z2),
+ z0 = svzip2q (z1, z2))
--- /dev/null
+/* { dg-require-effective-target aarch64_asm_f64mm_ok } */
+/* { dg-additional-options "-march=armv8.2-a+f64mm" } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sve_acle.h"
+
+/*
+** zip2q_u64_tied1:
+** zip2 z0\.q, z0\.q, z1\.q
+** ret
+*/
+TEST_UNIFORM_Z (zip2q_u64_tied1, svuint64_t,
+ z0 = svzip2q_u64 (z0, z1),
+ z0 = svzip2q (z0, z1))
+
+/*
+** zip2q_u64_tied2:
+** zip2 z0\.q, z1\.q, z0\.q
+** ret
+*/
+TEST_UNIFORM_Z (zip2q_u64_tied2, svuint64_t,
+ z0 = svzip2q_u64 (z1, z0),
+ z0 = svzip2q (z1, z0))
+
+/*
+** zip2q_u64_untied:
+** zip2 z0\.q, z1\.q, z2\.q
+** ret
+*/
+TEST_UNIFORM_Z (zip2q_u64_untied, svuint64_t,
+ z0 = svzip2q_u64 (z1, z2),
+ z0 = svzip2q (z1, z2))
--- /dev/null
+/* { dg-require-effective-target aarch64_asm_f64mm_ok } */
+/* { dg-additional-options "-march=armv8.2-a+f64mm" } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sve_acle.h"
+
+/*
+** zip2q_u8_tied1:
+** zip2 z0\.q, z0\.q, z1\.q
+** ret
+*/
+TEST_UNIFORM_Z (zip2q_u8_tied1, svuint8_t,
+ z0 = svzip2q_u8 (z0, z1),
+ z0 = svzip2q (z0, z1))
+
+/*
+** zip2q_u8_tied2:
+** zip2 z0\.q, z1\.q, z0\.q
+** ret
+*/
+TEST_UNIFORM_Z (zip2q_u8_tied2, svuint8_t,
+ z0 = svzip2q_u8 (z1, z0),
+ z0 = svzip2q (z1, z0))
+
+/*
+** zip2q_u8_untied:
+** zip2 z0\.q, z1\.q, z2\.q
+** ret
+*/
+TEST_UNIFORM_Z (zip2q_u8_untied, svuint8_t,
+ z0 = svzip2q_u8 (z1, z2),
+ z0 = svzip2q (z1, z2))
--- /dev/null
+/* { dg-do compile } */
+/* { dg-additional-options "-march=armv8.2-a+sve+i8mm+f32mm+f64mm" } */
+
+#include <arm_sve.h>
+
+svuint32_t
+f1 (svint32_t s32, svuint8_t u8, svint8_t s8, svuint32_t u32)
+{
+ svmmla_s32 (s32); /* { dg-error {too few arguments to function 'svmmla_s32'} } */
+ svmmla_s32 (s32, s8, s8, u32); /* { dg-error {too many arguments to function 'svmmla_s32'} } */
+ svmmla_s32 (s32, u32, s8); /* { dg-error {incompatible type for argument 2 of 'svmmla_s32'} } */
+ svmmla_s32 (s32, u8, s8); /* { dg-error {incompatible type for argument 2 of 'svmmla_s32'} } */
+ svmmla_s32 (s32, s8, u8); /* { dg-error {incompatible type for argument 3 of 'svmmla_s32'} } */
+ svmmla_s32 (s32, s8, s32); /* { dg-error {incompatible type for argument 3 of 'svmmla_s32'} } */
+ svmmla_s32 (s32, s8, 0); /* { dg-error {incompatible type for argument 3 of 'svmmla_s32'} } */
+ svmmla_s32 (s32, s8, s8);
+ return svmmla_s32 (s32, s8, s8); /* { dg-error {incompatible types when returning type 'svint32_t' but 'svuint32_t' was expected} } */
+}
+
+void
+f2 (svbool_t pg, svint8_t s8, svuint8_t u8, svuint32_t u32, svint32_t s32,
+ svfloat16_t f16, svfloat32_t f32, svfloat64_t f64)
+{
+ svmmla (s32, s8); /* { dg-error {too few arguments to function 'svmmla'} } */
+ svmmla (s32, s8, s8, s8); /* { dg-error {too many arguments to function 'svmmla'} } */
+ svmmla (0, s8, s8); /* { dg-error {passing 'int' to argument 1 of 'svmmla', which expects an SVE vector type} } */
+ svmmla (pg, s8, s8); /* { dg-error {'svmmla' has no form that takes 'svbool_t' arguments} } */
+ svmmla (u8, s8, s8); /* { dg-error {'svmmla' has no form that takes 'svuint8_t' arguments} } */
+
+ svmmla (s32, 0, s8); /* { dg-error {passing 'int' to argument 2 of 'svmmla', which expects an SVE vector type} } */
+ svmmla (s32, u8, s8); /* { dg-error {arguments 1 and 2 of 'svmmla' must have the same signedness, but the values passed here have type 'svint32_t' and 'svuint8_t' respectively} } */
+ svmmla (s32, s8, u8); /* { dg-error {arguments 1 and 3 of 'svmmla' must have the same signedness, but the values passed here have type 'svint32_t' and 'svuint8_t' respectively} } */
+ svmmla (s32, s8, 0); /* { dg-error {passing 'int' to argument 3 of 'svmmla', which expects an SVE vector type} } */
+ svmmla (s32, s8, s8);
+ svmmla (s32, s32, s32); /* { dg-error {passing 'svint32_t' instead of the expected 'svint8_t' to argument 2 of 'svmmla', after passing 'svint32_t' to argument 1} } */
+ svmmla (s32, u32, u32); /* { dg-error {passing 'svuint32_t' instead of the expected 'svint8_t' to argument 2 of 'svmmla', after passing 'svint32_t' to argument 1} } */
+
+ svmmla (u32, 0, u8); /* { dg-error {passing 'int' to argument 2 of 'svmmla', which expects an SVE vector type} } */
+ svmmla (u32, s8, u8); /* { dg-error {arguments 1 and 2 of 'svmmla' must have the same signedness, but the values passed here have type 'svuint32_t' and 'svint8_t' respectively} } */
+ svmmla (u32, u8, s8); /* { dg-error {arguments 1 and 3 of 'svmmla' must have the same signedness, but the values passed here have type 'svuint32_t' and 'svint8_t' respectively} } */
+ svmmla (u32, u8, 0); /* { dg-error {passing 'int' to argument 3 of 'svmmla', which expects an SVE vector type} } */
+ svmmla (u32, u8, u8);
+ svmmla (u32, s32, s32); /* { dg-error {passing 'svint32_t' instead of the expected 'svuint8_t' to argument 2 of 'svmmla', after passing 'svuint32_t' to argument 1} } */
+ svmmla (u32, u32, u32); /* { dg-error {passing 'svuint32_t' instead of the expected 'svuint8_t' to argument 2 of 'svmmla', after passing 'svuint32_t' to argument 1} } */
+
+ svmmla (f16, s8, s8); /* { dg-error {'svmmla' has no form that takes 'svfloat16_t' arguments} } */
+ svmmla (f32, s8, s8); /* { dg-error {passing 'svint8_t' to argument 2 of 'svmmla', but previous arguments had type 'svfloat32_t'} } */
+ svmmla (f32, s32, s32); /* { dg-error {passing 'svint32_t' to argument 2 of 'svmmla', but previous arguments had type 'svfloat32_t'} } */
+ svmmla (f32, f16, f16); /* { dg-error {passing 'svfloat16_t' to argument 2 of 'svmmla', but previous arguments had type 'svfloat32_t'} } */
+ svmmla (f64, f16, f16); /* { dg-error {passing 'svfloat16_t' to argument 2 of 'svmmla', but previous arguments had type 'svfloat64_t'} } */
+ svmmla (f32, f32, f16); /* { dg-error {passing 'svfloat16_t' to argument 3 of 'svmmla', but previous arguments had type 'svfloat32_t'} } */
+ svmmla (f64, f32, f16); /* { dg-error {passing 'svfloat32_t' to argument 2 of 'svmmla', but previous arguments had type 'svfloat64_t'} } */
+ svmmla (f64, f64, f16); /* { dg-error {passing 'svfloat16_t' to argument 3 of 'svmmla', but previous arguments had type 'svfloat64_t'} } */
+
+ svmmla (f16, f16, f16); /* { dg-error {'svmmla' has no form that takes 'svfloat16_t' arguments} } */
+ svmmla (f32, f32, f32);
+ svmmla (f64, f64, f64);
+}
--- /dev/null
+/* { dg-do compile } */
+/* { dg-additional-options "-march=armv8.2-a+sve" } */
+
+#include <arm_sve.h>
+
+void
+f1 (svint32_t s32, svint8_t s8)
+{
+ svmmla_s32 (s32, s8, s8); /* { dg-error {ACLE function 'svmmla_s32' requires ISA extension 'i8mm'} } */
+}
--- /dev/null
+/* { dg-do compile } */
+/* { dg-additional-options "-march=armv8.2-a+sve" } */
+
+#include <arm_sve.h>
+
+void
+f1 (svint32_t s32, svint8_t s8)
+{
+ svmmla (s32, s8, s8); /* { dg-error {ACLE function 'svmmla_s32' requires ISA extension 'i8mm'} } */
+}
--- /dev/null
+/* { dg-do compile } */
+/* { dg-additional-options "-march=armv8.2-a+sve" } */
+
+#include <arm_sve.h>
+
+void
+f1 (svfloat32_t f32)
+{
+ svmmla_f32 (f32, f32, f32); /* { dg-error {ACLE function 'svmmla_f32' requires ISA extension 'f32mm'} } */
+}
--- /dev/null
+/* { dg-do compile } */
+/* { dg-additional-options "-march=armv8.2-a+sve" } */
+
+#include <arm_sve.h>
+
+void
+f1 (svfloat32_t f32)
+{
+ svmmla (f32, f32, f32); /* { dg-error {ACLE function 'svmmla_f32' requires ISA extension 'f32mm'} } */
+}
--- /dev/null
+/* { dg-do compile } */
+/* { dg-additional-options "-march=armv8.2-a+sve" } */
+
+#include <arm_sve.h>
+
+void
+f1 (svfloat64_t f64)
+{
+ svmmla_f64 (f64, f64, f64); /* { dg-error {ACLE function 'svmmla_f64' requires ISA extension 'f64mm'} } */
+}
--- /dev/null
+/* { dg-do compile } */
+/* { dg-additional-options "-march=armv8.2-a+sve" } */
+
+#include <arm_sve.h>
+
+void
+f1 (svfloat64_t f64)
+{
+ svmmla (f64, f64, f64); /* { dg-error {ACLE function 'svmmla_f64' requires ISA extension 'f64mm'} } */
+}
--- /dev/null
+/* { dg-do compile } */
+/* { dg-additional-options "-march=armv8.6-a+sve+i8mm" } */
+
+#include <arm_sve.h>
+
+void
+f1 (svbool_t pg, svint8_t s8, svuint8_t u8, svint16_t s16, svuint16_t u16,
+ svint32_t s32, svuint32_t u32, svint64_t s64, svuint64_t u64,
+ svfloat32_t f32, int i)
+{
+ svsudot_lane (s32, s8, u8); /* { dg-error {too few arguments to function 'svsudot_lane'} } */
+ svsudot_lane (s32, s8, u8, 0, 0); /* { dg-error {too many arguments to function 'svsudot_lane'} } */
+ svsudot_lane (0, s8, u8, 0); /* { dg-error {passing 'int' to argument 1 of 'svsudot_lane', which expects an SVE vector type} } */
+ svsudot_lane (pg, s8, u8, 0); /* { dg-error {'svsudot_lane' has no form that takes 'svbool_t' arguments} } */
+ svsudot_lane (u8, s8, u8, 0); /* { dg-error {'svsudot_lane' has no form that takes 'svuint8_t' arguments} } */
+ svsudot_lane (f32, s8, u8, 0); /* { dg-error {'svsudot_lane' has no form that takes 'svfloat32_t' arguments} } */
+ svsudot_lane (u32, s8, u8, 0); /* { dg-error {'svsudot_lane' has no form that takes 'svuint32_t' arguments} } */
+ svsudot_lane (s32, s8, u8, 0);
+ svsudot_lane (s32, 0, u8, 0); /* { dg-error {passing 'int' to argument 2 of 'svsudot_lane', which expects an SVE vector type} } */
+ svsudot_lane (s32, s8, 0, 0); /* { dg-error {passing 'int' to argument 3 of 'svsudot_lane', which expects an SVE vector type} } */
+
+ svsudot_lane (s32, s8, u8, 0);
+ svsudot_lane (s32, u8, u8, 0); /* { dg-error {passing 'svuint8_t' to argument 2 of 'svsudot_lane', which expects a vector of signed integers} } */
+ svsudot_lane (s32, s8, s8, 0); /* { dg-error {passing 'svint8_t' to argument 3 of 'svsudot_lane', which expects a vector of unsigned integers} } */
+ svsudot_lane (s32, s32, s32, 0); /* { dg-error {passing 'svint32_t' instead of the expected 'svint8_t' to argument 2 of 'svsudot_lane', after passing 'svint32_t' to argument 1} } */
+
+ svsudot_lane (s32, s8, u8, i); /* { dg-error {argument 4 of 'svsudot_lane' must be an integer constant expression} } */
+ svsudot_lane (s32, s8, u8, 0);
+ svsudot_lane (s32, s8, u8, 3);
+ svsudot_lane (s32, s8, u8, 4); /* { dg-error {passing 4 to argument 4 of 'svsudot_lane', which expects a value in the range \[0, 3\]} } */
+ svsudot_lane (s32, s8, u8, -1); /* { dg-error {passing -1 to argument 4 of 'svsudot_lane', which expects a value in the range \[0, 3\]} } */
+}
--- /dev/null
+/* { dg-do compile } */
+/* { dg-additional-options "-march=armv8.6-a+sve+i8mm" } */
+
+#include <arm_sve.h>
+
+svuint32_t
+f1 (svint32_t s32, svuint8_t u8, svint8_t s8, svuint32_t u32)
+{
+ svsudot_s32 (s32); /* { dg-error {too few arguments to function 'svsudot_s32'} } */
+ svsudot_s32 (s32, s8, u8, u32); /* { dg-error {too many arguments to function 'svsudot_s32'} } */
+ svsudot_s32 (s32, s32, u8); /* { dg-error {incompatible type for argument 2 of 'svsudot_s32'} } */
+ svsudot_s32 (s32, u8, u8); /* { dg-error {incompatible type for argument 2 of 'svsudot_s32'} } */
+ svsudot_s32 (s32, s8, u32); /* { dg-error {incompatible type for argument 3 of 'svsudot_s32'} } */
+ svsudot_s32 (s32, s8, s8); /* { dg-error {incompatible type for argument 3 of 'svsudot_s32'} } */
+ svsudot_s32 (s32, s8, 0); /* { dg-error {incompatible type for argument 3 of 'svsudot_s32'} } */
+ svsudot_s32 (s32, s8, u8);
+ return svsudot_s32 (s32, s8, u8); /* { dg-error {incompatible types when returning type 'svint32_t' but 'svuint32_t' was expected} } */
+}
+
+void
+f2 (svbool_t pg, svint8_t s8, svuint8_t u8, svuint32_t u32,
+ svint32_t s32, svfloat32_t f32)
+{
+ svsudot (s32, s8); /* { dg-error {too few arguments to function 'svsudot'} } */
+ svsudot (s32, s8, u8, u8); /* { dg-error {too many arguments to function 'svsudot'} } */
+ svsudot (0, s8, u8); /* { dg-error {passing 'int' to argument 1 of 'svsudot', which expects an SVE vector type} } */
+ svsudot (pg, s8, u8); /* { dg-error {'svsudot' has no form that takes 'svbool_t' arguments} } */
+ svsudot (u8, s8, u8); /* { dg-error {'svsudot' has no form that takes 'svuint8_t' arguments} } */
+ svsudot (f32, s8, u8); /* { dg-error {'svsudot' has no form that takes 'svfloat32_t' arguments} } */
+ svsudot (s32, s8, u8);
+ svsudot (s32, 0, u8); /* { dg-error {passing 'int' to argument 2 of 'svsudot', which expects an SVE vector type} } */
+ svsudot (s32, u8, u8); /* { dg-error {passing 'svuint8_t' to argument 2 of 'svsudot', which expects a vector of signed integers} } */
+ svsudot (s32, s8, s8); /* { dg-error {passing 'svint8_t' to argument 3 of 'svsudot', which expects a vector of unsigned integers} } */
+ svsudot (s32, s8, 0);
+ svsudot (s32, s8, u8);
+ svsudot (s32, u32, u32); /* { dg-error {passing 'svuint32_t' to argument 2 of 'svsudot', which expects a vector of signed integers} } */
+}
--- /dev/null
+/* { dg-do compile } */
+/* { dg-additional-options "-march=armv8.6-a+sve+i8mm" } */
+
+#include <arm_sve.h>
+
+svuint32_t
+f1 (svint32_t s32, svuint8_t u8, svint8_t s8, svuint32_t u32)
+{
+ svusmmla_s32 (s32); /* { dg-error {too few arguments to function 'svusmmla_s32'} } */
+ svusmmla_s32 (s32, u8, s8, u32); /* { dg-error {too many arguments to function 'svusmmla_s32'} } */
+ svusmmla_s32 (s32, u32, s8); /* { dg-error {incompatible type for argument 2 of 'svusmmla_s32'} } */
+ svusmmla_s32 (s32, s8, s8); /* { dg-error {incompatible type for argument 2 of 'svusmmla_s32'} } */
+ svusmmla_s32 (s32, u8, u8); /* { dg-error {incompatible type for argument 3 of 'svusmmla_s32'} } */
+ svusmmla_s32 (s32, u8, s32); /* { dg-error {incompatible type for argument 3 of 'svusmmla_s32'} } */
+ svusmmla_s32 (s32, u8, 0); /* { dg-error {incompatible type for argument 3 of 'svusmmla_s32'} } */
+ svusmmla_s32 (s32, u8, s8);
+ return svusmmla_s32 (s32, u8, s8); /* { dg-error {incompatible types when returning type 'svint32_t' but 'svuint32_t' was expected} } */
+}
+
+void
+f2 (svbool_t pg, svint8_t s8, svuint8_t u8, svuint32_t u32,
+ svint32_t s32, svfloat32_t f32)
+{
+ svusmmla (s32, u8); /* { dg-error {too few arguments to function 'svusmmla'} } */
+ svusmmla (s32, u8, s8, u8); /* { dg-error {too many arguments to function 'svusmmla'} } */
+ svusmmla (0, u8, s8); /* { dg-error {passing 'int' to argument 1 of 'svusmmla', which expects an SVE vector type} } */
+ svusmmla (pg, u8, s8); /* { dg-error {'svusmmla' has no form that takes 'svbool_t' arguments} } */
+ svusmmla (u8, u8, s8); /* { dg-error {'svusmmla' has no form that takes 'svuint8_t' arguments} } */
+ svusmmla (f32, u8, s8); /* { dg-error {'svusmmla' has no form that takes 'svfloat32_t' arguments} } */
+ svusmmla (s32, u8, s8);
+ svusmmla (s32, 0, s8); /* { dg-error {passing 'int' to argument 2 of 'svusmmla', which expects an SVE vector type} } */
+ svusmmla (s32, u8, u8); /* { dg-error {passing 'svuint8_t' to argument 3 of 'svusmmla', which expects a vector of signed integers} } */
+ svusmmla (s32, s8, s8); /* { dg-error {passing 'svint8_t' to argument 2 of 'svusmmla', which expects a vector of unsigned integers} } */
+ svusmmla (s32, u8, 0); /* { dg-error {passing 'int' to argument 3 of 'svusmmla', which expects an SVE vector type} } */
+ svusmmla (s32, u8, s8);
+ svusmmla (s32, u32, u32); /* { dg-error {passing 'svuint32_t' instead of the expected 'svuint8_t' to argument 2 of 'svusmmla', after passing 'svint32_t' to argument 1} } */
+}
--- /dev/null
+/* { dg-do compile } */
+/* { dg-additional-options "-march=armv8.6-a+sve+i8mm" } */
+
+#include <arm_sve.h>
+
+void
+f1 (svbool_t pg, svint8_t s8, svuint8_t u8, svint16_t s16, svuint16_t u16,
+ svint32_t s32, svuint32_t u32, svint64_t s64, svuint64_t u64,
+ svfloat32_t f32, int i)
+{
+ svusdot_lane (s32, u8, s8); /* { dg-error {too few arguments to function 'svusdot_lane'} } */
+ svusdot_lane (s32, u8, s8, 0, 0); /* { dg-error {too many arguments to function 'svusdot_lane'} } */
+ svusdot_lane (0, u8, s8, 0); /* { dg-error {passing 'int' to argument 1 of 'svusdot_lane', which expects an SVE vector type} } */
+ svusdot_lane (pg, u8, s8, 0); /* { dg-error {'svusdot_lane' has no form that takes 'svbool_t' arguments} } */
+ svusdot_lane (u8, u8, s8, 0); /* { dg-error {'svusdot_lane' has no form that takes 'svuint8_t' arguments} } */
+ svusdot_lane (f32, u8, s8, 0); /* { dg-error {'svusdot_lane' has no form that takes 'svfloat32_t' arguments} } */
+ svusdot_lane (u32, u8, s8, 0); /* { dg-error {'svusdot_lane' has no form that takes 'svuint32_t' arguments} } */
+ svusdot_lane (s32, u8, s8, 0);
+ svusdot_lane (s32, 0, s8, 0); /* { dg-error {passing 'int' to argument 2 of 'svusdot_lane', which expects an SVE vector type} } */
+ svusdot_lane (s32, u8, 0, 0); /* { dg-error {passing 'int' to argument 3 of 'svusdot_lane', which expects an SVE vector type} } */
+
+ svusdot_lane (s32, u8, s8, 0);
+ svusdot_lane (s32, s8, s8, 0); /* { dg-error {passing 'svint8_t' to argument 2 of 'svusdot_lane', which expects a vector of unsigned integers} } */
+ svusdot_lane (s32, u8, u8, 0); /* { dg-error {passing 'svuint8_t' to argument 3 of 'svusdot_lane', which expects a vector of signed integers} } */
+ svusdot_lane (s32, s32, s32, 0); /* { dg-error {passing 'svint32_t' to argument 2 of 'svusdot_lane', which expects a vector of unsigned integers} } */
+
+ svusdot_lane (s32, u8, s8, i); /* { dg-error {argument 4 of 'svusdot_lane' must be an integer constant expression} } */
+ svusdot_lane (s32, u8, s8, 0);
+ svusdot_lane (s32, u8, s8, 3);
+ svusdot_lane (s32, u8, s8, 4); /* { dg-error {passing 4 to argument 4 of 'svusdot_lane', which expects a value in the range \[0, 3\]} } */
+ svusdot_lane (s32, u8, s8, -1); /* { dg-error {passing -1 to argument 4 of 'svusdot_lane', which expects a value in the range \[0, 3\]} } */
+}
--- /dev/null
+/* { dg-do compile } */
+/* { dg-additional-options "-march=armv8.6-a+sve+i8mm" } */
+
+#include <arm_sve.h>
+
+svuint32_t
+f1 (svint32_t s32, svuint8_t u8, svint8_t s8, svuint32_t u32)
+{
+ svusdot_s32 (s32); /* { dg-error {too few arguments to function 'svusdot_s32'} } */
+ svusdot_s32 (s32, u8, s8, u32); /* { dg-error {too many arguments to function 'svusdot_s32'} } */
+ svusdot_s32 (s32, u32, s8); /* { dg-error {incompatible type for argument 2 of 'svusdot_s32'} } */
+ svusdot_s32 (s32, s8, s8); /* { dg-error {incompatible type for argument 2 of 'svusdot_s32'} } */
+ svusdot_s32 (s32, u8, u8); /* { dg-error {incompatible type for argument 3 of 'svusdot_s32'} } */
+ svusdot_s32 (s32, u8, s32); /* { dg-error {incompatible type for argument 3 of 'svusdot_s32'} } */
+ svusdot_s32 (s32, u8, 0); /* { dg-error {incompatible type for argument 3 of 'svusdot_s32'} } */
+ svusdot_s32 (s32, u8, s8);
+ return svusdot_s32 (s32, u8, s8); /* { dg-error {incompatible types when returning type 'svint32_t' but 'svuint32_t' was expected} } */
+}
+
+void
+f2 (svbool_t pg, svint8_t s8, svuint8_t u8, svuint32_t u32,
+ svint32_t s32, svfloat32_t f32)
+{
+ svusdot (s32, u8); /* { dg-error {too few arguments to function 'svusdot'} } */
+ svusdot (s32, u8, s8, u8); /* { dg-error {too many arguments to function 'svusdot'} } */
+ svusdot (0, u8, s8); /* { dg-error {passing 'int' to argument 1 of 'svusdot', which expects an SVE vector type} } */
+ svusdot (pg, u8, s8); /* { dg-error {'svusdot' has no form that takes 'svbool_t' arguments} } */
+ svusdot (u8, u8, s8); /* { dg-error {'svusdot' has no form that takes 'svuint8_t' arguments} } */
+ svusdot (f32, u8, s8); /* { dg-error {'svusdot' has no form that takes 'svfloat32_t' arguments} } */
+ svusdot (s32, u8, s8);
+ svusdot (s32, 0, s8); /* { dg-error {passing 'int' to argument 2 of 'svusdot', which expects an SVE vector type} } */
+ svusdot (s32, u8, u8); /* { dg-error {passing 'svuint8_t' to argument 3 of 'svusdot', which expects a vector of signed integers} } */
+ svusdot (s32, s8, s8); /* { dg-error {passing 'svint8_t' to argument 2 of 'svusdot', which expects a vector of unsigned integers} } */
+ svusdot (s32, u8, 0);
+ svusdot (s32, u8, s8);
+ svusdot (s32, u32, u32); /* { dg-error {passing 'svuint32_t' instead of the expected 'svuint8_t' to argument 2 of 'svusdot', after passing 'svint32_t' to argument 1} } */
+}
# various architecture extensions via the .arch_extension pseudo-op.
foreach { aarch64_ext } { "fp" "simd" "crypto" "crc" "lse" "dotprod" "sve"
- "f64mm" } {
+ "i8mm" "f32mm" "f64mm" } {
eval [string map [list FUNC $aarch64_ext] {
proc check_effective_target_aarch64_asm_FUNC_ok { } {
if { [istarget aarch64*-*-*] } {