+2020-01-17 Matthew Malcomson <matthew.malcomson@arm.com>
+
+ * config/aarch64/aarch64-protos.h
+ (aarch64_sve_ld1ro_operand_p): New.
+ * config/aarch64/aarch64-sve-builtins-base.cc
+ (class load_replicate): New.
+ (class svld1ro_impl): New.
+ (class svld1rq_impl): Change to inherit from load_replicate.
+	(svld1ro): New SVE intrinsic function base.
+ * config/aarch64/aarch64-sve-builtins-base.def (svld1ro):
+ New DEF_SVE_FUNCTION.
+ * config/aarch64/aarch64-sve-builtins-base.h
+ (svld1ro): New decl.
+ * config/aarch64/aarch64-sve-builtins.cc
+ (function_expander::add_mem_operand): Modify assert to allow
+ OImode.
+ * config/aarch64/aarch64-sve.md (@aarch64_sve_ld1ro<mode>): New
+ pattern.
+ * config/aarch64/aarch64.c
+ (aarch64_sve_ld1rq_operand_p): Implement in terms of ...
+ (aarch64_sve_ld1rq_ld1ro_operand_p): This.
+ (aarch64_sve_ld1ro_operand_p): New.
+ * config/aarch64/aarch64.md (UNSPEC_LD1RO): New unspec.
+	* config/aarch64/constraints.md (UOb, UOh, UOw, UOd): New.
+ * config/aarch64/predicates.md
+ (aarch64_sve_ld1ro_operand_{b,h,w,d}): New.
+
2020-01-17 Matthew Malcomson <matthew.malcomson@arm.com>
	* config/aarch64/aarch64-c.c (__ARM_FEATURE_MATMUL_FLOAT64):
bool aarch64_simd_mem_operand_p (rtx);
bool aarch64_sve_ld1r_operand_p (rtx);
bool aarch64_sve_ld1rq_operand_p (rtx);
+bool aarch64_sve_ld1ro_operand_p (rtx, scalar_mode);
bool aarch64_sve_ldff1_operand_p (rtx);
bool aarch64_sve_ldnf1_operand_p (rtx);
bool aarch64_sve_ldr_operand_p (rtx);
}
};
-class svld1rq_impl : public function_base
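+/* Common base class for the load-and-replicate intrinsics (svld1rq and
+   svld1ro).  */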
+class load_replicate : public function_base
{
public:
unsigned int
{
return fi.scalar_type (0);
}
+};
+
+class svld1rq_impl : public load_replicate
+{
+public:
machine_mode
memory_vector_mode (const function_instance &fi) const OVERRIDE
{
}
};
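+
+/* Implements svld1ro.  */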
+class svld1ro_impl : public load_replicate
+{
+public:
+ machine_mode
+ memory_vector_mode (const function_instance &fi) const OVERRIDE
+ {
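+    /* LD1RO reads a single 256-bit block of memory, which we represent
+       with the scalar OImode rather than a dedicated vector mode.  */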
+ return OImode;
+ }
+
+ rtx
+ expand (function_expander &e) const OVERRIDE
+ {
+ insn_code icode = code_for_aarch64_sve_ld1ro (e.vector_mode (0));
+ return e.use_contiguous_load_insn (icode);
+ }
+};
+
/* Implements svld2, svld3 and svld4. */
class svld234_impl : public full_width_access
{
FUNCTION (svlastb, svlast_impl, (UNSPEC_LASTB))
FUNCTION (svld1, svld1_impl,)
FUNCTION (svld1_gather, svld1_gather_impl,)
+FUNCTION (svld1ro, svld1ro_impl,)
FUNCTION (svld1rq, svld1rq_impl,)
FUNCTION (svld1sb, svld1_extend_impl, (TYPE_SUFFIX_s8))
FUNCTION (svld1sb_gather, svld1_gather_extend_impl, (TYPE_SUFFIX_s8))
DEF_SVE_FUNCTION (svzip2, binary, all_data, none)
DEF_SVE_FUNCTION (svzip2, binary_pred, all_pred, none)
#undef REQUIRED_EXTENSIONS
+
+#define REQUIRED_EXTENSIONS AARCH64_FL_V8_6 | AARCH64_FL_F64MM
+DEF_SVE_FUNCTION (svld1ro, load_replicate, all_data, implicit)
+#undef REQUIRED_EXTENSIONS
extern const function_base *const svlastb;
extern const function_base *const svld1;
extern const function_base *const svld1_gather;
+extern const function_base *const svld1ro;
extern const function_base *const svld1rq;
extern const function_base *const svld1sb;
extern const function_base *const svld1sb_gather;
void
function_expander::add_mem_operand (machine_mode mode, rtx addr)
{
- gcc_assert (VECTOR_MODE_P (mode));
+  /* Exception for OImode for the ld1ro intrinsics.
+     They act on 256-bit octaword data, and it is easier to use a scalar
+     mode to represent that than to add a new vector mode solely for the
+     purpose of this intrinsic.  */
+ gcc_assert (VECTOR_MODE_P (mode) || mode == OImode);
rtx mem = gen_rtx_MEM (mode, memory_address (mode, addr));
/* The memory is only guaranteed to be element-aligned. */
set_mem_align (mem, GET_MODE_ALIGNMENT (GET_MODE_INNER (mode)));
}
)
+(define_insn "@aarch64_sve_ld1ro<mode>"
+ [(set (match_operand:SVE_FULL 0 "register_operand" "=w")
+ (unspec:SVE_FULL
+ [(match_operand:<VPRED> 2 "register_operand" "Upl")
+ (match_operand:OI 1 "aarch64_sve_ld1ro_operand_<Vesize>"
+ "UO<Vesize>")]
+ UNSPEC_LD1RO))]
+ "TARGET_SVE && TARGET_ARMV8_6"
+ {
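+    /* The operand was matched with OImode; recreate the MEM with the element
+       mode so that the "%1" address is validated and printed with the
+       element-size scaling that LD1RO expects.  */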
+ operands[1] = gen_rtx_MEM (<VEL>mode, XEXP (operands[1], 0));
+ return "ld1ro<Vesize>\t%0.<Vetype>, %2/z, %1";
+ }
+)
+
;; -------------------------------------------------------------------------
;; ---- [INT,FP] Initialize from individual elements
;; -------------------------------------------------------------------------
&& offset_6bit_unsigned_scaled_p (mode, addr.const_offset));
}
-/* Return true if OP is a valid MEM operand for an SVE LD1RQ instruction. */
+/* Return true if OP is a valid MEM operand for an SVE LD1RQ or LD1RO
+   instruction, where the size of the data read is given by MODE and the
+   size of the vector elements is given by ELEM_MODE.  */
bool
-aarch64_sve_ld1rq_operand_p (rtx op)
+aarch64_sve_ld1rq_ld1ro_operand_p (rtx op, machine_mode mode,
+ scalar_mode elem_mode)
{
struct aarch64_address_info addr;
- scalar_mode elem_mode = GET_MODE_INNER (GET_MODE (op));
if (!MEM_P (op)
|| !aarch64_classify_address (&addr, XEXP (op, 0), elem_mode, false))
return false;
if (addr.type == ADDRESS_REG_IMM)
- return offset_4bit_signed_scaled_p (TImode, addr.const_offset);
+ return offset_4bit_signed_scaled_p (mode, addr.const_offset);
if (addr.type == ADDRESS_REG_REG)
return (1U << addr.shift) == GET_MODE_SIZE (elem_mode);
return false;
}
+/* Return true if OP is a valid MEM operand for an SVE LD1RQ instruction. */
+bool
+aarch64_sve_ld1rq_operand_p (rtx op)
+{
+ return aarch64_sve_ld1rq_ld1ro_operand_p (op, TImode,
+ GET_MODE_INNER (GET_MODE (op)));
+}
+
+/* Return true if OP is a valid MEM operand for an SVE LD1RO instruction for
+   accessing a vector whose element size is given by ELEM_MODE.  */
+bool
+aarch64_sve_ld1ro_operand_p (rtx op, scalar_mode elem_mode)
+{
+ return aarch64_sve_ld1rq_ld1ro_operand_p (op, OImode, elem_mode);
+}
+
/* Return true if OP is a valid MEM operand for an SVE LDFF1 instruction. */
bool
aarch64_sve_ldff1_operand_p (rtx op)
UNSPEC_GEN_TAG ; Generate a 4-bit MTE tag.
UNSPEC_GEN_TAG_RND ; Generate a random 4-bit MTE tag.
UNSPEC_TAG_SPACE ; Translate address to MTE tag address space.
+  UNSPEC_LD1RO		; Load 256 bits of data and replicate to fill a vector.
])
(define_c_enum "unspecv" [
(and (match_code "mem")
(match_test "aarch64_sve_ld1rq_operand_p (op)")))
+(define_memory_constraint "UOb"
+ "@internal
+   An address valid for SVE LD1ROB."
+ (and (match_code "mem")
+ (match_test "aarch64_sve_ld1ro_operand_p (op, QImode)")))
+
+(define_memory_constraint "UOh"
+ "@internal
+ An address valid for SVE LD1ROH."
+ (and (match_code "mem")
+ (match_test "aarch64_sve_ld1ro_operand_p (op, HImode)")))
+
+(define_memory_constraint "UOw"
+ "@internal
+ An address valid for SVE LD1ROW."
+ (and (match_code "mem")
+ (match_test "aarch64_sve_ld1ro_operand_p (op, SImode)")))
+
+(define_memory_constraint "UOd"
+ "@internal
+ An address valid for SVE LD1ROD."
+ (and (match_code "mem")
+ (match_test "aarch64_sve_ld1ro_operand_p (op, DImode)")))
+
(define_memory_constraint "Uty"
"@internal
An address valid for SVE LD1Rs."
(and (match_code "mem")
(match_test "aarch64_sve_ld1rq_operand_p (op)")))
+(define_predicate "aarch64_sve_ld1ro_operand_b"
+ (and (match_code "mem")
+ (match_test "aarch64_sve_ld1ro_operand_p (op, QImode)")))
+
+(define_predicate "aarch64_sve_ld1ro_operand_h"
+ (and (match_code "mem")
+ (match_test "aarch64_sve_ld1ro_operand_p (op, HImode)")))
+
+(define_predicate "aarch64_sve_ld1ro_operand_w"
+ (and (match_code "mem")
+ (match_test "aarch64_sve_ld1ro_operand_p (op, SImode)")))
+
+(define_predicate "aarch64_sve_ld1ro_operand_d"
+ (and (match_code "mem")
+ (match_test "aarch64_sve_ld1ro_operand_p (op, DImode)")))
+
(define_predicate "aarch64_sve_ldff1_operand"
(and (match_code "mem")
(match_test "aarch64_sve_ldff1_operand_p (op)")))
+2020-01-17 Matthew Malcomson <matthew.malcomson@arm.com>
+
+ * gcc.target/aarch64/sve/acle/asm/ld1ro_f16.c: New test.
+ * gcc.target/aarch64/sve/acle/asm/ld1ro_f32.c: New test.
+ * gcc.target/aarch64/sve/acle/asm/ld1ro_f64.c: New test.
+ * gcc.target/aarch64/sve/acle/asm/ld1ro_s16.c: New test.
+ * gcc.target/aarch64/sve/acle/asm/ld1ro_s32.c: New test.
+ * gcc.target/aarch64/sve/acle/asm/ld1ro_s64.c: New test.
+ * gcc.target/aarch64/sve/acle/asm/ld1ro_s8.c: New test.
+ * gcc.target/aarch64/sve/acle/asm/ld1ro_u16.c: New test.
+ * gcc.target/aarch64/sve/acle/asm/ld1ro_u32.c: New test.
+ * gcc.target/aarch64/sve/acle/asm/ld1ro_u64.c: New test.
+ * gcc.target/aarch64/sve/acle/asm/ld1ro_u8.c: New test.
+
2020-01-17 Matthew Malcomson <matthew.malcomson@arm.com>
* gcc.target/aarch64/pragma_cpp_predefs_2.c: Check for f64mm
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+/* { dg-additional-options "-march=armv8.6-a+sve+f64mm" } */
+
+#include "test_sve_acle.h"
+
+/*
+** ld1ro_f16_base:
+** ld1roh z0\.h, p0/z, \[x0\]
+** ret
+*/
+TEST_LOAD (ld1ro_f16_base, svfloat16_t, float16_t,
+ z0 = svld1ro_f16 (p0, x0),
+ z0 = svld1ro (p0, x0))
+
+/*
+** ld1ro_f16_index:
+** ld1roh z0\.h, p0/z, \[x0, x1, lsl 1\]
+** ret
+*/
+TEST_LOAD (ld1ro_f16_index, svfloat16_t, float16_t,
+ z0 = svld1ro_f16 (p0, x0 + x1),
+ z0 = svld1ro (p0, x0 + x1))
+
+/*
+** ld1ro_f16_1:
+** add (x[0-9]+), x0, #?2
+** ld1roh z0\.h, p0/z, \[\1\]
+** ret
+*/
+TEST_LOAD (ld1ro_f16_1, svfloat16_t, float16_t,
+ z0 = svld1ro_f16 (p0, x0 + 1),
+ z0 = svld1ro (p0, x0 + 1))
+
+/*
+** ld1ro_f16_8:
+** add (x[0-9]+), x0, #?16
+** ld1roh z0\.h, p0/z, \[\1\]
+** ret
+*/
+TEST_LOAD (ld1ro_f16_8, svfloat16_t, float16_t,
+ z0 = svld1ro_f16 (p0, x0 + 8),
+ z0 = svld1ro (p0, x0 + 8))
+
+/*
+** ld1ro_f16_128:
+** add (x[0-9]+), x0, #?256
+** ld1roh z0\.h, p0/z, \[\1\]
+** ret
+*/
+TEST_LOAD (ld1ro_f16_128, svfloat16_t, float16_t,
+ z0 = svld1ro_f16 (p0, x0 + 128),
+ z0 = svld1ro (p0, x0 + 128))
+
+/*
+** ld1ro_f16_m1:
+** sub (x[0-9]+), x0, #?2
+** ld1roh z0\.h, p0/z, \[\1\]
+** ret
+*/
+TEST_LOAD (ld1ro_f16_m1, svfloat16_t, float16_t,
+ z0 = svld1ro_f16 (p0, x0 - 1),
+ z0 = svld1ro (p0, x0 - 1))
+
+/*
+** ld1ro_f16_m8:
+** sub (x[0-9]+), x0, #?16
+** ld1roh z0\.h, p0/z, \[\1\]
+** ret
+*/
+TEST_LOAD (ld1ro_f16_m8, svfloat16_t, float16_t,
+ z0 = svld1ro_f16 (p0, x0 - 8),
+ z0 = svld1ro (p0, x0 - 8))
+
+/*
+** ld1ro_f16_m144:
+** sub (x[0-9]+), x0, #?288
+** ld1roh z0\.h, p0/z, \[\1\]
+** ret
+*/
+TEST_LOAD (ld1ro_f16_m144, svfloat16_t, float16_t,
+ z0 = svld1ro_f16 (p0, x0 - 144),
+ z0 = svld1ro (p0, x0 - 144))
+
+/*
+** ld1ro_f16_16:
+** ld1roh z0\.h, p0/z, \[x0, #?32\]
+** ret
+*/
+TEST_LOAD (ld1ro_f16_16, svfloat16_t, float16_t,
+ z0 = svld1ro_f16 (p0, x0 + 16),
+ z0 = svld1ro (p0, x0 + 16))
+
+/*
+** ld1ro_f16_112:
+** ld1roh z0\.h, p0/z, \[x0, #?224\]
+** ret
+*/
+TEST_LOAD (ld1ro_f16_112, svfloat16_t, float16_t,
+ z0 = svld1ro_f16 (p0, x0 + 112),
+ z0 = svld1ro (p0, x0 + 112))
+
+/*
+** ld1ro_f16_m16:
+** ld1roh z0\.h, p0/z, \[x0, #?-32\]
+** ret
+*/
+TEST_LOAD (ld1ro_f16_m16, svfloat16_t, float16_t,
+ z0 = svld1ro_f16 (p0, x0 - 16),
+ z0 = svld1ro (p0, x0 - 16))
+
+/*
+** ld1ro_f16_m128:
+** ld1roh z0\.h, p0/z, \[x0, #?-256\]
+** ret
+*/
+TEST_LOAD (ld1ro_f16_m128, svfloat16_t, float16_t,
+ z0 = svld1ro_f16 (p0, x0 - 128),
+ z0 = svld1ro (p0, x0 - 128))
+
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+/* { dg-additional-options "-march=armv8.6-a+sve+f64mm" } */
+
+#include "test_sve_acle.h"
+
+/*
+** ld1ro_f32_base:
+** ld1row z0\.s, p0/z, \[x0\]
+** ret
+*/
+TEST_LOAD (ld1ro_f32_base, svfloat32_t, float32_t,
+ z0 = svld1ro_f32 (p0, x0),
+ z0 = svld1ro (p0, x0))
+
+/*
+** ld1ro_f32_index:
+** ld1row z0\.s, p0/z, \[x0, x1, lsl 2\]
+** ret
+*/
+TEST_LOAD (ld1ro_f32_index, svfloat32_t, float32_t,
+ z0 = svld1ro_f32 (p0, x0 + x1),
+ z0 = svld1ro (p0, x0 + x1))
+
+/*
+** ld1ro_f32_1:
+** add (x[0-9]+), x0, #?4
+** ld1row z0\.s, p0/z, \[\1\]
+** ret
+*/
+TEST_LOAD (ld1ro_f32_1, svfloat32_t, float32_t,
+ z0 = svld1ro_f32 (p0, x0 + 1),
+ z0 = svld1ro (p0, x0 + 1))
+
+/*
+** ld1ro_f32_4:
+** add (x[0-9]+), x0, #?16
+** ld1row z0\.s, p0/z, \[\1\]
+** ret
+*/
+TEST_LOAD (ld1ro_f32_4, svfloat32_t, float32_t,
+ z0 = svld1ro_f32 (p0, x0 + 4),
+ z0 = svld1ro (p0, x0 + 4))
+
+/*
+** ld1ro_f32_64:
+** add (x[0-9]+), x0, #?256
+** ld1row z0\.s, p0/z, \[\1\]
+** ret
+*/
+TEST_LOAD (ld1ro_f32_64, svfloat32_t, float32_t,
+ z0 = svld1ro_f32 (p0, x0 + 64),
+ z0 = svld1ro (p0, x0 + 64))
+
+/*
+** ld1ro_f32_m1:
+** sub (x[0-9]+), x0, #?4
+** ld1row z0\.s, p0/z, \[\1\]
+** ret
+*/
+TEST_LOAD (ld1ro_f32_m1, svfloat32_t, float32_t,
+ z0 = svld1ro_f32 (p0, x0 - 1),
+ z0 = svld1ro (p0, x0 - 1))
+
+/*
+** ld1ro_f32_m4:
+** sub (x[0-9]+), x0, #?16
+** ld1row z0\.s, p0/z, \[\1\]
+** ret
+*/
+TEST_LOAD (ld1ro_f32_m4, svfloat32_t, float32_t,
+ z0 = svld1ro_f32 (p0, x0 - 4),
+ z0 = svld1ro (p0, x0 - 4))
+
+/*
+** ld1ro_f32_m72:
+** sub (x[0-9]+), x0, #?288
+** ld1row z0\.s, p0/z, \[\1\]
+** ret
+*/
+TEST_LOAD (ld1ro_f32_m72, svfloat32_t, float32_t,
+ z0 = svld1ro_f32 (p0, x0 - 72),
+ z0 = svld1ro (p0, x0 - 72))
+
+/*
+** ld1ro_f32_8:
+** ld1row z0\.s, p0/z, \[x0, #?32\]
+** ret
+*/
+TEST_LOAD (ld1ro_f32_8, svfloat32_t, float32_t,
+ z0 = svld1ro_f32 (p0, x0 + 8),
+ z0 = svld1ro (p0, x0 + 8))
+
+/*
+** ld1ro_f32_56:
+** ld1row z0\.s, p0/z, \[x0, #?224\]
+** ret
+*/
+TEST_LOAD (ld1ro_f32_56, svfloat32_t, float32_t,
+ z0 = svld1ro_f32 (p0, x0 + 56),
+ z0 = svld1ro (p0, x0 + 56))
+
+/*
+** ld1ro_f32_m8:
+** ld1row z0\.s, p0/z, \[x0, #?-32\]
+** ret
+*/
+TEST_LOAD (ld1ro_f32_m8, svfloat32_t, float32_t,
+ z0 = svld1ro_f32 (p0, x0 - 8),
+ z0 = svld1ro (p0, x0 - 8))
+
+/*
+** ld1ro_f32_m64:
+** ld1row z0\.s, p0/z, \[x0, #?-256\]
+** ret
+*/
+TEST_LOAD (ld1ro_f32_m64, svfloat32_t, float32_t,
+ z0 = svld1ro_f32 (p0, x0 - 64),
+ z0 = svld1ro (p0, x0 - 64))
+
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+/* { dg-additional-options "-march=armv8.6-a+sve+f64mm" } */
+
+#include "test_sve_acle.h"
+
+/*
+** ld1ro_f64_base:
+** ld1rod z0\.d, p0/z, \[x0\]
+** ret
+*/
+TEST_LOAD (ld1ro_f64_base, svfloat64_t, float64_t,
+ z0 = svld1ro_f64 (p0, x0),
+ z0 = svld1ro (p0, x0))
+
+/*
+** ld1ro_f64_index:
+** ld1rod z0\.d, p0/z, \[x0, x1, lsl 3\]
+** ret
+*/
+TEST_LOAD (ld1ro_f64_index, svfloat64_t, float64_t,
+ z0 = svld1ro_f64 (p0, x0 + x1),
+ z0 = svld1ro (p0, x0 + x1))
+
+/*
+** ld1ro_f64_1:
+** add (x[0-9]+), x0, #?8
+** ld1rod z0\.d, p0/z, \[\1\]
+** ret
+*/
+TEST_LOAD (ld1ro_f64_1, svfloat64_t, float64_t,
+ z0 = svld1ro_f64 (p0, x0 + 1),
+ z0 = svld1ro (p0, x0 + 1))
+
+/*
+** ld1ro_f64_2:
+** add (x[0-9]+), x0, #?16
+** ld1rod z0\.d, p0/z, \[\1\]
+** ret
+*/
+TEST_LOAD (ld1ro_f64_2, svfloat64_t, float64_t,
+ z0 = svld1ro_f64 (p0, x0 + 2),
+ z0 = svld1ro (p0, x0 + 2))
+
+/*
+** ld1ro_f64_32:
+** add (x[0-9]+), x0, #?256
+** ld1rod z0\.d, p0/z, \[\1\]
+** ret
+*/
+TEST_LOAD (ld1ro_f64_32, svfloat64_t, float64_t,
+ z0 = svld1ro_f64 (p0, x0 + 32),
+ z0 = svld1ro (p0, x0 + 32))
+
+/*
+** ld1ro_f64_m1:
+** sub (x[0-9]+), x0, #?8
+** ld1rod z0\.d, p0/z, \[\1\]
+** ret
+*/
+TEST_LOAD (ld1ro_f64_m1, svfloat64_t, float64_t,
+ z0 = svld1ro_f64 (p0, x0 - 1),
+ z0 = svld1ro (p0, x0 - 1))
+
+/*
+** ld1ro_f64_m2:
+** sub (x[0-9]+), x0, #?16
+** ld1rod z0\.d, p0/z, \[\1\]
+** ret
+*/
+TEST_LOAD (ld1ro_f64_m2, svfloat64_t, float64_t,
+ z0 = svld1ro_f64 (p0, x0 - 2),
+ z0 = svld1ro (p0, x0 - 2))
+
+/*
+** ld1ro_f64_m36:
+** sub (x[0-9]+), x0, #?288
+** ld1rod z0\.d, p0/z, \[\1\]
+** ret
+*/
+TEST_LOAD (ld1ro_f64_m36, svfloat64_t, float64_t,
+ z0 = svld1ro_f64 (p0, x0 - 36),
+ z0 = svld1ro (p0, x0 - 36))
+
+/*
+** ld1ro_f64_4:
+** ld1rod z0\.d, p0/z, \[x0, #?32\]
+** ret
+*/
+TEST_LOAD (ld1ro_f64_4, svfloat64_t, float64_t,
+ z0 = svld1ro_f64 (p0, x0 + 4),
+ z0 = svld1ro (p0, x0 + 4))
+
+/*
+** ld1ro_f64_28:
+** ld1rod z0\.d, p0/z, \[x0, #?224\]
+** ret
+*/
+TEST_LOAD (ld1ro_f64_28, svfloat64_t, float64_t,
+ z0 = svld1ro_f64 (p0, x0 + 28),
+ z0 = svld1ro (p0, x0 + 28))
+
+/*
+** ld1ro_f64_m4:
+** ld1rod z0\.d, p0/z, \[x0, #?-32\]
+** ret
+*/
+TEST_LOAD (ld1ro_f64_m4, svfloat64_t, float64_t,
+ z0 = svld1ro_f64 (p0, x0 - 4),
+ z0 = svld1ro (p0, x0 - 4))
+
+/*
+** ld1ro_f64_m32:
+** ld1rod z0\.d, p0/z, \[x0, #?-256\]
+** ret
+*/
+TEST_LOAD (ld1ro_f64_m32, svfloat64_t, float64_t,
+ z0 = svld1ro_f64 (p0, x0 - 32),
+ z0 = svld1ro (p0, x0 - 32))
+
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+/* { dg-additional-options "-march=armv8.6-a+sve+f64mm" } */
+
+#include "test_sve_acle.h"
+
+/*
+** ld1ro_s16_base:
+** ld1roh z0\.h, p0/z, \[x0\]
+** ret
+*/
+TEST_LOAD (ld1ro_s16_base, svint16_t, int16_t,
+ z0 = svld1ro_s16 (p0, x0),
+ z0 = svld1ro (p0, x0))
+
+/*
+** ld1ro_s16_index:
+** ld1roh z0\.h, p0/z, \[x0, x1, lsl 1\]
+** ret
+*/
+TEST_LOAD (ld1ro_s16_index, svint16_t, int16_t,
+ z0 = svld1ro_s16 (p0, x0 + x1),
+ z0 = svld1ro (p0, x0 + x1))
+
+/*
+** ld1ro_s16_1:
+** add (x[0-9]+), x0, #?2
+** ld1roh z0\.h, p0/z, \[\1\]
+** ret
+*/
+TEST_LOAD (ld1ro_s16_1, svint16_t, int16_t,
+ z0 = svld1ro_s16 (p0, x0 + 1),
+ z0 = svld1ro (p0, x0 + 1))
+
+/*
+** ld1ro_s16_8:
+** add (x[0-9]+), x0, #?16
+** ld1roh z0\.h, p0/z, \[\1\]
+** ret
+*/
+TEST_LOAD (ld1ro_s16_8, svint16_t, int16_t,
+ z0 = svld1ro_s16 (p0, x0 + 8),
+ z0 = svld1ro (p0, x0 + 8))
+
+/*
+** ld1ro_s16_128:
+** add (x[0-9]+), x0, #?256
+** ld1roh z0\.h, p0/z, \[\1\]
+** ret
+*/
+TEST_LOAD (ld1ro_s16_128, svint16_t, int16_t,
+ z0 = svld1ro_s16 (p0, x0 + 128),
+ z0 = svld1ro (p0, x0 + 128))
+
+/*
+** ld1ro_s16_m1:
+** sub (x[0-9]+), x0, #?2
+** ld1roh z0\.h, p0/z, \[\1\]
+** ret
+*/
+TEST_LOAD (ld1ro_s16_m1, svint16_t, int16_t,
+ z0 = svld1ro_s16 (p0, x0 - 1),
+ z0 = svld1ro (p0, x0 - 1))
+
+/*
+** ld1ro_s16_m8:
+** sub (x[0-9]+), x0, #?16
+** ld1roh z0\.h, p0/z, \[\1\]
+** ret
+*/
+TEST_LOAD (ld1ro_s16_m8, svint16_t, int16_t,
+ z0 = svld1ro_s16 (p0, x0 - 8),
+ z0 = svld1ro (p0, x0 - 8))
+
+/*
+** ld1ro_s16_m144:
+** sub (x[0-9]+), x0, #?288
+** ld1roh z0\.h, p0/z, \[\1\]
+** ret
+*/
+TEST_LOAD (ld1ro_s16_m144, svint16_t, int16_t,
+ z0 = svld1ro_s16 (p0, x0 - 144),
+ z0 = svld1ro (p0, x0 - 144))
+
+/*
+** ld1ro_s16_16:
+** ld1roh z0\.h, p0/z, \[x0, #?32\]
+** ret
+*/
+TEST_LOAD (ld1ro_s16_16, svint16_t, int16_t,
+ z0 = svld1ro_s16 (p0, x0 + 16),
+ z0 = svld1ro (p0, x0 + 16))
+
+/*
+** ld1ro_s16_112:
+** ld1roh z0\.h, p0/z, \[x0, #?224\]
+** ret
+*/
+TEST_LOAD (ld1ro_s16_112, svint16_t, int16_t,
+ z0 = svld1ro_s16 (p0, x0 + 112),
+ z0 = svld1ro (p0, x0 + 112))
+
+/*
+** ld1ro_s16_m16:
+** ld1roh z0\.h, p0/z, \[x0, #?-32\]
+** ret
+*/
+TEST_LOAD (ld1ro_s16_m16, svint16_t, int16_t,
+ z0 = svld1ro_s16 (p0, x0 - 16),
+ z0 = svld1ro (p0, x0 - 16))
+
+/*
+** ld1ro_s16_m128:
+** ld1roh z0\.h, p0/z, \[x0, #?-256\]
+** ret
+*/
+TEST_LOAD (ld1ro_s16_m128, svint16_t, int16_t,
+ z0 = svld1ro_s16 (p0, x0 - 128),
+ z0 = svld1ro (p0, x0 - 128))
+
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+/* { dg-additional-options "-march=armv8.6-a+sve+f64mm" } */
+
+#include "test_sve_acle.h"
+
+/*
+** ld1ro_s32_base:
+** ld1row z0\.s, p0/z, \[x0\]
+** ret
+*/
+TEST_LOAD (ld1ro_s32_base, svint32_t, int32_t,
+ z0 = svld1ro_s32 (p0, x0),
+ z0 = svld1ro (p0, x0))
+
+/*
+** ld1ro_s32_index:
+** ld1row z0\.s, p0/z, \[x0, x1, lsl 2\]
+** ret
+*/
+TEST_LOAD (ld1ro_s32_index, svint32_t, int32_t,
+ z0 = svld1ro_s32 (p0, x0 + x1),
+ z0 = svld1ro (p0, x0 + x1))
+
+/*
+** ld1ro_s32_1:
+** add (x[0-9]+), x0, #?4
+** ld1row z0\.s, p0/z, \[\1\]
+** ret
+*/
+TEST_LOAD (ld1ro_s32_1, svint32_t, int32_t,
+ z0 = svld1ro_s32 (p0, x0 + 1),
+ z0 = svld1ro (p0, x0 + 1))
+
+/*
+** ld1ro_s32_4:
+** add (x[0-9]+), x0, #?16
+** ld1row z0\.s, p0/z, \[\1\]
+** ret
+*/
+TEST_LOAD (ld1ro_s32_4, svint32_t, int32_t,
+ z0 = svld1ro_s32 (p0, x0 + 4),
+ z0 = svld1ro (p0, x0 + 4))
+
+/*
+** ld1ro_s32_64:
+** add (x[0-9]+), x0, #?256
+** ld1row z0\.s, p0/z, \[\1\]
+** ret
+*/
+TEST_LOAD (ld1ro_s32_64, svint32_t, int32_t,
+ z0 = svld1ro_s32 (p0, x0 + 64),
+ z0 = svld1ro (p0, x0 + 64))
+
+/*
+** ld1ro_s32_m1:
+** sub (x[0-9]+), x0, #?4
+** ld1row z0\.s, p0/z, \[\1\]
+** ret
+*/
+TEST_LOAD (ld1ro_s32_m1, svint32_t, int32_t,
+ z0 = svld1ro_s32 (p0, x0 - 1),
+ z0 = svld1ro (p0, x0 - 1))
+
+/*
+** ld1ro_s32_m4:
+** sub (x[0-9]+), x0, #?16
+** ld1row z0\.s, p0/z, \[\1\]
+** ret
+*/
+TEST_LOAD (ld1ro_s32_m4, svint32_t, int32_t,
+ z0 = svld1ro_s32 (p0, x0 - 4),
+ z0 = svld1ro (p0, x0 - 4))
+
+/*
+** ld1ro_s32_m72:
+** sub (x[0-9]+), x0, #?288
+** ld1row z0\.s, p0/z, \[\1\]
+** ret
+*/
+TEST_LOAD (ld1ro_s32_m72, svint32_t, int32_t,
+ z0 = svld1ro_s32 (p0, x0 - 72),
+ z0 = svld1ro (p0, x0 - 72))
+
+/*
+** ld1ro_s32_8:
+** ld1row z0\.s, p0/z, \[x0, #?32\]
+** ret
+*/
+TEST_LOAD (ld1ro_s32_8, svint32_t, int32_t,
+ z0 = svld1ro_s32 (p0, x0 + 8),
+ z0 = svld1ro (p0, x0 + 8))
+
+/*
+** ld1ro_s32_56:
+** ld1row z0\.s, p0/z, \[x0, #?224\]
+** ret
+*/
+TEST_LOAD (ld1ro_s32_56, svint32_t, int32_t,
+ z0 = svld1ro_s32 (p0, x0 + 56),
+ z0 = svld1ro (p0, x0 + 56))
+
+/*
+** ld1ro_s32_m8:
+** ld1row z0\.s, p0/z, \[x0, #?-32\]
+** ret
+*/
+TEST_LOAD (ld1ro_s32_m8, svint32_t, int32_t,
+ z0 = svld1ro_s32 (p0, x0 - 8),
+ z0 = svld1ro (p0, x0 - 8))
+
+/*
+** ld1ro_s32_m64:
+** ld1row z0\.s, p0/z, \[x0, #?-256\]
+** ret
+*/
+TEST_LOAD (ld1ro_s32_m64, svint32_t, int32_t,
+ z0 = svld1ro_s32 (p0, x0 - 64),
+ z0 = svld1ro (p0, x0 - 64))
+
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+/* { dg-additional-options "-march=armv8.6-a+sve+f64mm" } */
+
+#include "test_sve_acle.h"
+
+/*
+** ld1ro_s64_base:
+** ld1rod z0\.d, p0/z, \[x0\]
+** ret
+*/
+TEST_LOAD (ld1ro_s64_base, svint64_t, int64_t,
+ z0 = svld1ro_s64 (p0, x0),
+ z0 = svld1ro (p0, x0))
+
+/*
+** ld1ro_s64_index:
+** ld1rod z0\.d, p0/z, \[x0, x1, lsl 3\]
+** ret
+*/
+TEST_LOAD (ld1ro_s64_index, svint64_t, int64_t,
+ z0 = svld1ro_s64 (p0, x0 + x1),
+ z0 = svld1ro (p0, x0 + x1))
+
+/*
+** ld1ro_s64_1:
+** add (x[0-9]+), x0, #?8
+** ld1rod z0\.d, p0/z, \[\1\]
+** ret
+*/
+TEST_LOAD (ld1ro_s64_1, svint64_t, int64_t,
+ z0 = svld1ro_s64 (p0, x0 + 1),
+ z0 = svld1ro (p0, x0 + 1))
+
+/*
+** ld1ro_s64_2:
+** add (x[0-9]+), x0, #?16
+** ld1rod z0\.d, p0/z, \[\1\]
+** ret
+*/
+TEST_LOAD (ld1ro_s64_2, svint64_t, int64_t,
+ z0 = svld1ro_s64 (p0, x0 + 2),
+ z0 = svld1ro (p0, x0 + 2))
+
+/*
+** ld1ro_s64_32:
+** add (x[0-9]+), x0, #?256
+** ld1rod z0\.d, p0/z, \[\1\]
+** ret
+*/
+TEST_LOAD (ld1ro_s64_32, svint64_t, int64_t,
+ z0 = svld1ro_s64 (p0, x0 + 32),
+ z0 = svld1ro (p0, x0 + 32))
+
+/*
+** ld1ro_s64_m1:
+** sub (x[0-9]+), x0, #?8
+** ld1rod z0\.d, p0/z, \[\1\]
+** ret
+*/
+TEST_LOAD (ld1ro_s64_m1, svint64_t, int64_t,
+ z0 = svld1ro_s64 (p0, x0 - 1),
+ z0 = svld1ro (p0, x0 - 1))
+
+/*
+** ld1ro_s64_m2:
+** sub (x[0-9]+), x0, #?16
+** ld1rod z0\.d, p0/z, \[\1\]
+** ret
+*/
+TEST_LOAD (ld1ro_s64_m2, svint64_t, int64_t,
+ z0 = svld1ro_s64 (p0, x0 - 2),
+ z0 = svld1ro (p0, x0 - 2))
+
+/*
+** ld1ro_s64_m36:
+** sub (x[0-9]+), x0, #?288
+** ld1rod z0\.d, p0/z, \[\1\]
+** ret
+*/
+TEST_LOAD (ld1ro_s64_m36, svint64_t, int64_t,
+ z0 = svld1ro_s64 (p0, x0 - 36),
+ z0 = svld1ro (p0, x0 - 36))
+
+/*
+** ld1ro_s64_4:
+** ld1rod z0\.d, p0/z, \[x0, #?32\]
+** ret
+*/
+TEST_LOAD (ld1ro_s64_4, svint64_t, int64_t,
+ z0 = svld1ro_s64 (p0, x0 + 4),
+ z0 = svld1ro (p0, x0 + 4))
+
+/*
+** ld1ro_s64_28:
+** ld1rod z0\.d, p0/z, \[x0, #?224\]
+** ret
+*/
+TEST_LOAD (ld1ro_s64_28, svint64_t, int64_t,
+ z0 = svld1ro_s64 (p0, x0 + 28),
+ z0 = svld1ro (p0, x0 + 28))
+
+/*
+** ld1ro_s64_m4:
+** ld1rod z0\.d, p0/z, \[x0, #?-32\]
+** ret
+*/
+TEST_LOAD (ld1ro_s64_m4, svint64_t, int64_t,
+ z0 = svld1ro_s64 (p0, x0 - 4),
+ z0 = svld1ro (p0, x0 - 4))
+
+/*
+** ld1ro_s64_m32:
+** ld1rod z0\.d, p0/z, \[x0, #?-256\]
+** ret
+*/
+TEST_LOAD (ld1ro_s64_m32, svint64_t, int64_t,
+ z0 = svld1ro_s64 (p0, x0 - 32),
+ z0 = svld1ro (p0, x0 - 32))
+
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+/* { dg-additional-options "-march=armv8.6-a+sve+f64mm" } */
+
+#include "test_sve_acle.h"
+
+/*
+** ld1ro_s8_base:
+** ld1rob z0\.b, p0/z, \[x0\]
+** ret
+*/
+TEST_LOAD (ld1ro_s8_base, svint8_t, int8_t,
+ z0 = svld1ro_s8 (p0, x0),
+ z0 = svld1ro (p0, x0))
+
+/*
+** ld1ro_s8_index:
+** ld1rob z0\.b, p0/z, \[x0, x1\]
+** ret
+*/
+TEST_LOAD (ld1ro_s8_index, svint8_t, int8_t,
+ z0 = svld1ro_s8 (p0, x0 + x1),
+ z0 = svld1ro (p0, x0 + x1))
+
+/*
+** ld1ro_s8_1:
+** add (x[0-9]+), x0, #?1
+** ld1rob z0\.b, p0/z, \[\1\]
+** ret
+*/
+TEST_LOAD (ld1ro_s8_1, svint8_t, int8_t,
+ z0 = svld1ro_s8 (p0, x0 + 1),
+ z0 = svld1ro (p0, x0 + 1))
+
+/*
+** ld1ro_s8_16:
+** add (x[0-9]+), x0, #?16
+** ld1rob z0\.b, p0/z, \[\1\]
+** ret
+*/
+TEST_LOAD (ld1ro_s8_16, svint8_t, int8_t,
+ z0 = svld1ro_s8 (p0, x0 + 16),
+ z0 = svld1ro (p0, x0 + 16))
+
+/*
+** ld1ro_s8_256:
+** add (x[0-9]+), x0, #?256
+** ld1rob z0\.b, p0/z, \[\1\]
+** ret
+*/
+TEST_LOAD (ld1ro_s8_256, svint8_t, int8_t,
+ z0 = svld1ro_s8 (p0, x0 + 256),
+ z0 = svld1ro (p0, x0 + 256))
+
+/*
+** ld1ro_s8_m1:
+** sub (x[0-9]+), x0, #?1
+** ld1rob z0\.b, p0/z, \[\1\]
+** ret
+*/
+TEST_LOAD (ld1ro_s8_m1, svint8_t, int8_t,
+ z0 = svld1ro_s8 (p0, x0 - 1),
+ z0 = svld1ro (p0, x0 - 1))
+
+/*
+** ld1ro_s8_m16:
+** sub (x[0-9]+), x0, #?16
+** ld1rob z0\.b, p0/z, \[\1\]
+** ret
+*/
+TEST_LOAD (ld1ro_s8_m16, svint8_t, int8_t,
+ z0 = svld1ro_s8 (p0, x0 - 16),
+ z0 = svld1ro (p0, x0 - 16))
+
+/*
+** ld1ro_s8_m288:
+** sub (x[0-9]+), x0, #?288
+** ld1rob z0\.b, p0/z, \[\1\]
+** ret
+*/
+TEST_LOAD (ld1ro_s8_m288, svint8_t, int8_t,
+ z0 = svld1ro_s8 (p0, x0 - 288),
+ z0 = svld1ro (p0, x0 - 288))
+
+/*
+** ld1ro_s8_32:
+** ld1rob z0\.b, p0/z, \[x0, #?32\]
+** ret
+*/
+TEST_LOAD (ld1ro_s8_32, svint8_t, int8_t,
+ z0 = svld1ro_s8 (p0, x0 + 32),
+ z0 = svld1ro (p0, x0 + 32))
+
+/*
+** ld1ro_s8_224:
+** ld1rob z0\.b, p0/z, \[x0, #?224\]
+** ret
+*/
+TEST_LOAD (ld1ro_s8_224, svint8_t, int8_t,
+ z0 = svld1ro_s8 (p0, x0 + 224),
+ z0 = svld1ro (p0, x0 + 224))
+
+/*
+** ld1ro_s8_m32:
+** ld1rob z0\.b, p0/z, \[x0, #?-32\]
+** ret
+*/
+TEST_LOAD (ld1ro_s8_m32, svint8_t, int8_t,
+ z0 = svld1ro_s8 (p0, x0 - 32),
+ z0 = svld1ro (p0, x0 - 32))
+
+/*
+** ld1ro_s8_m256:
+** ld1rob z0\.b, p0/z, \[x0, #?-256\]
+** ret
+*/
+TEST_LOAD (ld1ro_s8_m256, svint8_t, int8_t,
+ z0 = svld1ro_s8 (p0, x0 - 256),
+ z0 = svld1ro (p0, x0 - 256))
+
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+/* { dg-additional-options "-march=armv8.6-a+sve+f64mm" } */
+
+#include "test_sve_acle.h"
+
+/*
+** ld1ro_u16_base:
+** ld1roh z0\.h, p0/z, \[x0\]
+** ret
+*/
+TEST_LOAD (ld1ro_u16_base, svuint16_t, uint16_t,
+ z0 = svld1ro_u16 (p0, x0),
+ z0 = svld1ro (p0, x0))
+
+/*
+** ld1ro_u16_index:
+** ld1roh z0\.h, p0/z, \[x0, x1, lsl 1\]
+** ret
+*/
+TEST_LOAD (ld1ro_u16_index, svuint16_t, uint16_t,
+ z0 = svld1ro_u16 (p0, x0 + x1),
+ z0 = svld1ro (p0, x0 + x1))
+
+/*
+** ld1ro_u16_1:
+** add (x[0-9]+), x0, #?2
+** ld1roh z0\.h, p0/z, \[\1\]
+** ret
+*/
+TEST_LOAD (ld1ro_u16_1, svuint16_t, uint16_t,
+ z0 = svld1ro_u16 (p0, x0 + 1),
+ z0 = svld1ro (p0, x0 + 1))
+
+/*
+** ld1ro_u16_8:
+** add (x[0-9]+), x0, #?16
+** ld1roh z0\.h, p0/z, \[\1\]
+** ret
+*/
+TEST_LOAD (ld1ro_u16_8, svuint16_t, uint16_t,
+ z0 = svld1ro_u16 (p0, x0 + 8),
+ z0 = svld1ro (p0, x0 + 8))
+
+/*
+** ld1ro_u16_128:
+** add (x[0-9]+), x0, #?256
+** ld1roh z0\.h, p0/z, \[\1\]
+** ret
+*/
+TEST_LOAD (ld1ro_u16_128, svuint16_t, uint16_t,
+ z0 = svld1ro_u16 (p0, x0 + 128),
+ z0 = svld1ro (p0, x0 + 128))
+
+/*
+** ld1ro_u16_m1:
+** sub (x[0-9]+), x0, #?2
+** ld1roh z0\.h, p0/z, \[\1\]
+** ret
+*/
+TEST_LOAD (ld1ro_u16_m1, svuint16_t, uint16_t,
+ z0 = svld1ro_u16 (p0, x0 - 1),
+ z0 = svld1ro (p0, x0 - 1))
+
+/*
+** ld1ro_u16_m8:
+** sub (x[0-9]+), x0, #?16
+** ld1roh z0\.h, p0/z, \[\1\]
+** ret
+*/
+TEST_LOAD (ld1ro_u16_m8, svuint16_t, uint16_t,
+ z0 = svld1ro_u16 (p0, x0 - 8),
+ z0 = svld1ro (p0, x0 - 8))
+
+/*
+** ld1ro_u16_m144:
+** sub (x[0-9]+), x0, #?288
+** ld1roh z0\.h, p0/z, \[\1\]
+** ret
+*/
+TEST_LOAD (ld1ro_u16_m144, svuint16_t, uint16_t,
+ z0 = svld1ro_u16 (p0, x0 - 144),
+ z0 = svld1ro (p0, x0 - 144))
+
+/*
+** ld1ro_u16_16:
+** ld1roh z0\.h, p0/z, \[x0, #?32\]
+** ret
+*/
+TEST_LOAD (ld1ro_u16_16, svuint16_t, uint16_t,
+ z0 = svld1ro_u16 (p0, x0 + 16),
+ z0 = svld1ro (p0, x0 + 16))
+
+/*
+** ld1ro_u16_112:
+** ld1roh z0\.h, p0/z, \[x0, #?224\]
+** ret
+*/
+TEST_LOAD (ld1ro_u16_112, svuint16_t, uint16_t,
+ z0 = svld1ro_u16 (p0, x0 + 112),
+ z0 = svld1ro (p0, x0 + 112))
+
+/*
+** ld1ro_u16_m16:
+** ld1roh z0\.h, p0/z, \[x0, #?-32\]
+** ret
+*/
+TEST_LOAD (ld1ro_u16_m16, svuint16_t, uint16_t,
+ z0 = svld1ro_u16 (p0, x0 - 16),
+ z0 = svld1ro (p0, x0 - 16))
+
+/*
+** ld1ro_u16_m128:
+** ld1roh z0\.h, p0/z, \[x0, #?-256\]
+** ret
+*/
+TEST_LOAD (ld1ro_u16_m128, svuint16_t, uint16_t,
+ z0 = svld1ro_u16 (p0, x0 - 128),
+ z0 = svld1ro (p0, x0 - 128))
+
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+/* { dg-additional-options "-march=armv8.6-a+sve+f64mm" } */
+
+#include "test_sve_acle.h"
+
+/*
+** ld1ro_u32_base:
+** ld1row z0\.s, p0/z, \[x0\]
+** ret
+*/
+TEST_LOAD (ld1ro_u32_base, svuint32_t, uint32_t,
+ z0 = svld1ro_u32 (p0, x0),
+ z0 = svld1ro (p0, x0))
+
+/*
+** ld1ro_u32_index:
+** ld1row z0\.s, p0/z, \[x0, x1, lsl 2\]
+** ret
+*/
+TEST_LOAD (ld1ro_u32_index, svuint32_t, uint32_t,
+ z0 = svld1ro_u32 (p0, x0 + x1),
+ z0 = svld1ro (p0, x0 + x1))
+
+/*
+** ld1ro_u32_1:
+** add (x[0-9]+), x0, #?4
+** ld1row z0\.s, p0/z, \[\1\]
+** ret
+*/
+TEST_LOAD (ld1ro_u32_1, svuint32_t, uint32_t,
+ z0 = svld1ro_u32 (p0, x0 + 1),
+ z0 = svld1ro (p0, x0 + 1))
+
+/*
+** ld1ro_u32_4:
+** add (x[0-9]+), x0, #?16
+** ld1row z0\.s, p0/z, \[\1\]
+** ret
+*/
+TEST_LOAD (ld1ro_u32_4, svuint32_t, uint32_t,
+ z0 = svld1ro_u32 (p0, x0 + 4),
+ z0 = svld1ro (p0, x0 + 4))
+
+/*
+** ld1ro_u32_64:
+** add (x[0-9]+), x0, #?256
+** ld1row z0\.s, p0/z, \[\1\]
+** ret
+*/
+TEST_LOAD (ld1ro_u32_64, svuint32_t, uint32_t,
+ z0 = svld1ro_u32 (p0, x0 + 64),
+ z0 = svld1ro (p0, x0 + 64))
+
+/*
+** ld1ro_u32_m1:
+** sub (x[0-9]+), x0, #?4
+** ld1row z0\.s, p0/z, \[\1\]
+** ret
+*/
+TEST_LOAD (ld1ro_u32_m1, svuint32_t, uint32_t,
+ z0 = svld1ro_u32 (p0, x0 - 1),
+ z0 = svld1ro (p0, x0 - 1))
+
+/*
+** ld1ro_u32_m4:
+** sub (x[0-9]+), x0, #?16
+** ld1row z0\.s, p0/z, \[\1\]
+** ret
+*/
+TEST_LOAD (ld1ro_u32_m4, svuint32_t, uint32_t,
+ z0 = svld1ro_u32 (p0, x0 - 4),
+ z0 = svld1ro (p0, x0 - 4))
+
+/*
+** ld1ro_u32_m72:
+** sub (x[0-9]+), x0, #?288
+** ld1row z0\.s, p0/z, \[\1\]
+** ret
+*/
+TEST_LOAD (ld1ro_u32_m72, svuint32_t, uint32_t,
+ z0 = svld1ro_u32 (p0, x0 - 72),
+ z0 = svld1ro (p0, x0 - 72))
+
+/*
+** ld1ro_u32_8:
+** ld1row z0\.s, p0/z, \[x0, #?32\]
+** ret
+*/
+TEST_LOAD (ld1ro_u32_8, svuint32_t, uint32_t,
+ z0 = svld1ro_u32 (p0, x0 + 8),
+ z0 = svld1ro (p0, x0 + 8))
+
+/*
+** ld1ro_u32_56:
+** ld1row z0\.s, p0/z, \[x0, #?224\]
+** ret
+*/
+TEST_LOAD (ld1ro_u32_56, svuint32_t, uint32_t,
+ z0 = svld1ro_u32 (p0, x0 + 56),
+ z0 = svld1ro (p0, x0 + 56))
+
+/*
+** ld1ro_u32_m8:
+** ld1row z0\.s, p0/z, \[x0, #?-32\]
+** ret
+*/
+TEST_LOAD (ld1ro_u32_m8, svuint32_t, uint32_t,
+ z0 = svld1ro_u32 (p0, x0 - 8),
+ z0 = svld1ro (p0, x0 - 8))
+
+/*
+** ld1ro_u32_m64:
+** ld1row z0\.s, p0/z, \[x0, #?-256\]
+** ret
+*/
+TEST_LOAD (ld1ro_u32_m64, svuint32_t, uint32_t,
+ z0 = svld1ro_u32 (p0, x0 - 64),
+ z0 = svld1ro (p0, x0 - 64))
+
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+/* { dg-additional-options "-march=armv8.6-a+sve+f64mm" } */
+
+#include "test_sve_acle.h"
+
+/*
+** ld1ro_u64_base:
+** ld1rod z0\.d, p0/z, \[x0\]
+** ret
+*/
+TEST_LOAD (ld1ro_u64_base, svuint64_t, uint64_t,
+ z0 = svld1ro_u64 (p0, x0),
+ z0 = svld1ro (p0, x0))
+
+/*
+** ld1ro_u64_index:
+** ld1rod z0\.d, p0/z, \[x0, x1, lsl 3\]
+** ret
+*/
+TEST_LOAD (ld1ro_u64_index, svuint64_t, uint64_t,
+ z0 = svld1ro_u64 (p0, x0 + x1),
+ z0 = svld1ro (p0, x0 + x1))
+
+/*
+** ld1ro_u64_1:
+** add (x[0-9]+), x0, #?8
+** ld1rod z0\.d, p0/z, \[\1\]
+** ret
+*/
+TEST_LOAD (ld1ro_u64_1, svuint64_t, uint64_t,
+ z0 = svld1ro_u64 (p0, x0 + 1),
+ z0 = svld1ro (p0, x0 + 1))
+
+/*
+** ld1ro_u64_2:
+** add (x[0-9]+), x0, #?16
+** ld1rod z0\.d, p0/z, \[\1\]
+** ret
+*/
+TEST_LOAD (ld1ro_u64_2, svuint64_t, uint64_t,
+ z0 = svld1ro_u64 (p0, x0 + 2),
+ z0 = svld1ro (p0, x0 + 2))
+
+/*
+** ld1ro_u64_32:
+** add (x[0-9]+), x0, #?256
+** ld1rod z0\.d, p0/z, \[\1\]
+** ret
+*/
+TEST_LOAD (ld1ro_u64_32, svuint64_t, uint64_t,
+ z0 = svld1ro_u64 (p0, x0 + 32),
+ z0 = svld1ro (p0, x0 + 32))
+
+/*
+** ld1ro_u64_m1:
+** sub (x[0-9]+), x0, #?8
+** ld1rod z0\.d, p0/z, \[\1\]
+** ret
+*/
+TEST_LOAD (ld1ro_u64_m1, svuint64_t, uint64_t,
+ z0 = svld1ro_u64 (p0, x0 - 1),
+ z0 = svld1ro (p0, x0 - 1))
+
+/*
+** ld1ro_u64_m2:
+** sub (x[0-9]+), x0, #?16
+** ld1rod z0\.d, p0/z, \[\1\]
+** ret
+*/
+TEST_LOAD (ld1ro_u64_m2, svuint64_t, uint64_t,
+ z0 = svld1ro_u64 (p0, x0 - 2),
+ z0 = svld1ro (p0, x0 - 2))
+
+/*
+** ld1ro_u64_m36:
+** sub (x[0-9]+), x0, #?288
+** ld1rod z0\.d, p0/z, \[\1\]
+** ret
+*/
+TEST_LOAD (ld1ro_u64_m36, svuint64_t, uint64_t,
+ z0 = svld1ro_u64 (p0, x0 - 36),
+ z0 = svld1ro (p0, x0 - 36))
+
+/*
+** ld1ro_u64_4:
+** ld1rod z0\.d, p0/z, \[x0, #?32\]
+** ret
+*/
+TEST_LOAD (ld1ro_u64_4, svuint64_t, uint64_t,
+ z0 = svld1ro_u64 (p0, x0 + 4),
+ z0 = svld1ro (p0, x0 + 4))
+
+/*
+** ld1ro_u64_28:
+** ld1rod z0\.d, p0/z, \[x0, #?224\]
+** ret
+*/
+TEST_LOAD (ld1ro_u64_28, svuint64_t, uint64_t,
+ z0 = svld1ro_u64 (p0, x0 + 28),
+ z0 = svld1ro (p0, x0 + 28))
+
+/*
+** ld1ro_u64_m4:
+** ld1rod z0\.d, p0/z, \[x0, #?-32\]
+** ret
+*/
+TEST_LOAD (ld1ro_u64_m4, svuint64_t, uint64_t,
+ z0 = svld1ro_u64 (p0, x0 - 4),
+ z0 = svld1ro (p0, x0 - 4))
+
+/*
+** ld1ro_u64_m32:
+** ld1rod z0\.d, p0/z, \[x0, #?-256\]
+** ret
+*/
+TEST_LOAD (ld1ro_u64_m32, svuint64_t, uint64_t,
+ z0 = svld1ro_u64 (p0, x0 - 32),
+ z0 = svld1ro (p0, x0 - 32))
+
--- /dev/null
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+/* { dg-additional-options "-march=armv8.6-a+sve+f64mm" } */
+
+#include "test_sve_acle.h"
+
+/*
+** ld1ro_u8_base:
+** ld1rob z0\.b, p0/z, \[x0\]
+** ret
+*/
+TEST_LOAD (ld1ro_u8_base, svuint8_t, uint8_t,
+ z0 = svld1ro_u8 (p0, x0),
+ z0 = svld1ro (p0, x0))
+
+/*
+** ld1ro_u8_index:
+** ld1rob z0\.b, p0/z, \[x0, x1\]
+** ret
+*/
+TEST_LOAD (ld1ro_u8_index, svuint8_t, uint8_t,
+ z0 = svld1ro_u8 (p0, x0 + x1),
+ z0 = svld1ro (p0, x0 + x1))
+
+/*
+** ld1ro_u8_1:
+** add (x[0-9]+), x0, #?1
+** ld1rob z0\.b, p0/z, \[\1\]
+** ret
+*/
+TEST_LOAD (ld1ro_u8_1, svuint8_t, uint8_t,
+ z0 = svld1ro_u8 (p0, x0 + 1),
+ z0 = svld1ro (p0, x0 + 1))
+
+/*
+** ld1ro_u8_16:
+** add (x[0-9]+), x0, #?16
+** ld1rob z0\.b, p0/z, \[\1\]
+** ret
+*/
+TEST_LOAD (ld1ro_u8_16, svuint8_t, uint8_t,
+ z0 = svld1ro_u8 (p0, x0 + 16),
+ z0 = svld1ro (p0, x0 + 16))
+
+/*
+** ld1ro_u8_256:
+** add (x[0-9]+), x0, #?256
+** ld1rob z0\.b, p0/z, \[\1\]
+** ret
+*/
+TEST_LOAD (ld1ro_u8_256, svuint8_t, uint8_t,
+ z0 = svld1ro_u8 (p0, x0 + 256),
+ z0 = svld1ro (p0, x0 + 256))
+
+/*
+** ld1ro_u8_m1:
+** sub (x[0-9]+), x0, #?1
+** ld1rob z0\.b, p0/z, \[\1\]
+** ret
+*/
+TEST_LOAD (ld1ro_u8_m1, svuint8_t, uint8_t,
+ z0 = svld1ro_u8 (p0, x0 - 1),
+ z0 = svld1ro (p0, x0 - 1))
+
+/*
+** ld1ro_u8_m16:
+** sub (x[0-9]+), x0, #?16
+** ld1rob z0\.b, p0/z, \[\1\]
+** ret
+*/
+TEST_LOAD (ld1ro_u8_m16, svuint8_t, uint8_t,
+ z0 = svld1ro_u8 (p0, x0 - 16),
+ z0 = svld1ro (p0, x0 - 16))
+
+/*
+** ld1ro_u8_m288:
+** sub (x[0-9]+), x0, #?288
+** ld1rob z0\.b, p0/z, \[\1\]
+** ret
+*/
+TEST_LOAD (ld1ro_u8_m288, svuint8_t, uint8_t,
+ z0 = svld1ro_u8 (p0, x0 - 288),
+ z0 = svld1ro (p0, x0 - 288))
+
+/*
+** ld1ro_u8_32:
+** ld1rob z0\.b, p0/z, \[x0, #?32\]
+** ret
+*/
+TEST_LOAD (ld1ro_u8_32, svuint8_t, uint8_t,
+ z0 = svld1ro_u8 (p0, x0 + 32),
+ z0 = svld1ro (p0, x0 + 32))
+
+/*
+** ld1ro_u8_224:
+** ld1rob z0\.b, p0/z, \[x0, #?224\]
+** ret
+*/
+TEST_LOAD (ld1ro_u8_224, svuint8_t, uint8_t,
+ z0 = svld1ro_u8 (p0, x0 + 224),
+ z0 = svld1ro (p0, x0 + 224))
+
+/*
+** ld1ro_u8_m32:
+** ld1rob z0\.b, p0/z, \[x0, #?-32\]
+** ret
+*/
+TEST_LOAD (ld1ro_u8_m32, svuint8_t, uint8_t,
+ z0 = svld1ro_u8 (p0, x0 - 32),
+ z0 = svld1ro (p0, x0 - 32))
+
+/*
+** ld1ro_u8_m256:
+** ld1rob z0\.b, p0/z, \[x0, #?-256\]
+** ret
+*/
+TEST_LOAD (ld1ro_u8_m256, svuint8_t, uint8_t,
+ z0 = svld1ro_u8 (p0, x0 - 256),
+ z0 = svld1ro (p0, x0 - 256))
+