From 960ceebc9352b0d90cfa9b8af614a7a60cef8578 Mon Sep 17 00:00:00 2001
From: Alan Lawrence <alan.lawrence@arm.com>
Date: Mon, 17 Nov 2014 18:29:49 +0000
Subject: [PATCH] [AArch64] Extend aarch64_simd_vec_set pattern, replace asm
 for vld1_lane

gcc/:

	* config/aarch64/aarch64-simd.md (aarch64_simd_vec_set<mode>): Add
	variant reading from memory and assembling to ld1.

	* config/aarch64/arm_neon.h (vld1_lane_f32, vld1_lane_f64, vld1_lane_p8,
	vld1_lane_p16, vld1_lane_s8, vld1_lane_s16, vld1_lane_s32,
	vld1_lane_s64, vld1_lane_u8, vld1_lane_u16, vld1_lane_u32,
	vld1_lane_u64, vld1q_lane_f32, vld1q_lane_f64, vld1q_lane_p8,
	vld1q_lane_p16, vld1q_lane_s8, vld1q_lane_s16, vld1q_lane_s32,
	vld1q_lane_s64, vld1q_lane_u8, vld1q_lane_u16, vld1q_lane_u32,
	vld1q_lane_u64): Replace asm with vset_lane and pointer dereference.

gcc/testsuite/:

	* gcc.target/aarch64/vld1_lane.c: New test.

From-SVN: r217665
---
 gcc/ChangeLog                                |  13 +
 gcc/config/aarch64/aarch64-simd.md           |  12 +-
 gcc/config/aarch64/arm_neon.h                | 462 ++++++-------------
 gcc/testsuite/ChangeLog                      |   4 +
 gcc/testsuite/gcc.target/aarch64/vld1_lane.c |  85 ++++
 5 files changed, 258 insertions(+), 318 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/aarch64/vld1_lane.c

diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 1b5d8a0be4e..e1d68dea336 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,16 @@
+2014-11-17  Alan Lawrence  <alan.lawrence@arm.com>
+
+	* config/aarch64/aarch64-simd.md (aarch64_simd_vec_set<mode>): Add
+	variant reading from memory and assembling to ld1.
+
+	* config/aarch64/arm_neon.h (vld1_lane_f32, vld1_lane_f64, vld1_lane_p8,
+	vld1_lane_p16, vld1_lane_s8, vld1_lane_s16, vld1_lane_s32,
+	vld1_lane_s64, vld1_lane_u8, vld1_lane_u16, vld1_lane_u32,
+	vld1_lane_u64, vld1q_lane_f32, vld1q_lane_f64, vld1q_lane_p8,
+	vld1q_lane_p16, vld1q_lane_s8, vld1q_lane_s16, vld1q_lane_s32,
+	vld1q_lane_s64, vld1q_lane_u8, vld1q_lane_u16, vld1q_lane_u32,
+	vld1q_lane_u64): Replace asm with vset_lane and pointer dereference.
+
 2014-11-17  Jason Merrill  <jason@redhat.com>
 
 	* tree-inline.c (copy_fn): New.
diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
index 148567b3b44..43bfec95702 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -455,12 +455,12 @@
 )
 
 (define_insn "aarch64_simd_vec_set<mode>"
-  [(set (match_operand:VQ_S 0 "register_operand" "=w,w")
+  [(set (match_operand:VQ_S 0 "register_operand" "=w,w,w")
 	(vec_merge:VQ_S
 	    (vec_duplicate:VQ_S
-		(match_operand:<VEL> 1 "register_operand" "r,w"))
-	    (match_operand:VQ_S 3 "register_operand" "0,0")
-	    (match_operand:SI 2 "immediate_operand" "i,i")))]
+		(match_operand:<VEL> 1 "aarch64_simd_general_operand" "r,w,Utv"))
+	    (match_operand:VQ_S 3 "register_operand" "0,0,0")
+	    (match_operand:SI 2 "immediate_operand" "i,i,i")))]
   "TARGET_SIMD"
   {
    int elt = ENDIAN_LANE_N (<MODE>mode, exact_log2 (INTVAL (operands[2])));
@@ -471,11 +471,13 @@
 	return "ins\\t%0.<Vetype>[%p2], %w1";
      case 1:
 	return "ins\\t%0.<Vetype>[%p2], %1.<Vetype>[0]";
+     case 2:
+	return "ld1\\t{%0.<Vetype>}[%p2], %1";
      default:
 	gcc_unreachable ();
      }
   }
-  [(set_attr "type" "neon_from_gp, neon_ins")]
+  [(set_attr "type" "neon_from_gp, neon_ins, neon_load1_1reg")]
 )
 
 (define_insn "aarch64_simd_lshr<mode>"
diff --git a/gcc/config/aarch64/arm_neon.h b/gcc/config/aarch64/arm_neon.h
index 6200f625694..86d9a8160e4 100644
--- a/gcc/config/aarch64/arm_neon.h
+++ b/gcc/config/aarch64/arm_neon.h
@@ -604,7 +604,7 @@ typedef struct poly16x8x4_t
 #define __aarch64_vdupq_laneq_u64(__a, __b) \
   __aarch64_vdup_lane_any (u64, q, q, __a, __b)
 
-/* vset_lane internal macro.  */
+/* vset_lane and vld1_lane internal macro.  */
 
 #ifdef __AARCH64EB__
 /* For big-endian, GCC's vector indices are the opposite way around
@@ -6251,162 +6251,6 @@ vld1_dup_u64 (const uint64_t * a)
   return result;
 }
 
-#define vld1_lane_f32(a, b, c) \
-  __extension__ \
-    ({ \
-       float32x2_t b_ = (b); \
-       const float32_t * a_ = (a); \
-       float32x2_t result; \
-       __asm__ ("ld1 {%0.s}[%1], %2" \
-                : "=w"(result) \
-                : "i" (c), "Utv"(*a_), "0"(b_) \
-                : /* No clobbers */); \
-       result; \
-     })
-
-#define vld1_lane_f64(a, b, c) \
-  __extension__ \
-    ({ \
-       float64x1_t b_ = (b); \
-       const float64_t * a_ = (a); \
-       float64x1_t result; \
-       __asm__ ("ld1 {%0.d}[%1], %2" \
-                : "=w"(result) \
-                : "i" (c), "Utv"(*a_), "0"(b_) \
-                : /* No clobbers */); \
-       result; \
-     })
-
-#define vld1_lane_p8(a, b, c) \
-  __extension__ \
-    ({ \
-       poly8x8_t b_ = (b); \
-       const poly8_t * a_ = (a); \
-       poly8x8_t result; \
-       __asm__ ("ld1 {%0.b}[%1], %2" \
-                : "=w"(result) \
-                : "i" (c), "Utv"(*a_), "0"(b_) \
-                : /* No clobbers */); \
-       result; \
-     })
-
-#define vld1_lane_p16(a, b, c) \
-  __extension__ \
-    ({ \
-       poly16x4_t b_ = (b); \
-       const poly16_t * a_ = (a); \
-       poly16x4_t result; \
-       __asm__ ("ld1 {%0.h}[%1], %2" \
-                : "=w"(result) \
-                : "i" (c), "Utv"(*a_), "0"(b_) \
-                : /* No clobbers */); \
-       result; \
-     })
-
-#define vld1_lane_s8(a, b, c) \
-  __extension__ \
-    ({ \
-       int8x8_t b_ = (b); \
-       const int8_t * a_ = (a); \
-       int8x8_t result; \
-       __asm__ ("ld1 {%0.b}[%1], %2" \
-                : "=w"(result) \
-                : "i" (c), "Utv"(*a_), "0"(b_) \
-                : /* No clobbers */); \
-       result; \
-     })
-
-#define vld1_lane_s16(a, b, c) \
-  __extension__ \
-    ({ \
-       int16x4_t b_ = (b); \
-       const int16_t * a_ = (a); \
-       int16x4_t result; \
-       __asm__ ("ld1 {%0.h}[%1], %2" \
-                : "=w"(result) \
-                : "i" (c), "Utv"(*a_), "0"(b_) \
-                : /* No clobbers */); \
-       result; \
-     })
-
-#define vld1_lane_s32(a, b, c) \
-  __extension__ \
-    ({ \
-       int32x2_t b_ = (b); \
-       const int32_t * a_ = (a); \
-       int32x2_t result; \
-       __asm__ ("ld1 {%0.s}[%1], %2" \
-                : "=w"(result) \
-                : "i" (c), "Utv"(*a_), "0"(b_) \
-                : /* No clobbers */); \
-       result; \
-     })
-
-#define vld1_lane_s64(a, b, c) \
-  __extension__ \
-    ({ \
-       int64x1_t b_ = (b); \
-       const int64_t * a_ = (a); \
-       int64x1_t result; \
-       __asm__ ("ld1 {%0.d}[%1], %2" \
-                : "=w"(result) \
-                : "i" (c), "Utv"(*a_), "0"(b_) \
-                : /* No clobbers */); \
-       result; \
-     })
-
-#define vld1_lane_u8(a, b, c) \
-  __extension__ \
-    ({ \
-       uint8x8_t b_ = (b); \
-       const uint8_t * a_ = (a); \
-       uint8x8_t result; \
-       __asm__ ("ld1 {%0.b}[%1], %2" \
-                : "=w"(result) \
-                : "i" (c), "Utv"(*a_), "0"(b_) \
-                : /* No clobbers */); \
-       result; \
-     })
-
-#define vld1_lane_u16(a, b, c) \
-  __extension__ \
-    ({ \
-       uint16x4_t b_ = (b); \
-       const uint16_t * a_ = (a); \
-       uint16x4_t result; \
-       __asm__ ("ld1 {%0.h}[%1], %2" \
-                : "=w"(result) \
-                : "i" (c), "Utv"(*a_), "0"(b_) \
-                : /* No clobbers */); \
-       result; \
-     })
-
-#define vld1_lane_u32(a, b, c) \
-  __extension__ \
-    ({ \
-       uint32x2_t b_ = (b); \
-       const uint32_t * a_ = (a); \
-       uint32x2_t result; \
-       __asm__ ("ld1 {%0.s}[%1], %2" \
-                : "=w"(result) \
-                : "i" (c), "Utv"(*a_), "0"(b_) \
-                : /* No clobbers */); \
-       result; \
-     })
-
-#define vld1_lane_u64(a, b, c) \
-  __extension__ \
-    ({ \
-       uint64x1_t b_ = (b); \
-       const uint64_t * a_ = (a); \
-       uint64x1_t result; \
-       __asm__ ("ld1 {%0.d}[%1], %2" \
-                : "=w"(result) \
-                : "i" (c), "Utv"(*a_), "0"(b_) \
-                : /* No clobbers */); \
-       result; \
-     })
-
 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
 vld1q_dup_f32 (const float32_t * a)
 {
@@ -6539,162 +6383,6 @@ vld1q_dup_u64 (const uint64_t * a)
   return result;
 }
 
-#define vld1q_lane_f32(a, b, c) \
-  __extension__ \
-    ({ \
-       float32x4_t b_ = (b); \
-       const float32_t * a_ = (a); \
-       float32x4_t result; \
-       __asm__ ("ld1 {%0.s}[%1], %2" \
-                : "=w"(result) \
-                : "i"(c), "Utv"(*a_), "0"(b_) \
-                : /* No clobbers */); \
-       result; \
-     })
-
-#define vld1q_lane_f64(a, b, c) \
-  __extension__ \
-    ({ \
-       float64x2_t b_ = (b); \
-       const float64_t * a_ = (a); \
-       float64x2_t result; \
-       __asm__ ("ld1 {%0.d}[%1], %2" \
-                : "=w"(result) \
-                : "i"(c), "Utv"(*a_), "0"(b_) \
-                : /* No clobbers */); \
-       result; \
-     })
-
-#define vld1q_lane_p8(a, b, c) \
-  __extension__ \
-    ({ \
-       poly8x16_t b_ = (b); \
-       const poly8_t * a_ = (a); \
-       poly8x16_t result; \
-       __asm__ ("ld1 {%0.b}[%1], %2" \
-                : "=w"(result) \
-                : "i"(c), "Utv"(*a_), "0"(b_) \
-                : /* No clobbers */); \
-       result; \
-     })
-
-#define vld1q_lane_p16(a, b, c) \
-  __extension__ \
-    ({ \
-       poly16x8_t b_ = (b); \
-       const poly16_t * a_ = (a); \
-       poly16x8_t result; \
-       __asm__ ("ld1 {%0.h}[%1], %2" \
-                : "=w"(result) \
-                : "i"(c), "Utv"(*a_), "0"(b_) \
-                : /* No clobbers */); \
-       result; \
-     })
-
-#define vld1q_lane_s8(a, b, c) \
-  __extension__ \
-    ({ \
-       int8x16_t b_ = (b); \
-       const int8_t * a_ = (a); \
-       int8x16_t result; \
-       __asm__ ("ld1 {%0.b}[%1], %2" \
-                : "=w"(result) \
-                : "i"(c), "Utv"(*a_), "0"(b_) \
-                : /* No clobbers */); \
-       result; \
-     })
-
-#define vld1q_lane_s16(a, b, c) \
-  __extension__ \
-    ({ \
-       int16x8_t b_ = (b); \
-       const int16_t * a_ = (a); \
-       int16x8_t result; \
-       __asm__ ("ld1 {%0.h}[%1], %2" \
-                : "=w"(result) \
-                : "i"(c), "Utv"(*a_), "0"(b_) \
-                : /* No clobbers */); \
-       result; \
-     })
-
-#define vld1q_lane_s32(a, b, c) \
-  __extension__ \
-    ({ \
-       int32x4_t b_ = (b); \
-       const int32_t * a_ = (a); \
-       int32x4_t result; \
-       __asm__ ("ld1 {%0.s}[%1], %2" \
-                : "=w"(result) \
-                : "i"(c), "Utv"(*a_), "0"(b_) \
-                : /* No clobbers */); \
-       result; \
-     })
-
-#define vld1q_lane_s64(a, b, c) \
-  __extension__ \
-    ({ \
-       int64x2_t b_ = (b); \
-       const int64_t * a_ = (a); \
-       int64x2_t result; \
-       __asm__ ("ld1 {%0.d}[%1], %2" \
-                : "=w"(result) \
-                : "i"(c), "Utv"(*a_), "0"(b_) \
-                : /* No clobbers */); \
-       result; \
-     })
-
-#define vld1q_lane_u8(a, b, c) \
-  __extension__ \
-    ({ \
-       uint8x16_t b_ = (b); \
-       const uint8_t * a_ = (a); \
-       uint8x16_t result; \
-       __asm__ ("ld1 {%0.b}[%1], %2" \
-                : "=w"(result) \
-                : "i"(c), "Utv"(*a_), "0"(b_) \
-                : /* No clobbers */); \
-       result; \
-     })
-
-#define vld1q_lane_u16(a, b, c) \
-  __extension__ \
-    ({ \
-       uint16x8_t b_ = (b); \
-       const uint16_t * a_ = (a); \
-       uint16x8_t result; \
-       __asm__ ("ld1 {%0.h}[%1], %2" \
-                : "=w"(result) \
-                : "i"(c), "Utv"(*a_), "0"(b_) \
-                : /* No clobbers */); \
-       result; \
-     })
-
-#define vld1q_lane_u32(a, b, c) \
-  __extension__ \
-    ({ \
-       uint32x4_t b_ = (b); \
-       const uint32_t * a_ = (a); \
-       uint32x4_t result; \
-       __asm__ ("ld1 {%0.s}[%1], %2" \
-                : "=w"(result) \
-                : "i"(c), "Utv"(*a_), "0"(b_) \
-                : /* No clobbers */); \
-       result; \
-     })
-
-#define vld1q_lane_u64(a, b, c) \
-  __extension__ \
-    ({ \
-       uint64x2_t b_ = (b); \
-       const uint64_t * a_ = (a); \
-       uint64x2_t result; \
-       __asm__ ("ld1 {%0.d}[%1], %2" \
-                : "=w"(result) \
-                : "i"(c), "Utv"(*a_), "0"(b_) \
-                : /* No clobbers */); \
-       result; \
-     })
-
 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
 vmla_n_f32 (float32x2_t a, float32x2_t b, float32_t c)
 {
@@ -16430,6 +16118,154 @@ vld1q_u64 (const uint64_t *a)
     __builtin_aarch64_ld1v2di ((const __builtin_aarch64_simd_di *) a);
 }
 
+/* vld1_lane */
+
+__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
+vld1_lane_f32 (const float32_t *__src, float32x2_t __vec, const int __lane)
+{
+  return vset_lane_f32 (*__src, __vec, __lane);
+}
+
+__extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
+vld1_lane_f64 (const float64_t *__src, float64x1_t __vec, const int __lane)
+{
+  return vset_lane_f64 (*__src, __vec, __lane);
+}
+
+__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
+vld1_lane_p8 (const poly8_t *__src, poly8x8_t __vec, const int __lane)
+{
+  return vset_lane_p8 (*__src, __vec, __lane);
+}
+
+__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
+vld1_lane_p16 (const poly16_t *__src, poly16x4_t __vec, const int __lane)
+{
+  return vset_lane_p16 (*__src, __vec, __lane);
+}
+
+__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
+vld1_lane_s8 (const int8_t *__src, int8x8_t __vec, const int __lane)
+{
+  return vset_lane_s8 (*__src, __vec, __lane);
+}
+
+__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
+vld1_lane_s16 (const int16_t *__src, int16x4_t __vec, const int __lane)
+{
+  return vset_lane_s16 (*__src, __vec, __lane);
+}
+
+__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
+vld1_lane_s32 (const int32_t *__src, int32x2_t __vec, const int __lane)
+{
+  return vset_lane_s32 (*__src, __vec, __lane);
+}
+
+__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
+vld1_lane_s64 (const int64_t *__src, int64x1_t __vec, const int __lane)
+{
+  return vset_lane_s64 (*__src, __vec, __lane);
+}
+
+__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
+vld1_lane_u8 (const uint8_t *__src, uint8x8_t __vec, const int __lane)
+{
+  return vset_lane_u8 (*__src, __vec, __lane);
+}
+
+__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
+vld1_lane_u16 (const uint16_t *__src, uint16x4_t __vec, const int __lane)
+{
+  return vset_lane_u16 (*__src, __vec, __lane);
+}
+
+__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
+vld1_lane_u32 (const uint32_t *__src, uint32x2_t __vec, const int __lane)
+{
+  return vset_lane_u32 (*__src, __vec, __lane);
+}
+
+__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
+vld1_lane_u64 (const uint64_t *__src, uint64x1_t __vec, const int __lane)
+{
+  return vset_lane_u64 (*__src, __vec, __lane);
+}
+
+/* vld1q_lane */
+
+__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
+vld1q_lane_f32 (const float32_t *__src, float32x4_t __vec, const int __lane)
+{
+  return vsetq_lane_f32 (*__src, __vec, __lane);
+}
+
+__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
+vld1q_lane_f64 (const float64_t *__src, float64x2_t __vec, const int __lane)
+{
+  return vsetq_lane_f64 (*__src, __vec, __lane);
+}
+
+__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
+vld1q_lane_p8 (const poly8_t *__src, poly8x16_t __vec, const int __lane)
+{
+  return vsetq_lane_p8 (*__src, __vec, __lane);
+}
+
+__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
+vld1q_lane_p16 (const poly16_t *__src, poly16x8_t __vec, const int __lane)
+{
+  return vsetq_lane_p16 (*__src, __vec, __lane);
+}
+
+__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
+vld1q_lane_s8 (const int8_t *__src, int8x16_t __vec, const int __lane)
+{
+  return vsetq_lane_s8 (*__src, __vec, __lane);
+}
+
+__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
+vld1q_lane_s16 (const int16_t *__src, int16x8_t __vec, const int __lane)
+{
+  return vsetq_lane_s16 (*__src, __vec, __lane);
+}
+
+__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
+vld1q_lane_s32 (const int32_t *__src, int32x4_t __vec, const int __lane)
+{
+  return vsetq_lane_s32 (*__src, __vec, __lane);
+}
+
+__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
+vld1q_lane_s64 (const int64_t *__src, int64x2_t __vec, const int __lane)
+{
+  return vsetq_lane_s64 (*__src, __vec, __lane);
+}
+
+__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
+vld1q_lane_u8 (const uint8_t *__src, uint8x16_t __vec, const int __lane)
+{
+  return vsetq_lane_u8 (*__src, __vec, __lane);
+}
+
+__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
+vld1q_lane_u16 (const uint16_t *__src, uint16x8_t __vec, const int __lane)
+{
+  return vsetq_lane_u16 (*__src, __vec, __lane);
+}
+
+__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
+vld1q_lane_u32 (const uint32_t *__src, uint32x4_t __vec, const int __lane)
+{
+  return vsetq_lane_u32 (*__src, __vec, __lane);
+}
+
+__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
+vld1q_lane_u64 (const uint64_t *__src, uint64x2_t __vec, const int __lane)
+{
+  return vsetq_lane_u64 (*__src, __vec, __lane);
+}
+
 /* vldn */
 
 __extension__ static __inline int64x1x2_t __attribute__ ((__always_inline__))
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index 6c25013c34a..e8a7fbc7528 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,3 +1,7 @@
+2014-11-17  Alan Lawrence  <alan.lawrence@arm.com>
+
+	* gcc.target/aarch64/vld1_lane.c: New test.
+
 2014-11-17  Alan Lawrence  <alan.lawrence@arm.com>
 
 	* gcc.target/aarch64/simd/vfma_f64.c: Add asm volatile memory.
diff --git a/gcc/testsuite/gcc.target/aarch64/vld1_lane.c b/gcc/testsuite/gcc.target/aarch64/vld1_lane.c
new file mode 100644
index 00000000000..c2445f8df53
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/vld1_lane.c
@@ -0,0 +1,85 @@
+/* { dg-do run } */
+/* { dg-options "-O3 -fno-inline" } */
+
+#include <arm_neon.h>
+
+extern void abort (void);
+
+#define VARIANTS(VARIANT) \
+VARIANT (uint8, , 8, _u8, 5) \
+VARIANT (uint16, , 4, _u16, 3) \
+VARIANT (uint32, , 2, _u32, 1) \
+VARIANT (uint64, , 1, _u64, 0) \
+VARIANT (int8, , 8, _s8, 3) \
+VARIANT (int16, , 4, _s16, 2) \
+VARIANT (int32, , 2, _s32, 0) \
+VARIANT (int64, , 1, _s64, 0) \
+VARIANT (poly8, , 8, _p8, 7) \
+VARIANT (poly16, , 4, _p16, 2) \
+VARIANT (float32, , 2, _f32, 1) \
+VARIANT (float64, , 1, _f64, 0) \
+VARIANT (uint8, q, 16, _u8, 13) \
+VARIANT (uint16, q, 8, _u16, 5) \
+VARIANT (uint32, q, 4, _u32, 1) \
+VARIANT (uint64, q, 2, _u64, 0) \
+VARIANT (int8, q, 16, _s8, 15) \
+VARIANT (int16, q, 8, _s16, 3) \
+VARIANT (int32, q, 4, _s32, 1) \
+VARIANT (int64, q, 2, _s64, 1) \
+VARIANT (poly8, q, 16, _p8, 7) \
+VARIANT (poly16, q, 8, _p16, 4) \
+VARIANT (float32, q, 4, _f32, 2) \
+VARIANT (float64, q, 2, _f64, 1)
+
+#define TESTMETH(BASE, Q, ELTS, SUFFIX, LANE) \
+__attribute__((noinline)) BASE##x##ELTS##_t \
+wrap_vld1##Q##_lane##SUFFIX (const BASE##_t *load, \
+			     BASE##x##ELTS##_t vec) \
+{ return vld1##Q##_lane##SUFFIX (load, vec, LANE); } \
+int \
+test_vld1##Q##_lane##SUFFIX (const BASE##_t *data, \
+			     const BASE##_t *overwrite) \
+{ \
+  BASE##_t out[ELTS]; \
+  int j; \
+  BASE##x##ELTS##_t in = vld1##Q##SUFFIX (data); \
+  in = wrap_vld1##Q##_lane##SUFFIX (overwrite, in); \
+  vst1##Q##SUFFIX (out, in); \
+  for (j = 0; j < ELTS; j++) \
+    if (out[j] != (j == LANE ? *overwrite : data[j])) \
+      return 1; \
+  return 0; \
+}
+
+
+VARIANTS (TESTMETH)
+
+#define CHECK(BASE, Q, ELTS, SUFFIX, LANE) \
+  if (test_vld1##Q##_lane##SUFFIX ((const BASE##_t *)orig_data, \
+				   BASE##_data) != 0) \
+    abort ();
+
+int
+main (int argc, char **argv)
+{
+  /* Original data for all vector formats.  */
+  uint64_t orig_data[2] = {0x1234567890abcdefULL, 0x13579bdf02468aceULL};
+
+  /* Data with which vldN_lane will overwrite some of previous.  */
+  uint8_t uint8_data[4] = { 7, 11, 13, 17 };
+  uint16_t uint16_data[4] = { 257, 263, 269, 271 };
+  uint32_t uint32_data[4] = { 65537, 65539, 65543, 65551 };
+  uint64_t uint64_data[4] = { 0xdeadbeefcafebabeULL, 0x0123456789abcdefULL,
+			      0xfedcba9876543210LL, 0xdeadbabecafebeefLL };
+  int8_t int8_data[4] = { -1, 3, -5, 7 };
+  int16_t int16_data[4] = { 257, -259, 261, -263 };
+  int32_t int32_data[4] = { 123456789, -987654321, -135792468, 975318642 };
+  int64_t *int64_data = (int64_t *)uint64_data;
+  poly8_t poly8_data[4] = { 0, 7, 13, 18, };
+  poly16_t poly16_data[4] = { 11111, 2222, 333, 44 };
+  float32_t float32_data[4] = { 3.14159, 2.718, 1.414, 100.0 };
+  float64_t float64_data[4] = { 1.010010001, 12345.6789, -9876.54321, 1.618 };
+
+  VARIANTS (CHECK);
+  return 0;
+}
-- 
2.30.2
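
Usage note (illustrative, not part of the patch): because arm_neon.h now
defines vld1_lane_* as a vset_lane of a pointer dereference, the lane load is
visible to the optimizers as ordinary RTL, and the new "Utv" alternative of
aarch64_simd_vec_set<mode> lets it still assemble to a single ld1 lane load
when the source operand really is in memory. A minimal sketch follows; the
file and function names are invented for the example, and the exact assembly
depends on register allocation.

/* example.c -- build with a patched aarch64 gcc, e.g. "gcc -O2 example.c".
   The lane load below should typically assemble to "ld1 {v0.s}[1], [x0]"
   instead of a general-register load plus ins, and without the old
   hard-coded inline asm.  */
#include <arm_neon.h>
#include <stdio.h>

__attribute__ ((noinline)) int32x2_t
load_high_lane (const int32_t *p, int32x2_t v)
{
  /* The lane index must be a compile-time constant.  */
  return vld1_lane_s32 (p, v, 1);
}

int
main (void)
{
  int32_t mem = 42;
  int32x2_t v = vdup_n_s32 (7);   /* v = { 7, 7 } */
  v = load_high_lane (&mem, v);   /* v = { 7, 42 } */
  printf ("%d %d\n", vget_lane_s32 (v, 0), vget_lane_s32 (v, 1));
  return 0;
}

A side benefit of dropping the asm: when the loaded value is already known at
compile time, GCC is free to fold the load away entirely, which the opaque
asm statement previously prevented.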