From fcfce895d320e916fe82ccc0b51701355770ad3b Mon Sep 17 00:00:00 2001 From: Alan Lawrence Date: Tue, 18 Nov 2014 11:06:05 +0000 Subject: [PATCH] [AArch64]Replace temporary assembler for vld1_dup * config/aarch64/arm_neon.h (vld1_dup_f32, vld1_dup_f64, vld1_dup_p8, vld1_dup_p16, vld1_dup_s8, vld1_dup_s16, vld1_dup_s32, vld1_dup_s64, vld1_dup_u8, vld1_dup_u16, vld1_dup_u32, vld1_dup_u64, vld1q_dup_f32, vld1q_dup_f64, vld1q_dup_p8, vld1q_dup_p16, vld1q_dup_s8, vld1q_dup_s16, vld1q_dup_s32, vld1q_dup_s64, vld1q_dup_u8, vld1q_dup_u16, vld1q_dup_u32, vld1q_dup_u64): Replace inline asm with vdup_n_ and pointer dereference. From-SVN: r217705 --- gcc/ChangeLog | 10 + gcc/config/aarch64/arm_neon.h | 412 ++++++++++++---------------------- 2 files changed, 158 insertions(+), 264 deletions(-) diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 78ed17c69e0..d3205b27515 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,13 @@ +2014-11-18 Alan Lawrence + + * config/aarch64/arm_neon.h (vld1_dup_f32, vld1_dup_f64, vld1_dup_p8, + vld1_dup_p16, vld1_dup_s8, vld1_dup_s16, vld1_dup_s32, vld1_dup_s64, + vld1_dup_u8, vld1_dup_u16, vld1_dup_u32, vld1_dup_u64, vld1q_dup_f32, + vld1q_dup_f64, vld1q_dup_p8, vld1q_dup_p16, vld1q_dup_s8, vld1q_dup_s16, + vld1q_dup_s32, vld1q_dup_s64, vld1q_dup_u8, vld1q_dup_u16, + vld1q_dup_u32, vld1q_dup_u64): Replace inline asm with vdup_n_ and + pointer dereference. + 2014-11-18 Marc Glisse * tree.c (element_mode, integer_truep): New functions. diff --git a/gcc/config/aarch64/arm_neon.h b/gcc/config/aarch64/arm_neon.h index 86d9a8160e4..921a5db1c14 100644 --- a/gcc/config/aarch64/arm_neon.h +++ b/gcc/config/aarch64/arm_neon.h @@ -6119,270 +6119,6 @@ vhsubq_u32 (uint32x4_t a, uint32x4_t b) return result; } -__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) -vld1_dup_f32 (const float32_t * a) -{ - float32x2_t result; - __asm__ ("ld1r {%0.2s}, %1" - : "=w"(result) - : "Utv"(*a) - : /* No clobbers */); - return result; -} - -__extension__ static __inline float64x1_t __attribute__ ((__always_inline__)) -vld1_dup_f64 (const float64_t * a) -{ - float64x1_t result; - __asm__ ("ld1r {%0.1d}, %1" - : "=w"(result) - : "Utv"(*a) - : /* No clobbers */); - return result; -} - -__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) -vld1_dup_p8 (const poly8_t * a) -{ - poly8x8_t result; - __asm__ ("ld1r {%0.8b}, %1" - : "=w"(result) - : "Utv"(*a) - : /* No clobbers */); - return result; -} - -__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__)) -vld1_dup_p16 (const poly16_t * a) -{ - poly16x4_t result; - __asm__ ("ld1r {%0.4h}, %1" - : "=w"(result) - : "Utv"(*a) - : /* No clobbers */); - return result; -} - -__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) -vld1_dup_s8 (const int8_t * a) -{ - int8x8_t result; - __asm__ ("ld1r {%0.8b}, %1" - : "=w"(result) - : "Utv"(*a) - : /* No clobbers */); - return result; -} - -__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) -vld1_dup_s16 (const int16_t * a) -{ - int16x4_t result; - __asm__ ("ld1r {%0.4h}, %1" - : "=w"(result) - : "Utv"(*a) - : /* No clobbers */); - return result; -} - -__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) -vld1_dup_s32 (const int32_t * a) -{ - int32x2_t result; - __asm__ ("ld1r {%0.2s}, %1" - : "=w"(result) - : "Utv"(*a) - : /* No clobbers */); - return result; -} - -__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) -vld1_dup_s64 (const int64_t * a) -{ - int64x1_t result; - __asm__ ("ld1r {%0.1d}, %1" - : "=w"(result) - : "Utv"(*a) - : /* No clobbers */); - return result; -} - -__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) -vld1_dup_u8 (const uint8_t * a) -{ - uint8x8_t result; - __asm__ ("ld1r {%0.8b}, %1" - : "=w"(result) - : "Utv"(*a) - : /* No clobbers */); - return result; -} - -__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) -vld1_dup_u16 (const uint16_t * a) -{ - uint16x4_t result; - __asm__ ("ld1r {%0.4h}, %1" - : "=w"(result) - : "Utv"(*a) - : /* No clobbers */); - return result; -} - -__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) -vld1_dup_u32 (const uint32_t * a) -{ - uint32x2_t result; - __asm__ ("ld1r {%0.2s}, %1" - : "=w"(result) - : "Utv"(*a) - : /* No clobbers */); - return result; -} - -__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) -vld1_dup_u64 (const uint64_t * a) -{ - uint64x1_t result; - __asm__ ("ld1r {%0.1d}, %1" - : "=w"(result) - : "Utv"(*a) - : /* No clobbers */); - return result; -} - -__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) -vld1q_dup_f32 (const float32_t * a) -{ - float32x4_t result; - __asm__ ("ld1r {%0.4s}, %1" - : "=w"(result) - : "Utv"(*a) - : /* No clobbers */); - return result; -} - -__extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) -vld1q_dup_f64 (const float64_t * a) -{ - float64x2_t result; - __asm__ ("ld1r {%0.2d}, %1" - : "=w"(result) - : "Utv"(*a) - : /* No clobbers */); - return result; -} - -__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) -vld1q_dup_p8 (const poly8_t * a) -{ - poly8x16_t result; - __asm__ ("ld1r {%0.16b}, %1" - : "=w"(result) - : "Utv"(*a) - : /* No clobbers */); - return result; -} - -__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__)) -vld1q_dup_p16 (const poly16_t * a) -{ - poly16x8_t result; - __asm__ ("ld1r {%0.8h}, %1" - : "=w"(result) - : "Utv"(*a) - : /* No clobbers */); - return result; -} - -__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) -vld1q_dup_s8 (const int8_t * a) -{ - int8x16_t result; - __asm__ ("ld1r {%0.16b}, %1" - : "=w"(result) - : "Utv"(*a) - : /* No clobbers */); - return result; -} - -__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) -vld1q_dup_s16 (const int16_t * a) -{ - int16x8_t result; - __asm__ ("ld1r {%0.8h}, %1" - : "=w"(result) - : "Utv"(*a) - : /* No clobbers */); - return result; -} - -__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) -vld1q_dup_s32 (const int32_t * a) -{ - int32x4_t result; - __asm__ ("ld1r {%0.4s}, %1" - : "=w"(result) - : "Utv"(*a) - : /* No clobbers */); - return result; -} - -__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) -vld1q_dup_s64 (const int64_t * a) -{ - int64x2_t result; - __asm__ ("ld1r {%0.2d}, %1" - : "=w"(result) - : "Utv"(*a) - : /* No clobbers */); - return result; -} - -__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) -vld1q_dup_u8 (const uint8_t * a) -{ - uint8x16_t result; - __asm__ ("ld1r {%0.16b}, %1" - : "=w"(result) - : "Utv"(*a) - : /* No clobbers */); - return result; -} - -__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) -vld1q_dup_u16 (const uint16_t * a) -{ - uint16x8_t result; - __asm__ ("ld1r {%0.8h}, %1" - : "=w"(result) - : "Utv"(*a) - : /* No clobbers */); - return result; -} - -__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) -vld1q_dup_u32 (const uint32_t * a) -{ - uint32x4_t result; - __asm__ ("ld1r {%0.4s}, %1" - : "=w"(result) - : "Utv"(*a) - : /* No clobbers */); - return result; -} - -__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) -vld1q_dup_u64 (const uint64_t * a) -{ - uint64x2_t result; - __asm__ ("ld1r {%0.2d}, %1" - : "=w"(result) - : "Utv"(*a) - : /* No clobbers */); - return result; -} - __extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) vmla_n_f32 (float32x2_t a, float32x2_t b, float32_t c) { @@ -16118,6 +15854,154 @@ vld1q_u64 (const uint64_t *a) __builtin_aarch64_ld1v2di ((const __builtin_aarch64_simd_di *) a); } +/* vld1_dup */ + +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vld1_dup_f32 (const float32_t* __a) +{ + return vdup_n_f32 (*__a); +} + +__extension__ static __inline float64x1_t __attribute__ ((__always_inline__)) +vld1_dup_f64 (const float64_t* __a) +{ + return vdup_n_f64 (*__a); +} + +__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) +vld1_dup_p8 (const poly8_t* __a) +{ + return vdup_n_p8 (*__a); +} + +__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__)) +vld1_dup_p16 (const poly16_t* __a) +{ + return vdup_n_p16 (*__a); +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vld1_dup_s8 (const int8_t* __a) +{ + return vdup_n_s8 (*__a); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vld1_dup_s16 (const int16_t* __a) +{ + return vdup_n_s16 (*__a); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vld1_dup_s32 (const int32_t* __a) +{ + return vdup_n_s32 (*__a); +} + +__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) +vld1_dup_s64 (const int64_t* __a) +{ + return vdup_n_s64 (*__a); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vld1_dup_u8 (const uint8_t* __a) +{ + return vdup_n_u8 (*__a); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vld1_dup_u16 (const uint16_t* __a) +{ + return vdup_n_u16 (*__a); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vld1_dup_u32 (const uint32_t* __a) +{ + return vdup_n_u32 (*__a); +} + +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vld1_dup_u64 (const uint64_t* __a) +{ + return vdup_n_u64 (*__a); +} + +/* vld1q_dup */ + +__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) +vld1q_dup_f32 (const float32_t* __a) +{ + return vdupq_n_f32 (*__a); +} + +__extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) +vld1q_dup_f64 (const float64_t* __a) +{ + return vdupq_n_f64 (*__a); +} + +__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) +vld1q_dup_p8 (const poly8_t* __a) +{ + return vdupq_n_p8 (*__a); +} + +__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__)) +vld1q_dup_p16 (const poly16_t* __a) +{ + return vdupq_n_p16 (*__a); +} + +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vld1q_dup_s8 (const int8_t* __a) +{ + return vdupq_n_s8 (*__a); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vld1q_dup_s16 (const int16_t* __a) +{ + return vdupq_n_s16 (*__a); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vld1q_dup_s32 (const int32_t* __a) +{ + return vdupq_n_s32 (*__a); +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vld1q_dup_s64 (const int64_t* __a) +{ + return vdupq_n_s64 (*__a); +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vld1q_dup_u8 (const uint8_t* __a) +{ + return vdupq_n_u8 (*__a); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vld1q_dup_u16 (const uint16_t* __a) +{ + return vdupq_n_u16 (*__a); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vld1q_dup_u32 (const uint32_t* __a) +{ + return vdupq_n_u32 (*__a); +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vld1q_dup_u64 (const uint64_t* __a) +{ + return vdupq_n_u64 (*__a); +} + /* vld1_lane */ __extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) -- 2.30.2