result; \
})
-#define vst1_lane_f32(a, b, c) \
- __extension__ \
- ({ \
- float32x2_t b_ = (b); \
- float32_t * a_ = (a); \
- __asm__ ("st1 {%1.s}[%2],[%0]" \
- : \
- : "r"(a_), "w"(b_), "i"(c) \
- : "memory"); \
- })
-
-#define vst1_lane_f64(a, b, c) \
- __extension__ \
- ({ \
- float64x1_t b_ = (b); \
- float64_t * a_ = (a); \
- __asm__ ("st1 {%1.d}[%2],[%0]" \
- : \
- : "r"(a_), "w"(b_), "i"(c) \
- : "memory"); \
- })
-
-#define vst1_lane_p8(a, b, c) \
- __extension__ \
- ({ \
- poly8x8_t b_ = (b); \
- poly8_t * a_ = (a); \
- __asm__ ("st1 {%1.b}[%2],[%0]" \
- : \
- : "r"(a_), "w"(b_), "i"(c) \
- : "memory"); \
- })
-
-#define vst1_lane_p16(a, b, c) \
- __extension__ \
- ({ \
- poly16x4_t b_ = (b); \
- poly16_t * a_ = (a); \
- __asm__ ("st1 {%1.h}[%2],[%0]" \
- : \
- : "r"(a_), "w"(b_), "i"(c) \
- : "memory"); \
- })
-
-#define vst1_lane_s8(a, b, c) \
- __extension__ \
- ({ \
- int8x8_t b_ = (b); \
- int8_t * a_ = (a); \
- __asm__ ("st1 {%1.b}[%2],[%0]" \
- : \
- : "r"(a_), "w"(b_), "i"(c) \
- : "memory"); \
- })
-
-#define vst1_lane_s16(a, b, c) \
- __extension__ \
- ({ \
- int16x4_t b_ = (b); \
- int16_t * a_ = (a); \
- __asm__ ("st1 {%1.h}[%2],[%0]" \
- : \
- : "r"(a_), "w"(b_), "i"(c) \
- : "memory"); \
- })
-
-#define vst1_lane_s32(a, b, c) \
- __extension__ \
- ({ \
- int32x2_t b_ = (b); \
- int32_t * a_ = (a); \
- __asm__ ("st1 {%1.s}[%2],[%0]" \
- : \
- : "r"(a_), "w"(b_), "i"(c) \
- : "memory"); \
- })
-
-#define vst1_lane_s64(a, b, c) \
- __extension__ \
- ({ \
- int64x1_t b_ = (b); \
- int64_t * a_ = (a); \
- __asm__ ("st1 {%1.d}[%2],[%0]" \
- : \
- : "r"(a_), "w"(b_), "i"(c) \
- : "memory"); \
- })
-
-#define vst1_lane_u8(a, b, c) \
- __extension__ \
- ({ \
- uint8x8_t b_ = (b); \
- uint8_t * a_ = (a); \
- __asm__ ("st1 {%1.b}[%2],[%0]" \
- : \
- : "r"(a_), "w"(b_), "i"(c) \
- : "memory"); \
- })
-
-#define vst1_lane_u16(a, b, c) \
- __extension__ \
- ({ \
- uint16x4_t b_ = (b); \
- uint16_t * a_ = (a); \
- __asm__ ("st1 {%1.h}[%2],[%0]" \
- : \
- : "r"(a_), "w"(b_), "i"(c) \
- : "memory"); \
- })
-
-#define vst1_lane_u32(a, b, c) \
- __extension__ \
- ({ \
- uint32x2_t b_ = (b); \
- uint32_t * a_ = (a); \
- __asm__ ("st1 {%1.s}[%2],[%0]" \
- : \
- : "r"(a_), "w"(b_), "i"(c) \
- : "memory"); \
- })
-
-#define vst1_lane_u64(a, b, c) \
- __extension__ \
- ({ \
- uint64x1_t b_ = (b); \
- uint64_t * a_ = (a); \
- __asm__ ("st1 {%1.d}[%2],[%0]" \
- : \
- : "r"(a_), "w"(b_), "i"(c) \
- : "memory"); \
- })
-
-
-#define vst1q_lane_f32(a, b, c) \
- __extension__ \
- ({ \
- float32x4_t b_ = (b); \
- float32_t * a_ = (a); \
- __asm__ ("st1 {%1.s}[%2],[%0]" \
- : \
- : "r"(a_), "w"(b_), "i"(c) \
- : "memory"); \
- })
-
-#define vst1q_lane_f64(a, b, c) \
- __extension__ \
- ({ \
- float64x2_t b_ = (b); \
- float64_t * a_ = (a); \
- __asm__ ("st1 {%1.d}[%2],[%0]" \
- : \
- : "r"(a_), "w"(b_), "i"(c) \
- : "memory"); \
- })
-
-#define vst1q_lane_p8(a, b, c) \
- __extension__ \
- ({ \
- poly8x16_t b_ = (b); \
- poly8_t * a_ = (a); \
- __asm__ ("st1 {%1.b}[%2],[%0]" \
- : \
- : "r"(a_), "w"(b_), "i"(c) \
- : "memory"); \
- })
-
-#define vst1q_lane_p16(a, b, c) \
- __extension__ \
- ({ \
- poly16x8_t b_ = (b); \
- poly16_t * a_ = (a); \
- __asm__ ("st1 {%1.h}[%2],[%0]" \
- : \
- : "r"(a_), "w"(b_), "i"(c) \
- : "memory"); \
- })
-
-#define vst1q_lane_s8(a, b, c) \
- __extension__ \
- ({ \
- int8x16_t b_ = (b); \
- int8_t * a_ = (a); \
- __asm__ ("st1 {%1.b}[%2],[%0]" \
- : \
- : "r"(a_), "w"(b_), "i"(c) \
- : "memory"); \
- })
-
-#define vst1q_lane_s16(a, b, c) \
- __extension__ \
- ({ \
- int16x8_t b_ = (b); \
- int16_t * a_ = (a); \
- __asm__ ("st1 {%1.h}[%2],[%0]" \
- : \
- : "r"(a_), "w"(b_), "i"(c) \
- : "memory"); \
- })
-
-#define vst1q_lane_s32(a, b, c) \
- __extension__ \
- ({ \
- int32x4_t b_ = (b); \
- int32_t * a_ = (a); \
- __asm__ ("st1 {%1.s}[%2],[%0]" \
- : \
- : "r"(a_), "w"(b_), "i"(c) \
- : "memory"); \
- })
-
-#define vst1q_lane_s64(a, b, c) \
- __extension__ \
- ({ \
- int64x2_t b_ = (b); \
- int64_t * a_ = (a); \
- __asm__ ("st1 {%1.d}[%2],[%0]" \
- : \
- : "r"(a_), "w"(b_), "i"(c) \
- : "memory"); \
- })
-
-#define vst1q_lane_u8(a, b, c) \
- __extension__ \
- ({ \
- uint8x16_t b_ = (b); \
- uint8_t * a_ = (a); \
- __asm__ ("st1 {%1.b}[%2],[%0]" \
- : \
- : "r"(a_), "w"(b_), "i"(c) \
- : "memory"); \
- })
-
-#define vst1q_lane_u16(a, b, c) \
- __extension__ \
- ({ \
- uint16x8_t b_ = (b); \
- uint16_t * a_ = (a); \
- __asm__ ("st1 {%1.h}[%2],[%0]" \
- : \
- : "r"(a_), "w"(b_), "i"(c) \
- : "memory"); \
- })
-
-#define vst1q_lane_u32(a, b, c) \
- __extension__ \
- ({ \
- uint32x4_t b_ = (b); \
- uint32_t * a_ = (a); \
- __asm__ ("st1 {%1.s}[%2],[%0]" \
- : \
- : "r"(a_), "w"(b_), "i"(c) \
- : "memory"); \
- })
-
-#define vst1q_lane_u64(a, b, c) \
- __extension__ \
- ({ \
- uint64x2_t b_ = (b); \
- uint64_t * a_ = (a); \
- __asm__ ("st1 {%1.d}[%2],[%0]" \
- : \
- : "r"(a_), "w"(b_), "i"(c) \
- : "memory"); \
- })
-
-
__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vtst_p8 (poly8x8_t a, poly8x8_t b)
{
*a = b[0];
}
+/* vst1q */
+
__extension__ static __inline void __attribute__ ((__always_inline__))
vst1q_f32 (float32_t *a, float32x4_t b)
{
__builtin_aarch64_st1v2df ((__builtin_aarch64_simd_df *) a, b);
}
-/* vst1q */
-
__extension__ static __inline void __attribute__ ((__always_inline__))
vst1q_p8 (poly8_t *a, poly8x16_t b)
{
(int64x2_t) b);
}
+/* vst1_lane */
+
+/* Store to *__a the value __aarch64_vget_lane_any yields for vector __b
+   and index __lane.  The helper is defined outside this chunk;
+   presumably it is a plain lane extract -- confirm at its definition.  */
+__extension__ static __inline void __attribute__ ((__always_inline__))
+vst1_lane_f32 (float32_t *__a, float32x2_t __b, const int __lane)
+{
+  float32_t __elt = __aarch64_vget_lane_any (__b, __lane);
+  __a[0] = __elt;
+}
+
+/* Store to *__a the value __aarch64_vget_lane_any yields for vector __b
+   and index __lane.  The helper is defined outside this chunk;
+   presumably it is a plain lane extract -- confirm at its definition.  */
+__extension__ static __inline void __attribute__ ((__always_inline__))
+vst1_lane_f64 (float64_t *__a, float64x1_t __b, const int __lane)
+{
+  float64_t __elt = __aarch64_vget_lane_any (__b, __lane);
+  __a[0] = __elt;
+}
+
+/* Store to *__a the value __aarch64_vget_lane_any yields for vector __b
+   and index __lane.  The helper is defined outside this chunk;
+   presumably it is a plain lane extract -- confirm at its definition.  */
+__extension__ static __inline void __attribute__ ((__always_inline__))
+vst1_lane_p8 (poly8_t *__a, poly8x8_t __b, const int __lane)
+{
+  poly8_t __elt = __aarch64_vget_lane_any (__b, __lane);
+  __a[0] = __elt;
+}
+
+/* Store to *__a the value __aarch64_vget_lane_any yields for vector __b
+   and index __lane.  The helper is defined outside this chunk;
+   presumably it is a plain lane extract -- confirm at its definition.  */
+__extension__ static __inline void __attribute__ ((__always_inline__))
+vst1_lane_p16 (poly16_t *__a, poly16x4_t __b, const int __lane)
+{
+  poly16_t __elt = __aarch64_vget_lane_any (__b, __lane);
+  __a[0] = __elt;
+}
+
+/* Store to *__a the value __aarch64_vget_lane_any yields for vector __b
+   and index __lane.  The helper is defined outside this chunk;
+   presumably it is a plain lane extract -- confirm at its definition.  */
+__extension__ static __inline void __attribute__ ((__always_inline__))
+vst1_lane_s8 (int8_t *__a, int8x8_t __b, const int __lane)
+{
+  int8_t __elt = __aarch64_vget_lane_any (__b, __lane);
+  __a[0] = __elt;
+}
+
+/* Store to *__a the value __aarch64_vget_lane_any yields for vector __b
+   and index __lane.  The helper is defined outside this chunk;
+   presumably it is a plain lane extract -- confirm at its definition.  */
+__extension__ static __inline void __attribute__ ((__always_inline__))
+vst1_lane_s16 (int16_t *__a, int16x4_t __b, const int __lane)
+{
+  int16_t __elt = __aarch64_vget_lane_any (__b, __lane);
+  __a[0] = __elt;
+}
+
+/* Store to *__a the value __aarch64_vget_lane_any yields for vector __b
+   and index __lane.  The helper is defined outside this chunk;
+   presumably it is a plain lane extract -- confirm at its definition.  */
+__extension__ static __inline void __attribute__ ((__always_inline__))
+vst1_lane_s32 (int32_t *__a, int32x2_t __b, const int __lane)
+{
+  int32_t __elt = __aarch64_vget_lane_any (__b, __lane);
+  __a[0] = __elt;
+}
+
+/* Store to *__a the value __aarch64_vget_lane_any yields for vector __b
+   and index __lane.  The helper is defined outside this chunk;
+   presumably it is a plain lane extract -- confirm at its definition.  */
+__extension__ static __inline void __attribute__ ((__always_inline__))
+vst1_lane_s64 (int64_t *__a, int64x1_t __b, const int __lane)
+{
+  int64_t __elt = __aarch64_vget_lane_any (__b, __lane);
+  __a[0] = __elt;
+}
+
+/* Store to *__a the value __aarch64_vget_lane_any yields for vector __b
+   and index __lane.  The helper is defined outside this chunk;
+   presumably it is a plain lane extract -- confirm at its definition.  */
+__extension__ static __inline void __attribute__ ((__always_inline__))
+vst1_lane_u8 (uint8_t *__a, uint8x8_t __b, const int __lane)
+{
+  uint8_t __elt = __aarch64_vget_lane_any (__b, __lane);
+  __a[0] = __elt;
+}
+
+/* Store to *__a the value __aarch64_vget_lane_any yields for vector __b
+   and index __lane.  The helper is defined outside this chunk;
+   presumably it is a plain lane extract -- confirm at its definition.  */
+__extension__ static __inline void __attribute__ ((__always_inline__))
+vst1_lane_u16 (uint16_t *__a, uint16x4_t __b, const int __lane)
+{
+  uint16_t __elt = __aarch64_vget_lane_any (__b, __lane);
+  __a[0] = __elt;
+}
+
+/* Store to *__a the value __aarch64_vget_lane_any yields for vector __b
+   and index __lane.  The helper is defined outside this chunk;
+   presumably it is a plain lane extract -- confirm at its definition.  */
+__extension__ static __inline void __attribute__ ((__always_inline__))
+vst1_lane_u32 (uint32_t *__a, uint32x2_t __b, const int __lane)
+{
+  uint32_t __elt = __aarch64_vget_lane_any (__b, __lane);
+  __a[0] = __elt;
+}
+
+/* Store to *__a the value __aarch64_vget_lane_any yields for vector __b
+   and index __lane.  The helper is defined outside this chunk;
+   presumably it is a plain lane extract -- confirm at its definition.  */
+__extension__ static __inline void __attribute__ ((__always_inline__))
+vst1_lane_u64 (uint64_t *__a, uint64x1_t __b, const int __lane)
+{
+  uint64_t __elt = __aarch64_vget_lane_any (__b, __lane);
+  __a[0] = __elt;
+}
+
+/* vst1q_lane */
+
+/* Store to *__a the value __aarch64_vget_lane_any yields for vector __b
+   and index __lane.  The helper is defined outside this chunk;
+   presumably it is a plain lane extract -- confirm at its definition.  */
+__extension__ static __inline void __attribute__ ((__always_inline__))
+vst1q_lane_f32 (float32_t *__a, float32x4_t __b, const int __lane)
+{
+  float32_t __elt = __aarch64_vget_lane_any (__b, __lane);
+  __a[0] = __elt;
+}
+
+/* Store to *__a the value __aarch64_vget_lane_any yields for vector __b
+   and index __lane.  The helper is defined outside this chunk;
+   presumably it is a plain lane extract -- confirm at its definition.  */
+__extension__ static __inline void __attribute__ ((__always_inline__))
+vst1q_lane_f64 (float64_t *__a, float64x2_t __b, const int __lane)
+{
+  float64_t __elt = __aarch64_vget_lane_any (__b, __lane);
+  __a[0] = __elt;
+}
+
+/* Store to *__a the value __aarch64_vget_lane_any yields for vector __b
+   and index __lane.  The helper is defined outside this chunk;
+   presumably it is a plain lane extract -- confirm at its definition.  */
+__extension__ static __inline void __attribute__ ((__always_inline__))
+vst1q_lane_p8 (poly8_t *__a, poly8x16_t __b, const int __lane)
+{
+  poly8_t __elt = __aarch64_vget_lane_any (__b, __lane);
+  __a[0] = __elt;
+}
+
+/* Store to *__a the value __aarch64_vget_lane_any yields for vector __b
+   and index __lane.  The helper is defined outside this chunk;
+   presumably it is a plain lane extract -- confirm at its definition.  */
+__extension__ static __inline void __attribute__ ((__always_inline__))
+vst1q_lane_p16 (poly16_t *__a, poly16x8_t __b, const int __lane)
+{
+  poly16_t __elt = __aarch64_vget_lane_any (__b, __lane);
+  __a[0] = __elt;
+}
+
+/* Store to *__a the value __aarch64_vget_lane_any yields for vector __b
+   and index __lane.  The helper is defined outside this chunk;
+   presumably it is a plain lane extract -- confirm at its definition.  */
+__extension__ static __inline void __attribute__ ((__always_inline__))
+vst1q_lane_s8 (int8_t *__a, int8x16_t __b, const int __lane)
+{
+  int8_t __elt = __aarch64_vget_lane_any (__b, __lane);
+  __a[0] = __elt;
+}
+
+/* Store to *__a the value __aarch64_vget_lane_any yields for vector __b
+   and index __lane.  The helper is defined outside this chunk;
+   presumably it is a plain lane extract -- confirm at its definition.  */
+__extension__ static __inline void __attribute__ ((__always_inline__))
+vst1q_lane_s16 (int16_t *__a, int16x8_t __b, const int __lane)
+{
+  int16_t __elt = __aarch64_vget_lane_any (__b, __lane);
+  __a[0] = __elt;
+}
+
+/* Store to *__a the value __aarch64_vget_lane_any yields for vector __b
+   and index __lane.  The helper is defined outside this chunk;
+   presumably it is a plain lane extract -- confirm at its definition.  */
+__extension__ static __inline void __attribute__ ((__always_inline__))
+vst1q_lane_s32 (int32_t *__a, int32x4_t __b, const int __lane)
+{
+  int32_t __elt = __aarch64_vget_lane_any (__b, __lane);
+  __a[0] = __elt;
+}
+
+/* Store to *__a the value __aarch64_vget_lane_any yields for vector __b
+   and index __lane.  The helper is defined outside this chunk;
+   presumably it is a plain lane extract -- confirm at its definition.  */
+__extension__ static __inline void __attribute__ ((__always_inline__))
+vst1q_lane_s64 (int64_t *__a, int64x2_t __b, const int __lane)
+{
+  int64_t __elt = __aarch64_vget_lane_any (__b, __lane);
+  __a[0] = __elt;
+}
+
+/* Store to *__a the value __aarch64_vget_lane_any yields for vector __b
+   and index __lane.  The helper is defined outside this chunk;
+   presumably it is a plain lane extract -- confirm at its definition.  */
+__extension__ static __inline void __attribute__ ((__always_inline__))
+vst1q_lane_u8 (uint8_t *__a, uint8x16_t __b, const int __lane)
+{
+  uint8_t __elt = __aarch64_vget_lane_any (__b, __lane);
+  __a[0] = __elt;
+}
+
+/* Store to *__a the value __aarch64_vget_lane_any yields for vector __b
+   and index __lane.  The helper is defined outside this chunk;
+   presumably it is a plain lane extract -- confirm at its definition.  */
+__extension__ static __inline void __attribute__ ((__always_inline__))
+vst1q_lane_u16 (uint16_t *__a, uint16x8_t __b, const int __lane)
+{
+  uint16_t __elt = __aarch64_vget_lane_any (__b, __lane);
+  __a[0] = __elt;
+}
+
+/* Store to *__a the value __aarch64_vget_lane_any yields for vector __b
+   and index __lane.  The helper is defined outside this chunk;
+   presumably it is a plain lane extract -- confirm at its definition.  */
+__extension__ static __inline void __attribute__ ((__always_inline__))
+vst1q_lane_u32 (uint32_t *__a, uint32x4_t __b, const int __lane)
+{
+  uint32_t __elt = __aarch64_vget_lane_any (__b, __lane);
+  __a[0] = __elt;
+}
+
+/* Store to *__a the value __aarch64_vget_lane_any yields for vector __b
+   and index __lane.  The helper is defined outside this chunk;
+   presumably it is a plain lane extract -- confirm at its definition.  */
+__extension__ static __inline void __attribute__ ((__always_inline__))
+vst1q_lane_u64 (uint64_t *__a, uint64x2_t __b, const int __lane)
+{
+  uint64_t __elt = __aarch64_vget_lane_any (__b, __lane);
+  __a[0] = __elt;
+}
+
/* vstn */
__extension__ static __inline void