return __rv.__i;
}
+__extension__ extern __inline void
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vst2_lane_bf16 (bfloat16_t * __a, bfloat16x4x2_t __b, const int __c)
+{
+ union { bfloat16x4x2_t __i; __builtin_neon_ti __o; } __bu = { __b };
+ __builtin_neon_vst2_lanev4bf (__a, __bu.__o, __c);
+}
+
+__extension__ extern __inline void
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vst2q_lane_bf16 (bfloat16_t * __a, bfloat16x8x2_t __b, const int __c)
+{
+ union { bfloat16x8x2_t __i; __builtin_neon_oi __o; } __bu = { __b };
+ __builtin_neon_vst2_lanev8bf (__a, __bu.__o, __c);
+}
+
+__extension__ extern __inline void
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vst3_lane_bf16 (bfloat16_t * __a, bfloat16x4x3_t __b, const int __c)
+{
+ union { bfloat16x4x3_t __i; __builtin_neon_ei __o; } __bu = { __b };
+ __builtin_neon_vst3_lanev4bf (__a, __bu.__o, __c);
+}
+
+__extension__ extern __inline void
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vst3q_lane_bf16 (bfloat16_t * __a, bfloat16x8x3_t __b, const int __c)
+{
+ union { bfloat16x8x3_t __i; __builtin_neon_ci __o; } __bu = { __b };
+ __builtin_neon_vst3_lanev8bf (__a, __bu.__o, __c);
+}
+
+__extension__ extern __inline void
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vst4_lane_bf16 (bfloat16_t * __a, bfloat16x4x4_t __b, const int __c)
+{
+ union { bfloat16x4x4_t __i; __builtin_neon_oi __o; } __bu = { __b };
+ __builtin_neon_vst4_lanev4bf (__a, __bu.__o, __c);
+}
+
+__extension__ extern __inline void
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vst4q_lane_bf16 (bfloat16_t * __a, bfloat16x8x4_t __b, const int __c)
+{
+ union { bfloat16x8x4_t __i; __builtin_neon_xi __o; } __bu = { __b };
+ __builtin_neon_vst4_lanev8bf (__a, __bu.__o, __c);
+}
+
#pragma GCC pop_options
#ifdef __cplusplus
VAR8 (LOAD1, vld2_dup, v8qi, v4hi, v4hf, v2si, v2sf, di, v4bf, v8bf)
VAR13 (STORE1, vst2,
v8qi, v4hi, v4hf, v4bf, v2si, v2sf, di, v16qi, v8hi, v8hf, v8bf, v4si, v4sf)
-VAR9 (STORE1LANE, vst2_lane,
- v8qi, v4hi, v4hf, v2si, v2sf, v8hi, v8hf, v4si, v4sf)
+VAR11 (STORE1LANE, vst2_lane,
+ v8qi, v4hi, v4hf, v2si, v2sf, v8hi, v8hf, v4si, v4sf, v4bf, v8bf)
VAR13 (LOAD1, vld3,
v8qi, v4hi, v4hf, v2si, v2sf, di, v16qi, v8hi, v8hf, v4si, v4sf, v4bf, v8bf)
VAR11 (LOAD1LANE, vld3_lane,
VAR8 (LOAD1, vld3_dup, v8qi, v4hi, v4hf, v2si, v2sf, di, v4bf, v8bf)
VAR13 (STORE1, vst3,
v8qi, v4hi, v4hf, v4bf, v2si, v2sf, di, v16qi, v8hi, v8hf, v8bf, v4si, v4sf)
-VAR9 (STORE1LANE, vst3_lane,
- v8qi, v4hi, v4hf, v2si, v2sf, v8hi, v8hf, v4si, v4sf)
+VAR11 (STORE1LANE, vst3_lane,
+ v8qi, v4hi, v4hf, v2si, v2sf, v8hi, v8hf, v4si, v4sf, v4bf, v8bf)
VAR13 (LOAD1, vld4,
v8qi, v4hi, v4hf, v2si, v2sf, di, v16qi, v8hi, v8hf, v4si, v4sf, v4bf, v8bf)
VAR11 (LOAD1LANE, vld4_lane,
VAR8 (LOAD1, vld4_dup, v8qi, v4hi, v4hf, v2si, v2sf, di, v4bf, v8bf)
VAR13 (STORE1, vst4,
v8qi, v4hi, v4hf, v4bf, v2si, v2sf, di, v16qi, v8hi, v8hf, v8bf, v4si, v4sf)
-VAR9 (STORE1LANE, vst4_lane,
- v8qi, v4hi, v4hf, v2si, v2sf, v8hi, v8hf, v4si, v4sf)
+VAR11 (STORE1LANE, vst4_lane,
+ v8qi, v4hi, v4hf, v2si, v2sf, v8hi, v8hf, v4si, v4sf, v4bf, v8bf)
VAR2 (TERNOP, sdot, v8qi, v16qi)
VAR2 (UTERNOP, udot, v8qi, v16qi)
VAR2 (MAC_LANE, sdot_lane, v8qi, v16qi)
--- /dev/null
+/* { dg-do assemble } */
+/* { dg-require-effective-target arm_v8_2a_bf16_neon_ok } */
+/* { dg-add-options arm_v8_2a_bf16_neon } */
+/* { dg-additional-options "-save-temps -O2 -mfloat-abi=hard" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include "arm_neon.h"
+
+/*
+**test_vst2_lane_bf16:
+** vst2.16 {d0\[2\], d1\[2\]}, \[r0\]
+** bx lr
+*/
+void
+test_vst2_lane_bf16 (bfloat16_t *a, bfloat16x4x2_t b)
+{
+ return vst2_lane_bf16 (a, b, 2);
+}
+
+/*
+**test_vst2q_lane_bf16:
+** vst2.16 {d0\[2\], d2\[2\]}, \[r0\]
+** bx lr
+*/
+void
+test_vst2q_lane_bf16 (bfloat16_t *a, bfloat16x8x2_t b)
+{
+ return vst2q_lane_bf16 (a, b, 2);
+}
+
+/*
+**test_vst3_lane_bf16:
+** vst3.16 {d0\[2\], d1\[2\], d2\[2\]}, \[r0\]
+** bx lr
+*/
+void
+test_vst3_lane_bf16 (bfloat16_t *a, bfloat16x4x3_t b)
+{
+ return vst3_lane_bf16 (a, b, 2);
+}
+
+/*
+**test_vst3q_lane_bf16:
+** vst3.16 {d0\[2\], d2\[2\], d4\[2\]}, \[r0\]
+** bx lr
+*/
+void
+test_vst3q_lane_bf16 (bfloat16_t *a, bfloat16x8x3_t b)
+{
+ return vst3q_lane_bf16 (a, b, 2);
+}
+
+/*
+**test_vst4_lane_bf16:
+** vst4.16 {d0\[2\], d1\[2\], d2\[2\], d3\[2\]}, \[r0\]
+** bx lr
+*/
+void
+test_vst4_lane_bf16 (bfloat16_t *a, bfloat16x4x4_t b)
+{
+ return vst4_lane_bf16 (a, b, 2);
+}
+
+/*
+**test_vst4q_lane_bf16:
+** vst4.16 {d0\[2\], d2\[2\], d4\[2\], d6\[2\]}, \[r0\]
+** bx lr
+*/
+void
+test_vst4q_lane_bf16 (bfloat16_t *a, bfloat16x8x4_t b)
+{
+ return vst4q_lane_bf16 (a, b, 2);
+}